All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH qemu 1/9] target/riscv: rvv: Add mask agnostic for vv instructions
  2022-04-25 14:18 ` ~eopxd
@ 2022-04-25 14:18   ` ~eopxd
  -1 siblings, 0 replies; 27+ messages in thread
From: ~eopxd @ 2022-03-17  7:26 UTC (permalink / raw)
  To: qemu-devel, qemu-riscv
  Cc: WeiWei Li, Frank Chang, eop Chen, Bin Meng, Alistair Francis,
	Palmer Dabbelt

From: Yueh-Ting (eop) Chen <eop.chen@sifive.com>

This is the first commit regarding the mask agnostic behavior.
Added option 'rvv_ma_all_1s' to enable the behavior, the option
is default to false.

Signed-off-by: eop Chen <eop.chen@sifive.com>
Reviewed-by: Frank Chang <frank.chang@sifive.com>
---
 target/riscv/cpu.c                      | 1 +
 target/riscv/cpu.h                      | 2 ++
 target/riscv/cpu_helper.c               | 2 ++
 target/riscv/insn_trans/trans_rvv.c.inc | 3 +++
 target/riscv/internals.h                | 5 +++--
 target/riscv/translate.c                | 2 ++
 target/riscv/vector_helper.c            | 8 ++++++++
 7 files changed, 21 insertions(+), 2 deletions(-)

diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
index cd4cf4b41e..2bf862a5e4 100644
--- a/target/riscv/cpu.c
+++ b/target/riscv/cpu.c
@@ -811,6 +811,7 @@ static Property riscv_cpu_properties[] = {
 
     DEFINE_PROP_UINT64("resetvec", RISCVCPU, cfg.resetvec, DEFAULT_RSTVEC),
     DEFINE_PROP_BOOL("rvv_ta_all_1s", RISCVCPU, cfg.rvv_ta_all_1s, false),
+    DEFINE_PROP_BOOL("rvv_ma_all_1s", RISCVCPU, cfg.rvv_ma_all_1s, false),
     DEFINE_PROP_END_OF_LIST(),
 };
 
diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h
index 8c4a79b5a0..c76ded515b 100644
--- a/target/riscv/cpu.h
+++ b/target/riscv/cpu.h
@@ -370,6 +370,7 @@ struct RISCVCPUConfig {
     bool ext_zve32f;
     bool ext_zve64f;
     bool rvv_ta_all_1s;
+    bool rvv_ma_all_1s;
 
     /* Vendor-specific custom extensions */
     bool ext_XVentanaCondOps;
@@ -518,6 +519,7 @@ FIELD(TB_FLAGS, XL, 20, 2)
 FIELD(TB_FLAGS, PM_MASK_ENABLED, 22, 1)
 FIELD(TB_FLAGS, PM_BASE_ENABLED, 23, 1)
 FIELD(TB_FLAGS, VTA, 24, 1)
+FIELD(TB_FLAGS, VMA, 25, 1)
 
 #ifdef TARGET_RISCV32
 #define riscv_cpu_mxl(env)  ((void)(env), MXL_RV32)
diff --git a/target/riscv/cpu_helper.c b/target/riscv/cpu_helper.c
index 2941c88c31..be94d82ff8 100644
--- a/target/riscv/cpu_helper.c
+++ b/target/riscv/cpu_helper.c
@@ -67,6 +67,8 @@ void cpu_get_tb_cpu_state(CPURISCVState *env, target_ulong *pc,
         flags = FIELD_DP32(flags, TB_FLAGS, VL_EQ_VLMAX, vl_eq_vlmax);
         flags = FIELD_DP32(flags, TB_FLAGS, VTA,
                     FIELD_EX64(env->vtype, VTYPE, VTA));
+        flags = FIELD_DP32(flags, TB_FLAGS, VMA,
+                    FIELD_EX64(env->vtype, VTYPE, VMA));
     } else {
         flags = FIELD_DP32(flags, TB_FLAGS, VILL, 1);
     }
diff --git a/target/riscv/insn_trans/trans_rvv.c.inc b/target/riscv/insn_trans/trans_rvv.c.inc
index 8d87501e03..4610107fb4 100644
--- a/target/riscv/insn_trans/trans_rvv.c.inc
+++ b/target/riscv/insn_trans/trans_rvv.c.inc
@@ -1251,6 +1251,7 @@ do_opivv_gvec(DisasContext *s, arg_rmrr *a, GVecGen3Fn *gvec_fn,
         data = FIELD_DP32(data, VDATA, VM, a->vm);
         data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
         data = FIELD_DP32(data, VDATA, VTA, s->vta);
+        data = FIELD_DP32(data, VDATA, VMA, s->vma);
         tcg_gen_gvec_4_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0),
                            vreg_ofs(s, a->rs1), vreg_ofs(s, a->rs2),
                            cpu_env, s->cfg_ptr->vlen / 8,
@@ -1567,6 +1568,7 @@ static bool do_opivv_widen(DisasContext *s, arg_rmrr *a,
         data = FIELD_DP32(data, VDATA, VM, a->vm);
         data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
         data = FIELD_DP32(data, VDATA, VTA, s->vta);
+        data = FIELD_DP32(data, VDATA, VMA, s->vma);
         tcg_gen_gvec_4_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0),
                            vreg_ofs(s, a->rs1),
                            vreg_ofs(s, a->rs2),
@@ -1649,6 +1651,7 @@ static bool do_opiwv_widen(DisasContext *s, arg_rmrr *a,
         data = FIELD_DP32(data, VDATA, VM, a->vm);
         data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
         data = FIELD_DP32(data, VDATA, VTA, s->vta);
+        data = FIELD_DP32(data, VDATA, VMA, s->vma);
         tcg_gen_gvec_4_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0),
                            vreg_ofs(s, a->rs1),
                            vreg_ofs(s, a->rs2),
diff --git a/target/riscv/internals.h b/target/riscv/internals.h
index 512c6c30cf..00b72fd767 100644
--- a/target/riscv/internals.h
+++ b/target/riscv/internals.h
@@ -25,8 +25,9 @@
 FIELD(VDATA, VM, 0, 1)
 FIELD(VDATA, LMUL, 1, 3)
 FIELD(VDATA, VTA, 4, 1)
-FIELD(VDATA, NF, 5, 4)
-FIELD(VDATA, WD, 5, 1)
+FIELD(VDATA, VMA, 5, 1)
+FIELD(VDATA, NF, 6, 4)
+FIELD(VDATA, WD, 6, 1)
 
 /* float point classify helpers */
 target_ulong fclass_h(uint64_t frs1);
diff --git a/target/riscv/translate.c b/target/riscv/translate.c
index 7775dade26..37893aa348 100644
--- a/target/riscv/translate.c
+++ b/target/riscv/translate.c
@@ -95,6 +95,7 @@ typedef struct DisasContext {
     int8_t lmul;
     uint8_t sew;
     uint8_t vta;
+    uint8_t vma;
     target_ulong vstart;
     bool vl_eq_vlmax;
     uint8_t ntemp;
@@ -1085,6 +1086,7 @@ static void riscv_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs)
     ctx->sew = FIELD_EX32(tb_flags, TB_FLAGS, SEW);
     ctx->lmul = sextract32(FIELD_EX32(tb_flags, TB_FLAGS, LMUL), 0, 3);
     ctx->vta = FIELD_EX32(tb_flags, TB_FLAGS, VTA) && cpu->cfg.rvv_ta_all_1s;
+    ctx->vma = FIELD_EX32(tb_flags, TB_FLAGS, VMA) && cpu->cfg.rvv_ma_all_1s;
     ctx->vstart = env->vstart;
     ctx->vl_eq_vlmax = FIELD_EX32(tb_flags, TB_FLAGS, VL_EQ_VLMAX);
     ctx->misa_mxl_max = env->misa_mxl_max;
diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c
index fbde0c9248..141a06ddf0 100644
--- a/target/riscv/vector_helper.c
+++ b/target/riscv/vector_helper.c
@@ -127,6 +127,11 @@ static inline uint32_t vext_vta(uint32_t desc)
     return FIELD_EX32(simd_data(desc), VDATA, VTA);
 }
 
+static inline uint32_t vext_vma(uint32_t desc)
+{
+    return FIELD_EX32(simd_data(desc), VDATA, VMA);
+}
+
 /*
  * Get the maximum number of elements can be operated.
  *
@@ -797,10 +802,13 @@ static void do_vext_vv(void *vd, void *v0, void *vs1, void *vs2,
     uint32_t vl = env->vl;
     uint32_t total_elems = vext_get_total_elems(desc, esz);
     uint32_t vta = vext_vta(desc);
+    uint32_t vma = vext_vma(desc);
     uint32_t i;
 
     for (i = env->vstart; i < vl; i++) {
         if (!vm && !vext_elem_mask(v0, i)) {
+            /* set masked-off elements to 1s */
+            vext_set_elems_1s_fns[ctzl(esz)](vd, vma, i, i * esz, (i + 1) * esz);
             continue;
         }
         fn(vd, vs1, vs2, i);
-- 
2.34.2



^ permalink raw reply related	[flat|nested] 27+ messages in thread

* [PATCH qemu 2/9] target/riscv: rvv: Add mask agnostic for vector load / store instructions
  2022-04-25 14:18 ` ~eopxd
@ 2022-04-25 14:18   ` ~eopxd
  -1 siblings, 0 replies; 27+ messages in thread
From: ~eopxd @ 2022-03-17  7:47 UTC (permalink / raw)
  To: qemu-devel, qemu-riscv
  Cc: WeiWei Li, Frank Chang, eop Chen, Bin Meng, Alistair Francis,
	Palmer Dabbelt

From: Yueh-Ting (eop) Chen <eop.chen@sifive.com>

Signed-off-by: eop Chen <eop.chen@sifive.com>
Reviewed-by: Frank Chang <frank.chang@sifive.com>
---
 target/riscv/insn_trans/trans_rvv.c.inc |  9 +++++++++
 target/riscv/vector_helper.c            | 20 ++++++++++++++++++++
 2 files changed, 29 insertions(+)

diff --git a/target/riscv/insn_trans/trans_rvv.c.inc b/target/riscv/insn_trans/trans_rvv.c.inc
index 4610107fb4..4e141e5145 100644
--- a/target/riscv/insn_trans/trans_rvv.c.inc
+++ b/target/riscv/insn_trans/trans_rvv.c.inc
@@ -712,6 +712,7 @@ static bool ld_us_op(DisasContext *s, arg_r2nfvm *a, uint8_t eew)
     data = FIELD_DP32(data, VDATA, LMUL, emul);
     data = FIELD_DP32(data, VDATA, NF, a->nf);
     data = FIELD_DP32(data, VDATA, VTA, s->vta);
+    data = FIELD_DP32(data, VDATA, VMA, s->vma);
     return ldst_us_trans(a->rd, a->rs1, data, fn, s, false);
 }
 
@@ -750,6 +751,7 @@ static bool st_us_op(DisasContext *s, arg_r2nfvm *a, uint8_t eew)
     data = FIELD_DP32(data, VDATA, LMUL, emul);
     data = FIELD_DP32(data, VDATA, NF, a->nf);
     data = FIELD_DP32(data, VDATA, VTA, s->vta);
+    data = FIELD_DP32(data, VDATA, VMA, s->vma);
     return ldst_us_trans(a->rd, a->rs1, data, fn, s, true);
 }
 
@@ -777,6 +779,7 @@ static bool ld_us_mask_op(DisasContext *s, arg_vlm_v *a, uint8_t eew)
     data = FIELD_DP32(data, VDATA, LMUL, 0);
     data = FIELD_DP32(data, VDATA, NF, 1);
     data = FIELD_DP32(data, VDATA, VTA, s->vta);
+    data = FIELD_DP32(data, VDATA, VMA, s->vma);
     return ldst_us_trans(a->rd, a->rs1, data, fn, s, false);
 }
 
@@ -795,6 +798,7 @@ static bool st_us_mask_op(DisasContext *s, arg_vsm_v *a, uint8_t eew)
     data = FIELD_DP32(data, VDATA, LMUL, 0);
     data = FIELD_DP32(data, VDATA, NF, 1);
     data = FIELD_DP32(data, VDATA, VTA, s->vta);
+    data = FIELD_DP32(data, VDATA, VMA, s->vma);
     return ldst_us_trans(a->rd, a->rs1, data, fn, s, true);
 }
 
@@ -867,6 +871,7 @@ static bool ld_stride_op(DisasContext *s, arg_rnfvm *a, uint8_t eew)
     data = FIELD_DP32(data, VDATA, LMUL, emul);
     data = FIELD_DP32(data, VDATA, NF, a->nf);
     data = FIELD_DP32(data, VDATA, VTA, s->vta);
+    data = FIELD_DP32(data, VDATA, VMA, s->vma);
     return ldst_stride_trans(a->rd, a->rs1, a->rs2, data, fn, s, false);
 }
 
@@ -897,6 +902,7 @@ static bool st_stride_op(DisasContext *s, arg_rnfvm *a, uint8_t eew)
     data = FIELD_DP32(data, VDATA, LMUL, emul);
     data = FIELD_DP32(data, VDATA, NF, a->nf);
     data = FIELD_DP32(data, VDATA, VTA, s->vta);
+    data = FIELD_DP32(data, VDATA, VMA, s->vma);
     fn = fns[eew];
     if (fn == NULL) {
         return false;
@@ -998,6 +1004,7 @@ static bool ld_index_op(DisasContext *s, arg_rnfvm *a, uint8_t eew)
     data = FIELD_DP32(data, VDATA, LMUL, emul);
     data = FIELD_DP32(data, VDATA, NF, a->nf);
     data = FIELD_DP32(data, VDATA, VTA, s->vta);
+    data = FIELD_DP32(data, VDATA, VMA, s->vma);
     return ldst_index_trans(a->rd, a->rs1, a->rs2, data, fn, s, false);
 }
 
@@ -1051,6 +1058,7 @@ static bool st_index_op(DisasContext *s, arg_rnfvm *a, uint8_t eew)
     data = FIELD_DP32(data, VDATA, LMUL, emul);
     data = FIELD_DP32(data, VDATA, NF, a->nf);
     data = FIELD_DP32(data, VDATA, VTA, s->vta);
+    data = FIELD_DP32(data, VDATA, VMA, s->vma);
     return ldst_index_trans(a->rd, a->rs1, a->rs2, data, fn, s, true);
 }
 
@@ -1117,6 +1125,7 @@ static bool ldff_op(DisasContext *s, arg_r2nfvm *a, uint8_t eew)
     data = FIELD_DP32(data, VDATA, LMUL, emul);
     data = FIELD_DP32(data, VDATA, NF, a->nf);
     data = FIELD_DP32(data, VDATA, VTA, s->vta);
+    data = FIELD_DP32(data, VDATA, VMA, s->vma);
     return ldff_trans(a->rd, a->rs1, data, fn, s);
 }
 
diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c
index 141a06ddf0..bd84b0409c 100644
--- a/target/riscv/vector_helper.c
+++ b/target/riscv/vector_helper.c
@@ -296,9 +296,17 @@ vext_ldst_stride(void *vd, void *v0, target_ulong base,
     uint32_t max_elems = vext_max_elems(desc, log2_esz);
     uint32_t esz = 1 << log2_esz;
     uint32_t vta = vext_vta(desc);
+    uint32_t vma = vext_vma(desc);
 
     for (i = env->vstart; i < env->vl; i++, env->vstart++) {
         if (!vm && !vext_elem_mask(v0, i)) {
+            k = 0;
+            while (k < nf) {
+                /* set masked-off elements to 1s */
+                vext_set_elems_1s_fns[ctzl(esz)](vd, vma, i + k * max_elems,
+                    (i + k * max_elems) * esz, (i + k * max_elems + 1) * esz);
+                k++;
+            }
             continue;
         }
 
@@ -479,10 +487,18 @@ vext_ldst_index(void *vd, void *v0, target_ulong base,
     uint32_t max_elems = vext_max_elems(desc, log2_esz);
     uint32_t esz = 1 << log2_esz;
     uint32_t vta = vext_vta(desc);
+    uint32_t vma = vext_vma(desc);
 
     /* load bytes from guest memory */
     for (i = env->vstart; i < env->vl; i++, env->vstart++) {
         if (!vm && !vext_elem_mask(v0, i)) {
+            k = 0;
+            while (k < nf) {
+                /* set masked-off elements to 1s */
+                vext_set_elems_1s_fns[ctzl(esz)](vd, vma, i + k * max_elems,
+                    (i + k * max_elems) * esz, (i + k * max_elems + 1) * esz);
+                k++;
+            }
             continue;
         }
 
@@ -568,6 +584,7 @@ vext_ldff(void *vd, void *v0, target_ulong base,
     uint32_t max_elems = vext_max_elems(desc, log2_esz);
     uint32_t esz = 1 << log2_esz;
     uint32_t vta = vext_vta(desc);
+    uint32_t vma = vext_vma(desc);
     target_ulong addr, offset, remain;
 
     /* probe every access*/
@@ -614,6 +631,9 @@ ProbeSuccess:
     for (i = env->vstart; i < env->vl; i++) {
         k = 0;
         if (!vm && !vext_elem_mask(v0, i)) {
+            /* set masked-off elements to 1s */
+            vext_set_elems_1s_fns[ctzl(esz)](vd, vma, i + k * max_elems,
+                (i + k * max_elems) * esz, (i + k * max_elems + 1) * esz);
             continue;
         }
         while (k < nf) {
-- 
2.34.2



^ permalink raw reply related	[flat|nested] 27+ messages in thread

* [PATCH qemu 3/9] target/riscv: rvv: Add mask agnostic for vx instructions
  2022-04-25 14:18 ` ~eopxd
@ 2022-04-25 14:18   ` ~eopxd
  -1 siblings, 0 replies; 27+ messages in thread
From: ~eopxd @ 2022-03-17  8:38 UTC (permalink / raw)
  To: qemu-devel, qemu-riscv
  Cc: WeiWei Li, Frank Chang, eop Chen, Bin Meng, Alistair Francis,
	Palmer Dabbelt

From: Yueh-Ting (eop) Chen <eop.chen@sifive.com>

Signed-off-by: eop Chen <eop.chen@sifive.com>
Reviewed-by: Frank Chang <frank.chang@sifive.com>
---
 target/riscv/insn_trans/trans_rvv.c.inc | 2 ++
 target/riscv/vector_helper.c            | 3 +++
 2 files changed, 5 insertions(+)

diff --git a/target/riscv/insn_trans/trans_rvv.c.inc b/target/riscv/insn_trans/trans_rvv.c.inc
index 4e141e5145..e2bdfc0fae 100644
--- a/target/riscv/insn_trans/trans_rvv.c.inc
+++ b/target/riscv/insn_trans/trans_rvv.c.inc
@@ -1308,6 +1308,7 @@ static bool opivx_trans(uint32_t vd, uint32_t rs1, uint32_t vs2, uint32_t vm,
     data = FIELD_DP32(data, VDATA, VM, vm);
     data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
     data = FIELD_DP32(data, VDATA, VTA, s->vta);
+    data = FIELD_DP32(data, VDATA, VMA, s->vma);
     desc = tcg_constant_i32(simd_desc(s->cfg_ptr->vlen / 8,
                                       s->cfg_ptr->vlen / 8, data));
 
@@ -1484,6 +1485,7 @@ static bool opivi_trans(uint32_t vd, uint32_t imm, uint32_t vs2, uint32_t vm,
     data = FIELD_DP32(data, VDATA, VM, vm);
     data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
     data = FIELD_DP32(data, VDATA, VTA, s->vta);
+    data = FIELD_DP32(data, VDATA, VMA, s->vma);
     desc = tcg_constant_i32(simd_desc(s->cfg_ptr->vlen / 8,
                                       s->cfg_ptr->vlen / 8, data));
 
diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c
index bd84b0409c..658ea0244d 100644
--- a/target/riscv/vector_helper.c
+++ b/target/riscv/vector_helper.c
@@ -891,10 +891,13 @@ static void do_vext_vx(void *vd, void *v0, target_long s1, void *vs2,
     uint32_t vl = env->vl;
     uint32_t total_elems = vext_get_total_elems(desc, esz);
     uint32_t vta = vext_vta(desc);
+    uint32_t vma = vext_vma(desc);
     uint32_t i;
 
     for (i = env->vstart; i < vl; i++) {
         if (!vm && !vext_elem_mask(v0, i)) {
+            /* set masked-off elements to 1s */
+            vext_set_elems_1s_fns[ctzl(esz)](vd, vma, i, i * esz, (i + 1) * esz);
             continue;
         }
         fn(vd, s1, vs2, i);
-- 
2.34.2



^ permalink raw reply related	[flat|nested] 27+ messages in thread

* [PATCH qemu 4/9] target/riscv: rvv: Add mask agnostic for vector integer shift instructions
  2022-04-25 14:18 ` ~eopxd
@ 2022-04-25 14:18   ` ~eopxd
  -1 siblings, 0 replies; 27+ messages in thread
From: ~eopxd @ 2022-03-17  8:43 UTC (permalink / raw)
  To: qemu-devel, qemu-riscv
  Cc: WeiWei Li, Frank Chang, eop Chen, Bin Meng, Alistair Francis,
	Palmer Dabbelt

From: Yueh-Ting (eop) Chen <eop.chen@sifive.com>

Signed-off-by: eop Chen <eop.chen@sifive.com>
Reviewed-by: Frank Chang <frank.chang@sifive.com>
---
 target/riscv/insn_trans/trans_rvv.c.inc | 1 +
 target/riscv/vector_helper.c            | 9 +++++++++
 2 files changed, 10 insertions(+)

diff --git a/target/riscv/insn_trans/trans_rvv.c.inc b/target/riscv/insn_trans/trans_rvv.c.inc
index e2bdfc0fae..a085ef2c29 100644
--- a/target/riscv/insn_trans/trans_rvv.c.inc
+++ b/target/riscv/insn_trans/trans_rvv.c.inc
@@ -1935,6 +1935,7 @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a)             \
         data = FIELD_DP32(data, VDATA, VM, a->vm);                 \
         data = FIELD_DP32(data, VDATA, LMUL, s->lmul);             \
         data = FIELD_DP32(data, VDATA, VTA, s->vta);               \
+        data = FIELD_DP32(data, VDATA, VMA, s->vma);               \
         tcg_gen_gvec_4_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0),     \
                            vreg_ofs(s, a->rs1),                    \
                            vreg_ofs(s, a->rs2), cpu_env,           \
diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c
index 658ea0244d..269e40c251 100644
--- a/target/riscv/vector_helper.c
+++ b/target/riscv/vector_helper.c
@@ -1291,10 +1291,14 @@ void HELPER(NAME)(void *vd, void *v0, void *vs1,                          \
     uint32_t esz = sizeof(TS1);                                           \
     uint32_t total_elems = vext_get_total_elems(desc, esz);               \
     uint32_t vta = vext_vta(desc);                                        \
+    uint32_t vma = vext_vma(desc);                                        \
     uint32_t i;                                                           \
                                                                           \
     for (i = env->vstart; i < vl; i++) {                                  \
         if (!vm && !vext_elem_mask(v0, i)) {                              \
+            /* set masked-off elements to 1s */                           \
+            vext_set_elems_1s_fns[ctzl(esz)](vd, vma, i, i * esz,         \
+                                             (i + 1) * esz);              \
             continue;                                                     \
         }                                                                 \
         TS1 s1 = *((TS1 *)vs1 + HS1(i));                                  \
@@ -1332,10 +1336,15 @@ void HELPER(NAME)(void *vd, void *v0, target_ulong s1,      \
     uint32_t esz = sizeof(TD);                              \
     uint32_t total_elems = vext_get_total_elems(desc, esz); \
     uint32_t vta = vext_vta(desc);                          \
+    uint32_t vma = vext_vma(desc);                          \
     uint32_t i;                                             \
                                                             \
     for (i = env->vstart; i < vl; i++) {                    \
         if (!vm && !vext_elem_mask(v0, i)) {                \
+            /* set masked-off elements to 1s */             \
+            vext_set_elems_1s_fns[ctzl(esz)](vd, vma, i,    \
+                                             i * esz,       \
+                                             (i + 1) * esz);\
             continue;                                       \
         }                                                   \
         TS2 s2 = *((TS2 *)vs2 + HS2(i));                    \
-- 
2.34.2



^ permalink raw reply related	[flat|nested] 27+ messages in thread

* [PATCH qemu 5/9] target/riscv: rvv: Add mask agnostic for vector integer comparison instructions
  2022-04-25 14:18 ` ~eopxd
@ 2022-04-25 14:18   ` ~eopxd
  -1 siblings, 0 replies; 27+ messages in thread
From: ~eopxd @ 2022-03-17  8:46 UTC (permalink / raw)
  To: qemu-devel, qemu-riscv
  Cc: WeiWei Li, Frank Chang, eop Chen, Bin Meng, Alistair Francis,
	Palmer Dabbelt

From: Yueh-Ting (eop) Chen <eop.chen@sifive.com>

Signed-off-by: eop Chen <eop.chen@sifive.com>
Reviewed-by: Frank Chang <frank.chang@sifive.com>
---
 target/riscv/insn_trans/trans_rvv.c.inc |  1 +
 target/riscv/vector_helper.c            | 10 ++++++++++
 2 files changed, 11 insertions(+)

diff --git a/target/riscv/insn_trans/trans_rvv.c.inc b/target/riscv/insn_trans/trans_rvv.c.inc
index a085ef2c29..be08ec8a2e 100644
--- a/target/riscv/insn_trans/trans_rvv.c.inc
+++ b/target/riscv/insn_trans/trans_rvv.c.inc
@@ -1742,6 +1742,7 @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a)             \
         data = FIELD_DP32(data, VDATA, VM, a->vm);                 \
         data = FIELD_DP32(data, VDATA, LMUL, s->lmul);             \
         data = FIELD_DP32(data, VDATA, VTA, s->vta);               \
+        data = FIELD_DP32(data, VDATA, VMA, s->vma);               \
         tcg_gen_gvec_4_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0),     \
                            vreg_ofs(s, a->rs1),                    \
                            vreg_ofs(s, a->rs2), cpu_env,           \
diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c
index 269e40c251..2b341d6679 100644
--- a/target/riscv/vector_helper.c
+++ b/target/riscv/vector_helper.c
@@ -1401,12 +1401,17 @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,   \
     uint32_t esz = sizeof(ETYPE);                             \
     uint32_t total_elems = vext_get_total_elems(desc, esz);   \
     uint32_t vta = vext_vta(desc);                            \
+    uint32_t vma = vext_vma(desc);                            \
     uint32_t i;                                               \
                                                               \
     for (i = env->vstart; i < vl; i++) {                      \
         ETYPE s1 = *((ETYPE *)vs1 + H(i));                    \
         ETYPE s2 = *((ETYPE *)vs2 + H(i));                    \
         if (!vm && !vext_elem_mask(v0, i)) {                  \
+            /* set masked-off elements to 1s */               \
+            if (vma) {                                        \
+                vext_set_elem_mask(vd, i, 1);                 \
+            }                                                 \
             continue;                                         \
         }                                                     \
         vext_set_elem_mask(vd, i, DO_OP(s2, s1));             \
@@ -1459,11 +1464,16 @@ void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,   \
     uint32_t esz = sizeof(ETYPE);                                   \
     uint32_t total_elems = vext_get_total_elems(desc, esz);         \
     uint32_t vta = vext_vta(desc);                                  \
+    uint32_t vma = vext_vma(desc);                                  \
     uint32_t i;                                                     \
                                                                     \
     for (i = env->vstart; i < vl; i++) {                            \
         ETYPE s2 = *((ETYPE *)vs2 + H(i));                          \
         if (!vm && !vext_elem_mask(v0, i)) {                        \
+            /* set masked-off elements to 1s */                     \
+            if (vma) {                                              \
+                vext_set_elem_mask(vd, i, 1);                       \
+            }                                                       \
             continue;                                               \
         }                                                           \
         vext_set_elem_mask(vd, i,                                   \
-- 
2.34.2



^ permalink raw reply related	[flat|nested] 27+ messages in thread

* [PATCH qemu 6/9] target/riscv: rvv: Add mask agnostic for vector fix-point arithmetic instructions
  2022-04-25 14:18 ` ~eopxd
@ 2022-04-25 14:18   ` ~eopxd
  -1 siblings, 0 replies; 27+ messages in thread
From: ~eopxd @ 2022-03-17  8:52 UTC (permalink / raw)
  To: qemu-devel, qemu-riscv
  Cc: WeiWei Li, Frank Chang, eop Chen, Bin Meng, Alistair Francis,
	Palmer Dabbelt

From: Yueh-Ting (eop) Chen <eop.chen@sifive.com>

Signed-off-by: eop Chen <eop.chen@sifive.com>
Reviewed-by: Frank Chang <frank.chang@sifive.com>
---
 target/riscv/vector_helper.c | 26 ++++++++++++++++----------
 1 file changed, 16 insertions(+), 10 deletions(-)

diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c
index 2b341d6679..e266a8673c 100644
--- a/target/riscv/vector_helper.c
+++ b/target/riscv/vector_helper.c
@@ -2129,10 +2129,12 @@ static inline void
 vext_vv_rm_1(void *vd, void *v0, void *vs1, void *vs2,
              CPURISCVState *env,
              uint32_t vl, uint32_t vm, int vxrm,
-             opivv2_rm_fn *fn)
+             opivv2_rm_fn *fn, uint32_t vma, uint32_t esz)
 {
     for (uint32_t i = env->vstart; i < vl; i++) {
         if (!vm && !vext_elem_mask(v0, i)) {
+            /* set masked-off elements to 1s */
+            vext_set_elems_1s_fns[ctzl(esz)](vd, vma, i, i * esz, (i + 1) * esz);
             continue;
         }
         fn(vd, vs1, vs2, i, env, vxrm);
@@ -2150,23 +2152,24 @@ vext_vv_rm_2(void *vd, void *v0, void *vs1, void *vs2,
     uint32_t vl = env->vl;
     uint32_t total_elems = vext_get_total_elems(desc, esz);
     uint32_t vta = vext_vta(desc);
+    uint32_t vma = vext_vma(desc);
 
     switch (env->vxrm) {
     case 0: /* rnu */
         vext_vv_rm_1(vd, v0, vs1, vs2,
-                     env, vl, vm, 0, fn);
+                     env, vl, vm, 0, fn, vma, esz);
         break;
     case 1: /* rne */
         vext_vv_rm_1(vd, v0, vs1, vs2,
-                     env, vl, vm, 1, fn);
+                     env, vl, vm, 1, fn, vma, esz);
         break;
     case 2: /* rdn */
         vext_vv_rm_1(vd, v0, vs1, vs2,
-                     env, vl, vm, 2, fn);
+                     env, vl, vm, 2, fn, vma, esz);
         break;
     default: /* rod */
         vext_vv_rm_1(vd, v0, vs1, vs2,
-                     env, vl, vm, 3, fn);
+                     env, vl, vm, 3, fn, vma, esz);
         break;
     }
     /* set tail elements to 1s */
@@ -2250,10 +2253,12 @@ static inline void
 vext_vx_rm_1(void *vd, void *v0, target_long s1, void *vs2,
              CPURISCVState *env,
              uint32_t vl, uint32_t vm, int vxrm,
-             opivx2_rm_fn *fn)
+             opivx2_rm_fn *fn, uint32_t vma, uint32_t esz)
 {
     for (uint32_t i = env->vstart; i < vl; i++) {
         if (!vm && !vext_elem_mask(v0, i)) {
+            /* set masked-off elements to 1s */
+            vext_set_elems_1s_fns[ctzl(esz)](vd, vma, i, i * esz, (i + 1) * esz);
             continue;
         }
         fn(vd, s1, vs2, i, env, vxrm);
@@ -2271,23 +2276,24 @@ vext_vx_rm_2(void *vd, void *v0, target_long s1, void *vs2,
     uint32_t vl = env->vl;
     uint32_t total_elems = vext_get_total_elems(desc, esz);
     uint32_t vta = vext_vta(desc);
+    uint32_t vma = vext_vma(desc);
 
     switch (env->vxrm) {
     case 0: /* rnu */
         vext_vx_rm_1(vd, v0, s1, vs2,
-                     env, vl, vm, 0, fn);
+                     env, vl, vm, 0, fn, vma, esz);
         break;
     case 1: /* rne */
         vext_vx_rm_1(vd, v0, s1, vs2,
-                     env, vl, vm, 1, fn);
+                     env, vl, vm, 1, fn, vma, esz);
         break;
     case 2: /* rdn */
         vext_vx_rm_1(vd, v0, s1, vs2,
-                     env, vl, vm, 2, fn);
+                     env, vl, vm, 2, fn, vma, esz);
         break;
     default: /* rod */
         vext_vx_rm_1(vd, v0, s1, vs2,
-                     env, vl, vm, 3, fn);
+                     env, vl, vm, 3, fn, vma, esz);
         break;
     }
     /* set tail elements to 1s */
-- 
2.34.2



^ permalink raw reply related	[flat|nested] 27+ messages in thread

* [PATCH qemu 7/9] target/riscv: rvv: Add mask agnostic for vector floating-point instructions
  2022-04-25 14:18 ` ~eopxd
@ 2022-04-25 14:18   ` ~eopxd
  -1 siblings, 0 replies; 27+ messages in thread
From: ~eopxd @ 2022-03-17  9:08 UTC (permalink / raw)
  To: qemu-devel, qemu-riscv
  Cc: WeiWei Li, Frank Chang, eop Chen, Bin Meng, Alistair Francis,
	Palmer Dabbelt

From: Yueh-Ting (eop) Chen <eop.chen@sifive.com>

Signed-off-by: eop Chen <eop.chen@sifive.com>
Reviewed-by: Frank Chang <frank.chang@sifive.com>
---
 target/riscv/insn_trans/trans_rvv.c.inc | 12 +++++++++
 target/riscv/vector_helper.c            | 36 +++++++++++++++++++++++++
 2 files changed, 48 insertions(+)

diff --git a/target/riscv/insn_trans/trans_rvv.c.inc b/target/riscv/insn_trans/trans_rvv.c.inc
index be08ec8a2e..e404d24bcf 100644
--- a/target/riscv/insn_trans/trans_rvv.c.inc
+++ b/target/riscv/insn_trans/trans_rvv.c.inc
@@ -2415,6 +2415,7 @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a)             \
         data = FIELD_DP32(data, VDATA, VM, a->vm);                 \
         data = FIELD_DP32(data, VDATA, LMUL, s->lmul);             \
         data = FIELD_DP32(data, VDATA, VTA, s->vta);               \
+        data = FIELD_DP32(data, VDATA, VMA, s->vma);               \
         tcg_gen_gvec_4_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0),     \
                            vreg_ofs(s, a->rs1),                    \
                            vreg_ofs(s, a->rs2), cpu_env,           \
@@ -2498,6 +2499,7 @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a)            \
         data = FIELD_DP32(data, VDATA, VM, a->vm);                \
         data = FIELD_DP32(data, VDATA, LMUL, s->lmul);            \
         data = FIELD_DP32(data, VDATA, VTA, s->vta);              \
+        data = FIELD_DP32(data, VDATA, VMA, s->vma);              \
         return opfvf_trans(a->rd, a->rs1, a->rs2, data,           \
                            fns[s->sew - 1], s);                   \
     }                                                             \
@@ -2537,6 +2539,7 @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a)           \
         data = FIELD_DP32(data, VDATA, VM, a->vm);               \
         data = FIELD_DP32(data, VDATA, LMUL, s->lmul);           \
         data = FIELD_DP32(data, VDATA, VTA, s->vta);             \
+        data = FIELD_DP32(data, VDATA, VMA, s->vma);             \
         tcg_gen_gvec_4_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0),   \
                            vreg_ofs(s, a->rs1),                  \
                            vreg_ofs(s, a->rs2), cpu_env,         \
@@ -2577,6 +2580,7 @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a)           \
         data = FIELD_DP32(data, VDATA, VM, a->vm);               \
         data = FIELD_DP32(data, VDATA, LMUL, s->lmul);           \
         data = FIELD_DP32(data, VDATA, VTA, s->vta);             \
+        data = FIELD_DP32(data, VDATA, VMA, s->vma);             \
         return opfvf_trans(a->rd, a->rs1, a->rs2, data,          \
                            fns[s->sew - 1], s);                  \
     }                                                            \
@@ -2614,6 +2618,7 @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a)             \
         data = FIELD_DP32(data, VDATA, VM, a->vm);                 \
         data = FIELD_DP32(data, VDATA, LMUL, s->lmul);             \
         data = FIELD_DP32(data, VDATA, VTA, s->vta);               \
+        data = FIELD_DP32(data, VDATA, VMA, s->vma);               \
         tcg_gen_gvec_4_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0),     \
                            vreg_ofs(s, a->rs1),                    \
                            vreg_ofs(s, a->rs2), cpu_env,           \
@@ -2654,6 +2659,7 @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a)           \
         data = FIELD_DP32(data, VDATA, VM, a->vm);               \
         data = FIELD_DP32(data, VDATA, LMUL, s->lmul);           \
         data = FIELD_DP32(data, VDATA, VTA, s->vta);             \
+        data = FIELD_DP32(data, VDATA, VMA, s->vma);             \
         return opfvf_trans(a->rd, a->rs1, a->rs2, data,          \
                            fns[s->sew - 1], s);                  \
     }                                                            \
@@ -2738,6 +2744,7 @@ static bool do_opfv(DisasContext *s, arg_rmr *a,
         data = FIELD_DP32(data, VDATA, VM, a->vm);
         data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
         data = FIELD_DP32(data, VDATA, VTA, s->vta);
+        data = FIELD_DP32(data, VDATA, VMA, s->vma);
         tcg_gen_gvec_3_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0),
                            vreg_ofs(s, a->rs2), cpu_env,
                            s->cfg_ptr->vlen / 8,
@@ -2852,6 +2859,7 @@ static bool trans_vfmv_v_f(DisasContext *s, arg_vfmv_v_f *a)
             TCGv_i32 desc;
             uint32_t data = FIELD_DP32(0, VDATA, LMUL, s->lmul);
             data = FIELD_DP32(data, VDATA, VTA, s->vta);
+            data = FIELD_DP32(data, VDATA, VMA, s->vma);
             static gen_helper_vmv_vx * const fns[3] = {
                 gen_helper_vmv_v_x_h,
                 gen_helper_vmv_v_x_w,
@@ -2953,6 +2961,7 @@ static bool trans_##NAME(DisasContext *s, arg_rmr *a)              \
         data = FIELD_DP32(data, VDATA, VM, a->vm);                 \
         data = FIELD_DP32(data, VDATA, LMUL, s->lmul);             \
         data = FIELD_DP32(data, VDATA, VTA, s->vta);               \
+        data = FIELD_DP32(data, VDATA, VMA, s->vma);               \
         tcg_gen_gvec_3_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0),     \
                            vreg_ofs(s, a->rs2), cpu_env,           \
                            s->cfg_ptr->vlen / 8,                   \
@@ -3005,6 +3014,7 @@ static bool trans_##NAME(DisasContext *s, arg_rmr *a)              \
                                                                    \
         data = FIELD_DP32(data, VDATA, VM, a->vm);                 \
         data = FIELD_DP32(data, VDATA, VTA, s->vta);               \
+        data = FIELD_DP32(data, VDATA, VMA, s->vma);               \
         tcg_gen_gvec_3_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0),     \
                            vreg_ofs(s, a->rs2), cpu_env,           \
                            s->cfg_ptr->vlen / 8,                   \
@@ -3073,6 +3083,7 @@ static bool trans_##NAME(DisasContext *s, arg_rmr *a)              \
         data = FIELD_DP32(data, VDATA, VM, a->vm);                 \
         data = FIELD_DP32(data, VDATA, LMUL, s->lmul);             \
         data = FIELD_DP32(data, VDATA, VTA, s->vta);               \
+        data = FIELD_DP32(data, VDATA, VMA, s->vma);               \
         tcg_gen_gvec_3_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0),     \
                            vreg_ofs(s, a->rs2), cpu_env,           \
                            s->cfg_ptr->vlen / 8,                   \
@@ -3127,6 +3138,7 @@ static bool trans_##NAME(DisasContext *s, arg_rmr *a)              \
                                                                    \
         data = FIELD_DP32(data, VDATA, VM, a->vm);                 \
         data = FIELD_DP32(data, VDATA, VTA, s->vta);               \
+        data = FIELD_DP32(data, VDATA, VMA, s->vma);               \
         tcg_gen_gvec_3_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0),     \
                            vreg_ofs(s, a->rs2), cpu_env,           \
                            s->cfg_ptr->vlen / 8,                   \
diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c
index e266a8673c..0af65e5423 100644
--- a/target/riscv/vector_helper.c
+++ b/target/riscv/vector_helper.c
@@ -3051,10 +3051,16 @@ void HELPER(NAME)(void *vd, void *v0, void *vs1,          \
     uint32_t total_elems =                                \
         vext_get_total_elems(desc, ESZ);                  \
     uint32_t vta = vext_vta(desc);                        \
+    uint32_t vma = vext_vma(desc);                        \
     uint32_t i;                                           \
                                                           \
     for (i = env->vstart; i < vl; i++) {                  \
         if (!vm && !vext_elem_mask(v0, i)) {              \
+            /* set masked-off elements to 1s */           \
+            vext_set_elems_1s_fns[ctzl(ESZ)](vd, vma, i,  \
+                                             i * ESZ,     \
+                                             (i + 1) *    \
+                                             ESZ);        \
             continue;                                     \
         }                                                 \
         do_##NAME(vd, vs1, vs2, i, env);                  \
@@ -3091,10 +3097,16 @@ void HELPER(NAME)(void *vd, void *v0, uint64_t s1,        \
     uint32_t total_elems =                                \
         vext_get_total_elems(desc, ESZ);                  \
     uint32_t vta = vext_vta(desc);                        \
+    uint32_t vma = vext_vma(desc);                        \
     uint32_t i;                                           \
                                                           \
     for (i = env->vstart; i < vl; i++) {                  \
         if (!vm && !vext_elem_mask(v0, i)) {              \
+            /* set masked-off elements to 1s */           \
+            vext_set_elems_1s_fns[ctzl(ESZ)](vd, vma, i,  \
+                                             i * ESZ,     \
+                                             (i + 1) *    \
+                                             ESZ);        \
             continue;                                     \
         }                                                 \
         do_##NAME(vd, s1, vs2, i, env);                   \
@@ -3667,6 +3679,7 @@ void HELPER(NAME)(void *vd, void *v0, void *vs2,       \
     uint32_t total_elems =                             \
         vext_get_total_elems(desc, ESZ);               \
     uint32_t vta = vext_vta(desc);                     \
+    uint32_t vma = vext_vma(desc);                     \
     uint32_t i;                                        \
                                                        \
     if (vl == 0) {                                     \
@@ -3674,6 +3687,12 @@ void HELPER(NAME)(void *vd, void *v0, void *vs2,       \
     }                                                  \
     for (i = env->vstart; i < vl; i++) {               \
         if (!vm && !vext_elem_mask(v0, i)) {           \
+            /* set masked-off elements to 1s */        \
+            vext_set_elems_1s_fns[ctzl(ESZ)](vd, vma,  \
+                                             i,        \
+                                             i * ESZ,  \
+                                             (i + 1)   \
+                                             * ESZ);   \
             continue;                                  \
         }                                              \
         do_##NAME(vd, vs2, i, env);                    \
@@ -4188,12 +4207,17 @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,   \
     uint32_t esz = sizeof(ETYPE);                             \
     uint32_t total_elems = vext_get_total_elems(desc, esz);   \
     uint32_t vta = vext_vta(desc);                            \
+    uint32_t vma = vext_vma(desc);                            \
     uint32_t i;                                               \
                                                               \
     for (i = env->vstart; i < vl; i++) {                      \
         ETYPE s1 = *((ETYPE *)vs1 + H(i));                    \
         ETYPE s2 = *((ETYPE *)vs2 + H(i));                    \
         if (!vm && !vext_elem_mask(v0, i)) {                  \
+            /* set masked-off elements to 1s */               \
+            if (vma) {                                        \
+                vext_set_elem_mask(vd, i, 1);                 \
+            }                                                 \
             continue;                                         \
         }                                                     \
         vext_set_elem_mask(vd, i,                             \
@@ -4221,11 +4245,16 @@ void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2,       \
     uint32_t esz = sizeof(ETYPE);                                   \
     uint32_t total_elems = vext_get_total_elems(desc, esz);         \
     uint32_t vta = vext_vta(desc);                                  \
+    uint32_t vma = vext_vma(desc);                                  \
     uint32_t i;                                                     \
                                                                     \
     for (i = env->vstart; i < vl; i++) {                            \
         ETYPE s2 = *((ETYPE *)vs2 + H(i));                          \
         if (!vm && !vext_elem_mask(v0, i)) {                        \
+            /* set masked-off elements to 1s */                     \
+            if (vma) {                                              \
+                vext_set_elem_mask(vd, i, 1);                       \
+            }                                                       \
             continue;                                               \
         }                                                           \
         vext_set_elem_mask(vd, i,                                   \
@@ -4347,10 +4376,17 @@ void HELPER(NAME)(void *vd, void *v0, void *vs2,       \
     uint32_t total_elems =                             \
         vext_get_total_elems(desc, ESZ);               \
     uint32_t vta = vext_vta(desc);                     \
+    uint32_t vma = vext_vma(desc);                     \
     uint32_t i;                                        \
                                                        \
     for (i = env->vstart; i < vl; i++) {               \
         if (!vm && !vext_elem_mask(v0, i)) {           \
+            /* set masked-off elements to 1s */        \
+            vext_set_elems_1s_fns[ctzl(ESZ)](vd, vma,  \
+                                             i,        \
+                                             i * ESZ,  \
+                                             (i + 1) * \
+                                             ESZ);     \
             continue;                                  \
         }                                              \
         do_##NAME(vd, vs2, i);                         \
-- 
2.34.2



^ permalink raw reply related	[flat|nested] 27+ messages in thread

* [PATCH qemu 8/9] target/riscv: rvv: Add mask agnostic for vector mask instructions
  2022-04-25 14:18 ` ~eopxd
@ 2022-04-25 14:18   ` ~eopxd
  -1 siblings, 0 replies; 27+ messages in thread
From: ~eopxd @ 2022-03-17  9:14 UTC (permalink / raw)
  To: qemu-devel, qemu-riscv
  Cc: WeiWei Li, Frank Chang, eop Chen, Bin Meng, Alistair Francis,
	Palmer Dabbelt

From: Yueh-Ting (eop) Chen <eop.chen@sifive.com>

Signed-off-by: eop Chen <eop.chen@sifive.com>
Reviewed-by: Frank Chang <frank.chang@sifive.com>
---
 target/riscv/insn_trans/trans_rvv.c.inc |  3 +++
 target/riscv/vector_helper.c            | 13 +++++++++++++
 2 files changed, 16 insertions(+)

diff --git a/target/riscv/insn_trans/trans_rvv.c.inc b/target/riscv/insn_trans/trans_rvv.c.inc
index e404d24bcf..69d00c7116 100644
--- a/target/riscv/insn_trans/trans_rvv.c.inc
+++ b/target/riscv/insn_trans/trans_rvv.c.inc
@@ -3333,6 +3333,7 @@ static bool trans_##NAME(DisasContext *s, arg_rmr *a)              \
         data = FIELD_DP32(data, VDATA, VM, a->vm);                 \
         data = FIELD_DP32(data, VDATA, LMUL, s->lmul);             \
         data = FIELD_DP32(data, VDATA, VTA, s->vta);               \
+        data = FIELD_DP32(data, VDATA, VMA, s->vma);               \
         tcg_gen_gvec_3_ptr(vreg_ofs(s, a->rd),                     \
                            vreg_ofs(s, 0), vreg_ofs(s, a->rs2),    \
                            cpu_env, s->cfg_ptr->vlen / 8,          \
@@ -3371,6 +3372,7 @@ static bool trans_viota_m(DisasContext *s, arg_viota_m *a)
         data = FIELD_DP32(data, VDATA, VM, a->vm);
         data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
         data = FIELD_DP32(data, VDATA, VTA, s->vta);
+        data = FIELD_DP32(data, VDATA, VMA, s->vma);
         static gen_helper_gvec_3_ptr * const fns[4] = {
             gen_helper_viota_m_b, gen_helper_viota_m_h,
             gen_helper_viota_m_w, gen_helper_viota_m_d,
@@ -3401,6 +3403,7 @@ static bool trans_vid_v(DisasContext *s, arg_vid_v *a)
         data = FIELD_DP32(data, VDATA, VM, a->vm);
         data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
         data = FIELD_DP32(data, VDATA, VTA, s->vta);
+        data = FIELD_DP32(data, VDATA, VMA, s->vma);
         static gen_helper_gvec_2_ptr * const fns[4] = {
             gen_helper_vid_v_b, gen_helper_vid_v_h,
             gen_helper_vid_v_w, gen_helper_vid_v_d,
diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c
index 0af65e5423..600ccad513 100644
--- a/target/riscv/vector_helper.c
+++ b/target/riscv/vector_helper.c
@@ -4895,11 +4895,16 @@ static void vmsetm(void *vd, void *v0, void *vs2, CPURISCVState *env,
     uint32_t vl = env->vl;
     uint32_t total_elems = env_archcpu(env)->cfg.vlen;
     uint32_t vta = vext_vta(desc);
+    uint32_t vma = vext_vma(desc);
     int i;
     bool first_mask_bit = false;
 
     for (i = env->vstart; i < vl; i++) {
         if (!vm && !vext_elem_mask(v0, i)) {
+            /* set masked-off elements to 1s */
+            if (vma) {
+                vext_set_elem_mask(vd, i, 1);
+            }
             continue;
         }
         /* write a zero to all following active elements */
@@ -4959,11 +4964,15 @@ void HELPER(NAME)(void *vd, void *v0, void *vs2, CPURISCVState *env,      \
     uint32_t esz = sizeof(ETYPE);                                         \
     uint32_t total_elems = vext_get_total_elems(desc, esz);               \
     uint32_t vta = vext_vta(desc);                                        \
+    uint32_t vma = vext_vma(desc);                                        \
     uint32_t sum = 0;                                                     \
     int i;                                                                \
                                                                           \
     for (i = env->vstart; i < vl; i++) {                                  \
         if (!vm && !vext_elem_mask(v0, i)) {                              \
+            /* set masked-off elements to 1s */                           \
+            vext_set_elems_1s_fns[ctzl(esz)](vd, vma, i, i * esz,         \
+                                             (i + 1) * esz);              \
             continue;                                                     \
         }                                                                 \
         *((ETYPE *)vd + H(i)) = sum;                                      \
@@ -4991,10 +5000,14 @@ void HELPER(NAME)(void *vd, void *v0, CPURISCVState *env, uint32_t desc)  \
     uint32_t esz = sizeof(ETYPE);                                         \
     uint32_t total_elems = vext_get_total_elems(desc, esz);               \
     uint32_t vta = vext_vta(desc);                                        \
+    uint32_t vma = vext_vma(desc);                                        \
     int i;                                                                \
                                                                           \
     for (i = env->vstart; i < vl; i++) {                                  \
         if (!vm && !vext_elem_mask(v0, i)) {                              \
+            /* set masked-off elements to 1s */                           \
+            vext_set_elems_1s_fns[ctzl(esz)](vd, vma, i, i * esz,         \
+                                             (i + 1) * esz);              \
             continue;                                                     \
         }                                                                 \
         *((ETYPE *)vd + H(i)) = i;                                        \
-- 
2.34.2



^ permalink raw reply related	[flat|nested] 27+ messages in thread

* [PATCH qemu 9/9] target/riscv: rvv: Add mask agnostic for vector permutation instructions
  2022-04-25 14:18 ` ~eopxd
@ 2022-04-25 14:18   ` ~eopxd
  -1 siblings, 0 replies; 27+ messages in thread
From: ~eopxd @ 2022-03-17  9:32 UTC (permalink / raw)
  To: qemu-devel, qemu-riscv
  Cc: WeiWei Li, Frank Chang, eop Chen, Bin Meng, Alistair Francis,
	Palmer Dabbelt

From: Yueh-Ting (eop) Chen <eop.chen@sifive.com>

Signed-off-by: eop Chen <eop.chen@sifive.com>
Reviewed-by: Frank Chang <frank.chang@sifive.com>
---
 target/riscv/insn_trans/trans_rvv.c.inc |  1 +
 target/riscv/vector_helper.c            | 34 +++++++++++++++++++++++--
 2 files changed, 33 insertions(+), 2 deletions(-)

diff --git a/target/riscv/insn_trans/trans_rvv.c.inc b/target/riscv/insn_trans/trans_rvv.c.inc
index 69d00c7116..3858a0479a 100644
--- a/target/riscv/insn_trans/trans_rvv.c.inc
+++ b/target/riscv/insn_trans/trans_rvv.c.inc
@@ -3975,6 +3975,7 @@ static bool int_ext_op(DisasContext *s, arg_rmr *a, uint8_t seq)
 
     data = FIELD_DP32(data, VDATA, VM, a->vm);
     data = FIELD_DP32(data, VDATA, VTA, s->vta);
+    data = FIELD_DP32(data, VDATA, VMA, s->vma);
 
     tcg_gen_gvec_3_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0),
                        vreg_ofs(s, a->rs2), cpu_env,
diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c
index 600ccad513..e87806ed64 100644
--- a/target/riscv/vector_helper.c
+++ b/target/riscv/vector_helper.c
@@ -5037,11 +5037,15 @@ void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,         \
     uint32_t esz = sizeof(ETYPE);                                         \
     uint32_t total_elems = vext_get_total_elems(desc, esz);               \
     uint32_t vta = vext_vta(desc);                                        \
+    uint32_t vma = vext_vma(desc);                                        \
     target_ulong offset = s1, i_min, i;                                   \
                                                                           \
     i_min = MAX(env->vstart, offset);                                     \
     for (i = i_min; i < vl; i++) {                                        \
         if (!vm && !vext_elem_mask(v0, i)) {                              \
+            /* set masked-off elements to 1s */                           \
+            vext_set_elems_1s_fns[ctzl(esz)](vd, vma, i, i * esz,         \
+                                             (i + 1) * esz);              \
             continue;                                                     \
         }                                                                 \
         *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i - offset));          \
@@ -5067,13 +5071,18 @@ void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,         \
     uint32_t esz = sizeof(ETYPE);                                         \
     uint32_t total_elems = vext_get_total_elems(desc, esz);               \
     uint32_t vta = vext_vta(desc);                                        \
+    uint32_t vma = vext_vma(desc);                                        \
     target_ulong i_max, i;                                                \
                                                                           \
     i_max = MAX(MIN(s1 < vlmax ? vlmax - s1 : 0, vl), env->vstart);       \
     for (i = env->vstart; i < i_max; ++i) {                               \
-        if (vm || vext_elem_mask(v0, i)) {                                \
-            *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i + s1));          \
+        if (!vm && !vext_elem_mask(v0, i)) {                              \
+            /* set masked-off elements to 1s */                           \
+            vext_set_elems_1s_fns[ctzl(esz)](vd, vma, i, i * esz,         \
+                                             (i + 1) * esz);              \
+            continue;                                                     \
         }                                                                 \
+        *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i + s1));              \
     }                                                                     \
                                                                           \
     for (i = i_max; i < vl; ++i) {                                        \
@@ -5104,10 +5113,14 @@ static void vslide1up_##BITWIDTH(void *vd, void *v0, target_ulong s1,       \
     uint32_t esz = sizeof(ETYPE);                                           \
     uint32_t total_elems = vext_get_total_elems(desc, esz);                 \
     uint32_t vta = vext_vta(desc);                                          \
+    uint32_t vma = vext_vma(desc);                                          \
     uint32_t i;                                                             \
                                                                             \
     for (i = env->vstart; i < vl; i++) {                                    \
         if (!vm && !vext_elem_mask(v0, i)) {                                \
+            /* set masked-off elements to 1s */                             \
+            vext_set_elems_1s_fns[ctzl(esz)](vd, vma, i, i * esz,           \
+                                             (i + 1) * esz);                \
             continue;                                                       \
         }                                                                   \
         if (i == 0) {                                                       \
@@ -5150,10 +5163,14 @@ static void vslide1down_##BITWIDTH(void *vd, void *v0, target_ulong s1,       \
     uint32_t esz = sizeof(ETYPE);                                             \
     uint32_t total_elems = vext_get_total_elems(desc, esz);                   \
     uint32_t vta = vext_vta(desc);                                            \
+    uint32_t vma = vext_vma(desc);                                            \
     uint32_t i;                                                               \
                                                                               \
     for (i = env->vstart; i < vl; i++) {                                      \
         if (!vm && !vext_elem_mask(v0, i)) {                                  \
+            /* set masked-off elements to 1s */                               \
+            vext_set_elems_1s_fns[ctzl(esz)](vd, vma, i, i * esz,             \
+                                             (i + 1) * esz);                  \
             continue;                                                         \
         }                                                                     \
         if (i == vl - 1) {                                                    \
@@ -5222,11 +5239,15 @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,               \
     uint32_t esz = sizeof(TS2);                                           \
     uint32_t total_elems = vext_get_total_elems(desc, esz);               \
     uint32_t vta = vext_vta(desc);                                        \
+    uint32_t vma = vext_vma(desc);                                        \
     uint64_t index;                                                       \
     uint32_t i;                                                           \
                                                                           \
     for (i = env->vstart; i < vl; i++) {                                  \
         if (!vm && !vext_elem_mask(v0, i)) {                              \
+            /* set masked-off elements to 1s */                           \
+            vext_set_elems_1s_fns[ctzl(esz)](vd, vma, i, i * esz,         \
+                                             (i + 1) * esz);              \
             continue;                                                     \
         }                                                                 \
         index = *((TS1 *)vs1 + HS1(i));                                   \
@@ -5263,11 +5284,15 @@ void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,         \
     uint32_t esz = sizeof(ETYPE);                                         \
     uint32_t total_elems = vext_get_total_elems(desc, esz);               \
     uint32_t vta = vext_vta(desc);                                        \
+    uint32_t vma = vext_vma(desc);                                        \
     uint64_t index = s1;                                                  \
     uint32_t i;                                                           \
                                                                           \
     for (i = env->vstart; i < vl; i++) {                                  \
         if (!vm && !vext_elem_mask(v0, i)) {                              \
+            /* set masked-off elements to 1s */                           \
+            vext_set_elems_1s_fns[ctzl(esz)](vd, vma, i, i * esz,         \
+                                             (i + 1) * esz);              \
             continue;                                                     \
         }                                                                 \
         if (index >= vlmax) {                                             \
@@ -5349,10 +5374,15 @@ void HELPER(NAME)(void *vd, void *v0, void *vs2,                 \
     uint32_t esz = sizeof(ETYPE);                                \
     uint32_t total_elems = vext_get_total_elems(desc, esz);      \
     uint32_t vta = vext_vta(desc);                               \
+    uint32_t vma = vext_vma(desc);                               \
     uint32_t i;                                                  \
                                                                  \
     for (i = env->vstart; i < vl; i++) {                         \
         if (!vm && !vext_elem_mask(v0, i)) {                     \
+            /* set masked-off elements to 1s */                  \
+            vext_set_elems_1s_fns[ctzl(esz)](vd, vma, i,         \
+                                             i * esz,            \
+                                             (i + 1) * esz);     \
             continue;                                            \
         }                                                        \
         *((ETYPE *)vd + HD(i)) = *((DTYPE *)vs2 + HS1(i));       \
-- 
2.34.2


^ permalink raw reply related	[flat|nested] 27+ messages in thread

* [PATCH qemu 0/9] Add mask agnostic behavior for rvv instructions
@ 2022-04-25 14:18 ` ~eopxd
  0 siblings, 0 replies; 27+ messages in thread
From: ~eopxd @ 2022-04-25 14:18 UTC (permalink / raw)
  To: qemu-devel, qemu-riscv
  Cc: WeiWei Li, Frank Chang, eop Chen, Bin Meng, Alistair Francis,
	Palmer Dabbelt

According to v-spec, agnostic elements can be either kept as undisturbed
or set elements' bits to all 1s. To distinguish the difference of mask
policies, QEMU should be able to simulate the mask agnostic behavior as
"set masked-off elements' bits to all 1s". An option 'rvv_ma_all_1s' is
added to enable the behavior, it is default as disabled.

This patch set is based on the patch set "Add tail agnostic behavior for
rvv instructions".
Based on: <164863587444.17401.9965527486691250478-0@git.sr.ht>

Yueh-Ting (eop) Chen (9):
  target/riscv: rvv: Add mask agnostic for vv instructions
  target/riscv: rvv: Add mask agnostic for vector load / store
    instructions
  target/riscv: rvv: Add mask agnostic for vx instructions
  target/riscv: rvv: Add mask agnostic for vector integer shift
    instructions
  target/riscv: rvv: Add mask agnostic for vector integer comparison
    instructions
  target/riscv: rvv: Add mask agnostic for vector fix-point arithmetic
    instructions
  target/riscv: rvv: Add mask agnostic for vector floating-point
    instructions
  target/riscv: rvv: Add mask agnostic for vector mask instructions
  target/riscv: rvv: Add mask agnostic for vector permutation
    instructions

 target/riscv/cpu.c                      |   1 +
 target/riscv/cpu.h                      |   2 +
 target/riscv/cpu_helper.c               |   2 +
 target/riscv/insn_trans/trans_rvv.c.inc |  32 +++++
 target/riscv/internals.h                |   5 +-
 target/riscv/translate.c                |   2 +
 target/riscv/vector_helper.c            | 159 ++++++++++++++++++++++--
 7 files changed, 189 insertions(+), 14 deletions(-)

-- 
2.34.2


^ permalink raw reply	[flat|nested] 27+ messages in thread

* [PATCH qemu 0/9] Add mask agnostic behavior for rvv instructions
@ 2022-04-25 14:18 ` ~eopxd
  0 siblings, 0 replies; 27+ messages in thread
From: ~eopxd @ 2022-04-25 14:18 UTC (permalink / raw)
  To: qemu-devel, qemu-riscv
  Cc: Palmer Dabbelt, Alistair Francis, Bin Meng, Frank Chang,
	WeiWei Li, eop Chen

According to v-spec, agnostic elements can be either kept as undisturbed
or set elements' bits to all 1s. To distinguish the difference of mask
policies, QEMU should be able to simulate the mask agnostic behavior as
"set masked-off elements' bits to all 1s". An option 'rvv_ma_all_1s' is
added to enable the behavior, it is default as disabled.

This patch set is based on the patch set "Add tail agnostic behavior for
rvv instructions".
Based on: <164863587444.17401.9965527486691250478-0@git.sr.ht>

Yueh-Ting (eop) Chen (9):
  target/riscv: rvv: Add mask agnostic for vv instructions
  target/riscv: rvv: Add mask agnostic for vector load / store
    instructions
  target/riscv: rvv: Add mask agnostic for vx instructions
  target/riscv: rvv: Add mask agnostic for vector integer shift
    instructions
  target/riscv: rvv: Add mask agnostic for vector integer comparison
    instructions
  target/riscv: rvv: Add mask agnostic for vector fix-point arithmetic
    instructions
  target/riscv: rvv: Add mask agnostic for vector floating-point
    instructions
  target/riscv: rvv: Add mask agnostic for vector mask instructions
  target/riscv: rvv: Add mask agnostic for vector permutation
    instructions

 target/riscv/cpu.c                      |   1 +
 target/riscv/cpu.h                      |   2 +
 target/riscv/cpu_helper.c               |   2 +
 target/riscv/insn_trans/trans_rvv.c.inc |  32 +++++
 target/riscv/internals.h                |   5 +-
 target/riscv/translate.c                |   2 +
 target/riscv/vector_helper.c            | 159 ++++++++++++++++++++++--
 7 files changed, 189 insertions(+), 14 deletions(-)

-- 
2.34.2


^ permalink raw reply	[flat|nested] 27+ messages in thread

* [PATCH qemu 3/9] target/riscv: rvv: Add mask agnostic for vx instructions
@ 2022-04-25 14:18   ` ~eopxd
  0 siblings, 0 replies; 27+ messages in thread
From: ~eopxd @ 2022-04-25 14:18 UTC (permalink / raw)
  To: qemu-devel, qemu-riscv
  Cc: Palmer Dabbelt, Alistair Francis, Bin Meng, Frank Chang,
	WeiWei Li, eop Chen

From: Yueh-Ting (eop) Chen <eop.chen@sifive.com>

Signed-off-by: eop Chen <eop.chen@sifive.com>
Reviewed-by: Frank Chang <frank.chang@sifive.com>
---
 target/riscv/insn_trans/trans_rvv.c.inc | 2 ++
 target/riscv/vector_helper.c            | 3 +++
 2 files changed, 5 insertions(+)

diff --git a/target/riscv/insn_trans/trans_rvv.c.inc b/target/riscv/insn_trans/trans_rvv.c.inc
index 4e141e5145..e2bdfc0fae 100644
--- a/target/riscv/insn_trans/trans_rvv.c.inc
+++ b/target/riscv/insn_trans/trans_rvv.c.inc
@@ -1308,6 +1308,7 @@ static bool opivx_trans(uint32_t vd, uint32_t rs1, uint32_t vs2, uint32_t vm,
     data = FIELD_DP32(data, VDATA, VM, vm);
     data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
     data = FIELD_DP32(data, VDATA, VTA, s->vta);
+    data = FIELD_DP32(data, VDATA, VMA, s->vma);
     desc = tcg_constant_i32(simd_desc(s->cfg_ptr->vlen / 8,
                                       s->cfg_ptr->vlen / 8, data));
 
@@ -1484,6 +1485,7 @@ static bool opivi_trans(uint32_t vd, uint32_t imm, uint32_t vs2, uint32_t vm,
     data = FIELD_DP32(data, VDATA, VM, vm);
     data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
     data = FIELD_DP32(data, VDATA, VTA, s->vta);
+    data = FIELD_DP32(data, VDATA, VMA, s->vma);
     desc = tcg_constant_i32(simd_desc(s->cfg_ptr->vlen / 8,
                                       s->cfg_ptr->vlen / 8, data));
 
diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c
index bd84b0409c..658ea0244d 100644
--- a/target/riscv/vector_helper.c
+++ b/target/riscv/vector_helper.c
@@ -891,10 +891,13 @@ static void do_vext_vx(void *vd, void *v0, target_long s1, void *vs2,
     uint32_t vl = env->vl;
     uint32_t total_elems = vext_get_total_elems(desc, esz);
     uint32_t vta = vext_vta(desc);
+    uint32_t vma = vext_vma(desc);
     uint32_t i;
 
     for (i = env->vstart; i < vl; i++) {
         if (!vm && !vext_elem_mask(v0, i)) {
+            /* set masked-off elements to 1s */
+            vext_set_elems_1s_fns[ctzl(esz)](vd, vma, i, i * esz, (i + 1) * esz);
             continue;
         }
         fn(vd, s1, vs2, i);
-- 
2.34.2



^ permalink raw reply related	[flat|nested] 27+ messages in thread

* [PATCH qemu 1/9] target/riscv: rvv: Add mask agnostic for vv instructions
@ 2022-04-25 14:18   ` ~eopxd
  0 siblings, 0 replies; 27+ messages in thread
From: ~eopxd @ 2022-04-25 14:18 UTC (permalink / raw)
  To: qemu-devel, qemu-riscv
  Cc: Palmer Dabbelt, Alistair Francis, Bin Meng, Frank Chang,
	WeiWei Li, eop Chen

From: Yueh-Ting (eop) Chen <eop.chen@sifive.com>

This is the first commit regarding the mask agnostic behavior.
Added option 'rvv_ma_all_1s' to enable the behavior, the option
is default to false.

Signed-off-by: eop Chen <eop.chen@sifive.com>
Reviewed-by: Frank Chang <frank.chang@sifive.com>
---
 target/riscv/cpu.c                      | 1 +
 target/riscv/cpu.h                      | 2 ++
 target/riscv/cpu_helper.c               | 2 ++
 target/riscv/insn_trans/trans_rvv.c.inc | 3 +++
 target/riscv/internals.h                | 5 +++--
 target/riscv/translate.c                | 2 ++
 target/riscv/vector_helper.c            | 8 ++++++++
 7 files changed, 21 insertions(+), 2 deletions(-)

diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
index cd4cf4b41e..2bf862a5e4 100644
--- a/target/riscv/cpu.c
+++ b/target/riscv/cpu.c
@@ -811,6 +811,7 @@ static Property riscv_cpu_properties[] = {
 
     DEFINE_PROP_UINT64("resetvec", RISCVCPU, cfg.resetvec, DEFAULT_RSTVEC),
     DEFINE_PROP_BOOL("rvv_ta_all_1s", RISCVCPU, cfg.rvv_ta_all_1s, false),
+    DEFINE_PROP_BOOL("rvv_ma_all_1s", RISCVCPU, cfg.rvv_ma_all_1s, false),
     DEFINE_PROP_END_OF_LIST(),
 };
 
diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h
index 8c4a79b5a0..c76ded515b 100644
--- a/target/riscv/cpu.h
+++ b/target/riscv/cpu.h
@@ -370,6 +370,7 @@ struct RISCVCPUConfig {
     bool ext_zve32f;
     bool ext_zve64f;
     bool rvv_ta_all_1s;
+    bool rvv_ma_all_1s;
 
     /* Vendor-specific custom extensions */
     bool ext_XVentanaCondOps;
@@ -518,6 +519,7 @@ FIELD(TB_FLAGS, XL, 20, 2)
 FIELD(TB_FLAGS, PM_MASK_ENABLED, 22, 1)
 FIELD(TB_FLAGS, PM_BASE_ENABLED, 23, 1)
 FIELD(TB_FLAGS, VTA, 24, 1)
+FIELD(TB_FLAGS, VMA, 25, 1)
 
 #ifdef TARGET_RISCV32
 #define riscv_cpu_mxl(env)  ((void)(env), MXL_RV32)
diff --git a/target/riscv/cpu_helper.c b/target/riscv/cpu_helper.c
index 2941c88c31..be94d82ff8 100644
--- a/target/riscv/cpu_helper.c
+++ b/target/riscv/cpu_helper.c
@@ -67,6 +67,8 @@ void cpu_get_tb_cpu_state(CPURISCVState *env, target_ulong *pc,
         flags = FIELD_DP32(flags, TB_FLAGS, VL_EQ_VLMAX, vl_eq_vlmax);
         flags = FIELD_DP32(flags, TB_FLAGS, VTA,
                     FIELD_EX64(env->vtype, VTYPE, VTA));
+        flags = FIELD_DP32(flags, TB_FLAGS, VMA,
+                    FIELD_EX64(env->vtype, VTYPE, VMA));
     } else {
         flags = FIELD_DP32(flags, TB_FLAGS, VILL, 1);
     }
diff --git a/target/riscv/insn_trans/trans_rvv.c.inc b/target/riscv/insn_trans/trans_rvv.c.inc
index 8d87501e03..4610107fb4 100644
--- a/target/riscv/insn_trans/trans_rvv.c.inc
+++ b/target/riscv/insn_trans/trans_rvv.c.inc
@@ -1251,6 +1251,7 @@ do_opivv_gvec(DisasContext *s, arg_rmrr *a, GVecGen3Fn *gvec_fn,
         data = FIELD_DP32(data, VDATA, VM, a->vm);
         data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
         data = FIELD_DP32(data, VDATA, VTA, s->vta);
+        data = FIELD_DP32(data, VDATA, VMA, s->vma);
         tcg_gen_gvec_4_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0),
                            vreg_ofs(s, a->rs1), vreg_ofs(s, a->rs2),
                            cpu_env, s->cfg_ptr->vlen / 8,
@@ -1567,6 +1568,7 @@ static bool do_opivv_widen(DisasContext *s, arg_rmrr *a,
         data = FIELD_DP32(data, VDATA, VM, a->vm);
         data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
         data = FIELD_DP32(data, VDATA, VTA, s->vta);
+        data = FIELD_DP32(data, VDATA, VMA, s->vma);
         tcg_gen_gvec_4_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0),
                            vreg_ofs(s, a->rs1),
                            vreg_ofs(s, a->rs2),
@@ -1649,6 +1651,7 @@ static bool do_opiwv_widen(DisasContext *s, arg_rmrr *a,
         data = FIELD_DP32(data, VDATA, VM, a->vm);
         data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
         data = FIELD_DP32(data, VDATA, VTA, s->vta);
+        data = FIELD_DP32(data, VDATA, VMA, s->vma);
         tcg_gen_gvec_4_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0),
                            vreg_ofs(s, a->rs1),
                            vreg_ofs(s, a->rs2),
diff --git a/target/riscv/internals.h b/target/riscv/internals.h
index 512c6c30cf..00b72fd767 100644
--- a/target/riscv/internals.h
+++ b/target/riscv/internals.h
@@ -25,8 +25,9 @@
 FIELD(VDATA, VM, 0, 1)
 FIELD(VDATA, LMUL, 1, 3)
 FIELD(VDATA, VTA, 4, 1)
-FIELD(VDATA, NF, 5, 4)
-FIELD(VDATA, WD, 5, 1)
+FIELD(VDATA, VMA, 5, 1)
+FIELD(VDATA, NF, 6, 4)
+FIELD(VDATA, WD, 6, 1)
 
 /* float point classify helpers */
 target_ulong fclass_h(uint64_t frs1);
diff --git a/target/riscv/translate.c b/target/riscv/translate.c
index 7775dade26..37893aa348 100644
--- a/target/riscv/translate.c
+++ b/target/riscv/translate.c
@@ -95,6 +95,7 @@ typedef struct DisasContext {
     int8_t lmul;
     uint8_t sew;
     uint8_t vta;
+    uint8_t vma;
     target_ulong vstart;
     bool vl_eq_vlmax;
     uint8_t ntemp;
@@ -1085,6 +1086,7 @@ static void riscv_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs)
     ctx->sew = FIELD_EX32(tb_flags, TB_FLAGS, SEW);
     ctx->lmul = sextract32(FIELD_EX32(tb_flags, TB_FLAGS, LMUL), 0, 3);
     ctx->vta = FIELD_EX32(tb_flags, TB_FLAGS, VTA) && cpu->cfg.rvv_ta_all_1s;
+    ctx->vma = FIELD_EX32(tb_flags, TB_FLAGS, VMA) && cpu->cfg.rvv_ma_all_1s;
     ctx->vstart = env->vstart;
     ctx->vl_eq_vlmax = FIELD_EX32(tb_flags, TB_FLAGS, VL_EQ_VLMAX);
     ctx->misa_mxl_max = env->misa_mxl_max;
diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c
index fbde0c9248..141a06ddf0 100644
--- a/target/riscv/vector_helper.c
+++ b/target/riscv/vector_helper.c
@@ -127,6 +127,11 @@ static inline uint32_t vext_vta(uint32_t desc)
     return FIELD_EX32(simd_data(desc), VDATA, VTA);
 }
 
+static inline uint32_t vext_vma(uint32_t desc)
+{
+    return FIELD_EX32(simd_data(desc), VDATA, VMA);
+}
+
 /*
  * Get the maximum number of elements can be operated.
  *
@@ -797,10 +802,13 @@ static void do_vext_vv(void *vd, void *v0, void *vs1, void *vs2,
     uint32_t vl = env->vl;
     uint32_t total_elems = vext_get_total_elems(desc, esz);
     uint32_t vta = vext_vta(desc);
+    uint32_t vma = vext_vma(desc);
     uint32_t i;
 
     for (i = env->vstart; i < vl; i++) {
         if (!vm && !vext_elem_mask(v0, i)) {
+            /* set masked-off elements to 1s */
+            vext_set_elems_1s_fns[ctzl(esz)](vd, vma, i, i * esz, (i + 1) * esz);
             continue;
         }
         fn(vd, vs1, vs2, i);
-- 
2.34.2



^ permalink raw reply related	[flat|nested] 27+ messages in thread

* [PATCH qemu 5/9] target/riscv: rvv: Add mask agnostic for vector integer comparison instructions
@ 2022-04-25 14:18   ` ~eopxd
  0 siblings, 0 replies; 27+ messages in thread
From: ~eopxd @ 2022-04-25 14:18 UTC (permalink / raw)
  To: qemu-devel, qemu-riscv
  Cc: Palmer Dabbelt, Alistair Francis, Bin Meng, Frank Chang,
	WeiWei Li, eop Chen

From: Yueh-Ting (eop) Chen <eop.chen@sifive.com>

Signed-off-by: eop Chen <eop.chen@sifive.com>
Reviewed-by: Frank Chang <frank.chang@sifive.com>
---
 target/riscv/insn_trans/trans_rvv.c.inc |  1 +
 target/riscv/vector_helper.c            | 10 ++++++++++
 2 files changed, 11 insertions(+)

diff --git a/target/riscv/insn_trans/trans_rvv.c.inc b/target/riscv/insn_trans/trans_rvv.c.inc
index a085ef2c29..be08ec8a2e 100644
--- a/target/riscv/insn_trans/trans_rvv.c.inc
+++ b/target/riscv/insn_trans/trans_rvv.c.inc
@@ -1742,6 +1742,7 @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a)             \
         data = FIELD_DP32(data, VDATA, VM, a->vm);                 \
         data = FIELD_DP32(data, VDATA, LMUL, s->lmul);             \
         data = FIELD_DP32(data, VDATA, VTA, s->vta);               \
+        data = FIELD_DP32(data, VDATA, VMA, s->vma);               \
         tcg_gen_gvec_4_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0),     \
                            vreg_ofs(s, a->rs1),                    \
                            vreg_ofs(s, a->rs2), cpu_env,           \
diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c
index 269e40c251..2b341d6679 100644
--- a/target/riscv/vector_helper.c
+++ b/target/riscv/vector_helper.c
@@ -1401,12 +1401,17 @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,   \
     uint32_t esz = sizeof(ETYPE);                             \
     uint32_t total_elems = vext_get_total_elems(desc, esz);   \
     uint32_t vta = vext_vta(desc);                            \
+    uint32_t vma = vext_vma(desc);                            \
     uint32_t i;                                               \
                                                               \
     for (i = env->vstart; i < vl; i++) {                      \
         ETYPE s1 = *((ETYPE *)vs1 + H(i));                    \
         ETYPE s2 = *((ETYPE *)vs2 + H(i));                    \
         if (!vm && !vext_elem_mask(v0, i)) {                  \
+            /* set masked-off elements to 1s */               \
+            if (vma) {                                        \
+                vext_set_elem_mask(vd, i, 1);                 \
+            }                                                 \
             continue;                                         \
         }                                                     \
         vext_set_elem_mask(vd, i, DO_OP(s2, s1));             \
@@ -1459,11 +1464,16 @@ void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,   \
     uint32_t esz = sizeof(ETYPE);                                   \
     uint32_t total_elems = vext_get_total_elems(desc, esz);         \
     uint32_t vta = vext_vta(desc);                                  \
+    uint32_t vma = vext_vma(desc);                                  \
     uint32_t i;                                                     \
                                                                     \
     for (i = env->vstart; i < vl; i++) {                            \
         ETYPE s2 = *((ETYPE *)vs2 + H(i));                          \
         if (!vm && !vext_elem_mask(v0, i)) {                        \
+            /* set masked-off elements to 1s */                     \
+            if (vma) {                                              \
+                vext_set_elem_mask(vd, i, 1);                       \
+            }                                                       \
             continue;                                               \
         }                                                           \
         vext_set_elem_mask(vd, i,                                   \
-- 
2.34.2



^ permalink raw reply related	[flat|nested] 27+ messages in thread

* [PATCH qemu 9/9] target/riscv: rvv: Add mask agnostic for vector permutation instructions
@ 2022-04-25 14:18   ` ~eopxd
  0 siblings, 0 replies; 27+ messages in thread
From: ~eopxd @ 2022-04-25 14:18 UTC (permalink / raw)
  To: qemu-devel, qemu-riscv
  Cc: Palmer Dabbelt, Alistair Francis, Bin Meng, Frank Chang,
	WeiWei Li, eop Chen

From: Yueh-Ting (eop) Chen <eop.chen@sifive.com>

Signed-off-by: eop Chen <eop.chen@sifive.com>
Reviewed-by: Frank Chang <frank.chang@sifive.com>
---
 target/riscv/insn_trans/trans_rvv.c.inc |  1 +
 target/riscv/vector_helper.c            | 34 +++++++++++++++++++++++--
 2 files changed, 33 insertions(+), 2 deletions(-)

diff --git a/target/riscv/insn_trans/trans_rvv.c.inc b/target/riscv/insn_trans/trans_rvv.c.inc
index 69d00c7116..3858a0479a 100644
--- a/target/riscv/insn_trans/trans_rvv.c.inc
+++ b/target/riscv/insn_trans/trans_rvv.c.inc
@@ -3975,6 +3975,7 @@ static bool int_ext_op(DisasContext *s, arg_rmr *a, uint8_t seq)
 
     data = FIELD_DP32(data, VDATA, VM, a->vm);
     data = FIELD_DP32(data, VDATA, VTA, s->vta);
+    data = FIELD_DP32(data, VDATA, VMA, s->vma);
 
     tcg_gen_gvec_3_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0),
                        vreg_ofs(s, a->rs2), cpu_env,
diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c
index 600ccad513..e87806ed64 100644
--- a/target/riscv/vector_helper.c
+++ b/target/riscv/vector_helper.c
@@ -5037,11 +5037,15 @@ void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,         \
     uint32_t esz = sizeof(ETYPE);                                         \
     uint32_t total_elems = vext_get_total_elems(desc, esz);               \
     uint32_t vta = vext_vta(desc);                                        \
+    uint32_t vma = vext_vma(desc);                                        \
     target_ulong offset = s1, i_min, i;                                   \
                                                                           \
     i_min = MAX(env->vstart, offset);                                     \
     for (i = i_min; i < vl; i++) {                                        \
         if (!vm && !vext_elem_mask(v0, i)) {                              \
+            /* set masked-off elements to 1s */                           \
+            vext_set_elems_1s_fns[ctzl(esz)](vd, vma, i, i * esz,         \
+                                             (i + 1) * esz);              \
             continue;                                                     \
         }                                                                 \
         *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i - offset));          \
@@ -5067,13 +5071,18 @@ void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,         \
     uint32_t esz = sizeof(ETYPE);                                         \
     uint32_t total_elems = vext_get_total_elems(desc, esz);               \
     uint32_t vta = vext_vta(desc);                                        \
+    uint32_t vma = vext_vma(desc);                                        \
     target_ulong i_max, i;                                                \
                                                                           \
     i_max = MAX(MIN(s1 < vlmax ? vlmax - s1 : 0, vl), env->vstart);       \
     for (i = env->vstart; i < i_max; ++i) {                               \
-        if (vm || vext_elem_mask(v0, i)) {                                \
-            *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i + s1));          \
+        if (!vm && !vext_elem_mask(v0, i)) {                              \
+            /* set masked-off elements to 1s */                           \
+            vext_set_elems_1s_fns[ctzl(esz)](vd, vma, i, i * esz,         \
+                                             (i + 1) * esz);              \
+            continue;                                                     \
         }                                                                 \
+        *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i + s1));              \
     }                                                                     \
                                                                           \
     for (i = i_max; i < vl; ++i) {                                        \
@@ -5104,10 +5113,14 @@ static void vslide1up_##BITWIDTH(void *vd, void *v0, target_ulong s1,       \
     uint32_t esz = sizeof(ETYPE);                                           \
     uint32_t total_elems = vext_get_total_elems(desc, esz);                 \
     uint32_t vta = vext_vta(desc);                                          \
+    uint32_t vma = vext_vma(desc);                                          \
     uint32_t i;                                                             \
                                                                             \
     for (i = env->vstart; i < vl; i++) {                                    \
         if (!vm && !vext_elem_mask(v0, i)) {                                \
+            /* set masked-off elements to 1s */                             \
+            vext_set_elems_1s_fns[ctzl(esz)](vd, vma, i, i * esz,           \
+                                             (i + 1) * esz);                \
             continue;                                                       \
         }                                                                   \
         if (i == 0) {                                                       \
@@ -5150,10 +5163,14 @@ static void vslide1down_##BITWIDTH(void *vd, void *v0, target_ulong s1,       \
     uint32_t esz = sizeof(ETYPE);                                             \
     uint32_t total_elems = vext_get_total_elems(desc, esz);                   \
     uint32_t vta = vext_vta(desc);                                            \
+    uint32_t vma = vext_vma(desc);                                            \
     uint32_t i;                                                               \
                                                                               \
     for (i = env->vstart; i < vl; i++) {                                      \
         if (!vm && !vext_elem_mask(v0, i)) {                                  \
+            /* set masked-off elements to 1s */                               \
+            vext_set_elems_1s_fns[ctzl(esz)](vd, vma, i, i * esz,             \
+                                             (i + 1) * esz);                  \
             continue;                                                         \
         }                                                                     \
         if (i == vl - 1) {                                                    \
@@ -5222,11 +5239,15 @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,               \
     uint32_t esz = sizeof(TS2);                                           \
     uint32_t total_elems = vext_get_total_elems(desc, esz);               \
     uint32_t vta = vext_vta(desc);                                        \
+    uint32_t vma = vext_vma(desc);                                        \
     uint64_t index;                                                       \
     uint32_t i;                                                           \
                                                                           \
     for (i = env->vstart; i < vl; i++) {                                  \
         if (!vm && !vext_elem_mask(v0, i)) {                              \
+            /* set masked-off elements to 1s */                           \
+            vext_set_elems_1s_fns[ctzl(esz)](vd, vma, i, i * esz,         \
+                                             (i + 1) * esz);              \
             continue;                                                     \
         }                                                                 \
         index = *((TS1 *)vs1 + HS1(i));                                   \
@@ -5263,11 +5284,15 @@ void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,         \
     uint32_t esz = sizeof(ETYPE);                                         \
     uint32_t total_elems = vext_get_total_elems(desc, esz);               \
     uint32_t vta = vext_vta(desc);                                        \
+    uint32_t vma = vext_vma(desc);                                        \
     uint64_t index = s1;                                                  \
     uint32_t i;                                                           \
                                                                           \
     for (i = env->vstart; i < vl; i++) {                                  \
         if (!vm && !vext_elem_mask(v0, i)) {                              \
+            /* set masked-off elements to 1s */                           \
+            vext_set_elems_1s_fns[ctzl(esz)](vd, vma, i, i * esz,         \
+                                             (i + 1) * esz);              \
             continue;                                                     \
         }                                                                 \
         if (index >= vlmax) {                                             \
@@ -5349,10 +5374,15 @@ void HELPER(NAME)(void *vd, void *v0, void *vs2,                 \
     uint32_t esz = sizeof(ETYPE);                                \
     uint32_t total_elems = vext_get_total_elems(desc, esz);      \
     uint32_t vta = vext_vta(desc);                               \
+    uint32_t vma = vext_vma(desc);                               \
     uint32_t i;                                                  \
                                                                  \
     for (i = env->vstart; i < vl; i++) {                         \
         if (!vm && !vext_elem_mask(v0, i)) {                     \
+            /* set masked-off elements to 1s */                  \
+            vext_set_elems_1s_fns[ctzl(esz)](vd, vma, i,         \
+                                             i * esz,            \
+                                             (i + 1) * esz);     \
             continue;                                            \
         }                                                        \
         *((ETYPE *)vd + HD(i)) = *((DTYPE *)vs2 + HS1(i));       \
-- 
2.34.2


^ permalink raw reply related	[flat|nested] 27+ messages in thread

* [PATCH qemu 2/9] target/riscv: rvv: Add mask agnostic for vector load / store instructions
@ 2022-04-25 14:18   ` ~eopxd
  0 siblings, 0 replies; 27+ messages in thread
From: ~eopxd @ 2022-04-25 14:18 UTC (permalink / raw)
  To: qemu-devel, qemu-riscv
  Cc: Palmer Dabbelt, Alistair Francis, Bin Meng, Frank Chang,
	WeiWei Li, eop Chen

From: Yueh-Ting (eop) Chen <eop.chen@sifive.com>

Signed-off-by: eop Chen <eop.chen@sifive.com>
Reviewed-by: Frank Chang <frank.chang@sifive.com>
---
 target/riscv/insn_trans/trans_rvv.c.inc |  9 +++++++++
 target/riscv/vector_helper.c            | 20 ++++++++++++++++++++
 2 files changed, 29 insertions(+)

diff --git a/target/riscv/insn_trans/trans_rvv.c.inc b/target/riscv/insn_trans/trans_rvv.c.inc
index 4610107fb4..4e141e5145 100644
--- a/target/riscv/insn_trans/trans_rvv.c.inc
+++ b/target/riscv/insn_trans/trans_rvv.c.inc
@@ -712,6 +712,7 @@ static bool ld_us_op(DisasContext *s, arg_r2nfvm *a, uint8_t eew)
     data = FIELD_DP32(data, VDATA, LMUL, emul);
     data = FIELD_DP32(data, VDATA, NF, a->nf);
     data = FIELD_DP32(data, VDATA, VTA, s->vta);
+    data = FIELD_DP32(data, VDATA, VMA, s->vma);
     return ldst_us_trans(a->rd, a->rs1, data, fn, s, false);
 }
 
@@ -750,6 +751,7 @@ static bool st_us_op(DisasContext *s, arg_r2nfvm *a, uint8_t eew)
     data = FIELD_DP32(data, VDATA, LMUL, emul);
     data = FIELD_DP32(data, VDATA, NF, a->nf);
     data = FIELD_DP32(data, VDATA, VTA, s->vta);
+    data = FIELD_DP32(data, VDATA, VMA, s->vma);
     return ldst_us_trans(a->rd, a->rs1, data, fn, s, true);
 }
 
@@ -777,6 +779,7 @@ static bool ld_us_mask_op(DisasContext *s, arg_vlm_v *a, uint8_t eew)
     data = FIELD_DP32(data, VDATA, LMUL, 0);
     data = FIELD_DP32(data, VDATA, NF, 1);
     data = FIELD_DP32(data, VDATA, VTA, s->vta);
+    data = FIELD_DP32(data, VDATA, VMA, s->vma);
     return ldst_us_trans(a->rd, a->rs1, data, fn, s, false);
 }
 
@@ -795,6 +798,7 @@ static bool st_us_mask_op(DisasContext *s, arg_vsm_v *a, uint8_t eew)
     data = FIELD_DP32(data, VDATA, LMUL, 0);
     data = FIELD_DP32(data, VDATA, NF, 1);
     data = FIELD_DP32(data, VDATA, VTA, s->vta);
+    data = FIELD_DP32(data, VDATA, VMA, s->vma);
     return ldst_us_trans(a->rd, a->rs1, data, fn, s, true);
 }
 
@@ -867,6 +871,7 @@ static bool ld_stride_op(DisasContext *s, arg_rnfvm *a, uint8_t eew)
     data = FIELD_DP32(data, VDATA, LMUL, emul);
     data = FIELD_DP32(data, VDATA, NF, a->nf);
     data = FIELD_DP32(data, VDATA, VTA, s->vta);
+    data = FIELD_DP32(data, VDATA, VMA, s->vma);
     return ldst_stride_trans(a->rd, a->rs1, a->rs2, data, fn, s, false);
 }
 
@@ -897,6 +902,7 @@ static bool st_stride_op(DisasContext *s, arg_rnfvm *a, uint8_t eew)
     data = FIELD_DP32(data, VDATA, LMUL, emul);
     data = FIELD_DP32(data, VDATA, NF, a->nf);
     data = FIELD_DP32(data, VDATA, VTA, s->vta);
+    data = FIELD_DP32(data, VDATA, VMA, s->vma);
     fn = fns[eew];
     if (fn == NULL) {
         return false;
@@ -998,6 +1004,7 @@ static bool ld_index_op(DisasContext *s, arg_rnfvm *a, uint8_t eew)
     data = FIELD_DP32(data, VDATA, LMUL, emul);
     data = FIELD_DP32(data, VDATA, NF, a->nf);
     data = FIELD_DP32(data, VDATA, VTA, s->vta);
+    data = FIELD_DP32(data, VDATA, VMA, s->vma);
     return ldst_index_trans(a->rd, a->rs1, a->rs2, data, fn, s, false);
 }
 
@@ -1051,6 +1058,7 @@ static bool st_index_op(DisasContext *s, arg_rnfvm *a, uint8_t eew)
     data = FIELD_DP32(data, VDATA, LMUL, emul);
     data = FIELD_DP32(data, VDATA, NF, a->nf);
     data = FIELD_DP32(data, VDATA, VTA, s->vta);
+    data = FIELD_DP32(data, VDATA, VMA, s->vma);
     return ldst_index_trans(a->rd, a->rs1, a->rs2, data, fn, s, true);
 }
 
@@ -1117,6 +1125,7 @@ static bool ldff_op(DisasContext *s, arg_r2nfvm *a, uint8_t eew)
     data = FIELD_DP32(data, VDATA, LMUL, emul);
     data = FIELD_DP32(data, VDATA, NF, a->nf);
     data = FIELD_DP32(data, VDATA, VTA, s->vta);
+    data = FIELD_DP32(data, VDATA, VMA, s->vma);
     return ldff_trans(a->rd, a->rs1, data, fn, s);
 }
 
diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c
index 141a06ddf0..bd84b0409c 100644
--- a/target/riscv/vector_helper.c
+++ b/target/riscv/vector_helper.c
@@ -296,9 +296,17 @@ vext_ldst_stride(void *vd, void *v0, target_ulong base,
     uint32_t max_elems = vext_max_elems(desc, log2_esz);
     uint32_t esz = 1 << log2_esz;
     uint32_t vta = vext_vta(desc);
+    uint32_t vma = vext_vma(desc);
 
     for (i = env->vstart; i < env->vl; i++, env->vstart++) {
         if (!vm && !vext_elem_mask(v0, i)) {
+            k = 0;
+            while (k < nf) {
+                /* set masked-off elements to 1s */
+                vext_set_elems_1s_fns[ctzl(esz)](vd, vma, i + k * max_elems,
+                    (i + k * max_elems) * esz, (i + k * max_elems + 1) * esz);
+                k++;
+            }
             continue;
         }
 
@@ -479,10 +487,18 @@ vext_ldst_index(void *vd, void *v0, target_ulong base,
     uint32_t max_elems = vext_max_elems(desc, log2_esz);
     uint32_t esz = 1 << log2_esz;
     uint32_t vta = vext_vta(desc);
+    uint32_t vma = vext_vma(desc);
 
     /* load bytes from guest memory */
     for (i = env->vstart; i < env->vl; i++, env->vstart++) {
         if (!vm && !vext_elem_mask(v0, i)) {
+            k = 0;
+            while (k < nf) {
+                /* set masked-off elements to 1s */
+                vext_set_elems_1s_fns[ctzl(esz)](vd, vma, i + k * max_elems,
+                    (i + k * max_elems) * esz, (i + k * max_elems + 1) * esz);
+                k++;
+            }
             continue;
         }
 
@@ -568,6 +584,7 @@ vext_ldff(void *vd, void *v0, target_ulong base,
     uint32_t max_elems = vext_max_elems(desc, log2_esz);
     uint32_t esz = 1 << log2_esz;
     uint32_t vta = vext_vta(desc);
+    uint32_t vma = vext_vma(desc);
     target_ulong addr, offset, remain;
 
     /* probe every access*/
@@ -614,6 +631,9 @@ ProbeSuccess:
     for (i = env->vstart; i < env->vl; i++) {
         k = 0;
         if (!vm && !vext_elem_mask(v0, i)) {
+            /* set masked-off elements to 1s */
+            vext_set_elems_1s_fns[ctzl(esz)](vd, vma, i + k * max_elems,
+                (i + k * max_elems) * esz, (i + k * max_elems + 1) * esz);
             continue;
         }
         while (k < nf) {
-- 
2.34.2



^ permalink raw reply related	[flat|nested] 27+ messages in thread

* [PATCH qemu 4/9] target/riscv: rvv: Add mask agnostic for vector integer shift instructions
@ 2022-04-25 14:18   ` ~eopxd
  0 siblings, 0 replies; 27+ messages in thread
From: ~eopxd @ 2022-04-25 14:18 UTC (permalink / raw)
  To: qemu-devel, qemu-riscv
  Cc: Palmer Dabbelt, Alistair Francis, Bin Meng, Frank Chang,
	WeiWei Li, eop Chen

From: Yueh-Ting (eop) Chen <eop.chen@sifive.com>

Signed-off-by: eop Chen <eop.chen@sifive.com>
Reviewed-by: Frank Chang <frank.chang@sifive.com>
---
 target/riscv/insn_trans/trans_rvv.c.inc | 1 +
 target/riscv/vector_helper.c            | 9 +++++++++
 2 files changed, 10 insertions(+)

diff --git a/target/riscv/insn_trans/trans_rvv.c.inc b/target/riscv/insn_trans/trans_rvv.c.inc
index e2bdfc0fae..a085ef2c29 100644
--- a/target/riscv/insn_trans/trans_rvv.c.inc
+++ b/target/riscv/insn_trans/trans_rvv.c.inc
@@ -1935,6 +1935,7 @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a)             \
         data = FIELD_DP32(data, VDATA, VM, a->vm);                 \
         data = FIELD_DP32(data, VDATA, LMUL, s->lmul);             \
         data = FIELD_DP32(data, VDATA, VTA, s->vta);               \
+        data = FIELD_DP32(data, VDATA, VMA, s->vma);               \
         tcg_gen_gvec_4_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0),     \
                            vreg_ofs(s, a->rs1),                    \
                            vreg_ofs(s, a->rs2), cpu_env,           \
diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c
index 658ea0244d..269e40c251 100644
--- a/target/riscv/vector_helper.c
+++ b/target/riscv/vector_helper.c
@@ -1291,10 +1291,14 @@ void HELPER(NAME)(void *vd, void *v0, void *vs1,                          \
     uint32_t esz = sizeof(TS1);                                           \
     uint32_t total_elems = vext_get_total_elems(desc, esz);               \
     uint32_t vta = vext_vta(desc);                                        \
+    uint32_t vma = vext_vma(desc);                                        \
     uint32_t i;                                                           \
                                                                           \
     for (i = env->vstart; i < vl; i++) {                                  \
         if (!vm && !vext_elem_mask(v0, i)) {                              \
+            /* set masked-off elements to 1s */                           \
+            vext_set_elems_1s_fns[ctzl(esz)](vd, vma, i, i * esz,         \
+                                             (i + 1) * esz);              \
             continue;                                                     \
         }                                                                 \
         TS1 s1 = *((TS1 *)vs1 + HS1(i));                                  \
@@ -1332,10 +1336,15 @@ void HELPER(NAME)(void *vd, void *v0, target_ulong s1,      \
     uint32_t esz = sizeof(TD);                              \
     uint32_t total_elems = vext_get_total_elems(desc, esz); \
     uint32_t vta = vext_vta(desc);                          \
+    uint32_t vma = vext_vma(desc);                          \
     uint32_t i;                                             \
                                                             \
     for (i = env->vstart; i < vl; i++) {                    \
         if (!vm && !vext_elem_mask(v0, i)) {                \
+            /* set masked-off elements to 1s */             \
+            vext_set_elems_1s_fns[ctzl(esz)](vd, vma, i,    \
+                                             i * esz,       \
+                                             (i + 1) * esz);\
             continue;                                       \
         }                                                   \
         TS2 s2 = *((TS2 *)vs2 + HS2(i));                    \
-- 
2.34.2



^ permalink raw reply related	[flat|nested] 27+ messages in thread

* [PATCH qemu 8/9] target/riscv: rvv: Add mask agnostic for vector mask instructions
@ 2022-04-25 14:18   ` ~eopxd
  0 siblings, 0 replies; 27+ messages in thread
From: ~eopxd @ 2022-04-25 14:18 UTC (permalink / raw)
  To: qemu-devel, qemu-riscv
  Cc: Palmer Dabbelt, Alistair Francis, Bin Meng, Frank Chang,
	WeiWei Li, eop Chen

From: Yueh-Ting (eop) Chen <eop.chen@sifive.com>

Signed-off-by: eop Chen <eop.chen@sifive.com>
Reviewed-by: Frank Chang <frank.chang@sifive.com>
---
 target/riscv/insn_trans/trans_rvv.c.inc |  3 +++
 target/riscv/vector_helper.c            | 13 +++++++++++++
 2 files changed, 16 insertions(+)

diff --git a/target/riscv/insn_trans/trans_rvv.c.inc b/target/riscv/insn_trans/trans_rvv.c.inc
index e404d24bcf..69d00c7116 100644
--- a/target/riscv/insn_trans/trans_rvv.c.inc
+++ b/target/riscv/insn_trans/trans_rvv.c.inc
@@ -3333,6 +3333,7 @@ static bool trans_##NAME(DisasContext *s, arg_rmr *a)              \
         data = FIELD_DP32(data, VDATA, VM, a->vm);                 \
         data = FIELD_DP32(data, VDATA, LMUL, s->lmul);             \
         data = FIELD_DP32(data, VDATA, VTA, s->vta);               \
+        data = FIELD_DP32(data, VDATA, VMA, s->vma);               \
         tcg_gen_gvec_3_ptr(vreg_ofs(s, a->rd),                     \
                            vreg_ofs(s, 0), vreg_ofs(s, a->rs2),    \
                            cpu_env, s->cfg_ptr->vlen / 8,          \
@@ -3371,6 +3372,7 @@ static bool trans_viota_m(DisasContext *s, arg_viota_m *a)
         data = FIELD_DP32(data, VDATA, VM, a->vm);
         data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
         data = FIELD_DP32(data, VDATA, VTA, s->vta);
+        data = FIELD_DP32(data, VDATA, VMA, s->vma);
         static gen_helper_gvec_3_ptr * const fns[4] = {
             gen_helper_viota_m_b, gen_helper_viota_m_h,
             gen_helper_viota_m_w, gen_helper_viota_m_d,
@@ -3401,6 +3403,7 @@ static bool trans_vid_v(DisasContext *s, arg_vid_v *a)
         data = FIELD_DP32(data, VDATA, VM, a->vm);
         data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
         data = FIELD_DP32(data, VDATA, VTA, s->vta);
+        data = FIELD_DP32(data, VDATA, VMA, s->vma);
         static gen_helper_gvec_2_ptr * const fns[4] = {
             gen_helper_vid_v_b, gen_helper_vid_v_h,
             gen_helper_vid_v_w, gen_helper_vid_v_d,
diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c
index 0af65e5423..600ccad513 100644
--- a/target/riscv/vector_helper.c
+++ b/target/riscv/vector_helper.c
@@ -4895,11 +4895,16 @@ static void vmsetm(void *vd, void *v0, void *vs2, CPURISCVState *env,
     uint32_t vl = env->vl;
     uint32_t total_elems = env_archcpu(env)->cfg.vlen;
     uint32_t vta = vext_vta(desc);
+    uint32_t vma = vext_vma(desc);
     int i;
     bool first_mask_bit = false;
 
     for (i = env->vstart; i < vl; i++) {
         if (!vm && !vext_elem_mask(v0, i)) {
+            /* set masked-off elements to 1s */
+            if (vma) {
+                vext_set_elem_mask(vd, i, 1);
+            }
             continue;
         }
         /* write a zero to all following active elements */
@@ -4959,11 +4964,15 @@ void HELPER(NAME)(void *vd, void *v0, void *vs2, CPURISCVState *env,      \
     uint32_t esz = sizeof(ETYPE);                                         \
     uint32_t total_elems = vext_get_total_elems(desc, esz);               \
     uint32_t vta = vext_vta(desc);                                        \
+    uint32_t vma = vext_vma(desc);                                        \
     uint32_t sum = 0;                                                     \
     int i;                                                                \
                                                                           \
     for (i = env->vstart; i < vl; i++) {                                  \
         if (!vm && !vext_elem_mask(v0, i)) {                              \
+            /* set masked-off elements to 1s */                           \
+            vext_set_elems_1s_fns[ctzl(esz)](vd, vma, i, i * esz,         \
+                                             (i + 1) * esz);              \
             continue;                                                     \
         }                                                                 \
         *((ETYPE *)vd + H(i)) = sum;                                      \
@@ -4991,10 +5000,14 @@ void HELPER(NAME)(void *vd, void *v0, CPURISCVState *env, uint32_t desc)  \
     uint32_t esz = sizeof(ETYPE);                                         \
     uint32_t total_elems = vext_get_total_elems(desc, esz);               \
     uint32_t vta = vext_vta(desc);                                        \
+    uint32_t vma = vext_vma(desc);                                        \
     int i;                                                                \
                                                                           \
     for (i = env->vstart; i < vl; i++) {                                  \
         if (!vm && !vext_elem_mask(v0, i)) {                              \
+            /* set masked-off elements to 1s */                           \
+            vext_set_elems_1s_fns[ctzl(esz)](vd, vma, i, i * esz,         \
+                                             (i + 1) * esz);              \
             continue;                                                     \
         }                                                                 \
         *((ETYPE *)vd + H(i)) = i;                                        \
-- 
2.34.2



^ permalink raw reply related	[flat|nested] 27+ messages in thread

* [PATCH qemu 7/9] target/riscv: rvv: Add mask agnostic for vector floating-point instructions
@ 2022-04-25 14:18   ` ~eopxd
  0 siblings, 0 replies; 27+ messages in thread
From: ~eopxd @ 2022-04-25 14:18 UTC (permalink / raw)
  To: qemu-devel, qemu-riscv
  Cc: Palmer Dabbelt, Alistair Francis, Bin Meng, Frank Chang,
	WeiWei Li, eop Chen

From: Yueh-Ting (eop) Chen <eop.chen@sifive.com>

Signed-off-by: eop Chen <eop.chen@sifive.com>
Reviewed-by: Frank Chang <frank.chang@sifive.com>
---
 target/riscv/insn_trans/trans_rvv.c.inc | 12 +++++++++
 target/riscv/vector_helper.c            | 36 +++++++++++++++++++++++++
 2 files changed, 48 insertions(+)

diff --git a/target/riscv/insn_trans/trans_rvv.c.inc b/target/riscv/insn_trans/trans_rvv.c.inc
index be08ec8a2e..e404d24bcf 100644
--- a/target/riscv/insn_trans/trans_rvv.c.inc
+++ b/target/riscv/insn_trans/trans_rvv.c.inc
@@ -2415,6 +2415,7 @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a)             \
         data = FIELD_DP32(data, VDATA, VM, a->vm);                 \
         data = FIELD_DP32(data, VDATA, LMUL, s->lmul);             \
         data = FIELD_DP32(data, VDATA, VTA, s->vta);               \
+        data = FIELD_DP32(data, VDATA, VMA, s->vma);               \
         tcg_gen_gvec_4_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0),     \
                            vreg_ofs(s, a->rs1),                    \
                            vreg_ofs(s, a->rs2), cpu_env,           \
@@ -2498,6 +2499,7 @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a)            \
         data = FIELD_DP32(data, VDATA, VM, a->vm);                \
         data = FIELD_DP32(data, VDATA, LMUL, s->lmul);            \
         data = FIELD_DP32(data, VDATA, VTA, s->vta);              \
+        data = FIELD_DP32(data, VDATA, VMA, s->vma);              \
         return opfvf_trans(a->rd, a->rs1, a->rs2, data,           \
                            fns[s->sew - 1], s);                   \
     }                                                             \
@@ -2537,6 +2539,7 @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a)           \
         data = FIELD_DP32(data, VDATA, VM, a->vm);               \
         data = FIELD_DP32(data, VDATA, LMUL, s->lmul);           \
         data = FIELD_DP32(data, VDATA, VTA, s->vta);             \
+        data = FIELD_DP32(data, VDATA, VMA, s->vma);             \
         tcg_gen_gvec_4_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0),   \
                            vreg_ofs(s, a->rs1),                  \
                            vreg_ofs(s, a->rs2), cpu_env,         \
@@ -2577,6 +2580,7 @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a)           \
         data = FIELD_DP32(data, VDATA, VM, a->vm);               \
         data = FIELD_DP32(data, VDATA, LMUL, s->lmul);           \
         data = FIELD_DP32(data, VDATA, VTA, s->vta);             \
+        data = FIELD_DP32(data, VDATA, VMA, s->vma);             \
         return opfvf_trans(a->rd, a->rs1, a->rs2, data,          \
                            fns[s->sew - 1], s);                  \
     }                                                            \
@@ -2614,6 +2618,7 @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a)             \
         data = FIELD_DP32(data, VDATA, VM, a->vm);                 \
         data = FIELD_DP32(data, VDATA, LMUL, s->lmul);             \
         data = FIELD_DP32(data, VDATA, VTA, s->vta);               \
+        data = FIELD_DP32(data, VDATA, VMA, s->vma);               \
         tcg_gen_gvec_4_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0),     \
                            vreg_ofs(s, a->rs1),                    \
                            vreg_ofs(s, a->rs2), cpu_env,           \
@@ -2654,6 +2659,7 @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a)           \
         data = FIELD_DP32(data, VDATA, VM, a->vm);               \
         data = FIELD_DP32(data, VDATA, LMUL, s->lmul);           \
         data = FIELD_DP32(data, VDATA, VTA, s->vta);             \
+        data = FIELD_DP32(data, VDATA, VMA, s->vma);             \
         return opfvf_trans(a->rd, a->rs1, a->rs2, data,          \
                            fns[s->sew - 1], s);                  \
     }                                                            \
@@ -2738,6 +2744,7 @@ static bool do_opfv(DisasContext *s, arg_rmr *a,
         data = FIELD_DP32(data, VDATA, VM, a->vm);
         data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
         data = FIELD_DP32(data, VDATA, VTA, s->vta);
+        data = FIELD_DP32(data, VDATA, VMA, s->vma);
         tcg_gen_gvec_3_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0),
                            vreg_ofs(s, a->rs2), cpu_env,
                            s->cfg_ptr->vlen / 8,
@@ -2852,6 +2859,7 @@ static bool trans_vfmv_v_f(DisasContext *s, arg_vfmv_v_f *a)
             TCGv_i32 desc;
             uint32_t data = FIELD_DP32(0, VDATA, LMUL, s->lmul);
             data = FIELD_DP32(data, VDATA, VTA, s->vta);
+            data = FIELD_DP32(data, VDATA, VMA, s->vma);
             static gen_helper_vmv_vx * const fns[3] = {
                 gen_helper_vmv_v_x_h,
                 gen_helper_vmv_v_x_w,
@@ -2953,6 +2961,7 @@ static bool trans_##NAME(DisasContext *s, arg_rmr *a)              \
         data = FIELD_DP32(data, VDATA, VM, a->vm);                 \
         data = FIELD_DP32(data, VDATA, LMUL, s->lmul);             \
         data = FIELD_DP32(data, VDATA, VTA, s->vta);               \
+        data = FIELD_DP32(data, VDATA, VMA, s->vma);               \
         tcg_gen_gvec_3_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0),     \
                            vreg_ofs(s, a->rs2), cpu_env,           \
                            s->cfg_ptr->vlen / 8,                   \
@@ -3005,6 +3014,7 @@ static bool trans_##NAME(DisasContext *s, arg_rmr *a)              \
                                                                    \
         data = FIELD_DP32(data, VDATA, VM, a->vm);                 \
         data = FIELD_DP32(data, VDATA, VTA, s->vta);               \
+        data = FIELD_DP32(data, VDATA, VMA, s->vma);               \
         tcg_gen_gvec_3_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0),     \
                            vreg_ofs(s, a->rs2), cpu_env,           \
                            s->cfg_ptr->vlen / 8,                   \
@@ -3073,6 +3083,7 @@ static bool trans_##NAME(DisasContext *s, arg_rmr *a)              \
         data = FIELD_DP32(data, VDATA, VM, a->vm);                 \
         data = FIELD_DP32(data, VDATA, LMUL, s->lmul);             \
         data = FIELD_DP32(data, VDATA, VTA, s->vta);               \
+        data = FIELD_DP32(data, VDATA, VMA, s->vma);               \
         tcg_gen_gvec_3_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0),     \
                            vreg_ofs(s, a->rs2), cpu_env,           \
                            s->cfg_ptr->vlen / 8,                   \
@@ -3127,6 +3138,7 @@ static bool trans_##NAME(DisasContext *s, arg_rmr *a)              \
                                                                    \
         data = FIELD_DP32(data, VDATA, VM, a->vm);                 \
         data = FIELD_DP32(data, VDATA, VTA, s->vta);               \
+        data = FIELD_DP32(data, VDATA, VMA, s->vma);               \
         tcg_gen_gvec_3_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0),     \
                            vreg_ofs(s, a->rs2), cpu_env,           \
                            s->cfg_ptr->vlen / 8,                   \
diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c
index e266a8673c..0af65e5423 100644
--- a/target/riscv/vector_helper.c
+++ b/target/riscv/vector_helper.c
@@ -3051,10 +3051,16 @@ void HELPER(NAME)(void *vd, void *v0, void *vs1,          \
     uint32_t total_elems =                                \
         vext_get_total_elems(desc, ESZ);                  \
     uint32_t vta = vext_vta(desc);                        \
+    uint32_t vma = vext_vma(desc);                        \
     uint32_t i;                                           \
                                                           \
     for (i = env->vstart; i < vl; i++) {                  \
         if (!vm && !vext_elem_mask(v0, i)) {              \
+            /* set masked-off elements to 1s */           \
+            vext_set_elems_1s_fns[ctzl(ESZ)](vd, vma, i,  \
+                                             i * ESZ,     \
+                                             (i + 1) *    \
+                                             ESZ);        \
             continue;                                     \
         }                                                 \
         do_##NAME(vd, vs1, vs2, i, env);                  \
@@ -3091,10 +3097,16 @@ void HELPER(NAME)(void *vd, void *v0, uint64_t s1,        \
     uint32_t total_elems =                                \
         vext_get_total_elems(desc, ESZ);                  \
     uint32_t vta = vext_vta(desc);                        \
+    uint32_t vma = vext_vma(desc);                        \
     uint32_t i;                                           \
                                                           \
     for (i = env->vstart; i < vl; i++) {                  \
         if (!vm && !vext_elem_mask(v0, i)) {              \
+            /* set masked-off elements to 1s */           \
+            vext_set_elems_1s_fns[ctzl(ESZ)](vd, vma, i,  \
+                                             i * ESZ,     \
+                                             (i + 1) *    \
+                                             ESZ);        \
             continue;                                     \
         }                                                 \
         do_##NAME(vd, s1, vs2, i, env);                   \
@@ -3667,6 +3679,7 @@ void HELPER(NAME)(void *vd, void *v0, void *vs2,       \
     uint32_t total_elems =                             \
         vext_get_total_elems(desc, ESZ);               \
     uint32_t vta = vext_vta(desc);                     \
+    uint32_t vma = vext_vma(desc);                     \
     uint32_t i;                                        \
                                                        \
     if (vl == 0) {                                     \
@@ -3674,6 +3687,12 @@ void HELPER(NAME)(void *vd, void *v0, void *vs2,       \
     }                                                  \
     for (i = env->vstart; i < vl; i++) {               \
         if (!vm && !vext_elem_mask(v0, i)) {           \
+            /* set masked-off elements to 1s */        \
+            vext_set_elems_1s_fns[ctzl(ESZ)](vd, vma,  \
+                                             i,        \
+                                             i * ESZ,  \
+                                             (i + 1)   \
+                                             * ESZ);   \
             continue;                                  \
         }                                              \
         do_##NAME(vd, vs2, i, env);                    \
@@ -4188,12 +4207,17 @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,   \
     uint32_t esz = sizeof(ETYPE);                             \
     uint32_t total_elems = vext_get_total_elems(desc, esz);   \
     uint32_t vta = vext_vta(desc);                            \
+    uint32_t vma = vext_vma(desc);                            \
     uint32_t i;                                               \
                                                               \
     for (i = env->vstart; i < vl; i++) {                      \
         ETYPE s1 = *((ETYPE *)vs1 + H(i));                    \
         ETYPE s2 = *((ETYPE *)vs2 + H(i));                    \
         if (!vm && !vext_elem_mask(v0, i)) {                  \
+            /* set masked-off elements to 1s */               \
+            if (vma) {                                        \
+                vext_set_elem_mask(vd, i, 1);                 \
+            }                                                 \
             continue;                                         \
         }                                                     \
         vext_set_elem_mask(vd, i,                             \
@@ -4221,11 +4245,16 @@ void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2,       \
     uint32_t esz = sizeof(ETYPE);                                   \
     uint32_t total_elems = vext_get_total_elems(desc, esz);         \
     uint32_t vta = vext_vta(desc);                                  \
+    uint32_t vma = vext_vma(desc);                                  \
     uint32_t i;                                                     \
                                                                     \
     for (i = env->vstart; i < vl; i++) {                            \
         ETYPE s2 = *((ETYPE *)vs2 + H(i));                          \
         if (!vm && !vext_elem_mask(v0, i)) {                        \
+            /* set masked-off elements to 1s */                     \
+            if (vma) {                                              \
+                vext_set_elem_mask(vd, i, 1);                       \
+            }                                                       \
             continue;                                               \
         }                                                           \
         vext_set_elem_mask(vd, i,                                   \
@@ -4347,10 +4376,17 @@ void HELPER(NAME)(void *vd, void *v0, void *vs2,       \
     uint32_t total_elems =                             \
         vext_get_total_elems(desc, ESZ);               \
     uint32_t vta = vext_vta(desc);                     \
+    uint32_t vma = vext_vma(desc);                     \
     uint32_t i;                                        \
                                                        \
     for (i = env->vstart; i < vl; i++) {               \
         if (!vm && !vext_elem_mask(v0, i)) {           \
+            /* set masked-off elements to 1s */        \
+            vext_set_elems_1s_fns[ctzl(ESZ)](vd, vma,  \
+                                             i,        \
+                                             i * ESZ,  \
+                                             (i + 1) * \
+                                             ESZ);     \
             continue;                                  \
         }                                              \
         do_##NAME(vd, vs2, i);                         \
-- 
2.34.2



^ permalink raw reply related	[flat|nested] 27+ messages in thread

* [PATCH qemu 6/9] target/riscv: rvv: Add mask agnostic for vector fix-point arithmetic instructions
@ 2022-04-25 14:18   ` ~eopxd
  0 siblings, 0 replies; 27+ messages in thread
From: ~eopxd @ 2022-04-25 14:18 UTC (permalink / raw)
  To: qemu-devel, qemu-riscv
  Cc: Palmer Dabbelt, Alistair Francis, Bin Meng, Frank Chang,
	WeiWei Li, eop Chen

From: Yueh-Ting (eop) Chen <eop.chen@sifive.com>

Signed-off-by: eop Chen <eop.chen@sifive.com>
Reviewed-by: Frank Chang <frank.chang@sifive.com>
---
 target/riscv/vector_helper.c | 26 ++++++++++++++++----------
 1 file changed, 16 insertions(+), 10 deletions(-)

diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c
index 2b341d6679..e266a8673c 100644
--- a/target/riscv/vector_helper.c
+++ b/target/riscv/vector_helper.c
@@ -2129,10 +2129,12 @@ static inline void
 vext_vv_rm_1(void *vd, void *v0, void *vs1, void *vs2,
              CPURISCVState *env,
              uint32_t vl, uint32_t vm, int vxrm,
-             opivv2_rm_fn *fn)
+             opivv2_rm_fn *fn, uint32_t vma, uint32_t esz)
 {
     for (uint32_t i = env->vstart; i < vl; i++) {
         if (!vm && !vext_elem_mask(v0, i)) {
+            /* set masked-off elements to 1s */
+            vext_set_elems_1s_fns[ctzl(esz)](vd, vma, i, i * esz, (i + 1) * esz);
             continue;
         }
         fn(vd, vs1, vs2, i, env, vxrm);
@@ -2150,23 +2152,24 @@ vext_vv_rm_2(void *vd, void *v0, void *vs1, void *vs2,
     uint32_t vl = env->vl;
     uint32_t total_elems = vext_get_total_elems(desc, esz);
     uint32_t vta = vext_vta(desc);
+    uint32_t vma = vext_vma(desc);
 
     switch (env->vxrm) {
     case 0: /* rnu */
         vext_vv_rm_1(vd, v0, vs1, vs2,
-                     env, vl, vm, 0, fn);
+                     env, vl, vm, 0, fn, vma, esz);
         break;
     case 1: /* rne */
         vext_vv_rm_1(vd, v0, vs1, vs2,
-                     env, vl, vm, 1, fn);
+                     env, vl, vm, 1, fn, vma, esz);
         break;
     case 2: /* rdn */
         vext_vv_rm_1(vd, v0, vs1, vs2,
-                     env, vl, vm, 2, fn);
+                     env, vl, vm, 2, fn, vma, esz);
         break;
     default: /* rod */
         vext_vv_rm_1(vd, v0, vs1, vs2,
-                     env, vl, vm, 3, fn);
+                     env, vl, vm, 3, fn, vma, esz);
         break;
     }
     /* set tail elements to 1s */
@@ -2250,10 +2253,12 @@ static inline void
 vext_vx_rm_1(void *vd, void *v0, target_long s1, void *vs2,
              CPURISCVState *env,
              uint32_t vl, uint32_t vm, int vxrm,
-             opivx2_rm_fn *fn)
+             opivx2_rm_fn *fn, uint32_t vma, uint32_t esz)
 {
     for (uint32_t i = env->vstart; i < vl; i++) {
         if (!vm && !vext_elem_mask(v0, i)) {
+            /* set masked-off elements to 1s */
+            vext_set_elems_1s_fns[ctzl(esz)](vd, vma, i, i * esz, (i + 1) * esz);
             continue;
         }
         fn(vd, s1, vs2, i, env, vxrm);
@@ -2271,23 +2276,24 @@ vext_vx_rm_2(void *vd, void *v0, target_long s1, void *vs2,
     uint32_t vl = env->vl;
     uint32_t total_elems = vext_get_total_elems(desc, esz);
     uint32_t vta = vext_vta(desc);
+    uint32_t vma = vext_vma(desc);
 
     switch (env->vxrm) {
     case 0: /* rnu */
         vext_vx_rm_1(vd, v0, s1, vs2,
-                     env, vl, vm, 0, fn);
+                     env, vl, vm, 0, fn, vma, esz);
         break;
     case 1: /* rne */
         vext_vx_rm_1(vd, v0, s1, vs2,
-                     env, vl, vm, 1, fn);
+                     env, vl, vm, 1, fn, vma, esz);
         break;
     case 2: /* rdn */
         vext_vx_rm_1(vd, v0, s1, vs2,
-                     env, vl, vm, 2, fn);
+                     env, vl, vm, 2, fn, vma, esz);
         break;
     default: /* rod */
         vext_vx_rm_1(vd, v0, s1, vs2,
-                     env, vl, vm, 3, fn);
+                     env, vl, vm, 3, fn, vma, esz);
         break;
     }
     /* set tail elements to 1s */
-- 
2.34.2



^ permalink raw reply related	[flat|nested] 27+ messages in thread

* Re: [PATCH qemu 1/9] target/riscv: rvv: Add mask agnostic for vv instructions
  2022-04-25 14:18   ` ~eopxd
@ 2022-04-26  8:47     ` Weiwei Li
  -1 siblings, 0 replies; 27+ messages in thread
From: Weiwei Li @ 2022-04-26  8:47 UTC (permalink / raw)
  To: ~eopxd, qemu-devel, qemu-riscv
  Cc: WeiWei Li, Frank Chang, Bin Meng, Alistair Francis, eop Chen,
	Palmer Dabbelt

[-- Attachment #1: Type: text/plain, Size: 7274 bytes --]


在 2022/3/17 下午3:26, ~eopxd 写道:
> From: Yueh-Ting (eop) Chen <eop.chen@sifive.com>
>
> This is the first commit regarding the mask agnostic behavior.
> Added option 'rvv_ma_all_1s' to enable the behavior, the option
> is default to false.
>
> Signed-off-by: eop Chen <eop.chen@sifive.com>
> Reviewed-by: Frank Chang <frank.chang@sifive.com>
> ---
>   target/riscv/cpu.c                      | 1 +
>   target/riscv/cpu.h                      | 2 ++
>   target/riscv/cpu_helper.c               | 2 ++
>   target/riscv/insn_trans/trans_rvv.c.inc | 3 +++
>   target/riscv/internals.h                | 5 +++--
>   target/riscv/translate.c                | 2 ++
>   target/riscv/vector_helper.c            | 8 ++++++++
>   7 files changed, 21 insertions(+), 2 deletions(-)
>
> diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
> index cd4cf4b41e..2bf862a5e4 100644
> --- a/target/riscv/cpu.c
> +++ b/target/riscv/cpu.c
> @@ -811,6 +811,7 @@ static Property riscv_cpu_properties[] = {
>   
>       DEFINE_PROP_UINT64("resetvec", RISCVCPU, cfg.resetvec, DEFAULT_RSTVEC),
>       DEFINE_PROP_BOOL("rvv_ta_all_1s", RISCVCPU, cfg.rvv_ta_all_1s, false),
> +    DEFINE_PROP_BOOL("rvv_ma_all_1s", RISCVCPU, cfg.rvv_ma_all_1s, false),
>       DEFINE_PROP_END_OF_LIST(),
>   };
>   
> diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h
> index 8c4a79b5a0..c76ded515b 100644
> --- a/target/riscv/cpu.h
> +++ b/target/riscv/cpu.h
> @@ -370,6 +370,7 @@ struct RISCVCPUConfig {
>       bool ext_zve32f;
>       bool ext_zve64f;
>       bool rvv_ta_all_1s;
> +    bool rvv_ma_all_1s;
>   
>       /* Vendor-specific custom extensions */
>       bool ext_XVentanaCondOps;
> @@ -518,6 +519,7 @@ FIELD(TB_FLAGS, XL, 20, 2)
>   FIELD(TB_FLAGS, PM_MASK_ENABLED, 22, 1)
>   FIELD(TB_FLAGS, PM_BASE_ENABLED, 23, 1)
>   FIELD(TB_FLAGS, VTA, 24, 1)
> +FIELD(TB_FLAGS, VMA, 25, 1)
>   
>   #ifdef TARGET_RISCV32
>   #define riscv_cpu_mxl(env)  ((void)(env), MXL_RV32)
> diff --git a/target/riscv/cpu_helper.c b/target/riscv/cpu_helper.c
> index 2941c88c31..be94d82ff8 100644
> --- a/target/riscv/cpu_helper.c
> +++ b/target/riscv/cpu_helper.c
> @@ -67,6 +67,8 @@ void cpu_get_tb_cpu_state(CPURISCVState *env, target_ulong *pc,
>           flags = FIELD_DP32(flags, TB_FLAGS, VL_EQ_VLMAX, vl_eq_vlmax);
>           flags = FIELD_DP32(flags, TB_FLAGS, VTA,
>                       FIELD_EX64(env->vtype, VTYPE, VTA));
> +        flags = FIELD_DP32(flags, TB_FLAGS, VMA,
> +                    FIELD_EX64(env->vtype, VTYPE, VMA));
>       } else {
>           flags = FIELD_DP32(flags, TB_FLAGS, VILL, 1);
>       }
> diff --git a/target/riscv/insn_trans/trans_rvv.c.inc b/target/riscv/insn_trans/trans_rvv.c.inc
> index 8d87501e03..4610107fb4 100644
> --- a/target/riscv/insn_trans/trans_rvv.c.inc
> +++ b/target/riscv/insn_trans/trans_rvv.c.inc
> @@ -1251,6 +1251,7 @@ do_opivv_gvec(DisasContext *s, arg_rmrr *a, GVecGen3Fn *gvec_fn,
>           data = FIELD_DP32(data, VDATA, VM, a->vm);
>           data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
>           data = FIELD_DP32(data, VDATA, VTA, s->vta);
> +        data = FIELD_DP32(data, VDATA, VMA, s->vma);
>           tcg_gen_gvec_4_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0),
>                              vreg_ofs(s, a->rs1), vreg_ofs(s, a->rs2),
>                              cpu_env, s->cfg_ptr->vlen / 8,
> @@ -1567,6 +1568,7 @@ static bool do_opivv_widen(DisasContext *s, arg_rmrr *a,
>           data = FIELD_DP32(data, VDATA, VM, a->vm);
>           data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
>           data = FIELD_DP32(data, VDATA, VTA, s->vta);
> +        data = FIELD_DP32(data, VDATA, VMA, s->vma);
>           tcg_gen_gvec_4_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0),
>                              vreg_ofs(s, a->rs1),
>                              vreg_ofs(s, a->rs2),
> @@ -1649,6 +1651,7 @@ static bool do_opiwv_widen(DisasContext *s, arg_rmrr *a,
>           data = FIELD_DP32(data, VDATA, VM, a->vm);
>           data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
>           data = FIELD_DP32(data, VDATA, VTA, s->vta);
> +        data = FIELD_DP32(data, VDATA, VMA, s->vma);
>           tcg_gen_gvec_4_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0),
>                              vreg_ofs(s, a->rs1),
>                              vreg_ofs(s, a->rs2),
> diff --git a/target/riscv/internals.h b/target/riscv/internals.h
> index 512c6c30cf..00b72fd767 100644
> --- a/target/riscv/internals.h
> +++ b/target/riscv/internals.h
> @@ -25,8 +25,9 @@
>   FIELD(VDATA, VM, 0, 1)
>   FIELD(VDATA, LMUL, 1, 3)
>   FIELD(VDATA, VTA, 4, 1)
> -FIELD(VDATA, NF, 5, 4)
> -FIELD(VDATA, WD, 5, 1)
> +FIELD(VDATA, VMA, 5, 1)
> +FIELD(VDATA, NF, 6, 4)
> +FIELD(VDATA, WD, 6, 1)
>   
>   /* float point classify helpers */
>   target_ulong fclass_h(uint64_t frs1);
> diff --git a/target/riscv/translate.c b/target/riscv/translate.c
> index 7775dade26..37893aa348 100644
> --- a/target/riscv/translate.c
> +++ b/target/riscv/translate.c
> @@ -95,6 +95,7 @@ typedef struct DisasContext {
>       int8_t lmul;
>       uint8_t sew;
>       uint8_t vta;
> +    uint8_t vma;
>       target_ulong vstart;
>       bool vl_eq_vlmax;
>       uint8_t ntemp;
> @@ -1085,6 +1086,7 @@ static void riscv_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs)
>       ctx->sew = FIELD_EX32(tb_flags, TB_FLAGS, SEW);
>       ctx->lmul = sextract32(FIELD_EX32(tb_flags, TB_FLAGS, LMUL), 0, 3);
>       ctx->vta = FIELD_EX32(tb_flags, TB_FLAGS, VTA) && cpu->cfg.rvv_ta_all_1s;
> +    ctx->vma = FIELD_EX32(tb_flags, TB_FLAGS, VMA) && cpu->cfg.rvv_ma_all_1s;
>       ctx->vstart = env->vstart;
>       ctx->vl_eq_vlmax = FIELD_EX32(tb_flags, TB_FLAGS, VL_EQ_VLMAX);
>       ctx->misa_mxl_max = env->misa_mxl_max;
> diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c
> index fbde0c9248..141a06ddf0 100644
> --- a/target/riscv/vector_helper.c
> +++ b/target/riscv/vector_helper.c
> @@ -127,6 +127,11 @@ static inline uint32_t vext_vta(uint32_t desc)
>       return FIELD_EX32(simd_data(desc), VDATA, VTA);
>   }
>   
> +static inline uint32_t vext_vma(uint32_t desc)
> +{
> +    return FIELD_EX32(simd_data(desc), VDATA, VMA);
> +}
> +
>   /*
>    * Get the maximum number of elements can be operated.
>    *
> @@ -797,10 +802,13 @@ static void do_vext_vv(void *vd, void *v0, void *vs1, void *vs2,
>       uint32_t vl = env->vl;
>       uint32_t total_elems = vext_get_total_elems(desc, esz);
>       uint32_t vta = vext_vta(desc);
> +    uint32_t vma = vext_vma(desc);
>       uint32_t i;
>   
>       for (i = env->vstart; i < vl; i++) {
>           if (!vm && !vext_elem_mask(v0, i)) {
> +            /* set masked-off elements to 1s */
> +            vext_set_elems_1s_fns[ctzl(esz)](vd, vma, i, i * esz, (i + 1) * esz);

Similar to our last discussion,  vext_set_elems_1s_fns array can be 
simplified to single vext_set_elems_1s,

since the fourth argement can be used as the start offset.

Another question, may be not related to this patchset, in section 3.4.3 
of the spec:

/"Mask destination tail elements are always treated as tail-agnostic, 
regardless of the setting of vta."/

What does "Mask destination tail elements" mean?

Regards,

Weiwei Li

>               continue;
>           }
>           fn(vd, vs1, vs2, i);

[-- Attachment #2: Type: text/html, Size: 7900 bytes --]

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH qemu 1/9] target/riscv: rvv: Add mask agnostic for vv instructions
@ 2022-04-26  8:47     ` Weiwei Li
  0 siblings, 0 replies; 27+ messages in thread
From: Weiwei Li @ 2022-04-26  8:47 UTC (permalink / raw)
  To: ~eopxd, qemu-devel, qemu-riscv
  Cc: WeiWei Li, Frank Chang, eop Chen, Bin Meng, Alistair Francis,
	Palmer Dabbelt

[-- Attachment #1: Type: text/plain, Size: 7274 bytes --]


在 2022/3/17 下午3:26, ~eopxd 写道:
> From: Yueh-Ting (eop) Chen <eop.chen@sifive.com>
>
> This is the first commit regarding the mask agnostic behavior.
> Added option 'rvv_ma_all_1s' to enable the behavior, the option
> is default to false.
>
> Signed-off-by: eop Chen <eop.chen@sifive.com>
> Reviewed-by: Frank Chang <frank.chang@sifive.com>
> ---
>   target/riscv/cpu.c                      | 1 +
>   target/riscv/cpu.h                      | 2 ++
>   target/riscv/cpu_helper.c               | 2 ++
>   target/riscv/insn_trans/trans_rvv.c.inc | 3 +++
>   target/riscv/internals.h                | 5 +++--
>   target/riscv/translate.c                | 2 ++
>   target/riscv/vector_helper.c            | 8 ++++++++
>   7 files changed, 21 insertions(+), 2 deletions(-)
>
> diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
> index cd4cf4b41e..2bf862a5e4 100644
> --- a/target/riscv/cpu.c
> +++ b/target/riscv/cpu.c
> @@ -811,6 +811,7 @@ static Property riscv_cpu_properties[] = {
>   
>       DEFINE_PROP_UINT64("resetvec", RISCVCPU, cfg.resetvec, DEFAULT_RSTVEC),
>       DEFINE_PROP_BOOL("rvv_ta_all_1s", RISCVCPU, cfg.rvv_ta_all_1s, false),
> +    DEFINE_PROP_BOOL("rvv_ma_all_1s", RISCVCPU, cfg.rvv_ma_all_1s, false),
>       DEFINE_PROP_END_OF_LIST(),
>   };
>   
> diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h
> index 8c4a79b5a0..c76ded515b 100644
> --- a/target/riscv/cpu.h
> +++ b/target/riscv/cpu.h
> @@ -370,6 +370,7 @@ struct RISCVCPUConfig {
>       bool ext_zve32f;
>       bool ext_zve64f;
>       bool rvv_ta_all_1s;
> +    bool rvv_ma_all_1s;
>   
>       /* Vendor-specific custom extensions */
>       bool ext_XVentanaCondOps;
> @@ -518,6 +519,7 @@ FIELD(TB_FLAGS, XL, 20, 2)
>   FIELD(TB_FLAGS, PM_MASK_ENABLED, 22, 1)
>   FIELD(TB_FLAGS, PM_BASE_ENABLED, 23, 1)
>   FIELD(TB_FLAGS, VTA, 24, 1)
> +FIELD(TB_FLAGS, VMA, 25, 1)
>   
>   #ifdef TARGET_RISCV32
>   #define riscv_cpu_mxl(env)  ((void)(env), MXL_RV32)
> diff --git a/target/riscv/cpu_helper.c b/target/riscv/cpu_helper.c
> index 2941c88c31..be94d82ff8 100644
> --- a/target/riscv/cpu_helper.c
> +++ b/target/riscv/cpu_helper.c
> @@ -67,6 +67,8 @@ void cpu_get_tb_cpu_state(CPURISCVState *env, target_ulong *pc,
>           flags = FIELD_DP32(flags, TB_FLAGS, VL_EQ_VLMAX, vl_eq_vlmax);
>           flags = FIELD_DP32(flags, TB_FLAGS, VTA,
>                       FIELD_EX64(env->vtype, VTYPE, VTA));
> +        flags = FIELD_DP32(flags, TB_FLAGS, VMA,
> +                    FIELD_EX64(env->vtype, VTYPE, VMA));
>       } else {
>           flags = FIELD_DP32(flags, TB_FLAGS, VILL, 1);
>       }
> diff --git a/target/riscv/insn_trans/trans_rvv.c.inc b/target/riscv/insn_trans/trans_rvv.c.inc
> index 8d87501e03..4610107fb4 100644
> --- a/target/riscv/insn_trans/trans_rvv.c.inc
> +++ b/target/riscv/insn_trans/trans_rvv.c.inc
> @@ -1251,6 +1251,7 @@ do_opivv_gvec(DisasContext *s, arg_rmrr *a, GVecGen3Fn *gvec_fn,
>           data = FIELD_DP32(data, VDATA, VM, a->vm);
>           data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
>           data = FIELD_DP32(data, VDATA, VTA, s->vta);
> +        data = FIELD_DP32(data, VDATA, VMA, s->vma);
>           tcg_gen_gvec_4_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0),
>                              vreg_ofs(s, a->rs1), vreg_ofs(s, a->rs2),
>                              cpu_env, s->cfg_ptr->vlen / 8,
> @@ -1567,6 +1568,7 @@ static bool do_opivv_widen(DisasContext *s, arg_rmrr *a,
>           data = FIELD_DP32(data, VDATA, VM, a->vm);
>           data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
>           data = FIELD_DP32(data, VDATA, VTA, s->vta);
> +        data = FIELD_DP32(data, VDATA, VMA, s->vma);
>           tcg_gen_gvec_4_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0),
>                              vreg_ofs(s, a->rs1),
>                              vreg_ofs(s, a->rs2),
> @@ -1649,6 +1651,7 @@ static bool do_opiwv_widen(DisasContext *s, arg_rmrr *a,
>           data = FIELD_DP32(data, VDATA, VM, a->vm);
>           data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
>           data = FIELD_DP32(data, VDATA, VTA, s->vta);
> +        data = FIELD_DP32(data, VDATA, VMA, s->vma);
>           tcg_gen_gvec_4_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0),
>                              vreg_ofs(s, a->rs1),
>                              vreg_ofs(s, a->rs2),
> diff --git a/target/riscv/internals.h b/target/riscv/internals.h
> index 512c6c30cf..00b72fd767 100644
> --- a/target/riscv/internals.h
> +++ b/target/riscv/internals.h
> @@ -25,8 +25,9 @@
>   FIELD(VDATA, VM, 0, 1)
>   FIELD(VDATA, LMUL, 1, 3)
>   FIELD(VDATA, VTA, 4, 1)
> -FIELD(VDATA, NF, 5, 4)
> -FIELD(VDATA, WD, 5, 1)
> +FIELD(VDATA, VMA, 5, 1)
> +FIELD(VDATA, NF, 6, 4)
> +FIELD(VDATA, WD, 6, 1)
>   
>   /* float point classify helpers */
>   target_ulong fclass_h(uint64_t frs1);
> diff --git a/target/riscv/translate.c b/target/riscv/translate.c
> index 7775dade26..37893aa348 100644
> --- a/target/riscv/translate.c
> +++ b/target/riscv/translate.c
> @@ -95,6 +95,7 @@ typedef struct DisasContext {
>       int8_t lmul;
>       uint8_t sew;
>       uint8_t vta;
> +    uint8_t vma;
>       target_ulong vstart;
>       bool vl_eq_vlmax;
>       uint8_t ntemp;
> @@ -1085,6 +1086,7 @@ static void riscv_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs)
>       ctx->sew = FIELD_EX32(tb_flags, TB_FLAGS, SEW);
>       ctx->lmul = sextract32(FIELD_EX32(tb_flags, TB_FLAGS, LMUL), 0, 3);
>       ctx->vta = FIELD_EX32(tb_flags, TB_FLAGS, VTA) && cpu->cfg.rvv_ta_all_1s;
> +    ctx->vma = FIELD_EX32(tb_flags, TB_FLAGS, VMA) && cpu->cfg.rvv_ma_all_1s;
>       ctx->vstart = env->vstart;
>       ctx->vl_eq_vlmax = FIELD_EX32(tb_flags, TB_FLAGS, VL_EQ_VLMAX);
>       ctx->misa_mxl_max = env->misa_mxl_max;
> diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c
> index fbde0c9248..141a06ddf0 100644
> --- a/target/riscv/vector_helper.c
> +++ b/target/riscv/vector_helper.c
> @@ -127,6 +127,11 @@ static inline uint32_t vext_vta(uint32_t desc)
>       return FIELD_EX32(simd_data(desc), VDATA, VTA);
>   }
>   
> +static inline uint32_t vext_vma(uint32_t desc)
> +{
> +    return FIELD_EX32(simd_data(desc), VDATA, VMA);
> +}
> +
>   /*
>    * Get the maximum number of elements can be operated.
>    *
> @@ -797,10 +802,13 @@ static void do_vext_vv(void *vd, void *v0, void *vs1, void *vs2,
>       uint32_t vl = env->vl;
>       uint32_t total_elems = vext_get_total_elems(desc, esz);
>       uint32_t vta = vext_vta(desc);
> +    uint32_t vma = vext_vma(desc);
>       uint32_t i;
>   
>       for (i = env->vstart; i < vl; i++) {
>           if (!vm && !vext_elem_mask(v0, i)) {
> +            /* set masked-off elements to 1s */
> +            vext_set_elems_1s_fns[ctzl(esz)](vd, vma, i, i * esz, (i + 1) * esz);

Similar to our last discussion,  vext_set_elems_1s_fns array can be 
simplified to single vext_set_elems_1s,

since the fourth argement can be used as the start offset.

Another question, may be not related to this patchset, in section 3.4.3 
of the spec:

/"Mask destination tail elements are always treated as tail-agnostic, 
regardless of the setting of vta."/

What does "Mask destination tail elements" mean?

Regards,

Weiwei Li

>               continue;
>           }
>           fn(vd, vs1, vs2, i);

[-- Attachment #2: Type: text/html, Size: 7900 bytes --]

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH qemu 1/9] target/riscv: rvv: Add mask agnostic for vv instructions
  2022-04-26  8:47     ` Weiwei Li
  (?)
@ 2022-04-26 18:20     ` eop Chen
  2022-04-27  1:01       ` Weiwei Li
  -1 siblings, 1 reply; 27+ messages in thread
From: eop Chen @ 2022-04-26 18:20 UTC (permalink / raw)
  To: Weiwei Li, qemu-devel, qemu-riscv

[-- Attachment #1: Type: text/plain, Size: 1295 bytes --]


> Weiwei Li <liweiwei@iscas.ac.cn> 於 2022年4月26日 下午4:47 寫道:
> 在 2022/3/17 下午3:26, ~eopxd 写道:
>> From: Yueh-Ting (eop) Chen <eop.chen@sifive.com> <mailto:eop.chen@sifive.com>
>> 
>> This is the first commit regarding the mask agnostic behavior.
>> Added option 'rvv_ma_all_1s' to enable the behavior, the option
>> is default to false.
>> 
>> Signed-off-by: eop Chen <eop.chen@sifive.com> <mailto:eop.chen@sifive.com>
>> Reviewed-by: Frank Chang <frank.chang@sifive.com> <mailto:frank.chang@sifive.com>
> Similar to our last discussion,  vext_set_elems_1s_fns array can be simplified to single vext_set_elems_1s,
> 
> since the fourth argement can be used as the start offset. 
> 
> Another question, may be not related to this patchset, in section 3.4.3 of the spec: 
> 
> "Mask destination tail elements are always treated as tail-agnostic, regardless of the setting of vta."
> 
> What does "Mask destination tail elements" mean?
> 
> Regards,
> 
> Weiwei Li
> 


I have just updated a new version for the tail agnostic patch set, it also includes a bug fix discovered today.

Regarding the question, mask / masked-off are for body elements and respects the mask policy, and tail elements respect the tail policy.

Regards,

eop Chen

[-- Attachment #2: Type: text/html, Size: 2395 bytes --]

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH qemu 1/9] target/riscv: rvv: Add mask agnostic for vv instructions
  2022-04-26 18:20     ` eop Chen
@ 2022-04-27  1:01       ` Weiwei Li
  2022-04-27  2:07         ` eop Chen
  0 siblings, 1 reply; 27+ messages in thread
From: Weiwei Li @ 2022-04-27  1:01 UTC (permalink / raw)
  To: eop Chen, Weiwei Li, qemu-devel, qemu-riscv

[-- Attachment #1: Type: text/plain, Size: 2206 bytes --]


在 2022/4/27 上午2:20, eop Chen 写道:
>
>> Weiwei Li <liweiwei@iscas.ac.cn <mailto:liweiwei@iscas.ac.cn>> 於 
>> 2022年4月26日 下午4:47 寫道:
>> 在 2022/3/17 下午3:26, ~eopxd 写道:
>>> From: Yueh-Ting (eop) Chen<eop.chen@sifive.com>
>>>
>>> This is the first commit regarding the mask agnostic behavior.
>>> Added option 'rvv_ma_all_1s' to enable the behavior, the option
>>> is default to false.
>>>
>>> Signed-off-by: eop Chen<eop.chen@sifive.com>
>>> Reviewed-by: Frank Chang<frank.chang@sifive.com>
>>
>> Similar to our last discussion, vext_set_elems_1s_fns array can be 
>> simplified to single vext_set_elems_1s,
>>
>> since the fourth argement can be used as the start offset.
>>
>> Another question, may be not related to this patchset, in section 
>> 3.4.3 of the spec:
>>
>> /"Mask destination tail elements are always treated as tail-agnostic, 
>> regardless of the setting of vta."/
>>
>> What does "Mask destination tail elements" mean?
>>
>> Regards,
>>
>> Weiwei Li
>>
>
> I have just updated a new version for the tail agnostic patch set, it 
> also includes a bug fix discovered today.
>
> Regarding the question, mask / masked-off are for body elements and 
> respects the mask policy, and tail elements respect the tail policy.
>
> Regards,
>
> eop Chen

I find another descriptions in the spec. For the instructions that write 
mask register (such as vmadc, vmseq,vmsne,vmfeq...), they all have 
similar description

(You can search "tail-agnostic" in the spec):

/Section 11.4: "Because these instructions produce a mask value, they 
always operate with a tail-agnostic policy"//
/

/Section 11.8/13.13: "Compares //write mask registers, and so always 
operate under a tail-agnostic policy"//
/

/Section 15.1: "Mask elements past vl, the tail elements, are always 
updated with a tail-agnostic policy"//
/

//

/Section 15.4/15.5/15.6: "The tail elements in the destination mask 
register are updated under a tail-agnostic policy"/

So I think "Mask destination tail elements" may means the tail element 
for instructions that take mask register as destination register.  For 
these instructions,

maybe the setting of VTA can be ignored.

Regards,

Weiwei Li


[-- Attachment #2: Type: text/html, Size: 4463 bytes --]

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH qemu 1/9] target/riscv: rvv: Add mask agnostic for vv instructions
  2022-04-27  1:01       ` Weiwei Li
@ 2022-04-27  2:07         ` eop Chen
  2022-04-27  3:27           ` Weiwei Li
  0 siblings, 1 reply; 27+ messages in thread
From: eop Chen @ 2022-04-27  2:07 UTC (permalink / raw)
  To: Weiwei Li, qemu-devel, qemu-riscv

[-- Attachment #1: Type: text/plain, Size: 2593 bytes --]



> 
> 在 2022/4/27 上午2:20, eop Chen 写道:
>> 
>>> Weiwei Li <liweiwei@iscas.ac.cn <mailto:liweiwei@iscas.ac.cn>> 於 2022年4月26日 下午4:47 寫道:
>>> 在 2022/3/17 下午3:26, ~eopxd 写道:
>>>> From: Yueh-Ting (eop) Chen <eop.chen@sifive.com> <mailto:eop.chen@sifive.com>
>>>> 
>>>> This is the first commit regarding the mask agnostic behavior.
>>>> Added option 'rvv_ma_all_1s' to enable the behavior, the option
>>>> is default to false.
>>>> 
>>>> Signed-off-by: eop Chen <eop.chen@sifive.com> <mailto:eop.chen@sifive.com>
>>>> Reviewed-by: Frank Chang <frank.chang@sifive.com> <mailto:frank.chang@sifive.com>
>>> Similar to our last discussion,  vext_set_elems_1s_fns array can be simplified to single vext_set_elems_1s,
>>> 
>>> since the fourth argement can be used as the start offset. 
>>> 
>>> Another question, may be not related to this patchset, in section 3.4.3 of the spec: 
>>> 
>>> "Mask destination tail elements are always treated as tail-agnostic, regardless of the setting of vta."
>>> 
>>> What does "Mask destination tail elements" mean?
>>> 
>>> Regards,
>>> 
>>> Weiwei Li
>>> 
>> 
>> 
>> I have just updated a new version for the tail agnostic patch set, it also includes a bug fix discovered today.
>> 
>> Regarding the question, mask / masked-off are for body elements and respects the mask policy, and tail elements respect the tail policy.
>> 
>> Regards,
>> 
>> eop Chen
> 
> I find another descriptions in the spec. For the instructions that write mask register (such as vmadc, vmseq,vmsne,vmfeq...), they all have similar description
> 
> (You can search "tail-agnostic" in the spec):
> 
> Section 11.4: "Because these instructions produce a mask value, they always operate with a tail-agnostic policy"
> 
> Section 11.8/13.13: "Compares write mask registers, and so always operate under a tail-agnostic policy"
> 
> Section 15.1: "Mask elements past vl, the tail elements, are always updated with a tail-agnostic policy"
> 
> Section 15.4/15.5/15.6: "The tail elements in the destination mask register are updated under a tail-agnostic policy"
> 
> So I think "Mask destination tail elements" may means the tail element for instructions that take mask register as destination register.  For these instructions, 
> 
> maybe the setting of VTA can be ignored.  
> 
> Regards,
> 
> Weiwei Li
> 

Yes, the setting of VTA should be ignored when v-spec specifies.
I think the implementation of the tail agnostic patch set don’t need to change on this.

Regards,

eop Chen




[-- Attachment #2: Type: text/html, Size: 5282 bytes --]

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH qemu 1/9] target/riscv: rvv: Add mask agnostic for vv instructions
  2022-04-27  2:07         ` eop Chen
@ 2022-04-27  3:27           ` Weiwei Li
  2022-04-27  3:43             ` eop Chen
  0 siblings, 1 reply; 27+ messages in thread
From: Weiwei Li @ 2022-04-27  3:27 UTC (permalink / raw)
  To: eop Chen, qemu-devel, qemu-riscv

[-- Attachment #1: Type: text/plain, Size: 2970 bytes --]


在 2022/4/27 上午10:07, eop Chen 写道:
>
>
>>
>> 在 2022/4/27 上午2:20, eop Chen 写道:
>>>
>>>> Weiwei Li <liweiwei@iscas.ac.cn <mailto:liweiwei@iscas.ac.cn>> 於 
>>>> 2022年4月26日 下午4:47 寫道:
>>>> 在 2022/3/17 下午3:26, ~eopxd 写道:
>>>>> From: Yueh-Ting (eop) Chen<eop.chen@sifive.com>
>>>>>
>>>>> This is the first commit regarding the mask agnostic behavior.
>>>>> Added option 'rvv_ma_all_1s' to enable the behavior, the option
>>>>> is default to false.
>>>>>
>>>>> Signed-off-by: eop Chen<eop.chen@sifive.com>
>>>>> Reviewed-by: Frank Chang<frank.chang@sifive.com>
>>>>
>>>> Similar to our last discussion, vext_set_elems_1s_fns array can be 
>>>> simplified to single vext_set_elems_1s,
>>>>
>>>> since the fourth argement can be used as the start offset.
>>>>
>>>> Another question, may be not related to this patchset, in section 
>>>> 3.4.3 of the spec:
>>>>
>>>> /"Mask destination tail elements are always treated as 
>>>> tail-agnostic, regardless of the setting of vta."/
>>>>
>>>> What does "Mask destination tail elements" mean?
>>>>
>>>> Regards,
>>>>
>>>> Weiwei Li
>>>>
>>>
>>> I have just updated a new version for the tail agnostic patch set, 
>>> it also includes a bug fix discovered today.
>>>
>>> Regarding the question, mask / masked-off are for body elements and 
>>> respects the mask policy, and tail elements respect the tail policy.
>>>
>>> Regards,
>>>
>>> eop Chen
>>
>> I find another descriptions in the spec. For the instructions that 
>> write mask register (such as vmadc, vmseq,vmsne,vmfeq...), they all 
>> have similar description
>>
>> (You can search "tail-agnostic" in the spec):
>>
>> /Section 11.4: "Because these instructions produce a mask value, they 
>> always operate with a tail-agnostic policy"//
>> /
>>
>> /Section 11.8/13.13: "Compares //write mask registers, and so always 
>> operate under a tail-agnostic policy"//
>> /
>>
>> /Section 15.1: "Mask elements past vl, the tail elements, are always 
>> updated with a tail-agnostic policy"//
>> /
>>
>> //
>>
>> /Section 15.4/15.5/15.6: "The tail elements in the destination mask 
>> register are updated under a tail-agnostic policy"/
>>
>> So I think "Mask destination tail elements" may means the tail 
>> element for instructions that take mask register as destination 
>> register.  For these instructions,
>>
>> maybe the setting of VTA can be ignored.
>>
>> Regards,
>>
>> Weiwei Li
>>
>
> Yes, the setting of VTA should be ignored when v-spec specifies.
> I think the implementation of the tail agnostic patch set don’t need 
> to change on this.

Sorry. I don't get your idea?

In current patch, these instructions seems need to set the tail elements 
to 1s when vta is true which means

VTA is setted and rvv_ta_all_1s is enabled. If setting of VTA should be 
ignored for these instrucitons,

they will set the tail elements to 1s only when rvv_ta_all_1s is enabled.

Regards,

Weiwei Li

>
> Regards,
>
> eop Chen
>
>
>

[-- Attachment #2: Type: text/html, Size: 7576 bytes --]

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH qemu 1/9] target/riscv: rvv: Add mask agnostic for vv instructions
  2022-04-27  3:27           ` Weiwei Li
@ 2022-04-27  3:43             ` eop Chen
  0 siblings, 0 replies; 27+ messages in thread
From: eop Chen @ 2022-04-27  3:43 UTC (permalink / raw)
  To: Weiwei Li, qemu-devel, qemu-riscv

[-- Attachment #1: Type: text/plain, Size: 3399 bytes --]



> Weiwei Li <liweiwei@iscas.ac.cn> 於 2022年4月27日 上午11:27 寫道:
> 
> 
> 
> 在 2022/4/27 上午10:07, eop Chen 写道:
>> 
>> 
>>> 
>>> 在 2022/4/27 上午2:20, eop Chen 写道:
>>>> 
>>>>> Weiwei Li <liweiwei@iscas.ac.cn <mailto:liweiwei@iscas.ac.cn>> 於 2022年4月26日 下午4:47 寫道:
>>>>> 在 2022/3/17 下午3:26, ~eopxd 写道:
>>>>>> From: Yueh-Ting (eop) Chen <eop.chen@sifive.com> <mailto:eop.chen@sifive.com>
>>>>>> 
>>>>>> This is the first commit regarding the mask agnostic behavior.
>>>>>> Added option 'rvv_ma_all_1s' to enable the behavior, the option
>>>>>> is default to false.
>>>>>> 
>>>>>> Signed-off-by: eop Chen <eop.chen@sifive.com> <mailto:eop.chen@sifive.com>
>>>>>> Reviewed-by: Frank Chang <frank.chang@sifive.com> <mailto:frank.chang@sifive.com>
>>>>> Similar to our last discussion,  vext_set_elems_1s_fns array can be simplified to single vext_set_elems_1s,
>>>>> 
>>>>> since the fourth argement can be used as the start offset. 
>>>>> 
>>>>> Another question, may be not related to this patchset, in section 3.4.3 of the spec: 
>>>>> 
>>>>> "Mask destination tail elements are always treated as tail-agnostic, regardless of the setting of vta."
>>>>> 
>>>>> What does "Mask destination tail elements" mean?
>>>>> 
>>>>> Regards,
>>>>> 
>>>>> Weiwei Li
>>>>> 
>>>> 
>>>> 
>>>> I have just updated a new version for the tail agnostic patch set, it also includes a bug fix discovered today.
>>>> 
>>>> Regarding the question, mask / masked-off are for body elements and respects the mask policy, and tail elements respect the tail policy.
>>>> 
>>>> Regards,
>>>> 
>>>> eop Chen
>>> 
>>> I find another descriptions in the spec. For the instructions that write mask register (such as vmadc, vmseq,vmsne,vmfeq...), they all have similar description
>>> 
>>> (You can search "tail-agnostic" in the spec):
>>> 
>>> Section 11.4: "Because these instructions produce a mask value, they always operate with a tail-agnostic policy"
>>> 
>>> Section 11.8/13.13: "Compares write mask registers, and so always operate under a tail-agnostic policy"
>>> 
>>> Section 15.1: "Mask elements past vl, the tail elements, are always updated with a tail-agnostic policy"
>>> 
>>> Section 15.4/15.5/15.6: "The tail elements in the destination mask register are updated under a tail-agnostic policy"
>>> 
>>> So I think "Mask destination tail elements" may means the tail element for instructions that take mask register as destination register.  For these instructions, 
>>> 
>>> maybe the setting of VTA can be ignored.  
>>> 
>>> Regards,
>>> 
>>> Weiwei Li
>>> 
>> 
>> Yes, the setting of VTA should be ignored when v-spec specifies.
>> I think the implementation of the tail agnostic patch set don’t need to change on this.
> Sorry. I don't get your idea? 
> 
> In current patch, these instructions seems need to set the tail elements to 1s when vta is true which means
> 
> VTA is setted and rvv_ta_all_1s is enabled. If setting of VTA should be ignored for these instrucitons,  
> 
> they will set the tail elements to 1s only when rvv_ta_all_1s is enabled.
> 
> Regards,
> 
> Weiwei Li
> 
>> 
>> Regards,
>> 
>> eop Chen
>> 
>> 
>> 

I see your point now. Yes the implementation should be changed.
Let me submit a new version for this.

Regards,

eop Chen



[-- Attachment #2: Type: text/html, Size: 8143 bytes --]

^ permalink raw reply	[flat|nested] 27+ messages in thread

end of thread, other threads:[~2022-04-27  3:45 UTC | newest]

Thread overview: 27+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-04-25 14:18 [PATCH qemu 0/9] Add mask agnostic behavior for rvv instructions ~eopxd
2022-04-25 14:18 ` ~eopxd
2022-03-17  7:26 ` [PATCH qemu 1/9] target/riscv: rvv: Add mask agnostic for vv instructions ~eopxd
2022-04-25 14:18   ` ~eopxd
2022-04-26  8:47   ` Weiwei Li
2022-04-26  8:47     ` Weiwei Li
2022-04-26 18:20     ` eop Chen
2022-04-27  1:01       ` Weiwei Li
2022-04-27  2:07         ` eop Chen
2022-04-27  3:27           ` Weiwei Li
2022-04-27  3:43             ` eop Chen
2022-03-17  7:47 ` [PATCH qemu 2/9] target/riscv: rvv: Add mask agnostic for vector load / store instructions ~eopxd
2022-04-25 14:18   ` ~eopxd
2022-03-17  8:38 ` [PATCH qemu 3/9] target/riscv: rvv: Add mask agnostic for vx instructions ~eopxd
2022-04-25 14:18   ` ~eopxd
2022-03-17  8:43 ` [PATCH qemu 4/9] target/riscv: rvv: Add mask agnostic for vector integer shift instructions ~eopxd
2022-04-25 14:18   ` ~eopxd
2022-03-17  8:46 ` [PATCH qemu 5/9] target/riscv: rvv: Add mask agnostic for vector integer comparison instructions ~eopxd
2022-04-25 14:18   ` ~eopxd
2022-03-17  8:52 ` [PATCH qemu 6/9] target/riscv: rvv: Add mask agnostic for vector fix-point arithmetic instructions ~eopxd
2022-04-25 14:18   ` ~eopxd
2022-03-17  9:08 ` [PATCH qemu 7/9] target/riscv: rvv: Add mask agnostic for vector floating-point instructions ~eopxd
2022-04-25 14:18   ` ~eopxd
2022-03-17  9:14 ` [PATCH qemu 8/9] target/riscv: rvv: Add mask agnostic for vector mask instructions ~eopxd
2022-04-25 14:18   ` ~eopxd
2022-03-17  9:32 ` [PATCH qemu 9/9] target/riscv: rvv: Add mask agnostic for vector permutation instructions ~eopxd
2022-04-25 14:18   ` ~eopxd

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.