All of lore.kernel.org
 help / color / mirror / Atom feed
From: Weiwei Li <liweiwei@iscas.ac.cn>
To: ~eopxd <yueh.ting.chen@gmail.com>,
	qemu-devel@nongnu.org, qemu-riscv@nongnu.org
Cc: Frank Chang <frank.chang@sifive.com>,
	Alistair Francis <alistair.francis@wdc.com>,
	Bin Meng <bin.meng@windriver.com>,
	Palmer Dabbelt <palmer@dabbelt.com>,
	eop Chen <eop.chen@sifive.com>
Subject: Re: [PATCH qemu v5 05/14] target/riscv: rvv: Add tail agnostic for vector load / store instructions
Date: Mon, 28 Mar 2022 19:56:30 +0800	[thread overview]
Message-ID: <7f3c995d-182f-f1b3-4e79-94f5b81e1be9@iscas.ac.cn> (raw)
In-Reply-To: <164845204233.25323.14607469451359734000-5@git.sr.ht>


在 2022/3/7 下午3:10, ~eopxd 写道:
> From: eopXD <eop.chen@sifive.com>
>
> Signed-off-by: eop Chen <eop.chen@sifive.com>
> Reviewed-by: Frank Chang <frank.chang@sifive.com>
> ---
>   target/riscv/insn_trans/trans_rvv.c.inc |  9 +++++++
>   target/riscv/vector_helper.c            | 32 +++++++++++++++++++++++++
>   2 files changed, 41 insertions(+)
>
> diff --git a/target/riscv/insn_trans/trans_rvv.c.inc b/target/riscv/insn_trans/trans_rvv.c.inc
> index cc80bf00ff..66cfc8c603 100644
> --- a/target/riscv/insn_trans/trans_rvv.c.inc
> +++ b/target/riscv/insn_trans/trans_rvv.c.inc
> @@ -711,6 +711,7 @@ static bool ld_us_op(DisasContext *s, arg_r2nfvm *a, uint8_t eew)
>       data = FIELD_DP32(data, VDATA, VM, a->vm);
>       data = FIELD_DP32(data, VDATA, LMUL, emul);
>       data = FIELD_DP32(data, VDATA, NF, a->nf);
> +    data = FIELD_DP32(data, VDATA, VTA, s->vta);
>       return ldst_us_trans(a->rd, a->rs1, data, fn, s, false);
>   }
>   
> @@ -748,6 +749,7 @@ static bool st_us_op(DisasContext *s, arg_r2nfvm *a, uint8_t eew)
>       data = FIELD_DP32(data, VDATA, VM, a->vm);
>       data = FIELD_DP32(data, VDATA, LMUL, emul);
>       data = FIELD_DP32(data, VDATA, NF, a->nf);
> +    data = FIELD_DP32(data, VDATA, VTA, s->vta);
>       return ldst_us_trans(a->rd, a->rs1, data, fn, s, true);
>   }
>   
> @@ -774,6 +776,7 @@ static bool ld_us_mask_op(DisasContext *s, arg_vlm_v *a, uint8_t eew)
>       /* EMUL = 1, NFIELDS = 1 */
>       data = FIELD_DP32(data, VDATA, LMUL, 0);
>       data = FIELD_DP32(data, VDATA, NF, 1);
> +    data = FIELD_DP32(data, VDATA, VTA, s->vta);
>       return ldst_us_trans(a->rd, a->rs1, data, fn, s, false);
>   }
>   
> @@ -791,6 +794,7 @@ static bool st_us_mask_op(DisasContext *s, arg_vsm_v *a, uint8_t eew)
>       /* EMUL = 1, NFIELDS = 1 */
>       data = FIELD_DP32(data, VDATA, LMUL, 0);
>       data = FIELD_DP32(data, VDATA, NF, 1);
> +    data = FIELD_DP32(data, VDATA, VTA, s->vta);
>       return ldst_us_trans(a->rd, a->rs1, data, fn, s, true);
>   }
>   
> @@ -862,6 +866,7 @@ static bool ld_stride_op(DisasContext *s, arg_rnfvm *a, uint8_t eew)
>       data = FIELD_DP32(data, VDATA, VM, a->vm);
>       data = FIELD_DP32(data, VDATA, LMUL, emul);
>       data = FIELD_DP32(data, VDATA, NF, a->nf);
> +    data = FIELD_DP32(data, VDATA, VTA, s->vta);
>       return ldst_stride_trans(a->rd, a->rs1, a->rs2, data, fn, s, false);
>   }
>   
> @@ -891,6 +896,7 @@ static bool st_stride_op(DisasContext *s, arg_rnfvm *a, uint8_t eew)
>       data = FIELD_DP32(data, VDATA, VM, a->vm);
>       data = FIELD_DP32(data, VDATA, LMUL, emul);
>       data = FIELD_DP32(data, VDATA, NF, a->nf);
> +    data = FIELD_DP32(data, VDATA, VTA, s->vta);
>       fn = fns[eew];
>       if (fn == NULL) {
>           return false;
> @@ -991,6 +997,7 @@ static bool ld_index_op(DisasContext *s, arg_rnfvm *a, uint8_t eew)
>       data = FIELD_DP32(data, VDATA, VM, a->vm);
>       data = FIELD_DP32(data, VDATA, LMUL, emul);
>       data = FIELD_DP32(data, VDATA, NF, a->nf);
> +    data = FIELD_DP32(data, VDATA, VTA, s->vta);
>       return ldst_index_trans(a->rd, a->rs1, a->rs2, data, fn, s, false);
>   }
>   
> @@ -1043,6 +1050,7 @@ static bool st_index_op(DisasContext *s, arg_rnfvm *a, uint8_t eew)
>       data = FIELD_DP32(data, VDATA, VM, a->vm);
>       data = FIELD_DP32(data, VDATA, LMUL, emul);
>       data = FIELD_DP32(data, VDATA, NF, a->nf);
> +    data = FIELD_DP32(data, VDATA, VTA, s->vta);
>       return ldst_index_trans(a->rd, a->rs1, a->rs2, data, fn, s, true);
>   }
>   
> @@ -1108,6 +1116,7 @@ static bool ldff_op(DisasContext *s, arg_r2nfvm *a, uint8_t eew)
>       data = FIELD_DP32(data, VDATA, VM, a->vm);
>       data = FIELD_DP32(data, VDATA, LMUL, emul);
>       data = FIELD_DP32(data, VDATA, NF, a->nf);
> +    data = FIELD_DP32(data, VDATA, VTA, s->vta);
>       return ldff_trans(a->rd, a->rs1, data, fn, s);
>   }
>   
> diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c
> index 39c79c59c2..1c7015e917 100644
> --- a/target/riscv/vector_helper.c
> +++ b/target/riscv/vector_helper.c
> @@ -289,6 +289,9 @@ vext_ldst_stride(void *vd, void *v0, target_ulong base,
>       uint32_t i, k;
>       uint32_t nf = vext_nf(desc);
>       uint32_t max_elems = vext_max_elems(desc, log2_esz);
> +    uint32_t esz = 1 << log2_esz;
> +    uint32_t total_elems = vext_get_total_elems(desc, esz);
> +    uint32_t vta = vext_vta(desc);
>   
>       for (i = env->vstart; i < env->vl; i++, env->vstart++) {
>           if (!vm && !vext_elem_mask(v0, i)) {
> @@ -303,6 +306,11 @@ vext_ldst_stride(void *vd, void *v0, target_ulong base,
>           }
>       }
>       env->vstart = 0;
> +    /* set tail elements to 1s */
> +    for (k = 0; k < nf; ++k) {
> +        vext_set_elems_1s_fns[log2_esz](vd, vta, env->vl + k * total_elems,
> +                                        env->vl * esz, total_elems * esz);
> +    }
>   }
>   
>   #define GEN_VEXT_LD_STRIDE(NAME, ETYPE, LOAD_FN)                        \
> @@ -348,6 +356,9 @@ vext_ldst_us(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc,
>       uint32_t i, k;
>       uint32_t nf = vext_nf(desc);
>       uint32_t max_elems = vext_max_elems(desc, log2_esz);
> +    uint32_t esz = 1 << log2_esz;
> +    uint32_t total_elems = vext_get_total_elems(desc, esz);
> +    uint32_t vta = vext_vta(desc);
>   
>       /* load bytes from guest memory */
>       for (i = env->vstart; i < evl; i++, env->vstart++) {
> @@ -359,6 +370,11 @@ vext_ldst_us(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc,
>           }
>       }
>       env->vstart = 0;
> +    /* set tail elements to 1s */
> +    for (k = 0; k < nf; ++k) {
> +        vext_set_elems_1s_fns[log2_esz](vd, vta, env->vl + k * total_elems,
> +                                        env->vl * esz, total_elems * esz);
> +    }
>   }
>   

It seems incorrect here. similar to following load/store helper.

In above instructions,  following elements are loaded:

0       *  max_elems          ...     0      *max_elems + vl - 1

1       *  max_elems          ...     1     *max_elems + vl - 1

.......

(nf-1)* max_elems         ...    (nf-1)*max_elems + vl - 1

So,  the elements[vl  .. max_elems  - 1]  are  tail elements, however 
elements[vl ... 1* total_elems - 1] may not:

elements from max_elems to total_elems - 1 are active elements, If 
total_elems > max_elems(LMUL< 1)

Or LMUL should be equal or greater than 1 here? I didn't find any 
description about this from the spec.

I also have another question about the tail elements for these 
load/store instructions:

when nf = 3, LMUL = 1, vl=vlmax,  reg, reg+1, reg+2 will be loaded, then 
whether elements in reg+3

(if they belong to the same register group) are tail elements?

Regards,

Weiwei Li



WARNING: multiple messages have this Message-ID (diff)
From: Weiwei Li <liweiwei@iscas.ac.cn>
To: ~eopxd <yueh.ting.chen@gmail.com>,
	qemu-devel@nongnu.org, qemu-riscv@nongnu.org
Cc: Palmer Dabbelt <palmer@dabbelt.com>,
	Alistair Francis <alistair.francis@wdc.com>,
	Bin Meng <bin.meng@windriver.com>,
	Frank Chang <frank.chang@sifive.com>,
	eop Chen <eop.chen@sifive.com>
Subject: Re: [PATCH qemu v5 05/14] target/riscv: rvv: Add tail agnostic for vector load / store instructions
Date: Mon, 28 Mar 2022 19:56:30 +0800	[thread overview]
Message-ID: <7f3c995d-182f-f1b3-4e79-94f5b81e1be9@iscas.ac.cn> (raw)
In-Reply-To: <164845204233.25323.14607469451359734000-5@git.sr.ht>


在 2022/3/7 下午3:10, ~eopxd 写道:
> From: eopXD <eop.chen@sifive.com>
>
> Signed-off-by: eop Chen <eop.chen@sifive.com>
> Reviewed-by: Frank Chang <frank.chang@sifive.com>
> ---
>   target/riscv/insn_trans/trans_rvv.c.inc |  9 +++++++
>   target/riscv/vector_helper.c            | 32 +++++++++++++++++++++++++
>   2 files changed, 41 insertions(+)
>
> diff --git a/target/riscv/insn_trans/trans_rvv.c.inc b/target/riscv/insn_trans/trans_rvv.c.inc
> index cc80bf00ff..66cfc8c603 100644
> --- a/target/riscv/insn_trans/trans_rvv.c.inc
> +++ b/target/riscv/insn_trans/trans_rvv.c.inc
> @@ -711,6 +711,7 @@ static bool ld_us_op(DisasContext *s, arg_r2nfvm *a, uint8_t eew)
>       data = FIELD_DP32(data, VDATA, VM, a->vm);
>       data = FIELD_DP32(data, VDATA, LMUL, emul);
>       data = FIELD_DP32(data, VDATA, NF, a->nf);
> +    data = FIELD_DP32(data, VDATA, VTA, s->vta);
>       return ldst_us_trans(a->rd, a->rs1, data, fn, s, false);
>   }
>   
> @@ -748,6 +749,7 @@ static bool st_us_op(DisasContext *s, arg_r2nfvm *a, uint8_t eew)
>       data = FIELD_DP32(data, VDATA, VM, a->vm);
>       data = FIELD_DP32(data, VDATA, LMUL, emul);
>       data = FIELD_DP32(data, VDATA, NF, a->nf);
> +    data = FIELD_DP32(data, VDATA, VTA, s->vta);
>       return ldst_us_trans(a->rd, a->rs1, data, fn, s, true);
>   }
>   
> @@ -774,6 +776,7 @@ static bool ld_us_mask_op(DisasContext *s, arg_vlm_v *a, uint8_t eew)
>       /* EMUL = 1, NFIELDS = 1 */
>       data = FIELD_DP32(data, VDATA, LMUL, 0);
>       data = FIELD_DP32(data, VDATA, NF, 1);
> +    data = FIELD_DP32(data, VDATA, VTA, s->vta);
>       return ldst_us_trans(a->rd, a->rs1, data, fn, s, false);
>   }
>   
> @@ -791,6 +794,7 @@ static bool st_us_mask_op(DisasContext *s, arg_vsm_v *a, uint8_t eew)
>       /* EMUL = 1, NFIELDS = 1 */
>       data = FIELD_DP32(data, VDATA, LMUL, 0);
>       data = FIELD_DP32(data, VDATA, NF, 1);
> +    data = FIELD_DP32(data, VDATA, VTA, s->vta);
>       return ldst_us_trans(a->rd, a->rs1, data, fn, s, true);
>   }
>   
> @@ -862,6 +866,7 @@ static bool ld_stride_op(DisasContext *s, arg_rnfvm *a, uint8_t eew)
>       data = FIELD_DP32(data, VDATA, VM, a->vm);
>       data = FIELD_DP32(data, VDATA, LMUL, emul);
>       data = FIELD_DP32(data, VDATA, NF, a->nf);
> +    data = FIELD_DP32(data, VDATA, VTA, s->vta);
>       return ldst_stride_trans(a->rd, a->rs1, a->rs2, data, fn, s, false);
>   }
>   
> @@ -891,6 +896,7 @@ static bool st_stride_op(DisasContext *s, arg_rnfvm *a, uint8_t eew)
>       data = FIELD_DP32(data, VDATA, VM, a->vm);
>       data = FIELD_DP32(data, VDATA, LMUL, emul);
>       data = FIELD_DP32(data, VDATA, NF, a->nf);
> +    data = FIELD_DP32(data, VDATA, VTA, s->vta);
>       fn = fns[eew];
>       if (fn == NULL) {
>           return false;
> @@ -991,6 +997,7 @@ static bool ld_index_op(DisasContext *s, arg_rnfvm *a, uint8_t eew)
>       data = FIELD_DP32(data, VDATA, VM, a->vm);
>       data = FIELD_DP32(data, VDATA, LMUL, emul);
>       data = FIELD_DP32(data, VDATA, NF, a->nf);
> +    data = FIELD_DP32(data, VDATA, VTA, s->vta);
>       return ldst_index_trans(a->rd, a->rs1, a->rs2, data, fn, s, false);
>   }
>   
> @@ -1043,6 +1050,7 @@ static bool st_index_op(DisasContext *s, arg_rnfvm *a, uint8_t eew)
>       data = FIELD_DP32(data, VDATA, VM, a->vm);
>       data = FIELD_DP32(data, VDATA, LMUL, emul);
>       data = FIELD_DP32(data, VDATA, NF, a->nf);
> +    data = FIELD_DP32(data, VDATA, VTA, s->vta);
>       return ldst_index_trans(a->rd, a->rs1, a->rs2, data, fn, s, true);
>   }
>   
> @@ -1108,6 +1116,7 @@ static bool ldff_op(DisasContext *s, arg_r2nfvm *a, uint8_t eew)
>       data = FIELD_DP32(data, VDATA, VM, a->vm);
>       data = FIELD_DP32(data, VDATA, LMUL, emul);
>       data = FIELD_DP32(data, VDATA, NF, a->nf);
> +    data = FIELD_DP32(data, VDATA, VTA, s->vta);
>       return ldff_trans(a->rd, a->rs1, data, fn, s);
>   }
>   
> diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c
> index 39c79c59c2..1c7015e917 100644
> --- a/target/riscv/vector_helper.c
> +++ b/target/riscv/vector_helper.c
> @@ -289,6 +289,9 @@ vext_ldst_stride(void *vd, void *v0, target_ulong base,
>       uint32_t i, k;
>       uint32_t nf = vext_nf(desc);
>       uint32_t max_elems = vext_max_elems(desc, log2_esz);
> +    uint32_t esz = 1 << log2_esz;
> +    uint32_t total_elems = vext_get_total_elems(desc, esz);
> +    uint32_t vta = vext_vta(desc);
>   
>       for (i = env->vstart; i < env->vl; i++, env->vstart++) {
>           if (!vm && !vext_elem_mask(v0, i)) {
> @@ -303,6 +306,11 @@ vext_ldst_stride(void *vd, void *v0, target_ulong base,
>           }
>       }
>       env->vstart = 0;
> +    /* set tail elements to 1s */
> +    for (k = 0; k < nf; ++k) {
> +        vext_set_elems_1s_fns[log2_esz](vd, vta, env->vl + k * total_elems,
> +                                        env->vl * esz, total_elems * esz);
> +    }
>   }
>   
>   #define GEN_VEXT_LD_STRIDE(NAME, ETYPE, LOAD_FN)                        \
> @@ -348,6 +356,9 @@ vext_ldst_us(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc,
>       uint32_t i, k;
>       uint32_t nf = vext_nf(desc);
>       uint32_t max_elems = vext_max_elems(desc, log2_esz);
> +    uint32_t esz = 1 << log2_esz;
> +    uint32_t total_elems = vext_get_total_elems(desc, esz);
> +    uint32_t vta = vext_vta(desc);
>   
>       /* load bytes from guest memory */
>       for (i = env->vstart; i < evl; i++, env->vstart++) {
> @@ -359,6 +370,11 @@ vext_ldst_us(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc,
>           }
>       }
>       env->vstart = 0;
> +    /* set tail elements to 1s */
> +    for (k = 0; k < nf; ++k) {
> +        vext_set_elems_1s_fns[log2_esz](vd, vta, env->vl + k * total_elems,
> +                                        env->vl * esz, total_elems * esz);
> +    }
>   }
>   

It seems incorrect here. similar to following load/store helper.

In above instructions,  following elements are loaded:

0       *  max_elems          ...     0      *max_elems + vl - 1

1       *  max_elems          ...     1     *max_elems + vl - 1

.......

(nf-1)* max_elems         ...    (nf-1)*max_elems + vl - 1

So,  the elements[vl  .. max_elems  - 1]  are  tail elements, however 
elements[vl ... 1* total_elems - 1] may not:

elements from max_elems to total_elems - 1 are active elements, If 
total_elems > max_elems(LMUL< 1)

Or LMUL should be equal or greater than 1 here? I didn't find any 
description about this from the spec.

I also have another question about the tail elements for these 
load/store instructions:

when nf = 3, LMUL = 1, vl=vlmax,  reg, reg+1, reg+2 will be loaded, then 
whether elements in reg+3

(if they belong to the same register group) are tail elements?

Regards,

Weiwei Li



  reply	other threads:[~2022-03-28 11:59 UTC|newest]

Thread overview: 38+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-03-28  7:20 [PATCH qemu v5 00/14] Add tail agnostic behavior for rvv instructions ~eopxd
2022-03-28  7:20 ` ~eopxd
2022-03-01  9:07 ` [PATCH qemu v5 04/14] target/riscv: rvv: Add tail agnostic for vv instructions ~eopxd
2022-03-28  7:20   ` ~eopxd
2022-03-28 11:23   ` Weiwei Li
2022-03-28 11:23     ` Weiwei Li
2022-03-07  7:10 ` [PATCH qemu v5 05/14] target/riscv: rvv: Add tail agnostic for vector load / store instructions ~eopxd
2022-03-28  7:20   ` ~eopxd
2022-03-28 11:56   ` Weiwei Li [this message]
2022-03-28 11:56     ` Weiwei Li
2022-03-30  7:42     ` 陳約廷
2022-03-30  8:27       ` Weiwei Li
2022-03-30 10:02         ` eop Chen
2022-03-30 15:13           ` Weiwei Li
2022-03-07  7:32 ` [PATCH qemu v5 06/14] target/riscv: rvv: Add tail agnostic for vx, vvm, vxm instructions ~eopxd
2022-03-28  7:20   ` ~eopxd
2022-03-07  9:38 ` [PATCH qemu v5 07/14] target/riscv: rvv: Add tail agnostic for vector integer shift instructions ~eopxd
2022-03-28  7:20   ` ~eopxd
2022-03-07  9:43 ` [PATCH qemu v5 08/14] target/riscv: rvv: Add tail agnostic for vector integer comparison instructions ~eopxd
2022-03-28  7:20   ` ~eopxd
2022-03-07  9:53 ` [PATCH qemu v5 09/14] target/riscv: rvv: Add tail agnostic for vector integer merge and move instructions ~eopxd
2022-03-28  7:21   ` ~eopxd
2022-03-07 10:04 ` [PATCH qemu v5 10/14] target/riscv: rvv: Add tail agnostic for vector fix-point arithmetic instructions ~eopxd
2022-03-28  7:20   ` ~eopxd
2022-03-07 10:05 ` [PATCH qemu v5 11/14] target/riscv: rvv: Add tail agnostic for vector floating-point instructions ~eopxd
2022-03-28  7:21   ` ~eopxd
2022-03-07 12:21 ` [PATCH qemu v5 12/14] target/riscv: rvv: Add tail agnostic for vector reduction instructions ~eopxd
2022-03-28  7:21   ` ~eopxd
2022-03-07 15:26 ` [PATCH qemu v5 13/14] target/riscv: rvv: Add tail agnostic for vector mask instructions ~eopxd
2022-03-28  7:21   ` ~eopxd
2022-03-07 15:59 ` [PATCH qemu v5 14/14] target/riscv: rvv: Add tail agnostic for vector permutation instructions ~eopxd
2022-03-28  7:21   ` ~eopxd
2022-03-09  8:34 ` [PATCH qemu v5 02/14] target/riscv: rvv: Rename ambiguous esz ~eopxd
2022-03-28  7:20   ` ~eopxd
2022-03-12  6:28 ` [PATCH qemu v5 03/14] target/riscv: rvv: Early exit when vstart >= vl ~eopxd
2022-03-28  7:20   ` ~eopxd
2022-03-14  7:38 ` [PATCH qemu v5 01/14] target/riscv: rvv: Prune redundant ESZ, DSZ parameter passed ~eopxd
2022-03-14  7:38   ` ~eopxd

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=7f3c995d-182f-f1b3-4e79-94f5b81e1be9@iscas.ac.cn \
    --to=liweiwei@iscas.ac.cn \
    --cc=alistair.francis@wdc.com \
    --cc=bin.meng@windriver.com \
    --cc=eop.chen@sifive.com \
    --cc=frank.chang@sifive.com \
    --cc=palmer@dabbelt.com \
    --cc=qemu-devel@nongnu.org \
    --cc=qemu-riscv@nongnu.org \
    --cc=yueh.ting.chen@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.