Re: [PATCH 09/13] target/riscv: Adjust vector address with ol

From: LIU Zhiwei <zhiwei_liu@c-sky.com>
To: Richard Henderson <richard.henderson@linaro.org>,
	qemu-devel@nongnu.org, qemu-riscv@nongnu.org,
	Alexey Baturo <baturo.alexey@gmail.com>
Cc: palmer@dabbelt.com, bin.meng@windriver.com, Alistair.Francis@wdc.com
Subject: Re: [PATCH 09/13] target/riscv: Adjust vector address with ol
Date: Tue, 9 Nov 2021 16:39:40 +0800	[thread overview]
Message-ID: <08078788-dd46-7e7a-14eb-7eb95c2c9c89@c-sky.com> (raw)
In-Reply-To: <1bd3a3ff-8ca8-ed78-1bc7-c4668965f448@linaro.org>

[-- Attachment #1: Type: text/plain, Size: 9753 bytes --]


On 2021/11/9 下午4:18, Richard Henderson wrote:
> On 11/9/21 9:04 AM, LIU Zhiwei wrote:
>> On 2021/11/9 下午2:37, Richard Henderson wrote:
>>
>>> On 11/8/21 10:28 AM, LIU Zhiwei wrote:
>>>> On 2021/11/1 下午7:35, Richard Henderson wrote:
>>>>
>>>>> On 11/1/21 6:01 AM, LIU Zhiwei wrote:
>>>>>> Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com>
>>>>>> ---
>>>>>>   target/riscv/insn_trans/trans_rvv.c.inc |  8 ++++
>>>>>>   target/riscv/internals.h                |  1 +
>>>>>>   target/riscv/vector_helper.c            | 54 
>>>>>> +++++++++++++++++--------
>>>>>>   3 files changed, 46 insertions(+), 17 deletions(-)
>>>>>>
>>>>>> diff --git a/target/riscv/insn_trans/trans_rvv.c.inc 
>>>>>> b/target/riscv/insn_trans/trans_rvv.c.inc
>>>>>> index ed042f7bb9..5cd9b802df 100644
>>>>>> --- a/target/riscv/insn_trans/trans_rvv.c.inc
>>>>>> +++ b/target/riscv/insn_trans/trans_rvv.c.inc
>>>>>> @@ -233,6 +233,7 @@ static bool ld_us_op(DisasContext *s, 
>>>>>> arg_r2nfvm *a, uint8_t seq)
>>>>>>       data = FIELD_DP32(data, VDATA, VM, a->vm);
>>>>>>       data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
>>>>>>       data = FIELD_DP32(data, VDATA, NF, a->nf);
>>>>>> +    data = FIELD_DP32(data, VDATA, OL, s->ol);
>>>>>>       return ldst_us_trans(a->rd, a->rs1, data, fn, s);
>>>>>>   }
>>>>>>   @@ -286,6 +287,7 @@ static bool st_us_op(DisasContext *s, 
>>>>>> arg_r2nfvm *a, uint8_t seq)
>>>>>>       data = FIELD_DP32(data, VDATA, VM, a->vm);
>>>>>>       data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
>>>>>>       data = FIELD_DP32(data, VDATA, NF, a->nf);
>>>>>> +    data = FIELD_DP32(data, VDATA, OL, s->ol);
>>>>>>       return ldst_us_trans(a->rd, a->rs1, data, fn, s);
>>>>>>   }
>>>>>>   @@ -365,6 +367,7 @@ static bool ld_stride_op(DisasContext *s, 
>>>>>> arg_rnfvm *a, uint8_t seq)
>>>>>>       data = FIELD_DP32(data, VDATA, VM, a->vm);
>>>>>>       data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
>>>>>>       data = FIELD_DP32(data, VDATA, NF, a->nf);
>>>>>> +    data = FIELD_DP32(data, VDATA, OL, s->ol);
>>>>>>       return ldst_stride_trans(a->rd, a->rs1, a->rs2, data, fn, s);
>>>>>>   }
>>>>>>   @@ -404,6 +407,7 @@ static bool st_stride_op(DisasContext *s, 
>>>>>> arg_rnfvm *a, uint8_t seq)
>>>>>>       data = FIELD_DP32(data, VDATA, VM, a->vm);
>>>>>>       data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
>>>>>>       data = FIELD_DP32(data, VDATA, NF, a->nf);
>>>>>> +    data = FIELD_DP32(data, VDATA, OL, s->ol);
>>>>>>       fn =  fns[seq][s->sew];
>>>>>>       if (fn == NULL) {
>>>>>>           return false;
>>>>>> @@ -490,6 +494,7 @@ static bool ld_index_op(DisasContext *s, 
>>>>>> arg_rnfvm *a, uint8_t seq)
>>>>>>       data = FIELD_DP32(data, VDATA, VM, a->vm);
>>>>>>       data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
>>>>>>       data = FIELD_DP32(data, VDATA, NF, a->nf);
>>>>>> +    data = FIELD_DP32(data, VDATA, OL, s->ol);
>>>>>>       return ldst_index_trans(a->rd, a->rs1, a->rs2, data, fn, s);
>>>>>>   }
>>>>>>   @@ -542,6 +547,7 @@ static bool st_index_op(DisasContext *s, 
>>>>>> arg_rnfvm *a, uint8_t seq)
>>>>>>       data = FIELD_DP32(data, VDATA, VM, a->vm);
>>>>>>       data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
>>>>>>       data = FIELD_DP32(data, VDATA, NF, a->nf);
>>>>>> +    data = FIELD_DP32(data, VDATA, OL, s->ol);
>>>>>>       return ldst_index_trans(a->rd, a->rs1, a->rs2, data, fn, s);
>>>>>>   }
>>>>>>   @@ -617,6 +623,7 @@ static bool ldff_op(DisasContext *s, 
>>>>>> arg_r2nfvm *a, uint8_t seq)
>>>>>>       data = FIELD_DP32(data, VDATA, VM, a->vm);
>>>>>>       data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
>>>>>>       data = FIELD_DP32(data, VDATA, NF, a->nf);
>>>>>> +    data = FIELD_DP32(data, VDATA, OL, s->ol);
>>>>>>       return ldff_trans(a->rd, a->rs1, data, fn, s);
>>>>>>   }
>>>>>>   @@ -724,6 +731,7 @@ static bool amo_op(DisasContext *s, 
>>>>>> arg_rwdvm *a, uint8_t seq)
>>>>>>       data = FIELD_DP32(data, VDATA, VM, a->vm);
>>>>>>       data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
>>>>>>       data = FIELD_DP32(data, VDATA, WD, a->wd);
>>>>>> +    data = FIELD_DP32(data, VDATA, OL, s->ol);
>>>>>>       return amo_trans(a->rd, a->rs1, a->rs2, data, fn, s);
>>>>>>   }
>>>>>>   /*
>>>>>> diff --git a/target/riscv/internals.h b/target/riscv/internals.h
>>>>>> index b15ad394bb..f74b8291e4 100644
>>>>>> --- a/target/riscv/internals.h
>>>>>> +++ b/target/riscv/internals.h
>>>>>> @@ -27,6 +27,7 @@ FIELD(VDATA, VM, 8, 1)
>>>>>>   FIELD(VDATA, LMUL, 9, 2)
>>>>>>   FIELD(VDATA, NF, 11, 4)
>>>>>>   FIELD(VDATA, WD, 11, 1)
>>>>>> +FIELD(VDATA, OL, 15, 2)
>>>>>>     /* float point classify helpers */
>>>>>>   target_ulong fclass_h(uint64_t frs1);
>>>>>> diff --git a/target/riscv/vector_helper.c 
>>>>>> b/target/riscv/vector_helper.c
>>>>>> index 535420ee66..451688c328 100644
>>>>>> --- a/target/riscv/vector_helper.c
>>>>>> +++ b/target/riscv/vector_helper.c
>>>>>> @@ -112,6 +112,11 @@ static uint32_t vext_wd(uint32_t desc)
>>>>>>       return (simd_data(desc) >> 11) & 0x1;
>>>>>>   }
>>>>>>   +static inline uint32_t vext_ol(uint32_t desc)
>>>>>> +{
>>>>>> +    return FIELD_EX32(simd_data(desc), VDATA, OL);
>>>>>> +}
>>>>>
>>>>> XLEN not OLEN.
>>>> OK.
>>>>>
>>>>>> @@ -123,6 +128,14 @@ static inline uint32_t vext_maxsz(uint32_t 
>>>>>> desc)
>>>>>>       return simd_maxsz(desc) << vext_lmul(desc);
>>>>>>   }
>>>>>>   +static inline target_ulong adjust_addr(target_ulong addr, 
>>>>>> uint32_t olen)
>>>>>> +{
>>>>>> +    if (olen < TARGET_LONG_BITS) {
>>>>>> +        addr &= UINT32_MAX;
>>>>>> +    }
>>>>>> +    return addr;
>>>>>> +}
>>>>>
>>>>> Here's where I'm unsure.  This looks a lot like the changes that 
>>>>> are required to support pointer-masking in vectors, which Alexey 
>>>>> said he was going to look at.
>>>>>
>>>>> (1) Do we need to pass anything in VEXT at all?
>>>>>     We do have CPURISCVState, so we could just use cpu_get_ml,
>>>> Yes, we should use cpu_get_xl.
>>>>> which we would also need for env->mmte etc for pointer masking.
>>>>
>>>> Do you mean env->mpmmask and env->mpmbase? I think yes, we should 
>>>> also adjust these register behaviors with xlen.
>>>
>>> I mean the set of [msu]pmmask and [msu]pmbase, selected as 
>>> appropriate for the current execution mode.
>>>
>>>>> (3) Do we try to streamline the computation by passing down composite
>>>>>     mask and base parameters.  This way we don't need to do complex
>>>>>     examination of ENV to determine execution mode, and instead 
>>>>> always
>>>>>     compute
>>>>>
>>>>>        addr = (addr & mask) | base;
>>>>>
>>>>>     where mask = -1, base = 0 for "normal" addressing, and when
>>>>>     UXLEN == 32, mask <= UINT32_MAX.
>>>>
>>>> Do you mean add env->pmmask and env->pmbase?
>>>>
>>>> I can initialize them in riscv_tr_init_disas_context, such as by 
>>>> env->xpmmask & UINT32_MAX .
>>>>
>>>>>
>>>>> (4) Do we in fact want to pre-compute these into known slots on ENV,
>>>>>     so that we don't have to pass these around as separate 
>>>>> parameters?
>>>>>     We would adjust these values during PM CSR changes and when
>>>>>     changing privilege levels.
>>> For option (3), I was suggesting a mask + base pair passed down from 
>>> TCG-generated code.
>>>
>>> For option (4), I was suggesting embedding a mask + base pair in 
>>> env, which would be re-computed at every privilege level change, 
>>> plus reset and vmload.
>>>
>>> In both cases, the mask would be a combination of [msu]pmmask & 
>>> (RV32 ? UINT32_MAX : UINT64_MAX), as you say.
>>
>> We will calculate [msu]pmmask by  csrrw , and we have ignored high 
>> bits there.
>>
>> Can we just use the [msu]pmmmask?
>
> We could.  However:
>
> In order to select [msu]pmmask, we have to look up the current cpu 
> state.  In order to mask the high bits, we have to look up the current 
> xl, which requires that we look up the current cpu state then extract 
> the xl from misa  and mstatus.
>
> All of which means that we're doing repeated lookups for every memory 
> access.  I am suggesting that we either (3) compile those lookups into 
> the generated code or (4) cache those lookups when state changes (csr 
> writes and priv changes).


Do you mean we should add this code to riscv_tr_init_disas_context

     if (ctx->pm_enabled) {
          switch (priv) {
          case PRV_M:
              env->mask = env->mpmmask;
              env->base = env->mpmbase;
              break;
          case PRV_S:
              env->mask = env->spmmask;
              env->base = env->spmbase;
              break;
          case PRV_U:
              env->mask = env->upmmask;
              env->base = env->upmbase;
              break;
          default:
              g_assert_not_reached();
          }
          ctx->pm_mask = pm_mask[priv];
          ctx->pm_base = pm_base[priv];
          ctx->need_mask = true; /* new flag for mask */

      } else if (get_xlen(ctx)  < TARGET_LONG_BITS) {
          env->mask = UINT32_MAX;
          env->base = 0;
          ctx->pm_mask = tcg_constant_tl(UINT32_MAX);
          ctx->pm_base = tcg_constant_tl(0);

         ctx->need_mask = true;

      } else {
	 env->mask = UINT64_MAX;
          env->base = 0;
      }

Thanks,
Zhiwei

>
>
> r~

[-- Attachment #2: Type: text/html, Size: 15758 bytes --]