Re: [PATCH 05/38] target/riscv: 8-bit Addition & Subtraction Instruction

From: Palmer Dabbelt <palmer@dabbelt.com>
To: alistair23@gmail.com
Cc: richard.henderson@linaro.org, qemu-riscv@nongnu.org,
	zhiwei_liu@c-sky.com, qemu-devel@nongnu.org
Subject: Re: [PATCH 05/38] target/riscv: 8-bit Addition & Subtraction Instruction
Date: Sun, 23 May 2021 18:00:35 -0700 (PDT)	[thread overview]
Message-ID: <mhng-167a21fd-72ef-432a-896e-ac21b587c560@palmerdabbelt-glaptop> (raw)
In-Reply-To: <CAKmqyKO9UHGkfRdb8dEVHFaxCjGox3x+-g066nRc_vqc7wtVWQ@mail.gmail.com>

On Mon, 15 Mar 2021 14:22:58 PDT (-0700), alistair23@gmail.com wrote:
> On Fri, Feb 12, 2021 at 10:14 AM LIU Zhiwei <zhiwei_liu@c-sky.com> wrote:
>>
>> Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com>
>
> Acked-by: Alistair Francis <alistair.francis@wdc.com>

I saw some reviews on the other ones, but since others (like this) just 
have acks and haven't had any other traffic I'm going to start here.

It looks like the latest spec is 0.9.4, but the changelog is pretty 
minimal between 0.9.5 and 0.9.2:

[0.9.2 -> 0.9.3]

* Changed Zp64 name to Zpsfoperand.
* Added Zprvsfextra for RV64 only instructions.
* Removed SWAP16 encoding. It is an alias of PKBT16.
* Fixed few typos and enhanced precision descriptions on imtermediate results.

[0.9.3 -> 0.9.4]

* Fixed few typos and enhanced precision descriptions on imtermediate results.
* Fixed/Changed data types for some intrinsic functions.
* Removed "RV32 Only" for Zpsfoperand.

So I'm just going to stick with reviewing based on the latest spec 
<https://github.com/riscv/riscv-p-spec/blob/d33a761f805d3b7c84214e5654a511267985a0a0/P-ext-proposal.pdf> 
and try to keep those differences in mind, assuming we're just tracking 
the latest draft here.

> Alistair
>
>> ---
>>  target/riscv/helper.h                   |  9 +++
>>  target/riscv/insn32.decode              | 11 ++++
>>  target/riscv/insn_trans/trans_rvp.c.inc | 79 +++++++++++++++++++++++++
>>  target/riscv/packed_helper.c            | 73 +++++++++++++++++++++++
>>  4 files changed, 172 insertions(+)
>>
>> diff --git a/target/riscv/helper.h b/target/riscv/helper.h
>> index 6d622c732a..a69a6b4e84 100644
>> --- a/target/riscv/helper.h
>> +++ b/target/riscv/helper.h
>> @@ -1175,3 +1175,12 @@ DEF_HELPER_3(rstsa16, tl, env, tl, tl)
>>  DEF_HELPER_3(urstsa16, tl, env, tl, tl)
>>  DEF_HELPER_3(kstsa16, tl, env, tl, tl)
>>  DEF_HELPER_3(ukstsa16, tl, env, tl, tl)
>> +
>> +DEF_HELPER_3(radd8, tl, env, tl, tl)
>> +DEF_HELPER_3(uradd8, tl, env, tl, tl)
>> +DEF_HELPER_3(kadd8, tl, env, tl, tl)
>> +DEF_HELPER_3(ukadd8, tl, env, tl, tl)
>> +DEF_HELPER_3(rsub8, tl, env, tl, tl)
>> +DEF_HELPER_3(ursub8, tl, env, tl, tl)
>> +DEF_HELPER_3(ksub8, tl, env, tl, tl)
>> +DEF_HELPER_3(uksub8, tl, env, tl, tl)
>> diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode
>> index 8815e90476..358dd1fa10 100644
>> --- a/target/riscv/insn32.decode
>> +++ b/target/riscv/insn32.decode
>> @@ -624,3 +624,14 @@ rstsa16    1011011  ..... ..... 010 ..... 1111111 @r
>>  urstsa16   1101011  ..... ..... 010 ..... 1111111 @r
>>  kstsa16    1100011  ..... ..... 010 ..... 1111111 @r
>>  ukstsa16   1110011  ..... ..... 010 ..... 1111111 @r
>> +
>> +add8       0100100  ..... ..... 000 ..... 1111111 @r
>> +radd8      0000100  ..... ..... 000 ..... 1111111 @r
>> +uradd8     0010100  ..... ..... 000 ..... 1111111 @r
>> +kadd8      0001100  ..... ..... 000 ..... 1111111 @r
>> +ukadd8     0011100  ..... ..... 000 ..... 1111111 @r
>> +sub8       0100101  ..... ..... 000 ..... 1111111 @r
>> +rsub8      0000101  ..... ..... 000 ..... 1111111 @r
>> +ursub8     0010101  ..... ..... 000 ..... 1111111 @r
>> +ksub8      0001101  ..... ..... 000 ..... 1111111 @r
>> +uksub8     0011101  ..... ..... 000 ..... 1111111 @r
>> diff --git a/target/riscv/insn_trans/trans_rvp.c.inc b/target/riscv/insn_trans/trans_rvp.c.inc
>> index 0885a4fd45..109f560ec9 100644
>> --- a/target/riscv/insn_trans/trans_rvp.c.inc
>> +++ b/target/riscv/insn_trans/trans_rvp.c.inc
>> @@ -159,3 +159,82 @@ GEN_RVP_R_OOL(rstsa16);
>>  GEN_RVP_R_OOL(urstsa16);
>>  GEN_RVP_R_OOL(kstsa16);
>>  GEN_RVP_R_OOL(ukstsa16);
>> +
>> +/* 8-bit Addition & Subtraction Instructions */
>> +/*
>> + *  Copied from tcg-op-gvec.c.
>> + *
>> + *  Perform a vector addition using normal addition and a mask.  The mask
>> + *  should be the sign bit of each lane.  This 6-operation form is more
>> + *  efficient than separate additions when there are 4 or more lanes in
>> + *  the 64-bit operation.
>> + */
>> +
>> +static void gen_simd_add_mask(TCGv d, TCGv a, TCGv b, TCGv m)
>> +{
>> +    TCGv t1 = tcg_temp_new();
>> +    TCGv t2 = tcg_temp_new();
>> +    TCGv t3 = tcg_temp_new();
>> +
>> +    tcg_gen_andc_tl(t1, a, m);
>> +    tcg_gen_andc_tl(t2, b, m);
>> +    tcg_gen_xor_tl(t3, a, b);
>> +    tcg_gen_add_tl(d, t1, t2);
>> +    tcg_gen_and_tl(t3, t3, m);
>> +    tcg_gen_xor_tl(d, d, t3);
>> +
>> +    tcg_temp_free(t1);
>> +    tcg_temp_free(t2);
>> +    tcg_temp_free(t3);
>> +}
>> +
>> +static void tcg_gen_simd_add8(TCGv d, TCGv a, TCGv b)
>> +{
>> +    TCGv m = tcg_const_tl((target_ulong)dup_const(MO_8, 0x80));
>> +    gen_simd_add_mask(d, a, b, m);
>> +    tcg_temp_free(m);
>> +}
>> +
>> +GEN_RVP_R_INLINE(add8, add, 0, trans_add);
>> +
>> +/*
>> + *  Copied from tcg-op-gvec.c.
>> + *
>> + *  Perform a vector subtraction using normal subtraction and a mask.
>> + *  Compare gen_addv_mask above.
>> + */
>> +static void gen_simd_sub_mask(TCGv d, TCGv a, TCGv b, TCGv m)
>> +{
>> +    TCGv t1 = tcg_temp_new();
>> +    TCGv t2 = tcg_temp_new();
>> +    TCGv t3 = tcg_temp_new();
>> +
>> +    tcg_gen_or_tl(t1, a, m);
>> +    tcg_gen_andc_tl(t2, b, m);
>> +    tcg_gen_eqv_tl(t3, a, b);
>> +    tcg_gen_sub_tl(d, t1, t2);
>> +    tcg_gen_and_tl(t3, t3, m);
>> +    tcg_gen_xor_tl(d, d, t3);
>> +
>> +    tcg_temp_free(t1);
>> +    tcg_temp_free(t2);
>> +    tcg_temp_free(t3);
>> +}
>> +
>> +static void tcg_gen_simd_sub8(TCGv d, TCGv a, TCGv b)
>> +{
>> +    TCGv m = tcg_const_tl((target_ulong)dup_const(MO_8, 0x80));
>> +    gen_simd_sub_mask(d, a, b, m);
>> +    tcg_temp_free(m);
>> +}
>> +
>> +GEN_RVP_R_INLINE(sub8, sub, 0, trans_sub);
>> +
>> +GEN_RVP_R_OOL(radd8);
>> +GEN_RVP_R_OOL(uradd8);
>> +GEN_RVP_R_OOL(kadd8);
>> +GEN_RVP_R_OOL(ukadd8);
>> +GEN_RVP_R_OOL(rsub8);
>> +GEN_RVP_R_OOL(ursub8);
>> +GEN_RVP_R_OOL(ksub8);
>> +GEN_RVP_R_OOL(uksub8);
>> diff --git a/target/riscv/packed_helper.c b/target/riscv/packed_helper.c
>> index b84abaaf25..62db072204 100644
>> --- a/target/riscv/packed_helper.c
>> +++ b/target/riscv/packed_helper.c
>> @@ -352,3 +352,76 @@ static inline void do_ukstsa16(CPURISCVState *env, void *vd, void *va,
>>  }
>>
>>  RVPR(ukstsa16, 2, 2);
>> +
>> +/* 8-bit Addition & Subtraction Instructions */
>> +static inline void do_radd8(CPURISCVState *env, void *vd, void *va,
>> +                            void *vb, uint8_t i)
>> +{
>> +    int8_t *d = vd, *a = va, *b = vb;
>> +    d[i] = hadd32(a[i], b[i]);
>> +}
>> +
>> +RVPR(radd8, 1, 1);
>> +
>> +static inline void do_uradd8(CPURISCVState *env, void *vd, void *va,
>> +                                  void *vb, uint8_t i)
>> +{
>> +    uint8_t *d = vd, *a = va, *b = vb;
>> +    d[i] = haddu32(a[i], b[i]);
>> +}
>> +
>> +RVPR(uradd8, 1, 1);
>> +
>> +static inline void do_kadd8(CPURISCVState *env, void *vd, void *va,
>> +                            void *vb, uint8_t i)
>> +{
>> +    int8_t *d = vd, *a = va, *b = vb;
>> +    d[i] = sadd8(env, 0, a[i], b[i]);
>> +}
>> +
>> +RVPR(kadd8, 1, 1);
>> +
>> +static inline void do_ukadd8(CPURISCVState *env, void *vd, void *va,
>> +                             void *vb, uint8_t i)
>> +{
>> +    uint8_t *d = vd, *a = va, *b = vb;
>> +    d[i] = saddu8(env, 0, a[i], b[i]);
>> +}
>> +
>> +RVPR(ukadd8, 1, 1);
>> +
>> +static inline void do_rsub8(CPURISCVState *env, void *vd, void *va,
>> +                            void *vb, uint8_t i)
>> +{
>> +    int8_t *d = vd, *a = va, *b = vb;
>> +    d[i] = hsub32(a[i], b[i]);
>> +}
>> +
>> +RVPR(rsub8, 1, 1);
>> +
>> +static inline void do_ursub8(CPURISCVState *env, void *vd, void *va,
>> +                             void *vb, uint8_t i)
>> +{
>> +    uint8_t *d = vd, *a = va, *b = vb;
>> +    d[i] = hsubu64(a[i], b[i]);
>> +}
>> +
>> +RVPR(ursub8, 1, 1);
>> +
>> +static inline void do_ksub8(CPURISCVState *env, void *vd, void *va,
>> +                            void *vb, uint8_t i)
>> +{
>> +    int8_t *d = vd, *a = va, *b = vb;
>> +    d[i] = ssub8(env, 0, a[i], b[i]);
>> +}
>> +
>> +RVPR(ksub8, 1, 1);
>> +
>> +static inline void do_uksub8(CPURISCVState *env, void *vd, void *va,
>> +                             void *vb, uint8_t i)
>> +{
>> +    uint8_t *d = vd, *a = va, *b = vb;
>> +    d[i] = ssubu8(env, 0, a[i], b[i]);
>> +}
>> +
>> +RVPR(uksub8, 1, 1);
>> --
>> 2.17.1
>>

The naming on some of these helpers is a bit odd, but given that they're 
a mix of the V and P extensions it's probably fine to just leave them 
as-is.  

Reviewed-by: Palmer Dabbelt <palmerdabbelt@google.com>