All of lore.kernel.org
 help / color / mirror / Atom feed
From: Alistair Francis <alistair23@gmail.com>
To: LIU Zhiwei <zhiwei_liu@c-sky.com>
Cc: Richard Henderson <richard.henderson@linaro.org>,
	Palmer Dabbelt <palmer@dabbelt.com>,
	"open list:RISC-V" <qemu-riscv@nongnu.org>,
	"qemu-devel@nongnu.org Developers" <qemu-devel@nongnu.org>
Subject: Re: [PATCH 17/38] target/riscv: Signed MSW 32x16 Multiply and Add Instructions
Date: Tue, 16 Mar 2021 12:01:33 -0400	[thread overview]
Message-ID: <CAKmqyKN54Tu2TKLf9Tq0anusS_73JMPS6PwA4yMYwZD+XKf1Mg@mail.gmail.com> (raw)
In-Reply-To: <20210212150256.885-18-zhiwei_liu@c-sky.com>

On Fri, Feb 12, 2021 at 10:38 AM LIU Zhiwei <zhiwei_liu@c-sky.com> wrote:
>
> Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com>

Acked-by: Alistair Francis <alistair.francis@wdc.com>

Alistair

> ---
>  target/riscv/helper.h                   |  17 ++
>  target/riscv/insn32.decode              |  17 ++
>  target/riscv/insn_trans/trans_rvp.c.inc |  18 ++
>  target/riscv/packed_helper.c            | 208 ++++++++++++++++++++++++
>  4 files changed, 260 insertions(+)
>
> diff --git a/target/riscv/helper.h b/target/riscv/helper.h
> index 0bd21c8514..25aa07a7ff 100644
> --- a/target/riscv/helper.h
> +++ b/target/riscv/helper.h
> @@ -1277,3 +1277,20 @@ DEF_HELPER_4(kmmsb, tl, env, tl, tl, tl)
>  DEF_HELPER_4(kmmsb_u, tl, env, tl, tl, tl)
>  DEF_HELPER_3(kwmmul, tl, env, tl, tl)
>  DEF_HELPER_3(kwmmul_u, tl, env, tl, tl)
> +
> +DEF_HELPER_3(smmwb, tl, env, tl, tl)
> +DEF_HELPER_3(smmwb_u, tl, env, tl, tl)
> +DEF_HELPER_3(smmwt, tl, env, tl, tl)
> +DEF_HELPER_3(smmwt_u, tl, env, tl, tl)
> +DEF_HELPER_4(kmmawb, tl, env, tl, tl, tl)
> +DEF_HELPER_4(kmmawb_u, tl, env, tl, tl, tl)
> +DEF_HELPER_4(kmmawt, tl, env, tl, tl, tl)
> +DEF_HELPER_4(kmmawt_u, tl, env, tl, tl, tl)
> +DEF_HELPER_3(kmmwb2, tl, env, tl, tl)
> +DEF_HELPER_3(kmmwb2_u, tl, env, tl, tl)
> +DEF_HELPER_3(kmmwt2, tl, env, tl, tl)
> +DEF_HELPER_3(kmmwt2_u, tl, env, tl, tl)
> +DEF_HELPER_4(kmmawb2, tl, env, tl, tl, tl)
> +DEF_HELPER_4(kmmawb2_u, tl, env, tl, tl, tl)
> +DEF_HELPER_4(kmmawt2, tl, env, tl, tl, tl)
> +DEF_HELPER_4(kmmawt2_u, tl, env, tl, tl, tl)
> diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode
> index e0be2790dc..6e63bab2d9 100644
> --- a/target/riscv/insn32.decode
> +++ b/target/riscv/insn32.decode
> @@ -745,3 +745,20 @@ kmmsb      0100001  ..... ..... 001 ..... 1111111 @r
>  kmmsb_u    0101001  ..... ..... 001 ..... 1111111 @r
>  kwmmul     0110001  ..... ..... 001 ..... 1111111 @r
>  kwmmul_u   0111001  ..... ..... 001 ..... 1111111 @r
> +
> +smmwb      0100010  ..... ..... 001 ..... 1111111 @r
> +smmwb_u    0101010  ..... ..... 001 ..... 1111111 @r
> +smmwt      0110010  ..... ..... 001 ..... 1111111 @r
> +smmwt_u    0111010  ..... ..... 001 ..... 1111111 @r
> +kmmawb     0100011  ..... ..... 001 ..... 1111111 @r
> +kmmawb_u   0101011  ..... ..... 001 ..... 1111111 @r
> +kmmawt     0110011  ..... ..... 001 ..... 1111111 @r
> +kmmawt_u   0111011  ..... ..... 001 ..... 1111111 @r
> +kmmwb2     1000111  ..... ..... 001 ..... 1111111 @r
> +kmmwb2_u   1001111  ..... ..... 001 ..... 1111111 @r
> +kmmwt2     1010111  ..... ..... 001 ..... 1111111 @r
> +kmmwt2_u   1011111  ..... ..... 001 ..... 1111111 @r
> +kmmawb2    1100111  ..... ..... 001 ..... 1111111 @r
> +kmmawb2_u  1101111  ..... ..... 001 ..... 1111111 @r
> +kmmawt2    1110111  ..... ..... 001 ..... 1111111 @r
> +kmmawt2_u  1111111  ..... ..... 001 ..... 1111111 @r
> diff --git a/target/riscv/insn_trans/trans_rvp.c.inc b/target/riscv/insn_trans/trans_rvp.c.inc
> index fbc9c0b57b..e708ae7a6a 100644
> --- a/target/riscv/insn_trans/trans_rvp.c.inc
> +++ b/target/riscv/insn_trans/trans_rvp.c.inc
> @@ -564,3 +564,21 @@ GEN_RVP_R_ACC_OOL(kmmsb);
>  GEN_RVP_R_ACC_OOL(kmmsb_u);
>  GEN_RVP_R_OOL(kwmmul);
>  GEN_RVP_R_OOL(kwmmul_u);
> +
> +/* Most Significant Word "32x16" Multiply & Add Instructions */
> +GEN_RVP_R_OOL(smmwb);
> +GEN_RVP_R_OOL(smmwb_u);
> +GEN_RVP_R_OOL(smmwt);
> +GEN_RVP_R_OOL(smmwt_u);
> +GEN_RVP_R_ACC_OOL(kmmawb);
> +GEN_RVP_R_ACC_OOL(kmmawb_u);
> +GEN_RVP_R_ACC_OOL(kmmawt);
> +GEN_RVP_R_ACC_OOL(kmmawt_u);
> +GEN_RVP_R_OOL(kmmwb2);
> +GEN_RVP_R_OOL(kmmwb2_u);
> +GEN_RVP_R_OOL(kmmwt2);
> +GEN_RVP_R_OOL(kmmwt2_u);
> +GEN_RVP_R_ACC_OOL(kmmawb2);
> +GEN_RVP_R_ACC_OOL(kmmawb2_u);
> +GEN_RVP_R_ACC_OOL(kmmawt2);
> +GEN_RVP_R_ACC_OOL(kmmawt2_u);
> diff --git a/target/riscv/packed_helper.c b/target/riscv/packed_helper.c
> index c1322d2fac..ea3c9f6dd8 100644
> --- a/target/riscv/packed_helper.c
> +++ b/target/riscv/packed_helper.c
> @@ -1477,3 +1477,211 @@ static inline void do_kwmmul_u(CPURISCVState *env, void *vd, void *va,
>  }
>
>  RVPR(kwmmul_u, 1, 4);
> +
> +/* Most Significant Word "32x16" Multiply & Add Instructions */
> +static inline void do_smmwb(CPURISCVState *env, void *vd, void *va,
> +                            void *vb, uint8_t i)
> +{
> +    int32_t *d = vd, *a = va;
> +    int16_t *b = vb;
> +    d[H4(i)] = (int64_t)a[H4(i)] * b[H2(2 * i)] >> 16;
> +}
> +
> +RVPR(smmwb, 1, 4);
> +
> +static inline void do_smmwb_u(CPURISCVState *env, void *vd, void *va,
> +                              void *vb, uint8_t i)
> +{
> +    int32_t *d = vd, *a = va;
> +    int16_t *b = vb;
> +    d[H4(i)] = ((int64_t)a[H4(i)] * b[H2(2 * i)] + (1ull << 15)) >> 16;
> +}
> +
> +RVPR(smmwb_u, 1, 4);
> +
> +static inline void do_smmwt(CPURISCVState *env, void *vd, void *va,
> +                            void *vb, uint8_t i)
> +{
> +    int32_t *d = vd, *a = va;
> +    int16_t *b = vb;
> +    d[H4(i)] = (int64_t)a[H4(i)] * b[H2(2 * i + 1)] >> 16;
> +}
> +
> +RVPR(smmwt, 1, 4);
> +
> +static inline void do_smmwt_u(CPURISCVState *env, void *vd, void *va,
> +                              void *vb, uint8_t i)
> +{
> +    int32_t *d = vd, *a = va;
> +    int16_t *b = vb;
> +    d[H4(i)] = ((int64_t)a[H4(i)] * b[H2(2 * i + 1)] + (1ull << 15)) >> 16;
> +}
> +
> +RVPR(smmwt_u, 1, 4);
> +
> +static inline void do_kmmawb(CPURISCVState *env, void *vd, void *va,
> +                             void *vb, void *vc, uint8_t i)
> +{
> +    int32_t *d = vd, *a = va, *c = vc;
> +    int16_t *b = vb;
> +    d[H4(i)] = sadd32(env, 0, (int64_t)a[H4(i)] * b[H2(2 * i)] >> 16, c[H4(i)]);
> +}
> +
> +RVPR_ACC(kmmawb, 1, 4);
> +
> +static inline void do_kmmawb_u(CPURISCVState *env, void *vd, void *va,
> +                               void *vb, void *vc, uint8_t i)
> +{
> +    int32_t *d = vd, *a = va, *c = vc;
> +    int16_t *b = vb;
> +    d[H4(i)] = sadd32(env, 0, ((int64_t)a[H4(i)] * b[H2(2 * i)] +
> +                               (1ull << 15)) >> 16, c[H4(i)]);
> +}
> +
> +RVPR_ACC(kmmawb_u, 1, 4);
> +
> +static inline void do_kmmawt(CPURISCVState *env, void *vd, void *va,
> +                             void *vb, void *vc, uint8_t i)
> +{
> +    int32_t *d = vd, *a = va, *c = vc;
> +    int16_t *b = vb;
> +    d[H4(i)] = sadd32(env, 0, (int64_t)a[H4(i)] * b[H2(2 * i + 1)] >> 16,
> +                      c[H4(i)]);
> +}
> +
> +RVPR_ACC(kmmawt, 1, 4);
> +
> +static inline void do_kmmawt_u(CPURISCVState *env, void *vd, void *va,
> +                               void *vb, void *vc, uint8_t i)
> +{
> +    int32_t *d = vd, *a = va, *c = vc;
> +    int16_t *b = vb;
> +    d[H4(i)] = sadd32(env, 0, ((int64_t)a[H4(i)] * b[H2(2 * i + 1)] +
> +                               (1ull << 15)) >> 16, c[H4(i)]);
> +}
> +
> +RVPR_ACC(kmmawt_u, 1, 4);
> +
> +static inline void do_kmmwb2(CPURISCVState *env, void *vd, void *va,
> +                             void *vb, uint8_t i)
> +{
> +    int32_t *d = vd, *a = va;
> +    int16_t *b = vb;
> +    if (a[H4(i)] == INT32_MIN && b[H2(2 * i)] == INT16_MIN) {
> +        env->vxsat = 0x1;
> +        d[H4(i)] = INT32_MAX;
> +    } else {
> +        d[H4(i)] = (int64_t)a[H4(i)] * b[H2(2 * i)] >> 15;
> +    }
> +}
> +
> +RVPR(kmmwb2, 1, 4);
> +
> +static inline void do_kmmwb2_u(CPURISCVState *env, void *vd, void *va,
> +                               void *vb, uint8_t i)
> +{
> +    int32_t *d = vd, *a = va;
> +    int16_t *b = vb;
> +    if (a[H4(i)] == INT32_MIN && b[H2(2 * i)] == INT16_MIN) {
> +        env->vxsat = 0x1;
> +        d[H4(i)] = INT32_MAX;
> +    } else {
> +        d[H4(i)] = ((int64_t)a[H4(i)] * b[H2(2 * i)] + (1ull << 14)) >> 15;
> +    }
> +}
> +
> +RVPR(kmmwb2_u, 1, 4);
> +
> +static inline void do_kmmwt2(CPURISCVState *env, void *vd, void *va,
> +                             void *vb, uint8_t i)
> +{
> +    int32_t *d = vd, *a = va;
> +    int16_t *b = vb;
> +    if (a[H4(i)] == INT32_MIN && b[H2(2 * i + 1)] == INT16_MIN) {
> +        env->vxsat = 0x1;
> +        d[H4(i)] = INT32_MAX;
> +    } else {
> +        d[H4(i)] = (int64_t)a[H4(i)] * b[H2(2 * i + 1)] >> 15;
> +    }
> +}
> +
> +RVPR(kmmwt2, 1, 4);
> +
> +static inline void do_kmmwt2_u(CPURISCVState *env, void *vd, void *va,
> +                               void *vb, uint8_t i)
> +{
> +    int32_t *d = vd, *a = va;
> +    int16_t *b = vb;
> +    if (a[H4(i)] == INT32_MIN && b[H2(2 * i + 1)] == INT16_MIN) {
> +        env->vxsat = 0x1;
> +        d[H4(i)] = INT32_MAX;
> +    } else {
> +        d[H4(i)] = ((int64_t)a[H4(i)] * b[H2(2 * i + 1)] + (1ull << 14)) >> 15;
> +    }
> +}
> +
> +RVPR(kmmwt2_u, 1, 4);
> +
> +static inline void do_kmmawb2(CPURISCVState *env, void *vd, void *va,
> +                              void *vb, void *vc, uint8_t i)
> +{
> +    int32_t *d = vd, *a = va, *c = vc, result;
> +    int16_t *b = vb;
> +    if (a[H4(i)] == INT32_MIN && b[H2(2 * i)] == INT16_MIN) {
> +        env->vxsat = 0x1;
> +        result = INT32_MAX;
> +    } else {
> +        result = (int64_t)a[H4(i)] * b[H2(2 * i)] >> 15;
> +    }
> +    d[H4(i)] = sadd32(env, 0, result, c[H4(i)]);
> +}
> +
> +RVPR_ACC(kmmawb2, 1, 4);
> +
> +static inline void do_kmmawb2_u(CPURISCVState *env, void *vd, void *va,
> +                                void *vb, void *vc, uint8_t i)
> +{
> +    int32_t *d = vd, *a = va, *c = vc, result;
> +    int16_t *b = vb;
> +    if (a[H4(i)] == INT32_MIN && b[H2(2 * i)] == INT16_MIN) {
> +        env->vxsat = 0x1;
> +        result = INT32_MAX;
> +    } else {
> +        result = ((int64_t)a[H4(i)] * b[H2(2 * i)] + (1ull << 14)) >> 15;
> +    }
> +    d[H4(i)] = sadd32(env, 0, result, c[H4(i)]);
> +}
> +
> +RVPR_ACC(kmmawb2_u, 1, 4);
> +
> +static inline void do_kmmawt2(CPURISCVState *env, void *vd, void *va,
> +                              void *vb, void *vc, uint8_t i)
> +{
> +    int32_t *d = vd, *a = va, *c = vc, result;
> +    int16_t *b = vb;
> +    if (a[H4(i)] == INT32_MIN && b[H2(2 * i + 1)] == INT16_MIN) {
> +        env->vxsat = 0x1;
> +        result = INT32_MAX;
> +    } else {
> +        result = (int64_t)a[H4(i)] * b[H2(2 * i + 1)] >> 15;
> +    }
> +    d[H4(i)] = sadd32(env, 0, result, c[H4(i)]);
> +}
> +
> +RVPR_ACC(kmmawt2, 1, 4);
> +
> +static inline void do_kmmawt2_u(CPURISCVState *env, void *vd, void *va,
> +                                void *vb, void *vc, uint8_t i)
> +{
> +    int32_t *d = vd, *a = va, *c = vc, result;
> +    int16_t *b = vb;
> +    if (a[H4(i)] == INT32_MIN && b[H2(2 * i + 1)] == INT16_MIN) {
> +        env->vxsat = 0x1;
> +        result = INT32_MAX;
> +    } else {
> +        result = ((int64_t)a[H4(i)] * b[H2(2 * i + 1)] + (1ull << 14)) >> 15;
> +    }
> +    d[H4(i)] = sadd32(env, 0, result, c[H4(i)]);
> +}
> +
> +RVPR_ACC(kmmawt2_u, 1, 4);
> --
> 2.17.1
>


WARNING: multiple messages have this Message-ID (diff)
From: Alistair Francis <alistair23@gmail.com>
To: LIU Zhiwei <zhiwei_liu@c-sky.com>
Cc: "qemu-devel@nongnu.org Developers" <qemu-devel@nongnu.org>,
	"open list:RISC-V" <qemu-riscv@nongnu.org>,
	 Richard Henderson <richard.henderson@linaro.org>,
	Palmer Dabbelt <palmer@dabbelt.com>
Subject: Re: [PATCH 17/38] target/riscv: Signed MSW 32x16 Multiply and Add Instructions
Date: Tue, 16 Mar 2021 12:01:33 -0400	[thread overview]
Message-ID: <CAKmqyKN54Tu2TKLf9Tq0anusS_73JMPS6PwA4yMYwZD+XKf1Mg@mail.gmail.com> (raw)
In-Reply-To: <20210212150256.885-18-zhiwei_liu@c-sky.com>

On Fri, Feb 12, 2021 at 10:38 AM LIU Zhiwei <zhiwei_liu@c-sky.com> wrote:
>
> Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com>

Acked-by: Alistair Francis <alistair.francis@wdc.com>

Alistair

> ---
>  target/riscv/helper.h                   |  17 ++
>  target/riscv/insn32.decode              |  17 ++
>  target/riscv/insn_trans/trans_rvp.c.inc |  18 ++
>  target/riscv/packed_helper.c            | 208 ++++++++++++++++++++++++
>  4 files changed, 260 insertions(+)
>
> diff --git a/target/riscv/helper.h b/target/riscv/helper.h
> index 0bd21c8514..25aa07a7ff 100644
> --- a/target/riscv/helper.h
> +++ b/target/riscv/helper.h
> @@ -1277,3 +1277,20 @@ DEF_HELPER_4(kmmsb, tl, env, tl, tl, tl)
>  DEF_HELPER_4(kmmsb_u, tl, env, tl, tl, tl)
>  DEF_HELPER_3(kwmmul, tl, env, tl, tl)
>  DEF_HELPER_3(kwmmul_u, tl, env, tl, tl)
> +
> +DEF_HELPER_3(smmwb, tl, env, tl, tl)
> +DEF_HELPER_3(smmwb_u, tl, env, tl, tl)
> +DEF_HELPER_3(smmwt, tl, env, tl, tl)
> +DEF_HELPER_3(smmwt_u, tl, env, tl, tl)
> +DEF_HELPER_4(kmmawb, tl, env, tl, tl, tl)
> +DEF_HELPER_4(kmmawb_u, tl, env, tl, tl, tl)
> +DEF_HELPER_4(kmmawt, tl, env, tl, tl, tl)
> +DEF_HELPER_4(kmmawt_u, tl, env, tl, tl, tl)
> +DEF_HELPER_3(kmmwb2, tl, env, tl, tl)
> +DEF_HELPER_3(kmmwb2_u, tl, env, tl, tl)
> +DEF_HELPER_3(kmmwt2, tl, env, tl, tl)
> +DEF_HELPER_3(kmmwt2_u, tl, env, tl, tl)
> +DEF_HELPER_4(kmmawb2, tl, env, tl, tl, tl)
> +DEF_HELPER_4(kmmawb2_u, tl, env, tl, tl, tl)
> +DEF_HELPER_4(kmmawt2, tl, env, tl, tl, tl)
> +DEF_HELPER_4(kmmawt2_u, tl, env, tl, tl, tl)
> diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode
> index e0be2790dc..6e63bab2d9 100644
> --- a/target/riscv/insn32.decode
> +++ b/target/riscv/insn32.decode
> @@ -745,3 +745,20 @@ kmmsb      0100001  ..... ..... 001 ..... 1111111 @r
>  kmmsb_u    0101001  ..... ..... 001 ..... 1111111 @r
>  kwmmul     0110001  ..... ..... 001 ..... 1111111 @r
>  kwmmul_u   0111001  ..... ..... 001 ..... 1111111 @r
> +
> +smmwb      0100010  ..... ..... 001 ..... 1111111 @r
> +smmwb_u    0101010  ..... ..... 001 ..... 1111111 @r
> +smmwt      0110010  ..... ..... 001 ..... 1111111 @r
> +smmwt_u    0111010  ..... ..... 001 ..... 1111111 @r
> +kmmawb     0100011  ..... ..... 001 ..... 1111111 @r
> +kmmawb_u   0101011  ..... ..... 001 ..... 1111111 @r
> +kmmawt     0110011  ..... ..... 001 ..... 1111111 @r
> +kmmawt_u   0111011  ..... ..... 001 ..... 1111111 @r
> +kmmwb2     1000111  ..... ..... 001 ..... 1111111 @r
> +kmmwb2_u   1001111  ..... ..... 001 ..... 1111111 @r
> +kmmwt2     1010111  ..... ..... 001 ..... 1111111 @r
> +kmmwt2_u   1011111  ..... ..... 001 ..... 1111111 @r
> +kmmawb2    1100111  ..... ..... 001 ..... 1111111 @r
> +kmmawb2_u  1101111  ..... ..... 001 ..... 1111111 @r
> +kmmawt2    1110111  ..... ..... 001 ..... 1111111 @r
> +kmmawt2_u  1111111  ..... ..... 001 ..... 1111111 @r
> diff --git a/target/riscv/insn_trans/trans_rvp.c.inc b/target/riscv/insn_trans/trans_rvp.c.inc
> index fbc9c0b57b..e708ae7a6a 100644
> --- a/target/riscv/insn_trans/trans_rvp.c.inc
> +++ b/target/riscv/insn_trans/trans_rvp.c.inc
> @@ -564,3 +564,21 @@ GEN_RVP_R_ACC_OOL(kmmsb);
>  GEN_RVP_R_ACC_OOL(kmmsb_u);
>  GEN_RVP_R_OOL(kwmmul);
>  GEN_RVP_R_OOL(kwmmul_u);
> +
> +/* Most Significant Word "32x16" Multiply & Add Instructions */
> +GEN_RVP_R_OOL(smmwb);
> +GEN_RVP_R_OOL(smmwb_u);
> +GEN_RVP_R_OOL(smmwt);
> +GEN_RVP_R_OOL(smmwt_u);
> +GEN_RVP_R_ACC_OOL(kmmawb);
> +GEN_RVP_R_ACC_OOL(kmmawb_u);
> +GEN_RVP_R_ACC_OOL(kmmawt);
> +GEN_RVP_R_ACC_OOL(kmmawt_u);
> +GEN_RVP_R_OOL(kmmwb2);
> +GEN_RVP_R_OOL(kmmwb2_u);
> +GEN_RVP_R_OOL(kmmwt2);
> +GEN_RVP_R_OOL(kmmwt2_u);
> +GEN_RVP_R_ACC_OOL(kmmawb2);
> +GEN_RVP_R_ACC_OOL(kmmawb2_u);
> +GEN_RVP_R_ACC_OOL(kmmawt2);
> +GEN_RVP_R_ACC_OOL(kmmawt2_u);
> diff --git a/target/riscv/packed_helper.c b/target/riscv/packed_helper.c
> index c1322d2fac..ea3c9f6dd8 100644
> --- a/target/riscv/packed_helper.c
> +++ b/target/riscv/packed_helper.c
> @@ -1477,3 +1477,211 @@ static inline void do_kwmmul_u(CPURISCVState *env, void *vd, void *va,
>  }
>
>  RVPR(kwmmul_u, 1, 4);
> +
> +/* Most Significant Word "32x16" Multiply & Add Instructions */
> +static inline void do_smmwb(CPURISCVState *env, void *vd, void *va,
> +                            void *vb, uint8_t i)
> +{
> +    int32_t *d = vd, *a = va;
> +    int16_t *b = vb;
> +    d[H4(i)] = (int64_t)a[H4(i)] * b[H2(2 * i)] >> 16;
> +}
> +
> +RVPR(smmwb, 1, 4);
> +
> +static inline void do_smmwb_u(CPURISCVState *env, void *vd, void *va,
> +                              void *vb, uint8_t i)
> +{
> +    int32_t *d = vd, *a = va;
> +    int16_t *b = vb;
> +    d[H4(i)] = ((int64_t)a[H4(i)] * b[H2(2 * i)] + (1ull << 15)) >> 16;
> +}
> +
> +RVPR(smmwb_u, 1, 4);
> +
> +static inline void do_smmwt(CPURISCVState *env, void *vd, void *va,
> +                            void *vb, uint8_t i)
> +{
> +    int32_t *d = vd, *a = va;
> +    int16_t *b = vb;
> +    d[H4(i)] = (int64_t)a[H4(i)] * b[H2(2 * i + 1)] >> 16;
> +}
> +
> +RVPR(smmwt, 1, 4);
> +
> +static inline void do_smmwt_u(CPURISCVState *env, void *vd, void *va,
> +                              void *vb, uint8_t i)
> +{
> +    int32_t *d = vd, *a = va;
> +    int16_t *b = vb;
> +    d[H4(i)] = ((int64_t)a[H4(i)] * b[H2(2 * i + 1)] + (1ull << 15)) >> 16;
> +}
> +
> +RVPR(smmwt_u, 1, 4);
> +
> +static inline void do_kmmawb(CPURISCVState *env, void *vd, void *va,
> +                             void *vb, void *vc, uint8_t i)
> +{
> +    int32_t *d = vd, *a = va, *c = vc;
> +    int16_t *b = vb;
> +    d[H4(i)] = sadd32(env, 0, (int64_t)a[H4(i)] * b[H2(2 * i)] >> 16, c[H4(i)]);
> +}
> +
> +RVPR_ACC(kmmawb, 1, 4);
> +
> +static inline void do_kmmawb_u(CPURISCVState *env, void *vd, void *va,
> +                               void *vb, void *vc, uint8_t i)
> +{
> +    int32_t *d = vd, *a = va, *c = vc;
> +    int16_t *b = vb;
> +    d[H4(i)] = sadd32(env, 0, ((int64_t)a[H4(i)] * b[H2(2 * i)] +
> +                               (1ull << 15)) >> 16, c[H4(i)]);
> +}
> +
> +RVPR_ACC(kmmawb_u, 1, 4);
> +
> +static inline void do_kmmawt(CPURISCVState *env, void *vd, void *va,
> +                             void *vb, void *vc, uint8_t i)
> +{
> +    int32_t *d = vd, *a = va, *c = vc;
> +    int16_t *b = vb;
> +    d[H4(i)] = sadd32(env, 0, (int64_t)a[H4(i)] * b[H2(2 * i + 1)] >> 16,
> +                      c[H4(i)]);
> +}
> +
> +RVPR_ACC(kmmawt, 1, 4);
> +
> +static inline void do_kmmawt_u(CPURISCVState *env, void *vd, void *va,
> +                               void *vb, void *vc, uint8_t i)
> +{
> +    int32_t *d = vd, *a = va, *c = vc;
> +    int16_t *b = vb;
> +    d[H4(i)] = sadd32(env, 0, ((int64_t)a[H4(i)] * b[H2(2 * i + 1)] +
> +                               (1ull << 15)) >> 16, c[H4(i)]);
> +}
> +
> +RVPR_ACC(kmmawt_u, 1, 4);
> +
> +static inline void do_kmmwb2(CPURISCVState *env, void *vd, void *va,
> +                             void *vb, uint8_t i)
> +{
> +    int32_t *d = vd, *a = va;
> +    int16_t *b = vb;
> +    if (a[H4(i)] == INT32_MIN && b[H2(2 * i)] == INT16_MIN) {
> +        env->vxsat = 0x1;
> +        d[H4(i)] = INT32_MAX;
> +    } else {
> +        d[H4(i)] = (int64_t)a[H4(i)] * b[H2(2 * i)] >> 15;
> +    }
> +}
> +
> +RVPR(kmmwb2, 1, 4);
> +
> +static inline void do_kmmwb2_u(CPURISCVState *env, void *vd, void *va,
> +                               void *vb, uint8_t i)
> +{
> +    int32_t *d = vd, *a = va;
> +    int16_t *b = vb;
> +    if (a[H4(i)] == INT32_MIN && b[H2(2 * i)] == INT16_MIN) {
> +        env->vxsat = 0x1;
> +        d[H4(i)] = INT32_MAX;
> +    } else {
> +        d[H4(i)] = ((int64_t)a[H4(i)] * b[H2(2 * i)] + (1ull << 14)) >> 15;
> +    }
> +}
> +
> +RVPR(kmmwb2_u, 1, 4);
> +
> +static inline void do_kmmwt2(CPURISCVState *env, void *vd, void *va,
> +                             void *vb, uint8_t i)
> +{
> +    int32_t *d = vd, *a = va;
> +    int16_t *b = vb;
> +    if (a[H4(i)] == INT32_MIN && b[H2(2 * i + 1)] == INT16_MIN) {
> +        env->vxsat = 0x1;
> +        d[H4(i)] = INT32_MAX;
> +    } else {
> +        d[H4(i)] = (int64_t)a[H4(i)] * b[H2(2 * i + 1)] >> 15;
> +    }
> +}
> +
> +RVPR(kmmwt2, 1, 4);
> +
> +static inline void do_kmmwt2_u(CPURISCVState *env, void *vd, void *va,
> +                               void *vb, uint8_t i)
> +{
> +    int32_t *d = vd, *a = va;
> +    int16_t *b = vb;
> +    if (a[H4(i)] == INT32_MIN && b[H2(2 * i + 1)] == INT16_MIN) {
> +        env->vxsat = 0x1;
> +        d[H4(i)] = INT32_MAX;
> +    } else {
> +        d[H4(i)] = ((int64_t)a[H4(i)] * b[H2(2 * i + 1)] + (1ull << 14)) >> 15;
> +    }
> +}
> +
> +RVPR(kmmwt2_u, 1, 4);
> +
> +static inline void do_kmmawb2(CPURISCVState *env, void *vd, void *va,
> +                              void *vb, void *vc, uint8_t i)
> +{
> +    int32_t *d = vd, *a = va, *c = vc, result;
> +    int16_t *b = vb;
> +    if (a[H4(i)] == INT32_MIN && b[H2(2 * i)] == INT16_MIN) {
> +        env->vxsat = 0x1;
> +        result = INT32_MAX;
> +    } else {
> +        result = (int64_t)a[H4(i)] * b[H2(2 * i)] >> 15;
> +    }
> +    d[H4(i)] = sadd32(env, 0, result, c[H4(i)]);
> +}
> +
> +RVPR_ACC(kmmawb2, 1, 4);
> +
> +static inline void do_kmmawb2_u(CPURISCVState *env, void *vd, void *va,
> +                                void *vb, void *vc, uint8_t i)
> +{
> +    int32_t *d = vd, *a = va, *c = vc, result;
> +    int16_t *b = vb;
> +    if (a[H4(i)] == INT32_MIN && b[H2(2 * i)] == INT16_MIN) {
> +        env->vxsat = 0x1;
> +        result = INT32_MAX;
> +    } else {
> +        result = ((int64_t)a[H4(i)] * b[H2(2 * i)] + (1ull << 14)) >> 15;
> +    }
> +    d[H4(i)] = sadd32(env, 0, result, c[H4(i)]);
> +}
> +
> +RVPR_ACC(kmmawb2_u, 1, 4);
> +
> +static inline void do_kmmawt2(CPURISCVState *env, void *vd, void *va,
> +                              void *vb, void *vc, uint8_t i)
> +{
> +    int32_t *d = vd, *a = va, *c = vc, result;
> +    int16_t *b = vb;
> +    if (a[H4(i)] == INT32_MIN && b[H2(2 * i + 1)] == INT16_MIN) {
> +        env->vxsat = 0x1;
> +        result = INT32_MAX;
> +    } else {
> +        result = (int64_t)a[H4(i)] * b[H2(2 * i + 1)] >> 15;
> +    }
> +    d[H4(i)] = sadd32(env, 0, result, c[H4(i)]);
> +}
> +
> +RVPR_ACC(kmmawt2, 1, 4);
> +
> +static inline void do_kmmawt2_u(CPURISCVState *env, void *vd, void *va,
> +                                void *vb, void *vc, uint8_t i)
> +{
> +    int32_t *d = vd, *a = va, *c = vc, result;
> +    int16_t *b = vb;
> +    if (a[H4(i)] == INT32_MIN && b[H2(2 * i + 1)] == INT16_MIN) {
> +        env->vxsat = 0x1;
> +        result = INT32_MAX;
> +    } else {
> +        result = ((int64_t)a[H4(i)] * b[H2(2 * i + 1)] + (1ull << 14)) >> 15;
> +    }
> +    d[H4(i)] = sadd32(env, 0, result, c[H4(i)]);
> +}
> +
> +RVPR_ACC(kmmawt2_u, 1, 4);
> --
> 2.17.1
>


  reply	other threads:[~2021-03-16 16:12 UTC|newest]

Thread overview: 150+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-02-12 15:02 [PATCH 00/38] target/riscv: support packed extension v0.9.2 LIU Zhiwei
2021-02-12 15:02 ` LIU Zhiwei
2021-02-12 15:02 ` [PATCH 01/38] target/riscv: implementation-defined constant parameters LIU Zhiwei
2021-02-12 15:02   ` LIU Zhiwei
2021-03-09 14:08   ` Alistair Francis
2021-03-09 14:08     ` Alistair Francis
2021-02-12 15:02 ` [PATCH 02/38] target/riscv: Hoist vector functions LIU Zhiwei
2021-02-12 15:02   ` LIU Zhiwei
2021-03-09 14:10   ` Alistair Francis
2021-03-09 14:10     ` Alistair Francis
2021-02-12 15:02 ` [PATCH 03/38] target/riscv: Fixup saturate subtract function LIU Zhiwei
2021-02-12 15:02   ` LIU Zhiwei
2021-02-12 18:52   ` Richard Henderson
2021-02-12 18:52     ` Richard Henderson
2021-03-09 14:11   ` Alistair Francis
2021-03-09 14:11     ` Alistair Francis
2021-02-12 15:02 ` [PATCH 04/38] target/riscv: 16-bit Addition & Subtraction Instructions LIU Zhiwei
2021-02-12 15:02   ` LIU Zhiwei
2021-02-12 18:03   ` Richard Henderson
2021-02-12 18:03     ` Richard Henderson
2021-02-18  8:39     ` LIU Zhiwei
2021-02-18  8:39       ` LIU Zhiwei
2021-02-18 16:20       ` Richard Henderson
2021-02-18 16:20         ` Richard Henderson
2021-02-12 19:02   ` Richard Henderson
2021-02-12 19:02     ` Richard Henderson
2021-02-18  8:47     ` LIU Zhiwei
2021-02-18  8:47       ` LIU Zhiwei
2021-02-18 16:21       ` Richard Henderson
2021-02-18 16:21         ` Richard Henderson
2021-02-12 15:02 ` [PATCH 05/38] target/riscv: 8-bit Addition & Subtraction Instruction LIU Zhiwei
2021-02-12 15:02   ` LIU Zhiwei
2021-03-15 21:22   ` Alistair Francis
2021-03-15 21:22     ` Alistair Francis
2021-05-24  1:00     ` Palmer Dabbelt
2021-05-24  1:00       ` Palmer Dabbelt
2021-05-26  5:43       ` LIU Zhiwei
2021-05-26  5:43         ` LIU Zhiwei
2021-05-26  6:15         ` Palmer Dabbelt
2021-05-26  6:15           ` Palmer Dabbelt
2021-02-12 15:02 ` [PATCH 06/38] target/riscv: SIMD 16-bit Shift Instructions LIU Zhiwei
2021-02-12 15:02   ` LIU Zhiwei
2021-03-15 21:25   ` Alistair Francis
2021-03-15 21:25     ` Alistair Francis
2021-03-16  2:40     ` LIU Zhiwei
2021-03-16  2:40       ` LIU Zhiwei
2021-03-16 19:54       ` Alistair Francis
2021-03-16 19:54         ` Alistair Francis
2021-03-17  2:30         ` LIU Zhiwei
2021-03-17  2:30           ` LIU Zhiwei
2021-03-17 20:39           ` Alistair Francis
2021-03-17 20:39             ` Alistair Francis
2021-02-12 15:02 ` [PATCH 07/38] target/riscv: SIMD 8-bit " LIU Zhiwei
2021-02-12 15:02   ` LIU Zhiwei
2021-03-15 21:27   ` Alistair Francis
2021-03-15 21:27     ` Alistair Francis
2021-05-24  4:46   ` Palmer Dabbelt
2021-05-24  4:46     ` Palmer Dabbelt
2021-02-12 15:02 ` [PATCH 08/38] target/riscv: SIMD 16-bit Compare Instructions LIU Zhiwei
2021-02-12 15:02   ` LIU Zhiwei
2021-03-15 21:28   ` Alistair Francis
2021-03-15 21:28     ` Alistair Francis
2021-05-26  5:30   ` Palmer Dabbelt
2021-05-26  5:30     ` Palmer Dabbelt
2021-05-26  5:31     ` Palmer Dabbelt
2021-05-26  5:31       ` Palmer Dabbelt
2021-02-12 15:02 ` [PATCH 09/38] target/riscv: SIMD 8-bit " LIU Zhiwei
2021-02-12 15:02   ` LIU Zhiwei
2021-03-15 21:31   ` Alistair Francis
2021-03-15 21:31     ` Alistair Francis
2021-02-12 15:02 ` [PATCH 10/38] target/riscv: SIMD 16-bit Multiply Instructions LIU Zhiwei
2021-02-12 15:02   ` LIU Zhiwei
2021-02-12 15:02 ` [PATCH 11/38] target/riscv: SIMD 8-bit " LIU Zhiwei
2021-02-12 15:02   ` LIU Zhiwei
2021-03-15 21:33   ` Alistair Francis
2021-03-15 21:33     ` Alistair Francis
2021-02-12 15:02 ` [PATCH 12/38] target/riscv: SIMD 16-bit Miscellaneous Instructions LIU Zhiwei
2021-02-12 15:02   ` LIU Zhiwei
2021-03-15 21:35   ` Alistair Francis
2021-03-15 21:35     ` Alistair Francis
2021-02-12 15:02 ` [PATCH 13/38] target/riscv: SIMD 8-bit " LIU Zhiwei
2021-02-12 15:02   ` LIU Zhiwei
2021-03-16 14:38   ` Alistair Francis
2021-03-16 14:38     ` Alistair Francis
2021-02-12 15:02 ` [PATCH 14/38] target/riscv: 8-bit Unpacking Instructions LIU Zhiwei
2021-02-12 15:02   ` LIU Zhiwei
2021-03-16 14:40   ` Alistair Francis
2021-03-16 14:40     ` Alistair Francis
2021-02-12 15:02 ` [PATCH 15/38] target/riscv: 16-bit Packing Instructions LIU Zhiwei
2021-02-12 15:02   ` LIU Zhiwei
2021-03-16 14:42   ` Alistair Francis
2021-03-16 14:42     ` Alistair Francis
2021-02-12 15:02 ` [PATCH 16/38] target/riscv: Signed MSW 32x32 Multiply and Add Instructions LIU Zhiwei
2021-02-12 15:02   ` LIU Zhiwei
2021-02-12 15:02 ` [PATCH 17/38] target/riscv: Signed MSW 32x16 " LIU Zhiwei
2021-02-12 15:02   ` LIU Zhiwei
2021-03-16 16:01   ` Alistair Francis [this message]
2021-03-16 16:01     ` Alistair Francis
2021-02-12 15:02 ` [PATCH 18/38] target/riscv: Signed 16-bit Multiply 32-bit Add/Subtract Instructions LIU Zhiwei
2021-02-12 15:02   ` LIU Zhiwei
2021-02-12 15:02 ` [PATCH 19/38] target/riscv: Signed 16-bit Multiply 64-bit " LIU Zhiwei
2021-02-12 15:02   ` LIU Zhiwei
2021-02-12 15:02 ` [PATCH 20/38] target/riscv: Partial-SIMD Miscellaneous Instructions LIU Zhiwei
2021-02-12 15:02   ` LIU Zhiwei
2021-03-16 19:44   ` Alistair Francis
2021-03-16 19:44     ` Alistair Francis
2021-02-12 15:02 ` [PATCH 21/38] target/riscv: 8-bit Multiply with 32-bit Add Instructions LIU Zhiwei
2021-02-12 15:02   ` LIU Zhiwei
2021-02-12 15:02 ` [PATCH 22/38] target/riscv: 64-bit Add/Subtract Instructions LIU Zhiwei
2021-02-12 15:02   ` LIU Zhiwei
2021-02-12 15:02 ` [PATCH 23/38] target/riscv: 32-bit Multiply " LIU Zhiwei
2021-02-12 15:02   ` LIU Zhiwei
2021-02-12 15:02 ` [PATCH 24/38] target/riscv: Signed 16-bit Multiply with " LIU Zhiwei
2021-02-12 15:02   ` LIU Zhiwei
2021-02-12 15:02 ` [PATCH 25/38] target/riscv: Non-SIMD Q15 saturation ALU Instructions LIU Zhiwei
2021-02-12 15:02   ` LIU Zhiwei
2021-02-12 15:02 ` [PATCH 26/38] target/riscv: Non-SIMD Q31 " LIU Zhiwei
2021-02-12 15:02   ` LIU Zhiwei
2021-02-12 15:02 ` [PATCH 27/38] target/riscv: 32-bit Computation Instructions LIU Zhiwei
2021-02-12 15:02   ` LIU Zhiwei
2021-02-12 15:02 ` [PATCH 28/38] target/riscv: Non-SIMD Miscellaneous Instructions LIU Zhiwei
2021-02-12 15:02   ` LIU Zhiwei
2021-02-12 15:02 ` [PATCH 29/38] target/riscv: RV64 Only SIMD 32-bit Add/Subtract Instructions LIU Zhiwei
2021-02-12 15:02   ` LIU Zhiwei
2021-02-12 15:02 ` [PATCH 30/38] target/riscv: RV64 Only SIMD 32-bit Shift Instructions LIU Zhiwei
2021-02-12 15:02   ` LIU Zhiwei
2021-02-12 15:02 ` [PATCH 31/38] target/riscv: RV64 Only SIMD 32-bit Miscellaneous Instructions LIU Zhiwei
2021-02-12 15:02   ` LIU Zhiwei
2021-02-12 15:02 ` [PATCH 32/38] target/riscv: RV64 Only SIMD Q15 saturating Multiply Instructions LIU Zhiwei
2021-02-12 15:02   ` LIU Zhiwei
2021-02-12 15:02 ` [PATCH 33/38] target/riscv: RV64 Only 32-bit " LIU Zhiwei
2021-02-12 15:02   ` LIU Zhiwei
2021-02-12 15:02 ` [PATCH 34/38] target/riscv: RV64 Only 32-bit Multiply & Add Instructions LIU Zhiwei
2021-02-12 15:02   ` LIU Zhiwei
2021-02-12 15:02 ` [PATCH 35/38] target/riscv: RV64 Only 32-bit Parallel " LIU Zhiwei
2021-02-12 15:02   ` LIU Zhiwei
2021-02-12 15:02 ` [PATCH 36/38] target/riscv: RV64 Only Non-SIMD 32-bit Shift Instructions LIU Zhiwei
2021-02-12 15:02   ` LIU Zhiwei
2021-02-12 15:02 ` [PATCH 37/38] target/riscv: RV64 Only 32-bit Packing Instructions LIU Zhiwei
2021-02-12 15:02   ` LIU Zhiwei
2021-02-12 15:02 ` [PATCH 38/38] target/riscv: configure and turn on packed extension from command line LIU Zhiwei
2021-02-12 15:02   ` LIU Zhiwei
2021-03-05  6:14 ` [PATCH 00/38] target/riscv: support packed extension v0.9.2 LIU Zhiwei
2021-03-05  6:14   ` LIU Zhiwei
2021-04-13  3:27 ` LIU Zhiwei
2021-04-13  3:27   ` LIU Zhiwei
2021-04-15  4:46   ` Alistair Francis
2021-04-15  4:46     ` Alistair Francis
2021-04-15  5:50     ` LIU Zhiwei
2021-04-15  5:50       ` LIU Zhiwei

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=CAKmqyKN54Tu2TKLf9Tq0anusS_73JMPS6PwA4yMYwZD+XKf1Mg@mail.gmail.com \
    --to=alistair23@gmail.com \
    --cc=palmer@dabbelt.com \
    --cc=qemu-devel@nongnu.org \
    --cc=qemu-riscv@nongnu.org \
    --cc=richard.henderson@linaro.org \
    --cc=zhiwei_liu@c-sky.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes;
see the mirroring instructions for how to clone and mirror
all data and code used by this external index.