From: LIU Zhiwei <zhiwei_liu@c-sky.com> To: qemu-devel@nongnu.org, qemu-riscv@nongnu.org Cc: palmer@dabbelt.com, richard.henderson@linaro.org, bin.meng@windriver.com, Alistair Francis <alistair.francis@wdc.com>, LIU Zhiwei <zhiwei_liu@c-sky.com> Subject: [PATCH v2 16/37] target/riscv: Signed MSW 32x16 Multiply and Add Instructions Date: Thu, 10 Jun 2021 15:58:47 +0800 [thread overview] Message-ID: <20210610075908.3305506-17-zhiwei_liu@c-sky.com> (raw) In-Reply-To: <20210610075908.3305506-1-zhiwei_liu@c-sky.com> These instructions always contain a 32x16 multiplication; the most significant word of the product can be used as the result, or as an operand for an add or subtract operation with rounding or saturation. Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com> Acked-by: Alistair Francis <alistair.francis@wdc.com> --- target/riscv/helper.h | 17 ++ target/riscv/insn32.decode | 17 ++ target/riscv/insn_trans/trans_rvp.c.inc | 18 ++ target/riscv/packed_helper.c | 208 ++++++++++++++++++++++++ 4 files changed, 260 insertions(+) diff --git a/target/riscv/helper.h b/target/riscv/helper.h index 600e8dee44..854f48d385 100644 --- a/target/riscv/helper.h +++ b/target/riscv/helper.h @@ -1280,3 +1280,20 @@ DEF_HELPER_4(kmmsb, tl, env, tl, tl, tl) DEF_HELPER_4(kmmsb_u, tl, env, tl, tl, tl) DEF_HELPER_3(kwmmul, tl, env, tl, tl) DEF_HELPER_3(kwmmul_u, tl, env, tl, tl) + +DEF_HELPER_3(smmwb, tl, env, tl, tl) +DEF_HELPER_3(smmwb_u, tl, env, tl, tl) +DEF_HELPER_3(smmwt, tl, env, tl, tl) +DEF_HELPER_3(smmwt_u, tl, env, tl, tl) +DEF_HELPER_4(kmmawb, tl, env, tl, tl, tl) +DEF_HELPER_4(kmmawb_u, tl, env, tl, tl, tl) +DEF_HELPER_4(kmmawt, tl, env, tl, tl, tl) +DEF_HELPER_4(kmmawt_u, tl, env, tl, tl, tl) +DEF_HELPER_3(kmmwb2, tl, env, tl, tl) +DEF_HELPER_3(kmmwb2_u, tl, env, tl, tl) +DEF_HELPER_3(kmmwt2, tl, env, tl, tl) +DEF_HELPER_3(kmmwt2_u, tl, env, tl, tl) +DEF_HELPER_4(kmmawb2, tl, env, tl, tl, tl) +DEF_HELPER_4(kmmawb2_u, tl, env, tl, tl, tl) +DEF_HELPER_4(kmmawt2, tl, env, tl, tl, tl) +DEF_HELPER_4(kmmawt2_u, tl, 
env, tl, tl, tl) diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode index 0484de140b..e5a8f663dc 100644 --- a/target/riscv/insn32.decode +++ b/target/riscv/insn32.decode @@ -883,3 +883,20 @@ kmmsb 0100001 ..... ..... 001 ..... 1110111 @r kmmsb_u 0101001 ..... ..... 001 ..... 1110111 @r kwmmul 0110001 ..... ..... 001 ..... 1110111 @r kwmmul_u 0111001 ..... ..... 001 ..... 1110111 @r + +smmwb 0100010 ..... ..... 001 ..... 1110111 @r +smmwb_u 0101010 ..... ..... 001 ..... 1110111 @r +smmwt 0110010 ..... ..... 001 ..... 1110111 @r +smmwt_u 0111010 ..... ..... 001 ..... 1110111 @r +kmmawb 0100011 ..... ..... 001 ..... 1110111 @r +kmmawb_u 0101011 ..... ..... 001 ..... 1110111 @r +kmmawt 0110011 ..... ..... 001 ..... 1110111 @r +kmmawt_u 0111011 ..... ..... 001 ..... 1110111 @r +kmmwb2 1000111 ..... ..... 001 ..... 1110111 @r +kmmwb2_u 1001111 ..... ..... 001 ..... 1110111 @r +kmmwt2 1010111 ..... ..... 001 ..... 1110111 @r +kmmwt2_u 1011111 ..... ..... 001 ..... 1110111 @r +kmmawb2 1100111 ..... ..... 001 ..... 1110111 @r +kmmawb2_u 1101111 ..... ..... 001 ..... 1110111 @r +kmmawt2 1110111 ..... ..... 001 ..... 1110111 @r +kmmawt2_u 1111111 ..... ..... 001 ..... 
1110111 @r diff --git a/target/riscv/insn_trans/trans_rvp.c.inc b/target/riscv/insn_trans/trans_rvp.c.inc index 073558b950..af490a5ef0 100644 --- a/target/riscv/insn_trans/trans_rvp.c.inc +++ b/target/riscv/insn_trans/trans_rvp.c.inc @@ -413,3 +413,21 @@ GEN_RVP_R_ACC_OOL(kmmsb); GEN_RVP_R_ACC_OOL(kmmsb_u); GEN_RVP_R_OOL(kwmmul); GEN_RVP_R_OOL(kwmmul_u); + +/* Most Significant Word “32x16” Multiply & Add Instructions */ +GEN_RVP_R_OOL(smmwb); +GEN_RVP_R_OOL(smmwb_u); +GEN_RVP_R_OOL(smmwt); +GEN_RVP_R_OOL(smmwt_u); +GEN_RVP_R_ACC_OOL(kmmawb); +GEN_RVP_R_ACC_OOL(kmmawb_u); +GEN_RVP_R_ACC_OOL(kmmawt); +GEN_RVP_R_ACC_OOL(kmmawt_u); +GEN_RVP_R_OOL(kmmwb2); +GEN_RVP_R_OOL(kmmwb2_u); +GEN_RVP_R_OOL(kmmwt2); +GEN_RVP_R_OOL(kmmwt2_u); +GEN_RVP_R_ACC_OOL(kmmawb2); +GEN_RVP_R_ACC_OOL(kmmawb2_u); +GEN_RVP_R_ACC_OOL(kmmawt2); +GEN_RVP_R_ACC_OOL(kmmawt2_u); diff --git a/target/riscv/packed_helper.c b/target/riscv/packed_helper.c index 465cb5a3b3..868a1a71ba 100644 --- a/target/riscv/packed_helper.c +++ b/target/riscv/packed_helper.c @@ -1468,3 +1468,211 @@ static inline void do_kwmmul_u(CPURISCVState *env, void *vd, void *va, } RVPR(kwmmul_u, 1, 4); + +/* Most Significant Word “32x16” Multiply & Add Instructions */ +static inline void do_smmwb(CPURISCVState *env, void *vd, void *va, + void *vb, uint8_t i) +{ + int32_t *d = vd, *a = va; + int16_t *b = vb; + d[H4(i)] = (int64_t)a[H4(i)] * b[H2(2 * i)] >> 16; +} + +RVPR(smmwb, 1, 4); + +static inline void do_smmwb_u(CPURISCVState *env, void *vd, void *va, + void *vb, uint8_t i) +{ + int32_t *d = vd, *a = va; + int16_t *b = vb; + d[H4(i)] = ((int64_t)a[H4(i)] * b[H2(2 * i)] + (1ull << 15)) >> 16; +} + +RVPR(smmwb_u, 1, 4); + +static inline void do_smmwt(CPURISCVState *env, void *vd, void *va, + void *vb, uint8_t i) +{ + int32_t *d = vd, *a = va; + int16_t *b = vb; + d[H4(i)] = (int64_t)a[H4(i)] * b[H2(2 * i + 1)] >> 16; +} + +RVPR(smmwt, 1, 4); + +static inline void do_smmwt_u(CPURISCVState *env, void *vd, void *va, + void *vb, 
uint8_t i) +{ + int32_t *d = vd, *a = va; + int16_t *b = vb; + d[H4(i)] = ((int64_t)a[H4(i)] * b[H2(2 * i + 1)] + (1ull << 15)) >> 16; +} + +RVPR(smmwt_u, 1, 4); + +static inline void do_kmmawb(CPURISCVState *env, void *vd, void *va, + void *vb, void *vc, uint8_t i) +{ + int32_t *d = vd, *a = va, *c = vc; + int16_t *b = vb; + d[H4(i)] = sadd32(env, 0, (int64_t)a[H4(i)] * b[H2(2 * i)] >> 16, c[H4(i)]); +} + +RVPR_ACC(kmmawb, 1, 4); + +static inline void do_kmmawb_u(CPURISCVState *env, void *vd, void *va, + void *vb, void *vc, uint8_t i) +{ + int32_t *d = vd, *a = va, *c = vc; + int16_t *b = vb; + d[H4(i)] = sadd32(env, 0, ((int64_t)a[H4(i)] * b[H2(2 * i)] + + (1ull << 15)) >> 16, c[H4(i)]); +} + +RVPR_ACC(kmmawb_u, 1, 4); + +static inline void do_kmmawt(CPURISCVState *env, void *vd, void *va, + void *vb, void *vc, uint8_t i) +{ + int32_t *d = vd, *a = va, *c = vc; + int16_t *b = vb; + d[H4(i)] = sadd32(env, 0, (int64_t)a[H4(i)] * b[H2(2 * i + 1)] >> 16, + c[H4(i)]); +} + +RVPR_ACC(kmmawt, 1, 4); + +static inline void do_kmmawt_u(CPURISCVState *env, void *vd, void *va, + void *vb, void *vc, uint8_t i) +{ + int32_t *d = vd, *a = va, *c = vc; + int16_t *b = vb; + d[H4(i)] = sadd32(env, 0, ((int64_t)a[H4(i)] * b[H2(2 * i + 1)] + + (1ull << 15)) >> 16, c[H4(i)]); +} + +RVPR_ACC(kmmawt_u, 1, 4); + +static inline void do_kmmwb2(CPURISCVState *env, void *vd, void *va, + void *vb, uint8_t i) +{ + int32_t *d = vd, *a = va; + int16_t *b = vb; + if (a[H4(i)] == INT32_MIN && b[H2(2 * i)] == INT16_MIN) { + env->vxsat = 0x1; + d[H4(i)] = INT32_MAX; + } else { + d[H4(i)] = (int64_t)a[H4(i)] * b[H2(2 * i)] >> 15; + } +} + +RVPR(kmmwb2, 1, 4); + +static inline void do_kmmwb2_u(CPURISCVState *env, void *vd, void *va, + void *vb, uint8_t i) +{ + int32_t *d = vd, *a = va; + int16_t *b = vb; + if (a[H4(i)] == INT32_MIN && b[H2(2 * i)] == INT16_MIN) { + env->vxsat = 0x1; + d[H4(i)] = INT32_MAX; + } else { + d[H4(i)] = ((int64_t)a[H4(i)] * b[H2(2 * i)] + (1ull << 14)) >> 15; + } +} + 
+RVPR(kmmwb2_u, 1, 4); + +static inline void do_kmmwt2(CPURISCVState *env, void *vd, void *va, + void *vb, uint8_t i) +{ + int32_t *d = vd, *a = va; + int16_t *b = vb; + if (a[H4(i)] == INT32_MIN && b[H2(2 * i + 1)] == INT16_MIN) { + env->vxsat = 0x1; + d[H4(i)] = INT32_MAX; + } else { + d[H4(i)] = (int64_t)a[H4(i)] * b[H2(2 * i + 1)] >> 15; + } +} + +RVPR(kmmwt2, 1, 4); + +static inline void do_kmmwt2_u(CPURISCVState *env, void *vd, void *va, + void *vb, uint8_t i) +{ + int32_t *d = vd, *a = va; + int16_t *b = vb; + if (a[H4(i)] == INT32_MIN && b[H2(2 * i + 1)] == INT16_MIN) { + env->vxsat = 0x1; + d[H4(i)] = INT32_MAX; + } else { + d[H4(i)] = ((int64_t)a[H4(i)] * b[H2(2 * i + 1)] + (1ull << 14)) >> 15; + } +} + +RVPR(kmmwt2_u, 1, 4); + +static inline void do_kmmawb2(CPURISCVState *env, void *vd, void *va, + void *vb, void *vc, uint8_t i) +{ + int32_t *d = vd, *a = va, *c = vc, result; + int16_t *b = vb; + if (a[H4(i)] == INT32_MIN && b[H2(2 * i)] == INT16_MIN) { + env->vxsat = 0x1; + result = INT32_MAX; + } else { + result = (int64_t)a[H4(i)] * b[H2(2 * i)] >> 15; + } + d[H4(i)] = sadd32(env, 0, result, c[H4(i)]); +} + +RVPR_ACC(kmmawb2, 1, 4); + +static inline void do_kmmawb2_u(CPURISCVState *env, void *vd, void *va, + void *vb, void *vc, uint8_t i) +{ + int32_t *d = vd, *a = va, *c = vc, result; + int16_t *b = vb; + if (a[H4(i)] == INT32_MIN && b[H2(2 * i)] == INT16_MIN) { + env->vxsat = 0x1; + result = INT32_MAX; + } else { + result = ((int64_t)a[H4(i)] * b[H2(2 * i)] + (1ull << 14)) >> 15; + } + d[H4(i)] = sadd32(env, 0, result, c[H4(i)]); +} + +RVPR_ACC(kmmawb2_u, 1, 4); + +static inline void do_kmmawt2(CPURISCVState *env, void *vd, void *va, + void *vb, void *vc, uint8_t i) +{ + int32_t *d = vd, *a = va, *c = vc, result; + int16_t *b = vb; + if (a[H4(i)] == INT32_MIN && b[H2(2 * i + 1)] == INT16_MIN) { + env->vxsat = 0x1; + result = INT32_MAX; + } else { + result = (int64_t)a[H4(i)] * b[H2(2 * i + 1)] >> 15; + } + d[H4(i)] = sadd32(env, 0, result, 
c[H4(i)]); +} + +RVPR_ACC(kmmawt2, 1, 4); + +static inline void do_kmmawt2_u(CPURISCVState *env, void *vd, void *va, + void *vb, void *vc, uint8_t i) +{ + int32_t *d = vd, *a = va, *c = vc, result; + int16_t *b = vb; + if (a[H4(i)] == INT32_MIN && b[H2(2 * i + 1)] == INT16_MIN) { + env->vxsat = 0x1; + result = INT32_MAX; + } else { + result = ((int64_t)a[H4(i)] * b[H2(2 * i + 1)] + (1ull << 14)) >> 15; + } + d[H4(i)] = sadd32(env, 0, result, c[H4(i)]); +} + +RVPR_ACC(kmmawt2_u, 1, 4); -- 2.25.1
WARNING: multiple messages have this Message-ID (diff)
From: LIU Zhiwei <zhiwei_liu@c-sky.com> To: qemu-devel@nongnu.org, qemu-riscv@nongnu.org Cc: Alistair.Francis@wdc.com, palmer@dabbelt.com, bin.meng@windriver.com, richard.henderson@linaro.org, LIU Zhiwei <zhiwei_liu@c-sky.com>, Alistair Francis <alistair.francis@wdc.com> Subject: [PATCH v2 16/37] target/riscv: Signed MSW 32x16 Multiply and Add Instructions Date: Thu, 10 Jun 2021 15:58:47 +0800 [thread overview] Message-ID: <20210610075908.3305506-17-zhiwei_liu@c-sky.com> (raw) In-Reply-To: <20210610075908.3305506-1-zhiwei_liu@c-sky.com> These instructions always contain a 32x16 multiplication; the most significant word of the product can be used as the result, or as an operand for an add or subtract operation with rounding or saturation. Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com> Acked-by: Alistair Francis <alistair.francis@wdc.com> --- target/riscv/helper.h | 17 ++ target/riscv/insn32.decode | 17 ++ target/riscv/insn_trans/trans_rvp.c.inc | 18 ++ target/riscv/packed_helper.c | 208 ++++++++++++++++++++++++ 4 files changed, 260 insertions(+) diff --git a/target/riscv/helper.h b/target/riscv/helper.h index 600e8dee44..854f48d385 100644 --- a/target/riscv/helper.h +++ b/target/riscv/helper.h @@ -1280,3 +1280,20 @@ DEF_HELPER_4(kmmsb, tl, env, tl, tl, tl) DEF_HELPER_4(kmmsb_u, tl, env, tl, tl, tl) DEF_HELPER_3(kwmmul, tl, env, tl, tl) DEF_HELPER_3(kwmmul_u, tl, env, tl, tl) + +DEF_HELPER_3(smmwb, tl, env, tl, tl) +DEF_HELPER_3(smmwb_u, tl, env, tl, tl) +DEF_HELPER_3(smmwt, tl, env, tl, tl) +DEF_HELPER_3(smmwt_u, tl, env, tl, tl) +DEF_HELPER_4(kmmawb, tl, env, tl, tl, tl) +DEF_HELPER_4(kmmawb_u, tl, env, tl, tl, tl) +DEF_HELPER_4(kmmawt, tl, env, tl, tl, tl) +DEF_HELPER_4(kmmawt_u, tl, env, tl, tl, tl) +DEF_HELPER_3(kmmwb2, tl, env, tl, tl) +DEF_HELPER_3(kmmwb2_u, tl, env, tl, tl) +DEF_HELPER_3(kmmwt2, tl, env, tl, tl) +DEF_HELPER_3(kmmwt2_u, tl, env, tl, tl) +DEF_HELPER_4(kmmawb2, tl, env, tl, tl, tl) +DEF_HELPER_4(kmmawb2_u, tl, env, tl, tl, tl) +DEF_HELPER_4(kmmawt2, tl, env, tl, tl, tl) 
+DEF_HELPER_4(kmmawt2_u, tl, env, tl, tl, tl) diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode index 0484de140b..e5a8f663dc 100644 --- a/target/riscv/insn32.decode +++ b/target/riscv/insn32.decode @@ -883,3 +883,20 @@ kmmsb 0100001 ..... ..... 001 ..... 1110111 @r kmmsb_u 0101001 ..... ..... 001 ..... 1110111 @r kwmmul 0110001 ..... ..... 001 ..... 1110111 @r kwmmul_u 0111001 ..... ..... 001 ..... 1110111 @r + +smmwb 0100010 ..... ..... 001 ..... 1110111 @r +smmwb_u 0101010 ..... ..... 001 ..... 1110111 @r +smmwt 0110010 ..... ..... 001 ..... 1110111 @r +smmwt_u 0111010 ..... ..... 001 ..... 1110111 @r +kmmawb 0100011 ..... ..... 001 ..... 1110111 @r +kmmawb_u 0101011 ..... ..... 001 ..... 1110111 @r +kmmawt 0110011 ..... ..... 001 ..... 1110111 @r +kmmawt_u 0111011 ..... ..... 001 ..... 1110111 @r +kmmwb2 1000111 ..... ..... 001 ..... 1110111 @r +kmmwb2_u 1001111 ..... ..... 001 ..... 1110111 @r +kmmwt2 1010111 ..... ..... 001 ..... 1110111 @r +kmmwt2_u 1011111 ..... ..... 001 ..... 1110111 @r +kmmawb2 1100111 ..... ..... 001 ..... 1110111 @r +kmmawb2_u 1101111 ..... ..... 001 ..... 1110111 @r +kmmawt2 1110111 ..... ..... 001 ..... 1110111 @r +kmmawt2_u 1111111 ..... ..... 001 ..... 
1110111 @r diff --git a/target/riscv/insn_trans/trans_rvp.c.inc b/target/riscv/insn_trans/trans_rvp.c.inc index 073558b950..af490a5ef0 100644 --- a/target/riscv/insn_trans/trans_rvp.c.inc +++ b/target/riscv/insn_trans/trans_rvp.c.inc @@ -413,3 +413,21 @@ GEN_RVP_R_ACC_OOL(kmmsb); GEN_RVP_R_ACC_OOL(kmmsb_u); GEN_RVP_R_OOL(kwmmul); GEN_RVP_R_OOL(kwmmul_u); + +/* Most Significant Word “32x16” Multiply & Add Instructions */ +GEN_RVP_R_OOL(smmwb); +GEN_RVP_R_OOL(smmwb_u); +GEN_RVP_R_OOL(smmwt); +GEN_RVP_R_OOL(smmwt_u); +GEN_RVP_R_ACC_OOL(kmmawb); +GEN_RVP_R_ACC_OOL(kmmawb_u); +GEN_RVP_R_ACC_OOL(kmmawt); +GEN_RVP_R_ACC_OOL(kmmawt_u); +GEN_RVP_R_OOL(kmmwb2); +GEN_RVP_R_OOL(kmmwb2_u); +GEN_RVP_R_OOL(kmmwt2); +GEN_RVP_R_OOL(kmmwt2_u); +GEN_RVP_R_ACC_OOL(kmmawb2); +GEN_RVP_R_ACC_OOL(kmmawb2_u); +GEN_RVP_R_ACC_OOL(kmmawt2); +GEN_RVP_R_ACC_OOL(kmmawt2_u); diff --git a/target/riscv/packed_helper.c b/target/riscv/packed_helper.c index 465cb5a3b3..868a1a71ba 100644 --- a/target/riscv/packed_helper.c +++ b/target/riscv/packed_helper.c @@ -1468,3 +1468,211 @@ static inline void do_kwmmul_u(CPURISCVState *env, void *vd, void *va, } RVPR(kwmmul_u, 1, 4); + +/* Most Significant Word “32x16” Multiply & Add Instructions */ +static inline void do_smmwb(CPURISCVState *env, void *vd, void *va, + void *vb, uint8_t i) +{ + int32_t *d = vd, *a = va; + int16_t *b = vb; + d[H4(i)] = (int64_t)a[H4(i)] * b[H2(2 * i)] >> 16; +} + +RVPR(smmwb, 1, 4); + +static inline void do_smmwb_u(CPURISCVState *env, void *vd, void *va, + void *vb, uint8_t i) +{ + int32_t *d = vd, *a = va; + int16_t *b = vb; + d[H4(i)] = ((int64_t)a[H4(i)] * b[H2(2 * i)] + (1ull << 15)) >> 16; +} + +RVPR(smmwb_u, 1, 4); + +static inline void do_smmwt(CPURISCVState *env, void *vd, void *va, + void *vb, uint8_t i) +{ + int32_t *d = vd, *a = va; + int16_t *b = vb; + d[H4(i)] = (int64_t)a[H4(i)] * b[H2(2 * i + 1)] >> 16; +} + +RVPR(smmwt, 1, 4); + +static inline void do_smmwt_u(CPURISCVState *env, void *vd, void *va, + void *vb, 
uint8_t i) +{ + int32_t *d = vd, *a = va; + int16_t *b = vb; + d[H4(i)] = ((int64_t)a[H4(i)] * b[H2(2 * i + 1)] + (1ull << 15)) >> 16; +} + +RVPR(smmwt_u, 1, 4); + +static inline void do_kmmawb(CPURISCVState *env, void *vd, void *va, + void *vb, void *vc, uint8_t i) +{ + int32_t *d = vd, *a = va, *c = vc; + int16_t *b = vb; + d[H4(i)] = sadd32(env, 0, (int64_t)a[H4(i)] * b[H2(2 * i)] >> 16, c[H4(i)]); +} + +RVPR_ACC(kmmawb, 1, 4); + +static inline void do_kmmawb_u(CPURISCVState *env, void *vd, void *va, + void *vb, void *vc, uint8_t i) +{ + int32_t *d = vd, *a = va, *c = vc; + int16_t *b = vb; + d[H4(i)] = sadd32(env, 0, ((int64_t)a[H4(i)] * b[H2(2 * i)] + + (1ull << 15)) >> 16, c[H4(i)]); +} + +RVPR_ACC(kmmawb_u, 1, 4); + +static inline void do_kmmawt(CPURISCVState *env, void *vd, void *va, + void *vb, void *vc, uint8_t i) +{ + int32_t *d = vd, *a = va, *c = vc; + int16_t *b = vb; + d[H4(i)] = sadd32(env, 0, (int64_t)a[H4(i)] * b[H2(2 * i + 1)] >> 16, + c[H4(i)]); +} + +RVPR_ACC(kmmawt, 1, 4); + +static inline void do_kmmawt_u(CPURISCVState *env, void *vd, void *va, + void *vb, void *vc, uint8_t i) +{ + int32_t *d = vd, *a = va, *c = vc; + int16_t *b = vb; + d[H4(i)] = sadd32(env, 0, ((int64_t)a[H4(i)] * b[H2(2 * i + 1)] + + (1ull << 15)) >> 16, c[H4(i)]); +} + +RVPR_ACC(kmmawt_u, 1, 4); + +static inline void do_kmmwb2(CPURISCVState *env, void *vd, void *va, + void *vb, uint8_t i) +{ + int32_t *d = vd, *a = va; + int16_t *b = vb; + if (a[H4(i)] == INT32_MIN && b[H2(2 * i)] == INT16_MIN) { + env->vxsat = 0x1; + d[H4(i)] = INT32_MAX; + } else { + d[H4(i)] = (int64_t)a[H4(i)] * b[H2(2 * i)] >> 15; + } +} + +RVPR(kmmwb2, 1, 4); + +static inline void do_kmmwb2_u(CPURISCVState *env, void *vd, void *va, + void *vb, uint8_t i) +{ + int32_t *d = vd, *a = va; + int16_t *b = vb; + if (a[H4(i)] == INT32_MIN && b[H2(2 * i)] == INT16_MIN) { + env->vxsat = 0x1; + d[H4(i)] = INT32_MAX; + } else { + d[H4(i)] = ((int64_t)a[H4(i)] * b[H2(2 * i)] + (1ull << 14)) >> 15; + } +} + 
+RVPR(kmmwb2_u, 1, 4); + +static inline void do_kmmwt2(CPURISCVState *env, void *vd, void *va, + void *vb, uint8_t i) +{ + int32_t *d = vd, *a = va; + int16_t *b = vb; + if (a[H4(i)] == INT32_MIN && b[H2(2 * i + 1)] == INT16_MIN) { + env->vxsat = 0x1; + d[H4(i)] = INT32_MAX; + } else { + d[H4(i)] = (int64_t)a[H4(i)] * b[H2(2 * i + 1)] >> 15; + } +} + +RVPR(kmmwt2, 1, 4); + +static inline void do_kmmwt2_u(CPURISCVState *env, void *vd, void *va, + void *vb, uint8_t i) +{ + int32_t *d = vd, *a = va; + int16_t *b = vb; + if (a[H4(i)] == INT32_MIN && b[H2(2 * i + 1)] == INT16_MIN) { + env->vxsat = 0x1; + d[H4(i)] = INT32_MAX; + } else { + d[H4(i)] = ((int64_t)a[H4(i)] * b[H2(2 * i + 1)] + (1ull << 14)) >> 15; + } +} + +RVPR(kmmwt2_u, 1, 4); + +static inline void do_kmmawb2(CPURISCVState *env, void *vd, void *va, + void *vb, void *vc, uint8_t i) +{ + int32_t *d = vd, *a = va, *c = vc, result; + int16_t *b = vb; + if (a[H4(i)] == INT32_MIN && b[H2(2 * i)] == INT16_MIN) { + env->vxsat = 0x1; + result = INT32_MAX; + } else { + result = (int64_t)a[H4(i)] * b[H2(2 * i)] >> 15; + } + d[H4(i)] = sadd32(env, 0, result, c[H4(i)]); +} + +RVPR_ACC(kmmawb2, 1, 4); + +static inline void do_kmmawb2_u(CPURISCVState *env, void *vd, void *va, + void *vb, void *vc, uint8_t i) +{ + int32_t *d = vd, *a = va, *c = vc, result; + int16_t *b = vb; + if (a[H4(i)] == INT32_MIN && b[H2(2 * i)] == INT16_MIN) { + env->vxsat = 0x1; + result = INT32_MAX; + } else { + result = ((int64_t)a[H4(i)] * b[H2(2 * i)] + (1ull << 14)) >> 15; + } + d[H4(i)] = sadd32(env, 0, result, c[H4(i)]); +} + +RVPR_ACC(kmmawb2_u, 1, 4); + +static inline void do_kmmawt2(CPURISCVState *env, void *vd, void *va, + void *vb, void *vc, uint8_t i) +{ + int32_t *d = vd, *a = va, *c = vc, result; + int16_t *b = vb; + if (a[H4(i)] == INT32_MIN && b[H2(2 * i + 1)] == INT16_MIN) { + env->vxsat = 0x1; + result = INT32_MAX; + } else { + result = (int64_t)a[H4(i)] * b[H2(2 * i + 1)] >> 15; + } + d[H4(i)] = sadd32(env, 0, result, 
c[H4(i)]); +} + +RVPR_ACC(kmmawt2, 1, 4); + +static inline void do_kmmawt2_u(CPURISCVState *env, void *vd, void *va, + void *vb, void *vc, uint8_t i) +{ + int32_t *d = vd, *a = va, *c = vc, result; + int16_t *b = vb; + if (a[H4(i)] == INT32_MIN && b[H2(2 * i + 1)] == INT16_MIN) { + env->vxsat = 0x1; + result = INT32_MAX; + } else { + result = ((int64_t)a[H4(i)] * b[H2(2 * i + 1)] + (1ull << 14)) >> 15; + } + d[H4(i)] = sadd32(env, 0, result, c[H4(i)]); +} + +RVPR_ACC(kmmawt2_u, 1, 4); -- 2.25.1
next prev parent reply other threads:[~2021-06-10 8:21 UTC|newest] Thread overview: 88+ messages / expand[flat|nested] mbox.gz Atom feed top 2021-06-10 7:58 [PATCH v2 00/37] target/riscv: support packed extension v0.9.4 LIU Zhiwei 2021-06-10 7:58 ` LIU Zhiwei 2021-06-10 7:58 ` [PATCH v2 01/37] target/riscv: implementation-defined constant parameters LIU Zhiwei 2021-06-10 7:58 ` LIU Zhiwei 2021-06-10 7:58 ` [PATCH v2 02/37] target/riscv: Make the vector helper functions public LIU Zhiwei 2021-06-10 7:58 ` LIU Zhiwei 2021-06-10 7:58 ` [PATCH v2 03/37] target/riscv: 16-bit Addition & Subtraction Instructions LIU Zhiwei 2021-06-10 7:58 ` LIU Zhiwei 2021-06-10 18:00 ` Richard Henderson 2021-06-10 18:00 ` Richard Henderson 2021-06-10 7:58 ` [PATCH v2 04/37] target/riscv: 8-bit Addition & Subtraction Instruction LIU Zhiwei 2021-06-10 7:58 ` LIU Zhiwei 2021-06-10 19:39 ` Richard Henderson 2021-06-10 19:39 ` Richard Henderson 2021-06-11 4:36 ` LIU Zhiwei 2021-06-11 4:36 ` LIU Zhiwei 2021-06-24 6:05 ` LIU Zhiwei 2021-06-24 6:05 ` LIU Zhiwei 2021-06-10 7:58 ` [PATCH v2 05/37] target/riscv: SIMD 16-bit Shift Instructions LIU Zhiwei 2021-06-10 7:58 ` LIU Zhiwei 2021-06-10 19:44 ` Richard Henderson 2021-06-10 19:44 ` Richard Henderson 2021-06-10 7:58 ` [PATCH v2 06/37] target/riscv: SIMD 8-bit " LIU Zhiwei 2021-06-10 7:58 ` LIU Zhiwei 2021-06-10 7:58 ` [PATCH v2 07/37] target/riscv: SIMD 16-bit Compare Instructions LIU Zhiwei 2021-06-10 7:58 ` LIU Zhiwei 2021-06-10 7:58 ` [PATCH v2 08/37] target/riscv: SIMD 8-bit " LIU Zhiwei 2021-06-10 7:58 ` LIU Zhiwei 2021-06-10 7:58 ` [PATCH v2 09/37] target/riscv: SIMD 16-bit Multiply Instructions LIU Zhiwei 2021-06-10 7:58 ` LIU Zhiwei 2021-06-10 7:58 ` [PATCH v2 10/37] target/riscv: SIMD 8-bit " LIU Zhiwei 2021-06-10 7:58 ` LIU Zhiwei 2021-06-10 7:58 ` [PATCH v2 11/37] target/riscv: SIMD 16-bit Miscellaneous Instructions LIU Zhiwei 2021-06-10 7:58 ` LIU Zhiwei 2021-06-10 7:58 ` [PATCH v2 12/37] target/riscv: SIMD 8-bit " LIU Zhiwei 
2021-06-10 7:58 ` LIU Zhiwei 2021-06-10 7:58 ` [PATCH v2 13/37] target/riscv: 8-bit Unpacking Instructions LIU Zhiwei 2021-06-10 7:58 ` LIU Zhiwei 2021-06-10 7:58 ` [PATCH v2 14/37] target/riscv: 16-bit Packing Instructions LIU Zhiwei 2021-06-10 7:58 ` LIU Zhiwei 2021-06-10 7:58 ` [PATCH v2 15/37] target/riscv: Signed MSW 32x32 Multiply and Add Instructions LIU Zhiwei 2021-06-10 7:58 ` LIU Zhiwei 2021-06-10 7:58 ` LIU Zhiwei [this message] 2021-06-10 7:58 ` [PATCH v2 16/37] target/riscv: Signed MSW 32x16 " LIU Zhiwei 2021-06-10 7:58 ` [PATCH v2 17/37] target/riscv: Signed 16-bit Multiply 32-bit Add/Subtract Instructions LIU Zhiwei 2021-06-10 7:58 ` LIU Zhiwei 2021-06-10 7:58 ` [PATCH v2 18/37] target/riscv: Signed 16-bit Multiply 64-bit " LIU Zhiwei 2021-06-10 7:58 ` LIU Zhiwei 2021-06-10 7:58 ` [PATCH v2 19/37] target/riscv: Partial-SIMD Miscellaneous Instructions LIU Zhiwei 2021-06-10 7:58 ` LIU Zhiwei 2021-06-10 7:58 ` [PATCH v2 20/37] target/riscv: 8-bit Multiply with 32-bit Add Instructions LIU Zhiwei 2021-06-10 7:58 ` LIU Zhiwei 2021-06-10 7:58 ` [PATCH v2 21/37] target/riscv: 64-bit Add/Subtract Instructions LIU Zhiwei 2021-06-10 7:58 ` LIU Zhiwei 2021-06-10 7:58 ` [PATCH v2 22/37] target/riscv: 32-bit Multiply " LIU Zhiwei 2021-06-10 7:58 ` LIU Zhiwei 2021-06-10 7:58 ` [PATCH v2 23/37] target/riscv: Signed 16-bit Multiply with " LIU Zhiwei 2021-06-10 7:58 ` LIU Zhiwei 2021-06-10 7:58 ` [PATCH v2 24/37] target/riscv: Non-SIMD Q15 saturation ALU Instructions LIU Zhiwei 2021-06-10 7:58 ` LIU Zhiwei 2021-06-10 7:58 ` [PATCH v2 25/37] target/riscv: Non-SIMD Q31 " LIU Zhiwei 2021-06-10 7:58 ` LIU Zhiwei 2021-06-10 7:58 ` [PATCH v2 26/37] target/riscv: 32-bit Computation Instructions LIU Zhiwei 2021-06-10 7:58 ` LIU Zhiwei 2021-06-10 7:58 ` [PATCH v2 27/37] target/riscv: Non-SIMD Miscellaneous Instructions LIU Zhiwei 2021-06-10 7:58 ` LIU Zhiwei 2021-06-10 7:58 ` [PATCH v2 28/37] target/riscv: RV64 Only SIMD 32-bit Add/Subtract Instructions LIU Zhiwei 2021-06-10 
7:58 ` LIU Zhiwei 2021-06-10 7:59 ` [PATCH v2 29/37] target/riscv: RV64 Only SIMD 32-bit Shift Instructions LIU Zhiwei 2021-06-10 7:59 ` LIU Zhiwei 2021-06-10 7:59 ` [PATCH v2 30/37] target/riscv: RV64 Only SIMD 32-bit Miscellaneous Instructions LIU Zhiwei 2021-06-10 7:59 ` LIU Zhiwei 2021-06-10 7:59 ` [PATCH v2 31/37] target/riscv: RV64 Only SIMD Q15 saturating Multiply Instructions LIU Zhiwei 2021-06-10 7:59 ` LIU Zhiwei 2021-06-10 7:59 ` [PATCH v2 32/37] target/riscv: RV64 Only 32-bit " LIU Zhiwei 2021-06-10 7:59 ` LIU Zhiwei 2021-06-10 7:59 ` [PATCH v2 33/37] target/riscv: RV64 Only 32-bit Multiply & Add Instructions LIU Zhiwei 2021-06-10 7:59 ` LIU Zhiwei 2021-06-10 7:59 ` [PATCH v2 34/37] target/riscv: RV64 Only 32-bit Parallel " LIU Zhiwei 2021-06-10 7:59 ` LIU Zhiwei 2021-06-10 7:59 ` [PATCH v2 35/37] target/riscv: RV64 Only Non-SIMD 32-bit Shift Instructions LIU Zhiwei 2021-06-10 7:59 ` LIU Zhiwei 2021-06-10 7:59 ` [PATCH v2 36/37] target/riscv: RV64 Only 32-bit Packing Instructions LIU Zhiwei 2021-06-10 7:59 ` LIU Zhiwei 2021-06-10 7:59 ` [PATCH v2 37/37] target/riscv: configure and turn on packed extension from command line LIU Zhiwei 2021-06-10 7:59 ` LIU Zhiwei 2021-06-14 22:55 ` [PATCH v2 00/37] target/riscv: support packed extension v0.9.4 no-reply 2021-06-14 22:55 ` no-reply
Reply instructions: You may reply publicly to this message via plain-text email using any one of the following methods:

* Save the following mbox file, import it into your mail client, and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting: https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to switches of git-send-email(1):

  git send-email \
    --in-reply-to=20210610075908.3305506-17-zhiwei_liu@c-sky.com \
    --to=zhiwei_liu@c-sky.com \
    --cc=alistair.francis@wdc.com \
    --cc=bin.meng@windriver.com \
    --cc=palmer@dabbelt.com \
    --cc=qemu-devel@nongnu.org \
    --cc=qemu-riscv@nongnu.org \
    --cc=richard.henderson@linaro.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes; see the mirroring instructions for how to clone and mirror all data and code used by this external index.