All of lore.kernel.org
 help / color / mirror / Atom feed
From: LIU Zhiwei <zhiwei_liu@c-sky.com>
To: qemu-devel@nongnu.org, qemu-riscv@nongnu.org
Cc: palmer@dabbelt.com, bin.meng@windriver.com,
	Alistair.Francis@wdc.com, LIU Zhiwei <zhiwei_liu@c-sky.com>
Subject: [PATCH v3 16/37] target/riscv: Signed MSW 32x16 Multiply and Add Instructions
Date: Thu, 24 Jun 2021 18:55:00 +0800	[thread overview]
Message-ID: <20210624105521.3964-17-zhiwei_liu@c-sky.com> (raw)
In-Reply-To: <20210624105521.3964-1-zhiwei_liu@c-sky.com>

Always contain a 32x16 multiplification and the most significant
word can be used as the result, or an operand for an add or
subtract operation with rounding or saturation.

Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com>
Acked-by: Alistair Francis <alistair.francis@wdc.com>
---
 target/riscv/helper.h                   |  17 ++
 target/riscv/insn32.decode              |  17 ++
 target/riscv/insn_trans/trans_rvp.c.inc |  18 ++
 target/riscv/packed_helper.c            | 208 ++++++++++++++++++++++++
 4 files changed, 260 insertions(+)

diff --git a/target/riscv/helper.h b/target/riscv/helper.h
index 600e8dee44..854f48d385 100644
--- a/target/riscv/helper.h
+++ b/target/riscv/helper.h
@@ -1280,3 +1280,20 @@ DEF_HELPER_4(kmmsb, tl, env, tl, tl, tl)
 DEF_HELPER_4(kmmsb_u, tl, env, tl, tl, tl)
 DEF_HELPER_3(kwmmul, tl, env, tl, tl)
 DEF_HELPER_3(kwmmul_u, tl, env, tl, tl)
+
+DEF_HELPER_3(smmwb, tl, env, tl, tl)
+DEF_HELPER_3(smmwb_u, tl, env, tl, tl)
+DEF_HELPER_3(smmwt, tl, env, tl, tl)
+DEF_HELPER_3(smmwt_u, tl, env, tl, tl)
+DEF_HELPER_4(kmmawb, tl, env, tl, tl, tl)
+DEF_HELPER_4(kmmawb_u, tl, env, tl, tl, tl)
+DEF_HELPER_4(kmmawt, tl, env, tl, tl, tl)
+DEF_HELPER_4(kmmawt_u, tl, env, tl, tl, tl)
+DEF_HELPER_3(kmmwb2, tl, env, tl, tl)
+DEF_HELPER_3(kmmwb2_u, tl, env, tl, tl)
+DEF_HELPER_3(kmmwt2, tl, env, tl, tl)
+DEF_HELPER_3(kmmwt2_u, tl, env, tl, tl)
+DEF_HELPER_4(kmmawb2, tl, env, tl, tl, tl)
+DEF_HELPER_4(kmmawb2_u, tl, env, tl, tl, tl)
+DEF_HELPER_4(kmmawt2, tl, env, tl, tl, tl)
+DEF_HELPER_4(kmmawt2_u, tl, env, tl, tl, tl)
diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode
index 0484de140b..e5a8f663dc 100644
--- a/target/riscv/insn32.decode
+++ b/target/riscv/insn32.decode
@@ -883,3 +883,20 @@ kmmsb      0100001  ..... ..... 001 ..... 1110111 @r
 kmmsb_u    0101001  ..... ..... 001 ..... 1110111 @r
 kwmmul     0110001  ..... ..... 001 ..... 1110111 @r
 kwmmul_u   0111001  ..... ..... 001 ..... 1110111 @r
+
+smmwb      0100010  ..... ..... 001 ..... 1110111 @r
+smmwb_u    0101010  ..... ..... 001 ..... 1110111 @r
+smmwt      0110010  ..... ..... 001 ..... 1110111 @r
+smmwt_u    0111010  ..... ..... 001 ..... 1110111 @r
+kmmawb     0100011  ..... ..... 001 ..... 1110111 @r
+kmmawb_u   0101011  ..... ..... 001 ..... 1110111 @r
+kmmawt     0110011  ..... ..... 001 ..... 1110111 @r
+kmmawt_u   0111011  ..... ..... 001 ..... 1110111 @r
+kmmwb2     1000111  ..... ..... 001 ..... 1110111 @r
+kmmwb2_u   1001111  ..... ..... 001 ..... 1110111 @r
+kmmwt2     1010111  ..... ..... 001 ..... 1110111 @r
+kmmwt2_u   1011111  ..... ..... 001 ..... 1110111 @r
+kmmawb2    1100111  ..... ..... 001 ..... 1110111 @r
+kmmawb2_u  1101111  ..... ..... 001 ..... 1110111 @r
+kmmawt2    1110111  ..... ..... 001 ..... 1110111 @r
+kmmawt2_u  1111111  ..... ..... 001 ..... 1110111 @r
diff --git a/target/riscv/insn_trans/trans_rvp.c.inc b/target/riscv/insn_trans/trans_rvp.c.inc
index 073558b950..af490a5ef0 100644
--- a/target/riscv/insn_trans/trans_rvp.c.inc
+++ b/target/riscv/insn_trans/trans_rvp.c.inc
@@ -413,3 +413,21 @@ GEN_RVP_R_ACC_OOL(kmmsb);
 GEN_RVP_R_ACC_OOL(kmmsb_u);
 GEN_RVP_R_OOL(kwmmul);
 GEN_RVP_R_OOL(kwmmul_u);
+
+/* Most Significant Word “32x16” Multiply & Add Instructions */
+GEN_RVP_R_OOL(smmwb);
+GEN_RVP_R_OOL(smmwb_u);
+GEN_RVP_R_OOL(smmwt);
+GEN_RVP_R_OOL(smmwt_u);
+GEN_RVP_R_ACC_OOL(kmmawb);
+GEN_RVP_R_ACC_OOL(kmmawb_u);
+GEN_RVP_R_ACC_OOL(kmmawt);
+GEN_RVP_R_ACC_OOL(kmmawt_u);
+GEN_RVP_R_OOL(kmmwb2);
+GEN_RVP_R_OOL(kmmwb2_u);
+GEN_RVP_R_OOL(kmmwt2);
+GEN_RVP_R_OOL(kmmwt2_u);
+GEN_RVP_R_ACC_OOL(kmmawb2);
+GEN_RVP_R_ACC_OOL(kmmawb2_u);
+GEN_RVP_R_ACC_OOL(kmmawt2);
+GEN_RVP_R_ACC_OOL(kmmawt2_u);
diff --git a/target/riscv/packed_helper.c b/target/riscv/packed_helper.c
index 465cb5a3b3..868a1a71ba 100644
--- a/target/riscv/packed_helper.c
+++ b/target/riscv/packed_helper.c
@@ -1468,3 +1468,211 @@ static inline void do_kwmmul_u(CPURISCVState *env, void *vd, void *va,
 }
 
 RVPR(kwmmul_u, 1, 4);
+
+/* Most Significant Word “32x16” Multiply & Add Instructions */
+static inline void do_smmwb(CPURISCVState *env, void *vd, void *va,
+                            void *vb, uint8_t i)
+{
+    int32_t *d = vd, *a = va;
+    int16_t *b = vb;
+    d[H4(i)] = (int64_t)a[H4(i)] * b[H2(2 * i)] >> 16;
+}
+
+RVPR(smmwb, 1, 4);
+
+static inline void do_smmwb_u(CPURISCVState *env, void *vd, void *va,
+                              void *vb, uint8_t i)
+{
+    int32_t *d = vd, *a = va;
+    int16_t *b = vb;
+    d[H4(i)] = ((int64_t)a[H4(i)] * b[H2(2 * i)] + (1ull << 15)) >> 16;
+}
+
+RVPR(smmwb_u, 1, 4);
+
+static inline void do_smmwt(CPURISCVState *env, void *vd, void *va,
+                            void *vb, uint8_t i)
+{
+    int32_t *d = vd, *a = va;
+    int16_t *b = vb;
+    d[H4(i)] = (int64_t)a[H4(i)] * b[H2(2 * i + 1)] >> 16;
+}
+
+RVPR(smmwt, 1, 4);
+
+static inline void do_smmwt_u(CPURISCVState *env, void *vd, void *va,
+                              void *vb, uint8_t i)
+{
+    int32_t *d = vd, *a = va;
+    int16_t *b = vb;
+    d[H4(i)] = ((int64_t)a[H4(i)] * b[H2(2 * i + 1)] + (1ull << 15)) >> 16;
+}
+
+RVPR(smmwt_u, 1, 4);
+
+static inline void do_kmmawb(CPURISCVState *env, void *vd, void *va,
+                             void *vb, void *vc, uint8_t i)
+{
+    int32_t *d = vd, *a = va, *c = vc;
+    int16_t *b = vb;
+    d[H4(i)] = sadd32(env, 0, (int64_t)a[H4(i)] * b[H2(2 * i)] >> 16, c[H4(i)]);
+}
+
+RVPR_ACC(kmmawb, 1, 4);
+
+static inline void do_kmmawb_u(CPURISCVState *env, void *vd, void *va,
+                               void *vb, void *vc, uint8_t i)
+{
+    int32_t *d = vd, *a = va, *c = vc;
+    int16_t *b = vb;
+    d[H4(i)] = sadd32(env, 0, ((int64_t)a[H4(i)] * b[H2(2 * i)] +
+                               (1ull << 15)) >> 16, c[H4(i)]);
+}
+
+RVPR_ACC(kmmawb_u, 1, 4);
+
+static inline void do_kmmawt(CPURISCVState *env, void *vd, void *va,
+                             void *vb, void *vc, uint8_t i)
+{
+    int32_t *d = vd, *a = va, *c = vc;
+    int16_t *b = vb;
+    d[H4(i)] = sadd32(env, 0, (int64_t)a[H4(i)] * b[H2(2 * i + 1)] >> 16,
+                      c[H4(i)]);
+}
+
+RVPR_ACC(kmmawt, 1, 4);
+
+static inline void do_kmmawt_u(CPURISCVState *env, void *vd, void *va,
+                               void *vb, void *vc, uint8_t i)
+{
+    int32_t *d = vd, *a = va, *c = vc;
+    int16_t *b = vb;
+    d[H4(i)] = sadd32(env, 0, ((int64_t)a[H4(i)] * b[H2(2 * i + 1)] +
+                               (1ull << 15)) >> 16, c[H4(i)]);
+}
+
+RVPR_ACC(kmmawt_u, 1, 4);
+
+static inline void do_kmmwb2(CPURISCVState *env, void *vd, void *va,
+                             void *vb, uint8_t i)
+{
+    int32_t *d = vd, *a = va;
+    int16_t *b = vb;
+    if (a[H4(i)] == INT32_MIN && b[H2(2 * i)] == INT16_MIN) {
+        env->vxsat = 0x1;
+        d[H4(i)] = INT32_MAX;
+    } else {
+        d[H4(i)] = (int64_t)a[H4(i)] * b[H2(2 * i)] >> 15;
+    }
+}
+
+RVPR(kmmwb2, 1, 4);
+
+static inline void do_kmmwb2_u(CPURISCVState *env, void *vd, void *va,
+                               void *vb, uint8_t i)
+{
+    int32_t *d = vd, *a = va;
+    int16_t *b = vb;
+    if (a[H4(i)] == INT32_MIN && b[H2(2 * i)] == INT16_MIN) {
+        env->vxsat = 0x1;
+        d[H4(i)] = INT32_MAX;
+    } else {
+        d[H4(i)] = ((int64_t)a[H4(i)] * b[H2(2 * i)] + (1ull << 14)) >> 15;
+    }
+}
+
+RVPR(kmmwb2_u, 1, 4);
+
+static inline void do_kmmwt2(CPURISCVState *env, void *vd, void *va,
+                             void *vb, uint8_t i)
+{
+    int32_t *d = vd, *a = va;
+    int16_t *b = vb;
+    if (a[H4(i)] == INT32_MIN && b[H2(2 * i + 1)] == INT16_MIN) {
+        env->vxsat = 0x1;
+        d[H4(i)] = INT32_MAX;
+    } else {
+        d[H4(i)] = (int64_t)a[H4(i)] * b[H2(2 * i + 1)] >> 15;
+    }
+}
+
+RVPR(kmmwt2, 1, 4);
+
+static inline void do_kmmwt2_u(CPURISCVState *env, void *vd, void *va,
+                               void *vb, uint8_t i)
+{
+    int32_t *d = vd, *a = va;
+    int16_t *b = vb;
+    if (a[H4(i)] == INT32_MIN && b[H2(2 * i + 1)] == INT16_MIN) {
+        env->vxsat = 0x1;
+        d[H4(i)] = INT32_MAX;
+    } else {
+        d[H4(i)] = ((int64_t)a[H4(i)] * b[H2(2 * i + 1)] + (1ull << 14)) >> 15;
+    }
+}
+
+RVPR(kmmwt2_u, 1, 4);
+
+static inline void do_kmmawb2(CPURISCVState *env, void *vd, void *va,
+                              void *vb, void *vc, uint8_t i)
+{
+    int32_t *d = vd, *a = va, *c = vc, result;
+    int16_t *b = vb;
+    if (a[H4(i)] == INT32_MIN && b[H2(2 * i)] == INT16_MIN) {
+        env->vxsat = 0x1;
+        result = INT32_MAX;
+    } else {
+        result = (int64_t)a[H4(i)] * b[H2(2 * i)] >> 15;
+    }
+    d[H4(i)] = sadd32(env, 0, result, c[H4(i)]);
+}
+
+RVPR_ACC(kmmawb2, 1, 4);
+
+static inline void do_kmmawb2_u(CPURISCVState *env, void *vd, void *va,
+                                void *vb, void *vc, uint8_t i)
+{
+    int32_t *d = vd, *a = va, *c = vc, result;
+    int16_t *b = vb;
+    if (a[H4(i)] == INT32_MIN && b[H2(2 * i)] == INT16_MIN) {
+        env->vxsat = 0x1;
+        result = INT32_MAX;
+    } else {
+        result = ((int64_t)a[H4(i)] * b[H2(2 * i)] + (1ull << 14)) >> 15;
+    }
+    d[H4(i)] = sadd32(env, 0, result, c[H4(i)]);
+}
+
+RVPR_ACC(kmmawb2_u, 1, 4);
+
+static inline void do_kmmawt2(CPURISCVState *env, void *vd, void *va,
+                              void *vb, void *vc, uint8_t i)
+{
+    int32_t *d = vd, *a = va, *c = vc, result;
+    int16_t *b = vb;
+    if (a[H4(i)] == INT32_MIN && b[H2(2 * i + 1)] == INT16_MIN) {
+        env->vxsat = 0x1;
+        result = INT32_MAX;
+    } else {
+        result = (int64_t)a[H4(i)] * b[H2(2 * i + 1)] >> 15;
+    }
+    d[H4(i)] = sadd32(env, 0, result, c[H4(i)]);
+}
+
+RVPR_ACC(kmmawt2, 1, 4);
+
+static inline void do_kmmawt2_u(CPURISCVState *env, void *vd, void *va,
+                                void *vb, void *vc, uint8_t i)
+{
+    int32_t *d = vd, *a = va, *c = vc, result;
+    int16_t *b = vb;
+    if (a[H4(i)] == INT32_MIN && b[H2(2 * i + 1)] == INT16_MIN) {
+        env->vxsat = 0x1;
+        result = INT32_MAX;
+    } else {
+        result = ((int64_t)a[H4(i)] * b[H2(2 * i + 1)] + (1ull << 14)) >> 15;
+    }
+    d[H4(i)] = sadd32(env, 0, result, c[H4(i)]);
+}
+
+RVPR_ACC(kmmawt2_u, 1, 4);
-- 
2.17.1



WARNING: multiple messages have this Message-ID (diff)
From: LIU Zhiwei <zhiwei_liu@c-sky.com>
To: qemu-devel@nongnu.org, qemu-riscv@nongnu.org
Cc: Alistair.Francis@wdc.com, palmer@dabbelt.com,
	bin.meng@windriver.com, LIU Zhiwei <zhiwei_liu@c-sky.com>
Subject: [PATCH v3 16/37] target/riscv: Signed MSW 32x16 Multiply and Add Instructions
Date: Thu, 24 Jun 2021 18:55:00 +0800	[thread overview]
Message-ID: <20210624105521.3964-17-zhiwei_liu@c-sky.com> (raw)
In-Reply-To: <20210624105521.3964-1-zhiwei_liu@c-sky.com>

Always contain a 32x16 multiplification and the most significant
word can be used as the result, or an operand for an add or
subtract operation with rounding or saturation.

Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com>
Acked-by: Alistair Francis <alistair.francis@wdc.com>
---
 target/riscv/helper.h                   |  17 ++
 target/riscv/insn32.decode              |  17 ++
 target/riscv/insn_trans/trans_rvp.c.inc |  18 ++
 target/riscv/packed_helper.c            | 208 ++++++++++++++++++++++++
 4 files changed, 260 insertions(+)

diff --git a/target/riscv/helper.h b/target/riscv/helper.h
index 600e8dee44..854f48d385 100644
--- a/target/riscv/helper.h
+++ b/target/riscv/helper.h
@@ -1280,3 +1280,20 @@ DEF_HELPER_4(kmmsb, tl, env, tl, tl, tl)
 DEF_HELPER_4(kmmsb_u, tl, env, tl, tl, tl)
 DEF_HELPER_3(kwmmul, tl, env, tl, tl)
 DEF_HELPER_3(kwmmul_u, tl, env, tl, tl)
+
+DEF_HELPER_3(smmwb, tl, env, tl, tl)
+DEF_HELPER_3(smmwb_u, tl, env, tl, tl)
+DEF_HELPER_3(smmwt, tl, env, tl, tl)
+DEF_HELPER_3(smmwt_u, tl, env, tl, tl)
+DEF_HELPER_4(kmmawb, tl, env, tl, tl, tl)
+DEF_HELPER_4(kmmawb_u, tl, env, tl, tl, tl)
+DEF_HELPER_4(kmmawt, tl, env, tl, tl, tl)
+DEF_HELPER_4(kmmawt_u, tl, env, tl, tl, tl)
+DEF_HELPER_3(kmmwb2, tl, env, tl, tl)
+DEF_HELPER_3(kmmwb2_u, tl, env, tl, tl)
+DEF_HELPER_3(kmmwt2, tl, env, tl, tl)
+DEF_HELPER_3(kmmwt2_u, tl, env, tl, tl)
+DEF_HELPER_4(kmmawb2, tl, env, tl, tl, tl)
+DEF_HELPER_4(kmmawb2_u, tl, env, tl, tl, tl)
+DEF_HELPER_4(kmmawt2, tl, env, tl, tl, tl)
+DEF_HELPER_4(kmmawt2_u, tl, env, tl, tl, tl)
diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode
index 0484de140b..e5a8f663dc 100644
--- a/target/riscv/insn32.decode
+++ b/target/riscv/insn32.decode
@@ -883,3 +883,20 @@ kmmsb      0100001  ..... ..... 001 ..... 1110111 @r
 kmmsb_u    0101001  ..... ..... 001 ..... 1110111 @r
 kwmmul     0110001  ..... ..... 001 ..... 1110111 @r
 kwmmul_u   0111001  ..... ..... 001 ..... 1110111 @r
+
+smmwb      0100010  ..... ..... 001 ..... 1110111 @r
+smmwb_u    0101010  ..... ..... 001 ..... 1110111 @r
+smmwt      0110010  ..... ..... 001 ..... 1110111 @r
+smmwt_u    0111010  ..... ..... 001 ..... 1110111 @r
+kmmawb     0100011  ..... ..... 001 ..... 1110111 @r
+kmmawb_u   0101011  ..... ..... 001 ..... 1110111 @r
+kmmawt     0110011  ..... ..... 001 ..... 1110111 @r
+kmmawt_u   0111011  ..... ..... 001 ..... 1110111 @r
+kmmwb2     1000111  ..... ..... 001 ..... 1110111 @r
+kmmwb2_u   1001111  ..... ..... 001 ..... 1110111 @r
+kmmwt2     1010111  ..... ..... 001 ..... 1110111 @r
+kmmwt2_u   1011111  ..... ..... 001 ..... 1110111 @r
+kmmawb2    1100111  ..... ..... 001 ..... 1110111 @r
+kmmawb2_u  1101111  ..... ..... 001 ..... 1110111 @r
+kmmawt2    1110111  ..... ..... 001 ..... 1110111 @r
+kmmawt2_u  1111111  ..... ..... 001 ..... 1110111 @r
diff --git a/target/riscv/insn_trans/trans_rvp.c.inc b/target/riscv/insn_trans/trans_rvp.c.inc
index 073558b950..af490a5ef0 100644
--- a/target/riscv/insn_trans/trans_rvp.c.inc
+++ b/target/riscv/insn_trans/trans_rvp.c.inc
@@ -413,3 +413,21 @@ GEN_RVP_R_ACC_OOL(kmmsb);
 GEN_RVP_R_ACC_OOL(kmmsb_u);
 GEN_RVP_R_OOL(kwmmul);
 GEN_RVP_R_OOL(kwmmul_u);
+
+/* Most Significant Word “32x16” Multiply & Add Instructions */
+GEN_RVP_R_OOL(smmwb);
+GEN_RVP_R_OOL(smmwb_u);
+GEN_RVP_R_OOL(smmwt);
+GEN_RVP_R_OOL(smmwt_u);
+GEN_RVP_R_ACC_OOL(kmmawb);
+GEN_RVP_R_ACC_OOL(kmmawb_u);
+GEN_RVP_R_ACC_OOL(kmmawt);
+GEN_RVP_R_ACC_OOL(kmmawt_u);
+GEN_RVP_R_OOL(kmmwb2);
+GEN_RVP_R_OOL(kmmwb2_u);
+GEN_RVP_R_OOL(kmmwt2);
+GEN_RVP_R_OOL(kmmwt2_u);
+GEN_RVP_R_ACC_OOL(kmmawb2);
+GEN_RVP_R_ACC_OOL(kmmawb2_u);
+GEN_RVP_R_ACC_OOL(kmmawt2);
+GEN_RVP_R_ACC_OOL(kmmawt2_u);
diff --git a/target/riscv/packed_helper.c b/target/riscv/packed_helper.c
index 465cb5a3b3..868a1a71ba 100644
--- a/target/riscv/packed_helper.c
+++ b/target/riscv/packed_helper.c
@@ -1468,3 +1468,211 @@ static inline void do_kwmmul_u(CPURISCVState *env, void *vd, void *va,
 }
 
 RVPR(kwmmul_u, 1, 4);
+
+/* Most Significant Word “32x16” Multiply & Add Instructions */
+static inline void do_smmwb(CPURISCVState *env, void *vd, void *va,
+                            void *vb, uint8_t i)
+{
+    int32_t *d = vd, *a = va;
+    int16_t *b = vb;
+    d[H4(i)] = (int64_t)a[H4(i)] * b[H2(2 * i)] >> 16;
+}
+
+RVPR(smmwb, 1, 4);
+
+static inline void do_smmwb_u(CPURISCVState *env, void *vd, void *va,
+                              void *vb, uint8_t i)
+{
+    int32_t *d = vd, *a = va;
+    int16_t *b = vb;
+    d[H4(i)] = ((int64_t)a[H4(i)] * b[H2(2 * i)] + (1ull << 15)) >> 16;
+}
+
+RVPR(smmwb_u, 1, 4);
+
+static inline void do_smmwt(CPURISCVState *env, void *vd, void *va,
+                            void *vb, uint8_t i)
+{
+    int32_t *d = vd, *a = va;
+    int16_t *b = vb;
+    d[H4(i)] = (int64_t)a[H4(i)] * b[H2(2 * i + 1)] >> 16;
+}
+
+RVPR(smmwt, 1, 4);
+
+static inline void do_smmwt_u(CPURISCVState *env, void *vd, void *va,
+                              void *vb, uint8_t i)
+{
+    int32_t *d = vd, *a = va;
+    int16_t *b = vb;
+    d[H4(i)] = ((int64_t)a[H4(i)] * b[H2(2 * i + 1)] + (1ull << 15)) >> 16;
+}
+
+RVPR(smmwt_u, 1, 4);
+
+static inline void do_kmmawb(CPURISCVState *env, void *vd, void *va,
+                             void *vb, void *vc, uint8_t i)
+{
+    int32_t *d = vd, *a = va, *c = vc;
+    int16_t *b = vb;
+    d[H4(i)] = sadd32(env, 0, (int64_t)a[H4(i)] * b[H2(2 * i)] >> 16, c[H4(i)]);
+}
+
+RVPR_ACC(kmmawb, 1, 4);
+
+static inline void do_kmmawb_u(CPURISCVState *env, void *vd, void *va,
+                               void *vb, void *vc, uint8_t i)
+{
+    int32_t *d = vd, *a = va, *c = vc;
+    int16_t *b = vb;
+    d[H4(i)] = sadd32(env, 0, ((int64_t)a[H4(i)] * b[H2(2 * i)] +
+                               (1ull << 15)) >> 16, c[H4(i)]);
+}
+
+RVPR_ACC(kmmawb_u, 1, 4);
+
+static inline void do_kmmawt(CPURISCVState *env, void *vd, void *va,
+                             void *vb, void *vc, uint8_t i)
+{
+    int32_t *d = vd, *a = va, *c = vc;
+    int16_t *b = vb;
+    d[H4(i)] = sadd32(env, 0, (int64_t)a[H4(i)] * b[H2(2 * i + 1)] >> 16,
+                      c[H4(i)]);
+}
+
+RVPR_ACC(kmmawt, 1, 4);
+
+static inline void do_kmmawt_u(CPURISCVState *env, void *vd, void *va,
+                               void *vb, void *vc, uint8_t i)
+{
+    int32_t *d = vd, *a = va, *c = vc;
+    int16_t *b = vb;
+    d[H4(i)] = sadd32(env, 0, ((int64_t)a[H4(i)] * b[H2(2 * i + 1)] +
+                               (1ull << 15)) >> 16, c[H4(i)]);
+}
+
+RVPR_ACC(kmmawt_u, 1, 4);
+
+static inline void do_kmmwb2(CPURISCVState *env, void *vd, void *va,
+                             void *vb, uint8_t i)
+{
+    int32_t *d = vd, *a = va;
+    int16_t *b = vb;
+    if (a[H4(i)] == INT32_MIN && b[H2(2 * i)] == INT16_MIN) {
+        env->vxsat = 0x1;
+        d[H4(i)] = INT32_MAX;
+    } else {
+        d[H4(i)] = (int64_t)a[H4(i)] * b[H2(2 * i)] >> 15;
+    }
+}
+
+RVPR(kmmwb2, 1, 4);
+
+static inline void do_kmmwb2_u(CPURISCVState *env, void *vd, void *va,
+                               void *vb, uint8_t i)
+{
+    int32_t *d = vd, *a = va;
+    int16_t *b = vb;
+    if (a[H4(i)] == INT32_MIN && b[H2(2 * i)] == INT16_MIN) {
+        env->vxsat = 0x1;
+        d[H4(i)] = INT32_MAX;
+    } else {
+        d[H4(i)] = ((int64_t)a[H4(i)] * b[H2(2 * i)] + (1ull << 14)) >> 15;
+    }
+}
+
+RVPR(kmmwb2_u, 1, 4);
+
+static inline void do_kmmwt2(CPURISCVState *env, void *vd, void *va,
+                             void *vb, uint8_t i)
+{
+    int32_t *d = vd, *a = va;
+    int16_t *b = vb;
+    if (a[H4(i)] == INT32_MIN && b[H2(2 * i + 1)] == INT16_MIN) {
+        env->vxsat = 0x1;
+        d[H4(i)] = INT32_MAX;
+    } else {
+        d[H4(i)] = (int64_t)a[H4(i)] * b[H2(2 * i + 1)] >> 15;
+    }
+}
+
+RVPR(kmmwt2, 1, 4);
+
+static inline void do_kmmwt2_u(CPURISCVState *env, void *vd, void *va,
+                               void *vb, uint8_t i)
+{
+    int32_t *d = vd, *a = va;
+    int16_t *b = vb;
+    if (a[H4(i)] == INT32_MIN && b[H2(2 * i + 1)] == INT16_MIN) {
+        env->vxsat = 0x1;
+        d[H4(i)] = INT32_MAX;
+    } else {
+        d[H4(i)] = ((int64_t)a[H4(i)] * b[H2(2 * i + 1)] + (1ull << 14)) >> 15;
+    }
+}
+
+RVPR(kmmwt2_u, 1, 4);
+
+static inline void do_kmmawb2(CPURISCVState *env, void *vd, void *va,
+                              void *vb, void *vc, uint8_t i)
+{
+    int32_t *d = vd, *a = va, *c = vc, result;
+    int16_t *b = vb;
+    if (a[H4(i)] == INT32_MIN && b[H2(2 * i)] == INT16_MIN) {
+        env->vxsat = 0x1;
+        result = INT32_MAX;
+    } else {
+        result = (int64_t)a[H4(i)] * b[H2(2 * i)] >> 15;
+    }
+    d[H4(i)] = sadd32(env, 0, result, c[H4(i)]);
+}
+
+RVPR_ACC(kmmawb2, 1, 4);
+
+static inline void do_kmmawb2_u(CPURISCVState *env, void *vd, void *va,
+                                void *vb, void *vc, uint8_t i)
+{
+    int32_t *d = vd, *a = va, *c = vc, result;
+    int16_t *b = vb;
+    if (a[H4(i)] == INT32_MIN && b[H2(2 * i)] == INT16_MIN) {
+        env->vxsat = 0x1;
+        result = INT32_MAX;
+    } else {
+        result = ((int64_t)a[H4(i)] * b[H2(2 * i)] + (1ull << 14)) >> 15;
+    }
+    d[H4(i)] = sadd32(env, 0, result, c[H4(i)]);
+}
+
+RVPR_ACC(kmmawb2_u, 1, 4);
+
+static inline void do_kmmawt2(CPURISCVState *env, void *vd, void *va,
+                              void *vb, void *vc, uint8_t i)
+{
+    int32_t *d = vd, *a = va, *c = vc, result;
+    int16_t *b = vb;
+    if (a[H4(i)] == INT32_MIN && b[H2(2 * i + 1)] == INT16_MIN) {
+        env->vxsat = 0x1;
+        result = INT32_MAX;
+    } else {
+        result = (int64_t)a[H4(i)] * b[H2(2 * i + 1)] >> 15;
+    }
+    d[H4(i)] = sadd32(env, 0, result, c[H4(i)]);
+}
+
+RVPR_ACC(kmmawt2, 1, 4);
+
+static inline void do_kmmawt2_u(CPURISCVState *env, void *vd, void *va,
+                                void *vb, void *vc, uint8_t i)
+{
+    int32_t *d = vd, *a = va, *c = vc, result;
+    int16_t *b = vb;
+    if (a[H4(i)] == INT32_MIN && b[H2(2 * i + 1)] == INT16_MIN) {
+        env->vxsat = 0x1;
+        result = INT32_MAX;
+    } else {
+        result = ((int64_t)a[H4(i)] * b[H2(2 * i + 1)] + (1ull << 14)) >> 15;
+    }
+    d[H4(i)] = sadd32(env, 0, result, c[H4(i)]);
+}
+
+RVPR_ACC(kmmawt2_u, 1, 4);
-- 
2.17.1



  parent reply	other threads:[~2021-06-24 11:29 UTC|newest]

Thread overview: 86+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-06-24 10:54 [PATCH v3 00/37] target/riscv: support packed extension v0.9.4 LIU Zhiwei
2021-06-24 10:54 ` LIU Zhiwei
2021-06-24 10:54 ` [PATCH v3 01/37] target/riscv: implementation-defined constant parameters LIU Zhiwei
2021-06-24 10:54   ` LIU Zhiwei
2021-06-24 10:54 ` [PATCH v3 02/37] target/riscv: Make the vector helper functions public LIU Zhiwei
2021-06-24 10:54   ` LIU Zhiwei
2021-06-24 10:54 ` [PATCH v3 03/37] target/riscv: 16-bit Addition & Subtraction Instructions LIU Zhiwei
2021-06-24 10:54   ` LIU Zhiwei
2021-07-01  2:02   ` Alistair Francis
2021-07-01  2:02     ` Alistair Francis
2021-06-24 10:54 ` [PATCH v3 04/37] target/riscv: 8-bit Addition & Subtraction Instruction LIU Zhiwei
2021-06-24 10:54   ` LIU Zhiwei
2021-06-24 10:54 ` [PATCH v3 05/37] target/riscv: SIMD 16-bit Shift Instructions LIU Zhiwei
2021-06-24 10:54   ` LIU Zhiwei
2021-07-01  2:08   ` Alistair Francis
2021-07-01  2:08     ` Alistair Francis
2021-06-24 10:54 ` [PATCH v3 06/37] target/riscv: SIMD 8-bit " LIU Zhiwei
2021-06-24 10:54   ` LIU Zhiwei
2021-06-24 10:54 ` [PATCH v3 07/37] target/riscv: SIMD 16-bit Compare Instructions LIU Zhiwei
2021-06-24 10:54   ` LIU Zhiwei
2021-06-24 10:54 ` [PATCH v3 08/37] target/riscv: SIMD 8-bit " LIU Zhiwei
2021-06-24 10:54   ` LIU Zhiwei
2021-06-24 10:54 ` [PATCH v3 09/37] target/riscv: SIMD 16-bit Multiply Instructions LIU Zhiwei
2021-06-24 10:54   ` LIU Zhiwei
2021-06-24 10:54 ` [PATCH v3 10/37] target/riscv: SIMD 8-bit " LIU Zhiwei
2021-06-24 10:54   ` LIU Zhiwei
2021-06-24 10:54 ` [PATCH v3 11/37] target/riscv: SIMD 16-bit Miscellaneous Instructions LIU Zhiwei
2021-06-24 10:54   ` LIU Zhiwei
2021-06-24 10:54 ` [PATCH v3 12/37] target/riscv: SIMD 8-bit " LIU Zhiwei
2021-06-24 10:54   ` LIU Zhiwei
2021-06-24 10:54 ` [PATCH v3 13/37] target/riscv: 8-bit Unpacking Instructions LIU Zhiwei
2021-06-24 10:54   ` LIU Zhiwei
2021-06-24 10:54 ` [PATCH v3 14/37] target/riscv: 16-bit Packing Instructions LIU Zhiwei
2021-06-24 10:54   ` LIU Zhiwei
2021-06-24 10:54 ` [PATCH v3 15/37] target/riscv: Signed MSW 32x32 Multiply and Add Instructions LIU Zhiwei
2021-06-24 10:54   ` LIU Zhiwei
2021-06-24 10:55 ` LIU Zhiwei [this message]
2021-06-24 10:55   ` [PATCH v3 16/37] target/riscv: Signed MSW 32x16 " LIU Zhiwei
2021-06-24 10:55 ` [PATCH v3 17/37] target/riscv: Signed 16-bit Multiply 32-bit Add/Subtract Instructions LIU Zhiwei
2021-06-24 10:55   ` LIU Zhiwei
2021-06-24 10:55 ` [PATCH v3 18/37] target/riscv: Signed 16-bit Multiply 64-bit " LIU Zhiwei
2021-06-24 10:55   ` LIU Zhiwei
2021-06-24 10:55 ` [PATCH v3 19/37] target/riscv: Partial-SIMD Miscellaneous Instructions LIU Zhiwei
2021-06-24 10:55   ` LIU Zhiwei
2021-06-24 10:55 ` [PATCH v3 20/37] target/riscv: 8-bit Multiply with 32-bit Add Instructions LIU Zhiwei
2021-06-24 10:55   ` LIU Zhiwei
2021-06-24 10:55 ` [PATCH v3 21/37] target/riscv: 64-bit Add/Subtract Instructions LIU Zhiwei
2021-06-24 10:55   ` LIU Zhiwei
2021-06-24 10:55 ` [PATCH v3 22/37] target/riscv: 32-bit Multiply " LIU Zhiwei
2021-06-24 10:55   ` LIU Zhiwei
2021-06-24 10:55 ` [PATCH v3 23/37] target/riscv: Signed 16-bit Multiply with " LIU Zhiwei
2021-06-24 10:55   ` LIU Zhiwei
2021-06-24 10:55 ` [PATCH v3 24/37] target/riscv: Non-SIMD Q15 saturation ALU Instructions LIU Zhiwei
2021-06-24 10:55   ` LIU Zhiwei
2021-06-24 10:55 ` [PATCH v3 25/37] target/riscv: Non-SIMD Q31 " LIU Zhiwei
2021-06-24 10:55   ` LIU Zhiwei
2021-06-24 10:55 ` [PATCH v3 26/37] target/riscv: 32-bit Computation Instructions LIU Zhiwei
2021-06-24 10:55   ` LIU Zhiwei
2021-06-24 10:55 ` [PATCH v3 27/37] target/riscv: Non-SIMD Miscellaneous Instructions LIU Zhiwei
2021-06-24 10:55   ` LIU Zhiwei
2021-06-24 10:55 ` [PATCH v3 28/37] target/riscv: RV64 Only SIMD 32-bit Add/Subtract Instructions LIU Zhiwei
2021-06-24 10:55   ` LIU Zhiwei
2021-06-24 10:55 ` [PATCH v3 29/37] target/riscv: RV64 Only SIMD 32-bit Shift Instructions LIU Zhiwei
2021-06-24 10:55   ` LIU Zhiwei
2021-06-24 10:55 ` [PATCH v3 30/37] target/riscv: RV64 Only SIMD 32-bit Miscellaneous Instructions LIU Zhiwei
2021-06-24 10:55   ` LIU Zhiwei
2021-06-24 10:55 ` [PATCH v3 31/37] target/riscv: RV64 Only SIMD Q15 saturating Multiply Instructions LIU Zhiwei
2021-06-24 10:55   ` LIU Zhiwei
2021-06-24 10:55 ` [PATCH v3 32/37] target/riscv: RV64 Only 32-bit " LIU Zhiwei
2021-06-24 10:55   ` LIU Zhiwei
2021-06-24 10:55 ` [PATCH v3 33/37] target/riscv: RV64 Only 32-bit Multiply & Add Instructions LIU Zhiwei
2021-06-24 10:55   ` LIU Zhiwei
2021-06-24 10:55 ` [PATCH v3 34/37] target/riscv: RV64 Only 32-bit Parallel " LIU Zhiwei
2021-06-24 10:55   ` LIU Zhiwei
2021-06-24 10:55 ` [PATCH v3 35/37] target/riscv: RV64 Only Non-SIMD 32-bit Shift Instructions LIU Zhiwei
2021-06-24 10:55   ` LIU Zhiwei
2021-06-24 10:55 ` [PATCH v3 36/37] target/riscv: RV64 Only 32-bit Packing Instructions LIU Zhiwei
2021-06-24 10:55   ` LIU Zhiwei
2021-06-24 10:55 ` [PATCH v3 37/37] target/riscv: configure and turn on packed extension from command line LIU Zhiwei
2021-06-24 10:55   ` LIU Zhiwei
2021-06-24 11:55 ` [PATCH v3 00/37] target/riscv: support packed extension v0.9.4 no-reply
2021-06-24 11:55   ` no-reply
2021-07-01  1:30 ` Alistair Francis
2021-07-01  1:30   ` Alistair Francis
2021-07-01  3:06   ` LIU Zhiwei
2021-07-01  3:06     ` LIU Zhiwei

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20210624105521.3964-17-zhiwei_liu@c-sky.com \
    --to=zhiwei_liu@c-sky.com \
    --cc=Alistair.Francis@wdc.com \
    --cc=bin.meng@windriver.com \
    --cc=palmer@dabbelt.com \
    --cc=qemu-devel@nongnu.org \
    --cc=qemu-riscv@nongnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.