All of lore.kernel.org
 help / color / mirror / Atom feed
From: LIU Zhiwei <zhiwei_liu@c-sky.com>
To: qemu-devel@nongnu.org, qemu-riscv@nongnu.org
Cc: palmer@dabbelt.com, richard.henderson@linaro.org,
	bin.meng@windriver.com, Alistair.Francis@wdc.com,
	LIU Zhiwei <zhiwei_liu@c-sky.com>
Subject: [PATCH v2 17/37] target/riscv: Signed 16-bit Multiply 32-bit Add/Subtract Instructions
Date: Thu, 10 Jun 2021 15:58:48 +0800	[thread overview]
Message-ID: <20210610075908.3305506-18-zhiwei_liu@c-sky.com> (raw)
In-Reply-To: <20210610075908.3305506-1-zhiwei_liu@c-sky.com>

Always contain a signed 16x16 multiply and the 32-bit result can be
written to the destination register or as an operand for an add/subtract
operation.

Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com>
---
 target/riscv/helper.h                   |  19 ++
 target/riscv/insn32.decode              |  19 ++
 target/riscv/insn_trans/trans_rvp.c.inc |  20 ++
 target/riscv/packed_helper.c            | 268 ++++++++++++++++++++++++
 4 files changed, 326 insertions(+)

diff --git a/target/riscv/helper.h b/target/riscv/helper.h
index 854f48d385..5aac6ba578 100644
--- a/target/riscv/helper.h
+++ b/target/riscv/helper.h
@@ -1297,3 +1297,22 @@ DEF_HELPER_4(kmmawb2, tl, env, tl, tl, tl)
 DEF_HELPER_4(kmmawb2_u, tl, env, tl, tl, tl)
 DEF_HELPER_4(kmmawt2, tl, env, tl, tl, tl)
 DEF_HELPER_4(kmmawt2_u, tl, env, tl, tl, tl)
+
+DEF_HELPER_3(smbb16, tl, env, tl, tl)
+DEF_HELPER_3(smbt16, tl, env, tl, tl)
+DEF_HELPER_3(smtt16, tl, env, tl, tl)
+DEF_HELPER_3(kmda, tl, env, tl, tl)
+DEF_HELPER_3(kmxda, tl, env, tl, tl)
+DEF_HELPER_3(smds, tl, env, tl, tl)
+DEF_HELPER_3(smdrs, tl, env, tl, tl)
+DEF_HELPER_3(smxds, tl, env, tl, tl)
+DEF_HELPER_4(kmabb, tl, env, tl, tl, tl)
+DEF_HELPER_4(kmabt, tl, env, tl, tl, tl)
+DEF_HELPER_4(kmatt, tl, env, tl, tl, tl)
+DEF_HELPER_4(kmada, tl, env, tl, tl, tl)
+DEF_HELPER_4(kmaxda, tl, env, tl, tl, tl)
+DEF_HELPER_4(kmads, tl, env, tl, tl, tl)
+DEF_HELPER_4(kmadrs, tl, env, tl, tl, tl)
+DEF_HELPER_4(kmaxds, tl, env, tl, tl, tl)
+DEF_HELPER_4(kmsda, tl, env, tl, tl, tl)
+DEF_HELPER_4(kmsxda, tl, env, tl, tl, tl)
diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode
index e5a8f663dc..f590880750 100644
--- a/target/riscv/insn32.decode
+++ b/target/riscv/insn32.decode
@@ -900,3 +900,22 @@ kmmawb2    1100111  ..... ..... 001 ..... 1110111 @r
 kmmawb2_u  1101111  ..... ..... 001 ..... 1110111 @r
 kmmawt2    1110111  ..... ..... 001 ..... 1110111 @r
 kmmawt2_u  1111111  ..... ..... 001 ..... 1110111 @r
+
+smbb16     0000100  ..... ..... 001 ..... 1110111 @r
+smbt16     0001100  ..... ..... 001 ..... 1110111 @r
+smtt16     0010100  ..... ..... 001 ..... 1110111 @r
+kmda       0011100  ..... ..... 001 ..... 1110111 @r
+kmxda      0011101  ..... ..... 001 ..... 1110111 @r
+smds       0101100  ..... ..... 001 ..... 1110111 @r
+smdrs      0110100  ..... ..... 001 ..... 1110111 @r
+smxds      0111100  ..... ..... 001 ..... 1110111 @r
+kmabb      0101101  ..... ..... 001 ..... 1110111 @r
+kmabt      0110101  ..... ..... 001 ..... 1110111 @r
+kmatt      0111101  ..... ..... 001 ..... 1110111 @r
+kmada      0100100  ..... ..... 001 ..... 1110111 @r
+kmaxda     0100101  ..... ..... 001 ..... 1110111 @r
+kmads      0101110  ..... ..... 001 ..... 1110111 @r
+kmadrs     0110110  ..... ..... 001 ..... 1110111 @r
+kmaxds     0111110  ..... ..... 001 ..... 1110111 @r
+kmsda      0100110  ..... ..... 001 ..... 1110111 @r
+kmsxda     0100111  ..... ..... 001 ..... 1110111 @r
diff --git a/target/riscv/insn_trans/trans_rvp.c.inc b/target/riscv/insn_trans/trans_rvp.c.inc
index af490a5ef0..308fc223db 100644
--- a/target/riscv/insn_trans/trans_rvp.c.inc
+++ b/target/riscv/insn_trans/trans_rvp.c.inc
@@ -431,3 +431,23 @@ GEN_RVP_R_ACC_OOL(kmmawb2);
 GEN_RVP_R_ACC_OOL(kmmawb2_u);
 GEN_RVP_R_ACC_OOL(kmmawt2);
 GEN_RVP_R_ACC_OOL(kmmawt2_u);
+
+/* Signed 16-bit Multiply with 32-bit Add/Subtract Instructions */
+GEN_RVP_R_OOL(smbb16);
+GEN_RVP_R_OOL(smbt16);
+GEN_RVP_R_OOL(smtt16);
+GEN_RVP_R_OOL(kmda);
+GEN_RVP_R_OOL(kmxda);
+GEN_RVP_R_OOL(smds);
+GEN_RVP_R_OOL(smdrs);
+GEN_RVP_R_OOL(smxds);
+GEN_RVP_R_ACC_OOL(kmabb);
+GEN_RVP_R_ACC_OOL(kmabt);
+GEN_RVP_R_ACC_OOL(kmatt);
+GEN_RVP_R_ACC_OOL(kmada);
+GEN_RVP_R_ACC_OOL(kmaxda);
+GEN_RVP_R_ACC_OOL(kmads);
+GEN_RVP_R_ACC_OOL(kmadrs);
+GEN_RVP_R_ACC_OOL(kmaxds);
+GEN_RVP_R_ACC_OOL(kmsda);
+GEN_RVP_R_ACC_OOL(kmsxda);
diff --git a/target/riscv/packed_helper.c b/target/riscv/packed_helper.c
index 868a1a71ba..88509fd118 100644
--- a/target/riscv/packed_helper.c
+++ b/target/riscv/packed_helper.c
@@ -1676,3 +1676,271 @@ static inline void do_kmmawt2_u(CPURISCVState *env, void *vd, void *va,
 }
 
 RVPR_ACC(kmmawt2_u, 1, 4);
+
+/* Signed 16-bit Multiply with 32-bit Add/Subtract Instruction */
+static inline void do_smbb16(CPURISCVState *env, void *vd, void *va,
+                             void *vb, uint8_t i)
+{
+    int32_t *d = vd;
+    int16_t *a = va, *b = vb;
+    d[H4(i)] = (int32_t)a[H2(2 * i)] * b[H2(2 * i)];
+}
+
+RVPR(smbb16, 1, 4);
+
+static inline void do_smbt16(CPURISCVState *env, void *vd, void *va,
+                             void *vb, uint8_t i)
+{
+    int32_t *d = vd;
+    int16_t *a = va, *b = vb;
+    d[H4(i)] = (int32_t)a[H2(2 * i)] * b[H2(2 * i + 1)];
+}
+
+RVPR(smbt16, 1, 4);
+
+static inline void do_smtt16(CPURISCVState *env, void *vd, void *va,
+                             void *vb, uint8_t i)
+{
+    int32_t *d = vd;
+    int16_t *a = va, *b = vb;
+    d[H4(i)] = (int32_t)a[H2(2 * i + 1)] * b[H2(2 * i + 1)];
+}
+
+RVPR(smtt16, 1, 4);
+
+static inline void do_kmda(CPURISCVState *env, void *vd, void *va,
+                           void *vb, uint8_t i)
+{
+    int32_t *d = vd;
+    int16_t *a = va, *b = vb;
+    if (a[H2(2 * i)] == INT16_MIN && a[H2(2 * i + 1)] == INT16_MIN &&
+        b[H2(2 * i)] == INT16_MIN && a[H2(2 * i + 1)] == INT16_MIN) {
+        d[H4(i)] = INT32_MAX;
+        env->vxsat = 0x1;
+    } else {
+        d[H4(i)] = (int32_t)a[H2(2 * i)] * b[H2(2 * i)] +
+                   (int32_t)a[H2(2 * i + 1)] * b[H2(2 * i + 1)];
+    }
+}
+
+RVPR(kmda, 1, 4);
+
+static inline void do_kmxda(CPURISCVState *env, void *vd, void *va,
+                            void *vb, uint8_t i)
+{
+    int32_t *d = vd;
+    int16_t *a = va, *b = vb;
+    if (a[H2(2 * i)] == INT16_MIN && a[H2(2 * i + 1)] == INT16_MIN &&
+        b[H2(2 * i)] == INT16_MIN && a[H2(2 * i + 1)] == INT16_MIN) {
+        d[H4(i)] = INT32_MAX;
+        env->vxsat = 0x1;
+    } else {
+        d[H4(i)] = (int32_t)a[H2(2 * i + 1)] * b[H2(2 * i)] +
+                   (int32_t)a[H2(2 * i)] * b[H2(2 * i + 1)];
+    }
+}
+
+RVPR(kmxda, 1, 4);
+
+static inline void do_smds(CPURISCVState *env, void *vd, void *va,
+                           void *vb, uint8_t i)
+{
+    int32_t *d = vd;
+    int16_t *a = va, *b = vb;
+    d[H4(i)] = (int32_t)a[H2(2 * i + 1)] * b[H2(2 * i + 1)] -
+               (int32_t)a[H2(2 * i)] * b[H2(2 * i)];
+}
+
+RVPR(smds, 1, 4);
+
+static inline void do_smdrs(CPURISCVState *env, void *vd, void *va,
+                            void *vb, uint8_t i)
+{
+    int32_t *d = vd;
+    int16_t *a = va, *b = vb;
+    d[H4(i)] = (int32_t)a[H2(2 * i)] * b[H2(2 * i)] -
+               (int32_t)a[H2(2 * i + 1)] * b[H2(2 * i + 1)];
+}
+
+RVPR(smdrs, 1, 4);
+
+static inline void do_smxds(CPURISCVState *env, void *vd, void *va,
+                            void *vb, uint8_t i)
+{
+    int32_t *d = vd;
+    int16_t *a = va, *b = vb;
+    d[H4(i)] = (int32_t)a[H2(2 * i + 1)] * b[H2(2 * i)] -
+               (int32_t)a[H2(2 * i)] * b[H2(2 * i + 1)];
+}
+
+RVPR(smxds, 1, 4);
+
+static inline void do_kmabb(CPURISCVState *env, void *vd, void *va,
+                            void *vb, void *vc, uint8_t i)
+{
+    int32_t *d = vd, *c = vc;
+    int16_t *a = va, *b = vb;
+    d[H4(i)] = sadd32(env, 0, (int32_t)a[H2(2 * i)] * b[H2(2 * i)], c[H4(i)]);
+}
+
+RVPR_ACC(kmabb, 1, 4);
+
+static inline void do_kmabt(CPURISCVState *env, void *vd, void *va,
+                            void *vb, void *vc, uint8_t i)
+{
+    int32_t *d = vd, *c = vc;
+    int16_t *a = va, *b = vb;
+    d[H4(i)] = sadd32(env, 0, (int32_t)a[H2(2 * i)] * b[H2(2 * i + 1)],
+                      c[H4(i)]);
+}
+
+RVPR_ACC(kmabt, 1, 4);
+
+static inline void do_kmatt(CPURISCVState *env, void *vd, void *va,
+                            void *vb, void *vc, uint8_t i)
+{
+    int32_t *d = vd, *c = vc;
+    int16_t *a = va, *b = vb;
+    d[H4(i)] = sadd32(env, 0, (int32_t)a[H2(2 * i + 1)] * b[H2(2 * i + 1)],
+                      c[H4(i)]);
+}
+
+RVPR_ACC(kmatt, 1, 4);
+
+static inline void do_kmada(CPURISCVState *env, void *vd, void *va,
+                            void *vb, void *vc, uint8_t i)
+{
+    int32_t *d = vd, *c = vc;
+    int16_t *a = va, *b = vb;
+    int32_t p1, p2;
+    p1 = (int32_t)a[H2(2 * i)] * b[H2(2 * i)];
+    p2 = (int32_t)a[H2(2 * i + 1)] * b[H2(2 * i + 1)];
+
+    if (a[H2(i)] == INT16_MIN && a[H2(i + 1)] == INT16_MIN &&
+        b[H2(i)] == INT16_MIN && b[H2(i + 1)] == INT16_MIN) {
+        if (c[H4(i)] < 0) {
+            d[H4(i)] = INT32_MAX + c[H4(i)] + 1ll;
+        } else {
+            env->vxsat = 0x1;
+            d[H4(i)] = INT32_MAX;
+        }
+    } else {
+        d[H4(i)] = sadd32(env, 0, p1 + p2, c[H4(i)]);
+    }
+}
+
+RVPR_ACC(kmada, 1, 4);
+
+static inline void do_kmaxda(CPURISCVState *env, void *vd, void *va,
+                             void *vb, void *vc, uint8_t i)
+{
+    int32_t *d = vd, *c = vc;
+    int16_t *a = va, *b = vb;
+    int32_t p1, p2;
+    p1 = (int32_t)a[H2(2 * i + 1)] * b[H2(2 * i)];
+    p2 = (int32_t)a[H2(2 * i)] * b[H2(2 * i + 1)];
+
+    if (a[H2(2 * i)] == INT16_MIN && a[H2(2 * i + 1)] == INT16_MIN &&
+        b[H2(2 * i)] == INT16_MIN && b[H2(2 * i + 1)] == INT16_MIN) {
+        if (c[H4(i)] < 0) {
+            d[H4(i)] = INT32_MAX + c[H4(i)] + 1ll;
+        } else {
+            env->vxsat = 0x1;
+            d[H4(i)] = INT32_MAX;
+        }
+    } else {
+        d[H4(i)] = sadd32(env, 0, p1 + p2, c[H4(i)]);
+    }
+}
+
+RVPR_ACC(kmaxda, 1, 4);
+
+static inline void do_kmads(CPURISCVState *env, void *vd, void *va,
+                            void *vb, void *vc, uint8_t i)
+{
+    int32_t *d = vd, *c = vc;
+    int16_t *a = va, *b = vb;
+    int32_t p1, p2;
+    p1 =   (int32_t)a[H2(2 * i + 1)] * b[H2(2 * i + 1)];
+    p2 =   (int32_t)a[H2(2 * i)] * b[H2(2 * i)];
+
+    d[H4(i)] = sadd32(env, 0, p1 - p2, c[H4(i)]);
+}
+
+RVPR_ACC(kmads, 1, 4);
+
+static inline void do_kmadrs(CPURISCVState *env, void *vd, void *va,
+                             void *vb, void * vc, uint8_t i)
+{
+    int32_t *d = vd, *c = vc;
+    int16_t *a = va, *b = vb;
+    int32_t p1, p2;
+    p1 = (int32_t)a[H2(2 * i)] * b[H2(2 * i)];
+    p2 = (int32_t)a[H2(2 * i + 1)] * b[H2(2 * i + 1)];
+
+    d[H4(i)] = sadd32(env, 0, p1 - p2, c[H4(i)]);
+}
+
+RVPR_ACC(kmadrs, 1, 4);
+
+static inline void do_kmaxds(CPURISCVState *env, void *vd, void *va,
+                             void *vb, void *vc, uint8_t i)
+{
+    int32_t *d = vd, *c = vc;
+    int16_t *a = va, *b = vb;
+    int32_t p1, p2;
+    p1 = (int32_t)a[H2(2 * i + 1)] * b[H2(2 * i)];
+    p2 = (int32_t)a[H2(2 * i)] * b[H2(2 * i + 1)];
+
+    d[H4(i)] = sadd32(env, 0, p1 - p2, c[H4(i)]);
+}
+
+RVPR_ACC(kmaxds, 1, 4);
+
+static inline void do_kmsda(CPURISCVState *env, void *vd, void *va,
+                            void *vb, void *vc, uint8_t i)
+{
+    int32_t *d = vd, *c = vc;
+    int16_t *a = va, *b = vb;
+    int32_t p1, p2;
+    p1 = (int32_t)a[H2(2 * i)] * b[H2(2 * i)];
+    p2 = (int32_t)a[H2(2 * i + 1)] * b[H2(2 * i + 1)];
+
+    if (a[H2(i)] == INT16_MIN && a[H2(i + 1)] == INT16_MIN &&
+        b[H2(i)] == INT16_MIN && b[H2(i + 1)] == INT16_MIN) {
+        if (c[H4(i)] < 0) {
+            env->vxsat = 0x1;
+            d[H4(i)] = INT32_MIN;
+        } else {
+            d[H4(i)] = c[H4(i)] - 1ll - INT32_MAX;
+        }
+    } else {
+        d[H4(i)] = ssub32(env, 0, c[H4(i)], p1 + p2);
+    }
+}
+
+RVPR_ACC(kmsda, 1, 4);
+
+static inline void do_kmsxda(CPURISCVState *env, void *vd, void *va,
+                             void *vb, void * vc, uint8_t i)
+{
+    int32_t *d = vd, *c = vc;
+    int16_t *a = va, *b = vb;
+    int32_t p1, p2;
+    p1 = (int32_t)a[H2(2 * i)] * b[H2(2 * i + 1)];
+    p2 = (int32_t)a[H2(2 * i + 1)] * b[H2(2 * i)];
+
+    if (a[H2(i)] == INT16_MIN && a[H2(i + 1)] == INT16_MIN &&
+        b[H2(i)] == INT16_MIN && b[H2(i + 1)] == INT16_MIN) {
+        if (d[H4(i)] < 0) {
+            env->vxsat = 0x1;
+            d[H4(i)] = INT32_MIN;
+        } else {
+            d[H4(i)] = c[H4(i)] - 1ll - INT32_MAX;
+        }
+    } else {
+        d[H4(i)] = ssub32(env, 0, c[H4(i)], p1 + p2);
+    }
+}
+
+RVPR_ACC(kmsxda, 1, 4);
-- 
2.25.1



WARNING: multiple messages have this Message-ID (diff)
From: LIU Zhiwei <zhiwei_liu@c-sky.com>
To: qemu-devel@nongnu.org, qemu-riscv@nongnu.org
Cc: Alistair.Francis@wdc.com, palmer@dabbelt.com,
	bin.meng@windriver.com, richard.henderson@linaro.org,
	LIU Zhiwei <zhiwei_liu@c-sky.com>
Subject: [PATCH v2 17/37] target/riscv: Signed 16-bit Multiply 32-bit Add/Subtract Instructions
Date: Thu, 10 Jun 2021 15:58:48 +0800	[thread overview]
Message-ID: <20210610075908.3305506-18-zhiwei_liu@c-sky.com> (raw)
In-Reply-To: <20210610075908.3305506-1-zhiwei_liu@c-sky.com>

Always contain a signed 16x16 multiply and the 32-bit result can be
written to the destination register or as an operand for an add/subtract
operation.

Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com>
---
 target/riscv/helper.h                   |  19 ++
 target/riscv/insn32.decode              |  19 ++
 target/riscv/insn_trans/trans_rvp.c.inc |  20 ++
 target/riscv/packed_helper.c            | 268 ++++++++++++++++++++++++
 4 files changed, 326 insertions(+)

diff --git a/target/riscv/helper.h b/target/riscv/helper.h
index 854f48d385..5aac6ba578 100644
--- a/target/riscv/helper.h
+++ b/target/riscv/helper.h
@@ -1297,3 +1297,22 @@ DEF_HELPER_4(kmmawb2, tl, env, tl, tl, tl)
 DEF_HELPER_4(kmmawb2_u, tl, env, tl, tl, tl)
 DEF_HELPER_4(kmmawt2, tl, env, tl, tl, tl)
 DEF_HELPER_4(kmmawt2_u, tl, env, tl, tl, tl)
+
+DEF_HELPER_3(smbb16, tl, env, tl, tl)
+DEF_HELPER_3(smbt16, tl, env, tl, tl)
+DEF_HELPER_3(smtt16, tl, env, tl, tl)
+DEF_HELPER_3(kmda, tl, env, tl, tl)
+DEF_HELPER_3(kmxda, tl, env, tl, tl)
+DEF_HELPER_3(smds, tl, env, tl, tl)
+DEF_HELPER_3(smdrs, tl, env, tl, tl)
+DEF_HELPER_3(smxds, tl, env, tl, tl)
+DEF_HELPER_4(kmabb, tl, env, tl, tl, tl)
+DEF_HELPER_4(kmabt, tl, env, tl, tl, tl)
+DEF_HELPER_4(kmatt, tl, env, tl, tl, tl)
+DEF_HELPER_4(kmada, tl, env, tl, tl, tl)
+DEF_HELPER_4(kmaxda, tl, env, tl, tl, tl)
+DEF_HELPER_4(kmads, tl, env, tl, tl, tl)
+DEF_HELPER_4(kmadrs, tl, env, tl, tl, tl)
+DEF_HELPER_4(kmaxds, tl, env, tl, tl, tl)
+DEF_HELPER_4(kmsda, tl, env, tl, tl, tl)
+DEF_HELPER_4(kmsxda, tl, env, tl, tl, tl)
diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode
index e5a8f663dc..f590880750 100644
--- a/target/riscv/insn32.decode
+++ b/target/riscv/insn32.decode
@@ -900,3 +900,22 @@ kmmawb2    1100111  ..... ..... 001 ..... 1110111 @r
 kmmawb2_u  1101111  ..... ..... 001 ..... 1110111 @r
 kmmawt2    1110111  ..... ..... 001 ..... 1110111 @r
 kmmawt2_u  1111111  ..... ..... 001 ..... 1110111 @r
+
+smbb16     0000100  ..... ..... 001 ..... 1110111 @r
+smbt16     0001100  ..... ..... 001 ..... 1110111 @r
+smtt16     0010100  ..... ..... 001 ..... 1110111 @r
+kmda       0011100  ..... ..... 001 ..... 1110111 @r
+kmxda      0011101  ..... ..... 001 ..... 1110111 @r
+smds       0101100  ..... ..... 001 ..... 1110111 @r
+smdrs      0110100  ..... ..... 001 ..... 1110111 @r
+smxds      0111100  ..... ..... 001 ..... 1110111 @r
+kmabb      0101101  ..... ..... 001 ..... 1110111 @r
+kmabt      0110101  ..... ..... 001 ..... 1110111 @r
+kmatt      0111101  ..... ..... 001 ..... 1110111 @r
+kmada      0100100  ..... ..... 001 ..... 1110111 @r
+kmaxda     0100101  ..... ..... 001 ..... 1110111 @r
+kmads      0101110  ..... ..... 001 ..... 1110111 @r
+kmadrs     0110110  ..... ..... 001 ..... 1110111 @r
+kmaxds     0111110  ..... ..... 001 ..... 1110111 @r
+kmsda      0100110  ..... ..... 001 ..... 1110111 @r
+kmsxda     0100111  ..... ..... 001 ..... 1110111 @r
diff --git a/target/riscv/insn_trans/trans_rvp.c.inc b/target/riscv/insn_trans/trans_rvp.c.inc
index af490a5ef0..308fc223db 100644
--- a/target/riscv/insn_trans/trans_rvp.c.inc
+++ b/target/riscv/insn_trans/trans_rvp.c.inc
@@ -431,3 +431,23 @@ GEN_RVP_R_ACC_OOL(kmmawb2);
 GEN_RVP_R_ACC_OOL(kmmawb2_u);
 GEN_RVP_R_ACC_OOL(kmmawt2);
 GEN_RVP_R_ACC_OOL(kmmawt2_u);
+
+/* Signed 16-bit Multiply with 32-bit Add/Subtract Instructions */
+GEN_RVP_R_OOL(smbb16);
+GEN_RVP_R_OOL(smbt16);
+GEN_RVP_R_OOL(smtt16);
+GEN_RVP_R_OOL(kmda);
+GEN_RVP_R_OOL(kmxda);
+GEN_RVP_R_OOL(smds);
+GEN_RVP_R_OOL(smdrs);
+GEN_RVP_R_OOL(smxds);
+GEN_RVP_R_ACC_OOL(kmabb);
+GEN_RVP_R_ACC_OOL(kmabt);
+GEN_RVP_R_ACC_OOL(kmatt);
+GEN_RVP_R_ACC_OOL(kmada);
+GEN_RVP_R_ACC_OOL(kmaxda);
+GEN_RVP_R_ACC_OOL(kmads);
+GEN_RVP_R_ACC_OOL(kmadrs);
+GEN_RVP_R_ACC_OOL(kmaxds);
+GEN_RVP_R_ACC_OOL(kmsda);
+GEN_RVP_R_ACC_OOL(kmsxda);
diff --git a/target/riscv/packed_helper.c b/target/riscv/packed_helper.c
index 868a1a71ba..88509fd118 100644
--- a/target/riscv/packed_helper.c
+++ b/target/riscv/packed_helper.c
@@ -1676,3 +1676,271 @@ static inline void do_kmmawt2_u(CPURISCVState *env, void *vd, void *va,
 }
 
 RVPR_ACC(kmmawt2_u, 1, 4);
+
+/* Signed 16-bit Multiply with 32-bit Add/Subtract Instruction */
+static inline void do_smbb16(CPURISCVState *env, void *vd, void *va,
+                             void *vb, uint8_t i)
+{
+    int32_t *d = vd;
+    int16_t *a = va, *b = vb;
+    d[H4(i)] = (int32_t)a[H2(2 * i)] * b[H2(2 * i)];
+}
+
+RVPR(smbb16, 1, 4);
+
+static inline void do_smbt16(CPURISCVState *env, void *vd, void *va,
+                             void *vb, uint8_t i)
+{
+    int32_t *d = vd;
+    int16_t *a = va, *b = vb;
+    d[H4(i)] = (int32_t)a[H2(2 * i)] * b[H2(2 * i + 1)];
+}
+
+RVPR(smbt16, 1, 4);
+
+static inline void do_smtt16(CPURISCVState *env, void *vd, void *va,
+                             void *vb, uint8_t i)
+{
+    int32_t *d = vd;
+    int16_t *a = va, *b = vb;
+    d[H4(i)] = (int32_t)a[H2(2 * i + 1)] * b[H2(2 * i + 1)];
+}
+
+RVPR(smtt16, 1, 4);
+
+static inline void do_kmda(CPURISCVState *env, void *vd, void *va,
+                           void *vb, uint8_t i)
+{
+    int32_t *d = vd;
+    int16_t *a = va, *b = vb;
+    if (a[H2(2 * i)] == INT16_MIN && a[H2(2 * i + 1)] == INT16_MIN &&
+        b[H2(2 * i)] == INT16_MIN && a[H2(2 * i + 1)] == INT16_MIN) {
+        d[H4(i)] = INT32_MAX;
+        env->vxsat = 0x1;
+    } else {
+        d[H4(i)] = (int32_t)a[H2(2 * i)] * b[H2(2 * i)] +
+                   (int32_t)a[H2(2 * i + 1)] * b[H2(2 * i + 1)];
+    }
+}
+
+RVPR(kmda, 1, 4);
+
+static inline void do_kmxda(CPURISCVState *env, void *vd, void *va,
+                            void *vb, uint8_t i)
+{
+    int32_t *d = vd;
+    int16_t *a = va, *b = vb;
+    if (a[H2(2 * i)] == INT16_MIN && a[H2(2 * i + 1)] == INT16_MIN &&
+        b[H2(2 * i)] == INT16_MIN && a[H2(2 * i + 1)] == INT16_MIN) {
+        d[H4(i)] = INT32_MAX;
+        env->vxsat = 0x1;
+    } else {
+        d[H4(i)] = (int32_t)a[H2(2 * i + 1)] * b[H2(2 * i)] +
+                   (int32_t)a[H2(2 * i)] * b[H2(2 * i + 1)];
+    }
+}
+
+RVPR(kmxda, 1, 4);
+
+static inline void do_smds(CPURISCVState *env, void *vd, void *va,
+                           void *vb, uint8_t i)
+{
+    int32_t *d = vd;
+    int16_t *a = va, *b = vb;
+    d[H4(i)] = (int32_t)a[H2(2 * i + 1)] * b[H2(2 * i + 1)] -
+               (int32_t)a[H2(2 * i)] * b[H2(2 * i)];
+}
+
+RVPR(smds, 1, 4);
+
+static inline void do_smdrs(CPURISCVState *env, void *vd, void *va,
+                            void *vb, uint8_t i)
+{
+    int32_t *d = vd;
+    int16_t *a = va, *b = vb;
+    d[H4(i)] = (int32_t)a[H2(2 * i)] * b[H2(2 * i)] -
+               (int32_t)a[H2(2 * i + 1)] * b[H2(2 * i + 1)];
+}
+
+RVPR(smdrs, 1, 4);
+
+static inline void do_smxds(CPURISCVState *env, void *vd, void *va,
+                            void *vb, uint8_t i)
+{
+    int32_t *d = vd;
+    int16_t *a = va, *b = vb;
+    d[H4(i)] = (int32_t)a[H2(2 * i + 1)] * b[H2(2 * i)] -
+               (int32_t)a[H2(2 * i)] * b[H2(2 * i + 1)];
+}
+
+RVPR(smxds, 1, 4);
+
+static inline void do_kmabb(CPURISCVState *env, void *vd, void *va,
+                            void *vb, void *vc, uint8_t i)
+{
+    int32_t *d = vd, *c = vc;
+    int16_t *a = va, *b = vb;
+    d[H4(i)] = sadd32(env, 0, (int32_t)a[H2(2 * i)] * b[H2(2 * i)], c[H4(i)]);
+}
+
+RVPR_ACC(kmabb, 1, 4);
+
+static inline void do_kmabt(CPURISCVState *env, void *vd, void *va,
+                            void *vb, void *vc, uint8_t i)
+{
+    int32_t *d = vd, *c = vc;
+    int16_t *a = va, *b = vb;
+    d[H4(i)] = sadd32(env, 0, (int32_t)a[H2(2 * i)] * b[H2(2 * i + 1)],
+                      c[H4(i)]);
+}
+
+RVPR_ACC(kmabt, 1, 4);
+
+static inline void do_kmatt(CPURISCVState *env, void *vd, void *va,
+                            void *vb, void *vc, uint8_t i)
+{
+    int32_t *d = vd, *c = vc;
+    int16_t *a = va, *b = vb;
+    d[H4(i)] = sadd32(env, 0, (int32_t)a[H2(2 * i + 1)] * b[H2(2 * i + 1)],
+                      c[H4(i)]);
+}
+
+RVPR_ACC(kmatt, 1, 4);
+
+static inline void do_kmada(CPURISCVState *env, void *vd, void *va,
+                            void *vb, void *vc, uint8_t i)
+{
+    int32_t *d = vd, *c = vc;
+    int16_t *a = va, *b = vb;
+    int32_t p1, p2;
+    p1 = (int32_t)a[H2(2 * i)] * b[H2(2 * i)];
+    p2 = (int32_t)a[H2(2 * i + 1)] * b[H2(2 * i + 1)];
+
+    if (a[H2(i)] == INT16_MIN && a[H2(i + 1)] == INT16_MIN &&
+        b[H2(i)] == INT16_MIN && b[H2(i + 1)] == INT16_MIN) {
+        if (c[H4(i)] < 0) {
+            d[H4(i)] = INT32_MAX + c[H4(i)] + 1ll;
+        } else {
+            env->vxsat = 0x1;
+            d[H4(i)] = INT32_MAX;
+        }
+    } else {
+        d[H4(i)] = sadd32(env, 0, p1 + p2, c[H4(i)]);
+    }
+}
+
+RVPR_ACC(kmada, 1, 4);
+
+static inline void do_kmaxda(CPURISCVState *env, void *vd, void *va,
+                             void *vb, void *vc, uint8_t i)
+{
+    int32_t *d = vd, *c = vc;
+    int16_t *a = va, *b = vb;
+    int32_t p1, p2;
+    p1 = (int32_t)a[H2(2 * i + 1)] * b[H2(2 * i)];
+    p2 = (int32_t)a[H2(2 * i)] * b[H2(2 * i + 1)];
+
+    if (a[H2(2 * i)] == INT16_MIN && a[H2(2 * i + 1)] == INT16_MIN &&
+        b[H2(2 * i)] == INT16_MIN && b[H2(2 * i + 1)] == INT16_MIN) {
+        if (c[H4(i)] < 0) {
+            d[H4(i)] = INT32_MAX + c[H4(i)] + 1ll;
+        } else {
+            env->vxsat = 0x1;
+            d[H4(i)] = INT32_MAX;
+        }
+    } else {
+        d[H4(i)] = sadd32(env, 0, p1 + p2, c[H4(i)]);
+    }
+}
+
+RVPR_ACC(kmaxda, 1, 4);
+
+static inline void do_kmads(CPURISCVState *env, void *vd, void *va,
+                            void *vb, void *vc, uint8_t i)
+{
+    int32_t *d = vd, *c = vc;
+    int16_t *a = va, *b = vb;
+    int32_t p1, p2;
+    p1 =   (int32_t)a[H2(2 * i + 1)] * b[H2(2 * i + 1)];
+    p2 =   (int32_t)a[H2(2 * i)] * b[H2(2 * i)];
+
+    d[H4(i)] = sadd32(env, 0, p1 - p2, c[H4(i)]);
+}
+
+RVPR_ACC(kmads, 1, 4);
+
+static inline void do_kmadrs(CPURISCVState *env, void *vd, void *va,
+                             void *vb, void * vc, uint8_t i)
+{
+    int32_t *d = vd, *c = vc;
+    int16_t *a = va, *b = vb;
+    int32_t p1, p2;
+    p1 = (int32_t)a[H2(2 * i)] * b[H2(2 * i)];
+    p2 = (int32_t)a[H2(2 * i + 1)] * b[H2(2 * i + 1)];
+
+    d[H4(i)] = sadd32(env, 0, p1 - p2, c[H4(i)]);
+}
+
+RVPR_ACC(kmadrs, 1, 4);
+
+static inline void do_kmaxds(CPURISCVState *env, void *vd, void *va,
+                             void *vb, void *vc, uint8_t i)
+{
+    int32_t *d = vd, *c = vc;
+    int16_t *a = va, *b = vb;
+    int32_t p1, p2;
+    p1 = (int32_t)a[H2(2 * i + 1)] * b[H2(2 * i)];
+    p2 = (int32_t)a[H2(2 * i)] * b[H2(2 * i + 1)];
+
+    d[H4(i)] = sadd32(env, 0, p1 - p2, c[H4(i)]);
+}
+
+RVPR_ACC(kmaxds, 1, 4);
+
+static inline void do_kmsda(CPURISCVState *env, void *vd, void *va,
+                            void *vb, void *vc, uint8_t i)
+{
+    int32_t *d = vd, *c = vc;
+    int16_t *a = va, *b = vb;
+    int32_t p1, p2;
+    p1 = (int32_t)a[H2(2 * i)] * b[H2(2 * i)];
+    p2 = (int32_t)a[H2(2 * i + 1)] * b[H2(2 * i + 1)];
+
+    if (a[H2(i)] == INT16_MIN && a[H2(i + 1)] == INT16_MIN &&
+        b[H2(i)] == INT16_MIN && b[H2(i + 1)] == INT16_MIN) {
+        if (c[H4(i)] < 0) {
+            env->vxsat = 0x1;
+            d[H4(i)] = INT32_MIN;
+        } else {
+            d[H4(i)] = c[H4(i)] - 1ll - INT32_MAX;
+        }
+    } else {
+        d[H4(i)] = ssub32(env, 0, c[H4(i)], p1 + p2);
+    }
+}
+
+RVPR_ACC(kmsda, 1, 4);
+
+static inline void do_kmsxda(CPURISCVState *env, void *vd, void *va,
+                             void *vb, void * vc, uint8_t i)
+{
+    int32_t *d = vd, *c = vc;
+    int16_t *a = va, *b = vb;
+    int32_t p1, p2;
+    p1 = (int32_t)a[H2(2 * i)] * b[H2(2 * i + 1)];
+    p2 = (int32_t)a[H2(2 * i + 1)] * b[H2(2 * i)];
+
+    if (a[H2(i)] == INT16_MIN && a[H2(i + 1)] == INT16_MIN &&
+        b[H2(i)] == INT16_MIN && b[H2(i + 1)] == INT16_MIN) {
+        if (d[H4(i)] < 0) {
+            env->vxsat = 0x1;
+            d[H4(i)] = INT32_MIN;
+        } else {
+            d[H4(i)] = c[H4(i)] - 1ll - INT32_MAX;
+        }
+    } else {
+        d[H4(i)] = ssub32(env, 0, c[H4(i)], p1 + p2);
+    }
+}
+
+RVPR_ACC(kmsxda, 1, 4);
-- 
2.25.1



  parent reply	other threads:[~2021-06-10  8:23 UTC|newest]

Thread overview: 88+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-06-10  7:58 [PATCH v2 00/37] target/riscv: support packed extension v0.9.4 LIU Zhiwei
2021-06-10  7:58 ` LIU Zhiwei
2021-06-10  7:58 ` [PATCH v2 01/37] target/riscv: implementation-defined constant parameters LIU Zhiwei
2021-06-10  7:58   ` LIU Zhiwei
2021-06-10  7:58 ` [PATCH v2 02/37] target/riscv: Make the vector helper functions public LIU Zhiwei
2021-06-10  7:58   ` LIU Zhiwei
2021-06-10  7:58 ` [PATCH v2 03/37] target/riscv: 16-bit Addition & Subtraction Instructions LIU Zhiwei
2021-06-10  7:58   ` LIU Zhiwei
2021-06-10 18:00   ` Richard Henderson
2021-06-10 18:00     ` Richard Henderson
2021-06-10  7:58 ` [PATCH v2 04/37] target/riscv: 8-bit Addition & Subtraction Instruction LIU Zhiwei
2021-06-10  7:58   ` LIU Zhiwei
2021-06-10 19:39   ` Richard Henderson
2021-06-10 19:39     ` Richard Henderson
2021-06-11  4:36     ` LIU Zhiwei
2021-06-11  4:36       ` LIU Zhiwei
2021-06-24  6:05     ` LIU Zhiwei
2021-06-24  6:05       ` LIU Zhiwei
2021-06-10  7:58 ` [PATCH v2 05/37] target/riscv: SIMD 16-bit Shift Instructions LIU Zhiwei
2021-06-10  7:58   ` LIU Zhiwei
2021-06-10 19:44   ` Richard Henderson
2021-06-10 19:44     ` Richard Henderson
2021-06-10  7:58 ` [PATCH v2 06/37] target/riscv: SIMD 8-bit " LIU Zhiwei
2021-06-10  7:58   ` LIU Zhiwei
2021-06-10  7:58 ` [PATCH v2 07/37] target/riscv: SIMD 16-bit Compare Instructions LIU Zhiwei
2021-06-10  7:58   ` LIU Zhiwei
2021-06-10  7:58 ` [PATCH v2 08/37] target/riscv: SIMD 8-bit " LIU Zhiwei
2021-06-10  7:58   ` LIU Zhiwei
2021-06-10  7:58 ` [PATCH v2 09/37] target/riscv: SIMD 16-bit Multiply Instructions LIU Zhiwei
2021-06-10  7:58   ` LIU Zhiwei
2021-06-10  7:58 ` [PATCH v2 10/37] target/riscv: SIMD 8-bit " LIU Zhiwei
2021-06-10  7:58   ` LIU Zhiwei
2021-06-10  7:58 ` [PATCH v2 11/37] target/riscv: SIMD 16-bit Miscellaneous Instructions LIU Zhiwei
2021-06-10  7:58   ` LIU Zhiwei
2021-06-10  7:58 ` [PATCH v2 12/37] target/riscv: SIMD 8-bit " LIU Zhiwei
2021-06-10  7:58   ` LIU Zhiwei
2021-06-10  7:58 ` [PATCH v2 13/37] target/riscv: 8-bit Unpacking Instructions LIU Zhiwei
2021-06-10  7:58   ` LIU Zhiwei
2021-06-10  7:58 ` [PATCH v2 14/37] target/riscv: 16-bit Packing Instructions LIU Zhiwei
2021-06-10  7:58   ` LIU Zhiwei
2021-06-10  7:58 ` [PATCH v2 15/37] target/riscv: Signed MSW 32x32 Multiply and Add Instructions LIU Zhiwei
2021-06-10  7:58   ` LIU Zhiwei
2021-06-10  7:58 ` [PATCH v2 16/37] target/riscv: Signed MSW 32x16 " LIU Zhiwei
2021-06-10  7:58   ` LIU Zhiwei
2021-06-10  7:58 ` LIU Zhiwei [this message]
2021-06-10  7:58   ` [PATCH v2 17/37] target/riscv: Signed 16-bit Multiply 32-bit Add/Subtract Instructions LIU Zhiwei
2021-06-10  7:58 ` [PATCH v2 18/37] target/riscv: Signed 16-bit Multiply 64-bit " LIU Zhiwei
2021-06-10  7:58   ` LIU Zhiwei
2021-06-10  7:58 ` [PATCH v2 19/37] target/riscv: Partial-SIMD Miscellaneous Instructions LIU Zhiwei
2021-06-10  7:58   ` LIU Zhiwei
2021-06-10  7:58 ` [PATCH v2 20/37] target/riscv: 8-bit Multiply with 32-bit Add Instructions LIU Zhiwei
2021-06-10  7:58   ` LIU Zhiwei
2021-06-10  7:58 ` [PATCH v2 21/37] target/riscv: 64-bit Add/Subtract Instructions LIU Zhiwei
2021-06-10  7:58   ` LIU Zhiwei
2021-06-10  7:58 ` [PATCH v2 22/37] target/riscv: 32-bit Multiply " LIU Zhiwei
2021-06-10  7:58   ` LIU Zhiwei
2021-06-10  7:58 ` [PATCH v2 23/37] target/riscv: Signed 16-bit Multiply with " LIU Zhiwei
2021-06-10  7:58   ` LIU Zhiwei
2021-06-10  7:58 ` [PATCH v2 24/37] target/riscv: Non-SIMD Q15 saturation ALU Instructions LIU Zhiwei
2021-06-10  7:58   ` LIU Zhiwei
2021-06-10  7:58 ` [PATCH v2 25/37] target/riscv: Non-SIMD Q31 " LIU Zhiwei
2021-06-10  7:58   ` LIU Zhiwei
2021-06-10  7:58 ` [PATCH v2 26/37] target/riscv: 32-bit Computation Instructions LIU Zhiwei
2021-06-10  7:58   ` LIU Zhiwei
2021-06-10  7:58 ` [PATCH v2 27/37] target/riscv: Non-SIMD Miscellaneous Instructions LIU Zhiwei
2021-06-10  7:58   ` LIU Zhiwei
2021-06-10  7:58 ` [PATCH v2 28/37] target/riscv: RV64 Only SIMD 32-bit Add/Subtract Instructions LIU Zhiwei
2021-06-10  7:58   ` LIU Zhiwei
2021-06-10  7:59 ` [PATCH v2 29/37] target/riscv: RV64 Only SIMD 32-bit Shift Instructions LIU Zhiwei
2021-06-10  7:59   ` LIU Zhiwei
2021-06-10  7:59 ` [PATCH v2 30/37] target/riscv: RV64 Only SIMD 32-bit Miscellaneous Instructions LIU Zhiwei
2021-06-10  7:59   ` LIU Zhiwei
2021-06-10  7:59 ` [PATCH v2 31/37] target/riscv: RV64 Only SIMD Q15 saturating Multiply Instructions LIU Zhiwei
2021-06-10  7:59   ` LIU Zhiwei
2021-06-10  7:59 ` [PATCH v2 32/37] target/riscv: RV64 Only 32-bit " LIU Zhiwei
2021-06-10  7:59   ` LIU Zhiwei
2021-06-10  7:59 ` [PATCH v2 33/37] target/riscv: RV64 Only 32-bit Multiply & Add Instructions LIU Zhiwei
2021-06-10  7:59   ` LIU Zhiwei
2021-06-10  7:59 ` [PATCH v2 34/37] target/riscv: RV64 Only 32-bit Parallel " LIU Zhiwei
2021-06-10  7:59   ` LIU Zhiwei
2021-06-10  7:59 ` [PATCH v2 35/37] target/riscv: RV64 Only Non-SIMD 32-bit Shift Instructions LIU Zhiwei
2021-06-10  7:59   ` LIU Zhiwei
2021-06-10  7:59 ` [PATCH v2 36/37] target/riscv: RV64 Only 32-bit Packing Instructions LIU Zhiwei
2021-06-10  7:59   ` LIU Zhiwei
2021-06-10  7:59 ` [PATCH v2 37/37] target/riscv: configure and turn on packed extension from command line LIU Zhiwei
2021-06-10  7:59   ` LIU Zhiwei
2021-06-14 22:55 ` [PATCH v2 00/37] target/riscv: support packed extension v0.9.4 no-reply
2021-06-14 22:55   ` no-reply

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20210610075908.3305506-18-zhiwei_liu@c-sky.com \
    --to=zhiwei_liu@c-sky.com \
    --cc=Alistair.Francis@wdc.com \
    --cc=bin.meng@windriver.com \
    --cc=palmer@dabbelt.com \
    --cc=qemu-devel@nongnu.org \
    --cc=qemu-riscv@nongnu.org \
    --cc=richard.henderson@linaro.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.