All of lore.kernel.org
 help / color / mirror / Atom feed
From: frank.chang@sifive.com
To: qemu-devel@nongnu.org, qemu-riscv@nongnu.org
Cc: Frank Chang <frank.chang@sifive.com>,
	Palmer Dabbelt <palmer@dabbelt.com>,
	Alistair Francis <alistair.francis@wdc.com>,
	Bin Meng <bin.meng@windriver.com>,
	Richard Henderson <richard.henderson@linaro.org>
Subject: [PATCH v8 41/78] target/riscv: rvv-1.0: single-width averaging add and subtract instructions
Date: Fri, 15 Oct 2021 15:45:49 +0800	[thread overview]
Message-ID: <20211015074627.3957162-49-frank.chang@sifive.com> (raw)
In-Reply-To: <20211015074627.3957162-1-frank.chang@sifive.com>

From: Frank Chang <frank.chang@sifive.com>

Add the following instructions:

* vaaddu.vv
* vaaddu.vx
* vasubu.vv
* vasubu.vx

Remove the following instructions:

* vadd.vi

Signed-off-by: Frank Chang <frank.chang@sifive.com>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
---
 target/riscv/helper.h                   | 16 ++++++
 target/riscv/insn32.decode              | 13 +++--
 target/riscv/insn_trans/trans_rvv.c.inc |  5 +-
 target/riscv/vector_helper.c            | 74 +++++++++++++++++++++++++
 4 files changed, 102 insertions(+), 6 deletions(-)

diff --git a/target/riscv/helper.h b/target/riscv/helper.h
index 878d82caf61..f2e8d107d2f 100644
--- a/target/riscv/helper.h
+++ b/target/riscv/helper.h
@@ -648,18 +648,34 @@ DEF_HELPER_6(vaadd_vv_b, void, ptr, ptr, ptr, ptr, env, i32)
 DEF_HELPER_6(vaadd_vv_h, void, ptr, ptr, ptr, ptr, env, i32)
 DEF_HELPER_6(vaadd_vv_w, void, ptr, ptr, ptr, ptr, env, i32)
 DEF_HELPER_6(vaadd_vv_d, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_6(vaaddu_vv_b, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_6(vaaddu_vv_h, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_6(vaaddu_vv_w, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_6(vaaddu_vv_d, void, ptr, ptr, ptr, ptr, env, i32)
 DEF_HELPER_6(vasub_vv_b, void, ptr, ptr, ptr, ptr, env, i32)
 DEF_HELPER_6(vasub_vv_h, void, ptr, ptr, ptr, ptr, env, i32)
 DEF_HELPER_6(vasub_vv_w, void, ptr, ptr, ptr, ptr, env, i32)
 DEF_HELPER_6(vasub_vv_d, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_6(vasubu_vv_b, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_6(vasubu_vv_h, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_6(vasubu_vv_w, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_6(vasubu_vv_d, void, ptr, ptr, ptr, ptr, env, i32)
 DEF_HELPER_6(vaadd_vx_b, void, ptr, ptr, tl, ptr, env, i32)
 DEF_HELPER_6(vaadd_vx_h, void, ptr, ptr, tl, ptr, env, i32)
 DEF_HELPER_6(vaadd_vx_w, void, ptr, ptr, tl, ptr, env, i32)
 DEF_HELPER_6(vaadd_vx_d, void, ptr, ptr, tl, ptr, env, i32)
+DEF_HELPER_6(vaaddu_vx_b, void, ptr, ptr, tl, ptr, env, i32)
+DEF_HELPER_6(vaaddu_vx_h, void, ptr, ptr, tl, ptr, env, i32)
+DEF_HELPER_6(vaaddu_vx_w, void, ptr, ptr, tl, ptr, env, i32)
+DEF_HELPER_6(vaaddu_vx_d, void, ptr, ptr, tl, ptr, env, i32)
 DEF_HELPER_6(vasub_vx_b, void, ptr, ptr, tl, ptr, env, i32)
 DEF_HELPER_6(vasub_vx_h, void, ptr, ptr, tl, ptr, env, i32)
 DEF_HELPER_6(vasub_vx_w, void, ptr, ptr, tl, ptr, env, i32)
 DEF_HELPER_6(vasub_vx_d, void, ptr, ptr, tl, ptr, env, i32)
+DEF_HELPER_6(vasubu_vx_b, void, ptr, ptr, tl, ptr, env, i32)
+DEF_HELPER_6(vasubu_vx_h, void, ptr, ptr, tl, ptr, env, i32)
+DEF_HELPER_6(vasubu_vx_w, void, ptr, ptr, tl, ptr, env, i32)
+DEF_HELPER_6(vasubu_vx_d, void, ptr, ptr, tl, ptr, env, i32)
 
 DEF_HELPER_6(vsmul_vv_b, void, ptr, ptr, ptr, ptr, env, i32)
 DEF_HELPER_6(vsmul_vv_h, void, ptr, ptr, ptr, ptr, env, i32)
diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode
index a6f9e5dcc66..f83c8daf24e 100644
--- a/target/riscv/insn32.decode
+++ b/target/riscv/insn32.decode
@@ -493,11 +493,14 @@ vssubu_vv       100010 . ..... ..... 000 ..... 1010111 @r_vm
 vssubu_vx       100010 . ..... ..... 100 ..... 1010111 @r_vm
 vssub_vv        100011 . ..... ..... 000 ..... 1010111 @r_vm
 vssub_vx        100011 . ..... ..... 100 ..... 1010111 @r_vm
-vaadd_vv        100100 . ..... ..... 000 ..... 1010111 @r_vm
-vaadd_vx        100100 . ..... ..... 100 ..... 1010111 @r_vm
-vaadd_vi        100100 . ..... ..... 011 ..... 1010111 @r_vm
-vasub_vv        100110 . ..... ..... 000 ..... 1010111 @r_vm
-vasub_vx        100110 . ..... ..... 100 ..... 1010111 @r_vm
+vaadd_vv        001001 . ..... ..... 010 ..... 1010111 @r_vm
+vaadd_vx        001001 . ..... ..... 110 ..... 1010111 @r_vm
+vaaddu_vv       001000 . ..... ..... 010 ..... 1010111 @r_vm
+vaaddu_vx       001000 . ..... ..... 110 ..... 1010111 @r_vm
+vasub_vv        001011 . ..... ..... 010 ..... 1010111 @r_vm
+vasub_vx        001011 . ..... ..... 110 ..... 1010111 @r_vm
+vasubu_vv       001010 . ..... ..... 010 ..... 1010111 @r_vm
+vasubu_vx       001010 . ..... ..... 110 ..... 1010111 @r_vm
 vsmul_vv        100111 . ..... ..... 000 ..... 1010111 @r_vm
 vsmul_vx        100111 . ..... ..... 100 ..... 1010111 @r_vm
 vwsmaccu_vv     111100 . ..... ..... 000 ..... 1010111 @r_vm
diff --git a/target/riscv/insn_trans/trans_rvv.c.inc b/target/riscv/insn_trans/trans_rvv.c.inc
index 43cdc846d9f..0fc19b19c69 100644
--- a/target/riscv/insn_trans/trans_rvv.c.inc
+++ b/target/riscv/insn_trans/trans_rvv.c.inc
@@ -2003,10 +2003,13 @@ GEN_OPIVI_TRANS(vsadd_vi, IMM_SX, vsadd_vx, opivx_check)
 
 /* Vector Single-Width Averaging Add and Subtract */
 GEN_OPIVV_TRANS(vaadd_vv, opivv_check)
+GEN_OPIVV_TRANS(vaaddu_vv, opivv_check)
 GEN_OPIVV_TRANS(vasub_vv, opivv_check)
+GEN_OPIVV_TRANS(vasubu_vv, opivv_check)
 GEN_OPIVX_TRANS(vaadd_vx,  opivx_check)
+GEN_OPIVX_TRANS(vaaddu_vx,  opivx_check)
 GEN_OPIVX_TRANS(vasub_vx,  opivx_check)
-GEN_OPIVI_TRANS(vaadd_vi, 0, vaadd_vx, opivx_check)
+GEN_OPIVX_TRANS(vasubu_vx,  opivx_check)
 
 /* Vector Single-Width Fractional Multiply with Rounding and Saturation */
 GEN_OPIVV_TRANS(vsmul_vv, opivv_check)
diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c
index 58ba2a7d99b..6891f28116f 100644
--- a/target/riscv/vector_helper.c
+++ b/target/riscv/vector_helper.c
@@ -2295,6 +2295,43 @@ GEN_VEXT_VX_RM(vaadd_vx_h, 2, 2)
 GEN_VEXT_VX_RM(vaadd_vx_w, 4, 4)
 GEN_VEXT_VX_RM(vaadd_vx_d, 8, 8)
 
+static inline uint32_t aaddu32(CPURISCVState *env, int vxrm,
+                               uint32_t a, uint32_t b)
+{
+    uint64_t res = (uint64_t)a + b;
+    uint8_t round = get_round(vxrm, res, 1);
+
+    return (res >> 1) + round;
+}
+
+static inline uint64_t aaddu64(CPURISCVState *env, int vxrm,
+                               uint64_t a, uint64_t b)
+{
+    uint64_t res = a + b;
+    uint8_t round = get_round(vxrm, res, 1);
+    uint64_t over = (uint64_t)(res < a) << 63;
+
+    return ((res >> 1) | over) + round;
+}
+
+RVVCALL(OPIVV2_RM, vaaddu_vv_b, OP_UUU_B, H1, H1, H1, aaddu32)
+RVVCALL(OPIVV2_RM, vaaddu_vv_h, OP_UUU_H, H2, H2, H2, aaddu32)
+RVVCALL(OPIVV2_RM, vaaddu_vv_w, OP_UUU_W, H4, H4, H4, aaddu32)
+RVVCALL(OPIVV2_RM, vaaddu_vv_d, OP_UUU_D, H8, H8, H8, aaddu64)
+GEN_VEXT_VV_RM(vaaddu_vv_b, 1, 1)
+GEN_VEXT_VV_RM(vaaddu_vv_h, 2, 2)
+GEN_VEXT_VV_RM(vaaddu_vv_w, 4, 4)
+GEN_VEXT_VV_RM(vaaddu_vv_d, 8, 8)
+
+RVVCALL(OPIVX2_RM, vaaddu_vx_b, OP_UUU_B, H1, H1, aaddu32)
+RVVCALL(OPIVX2_RM, vaaddu_vx_h, OP_UUU_H, H2, H2, aaddu32)
+RVVCALL(OPIVX2_RM, vaaddu_vx_w, OP_UUU_W, H4, H4, aaddu32)
+RVVCALL(OPIVX2_RM, vaaddu_vx_d, OP_UUU_D, H8, H8, aaddu64)
+GEN_VEXT_VX_RM(vaaddu_vx_b, 1, 1)
+GEN_VEXT_VX_RM(vaaddu_vx_h, 2, 2)
+GEN_VEXT_VX_RM(vaaddu_vx_w, 4, 4)
+GEN_VEXT_VX_RM(vaaddu_vx_d, 8, 8)
+
 static inline int32_t asub32(CPURISCVState *env, int vxrm, int32_t a, int32_t b)
 {
     int64_t res = (int64_t)a - b;
@@ -2331,6 +2368,43 @@ GEN_VEXT_VX_RM(vasub_vx_h, 2, 2)
 GEN_VEXT_VX_RM(vasub_vx_w, 4, 4)
 GEN_VEXT_VX_RM(vasub_vx_d, 8, 8)
 
+static inline uint32_t asubu32(CPURISCVState *env, int vxrm,
+                               uint32_t a, uint32_t b)
+{
+    int64_t res = (int64_t)a - b;
+    uint8_t round = get_round(vxrm, res, 1);
+
+    return (res >> 1) + round;
+}
+
+static inline uint64_t asubu64(CPURISCVState *env, int vxrm,
+                               uint64_t a, uint64_t b)
+{
+    uint64_t res = (uint64_t)a - b;
+    uint8_t round = get_round(vxrm, res, 1);
+    uint64_t over = (uint64_t)(res > a) << 63;
+
+    return ((res >> 1) | over) + round;
+}
+
+RVVCALL(OPIVV2_RM, vasubu_vv_b, OP_UUU_B, H1, H1, H1, asubu32)
+RVVCALL(OPIVV2_RM, vasubu_vv_h, OP_UUU_H, H2, H2, H2, asubu32)
+RVVCALL(OPIVV2_RM, vasubu_vv_w, OP_UUU_W, H4, H4, H4, asubu32)
+RVVCALL(OPIVV2_RM, vasubu_vv_d, OP_UUU_D, H8, H8, H8, asubu64)
+GEN_VEXT_VV_RM(vasubu_vv_b, 1, 1)
+GEN_VEXT_VV_RM(vasubu_vv_h, 2, 2)
+GEN_VEXT_VV_RM(vasubu_vv_w, 4, 4)
+GEN_VEXT_VV_RM(vasubu_vv_d, 8, 8)
+
+RVVCALL(OPIVX2_RM, vasubu_vx_b, OP_UUU_B, H1, H1, asubu32)
+RVVCALL(OPIVX2_RM, vasubu_vx_h, OP_UUU_H, H2, H2, asubu32)
+RVVCALL(OPIVX2_RM, vasubu_vx_w, OP_UUU_W, H4, H4, asubu32)
+RVVCALL(OPIVX2_RM, vasubu_vx_d, OP_UUU_D, H8, H8, asubu64)
+GEN_VEXT_VX_RM(vasubu_vx_b, 1, 1)
+GEN_VEXT_VX_RM(vasubu_vx_h, 2, 2)
+GEN_VEXT_VX_RM(vasubu_vx_w, 4, 4)
+GEN_VEXT_VX_RM(vasubu_vx_d, 8, 8)
+
 /* Vector Single-Width Fractional Multiply with Rounding and Saturation */
 static inline int8_t vsmul8(CPURISCVState *env, int vxrm, int8_t a, int8_t b)
 {
-- 
2.25.1



WARNING: multiple messages have this Message-ID (diff)
From: frank.chang@sifive.com
To: qemu-devel@nongnu.org, qemu-riscv@nongnu.org
Cc: Frank Chang <frank.chang@sifive.com>,
	Richard Henderson <richard.henderson@linaro.org>,
	Palmer Dabbelt <palmer@dabbelt.com>,
	Alistair Francis <alistair.francis@wdc.com>,
	Bin Meng <bin.meng@windriver.com>
Subject: [PATCH v8 41/78] target/riscv: rvv-1.0: single-width averaging add and subtract instructions
Date: Fri, 15 Oct 2021 15:45:49 +0800	[thread overview]
Message-ID: <20211015074627.3957162-49-frank.chang@sifive.com> (raw)
In-Reply-To: <20211015074627.3957162-1-frank.chang@sifive.com>

From: Frank Chang <frank.chang@sifive.com>

Add the following instructions:

* vaaddu.vv
* vaaddu.vx
* vasubu.vv
* vasubu.vx

Remove the following instructions:

* vadd.vi

Signed-off-by: Frank Chang <frank.chang@sifive.com>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
---
 target/riscv/helper.h                   | 16 ++++++
 target/riscv/insn32.decode              | 13 +++--
 target/riscv/insn_trans/trans_rvv.c.inc |  5 +-
 target/riscv/vector_helper.c            | 74 +++++++++++++++++++++++++
 4 files changed, 102 insertions(+), 6 deletions(-)

diff --git a/target/riscv/helper.h b/target/riscv/helper.h
index 878d82caf61..f2e8d107d2f 100644
--- a/target/riscv/helper.h
+++ b/target/riscv/helper.h
@@ -648,18 +648,34 @@ DEF_HELPER_6(vaadd_vv_b, void, ptr, ptr, ptr, ptr, env, i32)
 DEF_HELPER_6(vaadd_vv_h, void, ptr, ptr, ptr, ptr, env, i32)
 DEF_HELPER_6(vaadd_vv_w, void, ptr, ptr, ptr, ptr, env, i32)
 DEF_HELPER_6(vaadd_vv_d, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_6(vaaddu_vv_b, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_6(vaaddu_vv_h, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_6(vaaddu_vv_w, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_6(vaaddu_vv_d, void, ptr, ptr, ptr, ptr, env, i32)
 DEF_HELPER_6(vasub_vv_b, void, ptr, ptr, ptr, ptr, env, i32)
 DEF_HELPER_6(vasub_vv_h, void, ptr, ptr, ptr, ptr, env, i32)
 DEF_HELPER_6(vasub_vv_w, void, ptr, ptr, ptr, ptr, env, i32)
 DEF_HELPER_6(vasub_vv_d, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_6(vasubu_vv_b, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_6(vasubu_vv_h, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_6(vasubu_vv_w, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_6(vasubu_vv_d, void, ptr, ptr, ptr, ptr, env, i32)
 DEF_HELPER_6(vaadd_vx_b, void, ptr, ptr, tl, ptr, env, i32)
 DEF_HELPER_6(vaadd_vx_h, void, ptr, ptr, tl, ptr, env, i32)
 DEF_HELPER_6(vaadd_vx_w, void, ptr, ptr, tl, ptr, env, i32)
 DEF_HELPER_6(vaadd_vx_d, void, ptr, ptr, tl, ptr, env, i32)
+DEF_HELPER_6(vaaddu_vx_b, void, ptr, ptr, tl, ptr, env, i32)
+DEF_HELPER_6(vaaddu_vx_h, void, ptr, ptr, tl, ptr, env, i32)
+DEF_HELPER_6(vaaddu_vx_w, void, ptr, ptr, tl, ptr, env, i32)
+DEF_HELPER_6(vaaddu_vx_d, void, ptr, ptr, tl, ptr, env, i32)
 DEF_HELPER_6(vasub_vx_b, void, ptr, ptr, tl, ptr, env, i32)
 DEF_HELPER_6(vasub_vx_h, void, ptr, ptr, tl, ptr, env, i32)
 DEF_HELPER_6(vasub_vx_w, void, ptr, ptr, tl, ptr, env, i32)
 DEF_HELPER_6(vasub_vx_d, void, ptr, ptr, tl, ptr, env, i32)
+DEF_HELPER_6(vasubu_vx_b, void, ptr, ptr, tl, ptr, env, i32)
+DEF_HELPER_6(vasubu_vx_h, void, ptr, ptr, tl, ptr, env, i32)
+DEF_HELPER_6(vasubu_vx_w, void, ptr, ptr, tl, ptr, env, i32)
+DEF_HELPER_6(vasubu_vx_d, void, ptr, ptr, tl, ptr, env, i32)
 
 DEF_HELPER_6(vsmul_vv_b, void, ptr, ptr, ptr, ptr, env, i32)
 DEF_HELPER_6(vsmul_vv_h, void, ptr, ptr, ptr, ptr, env, i32)
diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode
index a6f9e5dcc66..f83c8daf24e 100644
--- a/target/riscv/insn32.decode
+++ b/target/riscv/insn32.decode
@@ -493,11 +493,14 @@ vssubu_vv       100010 . ..... ..... 000 ..... 1010111 @r_vm
 vssubu_vx       100010 . ..... ..... 100 ..... 1010111 @r_vm
 vssub_vv        100011 . ..... ..... 000 ..... 1010111 @r_vm
 vssub_vx        100011 . ..... ..... 100 ..... 1010111 @r_vm
-vaadd_vv        100100 . ..... ..... 000 ..... 1010111 @r_vm
-vaadd_vx        100100 . ..... ..... 100 ..... 1010111 @r_vm
-vaadd_vi        100100 . ..... ..... 011 ..... 1010111 @r_vm
-vasub_vv        100110 . ..... ..... 000 ..... 1010111 @r_vm
-vasub_vx        100110 . ..... ..... 100 ..... 1010111 @r_vm
+vaadd_vv        001001 . ..... ..... 010 ..... 1010111 @r_vm
+vaadd_vx        001001 . ..... ..... 110 ..... 1010111 @r_vm
+vaaddu_vv       001000 . ..... ..... 010 ..... 1010111 @r_vm
+vaaddu_vx       001000 . ..... ..... 110 ..... 1010111 @r_vm
+vasub_vv        001011 . ..... ..... 010 ..... 1010111 @r_vm
+vasub_vx        001011 . ..... ..... 110 ..... 1010111 @r_vm
+vasubu_vv       001010 . ..... ..... 010 ..... 1010111 @r_vm
+vasubu_vx       001010 . ..... ..... 110 ..... 1010111 @r_vm
 vsmul_vv        100111 . ..... ..... 000 ..... 1010111 @r_vm
 vsmul_vx        100111 . ..... ..... 100 ..... 1010111 @r_vm
 vwsmaccu_vv     111100 . ..... ..... 000 ..... 1010111 @r_vm
diff --git a/target/riscv/insn_trans/trans_rvv.c.inc b/target/riscv/insn_trans/trans_rvv.c.inc
index 43cdc846d9f..0fc19b19c69 100644
--- a/target/riscv/insn_trans/trans_rvv.c.inc
+++ b/target/riscv/insn_trans/trans_rvv.c.inc
@@ -2003,10 +2003,13 @@ GEN_OPIVI_TRANS(vsadd_vi, IMM_SX, vsadd_vx, opivx_check)
 
 /* Vector Single-Width Averaging Add and Subtract */
 GEN_OPIVV_TRANS(vaadd_vv, opivv_check)
+GEN_OPIVV_TRANS(vaaddu_vv, opivv_check)
 GEN_OPIVV_TRANS(vasub_vv, opivv_check)
+GEN_OPIVV_TRANS(vasubu_vv, opivv_check)
 GEN_OPIVX_TRANS(vaadd_vx,  opivx_check)
+GEN_OPIVX_TRANS(vaaddu_vx,  opivx_check)
 GEN_OPIVX_TRANS(vasub_vx,  opivx_check)
-GEN_OPIVI_TRANS(vaadd_vi, 0, vaadd_vx, opivx_check)
+GEN_OPIVX_TRANS(vasubu_vx,  opivx_check)
 
 /* Vector Single-Width Fractional Multiply with Rounding and Saturation */
 GEN_OPIVV_TRANS(vsmul_vv, opivv_check)
diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c
index 58ba2a7d99b..6891f28116f 100644
--- a/target/riscv/vector_helper.c
+++ b/target/riscv/vector_helper.c
@@ -2295,6 +2295,43 @@ GEN_VEXT_VX_RM(vaadd_vx_h, 2, 2)
 GEN_VEXT_VX_RM(vaadd_vx_w, 4, 4)
 GEN_VEXT_VX_RM(vaadd_vx_d, 8, 8)
 
+static inline uint32_t aaddu32(CPURISCVState *env, int vxrm,
+                               uint32_t a, uint32_t b)
+{
+    uint64_t res = (uint64_t)a + b;
+    uint8_t round = get_round(vxrm, res, 1);
+
+    return (res >> 1) + round;
+}
+
+static inline uint64_t aaddu64(CPURISCVState *env, int vxrm,
+                               uint64_t a, uint64_t b)
+{
+    uint64_t res = a + b;
+    uint8_t round = get_round(vxrm, res, 1);
+    uint64_t over = (uint64_t)(res < a) << 63;
+
+    return ((res >> 1) | over) + round;
+}
+
+RVVCALL(OPIVV2_RM, vaaddu_vv_b, OP_UUU_B, H1, H1, H1, aaddu32)
+RVVCALL(OPIVV2_RM, vaaddu_vv_h, OP_UUU_H, H2, H2, H2, aaddu32)
+RVVCALL(OPIVV2_RM, vaaddu_vv_w, OP_UUU_W, H4, H4, H4, aaddu32)
+RVVCALL(OPIVV2_RM, vaaddu_vv_d, OP_UUU_D, H8, H8, H8, aaddu64)
+GEN_VEXT_VV_RM(vaaddu_vv_b, 1, 1)
+GEN_VEXT_VV_RM(vaaddu_vv_h, 2, 2)
+GEN_VEXT_VV_RM(vaaddu_vv_w, 4, 4)
+GEN_VEXT_VV_RM(vaaddu_vv_d, 8, 8)
+
+RVVCALL(OPIVX2_RM, vaaddu_vx_b, OP_UUU_B, H1, H1, aaddu32)
+RVVCALL(OPIVX2_RM, vaaddu_vx_h, OP_UUU_H, H2, H2, aaddu32)
+RVVCALL(OPIVX2_RM, vaaddu_vx_w, OP_UUU_W, H4, H4, aaddu32)
+RVVCALL(OPIVX2_RM, vaaddu_vx_d, OP_UUU_D, H8, H8, aaddu64)
+GEN_VEXT_VX_RM(vaaddu_vx_b, 1, 1)
+GEN_VEXT_VX_RM(vaaddu_vx_h, 2, 2)
+GEN_VEXT_VX_RM(vaaddu_vx_w, 4, 4)
+GEN_VEXT_VX_RM(vaaddu_vx_d, 8, 8)
+
 static inline int32_t asub32(CPURISCVState *env, int vxrm, int32_t a, int32_t b)
 {
     int64_t res = (int64_t)a - b;
@@ -2331,6 +2368,43 @@ GEN_VEXT_VX_RM(vasub_vx_h, 2, 2)
 GEN_VEXT_VX_RM(vasub_vx_w, 4, 4)
 GEN_VEXT_VX_RM(vasub_vx_d, 8, 8)
 
+static inline uint32_t asubu32(CPURISCVState *env, int vxrm,
+                               uint32_t a, uint32_t b)
+{
+    int64_t res = (int64_t)a - b;
+    uint8_t round = get_round(vxrm, res, 1);
+
+    return (res >> 1) + round;
+}
+
+static inline uint64_t asubu64(CPURISCVState *env, int vxrm,
+                               uint64_t a, uint64_t b)
+{
+    uint64_t res = (uint64_t)a - b;
+    uint8_t round = get_round(vxrm, res, 1);
+    uint64_t over = (uint64_t)(res > a) << 63;
+
+    return ((res >> 1) | over) + round;
+}
+
+RVVCALL(OPIVV2_RM, vasubu_vv_b, OP_UUU_B, H1, H1, H1, asubu32)
+RVVCALL(OPIVV2_RM, vasubu_vv_h, OP_UUU_H, H2, H2, H2, asubu32)
+RVVCALL(OPIVV2_RM, vasubu_vv_w, OP_UUU_W, H4, H4, H4, asubu32)
+RVVCALL(OPIVV2_RM, vasubu_vv_d, OP_UUU_D, H8, H8, H8, asubu64)
+GEN_VEXT_VV_RM(vasubu_vv_b, 1, 1)
+GEN_VEXT_VV_RM(vasubu_vv_h, 2, 2)
+GEN_VEXT_VV_RM(vasubu_vv_w, 4, 4)
+GEN_VEXT_VV_RM(vasubu_vv_d, 8, 8)
+
+RVVCALL(OPIVX2_RM, vasubu_vx_b, OP_UUU_B, H1, H1, asubu32)
+RVVCALL(OPIVX2_RM, vasubu_vx_h, OP_UUU_H, H2, H2, asubu32)
+RVVCALL(OPIVX2_RM, vasubu_vx_w, OP_UUU_W, H4, H4, asubu32)
+RVVCALL(OPIVX2_RM, vasubu_vx_d, OP_UUU_D, H8, H8, asubu64)
+GEN_VEXT_VX_RM(vasubu_vx_b, 1, 1)
+GEN_VEXT_VX_RM(vasubu_vx_h, 2, 2)
+GEN_VEXT_VX_RM(vasubu_vx_w, 4, 4)
+GEN_VEXT_VX_RM(vasubu_vx_d, 8, 8)
+
 /* Vector Single-Width Fractional Multiply with Rounding and Saturation */
 static inline int8_t vsmul8(CPURISCVState *env, int vxrm, int8_t a, int8_t b)
 {
-- 
2.25.1



  parent reply	other threads:[~2021-10-15  8:31 UTC|newest]

Thread overview: 237+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-10-15  7:45 [PATCH v8 00/78] support vector extension v1.0 frank.chang
2021-10-15  7:45 ` [PATCH v8 01/78] target/riscv: fix TB_FLAGS bits overlapping bug for rvv/rvh frank.chang
2021-10-15  7:45   ` frank.chang
2021-10-16  3:04   ` Richard Henderson
2021-10-16  3:04     ` Richard Henderson
2021-10-17 22:55   ` Alistair Francis
2021-10-17 22:55     ` Alistair Francis
2021-10-18  5:38     ` Richard Henderson
2021-10-18  5:38       ` Richard Henderson
2021-10-18  6:01   ` Alistair Francis
2021-10-18  6:01     ` Alistair Francis
2021-10-15  7:45 ` [PATCH v8 02/78] target/riscv: drop vector 0.7.1 and add 1.0 support frank.chang
2021-10-15  7:45   ` frank.chang
2021-10-15  7:45 ` [PATCH v8 03/78] target/riscv: Use FIELD_EX32() to extract wd field frank.chang
2021-10-15  7:45   ` frank.chang
2021-10-15  7:45 ` [PATCH v8 04/78] target/riscv: rvv-1.0: add mstatus VS field frank.chang
2021-10-15  7:45   ` frank.chang
2021-10-15  7:45 ` [PATCH v8 05/78] target/riscv: rvv-1.0: add sstatus " frank.chang
2021-10-15  7:45   ` frank.chang
2021-10-15  7:45 ` [PATCH v8 06/78] target/riscv: rvv-1.0: introduce writable misa.v field frank.chang
2021-10-15  7:45   ` frank.chang
2021-10-15  7:45 ` [PATCH v8 07/78] target/riscv: rvv-1.0: add translation-time vector context status frank.chang
2021-10-15  7:45   ` frank.chang
2021-10-15  7:45 ` [PATCH v8 08/78] target/riscv: rvv-1.0: remove rvv related codes from fcsr registers frank.chang
2021-10-15  7:45   ` frank.chang
2021-10-15  7:45 ` [PATCH v8 09/78] target/riscv: rvv-1.0: add vcsr register frank.chang
2021-10-15  7:45   ` frank.chang
2021-10-15  7:45 ` [PATCH v8 10/78] target/riscv: rvv-1.0: add vlenb register frank.chang
2021-10-15  7:45   ` frank.chang
2021-10-15  7:45 ` [PATCH v8 11/78] target/riscv: rvv-1.0: check MSTATUS_VS when accessing vector csr registers frank.chang
2021-10-15  7:45   ` frank.chang
2021-10-15  7:45 ` [PATCH v8 12/78] target/riscv: rvv-1.0: remove MLEN calculations frank.chang
2021-10-15  7:45   ` frank.chang
2021-10-15  7:45 ` [PATCH v8 13/78] target/riscv: rvv-1.0: add fractional LMUL frank.chang
2021-10-15  7:45   ` frank.chang
2021-10-15  7:45 ` [PATCH v8 14/78] target/riscv: rvv-1.0: add VMA and VTA frank.chang
2021-10-15  7:45   ` frank.chang
2021-10-15  7:45 ` [PATCH v8 15/78] target/riscv: rvv-1.0: update check functions frank.chang
2021-10-15  7:45   ` frank.chang
2021-10-15  7:45 ` [PATCH v8 16/78] target/riscv: introduce more imm value modes in translator functions frank.chang
2021-10-15  7:45   ` frank.chang
2021-10-15  7:45 ` [PATCH v8 17/78] target/riscv: rvv:1.0: add translation-time nan-box helper function frank.chang
2021-10-15  7:45   ` frank.chang
2021-10-15  7:45 ` [PATCH 18/76] target/riscv: rvv-1.0: configure instructions frank.chang
2021-10-15  7:45   ` frank.chang
2021-10-15  7:45 ` [PATCH v8 18/78] target/riscv: rvv-1.0: remove amo operations instructions frank.chang
2021-10-15  7:45   ` frank.chang
2021-10-18  5:44   ` Alistair Francis
2021-10-18  5:44     ` Alistair Francis
2021-10-15  7:45 ` [PATCH v8 19/78] target/riscv: rvv-1.0: configure instructions frank.chang
2021-10-15  7:45   ` frank.chang
2021-10-15  7:45 ` [PATCH 19/76] target/riscv: rvv-1.0: stride load and store instructions frank.chang
2021-10-15  7:45   ` frank.chang
2021-10-15  7:45 ` [PATCH 20/76] target/riscv: rvv-1.0: index " frank.chang
2021-10-15  7:45   ` frank.chang
2021-10-15  7:45 ` [PATCH v8 20/78] target/riscv: rvv-1.0: stride " frank.chang
2021-10-15  7:45   ` frank.chang
2021-10-15  7:45 ` [PATCH 21/76] target/riscv: rvv-1.0: fix address index overflow bug of indexed load/store insns frank.chang
2021-10-15  7:45   ` frank.chang
2021-10-15  7:45 ` [PATCH v8 21/78] target/riscv: rvv-1.0: index load and store instructions frank.chang
2021-10-15  7:45   ` frank.chang
2021-10-15  7:45 ` [PATCH 22/76] target/riscv: rvv-1.0: fault-only-first unit stride load frank.chang
2021-10-15  7:45   ` frank.chang
2021-10-15  7:45 ` [PATCH v8 22/78] target/riscv: rvv-1.0: fix address index overflow bug of indexed load/store insns frank.chang
2021-10-15  7:45   ` frank.chang
2021-10-15  7:45 ` [PATCH 23/76] target/riscv: rvv-1.0: amo operations frank.chang
2021-10-15  7:45   ` frank.chang
2021-10-15  7:45 ` [PATCH v8 23/78] target/riscv: rvv-1.0: fault-only-first unit stride load frank.chang
2021-10-15  7:45   ` frank.chang
2021-10-15  7:45 ` [PATCH v8 24/78] target/riscv: rvv-1.0: load/store whole register instructions frank.chang
2021-10-15  7:45   ` frank.chang
2021-10-15  7:45 ` [PATCH v8 25/78] target/riscv: rvv-1.0: update vext_max_elems() for load/store insns frank.chang
2021-10-15  7:45   ` frank.chang
2021-10-15  7:45 ` [PATCH v8 26/78] target/riscv: rvv-1.0: take fractional LMUL into vector max elements calculation frank.chang
2021-10-15  7:45   ` frank.chang
2021-10-15  7:45 ` [PATCH v8 27/78] target/riscv: rvv-1.0: floating-point square-root instruction frank.chang
2021-10-15  7:45   ` frank.chang
2021-10-15  7:45 ` [PATCH v8 28/78] target/riscv: rvv-1.0: floating-point classify instructions frank.chang
2021-10-15  7:45   ` frank.chang
2021-10-15  7:45 ` [PATCH v8 29/78] target/riscv: rvv-1.0: count population in mask instruction frank.chang
2021-10-15  7:45   ` frank.chang
2021-10-18  5:46   ` Alistair Francis
2021-10-18  5:46     ` Alistair Francis
2021-10-15  7:45 ` [PATCH 29/76] target/riscv: rvv-1.0: mask population count instruction frank.chang
2021-10-15  7:45   ` frank.chang
2021-10-15  7:45 ` [PATCH v8 30/78] target/riscv: rvv-1.0: find-first-set mask bit instruction frank.chang
2021-10-15  7:45   ` frank.chang
2021-10-18  5:47   ` Alistair Francis
2021-10-18  5:47     ` Alistair Francis
2021-10-15  7:45 ` [PATCH v8 31/78] target/riscv: rvv-1.0: set-X-first mask bit instructions frank.chang
2021-10-15  7:45   ` frank.chang
2021-10-18  5:53   ` Alistair Francis
2021-10-18  5:53     ` Alistair Francis
2021-10-15  7:45 ` [PATCH v8 32/78] target/riscv: rvv-1.0: iota instruction frank.chang
2021-10-15  7:45   ` frank.chang
2021-10-18  5:49   ` Alistair Francis
2021-10-18  5:49     ` Alistair Francis
2021-10-15  7:45 ` [PATCH v8 33/78] target/riscv: rvv-1.0: element index instruction frank.chang
2021-10-15  7:45   ` frank.chang
2021-10-18  5:54   ` Alistair Francis
2021-10-18  5:54     ` Alistair Francis
2021-10-15  7:45 ` [PATCH v8 34/78] target/riscv: rvv-1.0: allow load element with sign-extended frank.chang
2021-10-15  7:45   ` frank.chang
2021-10-21  4:28   ` Alistair Francis
2021-10-21  4:28     ` Alistair Francis
2021-10-15  7:45 ` [PATCH v8 35/78] target/riscv: rvv-1.0: register gather instructions frank.chang
2021-10-15  7:45   ` frank.chang
2021-10-15  7:45 ` [PATCH v8 36/78] target/riscv: rvv-1.0: integer scalar move instructions frank.chang
2021-10-15  7:45   ` frank.chang
2021-10-21  4:24   ` Alistair Francis
2021-10-21  4:24     ` Alistair Francis
2021-10-15  7:45 ` [PATCH v8 37/78] target/riscv: rvv-1.0: floating-point move instruction frank.chang
2021-10-15  7:45   ` frank.chang
2021-10-15  7:45 ` [PATCH v8 38/78] target/riscv: rvv-1.0: floating-point scalar move instructions frank.chang
2021-10-15  7:45   ` frank.chang
2021-10-21  4:26   ` Alistair Francis
2021-10-21  4:26     ` Alistair Francis
2021-10-15  7:45 ` [PATCH v8 39/78] target/riscv: rvv-1.0: whole register " frank.chang
2021-10-15  7:45   ` frank.chang
2021-10-21  4:27   ` Alistair Francis
2021-10-21  4:27     ` Alistair Francis
2021-10-15  7:45 ` [PATCH v8 40/78] target/riscv: rvv-1.0: integer extension instructions frank.chang
2021-10-15  7:45   ` frank.chang
2021-10-15  7:45 ` frank.chang [this message]
2021-10-15  7:45   ` [PATCH v8 41/78] target/riscv: rvv-1.0: single-width averaging add and subtract instructions frank.chang
2021-10-15  7:45 ` [PATCH v8 42/78] target/riscv: rvv-1.0: single-width bit shift instructions frank.chang
2021-10-15  7:45   ` frank.chang
2021-10-15  7:45 ` [PATCH v8 43/78] target/riscv: rvv-1.0: integer add-with-carry/subtract-with-borrow frank.chang
2021-10-15  7:45   ` frank.chang
2021-10-25  6:08   ` Alistair Francis
2021-10-25  6:08     ` Alistair Francis
2021-10-15  7:45 ` [PATCH v8 44/78] target/riscv: rvv-1.0: narrowing integer right shift instructions frank.chang
2021-10-15  7:45   ` frank.chang
2021-10-15  7:45 ` [PATCH v8 45/78] target/riscv: rvv-1.0: widening integer multiply-add instructions frank.chang
2021-10-15  7:45   ` frank.chang
2021-10-15  7:45 ` [PATCH v8 46/78] target/riscv: rvv-1.0: single-width saturating add and subtract instructions frank.chang
2021-10-15  7:45   ` frank.chang
2021-10-15  7:45 ` [PATCH v8 47/78] target/riscv: rvv-1.0: integer comparison instructions frank.chang
2021-10-15  7:45   ` frank.chang
2021-10-15  7:45 ` [PATCH v8 48/78] target/riscv: rvv-1.0: floating-point compare instructions frank.chang
2021-10-15  7:45   ` frank.chang
2021-10-15  7:45 ` [PATCH v8 49/78] target/riscv: rvv-1.0: mask-register logical instructions frank.chang
2021-10-15  7:45   ` frank.chang
2021-10-15  7:45 ` [PATCH v8 50/78] target/riscv: rvv-1.0: slide instructions frank.chang
2021-10-15  7:45   ` frank.chang
2021-10-15  7:45 ` [PATCH v8 51/78] target/riscv: rvv-1.0: floating-point " frank.chang
2021-10-15  7:45   ` frank.chang
2021-10-25  6:13   ` Alistair Francis
2021-10-25  6:13     ` Alistair Francis
2021-10-15  7:46 ` [PATCH v8 52/78] target/riscv: rvv-1.0: narrowing fixed-point clip instructions frank.chang
2021-10-15  7:46   ` frank.chang
2021-10-15  7:46 ` [PATCH v8 53/78] target/riscv: rvv-1.0: single-width floating-point reduction frank.chang
2021-10-15  7:46   ` frank.chang
2021-10-15  7:46 ` [PATCH v8 54/78] target/riscv: rvv-1.0: widening floating-point reduction instructions frank.chang
2021-10-15  7:46   ` frank.chang
2021-10-15  7:46 ` [PATCH v8 55/78] target/riscv: rvv-1.0: single-width scaling shift instructions frank.chang
2021-10-15  7:46   ` frank.chang
2021-10-15  7:46 ` [PATCH v8 56/78] target/riscv: rvv-1.0: remove widening saturating scaled multiply-add frank.chang
2021-10-15  7:46   ` frank.chang
2021-10-15  7:46 ` [PATCH v8 57/78] target/riscv: rvv-1.0: remove vmford.vv and vmford.vf frank.chang
2021-10-15  7:46   ` frank.chang
2021-10-15  7:46 ` [PATCH v8 58/78] target/riscv: rvv-1.0: remove integer extract instruction frank.chang
2021-10-15  7:46   ` frank.chang
2021-10-15  7:46 ` [PATCH v8 59/78] target/riscv: rvv-1.0: floating-point min/max instructions frank.chang
2021-10-15  7:46   ` frank.chang
2021-10-15  7:46 ` [PATCH v8 60/78] target/riscv: introduce floating-point rounding mode enum frank.chang
2021-10-15  7:46   ` frank.chang
2021-10-15  7:46 ` [PATCH v8 61/78] target/riscv: rvv-1.0: floating-point/integer type-convert instructions frank.chang
2021-10-15  7:46   ` frank.chang
2021-10-25  6:16   ` Alistair Francis
2021-10-25  6:16     ` Alistair Francis
2021-10-15  7:46 ` [PATCH v8 62/78] target/riscv: rvv-1.0: widening floating-point/integer type-convert frank.chang
2021-10-15  7:46   ` frank.chang
2021-10-26  6:29   ` Alistair Francis
2021-10-26  6:29     ` Alistair Francis
2021-10-15  7:46 ` [PATCH v8 63/78] target/riscv: add "set round to odd" rounding mode helper function frank.chang
2021-10-15  7:46   ` frank.chang
2021-10-26  6:30   ` Alistair Francis
2021-10-26  6:30     ` Alistair Francis
2021-10-15  7:46 ` [PATCH v8 64/78] target/riscv: rvv-1.0: narrowing floating-point/integer type-convert frank.chang
2021-10-15  7:46   ` frank.chang
2021-10-26  6:32   ` Alistair Francis
2021-10-26  6:32     ` Alistair Francis
2021-10-15  7:46 ` [PATCH v8 65/78] target/riscv: rvv-1.0: relax RV_VLEN_MAX to 1024-bits frank.chang
2021-10-15  7:46   ` frank.chang
2021-10-25  6:43   ` Alistair Francis
2021-10-25  6:43     ` Alistair Francis
2021-10-15  7:46 ` [PATCH v8 66/78] target/riscv: rvv-1.0: implement vstart CSR frank.chang
2021-10-15  7:46   ` frank.chang
2021-10-15  7:46 ` [PATCH v8 67/78] target/riscv: rvv-1.0: trigger illegal instruction exception if frm is not valid frank.chang
2021-10-15  7:46   ` frank.chang
2021-10-15  7:46 ` [PATCH v8 68/78] target/riscv: rvv-1.0: set mstatus.SD bit when writing vector CSRs frank.chang
2021-10-15  7:46   ` frank.chang
2021-10-15  7:46 ` [PATCH v8 69/78] target/riscv: gdb: support vector registers for rv64 & rv32 frank.chang
2021-10-15  7:46   ` frank.chang
2021-10-15  7:46 ` [PATCH v8 70/78] target/riscv: rvv-1.0: floating-point reciprocal square-root estimate instruction frank.chang
2021-10-15  7:46   ` frank.chang
2021-10-15  7:46 ` [PATCH v8 71/78] target/riscv: rvv-1.0: floating-point reciprocal " frank.chang
2021-10-15  7:46   ` frank.chang
2021-10-15  7:46 ` [PATCH v8 72/78] target/riscv: set mstatus.SD bit when writing fp CSRs frank.chang
2021-10-15  7:46   ` frank.chang
2021-10-25  6:42   ` Alistair Francis
2021-10-25  6:42     ` Alistair Francis
2021-10-15  7:46 ` [PATCH v8 73/78] target/riscv: rvv-1.0: rename r2_zimm to r2_zimm11 frank.chang
2021-10-15  7:46   ` frank.chang
2021-10-26  6:50   ` Alistair Francis
2021-10-26  6:50     ` Alistair Francis
2021-10-15  7:46 ` [PATCH v8 74/78] target/riscv: rvv-1.0: add vsetivli instruction frank.chang
2021-10-15  7:46   ` frank.chang
2021-10-26  6:52   ` Alistair Francis
2021-10-26  6:52     ` Alistair Francis
2021-10-15  7:46 ` [PATCH v8 75/78] target/riscv: rvv-1.0: add evl parameter to vext_ldst_us() frank.chang
2021-10-15  7:46   ` frank.chang
2021-10-15  7:46 ` [PATCH v8 76/78] target/riscv: rvv-1.0: add vector unit-stride mask load/store insns frank.chang
2021-10-15  7:46   ` frank.chang
2021-10-15  7:46 ` [PATCH v8 77/78] target/riscv: rvv-1.0: rename vmandnot.mm and vmornot.mm to vmandn.mm and vmorn.mm frank.chang
2021-10-15  7:46   ` frank.chang
2021-10-26  6:55   ` Alistair Francis
2021-10-26  6:55     ` Alistair Francis
2021-10-15  7:46 ` [PATCH v8 78/78] target/riscv: rvv-1.0: update opivv_vadc_check() comment frank.chang
2021-10-15  7:46   ` frank.chang
2021-10-26  6:49   ` Alistair Francis
2021-10-26  6:49     ` Alistair Francis
2021-10-15  9:02 ` [PATCH v8 00/78] support vector extension v1.0 Frank Chang
2021-10-15  9:02   ` Frank Chang
2021-10-18  6:00 ` Alistair Francis
2021-10-18  6:00   ` Alistair Francis
2021-10-18  6:09   ` Frank Chang
2021-10-18  6:09     ` Frank Chang
2021-10-18  6:12     ` Alistair Francis
2021-10-18  6:12       ` Alistair Francis
2021-10-18  6:17       ` Frank Chang
2021-10-18  6:17         ` Frank Chang
2021-10-18  9:01 ` LIU Zhiwei
2021-10-18  9:34   ` LIU Zhiwei
2021-10-20  5:28     ` Alistair Francis
2021-10-20  5:28       ` Alistair Francis

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20211015074627.3957162-49-frank.chang@sifive.com \
    --to=frank.chang@sifive.com \
    --cc=alistair.francis@wdc.com \
    --cc=bin.meng@windriver.com \
    --cc=palmer@dabbelt.com \
    --cc=qemu-devel@nongnu.org \
    --cc=qemu-riscv@nongnu.org \
    --cc=richard.henderson@linaro.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.