All of lore.kernel.org
 help / color / mirror / Atom feed
From: Song Gao <gaosong@loongson.cn>
To: qemu-devel@nongnu.org
Cc: richard.henderson@linaro.org
Subject: [RFC PATCH 10/43] target/loongarch: Implement vaddw/vsubw
Date: Sat, 24 Dec 2022 16:16:00 +0800	[thread overview]
Message-ID: <20221224081633.4185445-11-gaosong@loongson.cn> (raw)
In-Reply-To: <20221224081633.4185445-1-gaosong@loongson.cn>

This patch includes:
- VADDW{EV/OD}.{H.B/W.H/D.W/Q.D}[U];
- VSUBW{EV/OD}.{H.B/W.H/D.W/Q.D}[U];
- VADDW{EV/OD}.{H.BU.B/W.HU.H/D.WU.W/Q.DU.D}.

Signed-off-by: Song Gao <gaosong@loongson.cn>
---
 target/loongarch/disas.c                    |  43 ++++
 target/loongarch/helper.h                   |  43 ++++
 target/loongarch/insn_trans/trans_lsx.c.inc |  43 ++++
 target/loongarch/insns.decode               |  43 ++++
 target/loongarch/lsx_helper.c               | 243 ++++++++++++++++++++
 5 files changed, 415 insertions(+)

diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c
index 1a906e8714..81253f00e9 100644
--- a/target/loongarch/disas.c
+++ b/target/loongarch/disas.c
@@ -836,3 +836,46 @@ INSN_LSX(vhsubw_hu_bu,     vvv)
 INSN_LSX(vhsubw_wu_hu,     vvv)
 INSN_LSX(vhsubw_du_wu,     vvv)
 INSN_LSX(vhsubw_qu_du,     vvv)
+
+INSN_LSX(vaddwev_h_b,      vvv)
+INSN_LSX(vaddwev_w_h,      vvv)
+INSN_LSX(vaddwev_d_w,      vvv)
+INSN_LSX(vaddwev_q_d,      vvv)
+INSN_LSX(vaddwod_h_b,      vvv)
+INSN_LSX(vaddwod_w_h,      vvv)
+INSN_LSX(vaddwod_d_w,      vvv)
+INSN_LSX(vaddwod_q_d,      vvv)
+INSN_LSX(vsubwev_h_b,      vvv)
+INSN_LSX(vsubwev_w_h,      vvv)
+INSN_LSX(vsubwev_d_w,      vvv)
+INSN_LSX(vsubwev_q_d,      vvv)
+INSN_LSX(vsubwod_h_b,      vvv)
+INSN_LSX(vsubwod_w_h,      vvv)
+INSN_LSX(vsubwod_d_w,      vvv)
+INSN_LSX(vsubwod_q_d,      vvv)
+
+INSN_LSX(vaddwev_h_bu,     vvv)
+INSN_LSX(vaddwev_w_hu,     vvv)
+INSN_LSX(vaddwev_d_wu,     vvv)
+INSN_LSX(vaddwev_q_du,     vvv)
+INSN_LSX(vaddwod_h_bu,     vvv)
+INSN_LSX(vaddwod_w_hu,     vvv)
+INSN_LSX(vaddwod_d_wu,     vvv)
+INSN_LSX(vaddwod_q_du,     vvv)
+INSN_LSX(vsubwev_h_bu,     vvv)
+INSN_LSX(vsubwev_w_hu,     vvv)
+INSN_LSX(vsubwev_d_wu,     vvv)
+INSN_LSX(vsubwev_q_du,     vvv)
+INSN_LSX(vsubwod_h_bu,     vvv)
+INSN_LSX(vsubwod_w_hu,     vvv)
+INSN_LSX(vsubwod_d_wu,     vvv)
+INSN_LSX(vsubwod_q_du,     vvv)
+
+INSN_LSX(vaddwev_h_bu_b,   vvv)
+INSN_LSX(vaddwev_w_hu_h,   vvv)
+INSN_LSX(vaddwev_d_wu_w,   vvv)
+INSN_LSX(vaddwev_q_du_d,   vvv)
+INSN_LSX(vaddwod_h_bu_b,   vvv)
+INSN_LSX(vaddwod_w_hu_h,   vvv)
+INSN_LSX(vaddwod_d_wu_w,   vvv)
+INSN_LSX(vaddwod_q_du_d,   vvv)
diff --git a/target/loongarch/helper.h b/target/loongarch/helper.h
index 4db8ca599e..ff16626381 100644
--- a/target/loongarch/helper.h
+++ b/target/loongarch/helper.h
@@ -190,3 +190,46 @@ DEF_HELPER_4(vhsubw_hu_bu, void, env, i32, i32, i32)
 DEF_HELPER_4(vhsubw_wu_hu, void, env, i32, i32, i32)
 DEF_HELPER_4(vhsubw_du_wu, void, env, i32, i32, i32)
 DEF_HELPER_4(vhsubw_qu_du, void, env, i32, i32, i32)
+
+DEF_HELPER_4(vaddwev_h_b, void, env, i32, i32, i32)
+DEF_HELPER_4(vaddwev_w_h, void, env, i32, i32, i32)
+DEF_HELPER_4(vaddwev_d_w, void, env, i32, i32, i32)
+DEF_HELPER_4(vaddwev_q_d, void, env, i32, i32, i32)
+DEF_HELPER_4(vaddwod_h_b, void, env, i32, i32, i32)
+DEF_HELPER_4(vaddwod_w_h, void, env, i32, i32, i32)
+DEF_HELPER_4(vaddwod_d_w, void, env, i32, i32, i32)
+DEF_HELPER_4(vaddwod_q_d, void, env, i32, i32, i32)
+DEF_HELPER_4(vsubwev_h_b, void, env, i32, i32, i32)
+DEF_HELPER_4(vsubwev_w_h, void, env, i32, i32, i32)
+DEF_HELPER_4(vsubwev_d_w, void, env, i32, i32, i32)
+DEF_HELPER_4(vsubwev_q_d, void, env, i32, i32, i32)
+DEF_HELPER_4(vsubwod_h_b, void, env, i32, i32, i32)
+DEF_HELPER_4(vsubwod_w_h, void, env, i32, i32, i32)
+DEF_HELPER_4(vsubwod_d_w, void, env, i32, i32, i32)
+DEF_HELPER_4(vsubwod_q_d, void, env, i32, i32, i32)
+
+DEF_HELPER_4(vaddwev_h_bu, void, env, i32, i32, i32)
+DEF_HELPER_4(vaddwev_w_hu, void, env, i32, i32, i32)
+DEF_HELPER_4(vaddwev_d_wu, void, env, i32, i32, i32)
+DEF_HELPER_4(vaddwev_q_du, void, env, i32, i32, i32)
+DEF_HELPER_4(vaddwod_h_bu, void, env, i32, i32, i32)
+DEF_HELPER_4(vaddwod_w_hu, void, env, i32, i32, i32)
+DEF_HELPER_4(vaddwod_d_wu, void, env, i32, i32, i32)
+DEF_HELPER_4(vaddwod_q_du, void, env, i32, i32, i32)
+DEF_HELPER_4(vsubwev_h_bu, void, env, i32, i32, i32)
+DEF_HELPER_4(vsubwev_w_hu, void, env, i32, i32, i32)
+DEF_HELPER_4(vsubwev_d_wu, void, env, i32, i32, i32)
+DEF_HELPER_4(vsubwev_q_du, void, env, i32, i32, i32)
+DEF_HELPER_4(vsubwod_h_bu, void, env, i32, i32, i32)
+DEF_HELPER_4(vsubwod_w_hu, void, env, i32, i32, i32)
+DEF_HELPER_4(vsubwod_d_wu, void, env, i32, i32, i32)
+DEF_HELPER_4(vsubwod_q_du, void, env, i32, i32, i32)
+
+DEF_HELPER_4(vaddwev_h_bu_b, void, env, i32, i32, i32)
+DEF_HELPER_4(vaddwev_w_hu_h, void, env, i32, i32, i32)
+DEF_HELPER_4(vaddwev_d_wu_w, void, env, i32, i32, i32)
+DEF_HELPER_4(vaddwev_q_du_d, void, env, i32, i32, i32)
+DEF_HELPER_4(vaddwod_h_bu_b, void, env, i32, i32, i32)
+DEF_HELPER_4(vaddwod_w_hu_h, void, env, i32, i32, i32)
+DEF_HELPER_4(vaddwod_d_wu_w, void, env, i32, i32, i32)
+DEF_HELPER_4(vaddwod_q_du_d, void, env, i32, i32, i32)
diff --git a/target/loongarch/insn_trans/trans_lsx.c.inc b/target/loongarch/insn_trans/trans_lsx.c.inc
index f278a3cd00..69111c498c 100644
--- a/target/loongarch/insn_trans/trans_lsx.c.inc
+++ b/target/loongarch/insn_trans/trans_lsx.c.inc
@@ -108,3 +108,46 @@ TRANS(vhsubw_hu_bu, gen_vvv, gen_helper_vhsubw_hu_bu)
 TRANS(vhsubw_wu_hu, gen_vvv, gen_helper_vhsubw_wu_hu)
 TRANS(vhsubw_du_wu, gen_vvv, gen_helper_vhsubw_du_wu)
 TRANS(vhsubw_qu_du, gen_vvv, gen_helper_vhsubw_qu_du)
+
+TRANS(vaddwev_h_b, gen_vvv, gen_helper_vaddwev_h_b)
+TRANS(vaddwev_w_h, gen_vvv, gen_helper_vaddwev_w_h)
+TRANS(vaddwev_d_w, gen_vvv, gen_helper_vaddwev_d_w)
+TRANS(vaddwev_q_d, gen_vvv, gen_helper_vaddwev_q_d)
+TRANS(vaddwod_h_b, gen_vvv, gen_helper_vaddwod_h_b)
+TRANS(vaddwod_w_h, gen_vvv, gen_helper_vaddwod_w_h)
+TRANS(vaddwod_d_w, gen_vvv, gen_helper_vaddwod_d_w)
+TRANS(vaddwod_q_d, gen_vvv, gen_helper_vaddwod_q_d)
+TRANS(vsubwev_h_b, gen_vvv, gen_helper_vsubwev_h_b)
+TRANS(vsubwev_w_h, gen_vvv, gen_helper_vsubwev_w_h)
+TRANS(vsubwev_d_w, gen_vvv, gen_helper_vsubwev_d_w)
+TRANS(vsubwev_q_d, gen_vvv, gen_helper_vsubwev_q_d)
+TRANS(vsubwod_h_b, gen_vvv, gen_helper_vsubwod_h_b)
+TRANS(vsubwod_w_h, gen_vvv, gen_helper_vsubwod_w_h)
+TRANS(vsubwod_d_w, gen_vvv, gen_helper_vsubwod_d_w)
+TRANS(vsubwod_q_d, gen_vvv, gen_helper_vsubwod_q_d)
+
+TRANS(vaddwev_h_bu, gen_vvv, gen_helper_vaddwev_h_bu)
+TRANS(vaddwev_w_hu, gen_vvv, gen_helper_vaddwev_w_hu)
+TRANS(vaddwev_d_wu, gen_vvv, gen_helper_vaddwev_d_wu)
+TRANS(vaddwev_q_du, gen_vvv, gen_helper_vaddwev_q_du)
+TRANS(vaddwod_h_bu, gen_vvv, gen_helper_vaddwod_h_bu)
+TRANS(vaddwod_w_hu, gen_vvv, gen_helper_vaddwod_w_hu)
+TRANS(vaddwod_d_wu, gen_vvv, gen_helper_vaddwod_d_wu)
+TRANS(vaddwod_q_du, gen_vvv, gen_helper_vaddwod_q_du)
+TRANS(vsubwev_h_bu, gen_vvv, gen_helper_vsubwev_h_bu)
+TRANS(vsubwev_w_hu, gen_vvv, gen_helper_vsubwev_w_hu)
+TRANS(vsubwev_d_wu, gen_vvv, gen_helper_vsubwev_d_wu)
+TRANS(vsubwev_q_du, gen_vvv, gen_helper_vsubwev_q_du)
+TRANS(vsubwod_h_bu, gen_vvv, gen_helper_vsubwod_h_bu)
+TRANS(vsubwod_w_hu, gen_vvv, gen_helper_vsubwod_w_hu)
+TRANS(vsubwod_d_wu, gen_vvv, gen_helper_vsubwod_d_wu)
+TRANS(vsubwod_q_du, gen_vvv, gen_helper_vsubwod_q_du)
+
+TRANS(vaddwev_h_bu_b, gen_vvv, gen_helper_vaddwev_h_bu_b)
+TRANS(vaddwev_w_hu_h, gen_vvv, gen_helper_vaddwev_w_hu_h)
+TRANS(vaddwev_d_wu_w, gen_vvv, gen_helper_vaddwev_d_wu_w)
+TRANS(vaddwev_q_du_d, gen_vvv, gen_helper_vaddwev_q_du_d)
+TRANS(vaddwod_h_bu_b, gen_vvv, gen_helper_vaddwod_h_bu_b)
+TRANS(vaddwod_w_hu_h, gen_vvv, gen_helper_vaddwod_w_hu_h)
+TRANS(vaddwod_d_wu_w, gen_vvv, gen_helper_vaddwod_d_wu_w)
+TRANS(vaddwod_q_du_d, gen_vvv, gen_helper_vaddwod_q_du_d)
diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode
index 77f9ab5a36..7e99ead2de 100644
--- a/target/loongarch/insns.decode
+++ b/target/loongarch/insns.decode
@@ -558,3 +558,46 @@ vhsubw_hu_bu     0111 00000101 10100 ..... ..... .....    @vvv
 vhsubw_wu_hu     0111 00000101 10101 ..... ..... .....    @vvv
 vhsubw_du_wu     0111 00000101 10110 ..... ..... .....    @vvv
 vhsubw_qu_du     0111 00000101 10111 ..... ..... .....    @vvv
+
+vaddwev_h_b      0111 00000001 11100 ..... ..... .....    @vvv
+vaddwev_w_h      0111 00000001 11101 ..... ..... .....    @vvv
+vaddwev_d_w      0111 00000001 11110 ..... ..... .....    @vvv
+vaddwev_q_d      0111 00000001 11111 ..... ..... .....    @vvv
+vaddwod_h_b      0111 00000010 00100 ..... ..... .....    @vvv
+vaddwod_w_h      0111 00000010 00101 ..... ..... .....    @vvv
+vaddwod_d_w      0111 00000010 00110 ..... ..... .....    @vvv
+vaddwod_q_d      0111 00000010 00111 ..... ..... .....    @vvv
+vsubwev_h_b      0111 00000010 00000 ..... ..... .....    @vvv
+vsubwev_w_h      0111 00000010 00001 ..... ..... .....    @vvv
+vsubwev_d_w      0111 00000010 00010 ..... ..... .....    @vvv
+vsubwev_q_d      0111 00000010 00011 ..... ..... .....    @vvv
+vsubwod_h_b      0111 00000010 01000 ..... ..... .....    @vvv
+vsubwod_w_h      0111 00000010 01001 ..... ..... .....    @vvv
+vsubwod_d_w      0111 00000010 01010 ..... ..... .....    @vvv
+vsubwod_q_d      0111 00000010 01011 ..... ..... .....    @vvv
+
+vaddwev_h_bu     0111 00000010 11100 ..... ..... .....    @vvv
+vaddwev_w_hu     0111 00000010 11101 ..... ..... .....    @vvv
+vaddwev_d_wu     0111 00000010 11110 ..... ..... .....    @vvv
+vaddwev_q_du     0111 00000010 11111 ..... ..... .....    @vvv
+vaddwod_h_bu     0111 00000011 00100 ..... ..... .....    @vvv
+vaddwod_w_hu     0111 00000011 00101 ..... ..... .....    @vvv
+vaddwod_d_wu     0111 00000011 00110 ..... ..... .....    @vvv
+vaddwod_q_du     0111 00000011 00111 ..... ..... .....    @vvv
+vsubwev_h_bu     0111 00000011 00000 ..... ..... .....    @vvv
+vsubwev_w_hu     0111 00000011 00001 ..... ..... .....    @vvv
+vsubwev_d_wu     0111 00000011 00010 ..... ..... .....    @vvv
+vsubwev_q_du     0111 00000011 00011 ..... ..... .....    @vvv
+vsubwod_h_bu     0111 00000011 01000 ..... ..... .....    @vvv
+vsubwod_w_hu     0111 00000011 01001 ..... ..... .....    @vvv
+vsubwod_d_wu     0111 00000011 01010 ..... ..... .....    @vvv
+vsubwod_q_du     0111 00000011 01011 ..... ..... .....    @vvv
+
+vaddwev_h_bu_b   0111 00000011 11100 ..... ..... .....    @vvv
+vaddwev_w_hu_h   0111 00000011 11101 ..... ..... .....    @vvv
+vaddwev_d_wu_w   0111 00000011 11110 ..... ..... .....    @vvv
+vaddwev_q_du_d   0111 00000011 11111 ..... ..... .....    @vvv
+vaddwod_h_bu_b   0111 00000100 00000 ..... ..... .....    @vvv
+vaddwod_w_hu_h   0111 00000100 00001 ..... ..... .....    @vvv
+vaddwod_d_wu_w   0111 00000100 00010 ..... ..... .....    @vvv
+vaddwod_q_du_d   0111 00000100 00011 ..... ..... .....    @vvv
diff --git a/target/loongarch/lsx_helper.c b/target/loongarch/lsx_helper.c
index cb9b691dc7..9e3131af1b 100644
--- a/target/loongarch/lsx_helper.c
+++ b/target/loongarch/lsx_helper.c
@@ -473,3 +473,246 @@ DO_HELPER_VVV(vhsubw_hu_bu, 16, helper_vvv, do_vhsubw_u)
 DO_HELPER_VVV(vhsubw_wu_hu, 32, helper_vvv, do_vhsubw_u)
 DO_HELPER_VVV(vhsubw_du_wu, 64, helper_vvv, do_vhsubw_u)
 DO_HELPER_VVV(vhsubw_qu_du, 128, helper_vvv, do_vhsubw_u)
+
+static void do_vaddwev_s(vec_t *Vd, vec_t *Vj, vec_t *Vk, int bit, int n) /* signed widening add of even-indexed elements; 'bit' = destination element width */
+{
+    switch (bit) {
+    case 16:
+        Vd->H[n] = (int16_t)Vj->B[2 * n] + (int16_t)Vk->B[2 * n]; /* B[even] + B[even] -> H */
+        break;
+    case 32:
+        Vd->W[n] = (int32_t)Vj->H[2 * n] + (int32_t)Vk->H[2 * n]; /* H[even] + H[even] -> W */
+        break;
+    case 64:
+        Vd->D[n] = (int64_t)Vj->W[2 * n] + (int64_t)Vk->W[2 * n]; /* W[even] + W[even] -> D */
+        break;
+    case 128:
+        Vd->Q[n] = (__int128)Vj->D[2 * n] + (__int128)Vk->D[2 * n]; /* D[even] + D[even] -> Q */
+        break;
+    default:
+        g_assert_not_reached(); /* only 16/32/64/128 are generated by DO_HELPER_VVV below */
+    }
+}
+
+static void do_vaddwod_s(vec_t *Vd, vec_t *Vj, vec_t *Vk, int bit, int n) /* signed widening add of odd-indexed elements; 'bit' = destination element width */
+{
+    switch (bit) {
+    case 16:
+        Vd->H[n] = (int16_t)Vj->B[2 * n + 1] + (int16_t)Vk->B[2 * n + 1]; /* B[odd] + B[odd] -> H */
+        break;
+    case 32:
+        Vd->W[n] = (int32_t)Vj->H[2 * n + 1] + (int32_t)Vk->H[2 * n + 1]; /* H[odd] + H[odd] -> W */
+        break;
+    case 64:
+        Vd->D[n] = (int64_t)Vj->W[2 * n + 1] + (int64_t)Vk->W[2 * n + 1]; /* W[odd] + W[odd] -> D */
+        break;
+    case 128:
+        Vd->Q[n] = (__int128)Vj->D[2 * n + 1] + (__int128)Vk->D[2 * n + 1]; /* D[odd] + D[odd] -> Q */
+        break;
+    default:
+        g_assert_not_reached(); /* only 16/32/64/128 are generated by DO_HELPER_VVV below */
+    }
+}
+
+static void do_vsubwev_s(vec_t *Vd, vec_t *Vj, vec_t *Vk, int bit, int n) /* signed widening subtract of even-indexed elements; 'bit' = destination element width */
+{
+    switch (bit) {
+    case 16:
+        Vd->H[n] = (int16_t)Vj->B[2 * n] - (int16_t)Vk->B[2 * n]; /* B[even] - B[even] -> H */
+        break;
+    case 32:
+        Vd->W[n] = (int32_t)Vj->H[2 * n] - (int32_t)Vk->H[2 * n]; /* H[even] - H[even] -> W */
+        break;
+    case 64:
+        Vd->D[n] = (int64_t)Vj->W[2 * n] - (int64_t)Vk->W[2 * n]; /* W[even] - W[even] -> D */
+        break;
+    case 128:
+        Vd->Q[n] = (__int128)Vj->D[2 * n] - (__int128)Vk->D[2 * n]; /* D[even] - D[even] -> Q */
+        break;
+    default:
+        g_assert_not_reached(); /* only 16/32/64/128 are generated by DO_HELPER_VVV below */
+    }
+}
+
+static void do_vsubwod_s(vec_t *Vd, vec_t *Vj, vec_t *Vk, int bit, int n) /* signed widening subtract of odd-indexed elements; 'bit' = destination element width */
+{
+    switch (bit) {
+    case 16:
+        Vd->H[n] = (int16_t)Vj->B[2 * n + 1] - (int16_t)Vk->B[2 * n + 1]; /* B[odd] - B[odd] -> H */
+        break;
+    case 32:
+        Vd->W[n] = (int32_t)Vj->H[2 * n + 1] - (int32_t)Vk->H[2 * n + 1]; /* H[odd] - H[odd] -> W */
+        break;
+    case 64:
+        Vd->D[n] = (int64_t)Vj->W[2 * n + 1] - (int64_t)Vk->W[2 * n + 1]; /* W[odd] - W[odd] -> D */
+        break;
+    case 128:
+        Vd->Q[n] = (__int128)Vj->D[2 * n + 1] - (__int128)Vk->D[2 * n + 1]; /* D[odd] - D[odd] -> Q */
+        break;
+    default:
+        g_assert_not_reached(); /* only 16/32/64/128 are generated by DO_HELPER_VVV below */
+    }
+}
+
+DO_HELPER_VVV(vaddwev_h_b, 16, helper_vvv, do_vaddwev_s)
+DO_HELPER_VVV(vaddwev_w_h, 32, helper_vvv, do_vaddwev_s)
+DO_HELPER_VVV(vaddwev_d_w, 64, helper_vvv, do_vaddwev_s)
+DO_HELPER_VVV(vaddwev_q_d, 128, helper_vvv, do_vaddwev_s)
+DO_HELPER_VVV(vaddwod_h_b, 16, helper_vvv, do_vaddwod_s)
+DO_HELPER_VVV(vaddwod_w_h, 32, helper_vvv, do_vaddwod_s)
+DO_HELPER_VVV(vaddwod_d_w, 64, helper_vvv, do_vaddwod_s)
+DO_HELPER_VVV(vaddwod_q_d, 128, helper_vvv, do_vaddwod_s)
+DO_HELPER_VVV(vsubwev_h_b, 16, helper_vvv, do_vsubwev_s)
+DO_HELPER_VVV(vsubwev_w_h, 32, helper_vvv, do_vsubwev_s)
+DO_HELPER_VVV(vsubwev_d_w, 64, helper_vvv, do_vsubwev_s)
+DO_HELPER_VVV(vsubwev_q_d, 128, helper_vvv, do_vsubwev_s)
+DO_HELPER_VVV(vsubwod_h_b, 16, helper_vvv, do_vsubwod_s)
+DO_HELPER_VVV(vsubwod_w_h, 32, helper_vvv, do_vsubwod_s)
+DO_HELPER_VVV(vsubwod_d_w, 64, helper_vvv, do_vsubwod_s)
+DO_HELPER_VVV(vsubwod_q_d, 128, helper_vvv, do_vsubwod_s)
+
+static void do_vaddwev_u(vec_t *Vd, vec_t *Vj, vec_t *Vk, int bit, int n) /* unsigned widening add of even-indexed elements */
+{
+    switch (bit) {
+    case 16:
+        Vd->H[n] = (uint16_t)(uint8_t)Vj->B[2 * n] + (uint16_t)(uint8_t)Vk->B[2 * n]; /* inner cast reinterprets the signed element as unsigned before widening */
+        break;
+    case 32:
+        Vd->W[n] = (uint32_t)(uint16_t)Vj->H[2 * n] + (uint32_t)(uint16_t)Vk->H[2 * n];
+        break;
+    case 64:
+        Vd->D[n] = (uint64_t)(uint32_t)Vj->W[2 * n] + (uint64_t)(uint32_t)Vk->W[2 * n];
+        break;
+    case 128:
+        Vd->Q[n] = (__uint128_t)(uint64_t)Vj->D[2 * n] + (__uint128_t)(uint64_t)Vk->D[2 * n];
+        break;
+    default:
+        g_assert_not_reached(); /* only 16/32/64/128 are generated by DO_HELPER_VVV below */
+    }
+}
+
+static void do_vaddwod_u(vec_t *Vd, vec_t *Vj, vec_t *Vk, int bit, int n) /* unsigned widening add of odd-indexed elements */
+{
+    switch (bit) {
+    case 16:
+        Vd->H[n] = (uint16_t)(uint8_t)Vj->B[2 * n + 1] + (uint16_t)(uint8_t)Vk->B[2 * n + 1]; /* inner cast reinterprets the signed element as unsigned before widening */
+        break;
+    case 32:
+        Vd->W[n] = (uint32_t)(uint16_t)Vj->H[2 * n + 1] + (uint32_t)(uint16_t)Vk->H[2 * n + 1];
+        break;
+    case 64:
+        Vd->D[n] = (uint64_t)(uint32_t)Vj->W[2 * n + 1] + (uint64_t)(uint32_t)Vk->W[2 * n + 1];
+        break;
+    case 128:
+        Vd->Q[n] = (__uint128_t)(uint64_t)Vj->D[2 * n + 1] + (__uint128_t)(uint64_t)Vk->D[2 * n + 1]; /* fixed: stray space in "(uint64_t )" cast, inconsistent with siblings and flagged by checkpatch */
+        break;
+    default:
+        g_assert_not_reached(); /* only 16/32/64/128 are generated by DO_HELPER_VVV below */
+    }
+}
+
+static void do_vsubwev_u(vec_t *Vd, vec_t *Vj, vec_t *Vk, int bit, int n) /* unsigned widening subtract of even-indexed elements */
+{
+    switch (bit) {
+    case 16:
+        Vd->H[n] = (uint16_t)(uint8_t)Vj->B[2 * n] - (uint16_t)(uint8_t)Vk->B[2 * n]; /* inner cast reinterprets the signed element as unsigned before widening */
+        break;
+    case 32:
+        Vd->W[n] = (uint32_t)(uint16_t)Vj->H[2 * n] - (uint32_t)(uint16_t)Vk->H[2 * n];
+        break;
+    case 64:
+        Vd->D[n] = (uint64_t)(uint32_t)Vj->W[2 * n] - (uint64_t)(uint32_t)Vk->W[2 * n];
+        break;
+    case 128:
+        Vd->Q[n] = (__uint128_t)(uint64_t)Vj->D[2 * n] - (__uint128_t)(uint64_t)Vk->D[2 * n];
+        break;
+    default:
+        g_assert_not_reached(); /* only 16/32/64/128 are generated by DO_HELPER_VVV below */
+    }
+}
+
+static void do_vsubwod_u(vec_t *Vd, vec_t *Vj, vec_t *Vk, int bit, int n) /* unsigned widening subtract of odd-indexed elements */
+{
+    switch (bit) {
+    case 16:
+        Vd->H[n] = (uint16_t)(uint8_t)Vj->B[2 * n + 1] - (uint16_t)(uint8_t)Vk->B[2 * n + 1]; /* inner cast reinterprets the signed element as unsigned before widening */
+        break;
+    case 32:
+        Vd->W[n] = (uint32_t)(uint16_t)Vj->H[2 * n + 1] - (uint32_t)(uint16_t)Vk->H[2 * n + 1];
+        break;
+    case 64:
+        Vd->D[n] = (uint64_t)(uint32_t)Vj->W[2 * n + 1] - (uint64_t)(uint32_t)Vk->W[2 * n + 1];
+        break;
+    case 128:
+        Vd->Q[n] = (__uint128_t)(uint64_t)Vj->D[2 * n + 1] - (__uint128_t)(uint64_t)Vk->D[2 * n + 1];
+        break;
+    default:
+        g_assert_not_reached(); /* only 16/32/64/128 are generated by DO_HELPER_VVV below */
+    }
+}
+
+DO_HELPER_VVV(vaddwev_h_bu, 16, helper_vvv, do_vaddwev_u)
+DO_HELPER_VVV(vaddwev_w_hu, 32, helper_vvv, do_vaddwev_u)
+DO_HELPER_VVV(vaddwev_d_wu, 64, helper_vvv, do_vaddwev_u)
+DO_HELPER_VVV(vaddwev_q_du, 128, helper_vvv, do_vaddwev_u)
+DO_HELPER_VVV(vaddwod_h_bu, 16, helper_vvv, do_vaddwod_u)
+DO_HELPER_VVV(vaddwod_w_hu, 32, helper_vvv, do_vaddwod_u)
+DO_HELPER_VVV(vaddwod_d_wu, 64, helper_vvv, do_vaddwod_u)
+DO_HELPER_VVV(vaddwod_q_du, 128, helper_vvv, do_vaddwod_u)
+DO_HELPER_VVV(vsubwev_h_bu, 16, helper_vvv, do_vsubwev_u)
+DO_HELPER_VVV(vsubwev_w_hu, 32, helper_vvv, do_vsubwev_u)
+DO_HELPER_VVV(vsubwev_d_wu, 64, helper_vvv, do_vsubwev_u)
+DO_HELPER_VVV(vsubwev_q_du, 128, helper_vvv, do_vsubwev_u)
+DO_HELPER_VVV(vsubwod_h_bu, 16, helper_vvv, do_vsubwod_u)
+DO_HELPER_VVV(vsubwod_w_hu, 32, helper_vvv, do_vsubwod_u)
+DO_HELPER_VVV(vsubwod_d_wu, 64, helper_vvv, do_vsubwod_u)
+DO_HELPER_VVV(vsubwod_q_du, 128, helper_vvv, do_vsubwod_u)
+
+static void do_vaddwev_u_s(vec_t *Vd, vec_t *Vj, vec_t *Vk, int bit, int n) /* even-indexed widening add: Vj element zero-extended, Vk element sign-extended */
+{
+    switch (bit) {
+    case 16:
+        Vd->H[n] = (uint16_t)(uint8_t)Vj->B[2 * n] + (int16_t)Vk->B[2 * n]; /* unsigned Vj + signed Vk -> H */
+        break;
+    case 32:
+        Vd->W[n] = (uint32_t)(uint16_t)Vj->H[2 * n] + (int32_t)Vk->H[2 * n];
+        break;
+    case 64:
+        Vd->D[n] = (uint64_t)(uint32_t)Vj->W[2 * n] + (int64_t)Vk->W[2 * n];
+        break;
+    case 128:
+        Vd->Q[n] = (__uint128_t)(uint64_t)Vj->D[2 * n] + (__int128)Vk->D[2 * n];
+        break;
+    default:
+        g_assert_not_reached(); /* only 16/32/64/128 are generated by DO_HELPER_VVV below */
+    }
+}
+
+static void do_vaddwod_u_s(vec_t *Vd, vec_t *Vj, vec_t *Vk, int bit, int n) /* odd-indexed widening add: Vj element zero-extended, Vk element sign-extended */
+{
+    switch (bit) {
+    case 16:
+        Vd->H[n] = (uint16_t)(uint8_t)Vj->B[2 * n + 1] + (int16_t)Vk->B[2 * n + 1]; /* unsigned Vj + signed Vk -> H */
+        break;
+    case 32:
+        Vd->W[n] = (uint32_t)(uint16_t)Vj->H[2 * n + 1] + (int32_t)Vk->H[2 * n + 1];
+        break;
+    case 64:
+        Vd->D[n] = (uint64_t)(uint32_t)Vj->W[2 * n + 1] + (int64_t)Vk->W[2 * n + 1];
+        break;
+    case 128:
+        Vd->Q[n] = (__uint128_t)(uint64_t)Vj->D[2 * n + 1] + (__int128)Vk->D[2 * n + 1];
+        break;
+    default:
+        g_assert_not_reached(); /* only 16/32/64/128 are generated by DO_HELPER_VVV below */
+    }
+}
+
+DO_HELPER_VVV(vaddwev_h_bu_b, 16, helper_vvv, do_vaddwev_u_s)
+DO_HELPER_VVV(vaddwev_w_hu_h, 32, helper_vvv, do_vaddwev_u_s)
+DO_HELPER_VVV(vaddwev_d_wu_w, 64, helper_vvv, do_vaddwev_u_s)
+DO_HELPER_VVV(vaddwev_q_du_d, 128, helper_vvv, do_vaddwev_u_s)
+DO_HELPER_VVV(vaddwod_h_bu_b, 16, helper_vvv, do_vaddwod_u_s)
+DO_HELPER_VVV(vaddwod_w_hu_h, 32, helper_vvv, do_vaddwod_u_s)
+DO_HELPER_VVV(vaddwod_d_wu_w, 64, helper_vvv, do_vaddwod_u_s)
+DO_HELPER_VVV(vaddwod_q_du_d, 128, helper_vvv, do_vaddwod_u_s)
-- 
2.31.1



  parent reply	other threads:[~2022-12-24  8:24 UTC|newest]

Thread overview: 100+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-12-24  8:15 [RFC PATCH 00/43] Add LoongArch LSX instructions Song Gao
2022-12-24  8:15 ` [RFC PATCH 01/43] target/loongarch: Add vector data type vec_t Song Gao
2022-12-24 17:07   ` Richard Henderson
2022-12-24 17:24   ` Richard Henderson
2022-12-28  2:34     ` gaosong
2022-12-28 17:30       ` Richard Henderson
2022-12-29  1:51         ` gaosong
2022-12-29  3:13           ` Richard Henderson
2022-12-29  3:54             ` gaosong
2022-12-24 17:32   ` Richard Henderson
2023-02-13  8:24     ` gaosong
2023-02-13 19:18       ` Richard Henderson
2022-12-24  8:15 ` [RFC PATCH 02/43] target/loongarch: CPUCFG support LSX Song Gao
2022-12-24  8:15 ` [RFC PATCH 03/43] target/loongarch: meson.build support build LSX Song Gao
2022-12-24  8:15 ` [RFC PATCH 04/43] target/loongarch: Add CHECK_SXE maccro for check LSX enable Song Gao
2022-12-24  8:15 ` [RFC PATCH 05/43] target/loongarch: Implement vadd/vsub Song Gao
2022-12-24 17:16   ` Richard Henderson
2022-12-24  8:15 ` [RFC PATCH 06/43] target/loongarch: Implement vaddi/vsubi Song Gao
2022-12-24 17:27   ` Richard Henderson
2022-12-24  8:15 ` [RFC PATCH 07/43] target/loongarch: Implement vneg Song Gao
2022-12-24 17:29   ` Richard Henderson
2022-12-24  8:15 ` [RFC PATCH 08/43] target/loongarch: Implement vsadd/vssub Song Gao
2022-12-24 17:31   ` Richard Henderson
2022-12-24  8:15 ` [RFC PATCH 09/43] target/loongarch: Implement vhaddw/vhsubw Song Gao
2022-12-24 17:41   ` Richard Henderson
2022-12-24  8:16 ` Song Gao [this message]
2022-12-24 17:48   ` [RFC PATCH 10/43] target/loongarch: Implement vaddw/vsubw Richard Henderson
2023-02-20  7:47     ` gaosong
2023-02-20 17:21       ` Richard Henderson
2023-02-23  8:23         ` gaosong
2023-02-23 15:22           ` Richard Henderson
2023-02-24  7:24             ` gaosong
2023-02-24 19:24               ` Richard Henderson
2023-02-27  9:14                 ` gaosong
2023-02-27  9:20                   ` Richard Henderson
2023-02-27 12:54                     ` gaosong
2023-02-27 18:32                       ` Richard Henderson
2023-02-27 12:55                 ` gaosong
2023-02-27 18:40                   ` Richard Henderson
2023-02-28  3:30                     ` gaosong
2023-02-28 16:48                       ` Richard Henderson
2023-02-24 23:01               ` Richard Henderson
2023-02-28  7:44                 ` gaosong
2023-02-28 16:50                   ` Richard Henderson
2022-12-24  8:16 ` [RFC PATCH 11/43] target/loongarch: Implement vavg/vavgr Song Gao
2022-12-24 17:52   ` Richard Henderson
2022-12-24  8:16 ` [RFC PATCH 12/43] target/loongarch: Implement vabsd Song Gao
2022-12-24 17:55   ` Richard Henderson
2022-12-24  8:16 ` [RFC PATCH 13/43] target/loongarch: Implement vadda Song Gao
2022-12-24 17:56   ` Richard Henderson
2022-12-24  8:16 ` [RFC PATCH 14/43] target/loongarch: Implement vmax/vmin Song Gao
2022-12-24 18:01   ` Richard Henderson
2022-12-24  8:16 ` [RFC PATCH 15/43] target/loongarch: Implement vmul/vmuh/vmulw{ev/od} Song Gao
2022-12-24 18:07   ` Richard Henderson
2022-12-24  8:16 ` [RFC PATCH 16/43] target/loongarch: Implement vmadd/vmsub/vmaddw{ev/od} Song Gao
2022-12-24 18:09   ` Richard Henderson
2022-12-24  8:16 ` [RFC PATCH 17/43] target/loongarch: Implement vdiv/vmod Song Gao
2022-12-24  8:16 ` [RFC PATCH 18/43] target/loongarch: Implement vsat Song Gao
2022-12-24 18:13   ` Richard Henderson
2022-12-24  8:16 ` [RFC PATCH 19/43] target/loongarch: Implement vexth Song Gao
2022-12-24  8:16 ` [RFC PATCH 20/43] target/loongarch: Implement vsigncov Song Gao
2022-12-24 18:18   ` Richard Henderson
2022-12-24  8:16 ` [RFC PATCH 21/43] target/loongarch: Implement vmskltz/vmskgez/vmsknz Song Gao
2022-12-24 18:31   ` Richard Henderson
2022-12-24  8:16 ` [RFC PATCH 22/43] target/loongarch: Implement LSX logic instructions Song Gao
2022-12-24 18:34   ` Richard Henderson
2022-12-24  8:16 ` [RFC PATCH 23/43] target/loongarch: Implement vsll vsrl vsra vrotr Song Gao
2022-12-24 18:36   ` Richard Henderson
2022-12-24  8:16 ` [RFC PATCH 24/43] target/loongarch: Implement vsllwil vextl Song Gao
2022-12-24  8:16 ` [RFC PATCH 25/43] target/loongarch: Implement vsrlr vsrar Song Gao
2022-12-24  8:16 ` [RFC PATCH 26/43] target/loongarch: Implement vsrln vsran Song Gao
2022-12-24  8:16 ` [RFC PATCH 27/43] target/loongarch: Implement vsrlrn vsrarn Song Gao
2022-12-24  8:16 ` [RFC PATCH 28/43] target/loongarch: Implement vssrln vssran Song Gao
2022-12-24  8:16 ` [RFC PATCH 29/43] target/loongarch: Implement vssrlrn vssrarn Song Gao
2022-12-24  8:16 ` [RFC PATCH 30/43] target/loongarch: Implement vclo vclz Song Gao
2022-12-24  8:16 ` [RFC PATCH 31/43] target/loongarch: Implement vpcnt Song Gao
2022-12-24  8:16 ` [RFC PATCH 32/43] target/loongarch: Implement vbitclr vbitset vbitrev Song Gao
2022-12-24  8:16 ` [RFC PATCH 33/43] target/loongarch: Implement vfrstp Song Gao
2022-12-24  8:16 ` [RFC PATCH 34/43] target/loongarch: Implement LSX fpu arith instructions Song Gao
2022-12-24  8:16 ` [RFC PATCH 35/43] target/loongarch: Implement LSX fpu fcvt instructions Song Gao
2022-12-24  8:16 ` [RFC PATCH 36/43] target/loongarch: Implement vseq vsle vslt Song Gao
2022-12-24 18:50   ` Richard Henderson
2022-12-24  8:16 ` [RFC PATCH 37/43] target/loongarch: Implement vfcmp Song Gao
2022-12-24  8:16 ` [RFC PATCH 38/43] target/loongarch: Implement vbitsel vset Song Gao
2022-12-24 19:15   ` Richard Henderson
2022-12-24  8:16 ` [RFC PATCH 39/43] target/loongarch: Implement vinsgr2vr vpickve2gr vreplgr2vr Song Gao
2022-12-24 20:34   ` Richard Henderson
2022-12-24  8:16 ` [RFC PATCH 40/43] target/loongarch: Implement vreplve vpack vpick Song Gao
2022-12-24 21:12   ` Richard Henderson
2023-03-21 11:31     ` gaosong
2023-03-21 15:55       ` Richard Henderson
2023-03-22  8:32         ` gaosong
2023-03-22 12:35           ` Richard Henderson
2022-12-24  8:16 ` [RFC PATCH 41/43] target/loongarch: Implement vilvl vilvh vextrins vshuf Song Gao
2022-12-24  8:16 ` [RFC PATCH 42/43] target/loongarch: Implement vld vst Song Gao
2022-12-24 21:15   ` Richard Henderson
2022-12-24  8:16 ` [RFC PATCH 43/43] target/loongarch: Implement vldi Song Gao
2022-12-24 21:18   ` Richard Henderson
2022-12-24 15:39 ` [RFC PATCH 00/43] Add LoongArch LSX instructions Richard Henderson
2022-12-28  0:55   ` gaosong

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20221224081633.4185445-11-gaosong@loongson.cn \
    --to=gaosong@loongson.cn \
    --cc=qemu-devel@nongnu.org \
    --cc=richard.henderson@linaro.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.