All of lore.kernel.org
 help / color / mirror / Atom feed
From: LIU Zhiwei <zhiwei_liu@c-sky.com>
To: qemu-devel@nongnu.org, qemu-riscv@nongnu.org
Cc: palmer@dabbelt.com, richard.henderson@linaro.org,
	bin.meng@windriver.com, Alistair.Francis@wdc.com,
	LIU Zhiwei <zhiwei_liu@c-sky.com>
Subject: [PATCH v2 03/37] target/riscv: 16-bit Addition & Subtraction Instructions
Date: Thu, 10 Jun 2021 15:58:34 +0800	[thread overview]
Message-ID: <20210610075908.3305506-4-zhiwei_liu@c-sky.com> (raw)
In-Reply-To: <20210610075908.3305506-1-zhiwei_liu@c-sky.com>

Include 5 groups: Wrap-around (dropping overflow), Signed Halving,
Unsigned Halving, Signed Saturation, and Unsigned Saturation.

Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com>
---
 include/tcg/tcg-op-gvec.h               |  10 +
 target/riscv/helper.h                   |  30 ++
 target/riscv/insn32.decode              |  32 +++
 target/riscv/insn_trans/trans_rvp.c.inc | 117 ++++++++
 target/riscv/meson.build                |   1 +
 target/riscv/packed_helper.c            | 354 ++++++++++++++++++++++++
 target/riscv/translate.c                |   1 +
 tcg/tcg-op-gvec.c                       |  28 ++
 8 files changed, 573 insertions(+)
 create mode 100644 target/riscv/insn_trans/trans_rvp.c.inc
 create mode 100644 target/riscv/packed_helper.c

diff --git a/include/tcg/tcg-op-gvec.h b/include/tcg/tcg-op-gvec.h
index c69a7de984..2dae9e78d0 100644
--- a/include/tcg/tcg-op-gvec.h
+++ b/include/tcg/tcg-op-gvec.h
@@ -386,10 +386,12 @@ void tcg_gen_vec_neg32_i64(TCGv_i64 d, TCGv_i64 a);
 
 void tcg_gen_vec_add8_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b);
 void tcg_gen_vec_add16_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b);
+void tcg_gen_vec_add16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b);
 void tcg_gen_vec_add32_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b);
 
 void tcg_gen_vec_sub8_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b);
 void tcg_gen_vec_sub16_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b);
+void tcg_gen_vec_sub16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b);
 void tcg_gen_vec_sub32_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b);
 
 void tcg_gen_vec_shl8i_i64(TCGv_i64 d, TCGv_i64 a, int64_t);
@@ -401,4 +403,12 @@ void tcg_gen_vec_sar16i_i64(TCGv_i64 d, TCGv_i64 a, int64_t);
 void tcg_gen_vec_rotl8i_i64(TCGv_i64 d, TCGv_i64 a, int64_t c);
 void tcg_gen_vec_rotl16i_i64(TCGv_i64 d, TCGv_i64 a, int64_t c);
 
+#if TARGET_LONG_BITS == 64
+#define tcg_gen_vec_add16_tl tcg_gen_vec_add16_i64
+#define tcg_gen_vec_sub16_tl tcg_gen_vec_sub16_i64
+#else
+#define tcg_gen_vec_add16_tl tcg_gen_vec_add16_i32
+#define tcg_gen_vec_sub16_tl tcg_gen_vec_sub16_i32
+#endif
+
 #endif
diff --git a/target/riscv/helper.h b/target/riscv/helper.h
index 415e37bc37..b6a71ade33 100644
--- a/target/riscv/helper.h
+++ b/target/riscv/helper.h
@@ -1149,3 +1149,33 @@ DEF_HELPER_6(vcompress_vm_b, void, ptr, ptr, ptr, ptr, env, i32)
 DEF_HELPER_6(vcompress_vm_h, void, ptr, ptr, ptr, ptr, env, i32)
 DEF_HELPER_6(vcompress_vm_w, void, ptr, ptr, ptr, ptr, env, i32)
 DEF_HELPER_6(vcompress_vm_d, void, ptr, ptr, ptr, ptr, env, i32)
+
+/* P extension function */
+DEF_HELPER_3(radd16, tl, env, tl, tl)
+DEF_HELPER_3(uradd16, tl, env, tl, tl)
+DEF_HELPER_3(kadd16, tl, env, tl, tl)
+DEF_HELPER_3(ukadd16, tl, env, tl, tl)
+DEF_HELPER_3(rsub16, tl, env, tl, tl)
+DEF_HELPER_3(ursub16, tl, env, tl, tl)
+DEF_HELPER_3(ksub16, tl, env, tl, tl)
+DEF_HELPER_3(uksub16, tl, env, tl, tl)
+DEF_HELPER_3(cras16, tl, env, tl, tl)
+DEF_HELPER_3(rcras16, tl, env, tl, tl)
+DEF_HELPER_3(urcras16, tl, env, tl, tl)
+DEF_HELPER_3(kcras16, tl, env, tl, tl)
+DEF_HELPER_3(ukcras16, tl, env, tl, tl)
+DEF_HELPER_3(crsa16, tl, env, tl, tl)
+DEF_HELPER_3(rcrsa16, tl, env, tl, tl)
+DEF_HELPER_3(urcrsa16, tl, env, tl, tl)
+DEF_HELPER_3(kcrsa16, tl, env, tl, tl)
+DEF_HELPER_3(ukcrsa16, tl, env, tl, tl)
+DEF_HELPER_3(stas16, tl, env, tl, tl)
+DEF_HELPER_3(rstas16, tl, env, tl, tl)
+DEF_HELPER_3(urstas16, tl, env, tl, tl)
+DEF_HELPER_3(kstas16, tl, env, tl, tl)
+DEF_HELPER_3(ukstas16, tl, env, tl, tl)
+DEF_HELPER_3(stsa16, tl, env, tl, tl)
+DEF_HELPER_3(rstsa16, tl, env, tl, tl)
+DEF_HELPER_3(urstsa16, tl, env, tl, tl)
+DEF_HELPER_3(kstsa16, tl, env, tl, tl)
+DEF_HELPER_3(ukstsa16, tl, env, tl, tl)
diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode
index f09f8d5faf..57f72fabf6 100644
--- a/target/riscv/insn32.decode
+++ b/target/riscv/insn32.decode
@@ -732,3 +732,35 @@ greviw     0110100 .......... 101 ..... 0011011 @sh5
 gorciw     0010100 .......... 101 ..... 0011011 @sh5
 
 slli_uw    00001. ........... 001 ..... 0011011 @sh
+
+# *** RV32P Extension ***
+add16      0100000  ..... ..... 000 ..... 1110111 @r
+radd16     0000000  ..... ..... 000 ..... 1110111 @r
+uradd16    0010000  ..... ..... 000 ..... 1110111 @r
+kadd16     0001000  ..... ..... 000 ..... 1110111 @r
+ukadd16    0011000  ..... ..... 000 ..... 1110111 @r
+sub16      0100001  ..... ..... 000 ..... 1110111 @r
+rsub16     0000001  ..... ..... 000 ..... 1110111 @r
+ursub16    0010001  ..... ..... 000 ..... 1110111 @r
+ksub16     0001001  ..... ..... 000 ..... 1110111 @r
+uksub16    0011001  ..... ..... 000 ..... 1110111 @r
+cras16     0100010  ..... ..... 000 ..... 1110111 @r
+rcras16    0000010  ..... ..... 000 ..... 1110111 @r
+urcras16   0010010  ..... ..... 000 ..... 1110111 @r
+kcras16    0001010  ..... ..... 000 ..... 1110111 @r
+ukcras16   0011010  ..... ..... 000 ..... 1110111 @r
+crsa16     0100011  ..... ..... 000 ..... 1110111 @r
+rcrsa16    0000011  ..... ..... 000 ..... 1110111 @r
+urcrsa16   0010011  ..... ..... 000 ..... 1110111 @r
+kcrsa16    0001011  ..... ..... 000 ..... 1110111 @r
+ukcrsa16   0011011  ..... ..... 000 ..... 1110111 @r
+stas16     1111010  ..... ..... 010 ..... 1110111 @r
+rstas16    1011010  ..... ..... 010 ..... 1110111 @r
+urstas16   1101010  ..... ..... 010 ..... 1110111 @r
+kstas16    1100010  ..... ..... 010 ..... 1110111 @r
+ukstas16   1110010  ..... ..... 010 ..... 1110111 @r
+stsa16     1111011  ..... ..... 010 ..... 1110111 @r
+rstsa16    1011011  ..... ..... 010 ..... 1110111 @r
+urstsa16   1101011  ..... ..... 010 ..... 1110111 @r
+kstsa16    1100011  ..... ..... 010 ..... 1110111 @r
+ukstsa16   1110011  ..... ..... 010 ..... 1110111 @r
diff --git a/target/riscv/insn_trans/trans_rvp.c.inc b/target/riscv/insn_trans/trans_rvp.c.inc
new file mode 100644
index 0000000000..43f395657a
--- /dev/null
+++ b/target/riscv/insn_trans/trans_rvp.c.inc
@@ -0,0 +1,117 @@
+/*
+ * RISC-V translation routines for the RVP Standard Extension.
+ *
+ * Copyright (c) 2021 T-Head Semiconductor Co., Ltd. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2 or later, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "tcg/tcg-op-gvec.h"
+#include "tcg/tcg-gvec-desc.h"
+#include "tcg/tcg.h"
+
+/*
+ *** SIMD Data Processing Instructions
+ */
+
+/* 16-bit Addition & Subtraction Instructions */
+
+/*
+ * For some instructions, such as add16, an oberservation can be utilized:
+ * 1) If any reg is zero, it can be reduced to an inline op on the whole reg.
+ * 2) Otherwise, it can be acclebrated by an vec op.
+ */
+static inline bool
+r_inline(DisasContext *ctx, arg_r *a,
+         void (* vecop)(TCGv, TCGv, TCGv),
+         void (* op)(TCGv, TCGv, TCGv))
+{
+    if (!has_ext(ctx, RVP)) {
+        return false;
+    }
+    if (a->rd && a->rs1 && a->rs2) {
+        vecop(cpu_gpr[a->rd], cpu_gpr[a->rs1], cpu_gpr[a->rs2]);
+    } else {
+        gen_arith(ctx, a, op);
+    }
+    return true;
+}
+
+/* Complete inline implementation */
+#define GEN_RVP_R_INLINE(NAME, VECOP, OP)                \
+static bool trans_##NAME(DisasContext *s, arg_r *a)      \
+{                                                        \
+    return r_inline(s, a, VECOP, OP);                    \
+}
+
+GEN_RVP_R_INLINE(add16, tcg_gen_vec_add16_tl, tcg_gen_add_tl);
+GEN_RVP_R_INLINE(sub16, tcg_gen_vec_sub16_tl, tcg_gen_sub_tl);
+
+/* Out of line helpers for R format packed instructions */
+static inline bool
+r_ool(DisasContext *ctx, arg_r *a, void (* fn)(TCGv, TCGv_ptr, TCGv, TCGv))
+{
+    TCGv src1, src2, dst;
+    if (!has_ext(ctx, RVP)) {
+        return false;
+    }
+
+    src1 = tcg_temp_new();
+    src2 = tcg_temp_new();
+    dst = tcg_temp_new();
+
+    gen_get_gpr(src1, a->rs1);
+    gen_get_gpr(src2, a->rs2);
+    fn(dst, cpu_env, src1, src2);
+    gen_set_gpr(a->rd, dst);
+
+    tcg_temp_free(src1);
+    tcg_temp_free(src2);
+    tcg_temp_free(dst);
+    return true;
+}
+
+#define GEN_RVP_R_OOL(NAME)                            \
+static bool trans_##NAME(DisasContext *s, arg_r *a)    \
+{                                                      \
+    return r_ool(s, a, gen_helper_##NAME);             \
+}
+
+GEN_RVP_R_OOL(radd16);
+GEN_RVP_R_OOL(uradd16);
+GEN_RVP_R_OOL(kadd16);
+GEN_RVP_R_OOL(ukadd16);
+GEN_RVP_R_OOL(rsub16);
+GEN_RVP_R_OOL(ursub16);
+GEN_RVP_R_OOL(ksub16);
+GEN_RVP_R_OOL(uksub16);
+GEN_RVP_R_OOL(cras16);
+GEN_RVP_R_OOL(rcras16);
+GEN_RVP_R_OOL(urcras16);
+GEN_RVP_R_OOL(kcras16);
+GEN_RVP_R_OOL(ukcras16);
+GEN_RVP_R_OOL(crsa16);
+GEN_RVP_R_OOL(rcrsa16);
+GEN_RVP_R_OOL(urcrsa16);
+GEN_RVP_R_OOL(kcrsa16);
+GEN_RVP_R_OOL(ukcrsa16);
+GEN_RVP_R_OOL(stas16);
+GEN_RVP_R_OOL(rstas16);
+GEN_RVP_R_OOL(urstas16);
+GEN_RVP_R_OOL(kstas16);
+GEN_RVP_R_OOL(ukstas16);
+GEN_RVP_R_OOL(stsa16);
+GEN_RVP_R_OOL(rstsa16);
+GEN_RVP_R_OOL(urstsa16);
+GEN_RVP_R_OOL(kstsa16);
+GEN_RVP_R_OOL(ukstsa16);
diff --git a/target/riscv/meson.build b/target/riscv/meson.build
index d5e0bc93ea..cc169e1b2c 100644
--- a/target/riscv/meson.build
+++ b/target/riscv/meson.build
@@ -17,6 +17,7 @@ riscv_ss.add(files(
   'op_helper.c',
   'vector_helper.c',
   'bitmanip_helper.c',
+  'packed_helper.c',
   'translate.c',
 ))
 
diff --git a/target/riscv/packed_helper.c b/target/riscv/packed_helper.c
new file mode 100644
index 0000000000..b84abaaf25
--- /dev/null
+++ b/target/riscv/packed_helper.c
@@ -0,0 +1,354 @@
+/*
+ * RISC-V P Extension Helpers for QEMU.
+ *
+ * Copyright (c) 2021 T-Head Semiconductor Co., Ltd. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2 or later, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+#include "qemu/osdep.h"
+#include "cpu.h"
+#include "exec/exec-all.h"
+#include "exec/helper-proto.h"
+#include "exec/cpu_ldst.h"
+#include "fpu/softfloat.h"
+#include <math.h>
+#include "internals.h"
+
+/*
+ *** SIMD Data Processing Instructions
+ */
+
+/* 16-bit Addition & Subtraction Instructions */
+typedef void PackedFn3i(CPURISCVState *, void *, void *, void *, uint8_t);
+
+/* Define a common function to loop elements in packed register */
+static inline target_ulong
+rvpr(CPURISCVState *env, target_ulong a, target_ulong b,
+     uint8_t step, uint8_t size, PackedFn3i *fn)
+{
+    int i, passes = sizeof(target_ulong) / size;
+    target_ulong result = 0;
+
+    for (i = 0; i < passes; i += step) {
+        fn(env, &result, &a, &b, i);
+    }
+    return result;
+}
+
+#define RVPR(NAME, STEP, SIZE)                                  \
+target_ulong HELPER(NAME)(CPURISCVState *env, target_ulong a,   \
+                          target_ulong b)                       \
+{                                                               \
+    return rvpr(env, a, b, STEP, SIZE, (PackedFn3i *)do_##NAME);\
+}
+
+static inline int32_t hadd32(int32_t a, int32_t b)
+{
+    return ((int64_t)a + b) >> 1;
+}
+
+static inline void do_radd16(CPURISCVState *env, void *vd, void *va,
+                             void *vb, uint8_t i)
+{
+    int16_t *d = vd, *a = va, *b = vb;
+    d[i] = hadd32(a[i], b[i]);
+}
+
+RVPR(radd16, 1, 2);
+
+static inline uint32_t haddu32(uint32_t a, uint32_t b)
+{
+    return ((uint64_t)a + b) >> 1;
+}
+
+static inline void do_uradd16(CPURISCVState *env, void *vd, void *va,
+                              void *vb, uint8_t i)
+{
+    uint16_t *d = vd, *a = va, *b = vb;
+    d[i] = haddu32(a[i], b[i]);
+}
+
+RVPR(uradd16, 1, 2);
+
+static inline void do_kadd16(CPURISCVState *env, void *vd, void *va,
+                             void *vb, uint8_t i)
+{
+    int16_t *d = vd, *a = va, *b = vb;
+    d[i] = sadd16(env, 0, a[i], b[i]);
+}
+
+RVPR(kadd16, 1, 2);
+
+static inline void do_ukadd16(CPURISCVState *env, void *vd, void *va,
+                              void *vb, uint8_t i)
+{
+    uint16_t *d = vd, *a = va, *b = vb;
+    d[i] = saddu16(env, 0, a[i], b[i]);
+}
+
+RVPR(ukadd16, 1, 2);
+
+static inline int32_t hsub32(int32_t a, int32_t b)
+{
+    return ((int64_t)a - b) >> 1;
+}
+
+static inline int64_t hsub64(int64_t a, int64_t b)
+{
+    int64_t res = a - b;
+    int64_t over = (res ^ a) & (a ^ b) & INT64_MIN;
+
+    /* With signed overflow, bit 64 is inverse of bit 63. */
+    return (res >> 1) ^ over;
+}
+
+static inline void do_rsub16(CPURISCVState *env, void *vd, void *va,
+                             void *vb, uint8_t i)
+{
+    int16_t *d = vd, *a = va, *b = vb;
+    d[i] = hsub32(a[i], b[i]);
+}
+
+RVPR(rsub16, 1, 2);
+
+static inline uint64_t hsubu64(uint64_t a, uint64_t b)
+{
+    return (a - b) >> 1;
+}
+
+static inline void do_ursub16(CPURISCVState *env, void *vd, void *va,
+                              void *vb, uint8_t i)
+{
+    uint16_t *d = vd, *a = va, *b = vb;
+    d[i] = hsubu64(a[i], b[i]);
+}
+
+RVPR(ursub16, 1, 2);
+
+static inline void do_ksub16(CPURISCVState *env, void *vd, void *va,
+                             void *vb, uint8_t i)
+{
+    int16_t *d = vd, *a = va, *b = vb;
+    d[i] = ssub16(env, 0, a[i], b[i]);
+}
+
+RVPR(ksub16, 1, 2);
+
+static inline void do_uksub16(CPURISCVState *env, void *vd, void *va,
+                              void *vb, uint8_t i)
+{
+    uint16_t *d = vd, *a = va, *b = vb;
+    d[i] = ssubu16(env, 0, a[i], b[i]);
+}
+
+RVPR(uksub16, 1, 2);
+
+static inline void do_cras16(CPURISCVState *env, void *vd, void *va,
+                             void *vb, uint8_t i)
+{
+    uint16_t *d = vd, *a = va, *b = vb;
+    d[H2(i)] = a[H2(i)] - b[H2(i + 1)];
+    d[H2(i + 1)] = a[H2(i + 1)] + b[H2(i)];
+}
+
+RVPR(cras16, 2, 2);
+
+static inline void do_rcras16(CPURISCVState *env, void *vd, void *va,
+                              void *vb, uint8_t i)
+{
+    int16_t *d = vd, *a = va, *b = vb;
+    d[H2(i)] = hsub32(a[H2(i)], b[H2(i + 1)]);
+    d[H2(i + 1)] = hadd32(a[H2(i + 1)], b[H2(i)]);
+}
+
+RVPR(rcras16, 2, 2);
+
+static inline void do_urcras16(CPURISCVState *env, void *vd, void *va,
+                               void *vb, uint8_t i)
+{
+    uint16_t *d = vd, *a = va, *b = vb;
+    d[H2(i)] = hsubu64(a[H2(i)], b[H2(i + 1)]);
+    d[H2(i + 1)] = haddu32(a[H2(i + 1)], b[H2(i)]);
+}
+
+RVPR(urcras16, 2, 2);
+
+static inline void do_kcras16(CPURISCVState *env, void *vd, void *va,
+                              void *vb, uint8_t i)
+{
+    int16_t *d = vd, *a = va, *b = vb;
+    d[H2(i)] = ssub16(env, 0, a[H2(i)], b[H2(i + 1)]);
+    d[H2(i + 1)] = sadd16(env, 0, a[H2(i + 1)], b[H2(i)]);
+}
+
+RVPR(kcras16, 2, 2);
+
+static inline void do_ukcras16(CPURISCVState *env, void *vd, void *va,
+                               void *vb, uint8_t i)
+{
+    uint16_t *d = vd, *a = va, *b = vb;
+    d[H2(i)] = ssubu16(env, 0, a[H2(i)], b[H2(i + 1)]);
+    d[H2(i + 1)] = saddu16(env, 0, a[H2(i + 1)], b[H2(i)]);
+}
+
+RVPR(ukcras16, 2, 2);
+
+static inline void do_crsa16(CPURISCVState *env, void *vd, void *va,
+                             void *vb, uint8_t i)
+{
+    uint16_t *d = vd, *a = va, *b = vb;
+    d[H2(i)] = a[H2(i)] + b[H2(i + 1)];
+    d[H2(i + 1)] = a[H2(i + 1)] - b[H2(i)];
+}
+
+RVPR(crsa16, 2, 2);
+
+static inline void do_rcrsa16(CPURISCVState *env, void *vd, void *va,
+                              void *vb, uint8_t i)
+{
+    int16_t *d = vd, *a = va, *b = vb;
+    d[H2(i)] = hadd32(a[H2(i)], b[H2(i + 1)]);
+    d[H2(i + 1)] = hsub32(a[H2(i + 1)], b[H2(i)]);
+}
+
+RVPR(rcrsa16, 2, 2);
+
+static inline void do_urcrsa16(CPURISCVState *env, void *vd, void *va,
+                               void *vb, uint8_t i)
+{
+    uint16_t *d = vd, *a = va, *b = vb;
+    d[H2(i)] = haddu32(a[H2(i)], b[H2(i + 1)]);
+    d[H2(i + 1)] = hsubu64(a[H2(i + 1)], b[H2(i)]);
+}
+
+RVPR(urcrsa16, 2, 2);
+
+static inline void do_kcrsa16(CPURISCVState *env, void *vd, void *va,
+                              void *vb, uint8_t i)
+{
+    int16_t *d = vd, *a = va, *b = vb;
+    d[H2(i)] = sadd16(env, 0, a[H2(i)], b[H2(i + 1)]);
+    d[H2(i + 1)] = ssub16(env, 0, a[H2(i + 1)], b[H2(i)]);
+}
+
+RVPR(kcrsa16, 2, 2);
+
+static inline void do_ukcrsa16(CPURISCVState *env, void *vd, void *va,
+                               void *vb, uint8_t i)
+{
+    uint16_t *d = vd, *a = va, *b = vb;
+    d[H2(i)] = saddu16(env, 0, a[H2(i)], b[H2(i + 1)]);
+    d[H2(i + 1)] = ssubu16(env, 0, a[H2(i + 1)], b[H2(i)]);
+}
+
+RVPR(ukcrsa16, 2, 2);
+
+static inline void do_stas16(CPURISCVState *env, void *vd, void *va,
+                             void *vb, uint8_t i)
+{
+    int16_t *d = vd, *a = va, *b = vb;
+    d[H2(i)] = a[H2(i)] - b[H2(i)];
+    d[H2(i + 1)] = a[H2(i + 1)] + b[H2(i + 1)];
+}
+
+RVPR(stas16, 2, 2);
+
+static inline void do_rstas16(CPURISCVState *env, void *vd, void *va,
+                              void *vb, uint8_t i)
+{
+    int16_t *d = vd, *a = va, *b = vb;
+    d[H2(i)] = hsub32(a[H2(i)], b[H2(i)]);
+    d[H2(i + 1)] = hadd32(a[H2(i + 1)], b[H2(i + 1)]);
+}
+
+RVPR(rstas16, 2, 2);
+
+static inline void do_urstas16(CPURISCVState *env, void *vd, void *va,
+                               void *vb, uint8_t i)
+{
+    uint16_t *d = vd, *a = va, *b = vb;
+    d[H2(i)] = hsubu64(a[H2(i)], b[H2(i)]);
+    d[H2(i + 1)] = haddu32(a[H2(i + 1)], b[H2(i + 1)]);
+}
+
+RVPR(urstas16, 2, 2);
+
+static inline void do_kstas16(CPURISCVState *env, void *vd, void *va,
+                              void *vb, uint8_t i)
+{
+    int16_t *d = vd, *a = va, *b = vb;
+    d[H2(i)] = ssub16(env, 0, a[H2(i)], b[H2(i)]);
+    d[H2(i + 1)] = sadd16(env, 0, a[H2(i + 1)], b[H2(i + 1)]);
+}
+
+RVPR(kstas16, 2, 2);
+
+static inline void do_ukstas16(CPURISCVState *env, void *vd, void *va,
+                               void *vb, uint8_t i)
+{
+    uint16_t *d = vd, *a = va, *b = vb;
+    d[H2(i)] = ssubu16(env, 0, a[H2(i)], b[H2(i)]);
+    d[H2(i + 1)] = saddu16(env, 0, a[H2(i + 1)], b[H2(i + 1)]);
+}
+
+RVPR(ukstas16, 2, 2);
+
+static inline void do_stsa16(CPURISCVState *env, void *vd, void *va,
+                             void *vb, uint8_t i)
+{
+    uint16_t *d = vd, *a = va, *b = vb;
+    d[H2(i)] = a[H2(i)] + b[H2(i)];
+    d[H2(i + 1)] = a[H2(i + 1)] - b[H2(i + 1)];
+}
+
+RVPR(stsa16, 2, 2);
+
+static inline void do_rstsa16(CPURISCVState *env, void *vd, void *va,
+                              void *vb, uint8_t i)
+{
+    int16_t *d = vd, *a = va, *b = vb;
+    d[H2(i)] = hadd32(a[H2(i)], b[H2(i)]);
+    d[H2(i + 1)] = hsub32(a[H2(i + 1)], b[H2(i + 1)]);
+}
+
+RVPR(rstsa16, 2, 2);
+
+static inline void do_urstsa16(CPURISCVState *env, void *vd, void *va,
+                               void *vb, uint8_t i)
+{
+    uint16_t *d = vd, *a = va, *b = vb;
+    d[H2(i)] = haddu32(a[H2(i)], b[H2(i)]);
+    d[H2(i + 1)] = hsubu64(a[H2(i + 1)], b[H2(i + 1)]);
+}
+
+RVPR(urstsa16, 2, 2);
+
+static inline void do_kstsa16(CPURISCVState *env, void *vd, void *va,
+                              void *vb, uint8_t i)
+{
+    int16_t *d = vd, *a = va, *b = vb;
+    d[H2(i)] = sadd16(env, 0, a[H2(i)], b[H2(i)]);
+    d[H2(i + 1)] = ssub16(env, 0, a[H2(i + 1)], b[H2(i + 1)]);
+}
+
+RVPR(kstsa16, 2, 2);
+
+static inline void do_ukstsa16(CPURISCVState *env, void *vd, void *va,
+                               void *vb, uint8_t i)
+{
+    uint16_t *d = vd, *a = va, *b = vb;
+    d[H2(i)] = saddu16(env, 0, a[H2(i)], b[H2(i)]);
+    d[H2(i + 1)] = ssubu16(env, 0, a[H2(i + 1)], b[H2(i + 1)]);
+}
+
+RVPR(ukstsa16, 2, 2);
diff --git a/target/riscv/translate.c b/target/riscv/translate.c
index 0e6ede4d71..51b144e9be 100644
--- a/target/riscv/translate.c
+++ b/target/riscv/translate.c
@@ -908,6 +908,7 @@ static bool gen_unary(DisasContext *ctx, arg_r2 *a,
 #include "insn_trans/trans_rvh.c.inc"
 #include "insn_trans/trans_rvv.c.inc"
 #include "insn_trans/trans_rvb.c.inc"
+#include "insn_trans/trans_rvp.c.inc"
 #include "insn_trans/trans_privileged.c.inc"
 
 /* Include the auto-generated decoder for 16 bit insn */
diff --git a/tcg/tcg-op-gvec.c b/tcg/tcg-op-gvec.c
index 498a959839..a8898ba7bf 100644
--- a/tcg/tcg-op-gvec.c
+++ b/tcg/tcg-op-gvec.c
@@ -1742,6 +1742,20 @@ void tcg_gen_vec_add16_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
     gen_addv_mask(d, a, b, m);
 }
 
+void tcg_gen_vec_add16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
+{
+    TCGv_i32 t1 = tcg_temp_new_i32();
+    TCGv_i32 t2 = tcg_temp_new_i32();
+
+    tcg_gen_andi_i32(t1, a, ~0xffff);
+    tcg_gen_add_i32(t2, a, b);
+    tcg_gen_add_i32(t1, t1, b);
+    tcg_gen_deposit_i32(d, t1, t2, 0, 16);
+
+    tcg_temp_free_i32(t1);
+    tcg_temp_free_i32(t2);
+}
+
 void tcg_gen_vec_add32_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
 {
     TCGv_i64 t1 = tcg_temp_new_i64();
@@ -1892,6 +1906,20 @@ void tcg_gen_vec_sub16_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
     gen_subv_mask(d, a, b, m);
 }
 
+void tcg_gen_vec_sub16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
+{
+    TCGv_i32 t1 = tcg_temp_new_i32();
+    TCGv_i32 t2 = tcg_temp_new_i32();
+
+    tcg_gen_andi_i32(t1, b, ~0xffff);
+    tcg_gen_sub_i32(t2, a, b);
+    tcg_gen_sub_i32(t1, a, t1);
+    tcg_gen_deposit_i32(d, t1, t2, 0, 16);
+
+    tcg_temp_free_i32(t1);
+    tcg_temp_free_i32(t2);
+}
+
 void tcg_gen_vec_sub32_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
 {
     TCGv_i64 t1 = tcg_temp_new_i64();
-- 
2.25.1



WARNING: multiple messages have this Message-ID (diff)
From: LIU Zhiwei <zhiwei_liu@c-sky.com>
To: qemu-devel@nongnu.org, qemu-riscv@nongnu.org
Cc: Alistair.Francis@wdc.com, palmer@dabbelt.com,
	bin.meng@windriver.com, richard.henderson@linaro.org,
	LIU Zhiwei <zhiwei_liu@c-sky.com>
Subject: [PATCH v2 03/37] target/riscv: 16-bit Addition & Subtraction Instructions
Date: Thu, 10 Jun 2021 15:58:34 +0800	[thread overview]
Message-ID: <20210610075908.3305506-4-zhiwei_liu@c-sky.com> (raw)
In-Reply-To: <20210610075908.3305506-1-zhiwei_liu@c-sky.com>

Include 5 groups: Wrap-around (dropping overflow), Signed Halving,
Unsigned Halving, Signed Saturation, and Unsigned Saturation.

Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com>
---
 include/tcg/tcg-op-gvec.h               |  10 +
 target/riscv/helper.h                   |  30 ++
 target/riscv/insn32.decode              |  32 +++
 target/riscv/insn_trans/trans_rvp.c.inc | 117 ++++++++
 target/riscv/meson.build                |   1 +
 target/riscv/packed_helper.c            | 354 ++++++++++++++++++++++++
 target/riscv/translate.c                |   1 +
 tcg/tcg-op-gvec.c                       |  28 ++
 8 files changed, 573 insertions(+)
 create mode 100644 target/riscv/insn_trans/trans_rvp.c.inc
 create mode 100644 target/riscv/packed_helper.c

diff --git a/include/tcg/tcg-op-gvec.h b/include/tcg/tcg-op-gvec.h
index c69a7de984..2dae9e78d0 100644
--- a/include/tcg/tcg-op-gvec.h
+++ b/include/tcg/tcg-op-gvec.h
@@ -386,10 +386,12 @@ void tcg_gen_vec_neg32_i64(TCGv_i64 d, TCGv_i64 a);
 
 void tcg_gen_vec_add8_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b);
 void tcg_gen_vec_add16_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b);
+void tcg_gen_vec_add16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b);
 void tcg_gen_vec_add32_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b);
 
 void tcg_gen_vec_sub8_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b);
 void tcg_gen_vec_sub16_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b);
+void tcg_gen_vec_sub16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b);
 void tcg_gen_vec_sub32_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b);
 
 void tcg_gen_vec_shl8i_i64(TCGv_i64 d, TCGv_i64 a, int64_t);
@@ -401,4 +403,12 @@ void tcg_gen_vec_sar16i_i64(TCGv_i64 d, TCGv_i64 a, int64_t);
 void tcg_gen_vec_rotl8i_i64(TCGv_i64 d, TCGv_i64 a, int64_t c);
 void tcg_gen_vec_rotl16i_i64(TCGv_i64 d, TCGv_i64 a, int64_t c);
 
+#if TARGET_LONG_BITS == 64
+#define tcg_gen_vec_add16_tl tcg_gen_vec_add16_i64
+#define tcg_gen_vec_sub16_tl tcg_gen_vec_sub16_i64
+#else
+#define tcg_gen_vec_add16_tl tcg_gen_vec_add16_i32
+#define tcg_gen_vec_sub16_tl tcg_gen_vec_sub16_i32
+#endif
+
 #endif
diff --git a/target/riscv/helper.h b/target/riscv/helper.h
index 415e37bc37..b6a71ade33 100644
--- a/target/riscv/helper.h
+++ b/target/riscv/helper.h
@@ -1149,3 +1149,33 @@ DEF_HELPER_6(vcompress_vm_b, void, ptr, ptr, ptr, ptr, env, i32)
 DEF_HELPER_6(vcompress_vm_h, void, ptr, ptr, ptr, ptr, env, i32)
 DEF_HELPER_6(vcompress_vm_w, void, ptr, ptr, ptr, ptr, env, i32)
 DEF_HELPER_6(vcompress_vm_d, void, ptr, ptr, ptr, ptr, env, i32)
+
+/* P extension function */
+DEF_HELPER_3(radd16, tl, env, tl, tl)
+DEF_HELPER_3(uradd16, tl, env, tl, tl)
+DEF_HELPER_3(kadd16, tl, env, tl, tl)
+DEF_HELPER_3(ukadd16, tl, env, tl, tl)
+DEF_HELPER_3(rsub16, tl, env, tl, tl)
+DEF_HELPER_3(ursub16, tl, env, tl, tl)
+DEF_HELPER_3(ksub16, tl, env, tl, tl)
+DEF_HELPER_3(uksub16, tl, env, tl, tl)
+DEF_HELPER_3(cras16, tl, env, tl, tl)
+DEF_HELPER_3(rcras16, tl, env, tl, tl)
+DEF_HELPER_3(urcras16, tl, env, tl, tl)
+DEF_HELPER_3(kcras16, tl, env, tl, tl)
+DEF_HELPER_3(ukcras16, tl, env, tl, tl)
+DEF_HELPER_3(crsa16, tl, env, tl, tl)
+DEF_HELPER_3(rcrsa16, tl, env, tl, tl)
+DEF_HELPER_3(urcrsa16, tl, env, tl, tl)
+DEF_HELPER_3(kcrsa16, tl, env, tl, tl)
+DEF_HELPER_3(ukcrsa16, tl, env, tl, tl)
+DEF_HELPER_3(stas16, tl, env, tl, tl)
+DEF_HELPER_3(rstas16, tl, env, tl, tl)
+DEF_HELPER_3(urstas16, tl, env, tl, tl)
+DEF_HELPER_3(kstas16, tl, env, tl, tl)
+DEF_HELPER_3(ukstas16, tl, env, tl, tl)
+DEF_HELPER_3(stsa16, tl, env, tl, tl)
+DEF_HELPER_3(rstsa16, tl, env, tl, tl)
+DEF_HELPER_3(urstsa16, tl, env, tl, tl)
+DEF_HELPER_3(kstsa16, tl, env, tl, tl)
+DEF_HELPER_3(ukstsa16, tl, env, tl, tl)
diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode
index f09f8d5faf..57f72fabf6 100644
--- a/target/riscv/insn32.decode
+++ b/target/riscv/insn32.decode
@@ -732,3 +732,35 @@ greviw     0110100 .......... 101 ..... 0011011 @sh5
 gorciw     0010100 .......... 101 ..... 0011011 @sh5
 
 slli_uw    00001. ........... 001 ..... 0011011 @sh
+
+# *** RV32P Extension ***
+add16      0100000  ..... ..... 000 ..... 1110111 @r
+radd16     0000000  ..... ..... 000 ..... 1110111 @r
+uradd16    0010000  ..... ..... 000 ..... 1110111 @r
+kadd16     0001000  ..... ..... 000 ..... 1110111 @r
+ukadd16    0011000  ..... ..... 000 ..... 1110111 @r
+sub16      0100001  ..... ..... 000 ..... 1110111 @r
+rsub16     0000001  ..... ..... 000 ..... 1110111 @r
+ursub16    0010001  ..... ..... 000 ..... 1110111 @r
+ksub16     0001001  ..... ..... 000 ..... 1110111 @r
+uksub16    0011001  ..... ..... 000 ..... 1110111 @r
+cras16     0100010  ..... ..... 000 ..... 1110111 @r
+rcras16    0000010  ..... ..... 000 ..... 1110111 @r
+urcras16   0010010  ..... ..... 000 ..... 1110111 @r
+kcras16    0001010  ..... ..... 000 ..... 1110111 @r
+ukcras16   0011010  ..... ..... 000 ..... 1110111 @r
+crsa16     0100011  ..... ..... 000 ..... 1110111 @r
+rcrsa16    0000011  ..... ..... 000 ..... 1110111 @r
+urcrsa16   0010011  ..... ..... 000 ..... 1110111 @r
+kcrsa16    0001011  ..... ..... 000 ..... 1110111 @r
+ukcrsa16   0011011  ..... ..... 000 ..... 1110111 @r
+stas16     1111010  ..... ..... 010 ..... 1110111 @r
+rstas16    1011010  ..... ..... 010 ..... 1110111 @r
+urstas16   1101010  ..... ..... 010 ..... 1110111 @r
+kstas16    1100010  ..... ..... 010 ..... 1110111 @r
+ukstas16   1110010  ..... ..... 010 ..... 1110111 @r
+stsa16     1111011  ..... ..... 010 ..... 1110111 @r
+rstsa16    1011011  ..... ..... 010 ..... 1110111 @r
+urstsa16   1101011  ..... ..... 010 ..... 1110111 @r
+kstsa16    1100011  ..... ..... 010 ..... 1110111 @r
+ukstsa16   1110011  ..... ..... 010 ..... 1110111 @r
diff --git a/target/riscv/insn_trans/trans_rvp.c.inc b/target/riscv/insn_trans/trans_rvp.c.inc
new file mode 100644
index 0000000000..43f395657a
--- /dev/null
+++ b/target/riscv/insn_trans/trans_rvp.c.inc
@@ -0,0 +1,117 @@
+/*
+ * RISC-V translation routines for the RVP Standard Extension.
+ *
+ * Copyright (c) 2021 T-Head Semiconductor Co., Ltd. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2 or later, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "tcg/tcg-op-gvec.h"
+#include "tcg/tcg-gvec-desc.h"
+#include "tcg/tcg.h"
+
+/*
+ *** SIMD Data Processing Instructions
+ */
+
+/* 16-bit Addition & Subtraction Instructions */
+
+/*
+ * For some instructions, such as add16, an oberservation can be utilized:
+ * 1) If any reg is zero, it can be reduced to an inline op on the whole reg.
+ * 2) Otherwise, it can be acclebrated by an vec op.
+ */
+static inline bool
+r_inline(DisasContext *ctx, arg_r *a,
+         void (* vecop)(TCGv, TCGv, TCGv),
+         void (* op)(TCGv, TCGv, TCGv))
+{
+    if (!has_ext(ctx, RVP)) {
+        return false;
+    }
+    if (a->rd && a->rs1 && a->rs2) {
+        vecop(cpu_gpr[a->rd], cpu_gpr[a->rs1], cpu_gpr[a->rs2]);
+    } else {
+        gen_arith(ctx, a, op);
+    }
+    return true;
+}
+
+/* Complete inline implementation */
+#define GEN_RVP_R_INLINE(NAME, VECOP, OP)                \
+static bool trans_##NAME(DisasContext *s, arg_r *a)      \
+{                                                        \
+    return r_inline(s, a, VECOP, OP);                    \
+}
+
+GEN_RVP_R_INLINE(add16, tcg_gen_vec_add16_tl, tcg_gen_add_tl);
+GEN_RVP_R_INLINE(sub16, tcg_gen_vec_sub16_tl, tcg_gen_sub_tl);
+
+/* Out of line helpers for R format packed instructions */
+static inline bool
+r_ool(DisasContext *ctx, arg_r *a, void (* fn)(TCGv, TCGv_ptr, TCGv, TCGv))
+{
+    TCGv src1, src2, dst;
+    if (!has_ext(ctx, RVP)) {
+        return false;
+    }
+
+    src1 = tcg_temp_new();
+    src2 = tcg_temp_new();
+    dst = tcg_temp_new();
+
+    gen_get_gpr(src1, a->rs1);
+    gen_get_gpr(src2, a->rs2);
+    fn(dst, cpu_env, src1, src2);
+    gen_set_gpr(a->rd, dst);
+
+    tcg_temp_free(src1);
+    tcg_temp_free(src2);
+    tcg_temp_free(dst);
+    return true;
+}
+
+#define GEN_RVP_R_OOL(NAME)                            \
+static bool trans_##NAME(DisasContext *s, arg_r *a)    \
+{                                                      \
+    return r_ool(s, a, gen_helper_##NAME);             \
+}
+
+GEN_RVP_R_OOL(radd16);
+GEN_RVP_R_OOL(uradd16);
+GEN_RVP_R_OOL(kadd16);
+GEN_RVP_R_OOL(ukadd16);
+GEN_RVP_R_OOL(rsub16);
+GEN_RVP_R_OOL(ursub16);
+GEN_RVP_R_OOL(ksub16);
+GEN_RVP_R_OOL(uksub16);
+GEN_RVP_R_OOL(cras16);
+GEN_RVP_R_OOL(rcras16);
+GEN_RVP_R_OOL(urcras16);
+GEN_RVP_R_OOL(kcras16);
+GEN_RVP_R_OOL(ukcras16);
+GEN_RVP_R_OOL(crsa16);
+GEN_RVP_R_OOL(rcrsa16);
+GEN_RVP_R_OOL(urcrsa16);
+GEN_RVP_R_OOL(kcrsa16);
+GEN_RVP_R_OOL(ukcrsa16);
+GEN_RVP_R_OOL(stas16);
+GEN_RVP_R_OOL(rstas16);
+GEN_RVP_R_OOL(urstas16);
+GEN_RVP_R_OOL(kstas16);
+GEN_RVP_R_OOL(ukstas16);
+GEN_RVP_R_OOL(stsa16);
+GEN_RVP_R_OOL(rstsa16);
+GEN_RVP_R_OOL(urstsa16);
+GEN_RVP_R_OOL(kstsa16);
+GEN_RVP_R_OOL(ukstsa16);
diff --git a/target/riscv/meson.build b/target/riscv/meson.build
index d5e0bc93ea..cc169e1b2c 100644
--- a/target/riscv/meson.build
+++ b/target/riscv/meson.build
@@ -17,6 +17,7 @@ riscv_ss.add(files(
   'op_helper.c',
   'vector_helper.c',
   'bitmanip_helper.c',
+  'packed_helper.c',
   'translate.c',
 ))
 
diff --git a/target/riscv/packed_helper.c b/target/riscv/packed_helper.c
new file mode 100644
index 0000000000..b84abaaf25
--- /dev/null
+++ b/target/riscv/packed_helper.c
@@ -0,0 +1,354 @@
+/*
+ * RISC-V P Extension Helpers for QEMU.
+ *
+ * Copyright (c) 2021 T-Head Semiconductor Co., Ltd. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2 or later, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+#include "qemu/osdep.h"
+#include "cpu.h"
+#include "exec/exec-all.h"
+#include "exec/helper-proto.h"
+#include "exec/cpu_ldst.h"
+#include "fpu/softfloat.h"
+#include <math.h>
+#include "internals.h"
+
+/*
+ *** SIMD Data Processing Instructions
+ */
+
+/* 16-bit Addition & Subtraction Instructions */
+typedef void PackedFn3i(CPURISCVState *, void *, void *, void *, uint8_t);
+
+/* Define a common function to loop elements in packed register */
+static inline target_ulong
+rvpr(CPURISCVState *env, target_ulong a, target_ulong b,
+     uint8_t step, uint8_t size, PackedFn3i *fn)
+{
+    int i, passes = sizeof(target_ulong) / size;
+    target_ulong result = 0;
+
+    for (i = 0; i < passes; i += step) {
+        fn(env, &result, &a, &b, i);
+    }
+    return result;
+}
+
+#define RVPR(NAME, STEP, SIZE)                                  \
+target_ulong HELPER(NAME)(CPURISCVState *env, target_ulong a,   \
+                          target_ulong b)                       \
+{                                                               \
+    return rvpr(env, a, b, STEP, SIZE, (PackedFn3i *)do_##NAME);\
+}
+
+static inline int32_t hadd32(int32_t a, int32_t b)
+{
+    return ((int64_t)a + b) >> 1;
+}
+
+static inline void do_radd16(CPURISCVState *env, void *vd, void *va,
+                             void *vb, uint8_t i)
+{
+    int16_t *d = vd, *a = va, *b = vb;
+    d[i] = hadd32(a[i], b[i]);
+}
+
+RVPR(radd16, 1, 2);
+
+static inline uint32_t haddu32(uint32_t a, uint32_t b)
+{
+    return ((uint64_t)a + b) >> 1;
+}
+
+static inline void do_uradd16(CPURISCVState *env, void *vd, void *va,
+                              void *vb, uint8_t i)
+{
+    uint16_t *d = vd, *a = va, *b = vb;
+    d[i] = haddu32(a[i], b[i]);
+}
+
+RVPR(uradd16, 1, 2);
+
+static inline void do_kadd16(CPURISCVState *env, void *vd, void *va,
+                             void *vb, uint8_t i)
+{
+    int16_t *d = vd, *a = va, *b = vb;
+    d[i] = sadd16(env, 0, a[i], b[i]);
+}
+
+RVPR(kadd16, 1, 2);
+
+static inline void do_ukadd16(CPURISCVState *env, void *vd, void *va,
+                              void *vb, uint8_t i)
+{
+    uint16_t *d = vd, *a = va, *b = vb;
+    d[i] = saddu16(env, 0, a[i], b[i]);
+}
+
+RVPR(ukadd16, 1, 2);
+
+static inline int32_t hsub32(int32_t a, int32_t b)
+{
+    return ((int64_t)a - b) >> 1;
+}
+
+static inline int64_t hsub64(int64_t a, int64_t b)
+{
+    int64_t res = a - b;
+    int64_t over = (res ^ a) & (a ^ b) & INT64_MIN;
+
+    /* With signed overflow, bit 64 is inverse of bit 63. */
+    return (res >> 1) ^ over;
+}
+
+static inline void do_rsub16(CPURISCVState *env, void *vd, void *va,
+                             void *vb, uint8_t i)
+{
+    int16_t *d = vd, *a = va, *b = vb;
+    d[i] = hsub32(a[i], b[i]);
+}
+
+RVPR(rsub16, 1, 2);
+
+static inline uint64_t hsubu64(uint64_t a, uint64_t b)
+{
+    return (a - b) >> 1;
+}
+
+static inline void do_ursub16(CPURISCVState *env, void *vd, void *va,
+                              void *vb, uint8_t i)
+{
+    uint16_t *d = vd, *a = va, *b = vb;
+    d[i] = hsubu64(a[i], b[i]);
+}
+
+RVPR(ursub16, 1, 2);
+
+static inline void do_ksub16(CPURISCVState *env, void *vd, void *va,
+                             void *vb, uint8_t i)
+{
+    int16_t *d = vd, *a = va, *b = vb;
+    d[i] = ssub16(env, 0, a[i], b[i]);
+}
+
+RVPR(ksub16, 1, 2);
+
+static inline void do_uksub16(CPURISCVState *env, void *vd, void *va,
+                              void *vb, uint8_t i)
+{
+    uint16_t *d = vd, *a = va, *b = vb;
+    d[i] = ssubu16(env, 0, a[i], b[i]);
+}
+
+RVPR(uksub16, 1, 2);
+
+static inline void do_cras16(CPURISCVState *env, void *vd, void *va,
+                             void *vb, uint8_t i)
+{
+    uint16_t *d = vd, *a = va, *b = vb;
+    d[H2(i)] = a[H2(i)] - b[H2(i + 1)];
+    d[H2(i + 1)] = a[H2(i + 1)] + b[H2(i)];
+}
+
+RVPR(cras16, 2, 2);
+
+static inline void do_rcras16(CPURISCVState *env, void *vd, void *va,
+                              void *vb, uint8_t i)
+{
+    int16_t *d = vd, *a = va, *b = vb;
+    d[H2(i)] = hsub32(a[H2(i)], b[H2(i + 1)]);
+    d[H2(i + 1)] = hadd32(a[H2(i + 1)], b[H2(i)]);
+}
+
+RVPR(rcras16, 2, 2);
+
+static inline void do_urcras16(CPURISCVState *env, void *vd, void *va,
+                               void *vb, uint8_t i)
+{
+    uint16_t *d = vd, *a = va, *b = vb;
+    d[H2(i)] = hsubu64(a[H2(i)], b[H2(i + 1)]);
+    d[H2(i + 1)] = haddu32(a[H2(i + 1)], b[H2(i)]);
+}
+
+RVPR(urcras16, 2, 2);
+
+static inline void do_kcras16(CPURISCVState *env, void *vd, void *va,
+                              void *vb, uint8_t i)
+{
+    int16_t *d = vd, *a = va, *b = vb;
+    d[H2(i)] = ssub16(env, 0, a[H2(i)], b[H2(i + 1)]);
+    d[H2(i + 1)] = sadd16(env, 0, a[H2(i + 1)], b[H2(i)]);
+}
+
+RVPR(kcras16, 2, 2);
+
+static inline void do_ukcras16(CPURISCVState *env, void *vd, void *va,
+                               void *vb, uint8_t i)
+{
+    uint16_t *d = vd, *a = va, *b = vb;
+    d[H2(i)] = ssubu16(env, 0, a[H2(i)], b[H2(i + 1)]);
+    d[H2(i + 1)] = saddu16(env, 0, a[H2(i + 1)], b[H2(i)]);
+}
+
+RVPR(ukcras16, 2, 2);
+
+static inline void do_crsa16(CPURISCVState *env, void *vd, void *va,
+                             void *vb, uint8_t i)
+{
+    uint16_t *d = vd, *a = va, *b = vb;
+    d[H2(i)] = a[H2(i)] + b[H2(i + 1)];
+    d[H2(i + 1)] = a[H2(i + 1)] - b[H2(i)];
+}
+
+RVPR(crsa16, 2, 2);
+
+static inline void do_rcrsa16(CPURISCVState *env, void *vd, void *va,
+                              void *vb, uint8_t i)
+{
+    int16_t *d = vd, *a = va, *b = vb;
+    d[H2(i)] = hadd32(a[H2(i)], b[H2(i + 1)]);
+    d[H2(i + 1)] = hsub32(a[H2(i + 1)], b[H2(i)]);
+}
+
+RVPR(rcrsa16, 2, 2);
+
+static inline void do_urcrsa16(CPURISCVState *env, void *vd, void *va,
+                               void *vb, uint8_t i)
+{
+    uint16_t *d = vd, *a = va, *b = vb;
+    d[H2(i)] = haddu32(a[H2(i)], b[H2(i + 1)]);
+    d[H2(i + 1)] = hsubu64(a[H2(i + 1)], b[H2(i)]);
+}
+
+RVPR(urcrsa16, 2, 2);
+
+static inline void do_kcrsa16(CPURISCVState *env, void *vd, void *va,
+                              void *vb, uint8_t i)
+{
+    int16_t *d = vd, *a = va, *b = vb;
+    d[H2(i)] = sadd16(env, 0, a[H2(i)], b[H2(i + 1)]);
+    d[H2(i + 1)] = ssub16(env, 0, a[H2(i + 1)], b[H2(i)]);
+}
+
+RVPR(kcrsa16, 2, 2);
+
+static inline void do_ukcrsa16(CPURISCVState *env, void *vd, void *va,
+                               void *vb, uint8_t i)
+{
+    uint16_t *d = vd, *a = va, *b = vb;
+    d[H2(i)] = saddu16(env, 0, a[H2(i)], b[H2(i + 1)]);
+    d[H2(i + 1)] = ssubu16(env, 0, a[H2(i + 1)], b[H2(i)]);
+}
+
+RVPR(ukcrsa16, 2, 2);
+
+static inline void do_stas16(CPURISCVState *env, void *vd, void *va,
+                             void *vb, uint8_t i)
+{
+    int16_t *d = vd, *a = va, *b = vb;
+    d[H2(i)] = a[H2(i)] - b[H2(i)];
+    d[H2(i + 1)] = a[H2(i + 1)] + b[H2(i + 1)];
+}
+
+RVPR(stas16, 2, 2);
+
+static inline void do_rstas16(CPURISCVState *env, void *vd, void *va,
+                              void *vb, uint8_t i)
+{
+    int16_t *d = vd, *a = va, *b = vb;
+    d[H2(i)] = hsub32(a[H2(i)], b[H2(i)]);
+    d[H2(i + 1)] = hadd32(a[H2(i + 1)], b[H2(i + 1)]);
+}
+
+RVPR(rstas16, 2, 2);
+
+static inline void do_urstas16(CPURISCVState *env, void *vd, void *va,
+                               void *vb, uint8_t i)
+{
+    uint16_t *d = vd, *a = va, *b = vb;
+    d[H2(i)] = hsubu64(a[H2(i)], b[H2(i)]);
+    d[H2(i + 1)] = haddu32(a[H2(i + 1)], b[H2(i + 1)]);
+}
+
+RVPR(urstas16, 2, 2);
+
+static inline void do_kstas16(CPURISCVState *env, void *vd, void *va,
+                              void *vb, uint8_t i)
+{
+    int16_t *d = vd, *a = va, *b = vb;
+    d[H2(i)] = ssub16(env, 0, a[H2(i)], b[H2(i)]);
+    d[H2(i + 1)] = sadd16(env, 0, a[H2(i + 1)], b[H2(i + 1)]);
+}
+
+RVPR(kstas16, 2, 2);
+
+static inline void do_ukstas16(CPURISCVState *env, void *vd, void *va,
+                               void *vb, uint8_t i)
+{
+    uint16_t *d = vd, *a = va, *b = vb;
+    d[H2(i)] = ssubu16(env, 0, a[H2(i)], b[H2(i)]);
+    d[H2(i + 1)] = saddu16(env, 0, a[H2(i + 1)], b[H2(i + 1)]);
+}
+
+RVPR(ukstas16, 2, 2);
+
+static inline void do_stsa16(CPURISCVState *env, void *vd, void *va,
+                             void *vb, uint8_t i)
+{
+    uint16_t *d = vd, *a = va, *b = vb;
+    d[H2(i)] = a[H2(i)] + b[H2(i)];
+    d[H2(i + 1)] = a[H2(i + 1)] - b[H2(i + 1)];
+}
+
+RVPR(stsa16, 2, 2);
+
+static inline void do_rstsa16(CPURISCVState *env, void *vd, void *va,
+                              void *vb, uint8_t i)
+{
+    int16_t *d = vd, *a = va, *b = vb;
+    d[H2(i)] = hadd32(a[H2(i)], b[H2(i)]);
+    d[H2(i + 1)] = hsub32(a[H2(i + 1)], b[H2(i + 1)]);
+}
+
+RVPR(rstsa16, 2, 2);
+
+static inline void do_urstsa16(CPURISCVState *env, void *vd, void *va,
+                               void *vb, uint8_t i)
+{
+    uint16_t *d = vd, *a = va, *b = vb;
+    d[H2(i)] = haddu32(a[H2(i)], b[H2(i)]);
+    d[H2(i + 1)] = hsubu64(a[H2(i + 1)], b[H2(i + 1)]);
+}
+
+RVPR(urstsa16, 2, 2);
+
+static inline void do_kstsa16(CPURISCVState *env, void *vd, void *va,
+                              void *vb, uint8_t i)
+{
+    int16_t *d = vd, *a = va, *b = vb;
+    d[H2(i)] = sadd16(env, 0, a[H2(i)], b[H2(i)]);
+    d[H2(i + 1)] = ssub16(env, 0, a[H2(i + 1)], b[H2(i + 1)]);
+}
+
+RVPR(kstsa16, 2, 2);
+
+static inline void do_ukstsa16(CPURISCVState *env, void *vd, void *va,
+                               void *vb, uint8_t i)
+{
+    uint16_t *d = vd, *a = va, *b = vb;
+    d[H2(i)] = saddu16(env, 0, a[H2(i)], b[H2(i)]);
+    d[H2(i + 1)] = ssubu16(env, 0, a[H2(i + 1)], b[H2(i + 1)]);
+}
+
+RVPR(ukstsa16, 2, 2);
diff --git a/target/riscv/translate.c b/target/riscv/translate.c
index 0e6ede4d71..51b144e9be 100644
--- a/target/riscv/translate.c
+++ b/target/riscv/translate.c
@@ -908,6 +908,7 @@ static bool gen_unary(DisasContext *ctx, arg_r2 *a,
 #include "insn_trans/trans_rvh.c.inc"
 #include "insn_trans/trans_rvv.c.inc"
 #include "insn_trans/trans_rvb.c.inc"
+#include "insn_trans/trans_rvp.c.inc"
 #include "insn_trans/trans_privileged.c.inc"
 
 /* Include the auto-generated decoder for 16 bit insn */
diff --git a/tcg/tcg-op-gvec.c b/tcg/tcg-op-gvec.c
index 498a959839..a8898ba7bf 100644
--- a/tcg/tcg-op-gvec.c
+++ b/tcg/tcg-op-gvec.c
@@ -1742,6 +1742,20 @@ void tcg_gen_vec_add16_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
     gen_addv_mask(d, a, b, m);
 }
 
+void tcg_gen_vec_add16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
+{
+    TCGv_i32 t1 = tcg_temp_new_i32();
+    TCGv_i32 t2 = tcg_temp_new_i32();
+
+    tcg_gen_andi_i32(t1, a, ~0xffff);
+    tcg_gen_add_i32(t2, a, b);
+    tcg_gen_add_i32(t1, t1, b);
+    tcg_gen_deposit_i32(d, t1, t2, 0, 16);
+
+    tcg_temp_free_i32(t1);
+    tcg_temp_free_i32(t2);
+}
+
 void tcg_gen_vec_add32_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
 {
     TCGv_i64 t1 = tcg_temp_new_i64();
@@ -1892,6 +1906,20 @@ void tcg_gen_vec_sub16_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
     gen_subv_mask(d, a, b, m);
 }
 
+void tcg_gen_vec_sub16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
+{
+    TCGv_i32 t1 = tcg_temp_new_i32();
+    TCGv_i32 t2 = tcg_temp_new_i32();
+
+    tcg_gen_andi_i32(t1, b, ~0xffff);
+    tcg_gen_sub_i32(t2, a, b);
+    tcg_gen_sub_i32(t1, a, t1);
+    tcg_gen_deposit_i32(d, t1, t2, 0, 16);
+
+    tcg_temp_free_i32(t1);
+    tcg_temp_free_i32(t2);
+}
+
 void tcg_gen_vec_sub32_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
 {
     TCGv_i64 t1 = tcg_temp_new_i64();
-- 
2.25.1



  parent reply	other threads:[~2021-06-10  8:04 UTC|newest]

Thread overview: 88+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-06-10  7:58 [PATCH v2 00/37] target/riscv: support packed extension v0.9.4 LIU Zhiwei
2021-06-10  7:58 ` LIU Zhiwei
2021-06-10  7:58 ` [PATCH v2 01/37] target/riscv: implementation-defined constant parameters LIU Zhiwei
2021-06-10  7:58   ` LIU Zhiwei
2021-06-10  7:58 ` [PATCH v2 02/37] target/riscv: Make the vector helper functions public LIU Zhiwei
2021-06-10  7:58   ` LIU Zhiwei
2021-06-10  7:58 ` LIU Zhiwei [this message]
2021-06-10  7:58   ` [PATCH v2 03/37] target/riscv: 16-bit Addition & Subtraction Instructions LIU Zhiwei
2021-06-10 18:00   ` Richard Henderson
2021-06-10 18:00     ` Richard Henderson
2021-06-10  7:58 ` [PATCH v2 04/37] target/riscv: 8-bit Addition & Subtraction Instruction LIU Zhiwei
2021-06-10  7:58   ` LIU Zhiwei
2021-06-10 19:39   ` Richard Henderson
2021-06-10 19:39     ` Richard Henderson
2021-06-11  4:36     ` LIU Zhiwei
2021-06-11  4:36       ` LIU Zhiwei
2021-06-24  6:05     ` LIU Zhiwei
2021-06-24  6:05       ` LIU Zhiwei
2021-06-10  7:58 ` [PATCH v2 05/37] target/riscv: SIMD 16-bit Shift Instructions LIU Zhiwei
2021-06-10  7:58   ` LIU Zhiwei
2021-06-10 19:44   ` Richard Henderson
2021-06-10 19:44     ` Richard Henderson
2021-06-10  7:58 ` [PATCH v2 06/37] target/riscv: SIMD 8-bit " LIU Zhiwei
2021-06-10  7:58   ` LIU Zhiwei
2021-06-10  7:58 ` [PATCH v2 07/37] target/riscv: SIMD 16-bit Compare Instructions LIU Zhiwei
2021-06-10  7:58   ` LIU Zhiwei
2021-06-10  7:58 ` [PATCH v2 08/37] target/riscv: SIMD 8-bit " LIU Zhiwei
2021-06-10  7:58   ` LIU Zhiwei
2021-06-10  7:58 ` [PATCH v2 09/37] target/riscv: SIMD 16-bit Multiply Instructions LIU Zhiwei
2021-06-10  7:58   ` LIU Zhiwei
2021-06-10  7:58 ` [PATCH v2 10/37] target/riscv: SIMD 8-bit " LIU Zhiwei
2021-06-10  7:58   ` LIU Zhiwei
2021-06-10  7:58 ` [PATCH v2 11/37] target/riscv: SIMD 16-bit Miscellaneous Instructions LIU Zhiwei
2021-06-10  7:58   ` LIU Zhiwei
2021-06-10  7:58 ` [PATCH v2 12/37] target/riscv: SIMD 8-bit " LIU Zhiwei
2021-06-10  7:58   ` LIU Zhiwei
2021-06-10  7:58 ` [PATCH v2 13/37] target/riscv: 8-bit Unpacking Instructions LIU Zhiwei
2021-06-10  7:58   ` LIU Zhiwei
2021-06-10  7:58 ` [PATCH v2 14/37] target/riscv: 16-bit Packing Instructions LIU Zhiwei
2021-06-10  7:58   ` LIU Zhiwei
2021-06-10  7:58 ` [PATCH v2 15/37] target/riscv: Signed MSW 32x32 Multiply and Add Instructions LIU Zhiwei
2021-06-10  7:58   ` LIU Zhiwei
2021-06-10  7:58 ` [PATCH v2 16/37] target/riscv: Signed MSW 32x16 " LIU Zhiwei
2021-06-10  7:58   ` LIU Zhiwei
2021-06-10  7:58 ` [PATCH v2 17/37] target/riscv: Signed 16-bit Multiply 32-bit Add/Subtract Instructions LIU Zhiwei
2021-06-10  7:58   ` LIU Zhiwei
2021-06-10  7:58 ` [PATCH v2 18/37] target/riscv: Signed 16-bit Multiply 64-bit " LIU Zhiwei
2021-06-10  7:58   ` LIU Zhiwei
2021-06-10  7:58 ` [PATCH v2 19/37] target/riscv: Partial-SIMD Miscellaneous Instructions LIU Zhiwei
2021-06-10  7:58   ` LIU Zhiwei
2021-06-10  7:58 ` [PATCH v2 20/37] target/riscv: 8-bit Multiply with 32-bit Add Instructions LIU Zhiwei
2021-06-10  7:58   ` LIU Zhiwei
2021-06-10  7:58 ` [PATCH v2 21/37] target/riscv: 64-bit Add/Subtract Instructions LIU Zhiwei
2021-06-10  7:58   ` LIU Zhiwei
2021-06-10  7:58 ` [PATCH v2 22/37] target/riscv: 32-bit Multiply " LIU Zhiwei
2021-06-10  7:58   ` LIU Zhiwei
2021-06-10  7:58 ` [PATCH v2 23/37] target/riscv: Signed 16-bit Multiply with " LIU Zhiwei
2021-06-10  7:58   ` LIU Zhiwei
2021-06-10  7:58 ` [PATCH v2 24/37] target/riscv: Non-SIMD Q15 saturation ALU Instructions LIU Zhiwei
2021-06-10  7:58   ` LIU Zhiwei
2021-06-10  7:58 ` [PATCH v2 25/37] target/riscv: Non-SIMD Q31 " LIU Zhiwei
2021-06-10  7:58   ` LIU Zhiwei
2021-06-10  7:58 ` [PATCH v2 26/37] target/riscv: 32-bit Computation Instructions LIU Zhiwei
2021-06-10  7:58   ` LIU Zhiwei
2021-06-10  7:58 ` [PATCH v2 27/37] target/riscv: Non-SIMD Miscellaneous Instructions LIU Zhiwei
2021-06-10  7:58   ` LIU Zhiwei
2021-06-10  7:58 ` [PATCH v2 28/37] target/riscv: RV64 Only SIMD 32-bit Add/Subtract Instructions LIU Zhiwei
2021-06-10  7:58   ` LIU Zhiwei
2021-06-10  7:59 ` [PATCH v2 29/37] target/riscv: RV64 Only SIMD 32-bit Shift Instructions LIU Zhiwei
2021-06-10  7:59   ` LIU Zhiwei
2021-06-10  7:59 ` [PATCH v2 30/37] target/riscv: RV64 Only SIMD 32-bit Miscellaneous Instructions LIU Zhiwei
2021-06-10  7:59   ` LIU Zhiwei
2021-06-10  7:59 ` [PATCH v2 31/37] target/riscv: RV64 Only SIMD Q15 saturating Multiply Instructions LIU Zhiwei
2021-06-10  7:59   ` LIU Zhiwei
2021-06-10  7:59 ` [PATCH v2 32/37] target/riscv: RV64 Only 32-bit " LIU Zhiwei
2021-06-10  7:59   ` LIU Zhiwei
2021-06-10  7:59 ` [PATCH v2 33/37] target/riscv: RV64 Only 32-bit Multiply & Add Instructions LIU Zhiwei
2021-06-10  7:59   ` LIU Zhiwei
2021-06-10  7:59 ` [PATCH v2 34/37] target/riscv: RV64 Only 32-bit Parallel " LIU Zhiwei
2021-06-10  7:59   ` LIU Zhiwei
2021-06-10  7:59 ` [PATCH v2 35/37] target/riscv: RV64 Only Non-SIMD 32-bit Shift Instructions LIU Zhiwei
2021-06-10  7:59   ` LIU Zhiwei
2021-06-10  7:59 ` [PATCH v2 36/37] target/riscv: RV64 Only 32-bit Packing Instructions LIU Zhiwei
2021-06-10  7:59   ` LIU Zhiwei
2021-06-10  7:59 ` [PATCH v2 37/37] target/riscv: configure and turn on packed extension from command line LIU Zhiwei
2021-06-10  7:59   ` LIU Zhiwei
2021-06-14 22:55 ` [PATCH v2 00/37] target/riscv: support packed extension v0.9.4 no-reply
2021-06-14 22:55   ` no-reply

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20210610075908.3305506-4-zhiwei_liu@c-sky.com \
    --to=zhiwei_liu@c-sky.com \
    --cc=Alistair.Francis@wdc.com \
    --cc=bin.meng@windriver.com \
    --cc=palmer@dabbelt.com \
    --cc=qemu-devel@nongnu.org \
    --cc=qemu-riscv@nongnu.org \
    --cc=richard.henderson@linaro.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.