All of lore.kernel.org
 help / color / mirror / Atom feed
From: Lawrence Hunter <lawrence.hunter@codethink.co.uk>
To: qemu-devel@nongnu.org
Cc: dickon.hood@codethink.co.uk, nazar.kazakov@codethink.co.uk,
	kiran.ostrolenk@codethink.co.uk, frank.chang@sifive.com,
	palmer@dabbelt.com, alistair.francis@wdc.com,
	bin.meng@windriver.com, pbonzini@redhat.com,
	philipp.tomsich@vrull.eu, kvm@vger.kernel.org
Subject: [PATCH 06/39] target/riscv: Add vrol.[vv,vx] and vror.[vv,vx,vi] decoding, translation and execution support
Date: Thu,  2 Feb 2023 12:41:57 +0000	[thread overview]
Message-ID: <20230202124230.295997-7-lawrence.hunter@codethink.co.uk> (raw)
In-Reply-To: <20230202124230.295997-1-lawrence.hunter@codethink.co.uk>

From: Dickon Hood <dickon.hood@codethink.co.uk>

Add an initial implementation of the vrol.* and vror.* instructions,
with mappings between the RISC-V instructions and their internal TCG
accelerated implmentations.

There are some missing ror helpers, so I've bodged it by converting them
to rols.

Co-authored-by: Kiran Ostrolenk <kiran.ostrolenk@codethink.co.uk>
Signed-off-by: Kiran Ostrolenk <kiran.ostrolenk@codethink.co.uk>
Signed-off-by: Dickon Hood <dickon.hood@codethink.co.uk>
---
 target/riscv/helper.h                      | 20 ++++++++
 target/riscv/insn32.decode                 |  6 +++
 target/riscv/insn_trans/trans_rvzvkb.c.inc | 20 ++++++++
 target/riscv/vcrypto_helper.c              | 58 ++++++++++++++++++++++
 target/riscv/vector_helper.c               | 45 -----------------
 target/riscv/vector_internals.h            | 52 +++++++++++++++++++
 6 files changed, 156 insertions(+), 45 deletions(-)

diff --git a/target/riscv/helper.h b/target/riscv/helper.h
index 32f1179e29..e5b6b3360f 100644
--- a/target/riscv/helper.h
+++ b/target/riscv/helper.h
@@ -1142,3 +1142,23 @@ DEF_HELPER_6(vclmul_vv, void, ptr, ptr, ptr, ptr, env, i32)
 DEF_HELPER_6(vclmul_vx, void, ptr, ptr, tl, ptr, env, i32)
 DEF_HELPER_6(vclmulh_vv, void, ptr, ptr, ptr, ptr, env, i32)
 DEF_HELPER_6(vclmulh_vx, void, ptr, ptr, tl, ptr, env, i32)
+
+DEF_HELPER_6(vror_vv_b, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_6(vror_vv_h, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_6(vror_vv_w, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_6(vror_vv_d, void, ptr, ptr, ptr, ptr, env, i32)
+
+DEF_HELPER_6(vror_vx_b, void, ptr, ptr, tl, ptr, env, i32)
+DEF_HELPER_6(vror_vx_h, void, ptr, ptr, tl, ptr, env, i32)
+DEF_HELPER_6(vror_vx_w, void, ptr, ptr, tl, ptr, env, i32)
+DEF_HELPER_6(vror_vx_d, void, ptr, ptr, tl, ptr, env, i32)
+
+DEF_HELPER_6(vrol_vv_b, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_6(vrol_vv_h, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_6(vrol_vv_w, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_6(vrol_vv_d, void, ptr, ptr, ptr, ptr, env, i32)
+
+DEF_HELPER_6(vrol_vx_b, void, ptr, ptr, tl, ptr, env, i32)
+DEF_HELPER_6(vrol_vx_h, void, ptr, ptr, tl, ptr, env, i32)
+DEF_HELPER_6(vrol_vx_w, void, ptr, ptr, tl, ptr, env, i32)
+DEF_HELPER_6(vrol_vx_d, void, ptr, ptr, tl, ptr, env, i32)
diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode
index b4d88dd1cb..725f907ad1 100644
--- a/target/riscv/insn32.decode
+++ b/target/riscv/insn32.decode
@@ -896,3 +896,9 @@ vclmul_vv       001100 . ..... ..... 010 ..... 1010111 @r_vm
 vclmul_vx       001100 . ..... ..... 110 ..... 1010111 @r_vm
 vclmulh_vv      001101 . ..... ..... 010 ..... 1010111 @r_vm
 vclmulh_vx      001101 . ..... ..... 110 ..... 1010111 @r_vm
+vrol_vv         010101 . ..... ..... 000 ..... 1010111 @r_vm
+vrol_vx         010101 . ..... ..... 100 ..... 1010111 @r_vm
+vror_vv         010100 . ..... ..... 000 ..... 1010111 @r_vm
+vror_vx         010100 . ..... ..... 100 ..... 1010111 @r_vm
+vror_vi         010100 . ..... ..... 011 ..... 1010111 @r_vm
+vror_vi2        010101 . ..... ..... 011 ..... 1010111 @r_vm
diff --git a/target/riscv/insn_trans/trans_rvzvkb.c.inc b/target/riscv/insn_trans/trans_rvzvkb.c.inc
index 533141e559..d2a7a92d42 100644
--- a/target/riscv/insn_trans/trans_rvzvkb.c.inc
+++ b/target/riscv/insn_trans/trans_rvzvkb.c.inc
@@ -95,3 +95,23 @@ static bool vclmul_vx_check(DisasContext *s, arg_rmrr *a)
 
 GEN_VX_MASKED_TRANS(vclmul_vx, vclmul_vx_check)
 GEN_VX_MASKED_TRANS(vclmulh_vx, vclmul_vx_check)
+
+GEN_OPIVV_TRANS(vror_vv, zvkb_vv_check)
+GEN_OPIVX_TRANS(vror_vx, zvkb_vx_check)
+GEN_OPIVV_TRANS(vrol_vv, zvkb_vv_check)
+GEN_OPIVX_TRANS(vrol_vx, zvkb_vx_check)
+
+GEN_OPIVI_TRANS(vror_vi, IMM_TRUNC_SEW, vror_vx, zvkb_vx_check)
+
+/*
+ * Immediates are 5b long, and we need six for the rotate-immediate.  The
+ * decision has been taken to remove the vrol.vi instruction -- you can
+ * emulate it with a ror, after all -- and use the bottom bit of the funct6
+ * part of the opcode to encode the extra bit.  I've chosen to implement it
+ * like this because it's easy and reasonably clean.
+ */
+static bool trans_vror_vi2(DisasContext *s, arg_rmrr *a)
+{
+    a->rs1 += 32;
+    return trans_vror_vi(s, a);
+}
diff --git a/target/riscv/vcrypto_helper.c b/target/riscv/vcrypto_helper.c
index 46e2e510c5..7ec75c5589 100644
--- a/target/riscv/vcrypto_helper.c
+++ b/target/riscv/vcrypto_helper.c
@@ -57,3 +57,61 @@ GEN_VEXT_VV(vclmul_vv, 8)
 GEN_VEXT_VX(vclmul_vx, 8)
 GEN_VEXT_VV(vclmulh_vv, 8)
 GEN_VEXT_VX(vclmulh_vx, 8)
+
+/*
+ *  Looks a mess, but produces reasonable (aarch32) code on clang:
+ * https://godbolt.org/z/jchjsTda8
+ */
+#define DO_ROR(x, n)                       \
+    ((x >> (n & ((sizeof(x) << 3) - 1))) | \
+     (x << ((sizeof(x) << 3) - (n & ((sizeof(x) << 3) - 1)))))
+#define DO_ROL(x, n)                       \
+    ((x << (n & ((sizeof(x) << 3) - 1))) | \
+     (x >> ((sizeof(x) << 3) - (n & ((sizeof(x) << 3) - 1)))))
+
+RVVCALL(OPIVV2, vror_vv_b, OP_UUU_B, H1, H1, H1, DO_ROR)
+RVVCALL(OPIVV2, vror_vv_h, OP_UUU_H, H2, H2, H2, DO_ROR)
+RVVCALL(OPIVV2, vror_vv_w, OP_UUU_W, H4, H4, H4, DO_ROR)
+RVVCALL(OPIVV2, vror_vv_d, OP_UUU_D, H8, H8, H8, DO_ROR)
+GEN_VEXT_VV(vror_vv_b, 1)
+GEN_VEXT_VV(vror_vv_h, 2)
+GEN_VEXT_VV(vror_vv_w, 4)
+GEN_VEXT_VV(vror_vv_d, 8)
+
+/*
+ * There's a missing tcg_gen_gvec_rotrs() helper function.
+ */
+#define GEN_VEXT_VX_RTOL(NAME, ESZ)                                      \
+void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,        \
+                  CPURISCVState *env, uint32_t desc)                     \
+{                                                                        \
+    do_vext_vx(vd, v0, (ESZ << 3) - s1, vs2, env, desc, do_##NAME, ESZ); \
+}
+
+/* DO_ROL because GEN_VEXT_VX_RTOL() converts from R to L */
+RVVCALL(OPIVX2, vror_vx_b, OP_UUU_B, H1, H1, DO_ROL)
+RVVCALL(OPIVX2, vror_vx_h, OP_UUU_H, H2, H2, DO_ROL)
+RVVCALL(OPIVX2, vror_vx_w, OP_UUU_W, H4, H4, DO_ROL)
+RVVCALL(OPIVX2, vror_vx_d, OP_UUU_D, H8, H8, DO_ROL)
+GEN_VEXT_VX_RTOL(vror_vx_b, 1)
+GEN_VEXT_VX_RTOL(vror_vx_h, 2)
+GEN_VEXT_VX_RTOL(vror_vx_w, 4)
+GEN_VEXT_VX_RTOL(vror_vx_d, 8)
+
+RVVCALL(OPIVV2, vrol_vv_b, OP_UUU_B, H1, H1, H1, DO_ROL)
+RVVCALL(OPIVV2, vrol_vv_h, OP_UUU_H, H2, H2, H2, DO_ROL)
+RVVCALL(OPIVV2, vrol_vv_w, OP_UUU_W, H4, H4, H4, DO_ROL)
+RVVCALL(OPIVV2, vrol_vv_d, OP_UUU_D, H8, H8, H8, DO_ROL)
+GEN_VEXT_VV(vrol_vv_b, 1)
+GEN_VEXT_VV(vrol_vv_h, 2)
+GEN_VEXT_VV(vrol_vv_w, 4)
+GEN_VEXT_VV(vrol_vv_d, 8)
+
+RVVCALL(OPIVX2, vrol_vx_b, OP_UUU_B, H1, H1, DO_ROL)
+RVVCALL(OPIVX2, vrol_vx_h, OP_UUU_H, H2, H2, DO_ROL)
+RVVCALL(OPIVX2, vrol_vx_w, OP_UUU_W, H4, H4, DO_ROL)
+RVVCALL(OPIVX2, vrol_vx_d, OP_UUU_D, H8, H8, DO_ROL)
+GEN_VEXT_VX(vrol_vx_b, 1)
+GEN_VEXT_VX(vrol_vx_h, 2)
+GEN_VEXT_VX(vrol_vx_w, 4)
+GEN_VEXT_VX(vrol_vx_d, 8)
diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c
index ab470092f6..ff7b03cbe3 100644
--- a/target/riscv/vector_helper.c
+++ b/target/riscv/vector_helper.c
@@ -76,26 +76,6 @@ target_ulong HELPER(vsetvl)(CPURISCVState *env, target_ulong s1,
     return vl;
 }
 
-/*
- * Note that vector data is stored in host-endian 64-bit chunks,
- * so addressing units smaller than that needs a host-endian fixup.
- */
-#if HOST_BIG_ENDIAN
-#define H1(x)   ((x) ^ 7)
-#define H1_2(x) ((x) ^ 6)
-#define H1_4(x) ((x) ^ 4)
-#define H2(x)   ((x) ^ 3)
-#define H4(x)   ((x) ^ 1)
-#define H8(x)   ((x))
-#else
-#define H1(x)   (x)
-#define H1_2(x) (x)
-#define H1_4(x) (x)
-#define H2(x)   (x)
-#define H4(x)   (x)
-#define H8(x)   (x)
-#endif
-
 /*
  * Get the maximum number of elements can be operated.
  *
@@ -683,18 +663,11 @@ GEN_VEXT_ST_WHOLE(vs8r_v, int8_t, ste_b)
  *** Vector Integer Arithmetic Instructions
  */
 
-/* expand macro args before macro */
-#define RVVCALL(macro, ...)  macro(__VA_ARGS__)
-
 /* (TD, T1, T2, TX1, TX2) */
 #define OP_SSS_B int8_t, int8_t, int8_t, int8_t, int8_t
 #define OP_SSS_H int16_t, int16_t, int16_t, int16_t, int16_t
 #define OP_SSS_W int32_t, int32_t, int32_t, int32_t, int32_t
 #define OP_SSS_D int64_t, int64_t, int64_t, int64_t, int64_t
-#define OP_UUU_B uint8_t, uint8_t, uint8_t, uint8_t, uint8_t
-#define OP_UUU_H uint16_t, uint16_t, uint16_t, uint16_t, uint16_t
-#define OP_UUU_W uint32_t, uint32_t, uint32_t, uint32_t, uint32_t
-#define OP_UUU_D uint64_t, uint64_t, uint64_t, uint64_t, uint64_t
 #define OP_SUS_B int8_t, uint8_t, int8_t, uint8_t, int8_t
 #define OP_SUS_H int16_t, uint16_t, int16_t, uint16_t, int16_t
 #define OP_SUS_W int32_t, uint32_t, int32_t, uint32_t, int32_t
@@ -718,14 +691,6 @@ GEN_VEXT_ST_WHOLE(vs8r_v, int8_t, ste_b)
 #define NOP_UUU_H uint16_t, uint16_t, uint32_t, uint16_t, uint32_t
 #define NOP_UUU_W uint32_t, uint32_t, uint64_t, uint32_t, uint64_t
 
-
-#define OPIVV2(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP)    \
-static void do_##NAME(void *vd, void *vs1, void *vs2, int i)    \
-{                                                               \
-    TX1 s1 = *((T1 *)vs1 + HS1(i));                             \
-    TX2 s2 = *((T2 *)vs2 + HS2(i));                             \
-    *((TD *)vd + HD(i)) = OP(s2, s1);                           \
-}
 #define DO_SUB(N, M) (N - M)
 #define DO_RSUB(N, M) (M - N)
 
@@ -747,16 +712,6 @@ GEN_VEXT_VV(vsub_vv_h, 2)
 GEN_VEXT_VV(vsub_vv_w, 4)
 GEN_VEXT_VV(vsub_vv_d, 8)
 
-/*
- * (T1)s1 gives the real operator type.
- * (TX1)(T1)s1 expands the operator type of widen or narrow operations.
- */
-#define OPIVX2(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP)             \
-static void do_##NAME(void *vd, target_long s1, void *vs2, int i)   \
-{                                                                   \
-    TX2 s2 = *((T2 *)vs2 + HS2(i));                                 \
-    *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1);                      \
-}
 
 RVVCALL(OPIVX2, vadd_vx_b, OP_SSS_B, H1, H1, DO_ADD)
 RVVCALL(OPIVX2, vadd_vx_h, OP_SSS_H, H2, H2, DO_ADD)
diff --git a/target/riscv/vector_internals.h b/target/riscv/vector_internals.h
index 49529d2379..a0fbac7bf3 100644
--- a/target/riscv/vector_internals.h
+++ b/target/riscv/vector_internals.h
@@ -28,6 +28,26 @@ static inline uint32_t vext_nf(uint32_t desc)
     return FIELD_EX32(simd_data(desc), VDATA, NF);
 }
 
+/*
+ * Note that vector data is stored in host-endian 64-bit chunks,
+ * so addressing units smaller than that needs a host-endian fixup.
+ */
+#if HOST_BIG_ENDIAN
+#define H1(x)   ((x) ^ 7)
+#define H1_2(x) ((x) ^ 6)
+#define H1_4(x) ((x) ^ 4)
+#define H2(x)   ((x) ^ 3)
+#define H4(x)   ((x) ^ 1)
+#define H8(x)   ((x))
+#else
+#define H1(x)   (x)
+#define H1_2(x) (x)
+#define H1_4(x) (x)
+#define H2(x)   (x)
+#define H4(x)   (x)
+#define H8(x)   (x)
+#endif
+
 /*
  * Encode LMUL to lmul as following:
  *     LMUL    vlmul    lmul
@@ -96,9 +116,30 @@ static inline uint32_t vext_get_total_elems(CPURISCVState *env, uint32_t desc,
 void vext_set_elems_1s(void *base, uint32_t is_agnostic, uint32_t cnt,
                        uint32_t tot);
 
+/*
+ *** Vector Integer Arithmetic Instructions
+ */
+
+/* expand macro args before macro */
+#define RVVCALL(macro, ...)  macro(__VA_ARGS__)
+
+/* (TD, T1, T2, TX1, TX2) */
+#define OP_UUU_B uint8_t, uint8_t, uint8_t, uint8_t, uint8_t
+#define OP_UUU_H uint16_t, uint16_t, uint16_t, uint16_t, uint16_t
+#define OP_UUU_W uint32_t, uint32_t, uint32_t, uint32_t, uint32_t
+#define OP_UUU_D uint64_t, uint64_t, uint64_t, uint64_t, uint64_t
+
 /* operation of two vector elements */
 typedef void opivv2_fn(void *vd, void *vs1, void *vs2, int i);
 
+#define OPIVV2(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP)    \
+static void do_##NAME(void *vd, void *vs1, void *vs2, int i)    \
+{                                                               \
+    TX1 s1 = *((T1 *)vs1 + HS1(i));                             \
+    TX2 s2 = *((T2 *)vs2 + HS2(i));                             \
+    *((TD *)vd + HD(i)) = OP(s2, s1);                           \
+}
+
 void do_vext_vv(void *vd, void *v0, void *vs1, void *vs2,
                 CPURISCVState *env, uint32_t desc,
                 opivv2_fn *fn, uint32_t esz);
@@ -115,6 +156,17 @@ void HELPER(NAME)(void *vd, void *v0, void *vs1,          \
 
 typedef void opivx2_fn(void *vd, target_long s1, void *vs2, int i);
 
+/*
+ * (T1)s1 gives the real operator type.
+ * (TX1)(T1)s1 expands the operator type of widen or narrow operations.
+ */
+#define OPIVX2(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP)             \
+static void do_##NAME(void *vd, target_long s1, void *vs2, int i)   \
+{                                                                   \
+    TX2 s2 = *((T2 *)vs2 + HS2(i));                                 \
+    *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1);                      \
+}
+
 void do_vext_vx(void *vd, void *v0, target_long s1, void *vs2,
                 CPURISCVState *env, uint32_t desc,
                 opivx2_fn fn, uint32_t esz);
-- 
2.39.1


WARNING: multiple messages have this Message-ID (diff)
From: Lawrence Hunter <lawrence.hunter@codethink.co.uk>
To: qemu-devel@nongnu.org
Cc: dickon.hood@codethink.co.uk, nazar.kazakov@codethink.co.uk,
	kiran.ostrolenk@codethink.co.uk, frank.chang@sifive.com,
	palmer@dabbelt.com, alistair.francis@wdc.com,
	bin.meng@windriver.com, pbonzini@redhat.com,
	philipp.tomsich@vrull.eu, kvm@vger.kernel.org
Subject: [PATCH 06/39] target/riscv: Add vrol.[vv, vx] and vror.[vv, vx, vi] decoding, translation and execution support
Date: Thu,  2 Feb 2023 12:41:57 +0000	[thread overview]
Message-ID: <20230202124230.295997-7-lawrence.hunter@codethink.co.uk> (raw)
In-Reply-To: <20230202124230.295997-1-lawrence.hunter@codethink.co.uk>

From: Dickon Hood <dickon.hood@codethink.co.uk>

Add an initial implementation of the vrol.* and vror.* instructions,
with mappings between the RISC-V instructions and their internal TCG
accelerated implmentations.

There are some missing ror helpers, so I've bodged it by converting them
to rols.

Co-authored-by: Kiran Ostrolenk <kiran.ostrolenk@codethink.co.uk>
Signed-off-by: Kiran Ostrolenk <kiran.ostrolenk@codethink.co.uk>
Signed-off-by: Dickon Hood <dickon.hood@codethink.co.uk>
---
 target/riscv/helper.h                      | 20 ++++++++
 target/riscv/insn32.decode                 |  6 +++
 target/riscv/insn_trans/trans_rvzvkb.c.inc | 20 ++++++++
 target/riscv/vcrypto_helper.c              | 58 ++++++++++++++++++++++
 target/riscv/vector_helper.c               | 45 -----------------
 target/riscv/vector_internals.h            | 52 +++++++++++++++++++
 6 files changed, 156 insertions(+), 45 deletions(-)

diff --git a/target/riscv/helper.h b/target/riscv/helper.h
index 32f1179e29..e5b6b3360f 100644
--- a/target/riscv/helper.h
+++ b/target/riscv/helper.h
@@ -1142,3 +1142,23 @@ DEF_HELPER_6(vclmul_vv, void, ptr, ptr, ptr, ptr, env, i32)
 DEF_HELPER_6(vclmul_vx, void, ptr, ptr, tl, ptr, env, i32)
 DEF_HELPER_6(vclmulh_vv, void, ptr, ptr, ptr, ptr, env, i32)
 DEF_HELPER_6(vclmulh_vx, void, ptr, ptr, tl, ptr, env, i32)
+
+DEF_HELPER_6(vror_vv_b, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_6(vror_vv_h, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_6(vror_vv_w, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_6(vror_vv_d, void, ptr, ptr, ptr, ptr, env, i32)
+
+DEF_HELPER_6(vror_vx_b, void, ptr, ptr, tl, ptr, env, i32)
+DEF_HELPER_6(vror_vx_h, void, ptr, ptr, tl, ptr, env, i32)
+DEF_HELPER_6(vror_vx_w, void, ptr, ptr, tl, ptr, env, i32)
+DEF_HELPER_6(vror_vx_d, void, ptr, ptr, tl, ptr, env, i32)
+
+DEF_HELPER_6(vrol_vv_b, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_6(vrol_vv_h, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_6(vrol_vv_w, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_6(vrol_vv_d, void, ptr, ptr, ptr, ptr, env, i32)
+
+DEF_HELPER_6(vrol_vx_b, void, ptr, ptr, tl, ptr, env, i32)
+DEF_HELPER_6(vrol_vx_h, void, ptr, ptr, tl, ptr, env, i32)
+DEF_HELPER_6(vrol_vx_w, void, ptr, ptr, tl, ptr, env, i32)
+DEF_HELPER_6(vrol_vx_d, void, ptr, ptr, tl, ptr, env, i32)
diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode
index b4d88dd1cb..725f907ad1 100644
--- a/target/riscv/insn32.decode
+++ b/target/riscv/insn32.decode
@@ -896,3 +896,9 @@ vclmul_vv       001100 . ..... ..... 010 ..... 1010111 @r_vm
 vclmul_vx       001100 . ..... ..... 110 ..... 1010111 @r_vm
 vclmulh_vv      001101 . ..... ..... 010 ..... 1010111 @r_vm
 vclmulh_vx      001101 . ..... ..... 110 ..... 1010111 @r_vm
+vrol_vv         010101 . ..... ..... 000 ..... 1010111 @r_vm
+vrol_vx         010101 . ..... ..... 100 ..... 1010111 @r_vm
+vror_vv         010100 . ..... ..... 000 ..... 1010111 @r_vm
+vror_vx         010100 . ..... ..... 100 ..... 1010111 @r_vm
+vror_vi         010100 . ..... ..... 011 ..... 1010111 @r_vm
+vror_vi2        010101 . ..... ..... 011 ..... 1010111 @r_vm
diff --git a/target/riscv/insn_trans/trans_rvzvkb.c.inc b/target/riscv/insn_trans/trans_rvzvkb.c.inc
index 533141e559..d2a7a92d42 100644
--- a/target/riscv/insn_trans/trans_rvzvkb.c.inc
+++ b/target/riscv/insn_trans/trans_rvzvkb.c.inc
@@ -95,3 +95,23 @@ static bool vclmul_vx_check(DisasContext *s, arg_rmrr *a)
 
 GEN_VX_MASKED_TRANS(vclmul_vx, vclmul_vx_check)
 GEN_VX_MASKED_TRANS(vclmulh_vx, vclmul_vx_check)
+
+GEN_OPIVV_TRANS(vror_vv, zvkb_vv_check)
+GEN_OPIVX_TRANS(vror_vx, zvkb_vx_check)
+GEN_OPIVV_TRANS(vrol_vv, zvkb_vv_check)
+GEN_OPIVX_TRANS(vrol_vx, zvkb_vx_check)
+
+GEN_OPIVI_TRANS(vror_vi, IMM_TRUNC_SEW, vror_vx, zvkb_vx_check)
+
+/*
+ * Immediates are 5b long, and we need six for the rotate-immediate.  The
+ * decision has been taken to remove the vrol.vi instruction -- you can
+ * emulate it with a ror, after all -- and use the bottom bit of the funct6
+ * part of the opcode to encode the extra bit.  I've chosen to implement it
+ * like this because it's easy and reasonably clean.
+ */
+static bool trans_vror_vi2(DisasContext *s, arg_rmrr *a)
+{
+    a->rs1 += 32;
+    return trans_vror_vi(s, a);
+}
diff --git a/target/riscv/vcrypto_helper.c b/target/riscv/vcrypto_helper.c
index 46e2e510c5..7ec75c5589 100644
--- a/target/riscv/vcrypto_helper.c
+++ b/target/riscv/vcrypto_helper.c
@@ -57,3 +57,61 @@ GEN_VEXT_VV(vclmul_vv, 8)
 GEN_VEXT_VX(vclmul_vx, 8)
 GEN_VEXT_VV(vclmulh_vv, 8)
 GEN_VEXT_VX(vclmulh_vx, 8)
+
+/*
+ *  Looks a mess, but produces reasonable (aarch32) code on clang:
+ * https://godbolt.org/z/jchjsTda8
+ */
+#define DO_ROR(x, n)                       \
+    ((x >> (n & ((sizeof(x) << 3) - 1))) | \
+     (x << ((sizeof(x) << 3) - (n & ((sizeof(x) << 3) - 1)))))
+#define DO_ROL(x, n)                       \
+    ((x << (n & ((sizeof(x) << 3) - 1))) | \
+     (x >> ((sizeof(x) << 3) - (n & ((sizeof(x) << 3) - 1)))))
+
+RVVCALL(OPIVV2, vror_vv_b, OP_UUU_B, H1, H1, H1, DO_ROR)
+RVVCALL(OPIVV2, vror_vv_h, OP_UUU_H, H2, H2, H2, DO_ROR)
+RVVCALL(OPIVV2, vror_vv_w, OP_UUU_W, H4, H4, H4, DO_ROR)
+RVVCALL(OPIVV2, vror_vv_d, OP_UUU_D, H8, H8, H8, DO_ROR)
+GEN_VEXT_VV(vror_vv_b, 1)
+GEN_VEXT_VV(vror_vv_h, 2)
+GEN_VEXT_VV(vror_vv_w, 4)
+GEN_VEXT_VV(vror_vv_d, 8)
+
+/*
+ * There's a missing tcg_gen_gvec_rotrs() helper function.
+ */
+#define GEN_VEXT_VX_RTOL(NAME, ESZ)                                      \
+void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,        \
+                  CPURISCVState *env, uint32_t desc)                     \
+{                                                                        \
+    do_vext_vx(vd, v0, (ESZ << 3) - s1, vs2, env, desc, do_##NAME, ESZ); \
+}
+
+/* DO_ROL because GEN_VEXT_VX_RTOL() converts from R to L */
+RVVCALL(OPIVX2, vror_vx_b, OP_UUU_B, H1, H1, DO_ROL)
+RVVCALL(OPIVX2, vror_vx_h, OP_UUU_H, H2, H2, DO_ROL)
+RVVCALL(OPIVX2, vror_vx_w, OP_UUU_W, H4, H4, DO_ROL)
+RVVCALL(OPIVX2, vror_vx_d, OP_UUU_D, H8, H8, DO_ROL)
+GEN_VEXT_VX_RTOL(vror_vx_b, 1)
+GEN_VEXT_VX_RTOL(vror_vx_h, 2)
+GEN_VEXT_VX_RTOL(vror_vx_w, 4)
+GEN_VEXT_VX_RTOL(vror_vx_d, 8)
+
+RVVCALL(OPIVV2, vrol_vv_b, OP_UUU_B, H1, H1, H1, DO_ROL)
+RVVCALL(OPIVV2, vrol_vv_h, OP_UUU_H, H2, H2, H2, DO_ROL)
+RVVCALL(OPIVV2, vrol_vv_w, OP_UUU_W, H4, H4, H4, DO_ROL)
+RVVCALL(OPIVV2, vrol_vv_d, OP_UUU_D, H8, H8, H8, DO_ROL)
+GEN_VEXT_VV(vrol_vv_b, 1)
+GEN_VEXT_VV(vrol_vv_h, 2)
+GEN_VEXT_VV(vrol_vv_w, 4)
+GEN_VEXT_VV(vrol_vv_d, 8)
+
+RVVCALL(OPIVX2, vrol_vx_b, OP_UUU_B, H1, H1, DO_ROL)
+RVVCALL(OPIVX2, vrol_vx_h, OP_UUU_H, H2, H2, DO_ROL)
+RVVCALL(OPIVX2, vrol_vx_w, OP_UUU_W, H4, H4, DO_ROL)
+RVVCALL(OPIVX2, vrol_vx_d, OP_UUU_D, H8, H8, DO_ROL)
+GEN_VEXT_VX(vrol_vx_b, 1)
+GEN_VEXT_VX(vrol_vx_h, 2)
+GEN_VEXT_VX(vrol_vx_w, 4)
+GEN_VEXT_VX(vrol_vx_d, 8)
diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c
index ab470092f6..ff7b03cbe3 100644
--- a/target/riscv/vector_helper.c
+++ b/target/riscv/vector_helper.c
@@ -76,26 +76,6 @@ target_ulong HELPER(vsetvl)(CPURISCVState *env, target_ulong s1,
     return vl;
 }
 
-/*
- * Note that vector data is stored in host-endian 64-bit chunks,
- * so addressing units smaller than that needs a host-endian fixup.
- */
-#if HOST_BIG_ENDIAN
-#define H1(x)   ((x) ^ 7)
-#define H1_2(x) ((x) ^ 6)
-#define H1_4(x) ((x) ^ 4)
-#define H2(x)   ((x) ^ 3)
-#define H4(x)   ((x) ^ 1)
-#define H8(x)   ((x))
-#else
-#define H1(x)   (x)
-#define H1_2(x) (x)
-#define H1_4(x) (x)
-#define H2(x)   (x)
-#define H4(x)   (x)
-#define H8(x)   (x)
-#endif
-
 /*
  * Get the maximum number of elements can be operated.
  *
@@ -683,18 +663,11 @@ GEN_VEXT_ST_WHOLE(vs8r_v, int8_t, ste_b)
  *** Vector Integer Arithmetic Instructions
  */
 
-/* expand macro args before macro */
-#define RVVCALL(macro, ...)  macro(__VA_ARGS__)
-
 /* (TD, T1, T2, TX1, TX2) */
 #define OP_SSS_B int8_t, int8_t, int8_t, int8_t, int8_t
 #define OP_SSS_H int16_t, int16_t, int16_t, int16_t, int16_t
 #define OP_SSS_W int32_t, int32_t, int32_t, int32_t, int32_t
 #define OP_SSS_D int64_t, int64_t, int64_t, int64_t, int64_t
-#define OP_UUU_B uint8_t, uint8_t, uint8_t, uint8_t, uint8_t
-#define OP_UUU_H uint16_t, uint16_t, uint16_t, uint16_t, uint16_t
-#define OP_UUU_W uint32_t, uint32_t, uint32_t, uint32_t, uint32_t
-#define OP_UUU_D uint64_t, uint64_t, uint64_t, uint64_t, uint64_t
 #define OP_SUS_B int8_t, uint8_t, int8_t, uint8_t, int8_t
 #define OP_SUS_H int16_t, uint16_t, int16_t, uint16_t, int16_t
 #define OP_SUS_W int32_t, uint32_t, int32_t, uint32_t, int32_t
@@ -718,14 +691,6 @@ GEN_VEXT_ST_WHOLE(vs8r_v, int8_t, ste_b)
 #define NOP_UUU_H uint16_t, uint16_t, uint32_t, uint16_t, uint32_t
 #define NOP_UUU_W uint32_t, uint32_t, uint64_t, uint32_t, uint64_t
 
-
-#define OPIVV2(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP)    \
-static void do_##NAME(void *vd, void *vs1, void *vs2, int i)    \
-{                                                               \
-    TX1 s1 = *((T1 *)vs1 + HS1(i));                             \
-    TX2 s2 = *((T2 *)vs2 + HS2(i));                             \
-    *((TD *)vd + HD(i)) = OP(s2, s1);                           \
-}
 #define DO_SUB(N, M) (N - M)
 #define DO_RSUB(N, M) (M - N)
 
@@ -747,16 +712,6 @@ GEN_VEXT_VV(vsub_vv_h, 2)
 GEN_VEXT_VV(vsub_vv_w, 4)
 GEN_VEXT_VV(vsub_vv_d, 8)
 
-/*
- * (T1)s1 gives the real operator type.
- * (TX1)(T1)s1 expands the operator type of widen or narrow operations.
- */
-#define OPIVX2(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP)             \
-static void do_##NAME(void *vd, target_long s1, void *vs2, int i)   \
-{                                                                   \
-    TX2 s2 = *((T2 *)vs2 + HS2(i));                                 \
-    *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1);                      \
-}
 
 RVVCALL(OPIVX2, vadd_vx_b, OP_SSS_B, H1, H1, DO_ADD)
 RVVCALL(OPIVX2, vadd_vx_h, OP_SSS_H, H2, H2, DO_ADD)
diff --git a/target/riscv/vector_internals.h b/target/riscv/vector_internals.h
index 49529d2379..a0fbac7bf3 100644
--- a/target/riscv/vector_internals.h
+++ b/target/riscv/vector_internals.h
@@ -28,6 +28,26 @@ static inline uint32_t vext_nf(uint32_t desc)
     return FIELD_EX32(simd_data(desc), VDATA, NF);
 }
 
+/*
+ * Note that vector data is stored in host-endian 64-bit chunks,
+ * so addressing units smaller than that needs a host-endian fixup.
+ */
+#if HOST_BIG_ENDIAN
+#define H1(x)   ((x) ^ 7)
+#define H1_2(x) ((x) ^ 6)
+#define H1_4(x) ((x) ^ 4)
+#define H2(x)   ((x) ^ 3)
+#define H4(x)   ((x) ^ 1)
+#define H8(x)   ((x))
+#else
+#define H1(x)   (x)
+#define H1_2(x) (x)
+#define H1_4(x) (x)
+#define H2(x)   (x)
+#define H4(x)   (x)
+#define H8(x)   (x)
+#endif
+
 /*
  * Encode LMUL to lmul as following:
  *     LMUL    vlmul    lmul
@@ -96,9 +116,30 @@ static inline uint32_t vext_get_total_elems(CPURISCVState *env, uint32_t desc,
 void vext_set_elems_1s(void *base, uint32_t is_agnostic, uint32_t cnt,
                        uint32_t tot);
 
+/*
+ *** Vector Integer Arithmetic Instructions
+ */
+
+/* expand macro args before macro */
+#define RVVCALL(macro, ...)  macro(__VA_ARGS__)
+
+/* (TD, T1, T2, TX1, TX2) */
+#define OP_UUU_B uint8_t, uint8_t, uint8_t, uint8_t, uint8_t
+#define OP_UUU_H uint16_t, uint16_t, uint16_t, uint16_t, uint16_t
+#define OP_UUU_W uint32_t, uint32_t, uint32_t, uint32_t, uint32_t
+#define OP_UUU_D uint64_t, uint64_t, uint64_t, uint64_t, uint64_t
+
 /* operation of two vector elements */
 typedef void opivv2_fn(void *vd, void *vs1, void *vs2, int i);
 
+#define OPIVV2(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP)    \
+static void do_##NAME(void *vd, void *vs1, void *vs2, int i)    \
+{                                                               \
+    TX1 s1 = *((T1 *)vs1 + HS1(i));                             \
+    TX2 s2 = *((T2 *)vs2 + HS2(i));                             \
+    *((TD *)vd + HD(i)) = OP(s2, s1);                           \
+}
+
 void do_vext_vv(void *vd, void *v0, void *vs1, void *vs2,
                 CPURISCVState *env, uint32_t desc,
                 opivv2_fn *fn, uint32_t esz);
@@ -115,6 +156,17 @@ void HELPER(NAME)(void *vd, void *v0, void *vs1,          \
 
 typedef void opivx2_fn(void *vd, target_long s1, void *vs2, int i);
 
+/*
+ * (T1)s1 gives the real operator type.
+ * (TX1)(T1)s1 expands the operator type of widen or narrow operations.
+ */
+#define OPIVX2(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP)             \
+static void do_##NAME(void *vd, target_long s1, void *vs2, int i)   \
+{                                                                   \
+    TX2 s2 = *((T2 *)vs2 + HS2(i));                                 \
+    *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1);                      \
+}
+
 void do_vext_vx(void *vd, void *v0, target_long s1, void *vs2,
                 CPURISCVState *env, uint32_t desc,
                 opivx2_fn fn, uint32_t esz);
-- 
2.39.1



  parent reply	other threads:[~2023-02-02 12:45 UTC|newest]

Thread overview: 59+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-02-02 12:41 [PATCH 00/39] Add RISC-V vector cryptography extensions Lawrence Hunter
2023-02-02 12:41 ` [PATCH 01/39] target/riscv: add zvkb cpu property Lawrence Hunter
2023-02-02 12:41 ` [PATCH 02/39] target/riscv: Add vclmul.vv decoding, translation and execution support Lawrence Hunter
2023-02-02 13:53   ` Philipp Tomsich
2023-02-02 12:41 ` [PATCH 03/39] target/riscv: Add vclmul.vx " Lawrence Hunter
2023-02-02 13:59   ` Philipp Tomsich
2023-02-02 12:41 ` [PATCH 04/39] target/riscv: Add vclmulh.vv " Lawrence Hunter
2023-02-02 14:03   ` Philipp Tomsich
2023-02-02 16:54   ` Richard Henderson
2023-02-02 12:41 ` [PATCH 05/39] target/riscv: Add vclmulh.vx " Lawrence Hunter
2023-02-02 12:41 ` Lawrence Hunter [this message]
2023-02-02 12:41   ` [PATCH 06/39] target/riscv: Add vrol.[vv, vx] and vror.[vv, vx, vi] " Lawrence Hunter
2023-02-02 14:13   ` [PATCH 06/39] target/riscv: Add vrol.[vv,vx] and vror.[vv,vx,vi] " Philipp Tomsich
2023-02-02 14:13     ` [PATCH 06/39] target/riscv: Add vrol.[vv, vx] and vror.[vv, vx, vi] " Philipp Tomsich
2023-02-02 14:30     ` [PATCH 06/39] target/riscv: Add vrol.[vv,vx] and vror.[vv,vx,vi] " Philipp Tomsich
2023-02-02 14:30       ` [PATCH 06/39] target/riscv: Add vrol.[vv, vx] and vror.[vv, vx, vi] " Philipp Tomsich
2023-02-02 17:35       ` Richard Henderson
2023-02-02 18:07         ` Philipp Tomsich
2023-02-02 23:14           ` Richard Henderson
2023-02-02 12:41 ` [PATCH 07/39] target/riscv: Add vbrev8.v " Lawrence Hunter
2023-02-02 14:21   ` Philipp Tomsich
2023-02-02 12:41 ` [PATCH 08/39] target/riscv: Add vrev8.v " Lawrence Hunter
2023-02-02 14:22   ` Philipp Tomsich
2023-02-02 12:42 ` [PATCH 09/39] target/riscv: Add vandn.[vv,vx,vi] " Lawrence Hunter
2023-02-02 12:42   ` [PATCH 09/39] target/riscv: Add vandn.[vv, vx, vi] " Lawrence Hunter
2023-02-02 14:29   ` [PATCH 09/39] target/riscv: Add vandn.[vv,vx,vi] " Philipp Tomsich
2023-02-02 12:42 ` [PATCH 10/39] target/riscv: expose zvkb cpu property Lawrence Hunter
2023-02-02 14:23   ` Philipp Tomsich
2023-02-02 14:24     ` Philipp Tomsich
2023-02-02 12:42 ` [PATCH 11/39] target/riscv: add zvkns " Lawrence Hunter
2023-02-02 12:42 ` [PATCH 12/39] target/riscv: Add vaesef.vv decoding, translation and execution support Lawrence Hunter
2023-02-02 12:42 ` [PATCH 13/39] target/riscv: Add vaesef.vs " Lawrence Hunter
2023-02-02 12:42 ` [PATCH 14/39] target/riscv: Add vaesdf.vv " Lawrence Hunter
2023-02-02 12:42 ` [PATCH 15/39] target/riscv: Add vaesdf.vs " Lawrence Hunter
2023-02-02 12:42 ` [PATCH 16/39] target/riscv: Add vaesdm.vv " Lawrence Hunter
2023-02-02 12:42 ` [PATCH 17/39] target/riscv: Add vaesdm.vs " Lawrence Hunter
2023-02-02 12:42 ` [PATCH 18/39] target/riscv: Add vaesz.vs " Lawrence Hunter
2023-02-02 12:42 ` [PATCH 19/39] target/riscv: Add vaesem.vv " Lawrence Hunter
2023-02-02 12:42 ` [PATCH 20/39] target/riscv: Add vaesem.vs " Lawrence Hunter
2023-02-02 12:42 ` [PATCH 21/39] target/riscv: Add vaeskf1.vi " Lawrence Hunter
2023-02-02 12:42 ` [PATCH 22/39] target/riscv: Add vaeskf2.vi " Lawrence Hunter
2023-02-02 12:42 ` [PATCH 23/39] target/riscv: expose zvkns cpu property Lawrence Hunter
2023-02-02 12:42 ` [PATCH 24/39] target/riscv: add zvknh cpu properties Lawrence Hunter
2023-02-02 12:42 ` [PATCH 25/39] target/riscv: Add vsha2ms.vv decoding, translation and execution support Lawrence Hunter
2023-02-02 12:42 ` [PATCH 26/39] target/riscv: Add vsha2c[hl].vv " Lawrence Hunter
2023-02-02 12:42 ` [PATCH 27/39] target/riscv: expose zvknh cpu properties Lawrence Hunter
2023-02-02 12:42 ` [PATCH 28/39] target/riscv: add zvksh cpu property Lawrence Hunter
2023-02-02 12:42 ` [PATCH 29/39] target/riscv: Add vsm3me.vv decoding, translation and execution support Lawrence Hunter
2023-02-02 12:42 ` [PATCH 30/39] target/riscv: Add vsm3c.vi " Lawrence Hunter
2023-02-02 12:42 ` [PATCH 31/39] target/riscv: expose zvksh cpu property Lawrence Hunter
2023-02-02 12:42 ` [PATCH 32/39] target/riscv: add zvkg " Lawrence Hunter
2023-02-02 12:42 ` [PATCH 33/39] target/riscv: Add vghmac.vv decoding, translation and execution support Lawrence Hunter
2023-02-02 12:42 ` [PATCH 34/39] target/riscv: expose zvkg cpu property Lawrence Hunter
2023-02-02 12:42 ` [PATCH 35/39] crypto: Move SM4_SBOXWORD from target/riscv Lawrence Hunter
2023-02-02 17:02   ` Richard Henderson
2023-02-02 12:42 ` [PATCH 36/39] crypto: Add SM4 constant parameter CK Lawrence Hunter
2023-02-02 12:42 ` [PATCH 37/39] target/riscv: Add zvksed cfg property Lawrence Hunter
2023-02-02 12:42 ` [PATCH 38/39] target/riscv: Add Zvksed support Lawrence Hunter
2023-02-02 12:42 ` [PATCH 39/39] target/riscv: Expose Zvksed property Lawrence Hunter

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20230202124230.295997-7-lawrence.hunter@codethink.co.uk \
    --to=lawrence.hunter@codethink.co.uk \
    --cc=alistair.francis@wdc.com \
    --cc=bin.meng@windriver.com \
    --cc=dickon.hood@codethink.co.uk \
    --cc=frank.chang@sifive.com \
    --cc=kiran.ostrolenk@codethink.co.uk \
    --cc=kvm@vger.kernel.org \
    --cc=nazar.kazakov@codethink.co.uk \
    --cc=palmer@dabbelt.com \
    --cc=pbonzini@redhat.com \
    --cc=philipp.tomsich@vrull.eu \
    --cc=qemu-devel@nongnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.