* [Qemu-riscv] [PATCH] RISCV: support riscv vector extension 0.7.1
@ 2019-08-28 2:36 liuzhiwei
From: liuzhiwei @ 2019-08-28  2:36 UTC
To: qemu-devel, qemu-riscv
Cc: aurelien, peter.maydell, alex.bennee, riku.voipio, laurent,
palmer, Alistair.Francis, sagark, kbastian, liuzhiwei
Change-Id: I3cf891bc400713b95f47ecca82b1bf773f3dcb25
Signed-off-by: liuzhiwei <zhiwei_liu@c-sky.com>
---
fpu/softfloat.c | 119 +
include/fpu/softfloat.h | 4 +
linux-user/riscv/cpu_loop.c | 8 +-
target/riscv/Makefile.objs | 2 +-
target/riscv/cpu.h | 30 +
target/riscv/cpu_bits.h | 15 +
target/riscv/cpu_helper.c | 7 +
target/riscv/csr.c | 65 +-
target/riscv/helper.h | 354 +
target/riscv/insn32.decode | 374 +-
target/riscv/insn_trans/trans_rvv.inc.c | 484 +
target/riscv/translate.c | 1 +
target/riscv/vector_helper.c | 26563 ++++++++++++++++++++++++++++++
13 files changed, 28017 insertions(+), 9 deletions(-)
create mode 100644 target/riscv/insn_trans/trans_rvv.inc.c
create mode 100644 target/riscv/vector_helper.c
diff --git a/fpu/softfloat.c b/fpu/softfloat.c
index 2ba36ec..da155ea 100644
--- a/fpu/softfloat.c
+++ b/fpu/softfloat.c
@@ -433,6 +433,16 @@ static inline int extractFloat16Exp(float16 a)
}
/*----------------------------------------------------------------------------
+| Returns the sign bit of the half-precision floating-point value `a'.
+*----------------------------------------------------------------------------*/
+
+static inline flag extractFloat16Sign(float16 a)
+{
+    return float16_val(a) >> 15;
+}
+
+/*----------------------------------------------------------------------------
| Returns the fraction bits of the single-precision floating-point value `a'.
*----------------------------------------------------------------------------*/
@@ -4790,6 +4800,35 @@ int float32_eq(float32 a, float32 b, float_status *status)
}
/*----------------------------------------------------------------------------
+| Returns 1 if the half-precision floating-point value `a' is less than
+| or equal to the corresponding value `b', and 0 otherwise. The invalid
+| exception is raised if either operand is a NaN. The comparison is performed
+| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+int float16_le(float16 a, float16 b, float_status *status)
+{
+ flag aSign, bSign;
+ uint16_t av, bv;
+ a = float16_squash_input_denormal(a, status);
+ b = float16_squash_input_denormal(b, status);
+
+ if ( ( ( extractFloat16Exp( a ) == 0x1F ) && extractFloat16Frac( a ) )
+ || ( ( extractFloat16Exp( b ) == 0x1F ) && extractFloat16Frac( b ) )
+ ) {
+ float_raise(float_flag_invalid, status);
+ return 0;
+ }
+ aSign = extractFloat16Sign( a );
+ bSign = extractFloat16Sign( b );
+ av = float16_val(a);
+ bv = float16_val(b);
+ if ( aSign != bSign ) return aSign || ( (uint16_t) ( ( av | bv )<<1 ) == 0 );
+ return ( av == bv ) || ( aSign ^ ( av < bv ) );
+
+}
+
+/*----------------------------------------------------------------------------
| Returns 1 if the single-precision floating-point value `a' is less than
| or equal to the corresponding value `b', and 0 otherwise. The invalid
| exception is raised if either operand is a NaN. The comparison is performed
@@ -4825,6 +4864,35 @@ int float32_le(float32 a, float32 b, float_status *status)
| to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
*----------------------------------------------------------------------------*/
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the half-precision floating-point value `a' is less than
+| the corresponding value `b', and 0 otherwise. The invalid exception is
+| raised if either operand is a NaN. The comparison is performed according
+| to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+int float16_lt(float16 a, float16 b, float_status *status)
+{
+ flag aSign, bSign;
+ uint16_t av, bv;
+ a = float16_squash_input_denormal(a, status);
+ b = float16_squash_input_denormal(b, status);
+
+ if ( ( ( extractFloat16Exp( a ) == 0x1F ) && extractFloat16Frac( a ) )
+ || ( ( extractFloat16Exp( b ) == 0x1F ) && extractFloat16Frac( b ) )
+ ) {
+ float_raise(float_flag_invalid, status);
+ return 0;
+ }
+ aSign = extractFloat16Sign( a );
+ bSign = extractFloat16Sign( b );
+ av = float16_val(a);
+ bv = float16_val(b);
+ if ( aSign != bSign ) return aSign && ( (uint16_t) ( ( av | bv )<<1 ) != 0 );
+ return ( av != bv ) && ( aSign ^ ( av < bv ) );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the single-precision floating-point value `a' is less than
+| the corresponding value `b', and 0 otherwise. The invalid exception is
+| raised if either operand is a NaN. The comparison is performed according
+| to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
int float32_lt(float32 a, float32 b, float_status *status)
{
flag aSign, bSign;
@@ -4869,6 +4937,32 @@ int float32_unordered(float32 a, float32 b, float_status *status)
}
/*----------------------------------------------------------------------------
+| Returns 1 if the half-precision floating-point value `a' is equal to
+| the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an
+| exception. The comparison is performed according to the IEC/IEEE Standard
+| for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+int float16_eq_quiet(float16 a, float16 b, float_status *status)
+{
+ a = float16_squash_input_denormal(a, status);
+ b = float16_squash_input_denormal(b, status);
+
+ if ( ( ( extractFloat16Exp( a ) == 0x1F ) && extractFloat16Frac( a ) )
+ || ( ( extractFloat16Exp( b ) == 0x1F ) && extractFloat16Frac( b ) )
+ ) {
+ if (float16_is_signaling_nan(a, status)
+ || float16_is_signaling_nan(b, status)) {
+ float_raise(float_flag_invalid, status);
+ }
+ return 0;
+ }
+ return ( float16_val(a) == float16_val(b) ) ||
+ ( (uint16_t) ( ( float16_val(a) | float16_val(b) )<<1 ) == 0 );
+}
+
+/*----------------------------------------------------------------------------
| Returns 1 if the single-precision floating-point value `a' is equal to
| the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an
| exception. The comparison is performed according to the IEC/IEEE Standard
@@ -4958,6 +5052,31 @@ int float32_lt_quiet(float32 a, float32 b, float_status *status)
}
/*----------------------------------------------------------------------------
+| Returns 1 if the half-precision floating-point values `a' and `b' cannot
+| be compared, and 0 otherwise. Quiet NaNs do not cause an exception. The
+| comparison is performed according to the IEC/IEEE Standard for Binary
+| Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+int float16_unordered_quiet(float16 a, float16 b, float_status *status)
+{
+ a = float16_squash_input_denormal(a, status);
+ b = float16_squash_input_denormal(b, status);
+
+ if ( ( ( extractFloat16Exp( a ) == 0x1F ) && extractFloat16Frac( a ) )
+ || ( ( extractFloat16Exp( b ) == 0x1F ) && extractFloat16Frac( b ) )
+ ) {
+ if (float16_is_signaling_nan(a, status)
+ || float16_is_signaling_nan(b, status)) {
+ float_raise(float_flag_invalid, status);
+ }
+ return 1;
+ }
+ return 0;
+}
+
+/*----------------------------------------------------------------------------
| Returns 1 if the single-precision floating-point values `a' and `b' cannot
| be compared, and 0 otherwise. Quiet NaNs do not cause an exception. The
| comparison is performed according to the IEC/IEEE Standard for Binary
diff --git a/include/fpu/softfloat.h b/include/fpu/softfloat.h
index 3ff3fa5..3b0754c 100644
--- a/include/fpu/softfloat.h
+++ b/include/fpu/softfloat.h
@@ -293,6 +293,10 @@ float16 float16_maxnummag(float16, float16, float_status *status);
float16 float16_sqrt(float16, float_status *status);
int float16_compare(float16, float16, float_status *status);
int float16_compare_quiet(float16, float16, float_status *status);
+int float16_unordered_quiet(float16, float16, float_status *status);
+int float16_le(float16, float16, float_status *status);
+int float16_lt(float16, float16, float_status *status);
+int float16_eq_quiet(float16, float16, float_status *status);
int float16_is_quiet_nan(float16, float_status *status);
int float16_is_signaling_nan(float16, float_status *status);
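
The four new float16 predicates follow the float32 originals line for line: the signaling forms (le/lt) raise the invalid flag on any NaN input, the _quiet forms only on signaling NaNs, and ordering is decided on the raw sign/magnitude encoding, with (uint16_t)((av | bv) << 1) == 0 identifying the +0/-0 pair. A standalone sketch of that ordering core, NaN handling stripped out (illustration only, not part of the patch):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Ordering core of float16_le on raw bit patterns; NaNs excluded. */
static bool f16_bits_le(uint16_t av, uint16_t bv)
{
    bool asign = av >> 15, bsign = bv >> 15;

    if (asign != bsign) {
        /* Signs differ: a <= b iff a is the negative one, or both
         * operands are zeros (+0/-0 differ only in bit 15, so the
         * left-shifted OR of the two patterns is zero). */
        return asign || (uint16_t)((av | bv) << 1) == 0;
    }
    /* Same sign: magnitude order, inverted when both are negative. */
    return av == bv || (asign ^ (av < bv));
}

int main(void)
{
    printf("%d\n", f16_bits_le(0x8000, 0x0000));  /* -0 <= +0 : 1 */
    printf("%d\n", f16_bits_le(0xBC00, 0x3C00));  /* -1 <= +1 : 1 */
    printf("%d\n", f16_bits_le(0x3C00, 0xBC00));  /* +1 <= -1 : 0 */
    return 0;
}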
diff --git a/linux-user/riscv/cpu_loop.c b/linux-user/riscv/cpu_loop.c
index 12aa3c0..b01548a 100644
--- a/linux-user/riscv/cpu_loop.c
+++ b/linux-user/riscv/cpu_loop.c
@@ -40,7 +40,13 @@ void cpu_loop(CPURISCVState *env)
signum = 0;
sigcode = 0;
sigaddr = 0;
-
+        /* Fault-only-first load: if at least one element was loaded
+         * before the access trapped (vl != 0), suppress the signal
+         * and resume after the 4-byte instruction. */
+        if (env->foflag && env->vfp.vl != 0) {
+            env->foflag = false;
+            env->pc += 4;
+            continue;
+        }
switch (trapnr) {
case EXCP_INTERRUPT:
/* just indicate that signals should be handled asap */
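
For context: the RVV 0.7.1 fault-only-first loads (vlbff.v and friends) require that a fault on any element after the first be suppressed, with vl truncated to the number of elements that did load. The hunk above handles the resume half of that contract in user mode. A minimal model of the intended behaviour (the names here are illustrative assumptions, not the patch's helpers):

#include <stdbool.h>
#include <stdint.h>

typedef struct {
    uint32_t vl;     /* active vector length */
    bool foflag;     /* set while a fault-only-first load executes */
} VecState;

/* Sketch only. Returns true if a trap raised at element 'elem' must
 * be suppressed; in that case vl is truncated and the caller skips
 * over the faulting instruction. */
static bool fof_suppress_trap(VecState *s, uint32_t elem)
{
    if (s->foflag && elem > 0) {
        s->vl = elem;        /* only the loaded elements stay valid */
        s->foflag = false;
        return true;
    }
    return false;            /* a fault on element 0 traps normally */
}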
diff --git a/target/riscv/Makefile.objs b/target/riscv/Makefile.objs
index b1c79bc..d577cef 100644
--- a/target/riscv/Makefile.objs
+++ b/target/riscv/Makefile.objs
@@ -1,4 +1,4 @@
-obj-y += translate.o op_helper.o cpu_helper.o cpu.o csr.o fpu_helper.o gdbstub.o pmp.o
+obj-y += translate.o op_helper.o cpu_helper.o cpu.o csr.o fpu_helper.o vector_helper.o gdbstub.o pmp.o
DECODETREE = $(SRC_PATH)/scripts/decodetree.py
diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h
index 0adb307..5a93aa2 100644
--- a/target/riscv/cpu.h
+++ b/target/riscv/cpu.h
@@ -67,6 +67,7 @@
#define RVC RV('C')
#define RVS RV('S')
#define RVU RV('U')
+#define RVV RV('V')
/* S extension denotes that Supervisor mode exists, however it is possible
to have a core that supports S mode but does not have an MMU and there
@@ -93,9 +94,38 @@ typedef struct CPURISCVState CPURISCVState;
#include "pmp.h"
+#define VLEN 128
+#define VUNIT(x) (VLEN / (x))    /* elements of width x bits per register */
+
struct CPURISCVState {
target_ulong gpr[32];
uint64_t fpr[32]; /* assume both F and D extensions */
+
+ /* vector coprocessor state. */
+ struct {
+ union VECTOR {
+ float64 f64[VUNIT(64)];
+ float32 f32[VUNIT(32)];
+ float16 f16[VUNIT(16)];
+            target_ulong ul[VUNIT(sizeof(target_ulong) * 8)];
+ uint64_t u64[VUNIT(64)];
+ int64_t s64[VUNIT(64)];
+ uint32_t u32[VUNIT(32)];
+ int32_t s32[VUNIT(32)];
+ uint16_t u16[VUNIT(16)];
+ int16_t s16[VUNIT(16)];
+ uint8_t u8[VUNIT(8)];
+ int8_t s8[VUNIT(8)];
+ } vreg[32];
+ target_ulong vxrm;
+ target_ulong vxsat;
+ target_ulong vl;
+ target_ulong vstart;
+ target_ulong vtype;
+ float_status fp_status;
+ } vfp;
+
+    bool foflag;    /* fault-only-first load in progress */
target_ulong pc;
target_ulong load_res;
target_ulong load_val;
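
With VLEN fixed at 128, each of the 32 vector registers holds 2 x 64-bit, 4 x 32-bit, 8 x 16-bit, or 16 x 8-bit elements, and the ul view has to scale sizeof() (bytes) up to bits. A compile-time sanity check of those counts (a sketch, not part of the patch):

#include <assert.h>
#include <stdint.h>

/* Illustration only; mirrors the VLEN/VUNIT macros in cpu.h. */
#define VLEN 128                   /* register width in bits */
#define VUNIT(x) (VLEN / (x))      /* element count at width x bits */

static_assert(VUNIT(64) == 2,  "two 64-bit elements per register");
static_assert(VUNIT(32) == 4,  "four 32-bit elements per register");
static_assert(VUNIT(16) == 8,  "eight 16-bit elements per register");
static_assert(VUNIT(8) == 16,  "sixteen 8-bit elements per register");
static_assert(VUNIT(sizeof(uint64_t) * 8) == 2,
              "sizeof() is in bytes and must be scaled to bits");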
diff --git a/target/riscv/cpu_bits.h b/target/riscv/cpu_bits.h
index 11f971a..9eb43ec 100644
--- a/target/riscv/cpu_bits.h
+++ b/target/riscv/cpu_bits.h
@@ -29,6 +29,14 @@
#define FSR_NXA (FPEXC_NX << FSR_AEXC_SHIFT)
#define FSR_AEXC (FSR_NVA | FSR_OFA | FSR_UFA | FSR_DZA | FSR_NXA)
+/* Vector fixed-point rounding mode */
+#define FSR_VXRM_SHIFT 9
+#define FSR_VXRM (0x3 << FSR_VXRM_SHIFT)
+
+/* Vector fixed-point saturation flag */
+#define FSR_VXSAT_SHIFT 8
+#define FSR_VXSAT (0x1 << FSR_VXSAT_SHIFT)
+
/* Control and Status Registers */
/* User Trap Setup */
@@ -48,6 +56,13 @@
#define CSR_FRM 0x002
#define CSR_FCSR 0x003
+/* User Vector CSRs */
+#define CSR_VSTART 0x008
+#define CSR_VXSAT 0x009
+#define CSR_VXRM 0x00a
+#define CSR_VL 0xc20
+#define CSR_VTYPE 0xc21
+
/* User Timers and Counters */
#define CSR_CYCLE 0xc00
#define CSR_TIME 0xc01
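
With these additions the fcsr image is vxrm in bits 10:9 and vxsat in bit 8, above the standard frm (bits 7:5) and fflags (bits 4:0) fields. A small sketch of the packing that the read_fcsr change below performs (shift names are local to this example, not the header's):

#include <stdint.h>
#include <stdio.h>

/* Illustration of the extended fcsr layout used by v0.7.1. */
#define FFLAGS_SHIFT 0
#define FRM_SHIFT    5
#define VXSAT_SHIFT  8
#define VXRM_SHIFT   9

static uint32_t pack_fcsr(uint32_t vxrm, uint32_t vxsat,
                          uint32_t frm, uint32_t fflags)
{
    return (vxrm << VXRM_SHIFT) | (vxsat << VXSAT_SHIFT)
           | (frm << FRM_SHIFT) | (fflags << FFLAGS_SHIFT);
}

int main(void)
{
    /* vxrm=2 (round-down), vxsat set, frm=0 (RNE), NX flag set */
    printf("0x%03x\n", pack_fcsr(2, 1, 0, 1));   /* prints 0x501 */
    return 0;
}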
diff --git a/target/riscv/cpu_helper.c b/target/riscv/cpu_helper.c
index e32b612..405caf6 100644
--- a/target/riscv/cpu_helper.c
+++ b/target/riscv/cpu_helper.c
@@ -521,6 +521,13 @@ void riscv_cpu_do_interrupt(CPUState *cs)
[PRV_H] = RISCV_EXCP_H_ECALL,
[PRV_M] = RISCV_EXCP_M_ECALL
};
+    if (env->foflag && env->vfp.vl != 0) {
+        /* Fault-only-first load: at least one element was loaded
+         * before the trap, so suppress it and resume after the
+         * 4-byte instruction (mirrors linux-user/riscv/cpu_loop.c). */
+        env->foflag = false;
+        env->pc += 4;
+        return;
+    }
if (!async) {
/* set tval to badaddr for traps with address information */
diff --git a/target/riscv/csr.c b/target/riscv/csr.c
index e0d4586..a6131ff 100644
--- a/target/riscv/csr.c
+++ b/target/riscv/csr.c
@@ -87,12 +87,12 @@ static int ctr(CPURISCVState *env, int csrno)
return 0;
}
-#if !defined(CONFIG_USER_ONLY)
static int any(CPURISCVState *env, int csrno)
{
return 0;
}
+#if !defined(CONFIG_USER_ONLY)
static int smode(CPURISCVState *env, int csrno)
{
return -!riscv_has_ext(env, RVS);
@@ -158,8 +158,10 @@ static int read_fcsr(CPURISCVState *env, int csrno, target_ulong *val)
return -1;
}
#endif
- *val = (riscv_cpu_get_fflags(env) << FSR_AEXC_SHIFT)
- | (env->frm << FSR_RD_SHIFT);
+ *val = (env->vfp.vxrm << FSR_VXRM_SHIFT)
+ | (env->vfp.vxsat << FSR_VXSAT_SHIFT)
+ | (riscv_cpu_get_fflags(env) << FSR_AEXC_SHIFT)
+ | (env->frm << FSR_RD_SHIFT);
return 0;
}
@@ -172,10 +174,60 @@ static int write_fcsr(CPURISCVState *env, int csrno, target_ulong val)
env->mstatus |= MSTATUS_FS;
#endif
env->frm = (val & FSR_RD) >> FSR_RD_SHIFT;
+ env->vfp.vxrm = (val & FSR_VXRM) >> FSR_VXRM_SHIFT;
+ env->vfp.vxsat = (val & FSR_VXSAT) >> FSR_VXSAT_SHIFT;
riscv_cpu_set_fflags(env, (val & FSR_AEXC) >> FSR_AEXC_SHIFT);
return 0;
}
+static int read_vtype(CPURISCVState *env, int csrno, target_ulong *val)
+{
+ *val = env->vfp.vtype;
+ return 0;
+}
+
+static int read_vl(CPURISCVState *env, int csrno, target_ulong *val)
+{
+ *val = env->vfp.vl;
+ return 0;
+}
+
+static int read_vxrm(CPURISCVState *env, int csrno, target_ulong *val)
+{
+ *val = env->vfp.vxrm;
+ return 0;
+}
+
+static int read_vxsat(CPURISCVState *env, int csrno, target_ulong *val)
+{
+ *val = env->vfp.vxsat;
+ return 0;
+}
+
+static int read_vstart(CPURISCVState *env, int csrno, target_ulong *val)
+{
+ *val = env->vfp.vstart;
+ return 0;
+}
+
+static int write_vxrm(CPURISCVState *env, int csrno, target_ulong val)
+{
+    env->vfp.vxrm = val & 0x3;    /* two-bit rounding mode */
+    return 0;
+}
+
+static int write_vxsat(CPURISCVState *env, int csrno, target_ulong val)
+{
+    env->vfp.vxsat = val & 0x1;   /* single sticky saturation bit */
+    return 0;
+}
+
+static int write_vstart(CPURISCVState *env, int csrno, target_ulong val)
+{
+ env->vfp.vstart = val;
+ return 0;
+}
+
/* User Timers and Counters */
static int read_instret(CPURISCVState *env, int csrno, target_ulong *val)
{
@@ -873,7 +925,12 @@ static riscv_csr_operations csr_ops[CSR_TABLE_SIZE] = {
[CSR_FFLAGS] = { fs, read_fflags, write_fflags },
[CSR_FRM] = { fs, read_frm, write_frm },
[CSR_FCSR] = { fs, read_fcsr, write_fcsr },
-
+ /* Vector CSRs */
+ [CSR_VSTART] = { any, read_vstart, write_vstart },
+ [CSR_VXSAT] = { any, read_vxsat, write_vxsat },
+ [CSR_VXRM] = { any, read_vxrm, write_vxrm },
+ [CSR_VL] = { any, read_vl },
+ [CSR_VTYPE] = { any, read_vtype },
/* User Timers and Counters */
[CSR_CYCLE] = { ctr, read_instret },
[CSR_INSTRET] = { ctr, read_instret },
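
CSR_VL and CSR_VTYPE deliberately get no write handler: both are updated only as a side effect of vsetvl/vsetvli, so a direct CSR write must fault. In the { predicate, read, write } convention this table uses, a NULL write pointer is what makes the access fail. Roughly (a simplified model of the dispatch, not QEMU's actual code):

#include <stdint.h>

typedef uint64_t target_ulong;
typedef struct CPURISCVState CPURISCVState;   /* opaque here */

typedef struct {
    int (*predicate)(CPURISCVState *env, int csrno);
    int (*read)(CPURISCVState *env, int csrno, target_ulong *val);
    int (*write)(CPURISCVState *env, int csrno, target_ulong val);
} riscv_csr_ops;

/* Simplified dispatch sketch; -1 signals an illegal-instruction trap.
 * Caller passes one table entry, e.g. &csr_ops[csrno]. */
static int csr_access(const riscv_csr_ops *op, CPURISCVState *env,
                      int csrno, target_ulong *ret, target_ulong val,
                      int write_op)
{
    if (!op->predicate || op->predicate(env, csrno) < 0) {
        return -1;                    /* CSR absent or inaccessible */
    }
    if (write_op && !op->write) {
        return -1;                    /* read-only: CSR_VL, CSR_VTYPE */
    }
    if (write_op && op->write(env, csrno, val) < 0) {
        return -1;
    }
    return op->read(env, csrno, ret);
}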
diff --git a/target/riscv/helper.h b/target/riscv/helper.h
index debb22a..fee02c0 100644
--- a/target/riscv/helper.h
+++ b/target/riscv/helper.h
@@ -76,3 +76,357 @@ DEF_HELPER_2(mret, tl, env, tl)
DEF_HELPER_1(wfi, void, env)
DEF_HELPER_1(tlb_flush, void, env)
#endif
+/* Vector functions */
+DEF_HELPER_5(vector_vlb_v, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vlh_v, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vlw_v, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vle_v, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vlbu_v, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vlhu_v, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vlwu_v, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vlbff_v, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vlhff_v, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vlwff_v, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vleff_v, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vlbuff_v, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vlhuff_v, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vlwuff_v, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vsb_v, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vsh_v, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vsw_v, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vse_v, void, env, i32, i32, i32, i32)
+DEF_HELPER_6(vector_vlsb_v, void, env, i32, i32, i32, i32, i32)
+DEF_HELPER_6(vector_vlsh_v, void, env, i32, i32, i32, i32, i32)
+DEF_HELPER_6(vector_vlsw_v, void, env, i32, i32, i32, i32, i32)
+DEF_HELPER_6(vector_vlse_v, void, env, i32, i32, i32, i32, i32)
+DEF_HELPER_6(vector_vlsbu_v, void, env, i32, i32, i32, i32, i32)
+DEF_HELPER_6(vector_vlshu_v, void, env, i32, i32, i32, i32, i32)
+DEF_HELPER_6(vector_vlswu_v, void, env, i32, i32, i32, i32, i32)
+DEF_HELPER_6(vector_vssb_v, void, env, i32, i32, i32, i32, i32)
+DEF_HELPER_6(vector_vssh_v, void, env, i32, i32, i32, i32, i32)
+DEF_HELPER_6(vector_vssw_v, void, env, i32, i32, i32, i32, i32)
+DEF_HELPER_6(vector_vsse_v, void, env, i32, i32, i32, i32, i32)
+DEF_HELPER_6(vector_vlxb_v, void, env, i32, i32, i32, i32, i32)
+DEF_HELPER_6(vector_vlxh_v, void, env, i32, i32, i32, i32, i32)
+DEF_HELPER_6(vector_vlxw_v, void, env, i32, i32, i32, i32, i32)
+DEF_HELPER_6(vector_vlxe_v, void, env, i32, i32, i32, i32, i32)
+DEF_HELPER_6(vector_vlxbu_v, void, env, i32, i32, i32, i32, i32)
+DEF_HELPER_6(vector_vlxhu_v, void, env, i32, i32, i32, i32, i32)
+DEF_HELPER_6(vector_vlxwu_v, void, env, i32, i32, i32, i32, i32)
+DEF_HELPER_6(vector_vsxb_v, void, env, i32, i32, i32, i32, i32)
+DEF_HELPER_6(vector_vsxh_v, void, env, i32, i32, i32, i32, i32)
+DEF_HELPER_6(vector_vsxw_v, void, env, i32, i32, i32, i32, i32)
+DEF_HELPER_6(vector_vsxe_v, void, env, i32, i32, i32, i32, i32)
+DEF_HELPER_6(vector_vsuxb_v, void, env, i32, i32, i32, i32, i32)
+DEF_HELPER_6(vector_vsuxh_v, void, env, i32, i32, i32, i32, i32)
+DEF_HELPER_6(vector_vsuxw_v, void, env, i32, i32, i32, i32, i32)
+DEF_HELPER_6(vector_vsuxe_v, void, env, i32, i32, i32, i32, i32)
+DEF_HELPER_6(vector_vamoswapw_v, void, env, i32, i32, i32, i32, i32)
+DEF_HELPER_6(vector_vamoswapd_v, void, env, i32, i32, i32, i32, i32)
+DEF_HELPER_6(vector_vamoaddw_v, void, env, i32, i32, i32, i32, i32)
+DEF_HELPER_6(vector_vamoaddd_v, void, env, i32, i32, i32, i32, i32)
+DEF_HELPER_6(vector_vamoxorw_v, void, env, i32, i32, i32, i32, i32)
+DEF_HELPER_6(vector_vamoxord_v, void, env, i32, i32, i32, i32, i32)
+DEF_HELPER_6(vector_vamoandw_v, void, env, i32, i32, i32, i32, i32)
+DEF_HELPER_6(vector_vamoandd_v, void, env, i32, i32, i32, i32, i32)
+DEF_HELPER_6(vector_vamoorw_v, void, env, i32, i32, i32, i32, i32)
+DEF_HELPER_6(vector_vamoord_v, void, env, i32, i32, i32, i32, i32)
+DEF_HELPER_6(vector_vamominw_v, void, env, i32, i32, i32, i32, i32)
+DEF_HELPER_6(vector_vamomind_v, void, env, i32, i32, i32, i32, i32)
+DEF_HELPER_6(vector_vamomaxw_v, void, env, i32, i32, i32, i32, i32)
+DEF_HELPER_6(vector_vamomaxd_v, void, env, i32, i32, i32, i32, i32)
+DEF_HELPER_6(vector_vamominuw_v, void, env, i32, i32, i32, i32, i32)
+DEF_HELPER_6(vector_vamominud_v, void, env, i32, i32, i32, i32, i32)
+DEF_HELPER_6(vector_vamomaxuw_v, void, env, i32, i32, i32, i32, i32)
+DEF_HELPER_6(vector_vamomaxud_v, void, env, i32, i32, i32, i32, i32)
+DEF_HELPER_4(vector_vext_x_v, void, env, i32, i32, i32)
+DEF_HELPER_4(vector_vfmv_f_s, void, env, i32, i32, i32)
+DEF_HELPER_4(vector_vmv_s_x, void, env, i32, i32, i32)
+DEF_HELPER_4(vector_vfmv_s_f, void, env, i32, i32, i32)
+DEF_HELPER_4(vector_vadc_vvm, void, env, i32, i32, i32)
+DEF_HELPER_4(vector_vadc_vxm, void, env, i32, i32, i32)
+DEF_HELPER_4(vector_vadc_vim, void, env, i32, i32, i32)
+DEF_HELPER_4(vector_vmadc_vvm, void, env, i32, i32, i32)
+DEF_HELPER_4(vector_vmadc_vxm, void, env, i32, i32, i32)
+DEF_HELPER_4(vector_vmadc_vim, void, env, i32, i32, i32)
+DEF_HELPER_4(vector_vsbc_vvm, void, env, i32, i32, i32)
+DEF_HELPER_4(vector_vsbc_vxm, void, env, i32, i32, i32)
+DEF_HELPER_4(vector_vmsbc_vvm, void, env, i32, i32, i32)
+DEF_HELPER_4(vector_vmsbc_vxm, void, env, i32, i32, i32)
+DEF_HELPER_4(vector_vmpopc_m, void, env, i32, i32, i32)
+DEF_HELPER_4(vector_vmfirst_m, void, env, i32, i32, i32)
+DEF_HELPER_4(vector_vcompress_vm, void, env, i32, i32, i32)
+DEF_HELPER_4(vector_vmandnot_mm, void, env, i32, i32, i32)
+DEF_HELPER_4(vector_vmand_mm, void, env, i32, i32, i32)
+DEF_HELPER_4(vector_vmor_mm, void, env, i32, i32, i32)
+DEF_HELPER_4(vector_vmxor_mm, void, env, i32, i32, i32)
+DEF_HELPER_4(vector_vmornot_mm, void, env, i32, i32, i32)
+DEF_HELPER_4(vector_vmnand_mm, void, env, i32, i32, i32)
+DEF_HELPER_4(vector_vmnor_mm, void, env, i32, i32, i32)
+DEF_HELPER_4(vector_vmxnor_mm, void, env, i32, i32, i32)
+DEF_HELPER_4(vector_vmsbf_m, void, env, i32, i32, i32)
+DEF_HELPER_4(vector_vmsof_m, void, env, i32, i32, i32)
+DEF_HELPER_4(vector_vmsif_m, void, env, i32, i32, i32)
+DEF_HELPER_4(vector_viota_m, void, env, i32, i32, i32)
+DEF_HELPER_3(vector_vid_v, void, env, i32, i32)
+DEF_HELPER_4(vector_vfcvt_xu_f_v, void, env, i32, i32, i32)
+DEF_HELPER_4(vector_vfcvt_x_f_v, void, env, i32, i32, i32)
+DEF_HELPER_4(vector_vfcvt_f_xu_v, void, env, i32, i32, i32)
+DEF_HELPER_4(vector_vfcvt_f_x_v, void, env, i32, i32, i32)
+DEF_HELPER_4(vector_vfwcvt_xu_f_v, void, env, i32, i32, i32)
+DEF_HELPER_4(vector_vfwcvt_x_f_v, void, env, i32, i32, i32)
+DEF_HELPER_4(vector_vfwcvt_f_xu_v, void, env, i32, i32, i32)
+DEF_HELPER_4(vector_vfwcvt_f_x_v, void, env, i32, i32, i32)
+DEF_HELPER_4(vector_vfwcvt_f_f_v, void, env, i32, i32, i32)
+DEF_HELPER_4(vector_vfncvt_xu_f_v, void, env, i32, i32, i32)
+DEF_HELPER_4(vector_vfncvt_x_f_v, void, env, i32, i32, i32)
+DEF_HELPER_4(vector_vfncvt_f_xu_v, void, env, i32, i32, i32)
+DEF_HELPER_4(vector_vfncvt_f_x_v, void, env, i32, i32, i32)
+DEF_HELPER_4(vector_vfncvt_f_f_v, void, env, i32, i32, i32)
+DEF_HELPER_4(vector_vfsqrt_v, void, env, i32, i32, i32)
+DEF_HELPER_4(vector_vfclass_v, void, env, i32, i32, i32)
+DEF_HELPER_5(vector_vadd_vv, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vadd_vx, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vadd_vi, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vredsum_vs, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vfadd_vv, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vfadd_vf, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vredand_vs, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vfredsum_vs, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vsub_vv, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vsub_vx, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vredor_vs, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vfsub_vv, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vfsub_vf, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vrsub_vx, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vrsub_vi, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vredxor_vs, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vfredosum_vs, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vminu_vv, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vminu_vx, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vredminu_vs, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vfmin_vv, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vfmin_vf, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vmin_vv, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vmin_vx, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vredmin_vs, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vfredmin_vs, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vmaxu_vv, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vmaxu_vx, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vredmaxu_vs, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vfmax_vv, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vfmax_vf, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vmax_vv, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vmax_vx, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vredmax_vs, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vfredmax_vs, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vfsgnj_vv, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vfsgnj_vf, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vand_vv, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vand_vx, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vand_vi, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vfsgnjn_vv, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vfsgnjn_vf, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vor_vv, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vor_vx, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vor_vi, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vfsgnjx_vv, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vfsgnjx_vf, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vxor_vv, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vxor_vx, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vxor_vi, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vrgather_vv, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vrgather_vx, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vrgather_vi, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vslideup_vx, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vslideup_vi, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vslide1up_vx, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vslidedown_vx, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vslidedown_vi, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vslide1down_vx, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vmerge_vvm, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vmerge_vxm, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vmerge_vim, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vfmerge_vfm, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vmseq_vv, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vmseq_vx, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vmseq_vi, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vmfeq_vv, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vmfeq_vf, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vmsne_vv, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vmsne_vx, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vmsne_vi, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vmfle_vv, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vmfle_vf, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vmsltu_vv, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vmsltu_vx, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vmford_vv, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vmford_vf, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vmslt_vv, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vmslt_vx, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vmflt_vv, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vmflt_vf, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vmsleu_vv, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vmsleu_vx, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vmsleu_vi, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vmfne_vv, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vmfne_vf, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vmsle_vv, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vmsle_vx, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vmsle_vi, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vmfgt_vf, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vmsgtu_vx, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vmsgtu_vi, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vmsgt_vx, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vmsgt_vi, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vmfge_vf, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vsaddu_vv, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vsaddu_vx, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vsaddu_vi, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vdivu_vv, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vdivu_vx, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vfdiv_vv, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vfdiv_vf, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vsadd_vv, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vsadd_vx, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vsadd_vi, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vdiv_vv, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vdiv_vx, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vfrdiv_vf, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vssubu_vv, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vssubu_vx, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vremu_vv, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vremu_vx, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vssub_vv, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vssub_vx, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vrem_vv, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vrem_vx, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vaadd_vv, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vaadd_vx, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vaadd_vi, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vmulhu_vv, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vmulhu_vx, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vfmul_vv, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vfmul_vf, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vsll_vv, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vsll_vx, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vsll_vi, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vmul_vv, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vmul_vx, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vasub_vv, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vasub_vx, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vmulhsu_vv, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vmulhsu_vx, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vsmul_vv, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vsmul_vx, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vmulh_vv, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vmulh_vx, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vfrsub_vf, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vsrl_vv, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vsrl_vx, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vsrl_vi, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vfmadd_vv, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vfmadd_vf, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vsra_vv, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vsra_vx, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vsra_vi, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vmadd_vv, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vmadd_vx, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vfnmadd_vv, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vfnmadd_vf, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vssrl_vv, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vssrl_vx, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vssrl_vi, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vfmsub_vv, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vfmsub_vf, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vssra_vv, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vssra_vx, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vssra_vi, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vnmsub_vv, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vnmsub_vx, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vfnmsub_vv, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vfnmsub_vf, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vnsrl_vv, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vnsrl_vx, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vnsrl_vi, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vfmacc_vv, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vfmacc_vf, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vnsra_vv, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vnsra_vx, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vnsra_vi, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vmacc_vv, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vmacc_vx, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vfnmacc_vv, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vfnmacc_vf, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vnclipu_vv, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vnclipu_vx, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vnclipu_vi, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vfmsac_vv, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vfmsac_vf, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vnclip_vv, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vnclip_vx, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vnclip_vi, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vnmsac_vv, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vnmsac_vx, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vfnmsac_vv, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vfnmsac_vf, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vwredsumu_vs, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vwaddu_vv, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vwaddu_vx, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vfwadd_vv, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vfwadd_vf, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vwredsum_vs, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vwadd_vv, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vwadd_vx, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vfwredsum_vs, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vwsubu_vv, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vwsubu_vx, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vfwsub_vv, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vfwsub_vf, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vwsub_vv, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vwsub_vx, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vfwredosum_vs, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vwaddu_wv, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vwaddu_wx, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vfwadd_wv, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vfwadd_wf, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vwadd_wv, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vwadd_wx, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vwsubu_wv, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vwsubu_wx, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vfwsub_wv, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vfwsub_wf, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vwsub_wv, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vwsub_wx, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vwmulu_vv, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vwmulu_vx, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vfwmul_vv, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vfwmul_vf, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vwmulsu_vv, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vwmulsu_vx, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vwmul_vv, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vwmul_vx, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vwsmaccu_vv, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vwsmaccu_vx, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vwmaccu_vv, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vwmaccu_vx, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vfwmacc_vv, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vfwmacc_vf, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vwsmacc_vv, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vwsmacc_vx, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vwmacc_vv, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vwmacc_vx, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vfwnmacc_vv, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vfwnmacc_vf, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vwsmaccsu_vv, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vwsmaccsu_vx, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vwmaccsu_vv, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vwmaccsu_vx, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vfwmsac_vv, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vfwmsac_vf, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vwsmaccus_vx, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vwmaccus_vx, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vfwnmsac_vv, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vfwnmsac_vf, void, env, i32, i32, i32, i32)
+DEF_HELPER_4(vector_vsetvli, void, env, i32, i32, i32)
+DEF_HELPER_4(vector_vsetvl, void, env, i32, i32, i32)
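
A note on the arity: DEF_HELPER_n counts the arguments only, with the return type listed separately, so the DEF_HELPER_5 entries above declare helpers taking env plus four i32 register indices. The generated prototype looks roughly like this (a sketch; the real expansion lives in the DEF_HELPER machinery, and the argument names are assumptions taken from the translator macros later in this patch):

#include <stdint.h>

typedef struct CPURISCVState CPURISCVState;   /* opaque here */

/* Approximate prototype generated for
 *   DEF_HELPER_5(vector_vlb_v, void, env, i32, i32, i32, i32)
 * (env plus nf, vm, rs1, rd, in the order the translator passes them). */
void helper_vector_vlb_v(CPURISCVState *env, uint32_t nf, uint32_t vm,
                         uint32_t rs1, uint32_t rd);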
diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode
index 77f794e..d125ff9 100644
--- a/target/riscv/insn32.decode
+++ b/target/riscv/insn32.decode
@@ -25,7 +25,7 @@
%sh10 20:10
%csr 20:12
%rm 12:3
-
+%nf 29:3
# immediates:
%imm_i 20:s12
%imm_s 25:s7 7:5
@@ -43,7 +43,6 @@
&u imm rd
&shift shamt rs1 rd
&atomic aq rl rs2 rs1 rd
-
# Formats 32:
@r ....... ..... ..... ... ..... ....... &r %rs2 %rs1 %rd
@i ............ ..... ... ..... ....... &i imm=%imm_i %rs1 %rd
@@ -62,11 +61,17 @@
@r_rm ....... ..... ..... ... ..... ....... %rs2 %rs1 %rm %rd
@r2_rm ....... ..... ..... ... ..... ....... %rs1 %rm %rd
@r2 ....... ..... ..... ... ..... ....... %rs1 %rd
+@r_vm ...... vm:1 ..... ..... ... ..... ....... %rs2 %rs1 %rd
+@r_wdvm ..... wd:1 vm:1 ..... ..... ... ..... ....... %rs2 %rs1 %rd
+@r_nfvm nf:3 ... vm:1 ..... ..... ... ..... ....... %rs2 %rs1 %rd
+@r2_nfvm nf:3 ... vm:1 ..... ..... ... ..... ....... %rs1 %rd
+@r2_vm ...... vm:1 ..... ..... ... ..... ....... %rs2 %rd
+@r1_vm ...... vm:1 ..... ..... ... ..... ....... %rd
+@r2_zimm . zimm:11 ..... ... ..... ....... %rs1 %rd
@sfence_vma ....... ..... ..... ... ..... ....... %rs2 %rs1
@sfence_vm ....... ..... ..... ... ..... ....... %rs1
-
# *** Privileged Instructions ***
ecall 000000000000 00000 000 00000 1110011
ebreak 000000000001 00000 000 00000 1110011
@@ -203,3 +208,366 @@ fcvt_w_d 1100001 00000 ..... ... ..... 1010011 @r2_rm
fcvt_wu_d 1100001 00001 ..... ... ..... 1010011 @r2_rm
fcvt_d_w 1101001 00000 ..... ... ..... 1010011 @r2_rm
fcvt_d_wu 1101001 00001 ..... ... ..... 1010011 @r2_rm
+
+# *** RV32V Standard Extension ***
+
+# *** Vector loads and stores are encoded within LOAD-FP/STORE-FP ***
+vlb_v ... 100 . 00000 ..... 000 ..... 0000111 @r2_nfvm
+vlh_v ... 100 . 00000 ..... 101 ..... 0000111 @r2_nfvm
+vlw_v ... 100 . 00000 ..... 110 ..... 0000111 @r2_nfvm
+vle_v ... 000 . 00000 ..... 111 ..... 0000111 @r2_nfvm
+vlbu_v ... 000 . 00000 ..... 000 ..... 0000111 @r2_nfvm
+vlhu_v ... 000 . 00000 ..... 101 ..... 0000111 @r2_nfvm
+vlwu_v ... 000 . 00000 ..... 110 ..... 0000111 @r2_nfvm
+vlbff_v ... 100 . 10000 ..... 000 ..... 0000111 @r2_nfvm
+vlhff_v ... 100 . 10000 ..... 101 ..... 0000111 @r2_nfvm
+vlwff_v ... 100 . 10000 ..... 110 ..... 0000111 @r2_nfvm
+vleff_v ... 000 . 10000 ..... 111 ..... 0000111 @r2_nfvm
+vlbuff_v ... 000 . 10000 ..... 000 ..... 0000111 @r2_nfvm
+vlhuff_v ... 000 . 10000 ..... 101 ..... 0000111 @r2_nfvm
+vlwuff_v ... 000 . 10000 ..... 110 ..... 0000111 @r2_nfvm
+vsb_v ... 000 . 00000 ..... 000 ..... 0100111 @r2_nfvm
+vsh_v ... 000 . 00000 ..... 101 ..... 0100111 @r2_nfvm
+vsw_v ... 000 . 00000 ..... 110 ..... 0100111 @r2_nfvm
+vse_v ... 000 . 00000 ..... 111 ..... 0100111 @r2_nfvm
+
+vlsb_v ... 110 . ..... ..... 000 ..... 0000111 @r_nfvm
+vlsh_v ... 110 . ..... ..... 101 ..... 0000111 @r_nfvm
+vlsw_v ... 110 . ..... ..... 110 ..... 0000111 @r_nfvm
+vlse_v ... 010 . ..... ..... 111 ..... 0000111 @r_nfvm
+vlsbu_v ... 010 . ..... ..... 000 ..... 0000111 @r_nfvm
+vlshu_v ... 010 . ..... ..... 101 ..... 0000111 @r_nfvm
+vlswu_v ... 010 . ..... ..... 110 ..... 0000111 @r_nfvm
+vssb_v ... 010 . ..... ..... 000 ..... 0100111 @r_nfvm
+vssh_v ... 010 . ..... ..... 101 ..... 0100111 @r_nfvm
+vssw_v ... 010 . ..... ..... 110 ..... 0100111 @r_nfvm
+vsse_v ... 010 . ..... ..... 111 ..... 0100111 @r_nfvm
+
+vlxb_v ... 111 . ..... ..... 000 ..... 0000111 @r_nfvm
+vlxh_v ... 111 . ..... ..... 101 ..... 0000111 @r_nfvm
+vlxw_v ... 111 . ..... ..... 110 ..... 0000111 @r_nfvm
+vlxe_v ... 011 . ..... ..... 111 ..... 0000111 @r_nfvm
+vlxbu_v ... 011 . ..... ..... 000 ..... 0000111 @r_nfvm
+vlxhu_v ... 011 . ..... ..... 101 ..... 0000111 @r_nfvm
+vlxwu_v ... 011 . ..... ..... 110 ..... 0000111 @r_nfvm
+vsxb_v ... 011 . ..... ..... 000 ..... 0100111 @r_nfvm
+vsxh_v ... 011 . ..... ..... 101 ..... 0100111 @r_nfvm
+vsxw_v ... 011 . ..... ..... 110 ..... 0100111 @r_nfvm
+vsxe_v ... 011 . ..... ..... 111 ..... 0100111 @r_nfvm
+vsuxb_v ... 111 . ..... ..... 000 ..... 0100111 @r_nfvm
+vsuxh_v ... 111 . ..... ..... 101 ..... 0100111 @r_nfvm
+vsuxw_v ... 111 . ..... ..... 110 ..... 0100111 @r_nfvm
+vsuxe_v ... 111 . ..... ..... 111 ..... 0100111 @r_nfvm
+
+#*** Vector AMO operations are encoded under the standard AMO major opcode ***
+vamoswapw_v 00001 . . ..... ..... 110 ..... 0101111 @r_wdvm
+vamoswapd_v 00001 . . ..... ..... 111 ..... 0101111 @r_wdvm
+vamoaddw_v 00000 . . ..... ..... 110 ..... 0101111 @r_wdvm
+vamoaddd_v 00000 . . ..... ..... 111 ..... 0101111 @r_wdvm
+vamoxorw_v 00100 . . ..... ..... 110 ..... 0101111 @r_wdvm
+vamoxord_v 00100 . . ..... ..... 111 ..... 0101111 @r_wdvm
+vamoandw_v 01100 . . ..... ..... 110 ..... 0101111 @r_wdvm
+vamoandd_v 01100 . . ..... ..... 111 ..... 0101111 @r_wdvm
+vamoorw_v 01000 . . ..... ..... 110 ..... 0101111 @r_wdvm
+vamoord_v 01000 . . ..... ..... 111 ..... 0101111 @r_wdvm
+vamominw_v 10000 . . ..... ..... 110 ..... 0101111 @r_wdvm
+vamomind_v 10000 . . ..... ..... 111 ..... 0101111 @r_wdvm
+vamomaxw_v 10100 . . ..... ..... 110 ..... 0101111 @r_wdvm
+vamomaxd_v 10100 . . ..... ..... 111 ..... 0101111 @r_wdvm
+vamominuw_v 11000 . . ..... ..... 110 ..... 0101111 @r_wdvm
+vamominud_v 11000 . . ..... ..... 111 ..... 0101111 @r_wdvm
+vamomaxuw_v 11100 . . ..... ..... 110 ..... 0101111 @r_wdvm
+vamomaxud_v 11100 . . ..... ..... 111 ..... 0101111 @r_wdvm
+
+#*** new major opcode OP-V ***
+vadd_vv 000000 . ..... ..... 000 ..... 1010111 @r_vm
+vadd_vx 000000 . ..... ..... 100 ..... 1010111 @r_vm
+vadd_vi 000000 . ..... ..... 011 ..... 1010111 @r_vm
+vredsum_vs 000000 . ..... ..... 010 ..... 1010111 @r_vm
+vfadd_vv 000000 . ..... ..... 001 ..... 1010111 @r_vm
+vfadd_vf 000000 . ..... ..... 101 ..... 1010111 @r_vm
+vredand_vs 000001 . ..... ..... 010 ..... 1010111 @r_vm
+vfredsum_vs 000001 . ..... ..... 001 ..... 1010111 @r_vm
+vsub_vv 000010 . ..... ..... 000 ..... 1010111 @r_vm
+vsub_vx 000010 . ..... ..... 100 ..... 1010111 @r_vm
+vredor_vs 000010 . ..... ..... 010 ..... 1010111 @r_vm
+vfsub_vv 000010 . ..... ..... 001 ..... 1010111 @r_vm
+vfsub_vf 000010 . ..... ..... 101 ..... 1010111 @r_vm
+vrsub_vx 000011 . ..... ..... 100 ..... 1010111 @r_vm
+vrsub_vi 000011 . ..... ..... 011 ..... 1010111 @r_vm
+vredxor_vs 000011 . ..... ..... 010 ..... 1010111 @r_vm
+vfredosum_vs 000011 . ..... ..... 001 ..... 1010111 @r_vm
+vminu_vv 000100 . ..... ..... 000 ..... 1010111 @r_vm
+vminu_vx 000100 . ..... ..... 100 ..... 1010111 @r_vm
+vredminu_vs 000100 . ..... ..... 010 ..... 1010111 @r_vm
+vfmin_vv 000100 . ..... ..... 001 ..... 1010111 @r_vm
+vfmin_vf 000100 . ..... ..... 101 ..... 1010111 @r_vm
+vmin_vv 000101 . ..... ..... 000 ..... 1010111 @r_vm
+vmin_vx 000101 . ..... ..... 100 ..... 1010111 @r_vm
+vredmin_vs 000101 . ..... ..... 010 ..... 1010111 @r_vm
+vfredmin_vs 000101 . ..... ..... 001 ..... 1010111 @r_vm
+vmaxu_vv 000110 . ..... ..... 000 ..... 1010111 @r_vm
+vmaxu_vx 000110 . ..... ..... 100 ..... 1010111 @r_vm
+vredmaxu_vs 000110 . ..... ..... 010 ..... 1010111 @r_vm
+vfmax_vv 000110 . ..... ..... 001 ..... 1010111 @r_vm
+vfmax_vf 000110 . ..... ..... 101 ..... 1010111 @r_vm
+vmax_vv 000111 . ..... ..... 000 ..... 1010111 @r_vm
+vmax_vx 000111 . ..... ..... 100 ..... 1010111 @r_vm
+vredmax_vs 000111 . ..... ..... 010 ..... 1010111 @r_vm
+vfredmax_vs 000111 . ..... ..... 001 ..... 1010111 @r_vm
+vfsgnj_vv 001000 . ..... ..... 001 ..... 1010111 @r_vm
+vfsgnj_vf 001000 . ..... ..... 101 ..... 1010111 @r_vm
+vand_vv 001001 . ..... ..... 000 ..... 1010111 @r_vm
+vand_vx 001001 . ..... ..... 100 ..... 1010111 @r_vm
+vand_vi 001001 . ..... ..... 011 ..... 1010111 @r_vm
+vfsgnjn_vv 001001 . ..... ..... 001 ..... 1010111 @r_vm
+vfsgnjn_vf 001001 . ..... ..... 101 ..... 1010111 @r_vm
+vor_vv 001010 . ..... ..... 000 ..... 1010111 @r_vm
+vor_vx 001010 . ..... ..... 100 ..... 1010111 @r_vm
+vor_vi 001010 . ..... ..... 011 ..... 1010111 @r_vm
+vfsgnjx_vv 001010 . ..... ..... 001 ..... 1010111 @r_vm
+vfsgnjx_vf 001010 . ..... ..... 101 ..... 1010111 @r_vm
+vxor_vv 001011 . ..... ..... 000 ..... 1010111 @r_vm
+vxor_vx 001011 . ..... ..... 100 ..... 1010111 @r_vm
+vxor_vi 001011 . ..... ..... 011 ..... 1010111 @r_vm
+vrgather_vv 001100 . ..... ..... 000 ..... 1010111 @r_vm
+vrgather_vx 001100 . ..... ..... 100 ..... 1010111 @r_vm
+vrgather_vi 001100 . ..... ..... 011 ..... 1010111 @r_vm
+vext_x_v 001100 1 ..... ..... 010 ..... 1010111 @r
+vfmv_f_s 001100 1 ..... ..... 001 ..... 1010111 @r
+vmv_s_x 001101 1 ..... ..... 110 ..... 1010111 @r
+vfmv_s_f 001101 1 ..... ..... 101 ..... 1010111 @r
+vslideup_vx 001110 . ..... ..... 100 ..... 1010111 @r_vm
+vslideup_vi 001110 . ..... ..... 011 ..... 1010111 @r_vm
+vslide1up_vx 001110 . ..... ..... 110 ..... 1010111 @r_vm
+vslidedown_vx 001111 . ..... ..... 100 ..... 1010111 @r_vm
+vslidedown_vi 001111 . ..... ..... 011 ..... 1010111 @r_vm
+vslide1down_vx 001111 . ..... ..... 110 ..... 1010111 @r_vm
+vadc_vvm 010000 1 ..... ..... 000 ..... 1010111 @r
+vadc_vxm 010000 1 ..... ..... 100 ..... 1010111 @r
+vadc_vim 010000 1 ..... ..... 011 ..... 1010111 @r
+vmadc_vvm 010001 1 ..... ..... 000 ..... 1010111 @r
+vmadc_vxm 010001 1 ..... ..... 100 ..... 1010111 @r
+vmadc_vim 010001 1 ..... ..... 011 ..... 1010111 @r
+vsbc_vvm 010010 1 ..... ..... 000 ..... 1010111 @r
+vsbc_vxm 010010 1 ..... ..... 100 ..... 1010111 @r
+vmsbc_vvm 010011 1 ..... ..... 000 ..... 1010111 @r
+vmsbc_vxm 010011 1 ..... ..... 100 ..... 1010111 @r
+vmpopc_m 010100 . ..... ----- 010 ..... 1010111 @r2_vm
+vmfirst_m 010101 . ..... ----- 010 ..... 1010111 @r2_vm
+vmsbf_m 010110 . ..... 00001 010 ..... 1010111 @r2_vm
+vmsof_m 010110 . ..... 00010 010 ..... 1010111 @r2_vm
+vmsif_m 010110 . ..... 00011 010 ..... 1010111 @r2_vm
+viota_m 010110 . ..... 10000 010 ..... 1010111 @r2_vm
+vid_v 010110 . 00000 10001 010 ..... 1010111 @r1_vm
+vmerge_vvm 010111 . ..... ..... 000 ..... 1010111 @r_vm
+vmerge_vxm 010111 . ..... ..... 100 ..... 1010111 @r_vm
+vmerge_vim 010111 . ..... ..... 011 ..... 1010111 @r_vm
+vcompress_vm 010111 - ..... ..... 010 ..... 1010111 @r
+vfmerge_vfm 010111 . ..... ..... 101 ..... 1010111 @r_vm
+vmseq_vv 011000 . ..... ..... 000 ..... 1010111 @r_vm
+vmseq_vx 011000 . ..... ..... 100 ..... 1010111 @r_vm
+vmseq_vi 011000 . ..... ..... 011 ..... 1010111 @r_vm
+vmandnot_mm 011000 - ..... ..... 010 ..... 1010111 @r
+vmfeq_vv 011000 . ..... ..... 001 ..... 1010111 @r_vm
+vmfeq_vf 011000 . ..... ..... 101 ..... 1010111 @r_vm
+vmsne_vv 011001 . ..... ..... 000 ..... 1010111 @r_vm
+vmsne_vx 011001 . ..... ..... 100 ..... 1010111 @r_vm
+vmsne_vi 011001 . ..... ..... 011 ..... 1010111 @r_vm
+vmand_mm 011001 - ..... ..... 010 ..... 1010111 @r
+vmfle_vv 011001 . ..... ..... 001 ..... 1010111 @r_vm
+vmfle_vf 011001 . ..... ..... 101 ..... 1010111 @r_vm
+vmsltu_vv 011010 . ..... ..... 000 ..... 1010111 @r_vm
+vmsltu_vx 011010 . ..... ..... 100 ..... 1010111 @r_vm
+vmor_mm 011010 - ..... ..... 010 ..... 1010111 @r
+vmford_vv 011010 . ..... ..... 001 ..... 1010111 @r_vm
+vmford_vf 011010 . ..... ..... 101 ..... 1010111 @r_vm
+vmslt_vv 011011 . ..... ..... 000 ..... 1010111 @r_vm
+vmslt_vx 011011 . ..... ..... 100 ..... 1010111 @r_vm
+vmxor_mm 011011 - ..... ..... 010 ..... 1010111 @r
+vmflt_vv 011011 . ..... ..... 001 ..... 1010111 @r_vm
+vmflt_vf 011011 . ..... ..... 101 ..... 1010111 @r_vm
+vmsleu_vv 011100 . ..... ..... 000 ..... 1010111 @r_vm
+vmsleu_vx 011100 . ..... ..... 100 ..... 1010111 @r_vm
+vmsleu_vi 011100 . ..... ..... 011 ..... 1010111 @r_vm
+vmornot_mm 011100 - ..... ..... 010 ..... 1010111 @r
+vmfne_vv 011100 . ..... ..... 001 ..... 1010111 @r_vm
+vmfne_vf 011100 . ..... ..... 101 ..... 1010111 @r_vm
+vmsle_vv 011101 . ..... ..... 000 ..... 1010111 @r_vm
+vmsle_vx 011101 . ..... ..... 100 ..... 1010111 @r_vm
+vmsle_vi 011101 . ..... ..... 011 ..... 1010111 @r_vm
+vmnand_mm 011101 - ..... ..... 010 ..... 1010111 @r
+vmfgt_vf 011101 . ..... ..... 101 ..... 1010111 @r_vm
+vmsgtu_vx 011110 . ..... ..... 100 ..... 1010111 @r_vm
+vmsgtu_vi 011110 . ..... ..... 011 ..... 1010111 @r_vm
+vmnor_mm 011110 - ..... ..... 010 ..... 1010111 @r
+vmsgt_vx 011111 . ..... ..... 100 ..... 1010111 @r_vm
+vmsgt_vi 011111 . ..... ..... 011 ..... 1010111 @r_vm
+vmxnor_mm 011111 - ..... ..... 010 ..... 1010111 @r
+vmfge_vf 011111 . ..... ..... 101 ..... 1010111 @r_vm
+vsaddu_vv 100000 . ..... ..... 000 ..... 1010111 @r_vm
+vsaddu_vx 100000 . ..... ..... 100 ..... 1010111 @r_vm
+vsaddu_vi 100000 . ..... ..... 011 ..... 1010111 @r_vm
+vdivu_vv 100000 . ..... ..... 010 ..... 1010111 @r_vm
+vdivu_vx 100000 . ..... ..... 110 ..... 1010111 @r_vm
+vfdiv_vv 100000 . ..... ..... 001 ..... 1010111 @r_vm
+vfdiv_vf 100000 . ..... ..... 101 ..... 1010111 @r_vm
+vsadd_vv 100001 . ..... ..... 000 ..... 1010111 @r_vm
+vsadd_vx 100001 . ..... ..... 100 ..... 1010111 @r_vm
+vsadd_vi 100001 . ..... ..... 011 ..... 1010111 @r_vm
+vdiv_vv 100001 . ..... ..... 010 ..... 1010111 @r_vm
+vdiv_vx 100001 . ..... ..... 110 ..... 1010111 @r_vm
+vfrdiv_vf 100001 . ..... ..... 101 ..... 1010111 @r_vm
+vssubu_vv 100010 . ..... ..... 000 ..... 1010111 @r_vm
+vssubu_vx 100010 . ..... ..... 100 ..... 1010111 @r_vm
+vremu_vv 100010 . ..... ..... 010 ..... 1010111 @r_vm
+vremu_vx 100010 . ..... ..... 110 ..... 1010111 @r_vm
+vfcvt_xu_f_v 100010 . ..... 00000 001 ..... 1010111 @r2_vm
+vfcvt_x_f_v 100010 . ..... 00001 001 ..... 1010111 @r2_vm
+vfcvt_f_xu_v 100010 . ..... 00010 001 ..... 1010111 @r2_vm
+vfcvt_f_x_v 100010 . ..... 00011 001 ..... 1010111 @r2_vm
+vfwcvt_xu_f_v 100010 . ..... 01000 001 ..... 1010111 @r2_vm
+vfwcvt_x_f_v 100010 . ..... 01001 001 ..... 1010111 @r2_vm
+vfwcvt_f_xu_v 100010 . ..... 01010 001 ..... 1010111 @r2_vm
+vfwcvt_f_x_v 100010 . ..... 01011 001 ..... 1010111 @r2_vm
+vfwcvt_f_f_v 100010 . ..... 01100 001 ..... 1010111 @r2_vm
+vfncvt_xu_f_v 100010 . ..... 10000 001 ..... 1010111 @r2_vm
+vfncvt_x_f_v 100010 . ..... 10001 001 ..... 1010111 @r2_vm
+vfncvt_f_xu_v 100010 . ..... 10010 001 ..... 1010111 @r2_vm
+vfncvt_f_x_v 100010 . ..... 10011 001 ..... 1010111 @r2_vm
+vfncvt_f_f_v 100010 . ..... 10100 001 ..... 1010111 @r2_vm
+vssub_vv 100011 . ..... ..... 000 ..... 1010111 @r_vm
+vssub_vx 100011 . ..... ..... 100 ..... 1010111 @r_vm
+vrem_vv 100011 . ..... ..... 010 ..... 1010111 @r_vm
+vrem_vx 100011 . ..... ..... 110 ..... 1010111 @r_vm
+vfsqrt_v 100011 . ..... 00000 001 ..... 1010111 @r2_vm
+vfclass_v 100011 . ..... 10000 001 ..... 1010111 @r2_vm
+vaadd_vv 100100 . ..... ..... 000 ..... 1010111 @r_vm
+vaadd_vx 100100 . ..... ..... 100 ..... 1010111 @r_vm
+vaadd_vi 100100 . ..... ..... 011 ..... 1010111 @r_vm
+vmulhu_vv 100100 . ..... ..... 010 ..... 1010111 @r_vm
+vmulhu_vx 100100 . ..... ..... 110 ..... 1010111 @r_vm
+vfmul_vv 100100 . ..... ..... 001 ..... 1010111 @r_vm
+vfmul_vf 100100 . ..... ..... 101 ..... 1010111 @r_vm
+vsll_vv 100101 . ..... ..... 000 ..... 1010111 @r_vm
+vsll_vx 100101 . ..... ..... 100 ..... 1010111 @r_vm
+vsll_vi 100101 . ..... ..... 011 ..... 1010111 @r_vm
+vmul_vv 100101 . ..... ..... 010 ..... 1010111 @r_vm
+vmul_vx 100101 . ..... ..... 110 ..... 1010111 @r_vm
+vasub_vv 100110 . ..... ..... 000 ..... 1010111 @r_vm
+vasub_vx 100110 . ..... ..... 100 ..... 1010111 @r_vm
+vmulhsu_vv 100110 . ..... ..... 010 ..... 1010111 @r_vm
+vmulhsu_vx 100110 . ..... ..... 110 ..... 1010111 @r_vm
+vsmul_vv 100111 . ..... ..... 000 ..... 1010111 @r_vm
+vsmul_vx 100111 . ..... ..... 100 ..... 1010111 @r_vm
+vmulh_vv 100111 . ..... ..... 010 ..... 1010111 @r_vm
+vmulh_vx 100111 . ..... ..... 110 ..... 1010111 @r_vm
+vfrsub_vf 100111 . ..... ..... 101 ..... 1010111 @r_vm
+vsrl_vv 101000 . ..... ..... 000 ..... 1010111 @r_vm
+vsrl_vx 101000 . ..... ..... 100 ..... 1010111 @r_vm
+vsrl_vi 101000 . ..... ..... 011 ..... 1010111 @r_vm
+vfmadd_vv 101000 . ..... ..... 001 ..... 1010111 @r_vm
+vfmadd_vf 101000 . ..... ..... 101 ..... 1010111 @r_vm
+vsra_vv 101001 . ..... ..... 000 ..... 1010111 @r_vm
+vsra_vx 101001 . ..... ..... 100 ..... 1010111 @r_vm
+vsra_vi 101001 . ..... ..... 011 ..... 1010111 @r_vm
+vmadd_vv 101001 . ..... ..... 010 ..... 1010111 @r_vm
+vmadd_vx 101001 . ..... ..... 110 ..... 1010111 @r_vm
+vfnmadd_vv 101001 . ..... ..... 001 ..... 1010111 @r_vm
+vfnmadd_vf 101001 . ..... ..... 101 ..... 1010111 @r_vm
+vssrl_vv 101010 . ..... ..... 000 ..... 1010111 @r_vm
+vssrl_vx 101010 . ..... ..... 100 ..... 1010111 @r_vm
+vssrl_vi 101010 . ..... ..... 011 ..... 1010111 @r_vm
+vfmsub_vv 101010 . ..... ..... 001 ..... 1010111 @r_vm
+vfmsub_vf 101010 . ..... ..... 101 ..... 1010111 @r_vm
+vssra_vv 101011 . ..... ..... 000 ..... 1010111 @r_vm
+vssra_vx 101011 . ..... ..... 100 ..... 1010111 @r_vm
+vssra_vi 101011 . ..... ..... 011 ..... 1010111 @r_vm
+vnmsub_vv 101011 . ..... ..... 010 ..... 1010111 @r_vm
+vnmsub_vx 101011 . ..... ..... 110 ..... 1010111 @r_vm
+vfnmsub_vv 101011 . ..... ..... 001 ..... 1010111 @r_vm
+vfnmsub_vf 101011 . ..... ..... 101 ..... 1010111 @r_vm
+vnsrl_vv 101100 . ..... ..... 000 ..... 1010111 @r_vm
+vnsrl_vx 101100 . ..... ..... 100 ..... 1010111 @r_vm
+vnsrl_vi 101100 . ..... ..... 011 ..... 1010111 @r_vm
+vfmacc_vv 101100 . ..... ..... 001 ..... 1010111 @r_vm
+vfmacc_vf 101100 . ..... ..... 101 ..... 1010111 @r_vm
+vnsra_vv 101101 . ..... ..... 000 ..... 1010111 @r_vm
+vnsra_vx 101101 . ..... ..... 100 ..... 1010111 @r_vm
+vnsra_vi 101101 . ..... ..... 011 ..... 1010111 @r_vm
+vmacc_vv 101101 . ..... ..... 010 ..... 1010111 @r_vm
+vmacc_vx 101101 . ..... ..... 110 ..... 1010111 @r_vm
+vfnmacc_vv 101101 . ..... ..... 001 ..... 1010111 @r_vm
+vfnmacc_vf 101101 . ..... ..... 101 ..... 1010111 @r_vm
+vnclipu_vv 101110 . ..... ..... 000 ..... 1010111 @r_vm
+vnclipu_vx 101110 . ..... ..... 100 ..... 1010111 @r_vm
+vnclipu_vi 101110 . ..... ..... 011 ..... 1010111 @r_vm
+vfmsac_vv 101110 . ..... ..... 001 ..... 1010111 @r_vm
+vfmsac_vf 101110 . ..... ..... 101 ..... 1010111 @r_vm
+vnclip_vv 101111 . ..... ..... 000 ..... 1010111 @r_vm
+vnclip_vx 101111 . ..... ..... 100 ..... 1010111 @r_vm
+vnclip_vi 101111 . ..... ..... 011 ..... 1010111 @r_vm
+vnmsac_vv 101111 . ..... ..... 010 ..... 1010111 @r_vm
+vnmsac_vx 101111 . ..... ..... 110 ..... 1010111 @r_vm
+vfnmsac_vv 101111 . ..... ..... 001 ..... 1010111 @r_vm
+vfnmsac_vf 101111 . ..... ..... 101 ..... 1010111 @r_vm
+vwredsumu_vs 110000 . ..... ..... 000 ..... 1010111 @r_vm
+vwaddu_vv 110000 . ..... ..... 010 ..... 1010111 @r_vm
+vwaddu_vx 110000 . ..... ..... 110 ..... 1010111 @r_vm
+vfwadd_vv 110000 . ..... ..... 001 ..... 1010111 @r_vm
+vfwadd_vf 110000 . ..... ..... 101 ..... 1010111 @r_vm
+vwredsum_vs 110001 . ..... ..... 000 ..... 1010111 @r_vm
+vwadd_vv 110001 . ..... ..... 010 ..... 1010111 @r_vm
+vwadd_vx 110001 . ..... ..... 110 ..... 1010111 @r_vm
+vfwredsum_vs 110001 . ..... ..... 001 ..... 1010111 @r_vm
+vwsubu_vv 110010 . ..... ..... 010 ..... 1010111 @r_vm
+vwsubu_vx 110010 . ..... ..... 110 ..... 1010111 @r_vm
+vfwsub_vv 110010 . ..... ..... 001 ..... 1010111 @r_vm
+vfwsub_vf 110010 . ..... ..... 101 ..... 1010111 @r_vm
+vwsub_vv 110011 . ..... ..... 010 ..... 1010111 @r_vm
+vwsub_vx 110011 . ..... ..... 110 ..... 1010111 @r_vm
+vfwredosum_vs 110011 . ..... ..... 001 ..... 1010111 @r_vm
+vwaddu_wv 110100 . ..... ..... 010 ..... 1010111 @r_vm
+vwaddu_wx 110100 . ..... ..... 110 ..... 1010111 @r_vm
+vfwadd_wv 110100 . ..... ..... 001 ..... 1010111 @r_vm
+vfwadd_wf 110100 . ..... ..... 101 ..... 1010111 @r_vm
+vwadd_wv 110101 . ..... ..... 010 ..... 1010111 @r_vm
+vwadd_wx 110101 . ..... ..... 110 ..... 1010111 @r_vm
+vwsubu_wv 110110 . ..... ..... 010 ..... 1010111 @r_vm
+vwsubu_wx 110110 . ..... ..... 110 ..... 1010111 @r_vm
+vfwsub_wv 110110 . ..... ..... 001 ..... 1010111 @r_vm
+vfwsub_wf 110110 . ..... ..... 101 ..... 1010111 @r_vm
+vwsub_wv 110111 . ..... ..... 010 ..... 1010111 @r_vm
+vwsub_wx 110111 . ..... ..... 110 ..... 1010111 @r_vm
+vwmulu_vv 111000 . ..... ..... 010 ..... 1010111 @r_vm
+vwmulu_vx 111000 . ..... ..... 110 ..... 1010111 @r_vm
+vfwmul_vv 111000 . ..... ..... 001 ..... 1010111 @r_vm
+vfwmul_vf 111000 . ..... ..... 101 ..... 1010111 @r_vm
+vwmulsu_vv 111010 . ..... ..... 010 ..... 1010111 @r_vm
+vwmulsu_vx 111010 . ..... ..... 110 ..... 1010111 @r_vm
+vwmul_vv 111011 . ..... ..... 010 ..... 1010111 @r_vm
+vwmul_vx 111011 . ..... ..... 110 ..... 1010111 @r_vm
+vwsmaccu_vv 111100 . ..... ..... 000 ..... 1010111 @r_vm
+vwsmaccu_vx 111100 . ..... ..... 100 ..... 1010111 @r_vm
+vwmaccu_vv 111100 . ..... ..... 010 ..... 1010111 @r_vm
+vwmaccu_vx 111100 . ..... ..... 110 ..... 1010111 @r_vm
+vfwmacc_vv 111100 . ..... ..... 001 ..... 1010111 @r_vm
+vfwmacc_vf 111100 . ..... ..... 101 ..... 1010111 @r_vm
+vwsmacc_vv 111101 . ..... ..... 000 ..... 1010111 @r_vm
+vwsmacc_vx 111101 . ..... ..... 100 ..... 1010111 @r_vm
+vwmacc_vv 111101 . ..... ..... 010 ..... 1010111 @r_vm
+vwmacc_vx 111101 . ..... ..... 110 ..... 1010111 @r_vm
+vfwnmacc_vv 111101 . ..... ..... 001 ..... 1010111 @r_vm
+vfwnmacc_vf 111101 . ..... ..... 101 ..... 1010111 @r_vm
+vwsmaccsu_vv 111110 . ..... ..... 000 ..... 1010111 @r_vm
+vwsmaccsu_vx 111110 . ..... ..... 100 ..... 1010111 @r_vm
+vwmaccsu_vv 111110 . ..... ..... 010 ..... 1010111 @r_vm
+vwmaccsu_vx 111110 . ..... ..... 110 ..... 1010111 @r_vm
+vfwmsac_vv 111110 . ..... ..... 001 ..... 1010111 @r_vm
+vfwmsac_vf 111110 . ..... ..... 101 ..... 1010111 @r_vm
+vwsmaccus_vx 111111 . ..... ..... 100 ..... 1010111 @r_vm
+vwmaccus_vx 111111 . ..... ..... 110 ..... 1010111 @r_vm
+vfwnmsac_vv 111111 . ..... ..... 001 ..... 1010111 @r_vm
+vfwnmsac_vf 111111 . ..... ..... 101 ..... 1010111 @r_vm
+vsetvli 0 ........... ..... 111 ..... 1010111 @r2_zimm
+vsetvl 1000000 ..... ..... 111 ..... 1010111 @r
diff --git a/target/riscv/insn_trans/trans_rvv.inc.c b/target/riscv/insn_trans/trans_rvv.inc.c
new file mode 100644
index 0000000..dc8e6ce
--- /dev/null
+++ b/target/riscv/insn_trans/trans_rvv.inc.c
@@ -0,0 +1,484 @@
+/*
+ * RISC-V translation routines for the RVV Standard Extension.
+ *
+ * Copyright (c) 2011-2019 C-SKY Limited. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2 or later, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#define GEN_VECTOR_R2_NFVM(INSN) \
+static bool trans_##INSN(DisasContext *ctx, arg_##INSN *a) \
+{ \
+ TCGv_i32 s1 = tcg_const_i32(a->rs1); \
+ TCGv_i32 d = tcg_const_i32(a->rd); \
+ TCGv_i32 nf = tcg_const_i32(a->nf); \
+ TCGv_i32 vm = tcg_const_i32(a->vm); \
+ gen_helper_vector_##INSN(cpu_env, nf, vm, s1, d); \
+ tcg_temp_free_i32(s1); \
+ tcg_temp_free_i32(d); \
+ tcg_temp_free_i32(nf); \
+ tcg_temp_free_i32(vm); \
+ return true; \
+}
+
+#define GEN_VECTOR_R_NFVM(INSN) \
+static bool trans_##INSN(DisasContext *ctx, arg_##INSN *a) \
+{ \
+ TCGv_i32 s1 = tcg_const_i32(a->rs1); \
+ TCGv_i32 s2 = tcg_const_i32(a->rs2); \
+ TCGv_i32 d = tcg_const_i32(a->rd); \
+ TCGv_i32 nf = tcg_const_i32(a->nf); \
+ TCGv_i32 vm = tcg_const_i32(a->vm); \
+    gen_helper_vector_##INSN(cpu_env, nf, vm, s1, s2, d); \
+ tcg_temp_free_i32(s1); \
+ tcg_temp_free_i32(s2); \
+ tcg_temp_free_i32(d); \
+ tcg_temp_free_i32(nf); \
+ tcg_temp_free_i32(vm); \
+ return true; \
+}
+
+#define GEN_VECTOR_R_WDVM(INSN) \
+static bool trans_##INSN(DisasContext *ctx, arg_##INSN *a) \
+{ \
+ TCGv_i32 s1 = tcg_const_i32(a->rs1); \
+ TCGv_i32 s2 = tcg_const_i32(a->rs2); \
+ TCGv_i32 d = tcg_const_i32(a->rd); \
+ TCGv_i32 wd = tcg_const_i32(a->wd); \
+ TCGv_i32 vm = tcg_const_i32(a->vm); \
+    gen_helper_vector_##INSN(cpu_env, wd, vm, s1, s2, d); \
+ tcg_temp_free_i32(s1); \
+ tcg_temp_free_i32(s2); \
+ tcg_temp_free_i32(d); \
+ tcg_temp_free_i32(wd); \
+ tcg_temp_free_i32(vm); \
+ return true; \
+}
+
+#define GEN_VECTOR_R(INSN) \
+static bool trans_##INSN(DisasContext *ctx, arg_##INSN *a) \
+{ \
+ TCGv_i32 s1 = tcg_const_i32(a->rs1); \
+ TCGv_i32 s2 = tcg_const_i32(a->rs2); \
+ TCGv_i32 d = tcg_const_i32(a->rd); \
+ gen_helper_vector_##INSN(cpu_env, s1, s2, d); \
+ tcg_temp_free_i32(s1); \
+ tcg_temp_free_i32(s2); \
+ tcg_temp_free_i32(d); \
+ return true; \
+}
+
+#define GEN_VECTOR_R2_VM(INSN) \
+static bool trans_##INSN(DisasContext *ctx, arg_##INSN *a) \
+{ \
+ TCGv_i32 s2 = tcg_const_i32(a->rs2); \
+ TCGv_i32 d = tcg_const_i32(a->rd); \
+ TCGv_i32 vm = tcg_const_i32(a->vm); \
+ gen_helper_vector_##INSN(cpu_env, vm, s2, d); \
+ tcg_temp_free_i32(s2); \
+ tcg_temp_free_i32(d); \
+ tcg_temp_free_i32(vm); \
+ return true; \
+}
+
+#define GEN_VECTOR_R1_VM(INSN) \
+static bool trans_##INSN(DisasContext *ctx, arg_##INSN *a) \
+{ \
+ TCGv_i32 d = tcg_const_i32(a->rd); \
+ TCGv_i32 vm = tcg_const_i32(a->vm); \
+ gen_helper_vector_##INSN(cpu_env, vm, d); \
+ tcg_temp_free_i32(d); \
+ tcg_temp_free_i32(vm); \
+ return true; \
+}
+
+#define GEN_VECTOR_R_VM(INSN) \
+static bool trans_##INSN(DisasContext *ctx, arg_##INSN *a) \
+{ \
+ TCGv_i32 s1 = tcg_const_i32(a->rs1); \
+ TCGv_i32 s2 = tcg_const_i32(a->rs2); \
+ TCGv_i32 d = tcg_const_i32(a->rd); \
+ TCGv_i32 vm = tcg_const_i32(a->vm); \
+ gen_helper_vector_##INSN(cpu_env, vm, s1, s2, d); \
+ tcg_temp_free_i32(s1); \
+ tcg_temp_free_i32(s2); \
+ tcg_temp_free_i32(d); \
+ tcg_temp_free_i32(vm); \
+ return true; \
+}
+
+#define GEN_VECTOR_R2_ZIMM(INSN) \
+static bool trans_##INSN(DisasContext *ctx, arg_##INSN *a) \
+{ \
+ TCGv_i32 s1 = tcg_const_i32(a->rs1); \
+ TCGv_i32 zimm = tcg_const_i32(a->zimm); \
+ TCGv_i32 d = tcg_const_i32(a->rd); \
+ gen_helper_vector_##INSN(cpu_env, s1, zimm, d); \
+ tcg_temp_free_i32(s1); \
+ tcg_temp_free_i32(zimm); \
+ tcg_temp_free_i32(d); \
+ return true; \
+}
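+
+/*
+ * Note (descriptive, not changing behaviour): each GEN_VECTOR_* macro
+ * above expands to a decodetree translation callback that only marshals
+ * the decoded fields into i32 constants and defers all semantics to a
+ * vector_<insn> helper; e.g. GEN_VECTOR_R_VM(vadd_vv) yields a
+ * trans_vadd_vv() that calls gen_helper_vector_vadd_vv(cpu_env, vm, s1,
+ * s2, d).
+ */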
+
+GEN_VECTOR_R2_NFVM(vlb_v)
+GEN_VECTOR_R2_NFVM(vlh_v)
+GEN_VECTOR_R2_NFVM(vlw_v)
+GEN_VECTOR_R2_NFVM(vle_v)
+GEN_VECTOR_R2_NFVM(vlbu_v)
+GEN_VECTOR_R2_NFVM(vlhu_v)
+GEN_VECTOR_R2_NFVM(vlwu_v)
+GEN_VECTOR_R2_NFVM(vlbff_v)
+GEN_VECTOR_R2_NFVM(vlhff_v)
+GEN_VECTOR_R2_NFVM(vlwff_v)
+GEN_VECTOR_R2_NFVM(vleff_v)
+GEN_VECTOR_R2_NFVM(vlbuff_v)
+GEN_VECTOR_R2_NFVM(vlhuff_v)
+GEN_VECTOR_R2_NFVM(vlwuff_v)
+GEN_VECTOR_R2_NFVM(vsb_v)
+GEN_VECTOR_R2_NFVM(vsh_v)
+GEN_VECTOR_R2_NFVM(vsw_v)
+GEN_VECTOR_R2_NFVM(vse_v)
+
+GEN_VECTOR_R_NFVM(vlsb_v)
+GEN_VECTOR_R_NFVM(vlsh_v)
+GEN_VECTOR_R_NFVM(vlsw_v)
+GEN_VECTOR_R_NFVM(vlse_v)
+GEN_VECTOR_R_NFVM(vlsbu_v)
+GEN_VECTOR_R_NFVM(vlshu_v)
+GEN_VECTOR_R_NFVM(vlswu_v)
+GEN_VECTOR_R_NFVM(vssb_v)
+GEN_VECTOR_R_NFVM(vssh_v)
+GEN_VECTOR_R_NFVM(vssw_v)
+GEN_VECTOR_R_NFVM(vsse_v)
+GEN_VECTOR_R_NFVM(vlxb_v)
+GEN_VECTOR_R_NFVM(vlxh_v)
+GEN_VECTOR_R_NFVM(vlxw_v)
+GEN_VECTOR_R_NFVM(vlxe_v)
+GEN_VECTOR_R_NFVM(vlxbu_v)
+GEN_VECTOR_R_NFVM(vlxhu_v)
+GEN_VECTOR_R_NFVM(vlxwu_v)
+GEN_VECTOR_R_NFVM(vsxb_v)
+GEN_VECTOR_R_NFVM(vsxh_v)
+GEN_VECTOR_R_NFVM(vsxw_v)
+GEN_VECTOR_R_NFVM(vsxe_v)
+GEN_VECTOR_R_NFVM(vsuxb_v)
+GEN_VECTOR_R_NFVM(vsuxh_v)
+GEN_VECTOR_R_NFVM(vsuxw_v)
+GEN_VECTOR_R_NFVM(vsuxe_v)
+
+GEN_VECTOR_R_WDVM(vamoswapw_v)
+GEN_VECTOR_R_WDVM(vamoswapd_v)
+GEN_VECTOR_R_WDVM(vamoaddw_v)
+GEN_VECTOR_R_WDVM(vamoaddd_v)
+GEN_VECTOR_R_WDVM(vamoxorw_v)
+GEN_VECTOR_R_WDVM(vamoxord_v)
+GEN_VECTOR_R_WDVM(vamoandw_v)
+GEN_VECTOR_R_WDVM(vamoandd_v)
+GEN_VECTOR_R_WDVM(vamoorw_v)
+GEN_VECTOR_R_WDVM(vamoord_v)
+GEN_VECTOR_R_WDVM(vamominw_v)
+GEN_VECTOR_R_WDVM(vamomind_v)
+GEN_VECTOR_R_WDVM(vamomaxw_v)
+GEN_VECTOR_R_WDVM(vamomaxd_v)
+GEN_VECTOR_R_WDVM(vamominuw_v)
+GEN_VECTOR_R_WDVM(vamominud_v)
+GEN_VECTOR_R_WDVM(vamomaxuw_v)
+GEN_VECTOR_R_WDVM(vamomaxud_v)
+
+GEN_VECTOR_R(vext_x_v)
+GEN_VECTOR_R(vfmv_f_s)
+GEN_VECTOR_R(vmv_s_x)
+GEN_VECTOR_R(vfmv_s_f)
+GEN_VECTOR_R(vadc_vvm)
+GEN_VECTOR_R(vadc_vxm)
+GEN_VECTOR_R(vadc_vim)
+GEN_VECTOR_R(vmadc_vvm)
+GEN_VECTOR_R(vmadc_vxm)
+GEN_VECTOR_R(vmadc_vim)
+GEN_VECTOR_R(vsbc_vvm)
+GEN_VECTOR_R(vsbc_vxm)
+GEN_VECTOR_R(vmsbc_vvm)
+GEN_VECTOR_R(vmsbc_vxm)
+GEN_VECTOR_R2_VM(vmpopc_m)
+GEN_VECTOR_R2_VM(vmfirst_m)
+GEN_VECTOR_R(vcompress_vm)
+GEN_VECTOR_R(vmandnot_mm)
+GEN_VECTOR_R(vmand_mm)
+GEN_VECTOR_R(vmor_mm)
+GEN_VECTOR_R(vmxor_mm)
+GEN_VECTOR_R(vmornot_mm)
+GEN_VECTOR_R(vmnand_mm)
+GEN_VECTOR_R(vmnor_mm)
+GEN_VECTOR_R(vmxnor_mm)
+GEN_VECTOR_R2_VM(vmsbf_m)
+GEN_VECTOR_R2_VM(vmsof_m)
+GEN_VECTOR_R2_VM(vmsif_m)
+GEN_VECTOR_R2_VM(viota_m)
+GEN_VECTOR_R1_VM(vid_v)
+GEN_VECTOR_R2_VM(vfcvt_xu_f_v)
+GEN_VECTOR_R2_VM(vfcvt_x_f_v)
+GEN_VECTOR_R2_VM(vfcvt_f_xu_v)
+GEN_VECTOR_R2_VM(vfcvt_f_x_v)
+GEN_VECTOR_R2_VM(vfwcvt_xu_f_v)
+GEN_VECTOR_R2_VM(vfwcvt_x_f_v)
+GEN_VECTOR_R2_VM(vfwcvt_f_xu_v)
+GEN_VECTOR_R2_VM(vfwcvt_f_x_v)
+GEN_VECTOR_R2_VM(vfwcvt_f_f_v)
+GEN_VECTOR_R2_VM(vfncvt_xu_f_v)
+GEN_VECTOR_R2_VM(vfncvt_x_f_v)
+GEN_VECTOR_R2_VM(vfncvt_f_xu_v)
+GEN_VECTOR_R2_VM(vfncvt_f_x_v)
+GEN_VECTOR_R2_VM(vfncvt_f_f_v)
+GEN_VECTOR_R2_VM(vfsqrt_v)
+GEN_VECTOR_R2_VM(vfclass_v)
+
+GEN_VECTOR_R_VM(vadd_vv)
+GEN_VECTOR_R_VM(vadd_vx)
+GEN_VECTOR_R_VM(vadd_vi)
+GEN_VECTOR_R_VM(vredsum_vs)
+GEN_VECTOR_R_VM(vfadd_vv)
+GEN_VECTOR_R_VM(vfadd_vf)
+GEN_VECTOR_R_VM(vredand_vs)
+GEN_VECTOR_R_VM(vfredsum_vs)
+GEN_VECTOR_R_VM(vsub_vv)
+GEN_VECTOR_R_VM(vsub_vx)
+GEN_VECTOR_R_VM(vredor_vs)
+GEN_VECTOR_R_VM(vfsub_vv)
+GEN_VECTOR_R_VM(vfsub_vf)
+GEN_VECTOR_R_VM(vrsub_vx)
+GEN_VECTOR_R_VM(vrsub_vi)
+GEN_VECTOR_R_VM(vredxor_vs)
+GEN_VECTOR_R_VM(vfredosum_vs)
+GEN_VECTOR_R_VM(vminu_vv)
+GEN_VECTOR_R_VM(vminu_vx)
+GEN_VECTOR_R_VM(vredminu_vs)
+GEN_VECTOR_R_VM(vfmin_vv)
+GEN_VECTOR_R_VM(vfmin_vf)
+GEN_VECTOR_R_VM(vmin_vv)
+GEN_VECTOR_R_VM(vmin_vx)
+GEN_VECTOR_R_VM(vredmin_vs)
+GEN_VECTOR_R_VM(vfredmin_vs)
+GEN_VECTOR_R_VM(vmaxu_vv)
+GEN_VECTOR_R_VM(vmaxu_vx)
+GEN_VECTOR_R_VM(vredmaxu_vs)
+GEN_VECTOR_R_VM(vfmax_vv)
+GEN_VECTOR_R_VM(vfmax_vf)
+GEN_VECTOR_R_VM(vmax_vv)
+GEN_VECTOR_R_VM(vmax_vx)
+GEN_VECTOR_R_VM(vredmax_vs)
+GEN_VECTOR_R_VM(vfredmax_vs)
+GEN_VECTOR_R_VM(vfsgnj_vv)
+GEN_VECTOR_R_VM(vfsgnj_vf)
+GEN_VECTOR_R_VM(vand_vv)
+GEN_VECTOR_R_VM(vand_vx)
+GEN_VECTOR_R_VM(vand_vi)
+GEN_VECTOR_R_VM(vfsgnjn_vv)
+GEN_VECTOR_R_VM(vfsgnjn_vf)
+GEN_VECTOR_R_VM(vor_vv)
+GEN_VECTOR_R_VM(vor_vx)
+GEN_VECTOR_R_VM(vor_vi)
+GEN_VECTOR_R_VM(vfsgnjx_vv)
+GEN_VECTOR_R_VM(vfsgnjx_vf)
+GEN_VECTOR_R_VM(vxor_vv)
+GEN_VECTOR_R_VM(vxor_vx)
+GEN_VECTOR_R_VM(vxor_vi)
+GEN_VECTOR_R_VM(vrgather_vv)
+GEN_VECTOR_R_VM(vrgather_vx)
+GEN_VECTOR_R_VM(vrgather_vi)
+GEN_VECTOR_R_VM(vslideup_vx)
+GEN_VECTOR_R_VM(vslideup_vi)
+GEN_VECTOR_R_VM(vslide1up_vx)
+GEN_VECTOR_R_VM(vslidedown_vx)
+GEN_VECTOR_R_VM(vslidedown_vi)
+GEN_VECTOR_R_VM(vslide1down_vx)
+GEN_VECTOR_R_VM(vmerge_vvm)
+GEN_VECTOR_R_VM(vmerge_vxm)
+GEN_VECTOR_R_VM(vmerge_vim)
+GEN_VECTOR_R_VM(vfmerge_vfm)
+GEN_VECTOR_R_VM(vmseq_vv)
+GEN_VECTOR_R_VM(vmseq_vx)
+GEN_VECTOR_R_VM(vmseq_vi)
+GEN_VECTOR_R_VM(vmfeq_vv)
+GEN_VECTOR_R_VM(vmfeq_vf)
+GEN_VECTOR_R_VM(vmsne_vv)
+GEN_VECTOR_R_VM(vmsne_vx)
+GEN_VECTOR_R_VM(vmsne_vi)
+GEN_VECTOR_R_VM(vmfle_vv)
+GEN_VECTOR_R_VM(vmfle_vf)
+GEN_VECTOR_R_VM(vmsltu_vv)
+GEN_VECTOR_R_VM(vmsltu_vx)
+GEN_VECTOR_R_VM(vmford_vv)
+GEN_VECTOR_R_VM(vmford_vf)
+GEN_VECTOR_R_VM(vmslt_vv)
+GEN_VECTOR_R_VM(vmslt_vx)
+GEN_VECTOR_R_VM(vmflt_vv)
+GEN_VECTOR_R_VM(vmflt_vf)
+GEN_VECTOR_R_VM(vmsleu_vv)
+GEN_VECTOR_R_VM(vmsleu_vx)
+GEN_VECTOR_R_VM(vmsleu_vi)
+GEN_VECTOR_R_VM(vmfne_vv)
+GEN_VECTOR_R_VM(vmfne_vf)
+GEN_VECTOR_R_VM(vmsle_vv)
+GEN_VECTOR_R_VM(vmsle_vx)
+GEN_VECTOR_R_VM(vmsle_vi)
+GEN_VECTOR_R_VM(vmfgt_vf)
+GEN_VECTOR_R_VM(vmsgtu_vx)
+GEN_VECTOR_R_VM(vmsgtu_vi)
+GEN_VECTOR_R_VM(vmsgt_vx)
+GEN_VECTOR_R_VM(vmsgt_vi)
+GEN_VECTOR_R_VM(vmfge_vf)
+GEN_VECTOR_R_VM(vsaddu_vv)
+GEN_VECTOR_R_VM(vsaddu_vx)
+GEN_VECTOR_R_VM(vsaddu_vi)
+GEN_VECTOR_R_VM(vdivu_vv)
+GEN_VECTOR_R_VM(vdivu_vx)
+GEN_VECTOR_R_VM(vfdiv_vv)
+GEN_VECTOR_R_VM(vfdiv_vf)
+GEN_VECTOR_R_VM(vsadd_vv)
+GEN_VECTOR_R_VM(vsadd_vx)
+GEN_VECTOR_R_VM(vsadd_vi)
+GEN_VECTOR_R_VM(vdiv_vv)
+GEN_VECTOR_R_VM(vdiv_vx)
+GEN_VECTOR_R_VM(vfrdiv_vf)
+GEN_VECTOR_R_VM(vssubu_vv)
+GEN_VECTOR_R_VM(vssubu_vx)
+GEN_VECTOR_R_VM(vremu_vv)
+GEN_VECTOR_R_VM(vremu_vx)
+GEN_VECTOR_R_VM(vssub_vv)
+GEN_VECTOR_R_VM(vssub_vx)
+GEN_VECTOR_R_VM(vrem_vv)
+GEN_VECTOR_R_VM(vrem_vx)
+GEN_VECTOR_R_VM(vaadd_vv)
+GEN_VECTOR_R_VM(vaadd_vx)
+GEN_VECTOR_R_VM(vaadd_vi)
+GEN_VECTOR_R_VM(vmulhu_vv)
+GEN_VECTOR_R_VM(vmulhu_vx)
+GEN_VECTOR_R_VM(vfmul_vv)
+GEN_VECTOR_R_VM(vfmul_vf)
+GEN_VECTOR_R_VM(vsll_vv)
+GEN_VECTOR_R_VM(vsll_vx)
+GEN_VECTOR_R_VM(vsll_vi)
+GEN_VECTOR_R_VM(vmul_vv)
+GEN_VECTOR_R_VM(vmul_vx)
+GEN_VECTOR_R_VM(vasub_vv)
+GEN_VECTOR_R_VM(vasub_vx)
+GEN_VECTOR_R_VM(vmulhsu_vv)
+GEN_VECTOR_R_VM(vmulhsu_vx)
+GEN_VECTOR_R_VM(vsmul_vv)
+GEN_VECTOR_R_VM(vsmul_vx)
+GEN_VECTOR_R_VM(vmulh_vv)
+GEN_VECTOR_R_VM(vmulh_vx)
+GEN_VECTOR_R_VM(vfrsub_vf)
+GEN_VECTOR_R_VM(vsrl_vv)
+GEN_VECTOR_R_VM(vsrl_vx)
+GEN_VECTOR_R_VM(vsrl_vi)
+GEN_VECTOR_R_VM(vfmadd_vv)
+GEN_VECTOR_R_VM(vfmadd_vf)
+GEN_VECTOR_R_VM(vsra_vv)
+GEN_VECTOR_R_VM(vsra_vx)
+GEN_VECTOR_R_VM(vsra_vi)
+GEN_VECTOR_R_VM(vmadd_vv)
+GEN_VECTOR_R_VM(vmadd_vx)
+GEN_VECTOR_R_VM(vfnmadd_vv)
+GEN_VECTOR_R_VM(vfnmadd_vf)
+GEN_VECTOR_R_VM(vssrl_vv)
+GEN_VECTOR_R_VM(vssrl_vx)
+GEN_VECTOR_R_VM(vssrl_vi)
+GEN_VECTOR_R_VM(vfmsub_vv)
+GEN_VECTOR_R_VM(vfmsub_vf)
+GEN_VECTOR_R_VM(vssra_vv)
+GEN_VECTOR_R_VM(vssra_vx)
+GEN_VECTOR_R_VM(vssra_vi)
+GEN_VECTOR_R_VM(vnmsub_vv)
+GEN_VECTOR_R_VM(vnmsub_vx)
+GEN_VECTOR_R_VM(vfnmsub_vv)
+GEN_VECTOR_R_VM(vfnmsub_vf)
+GEN_VECTOR_R_VM(vnsrl_vv)
+GEN_VECTOR_R_VM(vnsrl_vx)
+GEN_VECTOR_R_VM(vnsrl_vi)
+GEN_VECTOR_R_VM(vfmacc_vv)
+GEN_VECTOR_R_VM(vfmacc_vf)
+GEN_VECTOR_R_VM(vnsra_vv)
+GEN_VECTOR_R_VM(vnsra_vx)
+GEN_VECTOR_R_VM(vnsra_vi)
+GEN_VECTOR_R_VM(vmacc_vv)
+GEN_VECTOR_R_VM(vmacc_vx)
+GEN_VECTOR_R_VM(vfnmacc_vv)
+GEN_VECTOR_R_VM(vfnmacc_vf)
+GEN_VECTOR_R_VM(vnclipu_vv)
+GEN_VECTOR_R_VM(vnclipu_vx)
+GEN_VECTOR_R_VM(vnclipu_vi)
+GEN_VECTOR_R_VM(vfmsac_vv)
+GEN_VECTOR_R_VM(vfmsac_vf)
+GEN_VECTOR_R_VM(vnclip_vv)
+GEN_VECTOR_R_VM(vnclip_vx)
+GEN_VECTOR_R_VM(vnclip_vi)
+GEN_VECTOR_R_VM(vnmsac_vv)
+GEN_VECTOR_R_VM(vnmsac_vx)
+GEN_VECTOR_R_VM(vfnmsac_vv)
+GEN_VECTOR_R_VM(vfnmsac_vf)
+GEN_VECTOR_R_VM(vwredsumu_vs)
+GEN_VECTOR_R_VM(vwaddu_vv)
+GEN_VECTOR_R_VM(vwaddu_vx)
+GEN_VECTOR_R_VM(vfwadd_vv)
+GEN_VECTOR_R_VM(vfwadd_vf)
+GEN_VECTOR_R_VM(vwredsum_vs)
+GEN_VECTOR_R_VM(vwadd_vv)
+GEN_VECTOR_R_VM(vwadd_vx)
+GEN_VECTOR_R_VM(vfwredsum_vs)
+GEN_VECTOR_R_VM(vwsubu_vv)
+GEN_VECTOR_R_VM(vwsubu_vx)
+GEN_VECTOR_R_VM(vfwsub_vv)
+GEN_VECTOR_R_VM(vfwsub_vf)
+GEN_VECTOR_R_VM(vwsub_vv)
+GEN_VECTOR_R_VM(vwsub_vx)
+GEN_VECTOR_R_VM(vfwredosum_vs)
+GEN_VECTOR_R_VM(vwaddu_wv)
+GEN_VECTOR_R_VM(vwaddu_wx)
+GEN_VECTOR_R_VM(vfwadd_wv)
+GEN_VECTOR_R_VM(vfwadd_wf)
+GEN_VECTOR_R_VM(vwadd_wv)
+GEN_VECTOR_R_VM(vwadd_wx)
+GEN_VECTOR_R_VM(vwsubu_wv)
+GEN_VECTOR_R_VM(vwsubu_wx)
+GEN_VECTOR_R_VM(vfwsub_wv)
+GEN_VECTOR_R_VM(vfwsub_wf)
+GEN_VECTOR_R_VM(vwsub_wv)
+GEN_VECTOR_R_VM(vwsub_wx)
+GEN_VECTOR_R_VM(vwmulu_vv)
+GEN_VECTOR_R_VM(vwmulu_vx)
+GEN_VECTOR_R_VM(vfwmul_vv)
+GEN_VECTOR_R_VM(vfwmul_vf)
+GEN_VECTOR_R_VM(vwmulsu_vv)
+GEN_VECTOR_R_VM(vwmulsu_vx)
+GEN_VECTOR_R_VM(vwmul_vv)
+GEN_VECTOR_R_VM(vwmul_vx)
+GEN_VECTOR_R_VM(vwsmaccu_vv)
+GEN_VECTOR_R_VM(vwsmaccu_vx)
+GEN_VECTOR_R_VM(vwmaccu_vv)
+GEN_VECTOR_R_VM(vwmaccu_vx)
+GEN_VECTOR_R_VM(vfwmacc_vv)
+GEN_VECTOR_R_VM(vfwmacc_vf)
+GEN_VECTOR_R_VM(vwsmacc_vv)
+GEN_VECTOR_R_VM(vwsmacc_vx)
+GEN_VECTOR_R_VM(vwmacc_vv)
+GEN_VECTOR_R_VM(vwmacc_vx)
+GEN_VECTOR_R_VM(vfwnmacc_vv)
+GEN_VECTOR_R_VM(vfwnmacc_vf)
+GEN_VECTOR_R_VM(vwsmaccsu_vv)
+GEN_VECTOR_R_VM(vwsmaccsu_vx)
+GEN_VECTOR_R_VM(vwmaccsu_vv)
+GEN_VECTOR_R_VM(vwmaccsu_vx)
+GEN_VECTOR_R_VM(vfwmsac_vv)
+GEN_VECTOR_R_VM(vfwmsac_vf)
+GEN_VECTOR_R_VM(vwsmaccus_vx)
+GEN_VECTOR_R_VM(vwmaccus_vx)
+GEN_VECTOR_R_VM(vfwnmsac_vv)
+GEN_VECTOR_R_VM(vfwnmsac_vf)
+GEN_VECTOR_R2_ZIMM(vsetvli)
+GEN_VECTOR_R(vsetvl)
diff --git a/target/riscv/translate.c b/target/riscv/translate.c
index 8d6ab73..587c23e 100644
--- a/target/riscv/translate.c
+++ b/target/riscv/translate.c
@@ -706,6 +706,7 @@ static bool gen_shift(DisasContext *ctx, arg_r *a,
#include "insn_trans/trans_rva.inc.c"
#include "insn_trans/trans_rvf.inc.c"
#include "insn_trans/trans_rvd.inc.c"
+#include "insn_trans/trans_rvv.inc.c"
#include "insn_trans/trans_privileged.inc.c"
/*
diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c
new file mode 100644
index 0000000..1f8f1ec
--- /dev/null
+++ b/target/riscv/vector_helper.c
@@ -0,0 +1,26563 @@
+/*
+ * RISC-V Vector Extension Helpers for QEMU.
+ *
+ * Copyright (c) 2011-2019 C-SKY Limited. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2 or later, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/log.h"
+#include "cpu.h"
+#include "qemu/main-loop.h"
+#include "exec/exec-all.h"
+#include "exec/helper-proto.h"
+#include "exec/translator.h"
+#include "exec/cpu_ldst.h"
+#include <math.h>
+#include "instmap.h"
+
+#define VECTOR_HELPER(name) HELPER(glue(vector_, name))
+#define SIGNBIT8 (1 << 7)
+#define MAX_U8 ((uint8_t)0xff)
+#define MIN_U8 ((uint8_t)0x0)
+#define MAX_S8 ((int8_t)0x7f)
+#define MIN_S8 ((int8_t)0x80)
+#define SIGNBIT16 (1 << 15)
+#define MAX_U16 ((uint16_t)0xffff)
+#define MIN_U16 ((uint16_t)0x0)
+#define MAX_S16 ((int16_t)0x7fff)
+#define MIN_S16 ((int16_t)0x8000)
+#define SIGNBIT32 (1 << 31)
+#define MAX_U32 ((uint32_t)0xffffffff)
+#define MIN_U32 ((uint32_t)0x0)
+#define MAX_S32 ((int32_t)0x7fffffff)
+#define MIN_S32 ((int32_t)0x80000000)
+#define SIGNBIT64 ((uint64_t)1 << 63)
+#define MAX_U64 ((uint64_t)0xffffffffffffffff)
+#define MIN_U64 ((uint64_t)0x0)
+#define MAX_S64 ((int64_t)0x7fffffffffffffff)
+#define MIN_S64 ((int64_t)0x8000000000000000)
+
+static int64_t sign_extend(int64_t a, int8_t width)
+{
+ return a << (64 - width) >> (64 - width);
+}
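+
+/*
+ * Example (illustrative): with width = 8, sign_extend(0x80, 8) shifts the
+ * byte into the top of the 64-bit value and the arithmetic right shift
+ * copies bit 7 down, yielding (int64_t)-128.  This assumes arithmetic
+ * right shifts of signed values, as QEMU code generally does.
+ */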
+
+static int64_t extend_gpr(target_ulong reg)
+{
+ return sign_extend(reg, sizeof(target_ulong) * 8);
+}
+
+static target_ulong vector_get_index(CPURISCVState *env, int rs1, int rs2,
+ int index, int mem, int width, int nf)
+{
+ target_ulong abs_off, base = env->gpr[rs1];
+ target_long offset;
+ switch (width) {
+ case 8:
+ offset = sign_extend(env->vfp.vreg[rs2].s8[index], 8) + nf * mem;
+ break;
+ case 16:
+ offset = sign_extend(env->vfp.vreg[rs2].s16[index], 16) + nf * mem;
+ break;
+ case 32:
+ offset = sign_extend(env->vfp.vreg[rs2].s32[index], 32) + nf * mem;
+ break;
+ case 64:
+ offset = env->vfp.vreg[rs2].s64[index] + nf * mem;
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return 0;
+ }
+ if (offset < 0) {
+ abs_off = ~offset + 1;
+ if (base >= abs_off) {
+ return base - abs_off;
+ }
+ } else {
+ if ((target_ulong)((target_ulong)offset + base) >= base) {
+ return (target_ulong)offset + base;
+ }
+ }
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return 0;
+}
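+
+/*
+ * Note (descriptive): the branches above make the base + offset
+ * computation explicit so that any address that would wrap around the
+ * target address space is rejected with an illegal-instruction exception
+ * instead of being accessed at a wrapped location.
+ */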
+
+/* ADD/SUB/COMPARE instructions. */
+static inline uint8_t sat_add_u8(CPURISCVState *env, uint8_t a, uint8_t b)
+{
+ uint8_t res = a + b;
+ if (res < a) {
+ res = MAX_U8;
+ env->vfp.vxsat = 0x1;
+ }
+ return res;
+}
+
+static inline uint16_t sat_add_u16(CPURISCVState *env, uint16_t a, uint16_t b)
+{
+ uint16_t res = a + b;
+ if (res < a) {
+ res = MAX_U16;
+ env->vfp.vxsat = 0x1;
+ }
+ return res;
+}
+
+static inline uint32_t sat_add_u32(CPURISCVState *env, uint32_t a, uint32_t b)
+{
+ uint32_t res = a + b;
+ if (res < a) {
+ res = MAX_U32;
+ env->vfp.vxsat = 0x1;
+ }
+ return res;
+}
+
+static inline uint64_t sat_add_u64(CPURISCVState *env, uint64_t a, uint64_t b)
+{
+ uint64_t res = a + b;
+ if (res < a) {
+ res = MAX_U64;
+ env->vfp.vxsat = 0x1;
+ }
+ return res;
+}
+
+static inline uint8_t sat_add_s8(CPURISCVState *env, uint8_t a, uint8_t b)
+{
+ uint8_t res = a + b;
+ if (((res ^ a) & SIGNBIT8) && !((a ^ b) & SIGNBIT8)) {
+ res = ~(((int8_t)a >> 7) ^ SIGNBIT8);
+ env->vfp.vxsat = 0x1;
+ }
+ return res;
+}
+
+static inline uint16_t sat_add_s16(CPURISCVState *env, uint16_t a, uint16_t b)
+{
+ uint16_t res = a + b;
+ if (((res ^ a) & SIGNBIT16) && !((a ^ b) & SIGNBIT16)) {
+ res = ~(((int16_t)a >> 15) ^ SIGNBIT16);
+ env->vfp.vxsat = 0x1;
+ }
+ return res;
+}
+
+static inline uint32_t sat_add_s32(CPURISCVState *env, uint32_t a, uint32_t b)
+{
+ uint32_t res = a + b;
+ if (((res ^ a) & SIGNBIT32) && !((a ^ b) & SIGNBIT32)) {
+ res = ~(((int32_t)a >> 31) ^ SIGNBIT32);
+ env->vfp.vxsat = 0x1;
+ }
+ return res;
+}
+
+static inline uint64_t sat_add_s64(CPURISCVState *env, uint64_t a, uint64_t b)
+{
+ uint64_t res = a + b;
+ if (((res ^ a) & SIGNBIT64) && !((a ^ b) & SIGNBIT64)) {
+ res = ~(((int64_t)a >> 63) ^ SIGNBIT64);
+ env->vfp.vxsat = 0x1;
+ }
+ return res;
+}
+
+static inline uint8_t sat_sub_u8(CPURISCVState *env, uint8_t a, uint8_t b)
+{
+ uint8_t res = a - b;
+ if (res > a) {
+ res = 0;
+ env->vfp.vxsat = 0x1;
+ }
+ return res;
+}
+
+static inline uint16_t sat_sub_u16(CPURISCVState *env, uint16_t a, uint16_t b)
+{
+ uint16_t res = a - b;
+ if (res > a) {
+ res = 0;
+ env->vfp.vxsat = 0x1;
+ }
+ return res;
+}
+
+static inline uint32_t sat_sub_u32(CPURISCVState *env, uint32_t a, uint32_t b)
+{
+ uint32_t res = a - b;
+ if (res > a) {
+ res = 0;
+ env->vfp.vxsat = 0x1;
+ }
+ return res;
+}
+
+static inline uint64_t sat_sub_u64(CPURISCVState *env, uint64_t a, uint64_t b)
+{
+ uint64_t res = a - b;
+ if (res > a) {
+ res = 0;
+ env->vfp.vxsat = 0x1;
+ }
+ return res;
+}
+
+static inline uint8_t sat_sub_s8(CPURISCVState *env, uint8_t a, uint8_t b)
+{
+ uint8_t res = a - b;
+ if (((res ^ a) & SIGNBIT8) && ((a ^ b) & SIGNBIT8)) {
+ res = ~(((int8_t)a >> 7) ^ SIGNBIT8);
+ env->vfp.vxsat = 0x1;
+ }
+ return res;
+}
+
+static inline uint16_t sat_sub_s16(CPURISCVState *env, uint16_t a, uint16_t b)
+{
+ uint16_t res = a - b;
+ if (((res ^ a) & SIGNBIT16) && ((a ^ b) & SIGNBIT16)) {
+ res = ~(((int16_t)a >> 15) ^ SIGNBIT16);
+ env->vfp.vxsat = 0x1;
+ }
+ return res;
+}
+
+static inline uint32_t sat_sub_s32(CPURISCVState *env, uint32_t a, uint32_t b)
+{
+ uint32_t res = a - b;
+ if (((res ^ a) & SIGNBIT32) && ((a ^ b) & SIGNBIT32)) {
+ res = ~(((int32_t)a >> 31) ^ SIGNBIT32);
+ env->vfp.vxsat = 0x1;
+ }
+ return res;
+}
+
+static inline uint64_t sat_sub_s64(CPURISCVState *env, uint64_t a, uint64_t b)
+{
+ uint64_t res = a - b;
+ if (((res ^ a) & SIGNBIT64) && ((a ^ b) & SIGNBIT64)) {
+ res = ~(((int64_t)a >> 63) ^ SIGNBIT64);
+ env->vfp.vxsat = 0x1;
+ }
+ return res;
+}
+
+static uint64_t fix_data_round(CPURISCVState *env, uint64_t result,
+ uint8_t shift)
+{
+ uint64_t lsb_1 = (uint64_t)1 << shift;
+ int mod = env->vfp.vxrm;
+    uint64_t mask = ((uint64_t)1 << shift) - 1;
+
+ if (mod == 0x0) { /* rnu */
+ return lsb_1 >> 1;
+ } else if (mod == 0x1) { /* rne */
+ if ((result & mask) > (lsb_1 >> 1) ||
+ (((result & mask) == (lsb_1 >> 1)) &&
+ (((result >> shift) & 0x1)) == 1)) {
+ return lsb_1 >> 1;
+ }
+ } else if (mod == 0x3) { /* rod */
+ if (((result & mask) >= 0x1) && (((result >> shift) & 0x1) == 0)) {
+ return lsb_1;
+ }
+ }
+ return 0;
+}
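+
+/*
+ * Worked example (illustrative, assuming the vxrm encodings above):
+ * rounding the value 0b1011 right by shift = 2 gives mask = 0b11 and
+ * lsb_1 = 0b100:
+ *   rnu (0): round = 0b10, (11 + 2) >> 2 = 3
+ *   rne (1): fraction 0b11 > half, round = 0b10, (11 + 2) >> 2 = 3
+ *   rdn (2): no increment, 11 >> 2 = 2
+ *   rod (3): fraction non-zero and quotient even, round = 0b100,
+ *            (11 + 4) >> 2 = 3 (odd)
+ */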
+
+static int8_t saturate_s8(CPURISCVState *env, int16_t res)
+{
+ if (res > MAX_S8) {
+ env->vfp.vxsat = 0x1;
+ return MAX_S8;
+ } else if (res < MIN_S8) {
+ env->vfp.vxsat = 0x1;
+ return MIN_S8;
+ } else {
+ return res;
+ }
+}
+
+static uint8_t saturate_u8(CPURISCVState *env, uint16_t res)
+{
+ if (res > MAX_U8) {
+ env->vfp.vxsat = 0x1;
+ return MAX_U8;
+ } else {
+ return res;
+ }
+}
+
+static uint16_t saturate_u16(CPURISCVState *env, uint32_t res)
+{
+ if (res > MAX_U16) {
+ env->vfp.vxsat = 0x1;
+ return MAX_U16;
+ } else {
+ return res;
+ }
+}
+
+static uint32_t saturate_u32(CPURISCVState *env, uint64_t res)
+{
+ if (res > MAX_U32) {
+ env->vfp.vxsat = 0x1;
+ return MAX_U32;
+ } else {
+ return res;
+ }
+}
+
+static int16_t saturate_s16(CPURISCVState *env, int32_t res)
+{
+ if (res > MAX_S16) {
+ env->vfp.vxsat = 0x1;
+ return MAX_S16;
+ } else if (res < MIN_S16) {
+ env->vfp.vxsat = 0x1;
+ return MIN_S16;
+ } else {
+ return res;
+ }
+}
+
+static int32_t saturate_s32(CPURISCVState *env, int64_t res)
+{
+ if (res > MAX_S32) {
+ env->vfp.vxsat = 0x1;
+ return MAX_S32;
+ } else if (res < MIN_S32) {
+ env->vfp.vxsat = 0x1;
+ return MIN_S32;
+ } else {
+ return res;
+ }
+}
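+
+/*
+ * Example (illustrative): saturate_s8(env, 200) exceeds MAX_S8, so it
+ * sets vxsat and returns 127; saturate_s8(env, -200) returns -128.  Each
+ * helper takes its argument in the next wider type, so the comparison
+ * happens before any wrap-around can occur.
+ */
+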
+static uint16_t vwsmaccu_8(CPURISCVState *env, uint8_t a, uint8_t b,
+ uint16_t c)
+{
+ uint16_t round, res;
+ uint16_t product = (uint16_t)a * (uint16_t)b;
+
+ round = (uint16_t)fix_data_round(env, (uint64_t)product, 4);
+ res = (round + product) >> 4;
+ return sat_add_u16(env, c, res);
+}
+
+static uint32_t vwsmaccu_16(CPURISCVState *env, uint16_t a, uint16_t b,
+ uint32_t c)
+{
+ uint32_t round, res;
+ uint32_t product = (uint32_t)a * (uint32_t)b;
+
+ round = (uint32_t)fix_data_round(env, (uint64_t)product, 8);
+ res = (round + product) >> 8;
+ return sat_add_u32(env, c, res);
+}
+
+static uint64_t vwsmaccu_32(CPURISCVState *env, uint32_t a, uint32_t b,
+ uint64_t c)
+{
+ uint64_t round, res;
+ uint64_t product = (uint64_t)a * (uint64_t)b;
+
+ round = (uint64_t)fix_data_round(env, (uint64_t)product, 16);
+ res = (round + product) >> 16;
+ return sat_add_u64(env, c, res);
+}
+
+static int16_t vwsmacc_8(CPURISCVState *env, int8_t a, int8_t b,
+ int16_t c)
+{
+ int16_t round, res;
+ int16_t product = (int16_t)a * (int16_t)b;
+
+ round = (int16_t)fix_data_round(env, (uint64_t)product, 4);
+ res = (int16_t)(round + product) >> 4;
+ return sat_add_s16(env, c, res);
+}
+
+static int32_t vwsmacc_16(CPURISCVState *env, int16_t a, int16_t b,
+ int32_t c)
+{
+ int32_t round, res;
+ int32_t product = (int32_t)a * (int32_t)b;
+
+ round = (int32_t)fix_data_round(env, (uint64_t)product, 8);
+ res = (int32_t)(round + product) >> 8;
+ return sat_add_s32(env, c, res);
+}
+
+static int64_t vwsmacc_32(CPURISCVState *env, int32_t a, int32_t b,
+ int64_t c)
+{
+ int64_t round, res;
+ int64_t product = (int64_t)a * (int64_t)b;
+
+ round = (int64_t)fix_data_round(env, (uint64_t)product, 16);
+ res = (int64_t)(round + product) >> 16;
+ return sat_add_s64(env, c, res);
+}
+
+static int16_t vwsmaccsu_8(CPURISCVState *env, uint8_t a, int8_t b,
+ int16_t c)
+{
+ int16_t round, res;
+ int16_t product = (uint16_t)a * (int16_t)b;
+
+ round = (int16_t)fix_data_round(env, (uint64_t)product, 4);
+ res = (round + product) >> 4;
+ return sat_sub_s16(env, c, res);
+}
+
+static int32_t vwsmaccsu_16(CPURISCVState *env, uint16_t a, int16_t b,
+                            int32_t c)
+{
+ int32_t round, res;
+ int32_t product = (uint32_t)a * (int32_t)b;
+
+ round = (int32_t)fix_data_round(env, (uint64_t)product, 8);
+ res = (round + product) >> 8;
+ return sat_sub_s32(env, c, res);
+}
+
+static int64_t vwsmaccsu_32(CPURISCVState *env, uint32_t a, int32_t b,
+ int64_t c)
+{
+ int64_t round, res;
+ int64_t product = (uint64_t)a * (int64_t)b;
+
+ round = (int64_t)fix_data_round(env, (uint64_t)product, 16);
+ res = (round + product) >> 16;
+ return sat_sub_s64(env, c, res);
+}
+
+static int16_t vwsmaccus_8(CPURISCVState *env, int8_t a, uint8_t b,
+ int16_t c)
+{
+ int16_t round, res;
+ int16_t product = (int16_t)a * (uint16_t)b;
+
+ round = (int16_t)fix_data_round(env, (uint64_t)product, 4);
+ res = (round + product) >> 4;
+ return sat_sub_s16(env, c, res);
+}
+
+static int32_t vwsmaccus_16(CPURISCVState *env, int16_t a, uint16_t b,
+ int32_t c)
+{
+ int32_t round, res;
+ int32_t product = (int32_t)a * (uint32_t)b;
+
+ round = (int32_t)fix_data_round(env, (uint64_t)product, 8);
+ res = (round + product) >> 8;
+ return sat_sub_s32(env, c, res);
+}
+
+static int64_t vwsmaccus_32(CPURISCVState *env, int32_t a, uint32_t b,
+                            int64_t c)
+{
+ int64_t round, res;
+ int64_t product = (int64_t)a * (uint64_t)b;
+
+ round = (int64_t)fix_data_round(env, (uint64_t)product, 16);
+ res = (round + product) >> 16;
+ return sat_sub_s64(env, c, res);
+}
+
+static int8_t vssra_8(CPURISCVState *env, int8_t a, uint8_t b)
+{
+ int16_t round, res;
+ uint8_t shift = b & 0x7;
+
+ round = (int16_t)fix_data_round(env, (uint64_t)a, shift);
+ res = (a + round) >> shift;
+
+ return res;
+}
+
+static int16_t vssra_16(CPURISCVState *env, int16_t a, uint16_t b)
+{
+ int32_t round, res;
+ uint8_t shift = b & 0xf;
+
+ round = (int32_t)fix_data_round(env, (uint64_t)a, shift);
+ res = (a + round) >> shift;
+ return res;
+}
+
+static int32_t vssra_32(CPURISCVState *env, int32_t a, uint32_t b)
+{
+ int64_t round, res;
+ uint8_t shift = b & 0x1f;
+
+ round = (int64_t)fix_data_round(env, (uint64_t)a, shift);
+ res = (a + round) >> shift;
+ return res;
+}
+
+static int64_t vssra_64(CPURISCVState *env, int64_t a, uint64_t b)
+{
+    int64_t round, res;
+    uint8_t shift = b & 0x3f;
+
+    if (shift == 0) {
+        return a;
+    }
+    round = (int64_t)fix_data_round(env, (uint64_t)a, shift);
+    res = (a >> (shift - 1)) + (round >> (shift - 1));
+    return res >> 1;
+}
+
+static int8_t vssrai_8(CPURISCVState *env, int8_t a, uint8_t b)
+{
+ int16_t round, res;
+
+ round = (int16_t)fix_data_round(env, (uint64_t)a, b);
+ res = (a + round) >> b;
+ return res;
+}
+
+static int16_t vssrai_16(CPURISCVState *env, int16_t a, uint8_t b)
+{
+ int32_t round, res;
+
+ round = (int32_t)fix_data_round(env, (uint64_t)a, b);
+ res = (a + round) >> b;
+ return res;
+}
+
+static int32_t vssrai_32(CPURISCVState *env, int32_t a, uint8_t b)
+{
+ int64_t round, res;
+
+ round = (int64_t)fix_data_round(env, (uint64_t)a, b);
+ res = (a + round) >> b;
+ return res;
+}
+
+static int64_t vssrai_64(CPURISCVState *env, int64_t a, uint8_t b)
+{
+    int64_t round, res;
+
+    if (b == 0) {
+        return a;
+    }
+    round = (int64_t)fix_data_round(env, (uint64_t)a, b);
+    res = (a >> (b - 1)) + (round >> (b - 1));
+    return res >> 1;
+}
+
+static int8_t vnclip_16(CPURISCVState *env, int16_t a, uint8_t b)
+{
+ int16_t round, res;
+ uint8_t shift = b & 0xf;
+
+ round = (int16_t)fix_data_round(env, (uint64_t)a, shift);
+ res = (a + round) >> shift;
+
+ return saturate_s8(env, res);
+}
+
+static int16_t vnclip_32(CPURISCVState *env, int32_t a, uint16_t b)
+{
+ int32_t round, res;
+ uint8_t shift = b & 0x1f;
+
+ round = (int32_t)fix_data_round(env, (uint64_t)a, shift);
+ res = (a + round) >> shift;
+ return saturate_s16(env, res);
+}
+
+static int32_t vnclip_64(CPURISCVState *env, int64_t a, uint32_t b)
+{
+ int64_t round, res;
+ uint8_t shift = b & 0x3f;
+
+ round = (int64_t)fix_data_round(env, (uint64_t)a, shift);
+ res = (a + round) >> shift;
+
+ return saturate_s32(env, res);
+}
+
+static int8_t vnclipi_16(CPURISCVState *env, int16_t a, uint8_t b)
+{
+ int16_t round, res;
+
+ round = (int16_t)fix_data_round(env, (uint64_t)a, b);
+ res = (a + round) >> b;
+
+ return saturate_s8(env, res);
+}
+
+static int16_t vnclipi_32(CPURISCVState *env, int32_t a, uint8_t b)
+{
+ int32_t round, res;
+
+ round = (int32_t)fix_data_round(env, (uint64_t)a, b);
+ res = (a + round) >> b;
+
+ return saturate_s16(env, res);
+}
+
+static int32_t vnclipi_64(CPURISCVState *env, int64_t a, uint8_t b)
+{
+    int64_t round, res;
+
+    round = (int64_t)fix_data_round(env, (uint64_t)a, b);
+    res = (a + round) >> b;
+
+    return saturate_s32(env, res);
+}
+
+static uint8_t vnclipu_16(CPURISCVState *env, uint16_t a, uint8_t b)
+{
+ uint16_t round, res;
+ uint8_t shift = b & 0xf;
+
+ round = (uint16_t)fix_data_round(env, (uint64_t)a, shift);
+ res = (a + round) >> shift;
+
+ return saturate_u8(env, res);
+}
+
+static uint16_t vnclipu_32(CPURISCVState *env, uint32_t a, uint16_t b)
+{
+ uint32_t round, res;
+ uint8_t shift = b & 0x1f;
+
+ round = (uint32_t)fix_data_round(env, (uint64_t)a, shift);
+ res = (a + round) >> shift;
+
+ return saturate_u16(env, res);
+}
+
+static uint32_t vnclipu_64(CPURISCVState *env, uint64_t a, uint32_t b)
+{
+ uint64_t round, res;
+ uint8_t shift = b & 0x3f;
+
+ round = (uint64_t)fix_data_round(env, (uint64_t)a, shift);
+ res = (a + round) >> shift;
+
+ return saturate_u32(env, res);
+}
+
+static uint8_t vnclipui_16(CPURISCVState *env, uint16_t a, uint8_t b)
+{
+ uint16_t round, res;
+
+ round = (uint16_t)fix_data_round(env, (uint64_t)a, b);
+ res = (a + round) >> b;
+
+ return saturate_u8(env, res);
+}
+
+static uint16_t vnclipui_32(CPURISCVState *env, uint32_t a, uint8_t b)
+{
+ uint32_t round, res;
+
+ round = (uint32_t)fix_data_round(env, (uint64_t)a, b);
+ res = (a + round) >> b;
+
+ return saturate_u16(env, res);
+}
+
+static uint32_t vnclipui_64(CPURISCVState *env, uint64_t a, uint8_t b)
+{
+ uint64_t round, res;
+
+ round = (uint64_t)fix_data_round(env, (uint64_t)a, b);
+ res = (a + round) >> b;
+
+ return saturate_u32(env, res);
+}
+
+static uint8_t vssrl_8(CPURISCVState *env, uint8_t a, uint8_t b)
+{
+ uint16_t round, res;
+ uint8_t shift = b & 0x7;
+
+ round = (uint16_t)fix_data_round(env, (uint64_t)a, shift);
+ res = (a + round) >> shift;
+ return res;
+}
+
+static uint16_t vssrl_16(CPURISCVState *env, uint16_t a, uint16_t b)
+{
+ uint32_t round, res;
+ uint8_t shift = b & 0xf;
+
+ round = (uint32_t)fix_data_round(env, (uint64_t)a, shift);
+ res = (a + round) >> shift;
+ return res;
+}
+
+static uint32_t vssrl_32(CPURISCVState *env, uint32_t a, uint32_t b)
+{
+ uint64_t round, res;
+ uint8_t shift = b & 0x1f;
+
+ round = (uint64_t)fix_data_round(env, (uint64_t)a, shift);
+ res = (a + round) >> shift;
+ return res;
+}
+
+static uint64_t vssrl_64(CPURISCVState *env, uint64_t a, uint64_t b)
+{
+    uint64_t round, res;
+    uint8_t shift = b & 0x3f;
+
+    if (shift == 0) {
+        return a;
+    }
+    round = (uint64_t)fix_data_round(env, (uint64_t)a, shift);
+    res = (a >> (shift - 1)) + (round >> (shift - 1));
+    return res >> 1;
+}
+
+static uint8_t vssrli_8(CPURISCVState *env, uint8_t a, uint8_t b)
+{
+ uint16_t round, res;
+
+ round = (uint16_t)fix_data_round(env, (uint64_t)a, b);
+ res = (a + round) >> b;
+ return res;
+}
+
+static uint16_t vssrli_16(CPURISCVState *env, uint16_t a, uint8_t b)
+{
+ uint32_t round, res;
+
+ round = (uint32_t)fix_data_round(env, (uint64_t)a, b);
+ res = (a + round) >> b;
+ return res;
+}
+
+static uint32_t vssrli_32(CPURISCVState *env, uint32_t a, uint8_t b)
+{
+ uint64_t round, res;
+
+ round = (uint64_t)fix_data_round(env, (uint64_t)a, b);
+ res = (a + round) >> b;
+ return res;
+}
+
+static uint64_t vssrli_64(CPURISCVState *env, uint64_t a, uint8_t b)
+{
+    uint64_t round, res;
+
+    if (b == 0) {
+        return a;
+    }
+    round = (uint64_t)fix_data_round(env, (uint64_t)a, b);
+    res = (a >> (b - 1)) + (round >> (b - 1));
+    return res >> 1;
+}
+
+static int8_t vsmul_8(CPURISCVState *env, int8_t a, int8_t b)
+{
+ int16_t round;
+ int8_t res;
+ int16_t product = (int16_t)a * (int16_t)b;
+
+ if (a == MIN_S8 && b == MIN_S8) {
+ env->vfp.vxsat = 1;
+ return MAX_S8;
+ }
+
+ round = (int16_t)fix_data_round(env, (uint64_t)product, 7);
+ res = sat_add_s16(env, product, round) >> 7;
+ return res;
+}
+
+static int16_t vsmul_16(CPURISCVState *env, int16_t a, int16_t b)
+{
+ int32_t round;
+ int16_t res;
+ int32_t product = (int32_t)a * (int32_t)b;
+
+ if (a == MIN_S16 && b == MIN_S16) {
+ env->vfp.vxsat = 1;
+ return MAX_S16;
+ }
+
+ round = (int32_t)fix_data_round(env, (uint64_t)product, 15);
+ res = sat_add_s32(env, product, round) >> 15;
+ return res;
+}
+
+static int32_t vsmul_32(CPURISCVState *env, int32_t a, int32_t b)
+{
+ int64_t round;
+ int32_t res;
+ int64_t product = (int64_t)a * (int64_t)b;
+
+ if (a == MIN_S32 && b == MIN_S32) {
+ env->vfp.vxsat = 1;
+ return MAX_S32;
+ }
+
+ round = (int64_t)fix_data_round(env, (uint64_t)product, 31);
+ res = sat_add_s64(env, product, round) >> 31;
+ return res;
+}
+
+static int64_t vsmul_64(CPURISCVState *env, int64_t a, int64_t b)
+{
+ int64_t res;
+ uint64_t abs_a = a, abs_b = b;
+ uint64_t lo_64, hi_64, carry, round;
+
+ if (a == MIN_S64 && b == MIN_S64) {
+ env->vfp.vxsat = 1;
+ return MAX_S64;
+ }
+
+ if (a < 0) {
+ abs_a = ~a + 1;
+ }
+ if (b < 0) {
+ abs_b = ~b + 1;
+ }
+
+ /* first get the whole product in {hi_64, lo_64} */
+ uint64_t a_hi = abs_a >> 32;
+ uint64_t a_lo = (uint32_t)abs_a;
+ uint64_t b_hi = abs_b >> 32;
+ uint64_t b_lo = (uint32_t)abs_b;
+
+ /*
+ * abs_a * abs_b = (a_hi << 32 + a_lo) * (b_hi << 32 + b_lo)
+ * = (a_hi * b_hi) << 64 + (a_hi * b_lo) << 32 +
+ * (a_lo * b_hi) << 32 + a_lo * b_lo
+ * = {hi_64, lo_64}
+ * hi_64 = ((a_hi * b_lo) << 32 + (a_lo * b_hi) << 32 + (a_lo * b_lo)) >> 64
+ * = (a_hi * b_lo) >> 32 + (a_lo * b_hi) >> 32 + carry
+ * carry = ((uint64_t)(uint32_t)(a_hi * b_lo) +
+ * (uint64_t)(uint32_t)(a_lo * b_hi) + (a_lo * b_lo) >> 32) >> 32
+ */
+
+ lo_64 = abs_a * abs_b;
+ carry = ((uint64_t)(uint32_t)(a_hi * b_lo) +
+ (uint64_t)(uint32_t)(a_lo * b_hi) +
+ ((a_lo * b_lo) >> 32)) >> 32;
+
+ hi_64 = a_hi * b_hi +
+ ((a_hi * b_lo) >> 32) + ((a_lo * b_hi) >> 32) +
+ carry;
+
+ if ((a ^ b) & SIGNBIT64) {
+ lo_64 = ~lo_64;
+ hi_64 = ~hi_64;
+ if (lo_64 == MAX_U64) {
+ lo_64 = 0;
+ hi_64 += 1;
+ } else {
+ lo_64 += 1;
+ }
+ }
+
+    /* round lo_64 and compose the high part of the product */
+ round = fix_data_round(env, lo_64, 63);
+ if ((lo_64 + round) < lo_64) {
+ hi_64 += 1;
+ res = (hi_64 << 1);
+ } else {
+ res = (hi_64 << 1) | ((lo_64 + round) >> 63);
+ }
+
+ return res;
+}
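+
+/*
+ * Note (illustrative): vsmul computes (a * b) >> (SEW - 1) with rounding,
+ * e.g. for SEW = 8, vsmul_8(0x40, 0x40) = (0x1000 + 0x40) >> 7 = 0x20
+ * under rnu rounding.  The only overflowing case is MIN * MIN, which
+ * saturates to MAX with vxsat set.
+ */
+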
+static inline int8_t avg_round_s8(CPURISCVState *env, int8_t a, int8_t b)
+{
+ int16_t round;
+ int8_t res;
+ int16_t sum = a + b;
+
+ round = (int16_t)fix_data_round(env, (uint64_t)sum, 1);
+ res = (sum + round) >> 1;
+
+ return res;
+}
+
+static inline int16_t avg_round_s16(CPURISCVState *env, int16_t a, int16_t b)
+{
+ int32_t round;
+ int16_t res;
+ int32_t sum = a + b;
+
+ round = (int32_t)fix_data_round(env, (uint64_t)sum, 1);
+ res = (sum + round) >> 1;
+
+ return res;
+}
+
+static inline int32_t avg_round_s32(CPURISCVState *env, int32_t a, int32_t b)
+{
+ int64_t round;
+ int32_t res;
+ int64_t sum = a + b;
+
+ round = (int64_t)fix_data_round(env, (uint64_t)sum, 1);
+ res = (sum + round) >> 1;
+
+ return res;
+}
+
+static inline int64_t avg_round_s64(CPURISCVState *env, int64_t a, int64_t b)
+{
+ int64_t rem = (a & 0x1) + (b & 0x1);
+ int64_t res = (a >> 1) + (b >> 1) + (rem >> 1);
+ int mod = env->vfp.vxrm;
+
+ if (mod == 0x0) { /* rnu */
+ if (rem == 0x1) {
+ return res + 1;
+ }
+ } else if (mod == 0x1) { /* rne */
+ if ((rem & 0x1) == 1 && ((res & 0x1) == 1)) {
+ return res + 1;
+ }
+ } else if (mod == 0x3) { /* rod */
+ if (((rem & 0x1) >= 0x1) && (res & 0x1) == 0) {
+ return res + 1;
+ }
+ }
+ return res;
+}
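+
+/*
+ * Example (illustrative): avg_round_s8(env, 3, 4) forms sum = 7; under
+ * rnu (vxrm = 0) the round increment is 1, so the result is
+ * (7 + 1) >> 1 = 4.  The 64-bit variant shifts before adding to avoid
+ * overflowing int64_t.
+ */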
+
+static target_ulong helper_fclass_h(uint64_t frs1)
+{
+ float16 f = frs1;
+ bool sign = float16_is_neg(f);
+
+ if (float16_is_infinity(f)) {
+ return sign ? 1 << 0 : 1 << 7;
+ } else if (float16_is_zero(f)) {
+ return sign ? 1 << 3 : 1 << 4;
+ } else if (float16_is_zero_or_denormal(f)) {
+ return sign ? 1 << 2 : 1 << 5;
+ } else if (float16_is_any_nan(f)) {
+ float_status s = { }; /* for snan_bit_is_one */
+ return float16_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8;
+ } else {
+ return sign ? 1 << 1 : 1 << 6;
+ }
+}
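+
+/*
+ * The bit positions returned above follow the scalar FCLASS encoding:
+ * 0: -inf, 1: negative normal, 2: negative subnormal, 3: -0, 4: +0,
+ * 5: positive subnormal, 6: positive normal, 7: +inf, 8: signaling NaN,
+ * 9: quiet NaN.
+ */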
+
+static inline bool vector_vtype_ill(CPURISCVState *env)
+{
+    if ((env->vfp.vtype >> (sizeof(target_ulong) * 8 - 1)) & 0x1) {
+ return true;
+ }
+ return false;
+}
+
+static inline void vector_vtype_set_ill(CPURISCVState *env)
+{
+    env->vfp.vtype = ((target_ulong)1) << (sizeof(target_ulong) * 8 - 1);
+ return;
+}
+
+static inline int vector_vtype_get_sew(CPURISCVState *env)
+{
+ return (env->vfp.vtype >> 2) & 0x7;
+}
+
+static inline int vector_get_width(CPURISCVState *env)
+{
+ return 8 * (1 << vector_vtype_get_sew(env));
+}
+
+static inline int vector_get_lmul(CPURISCVState *env)
+{
+ return 1 << (env->vfp.vtype & 0x3);
+}
+
+static inline int vector_get_vlmax(CPURISCVState *env)
+{
+ return vector_get_lmul(env) * VLEN / vector_get_width(env);
+}
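+
+/*
+ * Example (illustrative, taking VLEN = 128): with SEW = 32 and LMUL = 2,
+ * VLMAX = 2 * 128 / 32 = 8 elements per vector operation.
+ */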
+
+static inline int vector_elem_mask(CPURISCVState *env, uint32_t vm, int width,
+ int lmul, int index)
+{
+ int mlen = width / lmul;
+ int idx = (index * mlen) / 8;
+ int pos = (index * mlen) % 8;
+
+ return vm || ((env->vfp.vreg[0].u8[idx] >> pos) & 0x1);
+}
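+
+/*
+ * Example (illustrative): with SEW = 16 and LMUL = 2 each mask element
+ * occupies mlen = 8 bits of v0, so element i is bit 0 of byte i; with
+ * SEW = 8 and LMUL = 8, mlen = 1 and eight mask bits share one byte.
+ */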
+
+static inline bool vector_overlap_vm_common(int lmul, int vm, int rd)
+{
+ if (lmul > 1 && vm == 0 && rd == 0) {
+ return true;
+ }
+ return false;
+}
+
+static inline bool vector_overlap_vm_force(int vm, int rd)
+{
+ if (vm == 0 && rd == 0) {
+ return true;
+ }
+ return false;
+}
+
+static inline bool vector_overlap_carry(int lmul, int rd)
+{
+ if (lmul > 1 && rd == 0) {
+ return true;
+ }
+ return false;
+}
+
+static inline bool vector_overlap_dstgp_srcgp(int rd, int dlen, int rs,
+ int slen)
+{
+ if ((rd >= rs && rd < rs + slen) || (rs >= rd && rs < rd + dlen)) {
+ return true;
+ }
+ return false;
+}
+
+static inline uint64_t vector_get_mask(int start, int end)
+{
+ return ((uint64_t)(~((uint64_t)0))) << (63 - end + start) >> (63 - end);
+}
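+
+/* Example (illustrative): vector_get_mask(4, 7) == 0xf0. */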
+
+/* fetch unsigned element by width */
+static inline uint64_t vector_get_iu_elem(CPURISCVState *env, uint32_t width,
+ uint32_t rs2, uint32_t index)
+{
+ uint64_t elem;
+ if (width == 8) {
+ elem = env->vfp.vreg[rs2].u8[index];
+ } else if (width == 16) {
+ elem = env->vfp.vreg[rs2].u16[index];
+ } else if (width == 32) {
+ elem = env->vfp.vreg[rs2].u32[index];
+ } else if (width == 64) {
+ elem = env->vfp.vreg[rs2].u64[index];
+    } else { /* max(XLEN, FLEN) never exceeds 64, so wider is illegal */
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return 0;
+ }
+ return elem;
+}
+
+static inline int vector_mask_reg(CPURISCVState *env, uint32_t reg, int width,
+ int lmul, int index)
+{
+ int mlen = width / lmul;
+ int idx = (index * mlen) / 8;
+ int pos = (index * mlen) % 8;
+ return (env->vfp.vreg[reg].u8[idx] >> pos) & 0x1;
+}
+
+static inline void vector_mask_result(CPURISCVState *env, uint32_t reg,
+ int width, int lmul, int index, uint32_t result)
+{
+ int mlen = width / lmul;
+ int idx = (index * mlen) / width;
+ int pos = (index * mlen) % width;
+ uint64_t mask = ~((((uint64_t)1 << mlen) - 1) << pos);
+
+ switch (width) {
+ case 8:
+ env->vfp.vreg[reg].u8[idx] = (env->vfp.vreg[reg].u8[idx] & mask)
+ | (result << pos);
+ break;
+ case 16:
+ env->vfp.vreg[reg].u16[idx] = (env->vfp.vreg[reg].u16[idx] & mask)
+ | (result << pos);
+ break;
+ case 32:
+ env->vfp.vreg[reg].u32[idx] = (env->vfp.vreg[reg].u32[idx] & mask)
+ | (result << pos);
+ break;
+ case 64:
+ env->vfp.vreg[reg].u64[idx] = (env->vfp.vreg[reg].u64[idx] & mask)
+ | ((uint64_t)result << pos);
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ break;
+ }
+
+ return;
+}
+
+/**
+ * deposit16:
+ * @value: initial value to insert bit field into
+ * @start: the lowest bit in the bit field (numbered from 0)
+ * @length: the length of the bit field
+ * @fieldval: the value to insert into the bit field
+ *
+ * Deposit @fieldval into the 16 bit @value at the bit field specified
+ * by the @start and @length parameters, and return the modified
+ * @value. Bits of @value outside the bit field are not modified.
+ * Bits of @fieldval above the least significant @length bits are
+ * ignored. The bit field must lie entirely within the 16 bit word.
+ * It is valid to request that all 16 bits are modified (ie @length
+ * 16 and @start 0).
+ *
+ * Returns: the modified @value.
+ */
+static inline uint16_t deposit16(uint16_t value, int start, int length,
+ uint16_t fieldval)
+{
+ uint16_t mask;
+ assert(start >= 0 && length > 0 && length <= 16 - start);
+    mask = (~0U >> (32 - length)) << start;
+ return (value & ~mask) | ((fieldval << start) & mask);
+}
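+
+/*
+ * Usage sketch (illustrative): deposit16(0xabcd, 4, 8, 0xff) == 0xaffd;
+ * bits outside the [4, 11] field keep their original value.
+ */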
+
+static void vector_tail_amo(CPURISCVState *env, int vreg, int index, int width)
+{
+ switch (width) {
+ case 32:
+ env->vfp.vreg[vreg].u32[index] = 0;
+ break;
+ case 64:
+ env->vfp.vreg[vreg].u64[index] = 0;
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+}
+
+static void vector_tail_common(CPURISCVState *env, int vreg, int index,
+ int width)
+{
+ switch (width) {
+ case 8:
+ env->vfp.vreg[vreg].u8[index] = 0;
+ break;
+ case 16:
+ env->vfp.vreg[vreg].u16[index] = 0;
+ break;
+ case 32:
+ env->vfp.vreg[vreg].u32[index] = 0;
+ break;
+ case 64:
+ env->vfp.vreg[vreg].u64[index] = 0;
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+}
+
+static void vector_tail_segment(CPURISCVState *env, int vreg, int index,
+ int width, int nf, int lmul)
+{
+ switch (width) {
+ case 8:
+ while (nf >= 0) {
+ env->vfp.vreg[vreg + nf * lmul].u8[index] = 0;
+ nf--;
+ }
+ break;
+ case 16:
+ while (nf >= 0) {
+ env->vfp.vreg[vreg + nf * lmul].u16[index] = 0;
+ nf--;
+ }
+ break;
+ case 32:
+ while (nf >= 0) {
+ env->vfp.vreg[vreg + nf * lmul].u32[index] = 0;
+ nf--;
+ }
+ break;
+ case 64:
+ while (nf >= 0) {
+ env->vfp.vreg[vreg + nf * lmul].u64[index] = 0;
+ nf--;
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+}
+
+static void vector_tail_widen(CPURISCVState *env, int vreg, int index,
+ int width)
+{
+ switch (width) {
+ case 8:
+ env->vfp.vreg[vreg].u16[index] = 0;
+ break;
+ case 16:
+ env->vfp.vreg[vreg].u32[index] = 0;
+ break;
+ case 32:
+ env->vfp.vreg[vreg].u64[index] = 0;
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+}
+
+static void vector_tail_narrow(CPURISCVState *env, int vreg, int index,
+ int width)
+{
+ switch (width) {
+ case 8:
+ env->vfp.vreg[vreg].u8[index] = 0;
+ break;
+ case 16:
+ env->vfp.vreg[vreg].u16[index] = 0;
+ break;
+ case 32:
+ env->vfp.vreg[vreg].u32[index] = 0;
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+}
+
+static void vector_tail_fcommon(CPURISCVState *env, int vreg, int index,
+ int width)
+{
+ switch (width) {
+ case 16:
+ env->vfp.vreg[vreg].u16[index] = 0;
+ break;
+ case 32:
+ env->vfp.vreg[vreg].u32[index] = 0;
+ break;
+ case 64:
+ env->vfp.vreg[vreg].u64[index] = 0;
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+}
+
+static void vector_tail_fwiden(CPURISCVState *env, int vreg, int index,
+ int width)
+{
+ switch (width) {
+ case 16:
+ env->vfp.vreg[vreg].u32[index] = 0;
+ break;
+ case 32:
+ env->vfp.vreg[vreg].u64[index] = 0;
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+}
+
+static void vector_tail_fnarrow(CPURISCVState *env, int vreg, int index,
+ int width)
+{
+ switch (width) {
+ case 16:
+ env->vfp.vreg[vreg].u16[index] = 0;
+ break;
+ case 32:
+ env->vfp.vreg[vreg].u32[index] = 0;
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+}
+
+static inline int vector_get_carry(CPURISCVState *env, int width, int lmul,
+ int index)
+{
+ int mlen = width / lmul;
+ int idx = (index * mlen) / 8;
+ int pos = (index * mlen) % 8;
+
+ return (env->vfp.vreg[0].u8[idx] >> pos) & 0x1;
+}
+
+static inline void vector_get_layout(CPURISCVState *env, int width, int lmul,
+ int index, int *idx, int *pos)
+{
+ int mlen = width / lmul;
+ *idx = (index * mlen) / 8;
+ *pos = (index * mlen) % 8;
+}
+
+static bool vector_lmul_check_reg(CPURISCVState *env, uint32_t lmul,
+ uint32_t reg, bool widen)
+{
+ int legal = widen ? (lmul * 2) : lmul;
+
+ if ((lmul != 1 && lmul != 2 && lmul != 4 && lmul != 8) ||
+ (lmul == 8 && widen)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return false;
+ }
+
+ if (reg % legal != 0) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return false;
+ }
+ return true;
+}
+
+static inline uint64_t u64xu64_lh(uint64_t a, uint64_t b)
+{
+ uint64_t hi_64, carry;
+
+ /* first get the whole product in {hi_64, lo_64} */
+ uint64_t a_hi = a >> 32;
+ uint64_t a_lo = (uint32_t)a;
+ uint64_t b_hi = b >> 32;
+ uint64_t b_lo = (uint32_t)b;
+
+ /*
+ * a * b = (a_hi << 32 + a_lo) * (b_hi << 32 + b_lo)
+ * = (a_hi * b_hi) << 64 + (a_hi * b_lo) << 32 +
+ * (a_lo * b_hi) << 32 + a_lo * b_lo
+ * = {hi_64, lo_64}
+ * hi_64 = ((a_hi * b_lo) << 32 + (a_lo * b_hi) << 32 + (a_lo * b_lo)) >> 64
+ * = (a_hi * b_lo) >> 32 + (a_lo * b_hi) >> 32 + carry
+ * carry = ((uint64_t)(uint32_t)(a_hi * b_lo) +
+ * (uint64_t)(uint32_t)(a_lo * b_hi) + (a_lo * b_lo) >> 32) >> 32
+ */
+
+ carry = ((uint64_t)(uint32_t)(a_hi * b_lo) +
+ (uint64_t)(uint32_t)(a_lo * b_hi) +
+ ((a_lo * b_lo) >> 32)) >> 32;
+
+ hi_64 = a_hi * b_hi +
+ ((a_hi * b_lo) >> 32) + ((a_lo * b_hi) >> 32) +
+ carry;
+
+ return hi_64;
+}
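+
+/*
+ * Sanity check (illustrative): u64xu64_lh(1ULL << 32, 1ULL << 32) returns
+ * 1, since the 128-bit product is exactly 1 << 64; the carry term collects
+ * the overflow from the two cross products plus the low partial product.
+ */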
+
+static inline int64_t s64xu64_lh(int64_t a, uint64_t b)
+{
+ uint64_t abs_a = a;
+ uint64_t lo_64, hi_64, carry;
+
+ if (a < 0) {
+ abs_a = ~a + 1;
+ }
+
+ /* first get the whole product in {hi_64, lo_64} */
+ uint64_t a_hi = abs_a >> 32;
+ uint64_t a_lo = (uint32_t)abs_a;
+ uint64_t b_hi = b >> 32;
+ uint64_t b_lo = (uint32_t)b;
+
+ /*
+ * abs_a * b = (a_hi << 32 + a_lo) * (b_hi << 32 + b_lo)
+ * = (a_hi * b_hi) << 64 + (a_hi * b_lo) << 32 +
+ * (a_lo * b_hi) << 32 + a_lo * b_lo
+ * = {hi_64, lo_64}
+ * hi_64 = ((a_hi * b_lo) << 32 + (a_lo * b_hi) << 32 + (a_lo * b_lo)) >> 64
+ * = (a_hi * b_lo) >> 32 + (a_lo * b_hi) >> 32 + carry
+ * carry = ((uint64_t)(uint32_t)(a_hi * b_lo) +
+ * (uint64_t)(uint32_t)(a_lo * b_hi) + (a_lo * b_lo) >> 32) >> 32
+ */
+
+ lo_64 = abs_a * b;
+ carry = ((uint64_t)(uint32_t)(a_hi * b_lo) +
+ (uint64_t)(uint32_t)(a_lo * b_hi) +
+ ((a_lo * b_lo) >> 32)) >> 32;
+
+ hi_64 = a_hi * b_hi +
+ ((a_hi * b_lo) >> 32) + ((a_lo * b_hi) >> 32) +
+ carry;
+    if (a < 0) {
+ lo_64 = ~lo_64;
+ hi_64 = ~hi_64;
+ if (lo_64 == MAX_U64) {
+ lo_64 = 0;
+ hi_64 += 1;
+ } else {
+ lo_64 += 1;
+ }
+ }
+ return hi_64;
+}
+
+static inline int64_t s64xs64_lh(int64_t a, int64_t b)
+{
+ uint64_t abs_a = a, abs_b = b;
+ uint64_t lo_64, hi_64, carry;
+
+ if (a < 0) {
+ abs_a = ~a + 1;
+ }
+ if (b < 0) {
+ abs_b = ~b + 1;
+ }
+
+ /* first get the whole product in {hi_64, lo_64} */
+ uint64_t a_hi = abs_a >> 32;
+ uint64_t a_lo = (uint32_t)abs_a;
+ uint64_t b_hi = abs_b >> 32;
+ uint64_t b_lo = (uint32_t)abs_b;
+
+ /*
+ * abs_a * abs_b = (a_hi << 32 + a_lo) * (b_hi << 32 + b_lo)
+ * = (a_hi * b_hi) << 64 + (a_hi * b_lo) << 32 +
+ * (a_lo * b_hi) << 32 + a_lo * b_lo
+ * = {hi_64, lo_64}
+ * hi_64 = ((a_hi * b_lo) << 32 + (a_lo * b_hi) << 32 + (a_lo * b_lo)) >> 64
+ * = (a_hi * b_lo) >> 32 + (a_lo * b_hi) >> 32 + carry
+ * carry = ((uint64_t)(uint32_t)(a_hi * b_lo) +
+ * (uint64_t)(uint32_t)(a_lo * b_hi) + (a_lo * b_lo) >> 32) >> 32
+ */
+
+ lo_64 = abs_a * abs_b;
+ carry = ((uint64_t)(uint32_t)(a_hi * b_lo) +
+ (uint64_t)(uint32_t)(a_lo * b_hi) +
+ ((a_lo * b_lo) >> 32)) >> 32;
+
+ hi_64 = a_hi * b_hi +
+ ((a_hi * b_lo) >> 32) + ((a_lo * b_hi) >> 32) +
+ carry;
+
+ if ((a ^ b) & SIGNBIT64) {
+ lo_64 = ~lo_64;
+ hi_64 = ~hi_64;
+ if (lo_64 == MAX_U64) {
+ lo_64 = 0;
+ hi_64 += 1;
+ } else {
+ lo_64 += 1;
+ }
+ }
+ return hi_64;
+}
+
+void VECTOR_HELPER(vsetvl)(CPURISCVState *env, uint32_t rs1, uint32_t rs2,
+ uint32_t rd)
+{
+ int sew, max_sew, vlmax, vl;
+
+ if (rs2 == 0) {
+ vector_vtype_set_ill(env);
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ env->vfp.vtype = env->gpr[rs2];
+    sew = vector_get_width(env) / 8;
+ max_sew = sizeof(target_ulong);
+
+ if (env->misa & RVD) {
+ max_sew = max_sew > 8 ? max_sew : 8;
+ } else if (env->misa & RVF) {
+ max_sew = max_sew > 4 ? max_sew : 4;
+ }
+ if (sew > max_sew) {
+ vector_vtype_set_ill(env);
+ return;
+ }
+
+ vlmax = vector_get_vlmax(env);
+ if (rs1 == 0) {
+ vl = vlmax;
+ } else if (env->gpr[rs1] <= vlmax) {
+ vl = env->gpr[rs1];
+ } else if (env->gpr[rs1] < 2 * vlmax) {
+        vl = (env->gpr[rs1] + 1) / 2; /* integer ceil(AVL / 2) */
+ } else {
+ vl = vlmax;
+ }
+ env->vfp.vl = vl;
+ env->gpr[rd] = vl;
+    env->vfp.vstart = 0;
+    return;
+}
+
+void VECTOR_HELPER(vsetvli)(CPURISCVState *env, uint32_t rs1, uint32_t zimm,
+ uint32_t rd)
+{
+ int sew, max_sew, vlmax, vl;
+
+ env->vfp.vtype = zimm;
+ sew = vector_get_width(env) / 8;
+ max_sew = sizeof(target_ulong);
+
+ if (env->misa & RVD) {
+ max_sew = max_sew > 8 ? max_sew : 8;
+ } else if (env->misa & RVF) {
+ max_sew = max_sew > 4 ? max_sew : 4;
+ }
+ if (sew > max_sew) {
+ vector_vtype_set_ill(env);
+ return;
+ }
+
+ vlmax = vector_get_vlmax(env);
+ if (rs1 == 0) {
+ vl = vlmax;
+ } else if (env->gpr[rs1] <= vlmax) {
+ vl = env->gpr[rs1];
+ } else if (env->gpr[rs1] < 2 * vlmax) {
+        vl = (env->gpr[rs1] + 1) / 2; /* integer ceil(AVL / 2) */
+ } else {
+ vl = vlmax;
+ }
+ env->vfp.vl = vl;
+ env->gpr[rd] = vl;
+    env->vfp.vstart = 0;
+    return;
+}
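+
+/*
+ * Example (illustrative): with VLMAX = 8, an application vector length of
+ * 10 in rs1 falls in the (VLMAX, 2 * VLMAX) range, so vl is set to
+ * ceil(10 / 2) = 5 and the remaining elements are handled by the next
+ * stripmining iteration.
+ */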
+
+/*
+ * vrgather.vv vd, vs2, vs1, vm #
+ * vd[i] = (vs1[i] >= VLMAX) ? 0 : vs2[vs1[i]];
+ */
+void VECTOR_HELPER(vrgather_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int width, lmul, vl, vlmax;
+ int i, j, dest, src, src1;
+    uint64_t index;
+ lmul = vector_get_lmul(env);
+ vl = env->vfp.vl;
+
+ if (vector_vtype_ill(env)
+ || vector_overlap_vm_force(vm, rd)
+ || vector_overlap_dstgp_srcgp(rd, lmul, rs1, lmul)
+ || vector_overlap_dstgp_srcgp(rd, lmul, rs2, lmul)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ if (env->vfp.vstart >= vl) {
+ return;
+ }
+
+ vector_lmul_check_reg(env, lmul, rs1, false);
+ vector_lmul_check_reg(env, lmul, rs2, false);
+ vector_lmul_check_reg(env, lmul, rd, false);
+
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ for (i = 0; i < vlmax; i++) {
+ dest = rd + (i / (VLEN / width));
+ src1 = rs1 + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 8:
+ index = env->vfp.vreg[src1].u8[j];
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ if (index >= vlmax) {
+ env->vfp.vreg[dest].u8[j] = 0;
+ } else {
+ src = rs2 + (index / (VLEN / width));
+ index = index % (VLEN / width);
+ env->vfp.vreg[dest].u8[j] =
+ env->vfp.vreg[src].u8[index];
+ }
+ }
+ break;
+ case 16:
+ index = env->vfp.vreg[src1].u16[j];
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ if (index >= vlmax) {
+ env->vfp.vreg[dest].u16[j] = 0;
+ } else {
+ src = rs2 + (index / (VLEN / width));
+ index = index % (VLEN / width);
+ env->vfp.vreg[dest].u16[j] =
+ env->vfp.vreg[src].u16[index];
+ }
+ }
+ break;
+ case 32:
+ index = env->vfp.vreg[src1].u32[j];
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ if (index >= vlmax) {
+ env->vfp.vreg[dest].u32[j] = 0;
+ } else {
+ src = rs2 + (index / (VLEN / width));
+ index = index % (VLEN / width);
+ env->vfp.vreg[dest].u32[j] =
+ env->vfp.vreg[src].u32[index];
+ }
+ }
+ break;
+ case 64:
+ index = env->vfp.vreg[src1].u64[j];
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ if (index >= vlmax) {
+ env->vfp.vreg[dest].u64[j] = 0;
+ } else {
+ src = rs2 + (index / (VLEN / width));
+ index = index % (VLEN / width);
+ env->vfp.vreg[dest].u64[j] =
+ env->vfp.vreg[src].u64[index];
+ }
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ } else {
+ vector_tail_common(env, dest, j, width);
+ }
+ }
+    env->vfp.vstart = 0;
+    return;
+}
+
+/* vrgather.vx vd, vs2, rs1, vm # vd[i] = (x[rs1] >= VLMAX) ? 0 : vs2[x[rs1]] */
+void VECTOR_HELPER(vrgather_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int width, lmul, vl, vlmax;
+ int i, j, dest, src;
+    uint64_t index;
+
+ lmul = vector_get_lmul(env);
+ vl = env->vfp.vl;
+
+ if (vector_vtype_ill(env)
+ || vector_overlap_vm_force(vm, rd)
+ || vector_overlap_dstgp_srcgp(rd, lmul, rs2, lmul)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ vector_lmul_check_reg(env, lmul, rs2, false);
+ vector_lmul_check_reg(env, lmul, rd, false);
+
+ if (env->vfp.vstart >= vl) {
+ return;
+ }
+
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ for (i = 0; i < vlmax; i++) {
+ dest = rd + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 8:
+ index = env->gpr[rs1];
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ if (index >= vlmax) {
+ env->vfp.vreg[dest].u8[j] = 0;
+ } else {
+ src = rs2 + (index / (VLEN / width));
+ index = index % (VLEN / width);
+ env->vfp.vreg[dest].u8[j] =
+ env->vfp.vreg[src].u8[index];
+ }
+ }
+ break;
+ case 16:
+ index = env->gpr[rs1];
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ if (index >= vlmax) {
+ env->vfp.vreg[dest].u16[j] = 0;
+ } else {
+ src = rs2 + (index / (VLEN / width));
+ index = index % (VLEN / width);
+ env->vfp.vreg[dest].u16[j] =
+ env->vfp.vreg[src].u16[index];
+ }
+ }
+ break;
+ case 32:
+ index = env->gpr[rs1];
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ if (index >= vlmax) {
+ env->vfp.vreg[dest].u32[j] = 0;
+ } else {
+ src = rs2 + (index / (VLEN / width));
+ index = index % (VLEN / width);
+ env->vfp.vreg[dest].u32[j] =
+ env->vfp.vreg[src].u32[index];
+ }
+ }
+ break;
+ case 64:
+ index = env->gpr[rs1];
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ if (index >= vlmax) {
+ env->vfp.vreg[dest].u64[j] = 0;
+ } else {
+ src = rs2 + (index / (VLEN / width));
+ index = index % (VLEN / width);
+ env->vfp.vreg[dest].u64[j] =
+ env->vfp.vreg[src].u64[index];
+ }
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ } else {
+ vector_tail_common(env, dest, j, width);
+ }
+ }
+    env->vfp.vstart = 0;
+}
+
+/* vrgather.vi vd, vs2, imm, vm # vd[i] = (imm >= VLMAX) ? 0 : vs2[imm] */
+void VECTOR_HELPER(vrgather_vi)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int width, lmul, vl, vlmax;
+ int i, j, dest, src;
+ uint32_t index;
+
+ lmul = vector_get_lmul(env);
+ vl = env->vfp.vl;
+
+ if (vector_vtype_ill(env)
+ || vector_overlap_vm_force(vm, rd)
+ || vector_overlap_dstgp_srcgp(rd, lmul, rs2, lmul)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ vector_lmul_check_reg(env, lmul, rs2, false);
+ vector_lmul_check_reg(env, lmul, rd, false);
+
+ if (env->vfp.vstart >= vl) {
+ return;
+ }
+
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ for (i = 0; i < vlmax; i++) {
+ dest = rd + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 8:
+ index = rs1;
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ if (index >= vlmax) {
+ env->vfp.vreg[dest].u8[j] = 0;
+ } else {
+ src = rs2 + (index / (VLEN / width));
+ index = index % (VLEN / width);
+ env->vfp.vreg[dest].u8[j] =
+ env->vfp.vreg[src].u8[index];
+ }
+ }
+ break;
+ case 16:
+ index = rs1;
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ if (index >= vlmax) {
+ env->vfp.vreg[dest].u16[j] = 0;
+ } else {
+ src = rs2 + (index / (VLEN / width));
+ index = index % (VLEN / width);
+ env->vfp.vreg[dest].u16[j] =
+ env->vfp.vreg[src].u16[index];
+ }
+ }
+ break;
+ case 32:
+ index = rs1;
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ if (index >= vlmax) {
+ env->vfp.vreg[dest].u32[j] = 0;
+ } else {
+ src = rs2 + (index / (VLEN / width));
+ index = index % (VLEN / width);
+ env->vfp.vreg[dest].u32[j] =
+ env->vfp.vreg[src].u32[index];
+ }
+ }
+ break;
+ case 64:
+ index = rs1;
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ if (index >= vlmax) {
+ env->vfp.vreg[dest].u64[j] = 0;
+ } else {
+ src = rs2 + (index / (VLEN / width));
+ index = index % (VLEN / width);
+ env->vfp.vreg[dest].u64[j] =
+ env->vfp.vreg[src].u64[index];
+ }
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ } else {
+ vector_tail_common(env, dest, j, width);
+ }
+ }
+    env->vfp.vstart = 0;
+}
+
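+/* vext.x.v rd, vs2, rs1 # rd = vs2[x[rs1]] (rd = 0 if index out of range) */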
+void VECTOR_HELPER(vext_x_v)(CPURISCVState *env, uint32_t rs1, uint32_t rs2,
+ uint32_t rd)
+{
+ int width;
+ target_ulong index = env->gpr[rs1];
+
+ if (vector_vtype_ill(env)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ width = vector_get_width(env);
+
+    if (index >= VLEN / width) { /* index is too big */
+        env->gpr[rd] = 0;
+    } else {
+        env->gpr[rd] = vector_get_iu_elem(env, width, rs2, index);
+    }
+    env->vfp.vstart = 0;
+}
+
+/* vfmv.f.s rd, vs2 # rd = vs2[0] (rs1=0) */
+void VECTOR_HELPER(vfmv_f_s)(CPURISCVState *env, uint32_t rs1, uint32_t rs2,
+ uint32_t rd)
+{
+ int width, flen;
+ uint64_t mask;
+
+ if (vector_vtype_ill(env)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ if (env->misa & RVD) {
+ flen = 8;
+ } else if (env->misa & RVF) {
+ flen = 4;
+ } else {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ width = vector_get_width(env);
+ mask = (~((uint64_t)0)) << width;
+
+ if (width == 8) {
+ env->fpr[rd] = (uint64_t)env->vfp.vreg[rs2].s8[0] | mask;
+ } else if (width == 16) {
+ env->fpr[rd] = (uint64_t)env->vfp.vreg[rs2].s16[0] | mask;
+ } else if (width == 32) {
+ env->fpr[rd] = (uint64_t)env->vfp.vreg[rs2].s32[0] | mask;
+ } else if (width == 64) {
+ if (flen == 4) {
+ env->fpr[rd] = env->vfp.vreg[rs2].s64[0] & 0xffffffff;
+ } else {
+ env->fpr[rd] = env->vfp.vreg[rs2].s64[0];
+ }
+ } else {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+    env->vfp.vstart = 0;
+}
+
+/* vmv.s.x vd, rs1 # vd[0] = x[rs1] */
+void VECTOR_HELPER(vmv_s_x)(CPURISCVState *env, uint32_t rs1, uint32_t rs2,
+ uint32_t rd)
+{
+ int width;
+ if (vector_vtype_ill(env)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ if (env->vfp.vstart >= env->vfp.vl) {
+ return;
+ }
+
+ memset(&env->vfp.vreg[rd].u8[0], 0, VLEN / 8);
+ width = vector_get_width(env);
+
+ if (width == 8) {
+ env->vfp.vreg[rd].u8[0] = env->gpr[rs1];
+ } else if (width == 16) {
+ env->vfp.vreg[rd].u16[0] = env->gpr[rs1];
+ } else if (width == 32) {
+ env->vfp.vreg[rd].u32[0] = env->gpr[rs1];
+ } else if (width == 64) {
+ env->vfp.vreg[rd].u64[0] = env->gpr[rs1];
+ } else {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+    env->vfp.vstart = 0;
+}
+
+/* vfmv.s.f vd, rs1 # vd[0] = f[rs1] (vs2 = 0) */
+void VECTOR_HELPER(vfmv_s_f)(CPURISCVState *env, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int width, flen;
+
+ if (vector_vtype_ill(env)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ if (env->vfp.vstart >= env->vfp.vl) {
+ return;
+ }
+ if (env->misa & RVD) {
+ flen = 8;
+ } else if (env->misa & RVF) {
+ flen = 4;
+ } else {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ width = vector_get_width(env);
+
+ if (width == 8) {
+ env->vfp.vreg[rd].u8[0] = env->fpr[rs1];
+ } else if (width == 16) {
+ env->vfp.vreg[rd].u16[0] = env->fpr[rs1];
+ } else if (width == 32) {
+ env->vfp.vreg[rd].u32[0] = env->fpr[rs1];
+ } else if (width == 64) {
+ if (flen == 4) { /* 1-extended to FLEN bits */
+ env->vfp.vreg[rd].u64[0] = (uint64_t)env->fpr[rs1]
+ | 0xffffffff00000000;
+ } else {
+ env->vfp.vreg[rd].u64[0] = env->fpr[rs1];
+ }
+ } else {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+    env->vfp.vstart = 0;
+}
+
+/* vslideup.vx vd, vs2, rs1, vm # vd[i + x[rs1]] = vs2[i] */
+void VECTOR_HELPER(vslideup_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int width, lmul, vl, vlmax, offset;
+ int i, j, dest, src, k;
+
+ lmul = vector_get_lmul(env);
+ vl = env->vfp.vl;
+
+ if (vector_vtype_ill(env)
+ || vector_overlap_vm_force(vm, rd)
+ || vector_overlap_dstgp_srcgp(rd, lmul, rs2, lmul)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ vector_lmul_check_reg(env, lmul, rs2, false);
+ vector_lmul_check_reg(env, lmul, rd, false);
+
+ if (env->vfp.vstart >= vl) {
+ return;
+ }
+
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+ offset = env->gpr[rs1];
+
+ if (offset < env->vfp.vstart) {
+ offset = env->vfp.vstart;
+ }
+
+ for (i = 0; i < vlmax; i++) {
+ dest = rd + (i / (VLEN / width));
+ src = rs2 + ((i - offset) / (VLEN / width));
+ j = i % (VLEN / width);
+ k = (i - offset) % (VLEN / width);
+ if (i < offset) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 8:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].u8[j] =
+ env->vfp.vreg[src].u8[k];
+ }
+ break;
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].u16[j] =
+ env->vfp.vreg[src].u16[k];
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].u32[j] =
+ env->vfp.vreg[src].u32[k];
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].u64[j] =
+ env->vfp.vreg[src].u64[k];
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ } else {
+ vector_tail_common(env, dest, j, width);
+ }
+ }
+    env->vfp.vstart = 0;
+}
+
+/* vslideup.vi vd, vs2, imm, vm # vd[i + imm] = vs2[i] */
+void VECTOR_HELPER(vslideup_vi)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int width, lmul, vl, vlmax, offset;
+ int i, j, dest, src, k;
+
+ lmul = vector_get_lmul(env);
+ vl = env->vfp.vl;
+
+ if (vector_vtype_ill(env)
+ || vector_overlap_vm_force(vm, rd)
+ || vector_overlap_dstgp_srcgp(rd, lmul, rs2, lmul)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ vector_lmul_check_reg(env, lmul, rs2, false);
+ vector_lmul_check_reg(env, lmul, rd, false);
+
+ if (env->vfp.vstart >= vl) {
+ return;
+ }
+
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+ offset = rs1;
+
+ if (offset < env->vfp.vstart) {
+ offset = env->vfp.vstart;
+ }
+
+ for (i = 0; i < vlmax; i++) {
+ dest = rd + (i / (VLEN / width));
+ src = rs2 + ((i - offset) / (VLEN / width));
+ j = i % (VLEN / width);
+ k = (i - offset) % (VLEN / width);
+ if (i < offset) {
+ continue;
+ } else if (i < vl) {
+ if (width == 8) {
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].u8[j] =
+ env->vfp.vreg[src].u8[k];
+ }
+ } else if (width == 16) {
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].u16[j] =
+ env->vfp.vreg[src].u16[k];
+ }
+ } else if (width == 32) {
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].u32[j] =
+ env->vfp.vreg[src].u32[k];
+ }
+ } else if (width == 64) {
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].u64[j] =
+ env->vfp.vreg[src].u64[k];
+ }
+ } else {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ } else {
+ vector_tail_common(env, dest, j, width);
+ }
+ }
+    env->vfp.vstart = 0;
+}
+
+/* vslide1up.vx vd, vs2, rs1, vm # vd[0]=x[rs1], vd[i+1] = vs2[i] */
+void VECTOR_HELPER(vslide1up_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int width, lmul, vl, vlmax;
+ int i, j, dest, src, k;
+ uint64_t s1;
+
+ lmul = vector_get_lmul(env);
+ vl = env->vfp.vl;
+
+ if (vector_vtype_ill(env)
+ || vector_overlap_vm_force(vm, rd)
+ || vector_overlap_dstgp_srcgp(rd, lmul, rs2, lmul)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ vector_lmul_check_reg(env, lmul, rs2, false);
+ vector_lmul_check_reg(env, lmul, rd, false);
+
+ if (env->vfp.vstart >= vl) {
+ return;
+ }
+
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+ s1 = env->gpr[rs1];
+
+ for (i = 0; i < vlmax; i++) {
+ dest = rd + (i / (VLEN / width));
+ src = rs2 + ((i - 1) / (VLEN / width));
+ j = i % (VLEN / width);
+ k = (i - 1) % (VLEN / width);
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i == 0 && env->vfp.vstart == 0) {
+ switch (width) {
+ case 8:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].u8[j] = s1;
+ }
+ break;
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].u16[j] = s1;
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].u32[j] = s1;
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].u64[j] = s1;
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ } else if (i < vl) {
+ switch (width) {
+ case 8:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].u8[j] = env->vfp.vreg[src].u8[k];
+ }
+ break;
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].u16[j] =
+ env->vfp.vreg[src].u16[k];
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].u32[j] =
+ env->vfp.vreg[src].u32[k];
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].u64[j] =
+ env->vfp.vreg[src].u64[k];
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ } else {
+ vector_tail_common(env, dest, j, width);
+ }
+ }
+    env->vfp.vstart = 0;
+}
+
+/* vslidedown.vx vd, vs2, rs1, vm # vd[i] = vs2[i + x[rs1]] */
+void VECTOR_HELPER(vslidedown_vx)(CPURISCVState *env, uint32_t vm,
+ uint32_t rs1, uint32_t rs2, uint32_t rd)
+{
+ int width, lmul, vl, vlmax, offset;
+ int i, j, dest, src, k;
+
+ lmul = vector_get_lmul(env);
+ vl = env->vfp.vl;
+
+ if (vector_vtype_ill(env) || vector_overlap_vm_force(vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ vector_lmul_check_reg(env, lmul, rs2, false);
+ vector_lmul_check_reg(env, lmul, rd, false);
+
+ if (env->vfp.vstart >= vl) {
+ return;
+ }
+
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+ offset = env->gpr[rs1];
+
+ for (i = 0; i < vlmax; i++) {
+ dest = rd + (i / (VLEN / width));
+ src = rs2 + ((i + offset) / (VLEN / width));
+ j = i % (VLEN / width);
+ k = (i + offset) % (VLEN / width);
+        if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 8:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ if (i + offset < vlmax) {
+ env->vfp.vreg[dest].u8[j] =
+ env->vfp.vreg[src].u8[k];
+ } else {
+ env->vfp.vreg[dest].u8[j] = 0;
+ }
+ }
+ break;
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ if (i + offset < vlmax) {
+ env->vfp.vreg[dest].u16[j] =
+ env->vfp.vreg[src].u16[k];
+ } else {
+ env->vfp.vreg[dest].u16[j] = 0;
+ }
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ if (i + offset < vlmax) {
+ env->vfp.vreg[dest].u32[j] =
+ env->vfp.vreg[src].u32[k];
+ } else {
+ env->vfp.vreg[dest].u32[j] = 0;
+ }
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ if (i + offset < vlmax) {
+ env->vfp.vreg[dest].u64[j] =
+ env->vfp.vreg[src].u64[k];
+ } else {
+ env->vfp.vreg[dest].u64[j] = 0;
+ }
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ } else {
+ vector_tail_common(env, dest, j, width);
+ }
+ }
+    env->vfp.vstart = 0;
+}
+
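+/* vslidedown.vi vd, vs2, imm, vm # vd[i] = vs2[i + imm] */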
+void VECTOR_HELPER(vslidedown_vi)(CPURISCVState *env, uint32_t vm,
+ uint32_t rs1, uint32_t rs2, uint32_t rd)
+{
+ int width, lmul, vl, vlmax, offset;
+ int i, j, dest, src, k;
+
+ lmul = vector_get_lmul(env);
+ vl = env->vfp.vl;
+
+ if (vector_vtype_ill(env) || vector_overlap_vm_force(vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ vector_lmul_check_reg(env, lmul, rs2, false);
+ vector_lmul_check_reg(env, lmul, rd, false);
+
+ if (env->vfp.vstart >= vl) {
+ return;
+ }
+
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+ offset = rs1;
+
+ for (i = 0; i < vlmax; i++) {
+ dest = rd + (i / (VLEN / width));
+ src = rs2 + ((i + offset) / (VLEN / width));
+ j = i % (VLEN / width);
+ k = (i + offset) % (VLEN / width);
+        if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 8:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ if (i + offset < vlmax) {
+ env->vfp.vreg[dest].u8[j] =
+ env->vfp.vreg[src].u8[k];
+ } else {
+ env->vfp.vreg[dest].u8[j] = 0;
+ }
+ }
+ break;
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ if (i + offset < vlmax) {
+ env->vfp.vreg[dest].u16[j] =
+ env->vfp.vreg[src].u16[k];
+ } else {
+ env->vfp.vreg[dest].u16[j] = 0;
+ }
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ if (i + offset < vlmax) {
+ env->vfp.vreg[dest].u32[j] =
+ env->vfp.vreg[src].u32[k];
+ } else {
+ env->vfp.vreg[dest].u32[j] = 0;
+ }
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ if (i + offset < vlmax) {
+ env->vfp.vreg[dest].u64[j] =
+ env->vfp.vreg[src].u64[k];
+ } else {
+ env->vfp.vreg[dest].u64[j] = 0;
+ }
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ } else {
+ vector_tail_common(env, dest, j, width);
+ }
+ }
+    env->vfp.vstart = 0;
+}
+
+/* vslide1down.vx vd, vs2, rs1, vm # vd[vl - 1]=x[rs1], vd[i] = vs2[i + 1] */
+void VECTOR_HELPER(vslide1down_vx)(CPURISCVState *env, uint32_t vm,
+ uint32_t rs1, uint32_t rs2, uint32_t rd)
+{
+ int width, lmul, vl, vlmax;
+ int i, j, dest, src, k;
+ uint64_t s1;
+
+ lmul = vector_get_lmul(env);
+ vl = env->vfp.vl;
+
+ if (vector_vtype_ill(env) || vector_overlap_vm_force(vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ vector_lmul_check_reg(env, lmul, rs2, false);
+ vector_lmul_check_reg(env, lmul, rd, false);
+
+ if (env->vfp.vstart >= vl) {
+ return;
+ }
+
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+ s1 = env->gpr[rs1];
+
+ for (i = 0; i < vlmax; i++) {
+ dest = rd + (i / (VLEN / width));
+ src = rs2 + ((i + 1) / (VLEN / width));
+ j = i % (VLEN / width);
+ k = (i + 1) % (VLEN / width);
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i == vl - 1 && i >= env->vfp.vstart) {
+ switch (width) {
+ case 8:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].u8[j] = s1;
+ }
+ break;
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].u16[j] = s1;
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].u32[j] = s1;
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].u64[j] = s1;
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ } else if (i < vl - 1) {
+ switch (width) {
+ case 8:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].u8[j] = env->vfp.vreg[src].u8[k];
+ }
+ break;
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].u16[j] =
+ env->vfp.vreg[src].u16[k];
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].u32[j] =
+ env->vfp.vreg[src].u32[k];
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].u64[j] =
+ env->vfp.vreg[src].u64[k];
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ } else {
+ vector_tail_common(env, dest, j, width);
+ }
+ }
+    env->vfp.vstart = 0;
+}
+
+/*
+ * vcompress.vm vd, vs2, vs1
+ * Compress into vd elements of vs2 where vs1 is enabled
+ */
+void VECTOR_HELPER(vcompress_vm)(CPURISCVState *env, uint32_t rs1, uint32_t rs2,
+ uint32_t rd)
+{
+ int width, lmul, vl, vlmax;
+ int i, j, dest, src;
+ uint32_t vd_idx, num = 0;
+
+ lmul = vector_get_lmul(env);
+ vl = env->vfp.vl;
+ if (vector_vtype_ill(env)
+ || vector_overlap_dstgp_srcgp(rd, lmul, rs1, 1)
+ || vector_overlap_dstgp_srcgp(rd, lmul, rs2, lmul)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ vector_lmul_check_reg(env, lmul, rs2, false);
+ vector_lmul_check_reg(env, lmul, rd, false);
+
+ if (env->vfp.vstart >= vl) {
+ return;
+ }
+
+ if (env->vfp.vstart != 0) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+    /* zero all destination elements first */
+ for (i = 0; i < lmul; i++) {
+ memset(&env->vfp.vreg[rd + i].u64[0], 0, VLEN / 8);
+ }
+
+ for (i = 0; i < vlmax; i++) {
+ dest = rd + (num / (VLEN / width));
+ src = rs2 + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ vd_idx = num % (VLEN / width);
+ if (i < vl) {
+ switch (width) {
+ case 8:
+ if (vector_mask_reg(env, rs1, width, lmul, i)) {
+ env->vfp.vreg[dest].u8[vd_idx] =
+ env->vfp.vreg[src].u8[j];
+ num++;
+ }
+ break;
+ case 16:
+ if (vector_mask_reg(env, rs1, width, lmul, i)) {
+ env->vfp.vreg[dest].u16[vd_idx] =
+ env->vfp.vreg[src].u16[j];
+ num++;
+ }
+ break;
+ case 32:
+ if (vector_mask_reg(env, rs1, width, lmul, i)) {
+ env->vfp.vreg[dest].u32[vd_idx] =
+ env->vfp.vreg[src].u32[j];
+ num++;
+ }
+ break;
+ case 64:
+ if (vector_mask_reg(env, rs1, width, lmul, i)) {
+ env->vfp.vreg[dest].u64[vd_idx] =
+ env->vfp.vreg[src].u64[j];
+ num++;
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ }
+ }
+    env->vfp.vstart = 0;
+}
+
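+/* vadd.vv vd, vs2, vs1, vm # vd[i] = vs2[i] + vs1[i] */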
+void VECTOR_HELPER(vadd_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int i, j, vl;
+ uint32_t lmul, width, src1, src2, dest, vlmax;
+
+ vl = env->vfp.vl;
+ lmul = vector_get_lmul(env);
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ vector_lmul_check_reg(env, lmul, rs1, false);
+ vector_lmul_check_reg(env, lmul, rs2, false);
+ vector_lmul_check_reg(env, lmul, rd, false);
+
+ for (i = 0; i < vlmax; i++) {
+ src1 = rs1 + (i / (VLEN / width));
+ src2 = rs2 + (i / (VLEN / width));
+ dest = rd + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 8:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].u8[j] = env->vfp.vreg[src1].u8[j]
+ + env->vfp.vreg[src2].u8[j];
+ }
+ break;
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].u16[j] = env->vfp.vreg[src1].u16[j]
+ + env->vfp.vreg[src2].u16[j];
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].u32[j] = env->vfp.vreg[src1].u32[j]
+ + env->vfp.vreg[src2].u32[j];
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].u64[j] = env->vfp.vreg[src1].u64[j]
+ + env->vfp.vreg[src2].u64[j];
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ break;
+ }
+ } else {
+ vector_tail_common(env, dest, j, width);
+ }
+ }
+ env->vfp.vstart = 0;
+}
+
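+/* vadd.vx vd, vs2, rs1, vm # vd[i] = vs2[i] + x[rs1] */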
+void VECTOR_HELPER(vadd_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int i, j, vl;
+ uint32_t lmul, width, src2, dest, vlmax;
+
+ vl = env->vfp.vl;
+ lmul = vector_get_lmul(env);
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ vector_lmul_check_reg(env, lmul, rs2, false);
+ vector_lmul_check_reg(env, lmul, rd, false);
+
+ for (i = 0; i < vlmax; i++) {
+ src2 = rs2 + (i / (VLEN / width));
+ dest = rd + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 8:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].u8[j] = env->gpr[rs1]
+ + env->vfp.vreg[src2].u8[j];
+ }
+ break;
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].u16[j] = env->gpr[rs1]
+ + env->vfp.vreg[src2].u16[j];
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].u32[j] = env->gpr[rs1]
+ + env->vfp.vreg[src2].u32[j];
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].u64[j] =
+ (uint64_t)extend_gpr(env->gpr[rs1])
+ + env->vfp.vreg[src2].u64[j];
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ break;
+ }
+ } else {
+ vector_tail_common(env, dest, j, width);
+ }
+ }
+ env->vfp.vstart = 0;
+}
+
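+/* vadd.vi vd, vs2, imm, vm # vd[i] = vs2[i] + sign_extend(imm) */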
+void VECTOR_HELPER(vadd_vi)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int i, j, vl;
+ uint32_t lmul, width, src2, dest, vlmax;
+
+ vl = env->vfp.vl;
+ lmul = vector_get_lmul(env);
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ vector_lmul_check_reg(env, lmul, rs2, false);
+ vector_lmul_check_reg(env, lmul, rd, false);
+
+ for (i = 0; i < vlmax; i++) {
+ src2 = rs2 + (i / (VLEN / width));
+ dest = rd + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 8:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s8[j] = sign_extend(rs1, 5)
+ + env->vfp.vreg[src2].s8[j];
+ }
+ break;
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s16[j] = sign_extend(rs1, 5)
+ + env->vfp.vreg[src2].s16[j];
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s32[j] = sign_extend(rs1, 5)
+ + env->vfp.vreg[src2].s32[j];
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s64[j] = sign_extend(rs1, 5)
+ + env->vfp.vreg[src2].s64[j];
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ break;
+ }
+ } else {
+ vector_tail_common(env, dest, j, width);
+ }
+ }
+ env->vfp.vstart = 0;
+}
+
+/* vredsum.vs vd, vs2, vs1, vm # vd[0] = sum(vs1[0] , vs2[*]) */
+void VECTOR_HELPER(vredsum_vs)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int width, lmul, vl, vlmax;
+ int i, j, src2;
+ uint64_t sum = 0;
+
+ lmul = vector_get_lmul(env);
+ vector_lmul_check_reg(env, lmul, rs2, false);
+
+ if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ if (env->vfp.vstart != 0) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ vl = env->vfp.vl;
+ if (vl == 0) {
+ return;
+ }
+
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ for (i = 0; i < VLEN / 64; i++) {
+ env->vfp.vreg[rd].u64[i] = 0;
+ }
+
+ for (i = 0; i < vlmax; i++) {
+ src2 = rs2 + (i / (VLEN / width));
+ j = i % (VLEN / width);
+
+ if (i < vl) {
+ switch (width) {
+ case 8:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ sum += env->vfp.vreg[src2].u8[j];
+ }
+ if (i == 0) {
+ sum += env->vfp.vreg[rs1].u8[0];
+ }
+ if (i == vl - 1) {
+ env->vfp.vreg[rd].u8[0] = sum;
+ }
+ break;
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ sum += env->vfp.vreg[src2].u16[j];
+ }
+ if (i == 0) {
+ sum += env->vfp.vreg[rs1].u16[0];
+ }
+ if (i == vl - 1) {
+ env->vfp.vreg[rd].u16[0] = sum;
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ sum += env->vfp.vreg[src2].u32[j];
+ }
+ if (i == 0) {
+ sum += env->vfp.vreg[rs1].u32[0];
+ }
+ if (i == vl - 1) {
+ env->vfp.vreg[rd].u32[0] = sum;
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ sum += env->vfp.vreg[src2].u64[j];
+ }
+ if (i == 0) {
+ sum += env->vfp.vreg[rs1].u64[0];
+ }
+ if (i == vl - 1) {
+ env->vfp.vreg[rd].u64[0] = sum;
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ }
+ }
+    env->vfp.vstart = 0;
+}
+
+/* vfadd.vv vd, vs2, vs1, vm # Vector-vector */
+void VECTOR_HELPER(vfadd_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int width, lmul, vl, vlmax;
+ int i, j, dest, src1, src2;
+
+ lmul = vector_get_lmul(env);
+ vl = env->vfp.vl;
+
+ if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ vector_lmul_check_reg(env, lmul, rs1, false);
+ vector_lmul_check_reg(env, lmul, rs2, false);
+ vector_lmul_check_reg(env, lmul, rd, false);
+
+ if (env->vfp.vstart >= vl) {
+ return;
+ }
+
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ for (i = 0; i < vlmax; i++) {
+ dest = rd + (i / (VLEN / width));
+ src2 = rs2 + (i / (VLEN / width));
+ src1 = rs1 + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f16[j] = float16_add(
+ env->vfp.vreg[src1].f16[j],
+ env->vfp.vreg[src2].f16[j],
+ &env->fp_status);
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f32[j] = float32_add(
+ env->vfp.vreg[src1].f32[j],
+ env->vfp.vreg[src2].f32[j],
+ &env->fp_status);
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f64[j] = float64_add(
+ env->vfp.vreg[src1].f64[j],
+ env->vfp.vreg[src2].f64[j],
+ &env->fp_status);
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ } else {
+ vector_tail_fcommon(env, dest, j, width);
+ }
+ }
+    env->vfp.vstart = 0;
+}
+
+/* vfadd.vf vd, vs2, rs1, vm # vector-scalar */
+void VECTOR_HELPER(vfadd_vf)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int width, lmul, vl, vlmax;
+ int i, j, dest, src2;
+ lmul = vector_get_lmul(env);
+ vl = env->vfp.vl;
+
+ if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ vector_lmul_check_reg(env, lmul, rs2, false);
+ vector_lmul_check_reg(env, lmul, rd, false);
+
+ if (env->vfp.vstart >= vl) {
+ return;
+ }
+
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ for (i = 0; i < vlmax; i++) {
+ dest = rd + (i / (VLEN / width));
+ src2 = rs2 + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f16[j] = float16_add(
+ env->fpr[rs1],
+ env->vfp.vreg[src2].f16[j],
+ &env->fp_status);
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f32[j] = float32_add(
+ env->fpr[rs1],
+ env->vfp.vreg[src2].f32[j],
+ &env->fp_status);
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f64[j] = float64_add(
+ env->fpr[rs1],
+ env->vfp.vreg[src2].f64[j],
+ &env->fp_status);
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ } else {
+ vector_tail_fcommon(env, dest, j, width);
+ }
+ }
+    env->vfp.vstart = 0;
+}
+
+/* vredand.vs vd, vs2, vs1, vm # vd[0] = and( vs1[0] , vs2[*] ) */
+void VECTOR_HELPER(vredand_vs)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int width, lmul, vl, vlmax;
+ int i, j, src2;
+ uint64_t res = 0;
+
+ lmul = vector_get_lmul(env);
+ vector_lmul_check_reg(env, lmul, rs2, false);
+
+ if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ if (env->vfp.vstart != 0) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ vl = env->vfp.vl;
+ if (vl == 0) {
+ return;
+ }
+
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ for (i = 0; i < VLEN / 64; i++) {
+ env->vfp.vreg[rd].u64[i] = 0;
+ }
+
+ for (i = 0; i < vlmax; i++) {
+ src2 = rs2 + (i / (VLEN / width));
+ j = i % (VLEN / width);
+
+ if (i < vl) {
+ switch (width) {
+ case 8:
+ if (i == 0) {
+ res = env->vfp.vreg[rs1].u8[0];
+ }
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ res &= env->vfp.vreg[src2].u8[j];
+ }
+ if (i == vl - 1) {
+ env->vfp.vreg[rd].u8[0] = res;
+ }
+ break;
+ case 16:
+ if (i == 0) {
+ res = env->vfp.vreg[rs1].u16[0];
+ }
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ res &= env->vfp.vreg[src2].u16[j];
+ }
+ if (i == vl - 1) {
+ env->vfp.vreg[rd].u16[0] = res;
+ }
+ break;
+ case 32:
+ if (i == 0) {
+ res = env->vfp.vreg[rs1].u32[0];
+ }
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ res &= env->vfp.vreg[src2].u32[j];
+ }
+ if (i == vl - 1) {
+ env->vfp.vreg[rd].u32[0] = res;
+ }
+ break;
+ case 64:
+ if (i == 0) {
+ res = env->vfp.vreg[rs1].u64[0];
+ }
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ res &= env->vfp.vreg[src2].u64[j];
+ }
+ if (i == vl - 1) {
+ env->vfp.vreg[rd].u64[0] = res;
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ }
+ }
+    env->vfp.vstart = 0;
+}
+
+/* vfredsum.vs vd, vs2, vs1, vm # Unordered sum */
+void VECTOR_HELPER(vfredsum_vs)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int width, lmul, vl, vlmax;
+ int i, j, src2;
+    float16 sum16 = 0;
+    float32 sum32 = 0;
+    float64 sum64 = 0;
+
+ lmul = vector_get_lmul(env);
+ vector_lmul_check_reg(env, lmul, rs2, false);
+
+ if (vector_vtype_ill(env)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ if (env->vfp.vstart != 0) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ vl = env->vfp.vl;
+ if (vl == 0) {
+ return;
+ }
+
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ for (i = 0; i < VLEN / 64; i++) {
+ env->vfp.vreg[rd].u64[i] = 0;
+ }
+
+ for (i = 0; i < vlmax; i++) {
+ src2 = rs2 + (i / (VLEN / width));
+ j = i % (VLEN / width);
+
+ if (i < vl) {
+ switch (width) {
+ case 16:
+ if (i == 0) {
+ sum16 = env->vfp.vreg[rs1].f16[0];
+ }
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ sum16 = float16_add(sum16, env->vfp.vreg[src2].f16[j],
+ &env->fp_status);
+ }
+ if (i == vl - 1) {
+ env->vfp.vreg[rd].f16[0] = sum16;
+ }
+ break;
+ case 32:
+ if (i == 0) {
+ sum32 = env->vfp.vreg[rs1].f32[0];
+ }
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ sum32 = float32_add(sum32, env->vfp.vreg[src2].f32[j],
+ &env->fp_status);
+ }
+ if (i == vl - 1) {
+ env->vfp.vreg[rd].f32[0] = sum32;
+ }
+ break;
+ case 64:
+ if (i == 0) {
+ sum64 = env->vfp.vreg[rs1].f64[0];
+ }
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ sum64 = float64_add(sum64, env->vfp.vreg[src2].f64[j],
+ &env->fp_status);
+ }
+ if (i == vl - 1) {
+ env->vfp.vreg[rd].f64[0] = sum64;
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ }
+ }
+    env->vfp.vstart = 0;
+}
+
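+/* vsub.vv vd, vs2, vs1, vm # vd[i] = vs2[i] - vs1[i] */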
+void VECTOR_HELPER(vsub_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int i, j, vl;
+ uint32_t lmul, width, src1, src2, dest, vlmax;
+
+ vl = env->vfp.vl;
+ lmul = vector_get_lmul(env);
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ vector_lmul_check_reg(env, lmul, rs1, false);
+ vector_lmul_check_reg(env, lmul, rs2, false);
+ vector_lmul_check_reg(env, lmul, rd, false);
+
+ for (i = 0; i < vlmax; i++) {
+ src1 = rs1 + (i / (VLEN / width));
+ src2 = rs2 + (i / (VLEN / width));
+ dest = rd + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 8:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].u8[j] = env->vfp.vreg[src2].u8[j]
+ - env->vfp.vreg[src1].u8[j];
+ }
+ break;
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].u16[j] = env->vfp.vreg[src2].u16[j]
+ - env->vfp.vreg[src1].u16[j];
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].u32[j] = env->vfp.vreg[src2].u32[j]
+ - env->vfp.vreg[src1].u32[j];
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].u64[j] = env->vfp.vreg[src2].u64[j]
+ - env->vfp.vreg[src1].u64[j];
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ break;
+ }
+ } else {
+ vector_tail_common(env, dest, j, width);
+ }
+ }
+
+ env->vfp.vstart = 0;
+}
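+
+/* vsub.vx vd, vs2, rs1, vm # vd[i] = vs2[i] - x[rs1] */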
+void VECTOR_HELPER(vsub_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int i, j, vl;
+ uint32_t lmul, width, src2, dest, vlmax;
+
+ vl = env->vfp.vl;
+ lmul = vector_get_lmul(env);
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ vector_lmul_check_reg(env, lmul, rs2, false);
+ vector_lmul_check_reg(env, lmul, rd, false);
+
+ for (i = 0; i < vlmax; i++) {
+ src2 = rs2 + (i / (VLEN / width));
+ dest = rd + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 8:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].u8[j] = env->vfp.vreg[src2].u8[j]
+ - env->gpr[rs1];
+ }
+ break;
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].u16[j] = env->vfp.vreg[src2].u16[j]
+ - env->gpr[rs1];
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].u32[j] = env->vfp.vreg[src2].u32[j]
+ - env->gpr[rs1];
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].u64[j] = env->vfp.vreg[src2].u64[j]
+ - (uint64_t)extend_gpr(env->gpr[rs1]);
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ break;
+ }
+ } else {
+ vector_tail_common(env, dest, j, width);
+ }
+ }
+ env->vfp.vstart = 0;
+}
+
+/* vredor.vs vd, vs2, vs1, vm # vd[0] = or( vs1[0] , vs2[*] ) */
+void VECTOR_HELPER(vredor_vs)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int width, lmul, vl, vlmax;
+ int i, j, src2;
+ uint64_t res = 0;
+
+ lmul = vector_get_lmul(env);
+ vector_lmul_check_reg(env, lmul, rs2, false);
+
+ if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ if (env->vfp.vstart != 0) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ vl = env->vfp.vl;
+ if (vl == 0) {
+ return;
+ }
+
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ for (i = 0; i < VLEN / 64; i++) {
+ env->vfp.vreg[rd].u64[i] = 0;
+ }
+
+ for (i = 0; i < vlmax; i++) {
+ src2 = rs2 + (i / (VLEN / width));
+ j = i % (VLEN / width);
+
+ if (i < vl) {
+ switch (width) {
+ case 8:
+ if (i == 0) {
+ res = env->vfp.vreg[rs1].u8[0];
+ }
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ res |= env->vfp.vreg[src2].u8[j];
+ }
+ if (i == vl - 1) {
+ env->vfp.vreg[rd].u8[0] = res;
+ }
+ break;
+ case 16:
+ if (i == 0) {
+ res = env->vfp.vreg[rs1].u16[0];
+ }
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ res |= env->vfp.vreg[src2].u16[j];
+ }
+ if (i == vl - 1) {
+ env->vfp.vreg[rd].u16[0] = res;
+ }
+ break;
+ case 32:
+ if (i == 0) {
+ res = env->vfp.vreg[rs1].u32[0];
+ }
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ res |= env->vfp.vreg[src2].u32[j];
+ }
+ if (i == vl - 1) {
+ env->vfp.vreg[rd].u32[0] = res;
+ }
+ break;
+ case 64:
+ if (i == 0) {
+ res = env->vfp.vreg[rs1].u64[0];
+ }
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ res |= env->vfp.vreg[src2].u64[j];
+ }
+ if (i == vl - 1) {
+ env->vfp.vreg[rd].u64[0] = res;
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ }
+ }
+    env->vfp.vstart = 0;
+}
+
+/* vfsub.vv vd, vs2, vs1, vm # Vector-vector */
+void VECTOR_HELPER(vfsub_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int width, lmul, vl, vlmax;
+ int i, j, dest, src1, src2;
+
+ lmul = vector_get_lmul(env);
+ vl = env->vfp.vl;
+
+ if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ vector_lmul_check_reg(env, lmul, rs1, false);
+ vector_lmul_check_reg(env, lmul, rs2, false);
+ vector_lmul_check_reg(env, lmul, rd, false);
+
+ if (env->vfp.vstart >= vl) {
+ return;
+ }
+
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ for (i = 0; i < vlmax; i++) {
+ dest = rd + (i / (VLEN / width));
+ src2 = rs2 + (i / (VLEN / width));
+ src1 = rs1 + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f16[j] = float16_sub(
+ env->vfp.vreg[src2].f16[j],
+ env->vfp.vreg[src1].f16[j],
+ &env->fp_status);
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f32[j] = float32_sub(
+ env->vfp.vreg[src2].f32[j],
+ env->vfp.vreg[src1].f32[j],
+ &env->fp_status);
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f64[j] = float64_sub(
+ env->vfp.vreg[src2].f64[j],
+ env->vfp.vreg[src1].f64[j],
+ &env->fp_status);
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ } else {
+ vector_tail_fcommon(env, dest, j, width);
+ }
+ }
+    env->vfp.vstart = 0;
+}
+
+/* vfsub.vf vd, vs2, rs1, vm # Vector-scalar vd[i] = vs2[i] - f[rs1] */
+void VECTOR_HELPER(vfsub_vf)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int width, lmul, vl, vlmax;
+ int i, j, dest, src2;
+
+ lmul = vector_get_lmul(env);
+ vl = env->vfp.vl;
+
+ if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ vector_lmul_check_reg(env, lmul, rs2, false);
+ vector_lmul_check_reg(env, lmul, rd, false);
+
+ if (env->vfp.vstart >= vl) {
+ return;
+ }
+
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ for (i = 0; i < vlmax; i++) {
+ dest = rd + (i / (VLEN / width));
+ src2 = rs2 + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f16[j] = float16_sub(
+ env->vfp.vreg[src2].f16[j],
+ env->fpr[rs1],
+ &env->fp_status);
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f32[j] = float32_sub(
+ env->vfp.vreg[src2].f32[j],
+ env->fpr[rs1],
+ &env->fp_status);
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f64[j] = float64_sub(
+ env->vfp.vreg[src2].f64[j],
+ env->fpr[rs1],
+ &env->fp_status);
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ } else {
+ vector_tail_fcommon(env, dest, j, width);
+ }
+ }
+    env->vfp.vstart = 0;
+}
+
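+/* vrsub.vx vd, vs2, rs1, vm # vd[i] = x[rs1] - vs2[i] */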
+void VECTOR_HELPER(vrsub_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int i, j, vl;
+ uint32_t lmul, width, src2, dest, vlmax;
+
+ vl = env->vfp.vl;
+ lmul = vector_get_lmul(env);
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ vector_lmul_check_reg(env, lmul, rs2, false);
+ vector_lmul_check_reg(env, lmul, rd, false);
+
+ for (i = 0; i < vlmax; i++) {
+ src2 = rs2 + (i / (VLEN / width));
+ dest = rd + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 8:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].u8[j] = env->gpr[rs1]
+ - env->vfp.vreg[src2].u8[j];
+ }
+ break;
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].u16[j] = env->gpr[rs1]
+ - env->vfp.vreg[src2].u16[j];
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].u32[j] = env->gpr[rs1]
+ - env->vfp.vreg[src2].u32[j];
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].u64[j] =
+ (uint64_t)extend_gpr(env->gpr[rs1])
+ - env->vfp.vreg[src2].u64[j];
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ break;
+ }
+ } else {
+ vector_tail_common(env, dest, j, width);
+ }
+ }
+ env->vfp.vstart = 0;
+}
+
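+/* vrsub.vi vd, vs2, imm, vm # vd[i] = sign_extend(imm) - vs2[i] */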
+void VECTOR_HELPER(vrsub_vi)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int i, j, vl;
+ uint32_t lmul, width, src2, dest, vlmax;
+
+ vl = env->vfp.vl;
+ lmul = vector_get_lmul(env);
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ vector_lmul_check_reg(env, lmul, rs2, false);
+ vector_lmul_check_reg(env, lmul, rd, false);
+
+ for (i = 0; i < vlmax; i++) {
+ src2 = rs2 + (i / (VLEN / width));
+ dest = rd + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 8:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s8[j] = sign_extend(rs1, 5)
+ - env->vfp.vreg[src2].s8[j];
+ }
+ break;
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s16[j] = sign_extend(rs1, 5)
+ - env->vfp.vreg[src2].s16[j];
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s32[j] = sign_extend(rs1, 5)
+ - env->vfp.vreg[src2].s32[j];
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s64[j] = sign_extend(rs1, 5)
+ - env->vfp.vreg[src2].s64[j];
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ break;
+ }
+ } else {
+ vector_tail_common(env, dest, j, width);
+ }
+ }
+ env->vfp.vstart = 0;
+}
+
+/* vredxor.vs vd, vs2, vs1, vm # vd[0] = xor( vs1[0] , vs2[*] ) */
+void VECTOR_HELPER(vredxor_vs)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int width, lmul, vl, vlmax;
+ int i, j, src2;
+ uint64_t res = 0;
+
+ lmul = vector_get_lmul(env);
+ vector_lmul_check_reg(env, lmul, rs2, false);
+
+ if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ if (env->vfp.vstart != 0) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ vl = env->vfp.vl;
+ if (vl == 0) {
+ return;
+ }
+
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ for (i = 0; i < VLEN / 64; i++) {
+ env->vfp.vreg[rd].u64[i] = 0;
+ }
+
+ for (i = 0; i < vlmax; i++) {
+ src2 = rs2 + (i / (VLEN / width));
+ j = i % (VLEN / width);
+
+ if (i < vl) {
+ switch (width) {
+ case 8:
+ if (i == 0) {
+ res = env->vfp.vreg[rs1].u8[0];
+ }
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ res ^= env->vfp.vreg[src2].u8[j];
+ }
+ if (i == vl - 1) {
+ env->vfp.vreg[rd].u8[0] = res;
+ }
+ break;
+ case 16:
+ if (i == 0) {
+ res = env->vfp.vreg[rs1].u16[0];
+ }
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ res ^= env->vfp.vreg[src2].u16[j];
+ }
+ if (i == vl - 1) {
+ env->vfp.vreg[rd].u16[0] = res;
+ }
+ break;
+ case 32:
+ if (i == 0) {
+ res = env->vfp.vreg[rs1].u32[0];
+ }
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ res ^= env->vfp.vreg[src2].u32[j];
+ }
+ if (i == vl - 1) {
+ env->vfp.vreg[rd].u32[0] = res;
+ }
+ break;
+ case 64:
+ if (i == 0) {
+ res = env->vfp.vreg[rs1].u64[0];
+ }
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ res ^= env->vfp.vreg[src2].u64[j];
+ }
+ if (i == vl - 1) {
+ env->vfp.vreg[rd].u64[0] = res;
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ }
+ }
+    env->vfp.vstart = 0;
+}
+
+/* vfredosum.vs vd, vs2, vs1, vm # Ordered sum */
+void VECTOR_HELPER(vfredosum_vs)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ helper_vector_vfredsum_vs(env, vm, rs1, rs2, rd);
+}
+
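+/* vminu.vv vd, vs2, vs1, vm # vd[i] = min(vs2[i], vs1[i]), unsigned */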
+void VECTOR_HELPER(vminu_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int i, j, vl;
+ uint32_t lmul, width, src1, src2, dest, vlmax;
+
+ vl = env->vfp.vl;
+ lmul = vector_get_lmul(env);
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ vector_lmul_check_reg(env, lmul, rs1, false);
+ vector_lmul_check_reg(env, lmul, rs2, false);
+ vector_lmul_check_reg(env, lmul, rd, false);
+
+ for (i = 0; i < vlmax; i++) {
+ src1 = rs1 + (i / (VLEN / width));
+ src2 = rs2 + (i / (VLEN / width));
+ dest = rd + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 8:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ if (env->vfp.vreg[src1].u8[j] <=
+ env->vfp.vreg[src2].u8[j]) {
+ env->vfp.vreg[dest].u8[j] =
+ env->vfp.vreg[src1].u8[j];
+ } else {
+ env->vfp.vreg[dest].u8[j] =
+ env->vfp.vreg[src2].u8[j];
+ }
+ }
+ break;
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ if (env->vfp.vreg[src1].u16[j] <=
+ env->vfp.vreg[src2].u16[j]) {
+ env->vfp.vreg[dest].u16[j] =
+ env->vfp.vreg[src1].u16[j];
+ } else {
+ env->vfp.vreg[dest].u16[j] =
+ env->vfp.vreg[src2].u16[j];
+ }
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ if (env->vfp.vreg[src1].u32[j] <=
+ env->vfp.vreg[src2].u32[j]) {
+ env->vfp.vreg[dest].u32[j] =
+ env->vfp.vreg[src1].u32[j];
+ } else {
+ env->vfp.vreg[dest].u32[j] =
+ env->vfp.vreg[src2].u32[j];
+ }
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ if (env->vfp.vreg[src1].u64[j] <=
+ env->vfp.vreg[src2].u64[j]) {
+ env->vfp.vreg[dest].u64[j] =
+ env->vfp.vreg[src1].u64[j];
+ } else {
+ env->vfp.vreg[dest].u64[j] =
+ env->vfp.vreg[src2].u64[j];
+ }
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ break;
+ }
+ } else {
+ vector_tail_common(env, dest, j, width);
+ }
+ }
+ env->vfp.vstart = 0;
+}
+
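+/* vminu.vx vd, vs2, rs1, vm # vd[i] = min(vs2[i], x[rs1]), unsigned */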
+void VECTOR_HELPER(vminu_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int i, j, vl;
+ uint32_t lmul, width, src2, dest, vlmax;
+
+ vl = env->vfp.vl;
+ lmul = vector_get_lmul(env);
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ vector_lmul_check_reg(env, lmul, rs2, false);
+ vector_lmul_check_reg(env, lmul, rd, false);
+
+ for (i = 0; i < vlmax; i++) {
+ src2 = rs2 + (i / (VLEN / width));
+ dest = rd + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 8:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ if ((uint8_t)env->gpr[rs1] <=
+ env->vfp.vreg[src2].u8[j]) {
+ env->vfp.vreg[dest].u8[j] =
+ env->gpr[rs1];
+ } else {
+ env->vfp.vreg[dest].u8[j] =
+ env->vfp.vreg[src2].u8[j];
+ }
+ }
+ break;
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ if ((uint16_t)env->gpr[rs1] <=
+ env->vfp.vreg[src2].u16[j]) {
+ env->vfp.vreg[dest].u16[j] =
+ env->gpr[rs1];
+ } else {
+ env->vfp.vreg[dest].u16[j] =
+ env->vfp.vreg[src2].u16[j];
+ }
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ if ((uint32_t)env->gpr[rs1] <=
+ env->vfp.vreg[src2].u32[j]) {
+ env->vfp.vreg[dest].u32[j] =
+ env->gpr[rs1];
+ } else {
+ env->vfp.vreg[dest].u32[j] =
+ env->vfp.vreg[src2].u32[j];
+ }
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ if ((uint64_t)extend_gpr(env->gpr[rs1]) <=
+ env->vfp.vreg[src2].u64[j]) {
+ env->vfp.vreg[dest].u64[j] =
+ (uint64_t)extend_gpr(env->gpr[rs1]);
+ } else {
+ env->vfp.vreg[dest].u64[j] =
+ env->vfp.vreg[src2].u64[j];
+ }
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ break;
+ }
+ } else {
+ vector_tail_common(env, dest, j, width);
+ }
+ }
+ env->vfp.vstart = 0;
+}
+
+/* vredminu.vs vd, vs2, vs1, vm # vd[0] = minu( vs1[0] , vs2[*] ) */
+void VECTOR_HELPER(vredminu_vs)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int width, lmul, vl, vlmax;
+ int i, j, src2;
+ uint64_t minu = 0;
+
+ lmul = vector_get_lmul(env);
+ vector_lmul_check_reg(env, lmul, rs2, false);
+
+ if (vector_vtype_ill(env)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ if (env->vfp.vstart != 0) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ vl = env->vfp.vl;
+ if (vl == 0) {
+ return;
+ }
+
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ for (i = 0; i < VLEN / 64; i++) {
+ env->vfp.vreg[rd].u64[i] = 0;
+ }
+
+ for (i = 0; i < vlmax; i++) {
+ src2 = rs2 + (i / (VLEN / width));
+ j = i % (VLEN / width);
+
+ if (i < vl) {
+ switch (width) {
+ case 8:
+ if (i == 0) {
+ minu = env->vfp.vreg[rs1].u8[0];
+ }
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ if (minu > env->vfp.vreg[src2].u8[j]) {
+ minu = env->vfp.vreg[src2].u8[j];
+ }
+ }
+ if (i == vl - 1) {
+ env->vfp.vreg[rd].u8[0] = minu;
+ }
+ break;
+ case 16:
+ if (i == 0) {
+ minu = env->vfp.vreg[rs1].u16[0];
+ }
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ if (minu > env->vfp.vreg[src2].u16[j]) {
+ minu = env->vfp.vreg[src2].u16[j];
+ }
+ }
+ if (i == vl - 1) {
+ env->vfp.vreg[rd].u16[0] = minu;
+ }
+ break;
+ case 32:
+ if (i == 0) {
+ minu = env->vfp.vreg[rs1].u32[0];
+ }
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ if (minu > env->vfp.vreg[src2].u32[j]) {
+ minu = env->vfp.vreg[src2].u32[j];
+ }
+ }
+ if (i == vl - 1) {
+ env->vfp.vreg[rd].u32[0] = minu;
+ }
+ break;
+ case 64:
+ if (i == 0) {
+ minu = env->vfp.vreg[rs1].u64[0];
+ }
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ if (minu > env->vfp.vreg[src2].u64[j]) {
+ minu = env->vfp.vreg[src2].u64[j];
+ }
+ }
+ if (i == vl - 1) {
+ env->vfp.vreg[rd].u64[0] = minu;
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ }
+ }
+    env->vfp.vstart = 0;
+}
+
+/* vfmin.vv vd, vs2, vs1, vm # Vector-vector */
+void VECTOR_HELPER(vfmin_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int width, lmul, vl, vlmax;
+ int i, j, dest, src1, src2;
+
+ lmul = vector_get_lmul(env);
+ vl = env->vfp.vl;
+
+ if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ vector_lmul_check_reg(env, lmul, rs1, false);
+ vector_lmul_check_reg(env, lmul, rs2, false);
+ vector_lmul_check_reg(env, lmul, rd, false);
+ if (env->vfp.vstart >= vl) {
+ return;
+ }
+
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ for (i = 0; i < vlmax; i++) {
+ dest = rd + (i / (VLEN / width));
+ src2 = rs2 + (i / (VLEN / width));
+ src1 = rs1 + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f16[j] = float16_minnum(
+ env->vfp.vreg[src1].f16[j],
+ env->vfp.vreg[src2].f16[j],
+ &env->fp_status);
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f32[j] = float32_minnum(
+ env->vfp.vreg[src1].f32[j],
+ env->vfp.vreg[src2].f32[j],
+ &env->fp_status);
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f64[j] = float64_minnum(
+ env->vfp.vreg[src1].f64[j],
+ env->vfp.vreg[src2].f64[j],
+ &env->fp_status);
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+        } else {
+            vector_tail_fcommon(env, dest, j, width);
+        }
+ }
+    env->vfp.vstart = 0;
+}
+
+/* vfmin.vf vd, vs2, rs1, vm # vector-scalar */
+void VECTOR_HELPER(vfmin_vf)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int width, lmul, vl, vlmax;
+ int i, j, dest, src2;
+
+ lmul = vector_get_lmul(env);
+ vl = env->vfp.vl;
+ if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ vector_lmul_check_reg(env, lmul, rs2, false);
+ vector_lmul_check_reg(env, lmul, rd, false);
+
+ if (env->vfp.vstart >= vl) {
+ return;
+ }
+
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ for (i = 0; i < vlmax; i++) {
+ dest = rd + (i / (VLEN / width));
+ src2 = rs2 + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f16[j] = float16_minnum(
+ env->fpr[rs1],
+ env->vfp.vreg[src2].f16[j],
+ &env->fp_status);
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f32[j] = float32_minnum(
+ env->fpr[rs1],
+ env->vfp.vreg[src2].f32[j],
+ &env->fp_status);
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f64[j] = float64_minnum(
+ env->fpr[rs1],
+ env->vfp.vreg[src2].f64[j],
+ &env->fp_status);
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ } else {
+            switch (width) {
+            case 16:
+                env->vfp.vreg[dest].f16[j] = 0;
+                break;
+            case 32:
+                env->vfp.vreg[dest].f32[j] = 0;
+                break;
+            case 64:
+                env->vfp.vreg[dest].f64[j] = 0;
+                break;
+            default:
+                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+                return;
+            }
+ }
+ }
+    env->vfp.vstart = 0;
+}
+
+void VECTOR_HELPER(vmin_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int i, j, vl;
+ uint32_t lmul, width, src1, src2, dest, vlmax;
+
+ vl = env->vfp.vl;
+ lmul = vector_get_lmul(env);
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ vector_lmul_check_reg(env, lmul, rs1, false);
+ vector_lmul_check_reg(env, lmul, rs2, false);
+ vector_lmul_check_reg(env, lmul, rd, false);
+
+ for (i = 0; i < vlmax; i++) {
+ src1 = rs1 + (i / (VLEN / width));
+ src2 = rs2 + (i / (VLEN / width));
+ dest = rd + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 8:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ if (env->vfp.vreg[src1].s8[j] <=
+ env->vfp.vreg[src2].s8[j]) {
+ env->vfp.vreg[dest].s8[j] =
+ env->vfp.vreg[src1].s8[j];
+ } else {
+ env->vfp.vreg[dest].s8[j] =
+ env->vfp.vreg[src2].s8[j];
+ }
+ }
+ break;
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ if (env->vfp.vreg[src1].s16[j] <=
+ env->vfp.vreg[src2].s16[j]) {
+ env->vfp.vreg[dest].s16[j] =
+ env->vfp.vreg[src1].s16[j];
+ } else {
+ env->vfp.vreg[dest].s16[j] =
+ env->vfp.vreg[src2].s16[j];
+ }
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ if (env->vfp.vreg[src1].s32[j] <=
+ env->vfp.vreg[src2].s32[j]) {
+ env->vfp.vreg[dest].s32[j] =
+ env->vfp.vreg[src1].s32[j];
+ } else {
+ env->vfp.vreg[dest].s32[j] =
+ env->vfp.vreg[src2].s32[j];
+ }
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ if (env->vfp.vreg[src1].s64[j] <=
+ env->vfp.vreg[src2].s64[j]) {
+ env->vfp.vreg[dest].s64[j] =
+ env->vfp.vreg[src1].s64[j];
+ } else {
+ env->vfp.vreg[dest].s64[j] =
+ env->vfp.vreg[src2].s64[j];
+ }
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ break;
+ }
+ } else {
+ vector_tail_common(env, dest, j, width);
+ }
+ }
+ env->vfp.vstart = 0;
+}
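+
+/*
+ * vmin.vx: signed minimum of a scalar and each vector element.  The
+ * scalar from gpr[rs1] is truncated to SEW for the comparison; for
+ * SEW=64, extend_gpr() widens the register value first (RV32 case).
+ */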
+void VECTOR_HELPER(vmin_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int i, j, vl;
+ uint32_t lmul, width, src2, dest, vlmax;
+
+ vl = env->vfp.vl;
+ lmul = vector_get_lmul(env);
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ vector_lmul_check_reg(env, lmul, rs2, false);
+ vector_lmul_check_reg(env, lmul, rd, false);
+
+ for (i = 0; i < vlmax; i++) {
+ src2 = rs2 + (i / (VLEN / width));
+ dest = rd + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 8:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ if ((int8_t)env->gpr[rs1] <=
+ env->vfp.vreg[src2].s8[j]) {
+ env->vfp.vreg[dest].s8[j] =
+ env->gpr[rs1];
+ } else {
+ env->vfp.vreg[dest].s8[j] =
+ env->vfp.vreg[src2].s8[j];
+ }
+ }
+ break;
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ if ((int16_t)env->gpr[rs1] <=
+ env->vfp.vreg[src2].s16[j]) {
+ env->vfp.vreg[dest].s16[j] =
+ env->gpr[rs1];
+ } else {
+ env->vfp.vreg[dest].s16[j] =
+ env->vfp.vreg[src2].s16[j];
+ }
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ if ((int32_t)env->gpr[rs1] <=
+ env->vfp.vreg[src2].s32[j]) {
+ env->vfp.vreg[dest].s32[j] =
+ env->gpr[rs1];
+ } else {
+ env->vfp.vreg[dest].s32[j] =
+ env->vfp.vreg[src2].s32[j];
+ }
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ if ((int64_t)extend_gpr(env->gpr[rs1]) <=
+ env->vfp.vreg[src2].s64[j]) {
+ env->vfp.vreg[dest].s64[j] =
+ (int64_t)extend_gpr(env->gpr[rs1]);
+ } else {
+ env->vfp.vreg[dest].s64[j] =
+ env->vfp.vreg[src2].s64[j];
+ }
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ break;
+ }
+ } else {
+ vector_tail_common(env, dest, j, width);
+ }
+ }
+ env->vfp.vstart = 0;
+}
+
+/* vredmin.vs vd, vs2, vs1, vm # vd[0] = min( vs1[0] , vs2[*] ) */
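+/*
+ * Reduction scheme: the accumulator is seeded from vs1[0] at i == 0,
+ * folds in every active element of vs2, and is committed to vd[0] when
+ * the last element (i == vl - 1) is reached; the rest of vd is zeroed
+ * up front.
+ */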
+void VECTOR_HELPER(vredmin_vs)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int width, lmul, vl, vlmax;
+ int i, j, src2;
+ int64_t min = 0;
+
+    lmul = vector_get_lmul(env);
+
+    if (vector_vtype_ill(env)) {
+        riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+        return;
+    }
+    vector_lmul_check_reg(env, lmul, rs2, false);
+ if (env->vfp.vstart != 0) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ vl = env->vfp.vl;
+ if (vl == 0) {
+ return;
+ }
+
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ for (i = 0; i < VLEN / 64; i++) {
+ env->vfp.vreg[rd].u64[i] = 0;
+ }
+
+ for (i = 0; i < vlmax; i++) {
+ src2 = rs2 + (i / (VLEN / width));
+ j = i % (VLEN / width);
+
+ if (i < vl) {
+ switch (width) {
+ case 8:
+ if (i == 0) {
+ min = env->vfp.vreg[rs1].s8[0];
+ }
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ if (min > env->vfp.vreg[src2].s8[j]) {
+ min = env->vfp.vreg[src2].s8[j];
+ }
+ }
+ if (i == vl - 1) {
+ env->vfp.vreg[rd].s8[0] = min;
+ }
+ break;
+ case 16:
+ if (i == 0) {
+ min = env->vfp.vreg[rs1].s16[0];
+ }
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ if (min > env->vfp.vreg[src2].s16[j]) {
+ min = env->vfp.vreg[src2].s16[j];
+ }
+ }
+ if (i == vl - 1) {
+ env->vfp.vreg[rd].s16[0] = min;
+ }
+ break;
+ case 32:
+ if (i == 0) {
+ min = env->vfp.vreg[rs1].s32[0];
+ }
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ if (min > env->vfp.vreg[src2].s32[j]) {
+ min = env->vfp.vreg[src2].s32[j];
+ }
+ }
+ if (i == vl - 1) {
+ env->vfp.vreg[rd].s32[0] = min;
+ }
+ break;
+ case 64:
+ if (i == 0) {
+ min = env->vfp.vreg[rs1].s64[0];
+ }
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ if (min > env->vfp.vreg[src2].s64[j]) {
+ min = env->vfp.vreg[src2].s64[j];
+ }
+ }
+ if (i == vl - 1) {
+ env->vfp.vreg[rd].s64[0] = min;
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ }
+ }
+    env->vfp.vstart = 0;
+}
+
+/* vfredmin.vs vd, vs2, vs1, vm # Minimum value */
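+/*
+ * Uses float*_minnum(), so a quiet NaN in one operand yields the other,
+ * numeric, operand (IEEE 754-2008 minNum semantics).
+ */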
+void VECTOR_HELPER(vfredmin_vs)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int width, lmul, vl, vlmax;
+ int i, j, src2;
+    float16 min16 = 0;
+    float32 min32 = 0;
+    float64 min64 = 0;
+
+    lmul = vector_get_lmul(env);
+
+    if (vector_vtype_ill(env)) {
+        riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+        return;
+    }
+    vector_lmul_check_reg(env, lmul, rs2, false);
+ if (env->vfp.vstart != 0) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ vl = env->vfp.vl;
+ if (vl == 0) {
+ return;
+ }
+
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ for (i = 0; i < VLEN / 64; i++) {
+ env->vfp.vreg[rd].u64[i] = 0;
+ }
+
+ for (i = 0; i < vlmax; i++) {
+ src2 = rs2 + (i / (VLEN / width));
+ j = i % (VLEN / width);
+
+ if (i < vl) {
+ switch (width) {
+ case 16:
+ if (i == 0) {
+ min16 = env->vfp.vreg[rs1].f16[0];
+ }
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ min16 = float16_minnum(min16, env->vfp.vreg[src2].f16[j],
+ &env->fp_status);
+ }
+ if (i == vl - 1) {
+ env->vfp.vreg[rd].f16[0] = min16;
+ }
+ break;
+ case 32:
+ if (i == 0) {
+ min32 = env->vfp.vreg[rs1].f32[0];
+ }
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ min32 = float32_minnum(min32, env->vfp.vreg[src2].f32[j],
+ &env->fp_status);
+ }
+ if (i == vl - 1) {
+ env->vfp.vreg[rd].f32[0] = min32;
+ }
+ break;
+ case 64:
+ if (i == 0) {
+ min64 = env->vfp.vreg[rs1].f64[0];
+ }
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ min64 = float64_minnum(min64, env->vfp.vreg[src2].f64[j],
+ &env->fp_status);
+ }
+ if (i == vl - 1) {
+ env->vfp.vreg[rd].f64[0] = min64;
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ }
+ }
+    env->vfp.vstart = 0;
+}
+
+void VECTOR_HELPER(vmaxu_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int i, j, vl;
+ uint32_t lmul, width, src1, src2, dest, vlmax;
+
+ vl = env->vfp.vl;
+ lmul = vector_get_lmul(env);
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ vector_lmul_check_reg(env, lmul, rs1, false);
+ vector_lmul_check_reg(env, lmul, rs2, false);
+ vector_lmul_check_reg(env, lmul, rd, false);
+
+ for (i = 0; i < vlmax; i++) {
+ src1 = rs1 + (i / (VLEN / width));
+ src2 = rs2 + (i / (VLEN / width));
+ dest = rd + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 8:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ if (env->vfp.vreg[src1].u8[j] >=
+ env->vfp.vreg[src2].u8[j]) {
+ env->vfp.vreg[dest].u8[j] =
+ env->vfp.vreg[src1].u8[j];
+ } else {
+ env->vfp.vreg[dest].u8[j] =
+ env->vfp.vreg[src2].u8[j];
+ }
+ }
+ break;
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ if (env->vfp.vreg[src1].u16[j] >=
+ env->vfp.vreg[src2].u16[j]) {
+ env->vfp.vreg[dest].u16[j] =
+ env->vfp.vreg[src1].u16[j];
+ } else {
+ env->vfp.vreg[dest].u16[j] =
+ env->vfp.vreg[src2].u16[j];
+ }
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ if (env->vfp.vreg[src1].u32[j] >=
+ env->vfp.vreg[src2].u32[j]) {
+ env->vfp.vreg[dest].u32[j] =
+ env->vfp.vreg[src1].u32[j];
+ } else {
+ env->vfp.vreg[dest].u32[j] =
+ env->vfp.vreg[src2].u32[j];
+ }
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ if (env->vfp.vreg[src1].u64[j] >=
+ env->vfp.vreg[src2].u64[j]) {
+ env->vfp.vreg[dest].u64[j] =
+ env->vfp.vreg[src1].u64[j];
+ } else {
+ env->vfp.vreg[dest].u64[j] =
+ env->vfp.vreg[src2].u64[j];
+ }
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ break;
+ }
+ } else {
+ vector_tail_common(env, dest, j, width);
+ }
+ }
+
+ env->vfp.vstart = 0;
+}
+
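+/*
+ * vmaxu.vx: unsigned maximum against a scalar.  The (uintN_t) casts
+ * truncate gpr[rs1] to SEW; for SEW=64, extend_gpr() supplies the full
+ * 64-bit value.
+ */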
+void VECTOR_HELPER(vmaxu_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int i, j, vl;
+ uint32_t lmul, width, src2, dest, vlmax;
+
+ vl = env->vfp.vl;
+ lmul = vector_get_lmul(env);
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ vector_lmul_check_reg(env, lmul, rs2, false);
+ vector_lmul_check_reg(env, lmul, rd, false);
+
+ for (i = 0; i < vlmax; i++) {
+ src2 = rs2 + (i / (VLEN / width));
+ dest = rd + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 8:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ if ((uint8_t)env->gpr[rs1] >=
+ env->vfp.vreg[src2].u8[j]) {
+ env->vfp.vreg[dest].u8[j] =
+ env->gpr[rs1];
+ } else {
+ env->vfp.vreg[dest].u8[j] =
+ env->vfp.vreg[src2].u8[j];
+ }
+ }
+ break;
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ if ((uint16_t)env->gpr[rs1] >=
+ env->vfp.vreg[src2].u16[j]) {
+ env->vfp.vreg[dest].u16[j] =
+ env->gpr[rs1];
+ } else {
+ env->vfp.vreg[dest].u16[j] =
+ env->vfp.vreg[src2].u16[j];
+ }
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ if ((uint32_t)env->gpr[rs1] >=
+ env->vfp.vreg[src2].u32[j]) {
+ env->vfp.vreg[dest].u32[j] =
+ env->gpr[rs1];
+ } else {
+ env->vfp.vreg[dest].u32[j] =
+ env->vfp.vreg[src2].u32[j];
+ }
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ if ((uint64_t)extend_gpr(env->gpr[rs1]) >=
+ env->vfp.vreg[src2].u64[j]) {
+ env->vfp.vreg[dest].u64[j] =
+ (uint64_t)extend_gpr(env->gpr[rs1]);
+ } else {
+ env->vfp.vreg[dest].u64[j] =
+ env->vfp.vreg[src2].u64[j];
+ }
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ break;
+ }
+ } else {
+ vector_tail_common(env, dest, j, width);
+ }
+ }
+ env->vfp.vstart = 0;
+}
+
+/* vredmaxu.vs vd, vs2, vs1, vm # vd[0] = maxu( vs1[0] , vs2[*] ) */
+void VECTOR_HELPER(vredmaxu_vs)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int width, lmul, vl, vlmax;
+ int i, j, src2;
+ uint64_t maxu = 0;
+
+    lmul = vector_get_lmul(env);
+
+    if (vector_vtype_ill(env)) {
+        riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+        return;
+    }
+    vector_lmul_check_reg(env, lmul, rs2, false);
+ if (env->vfp.vstart != 0) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ vl = env->vfp.vl;
+ if (vl == 0) {
+ return;
+ }
+
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ for (i = 0; i < VLEN / 64; i++) {
+ env->vfp.vreg[rd].u64[i] = 0;
+ }
+
+ for (i = 0; i < vlmax; i++) {
+ src2 = rs2 + (i / (VLEN / width));
+ j = i % (VLEN / width);
+
+ if (i < vl) {
+ switch (width) {
+ case 8:
+ if (i == 0) {
+ maxu = env->vfp.vreg[rs1].u8[0];
+ }
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ if (maxu < env->vfp.vreg[src2].u8[j]) {
+ maxu = env->vfp.vreg[src2].u8[j];
+ }
+ }
+ if (i == vl - 1) {
+ env->vfp.vreg[rd].u8[0] = maxu;
+ }
+ break;
+ case 16:
+ if (i == 0) {
+ maxu = env->vfp.vreg[rs1].u16[0];
+ }
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ if (maxu < env->vfp.vreg[src2].u16[j]) {
+ maxu = env->vfp.vreg[src2].u16[j];
+ }
+ }
+ if (i == vl - 1) {
+ env->vfp.vreg[rd].u16[0] = maxu;
+ }
+ break;
+ case 32:
+ if (i == 0) {
+ maxu = env->vfp.vreg[rs1].u32[0];
+ }
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ if (maxu < env->vfp.vreg[src2].u32[j]) {
+ maxu = env->vfp.vreg[src2].u32[j];
+ }
+ }
+ if (i == vl - 1) {
+ env->vfp.vreg[rd].u32[0] = maxu;
+ }
+ break;
+ case 64:
+ if (i == 0) {
+ maxu = env->vfp.vreg[rs1].u64[0];
+ }
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ if (maxu < env->vfp.vreg[src2].u64[j]) {
+ maxu = env->vfp.vreg[src2].u64[j];
+ }
+ }
+ if (i == vl - 1) {
+ env->vfp.vreg[rd].u64[0] = maxu;
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ }
+ }
+    env->vfp.vstart = 0;
+}
+
+/* vfmax.vv vd, vs2, vs1, vm # Vector-vector */
+void VECTOR_HELPER(vfmax_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int width, lmul, vl, vlmax;
+ int i, j, dest, src1, src2;
+
+ lmul = vector_get_lmul(env);
+ vl = env->vfp.vl;
+
+ if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ vector_lmul_check_reg(env, lmul, rs1, false);
+ vector_lmul_check_reg(env, lmul, rs2, false);
+ vector_lmul_check_reg(env, lmul, rd, false);
+ if (env->vfp.vstart >= vl) {
+ return;
+ }
+
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ for (i = 0; i < vlmax; i++) {
+ dest = rd + (i / (VLEN / width));
+ src2 = rs2 + (i / (VLEN / width));
+ src1 = rs1 + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f16[j] = float16_maxnum(
+ env->vfp.vreg[src1].f16[j],
+ env->vfp.vreg[src2].f16[j],
+ &env->fp_status);
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f32[j] = float32_maxnum(
+ env->vfp.vreg[src1].f32[j],
+ env->vfp.vreg[src2].f32[j],
+ &env->fp_status);
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f64[j] = float64_maxnum(
+ env->vfp.vreg[src1].f64[j],
+ env->vfp.vreg[src2].f64[j],
+ &env->fp_status);
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ } else {
+            switch (width) {
+            case 16:
+                env->vfp.vreg[dest].f16[j] = 0;
+                break;
+            case 32:
+                env->vfp.vreg[dest].f32[j] = 0;
+                break;
+            case 64:
+                env->vfp.vreg[dest].f64[j] = 0;
+                break;
+            default:
+                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+                return;
+            }
+ }
+ }
+    env->vfp.vstart = 0;
+}
+
+/* vfmax.vf vd, vs2, rs1, vm # vector-scalar */
+void VECTOR_HELPER(vfmax_vf)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int width, lmul, vl, vlmax;
+ int i, j, dest, src2;
+
+ lmul = vector_get_lmul(env);
+ vl = env->vfp.vl;
+
+    if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+        riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+        return;
+    }
+    vector_lmul_check_reg(env, lmul, rs2, false);
+    vector_lmul_check_reg(env, lmul, rd, false);
+
+    if (env->vfp.vstart >= vl) {
+        return;
+    }
+
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ for (i = 0; i < vlmax; i++) {
+ dest = rd + (i / (VLEN / width));
+ src2 = rs2 + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f16[j] = float16_maxnum(
+ env->fpr[rs1],
+ env->vfp.vreg[src2].f16[j],
+ &env->fp_status);
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f32[j] = float32_maxnum(
+ env->fpr[rs1],
+ env->vfp.vreg[src2].f32[j],
+ &env->fp_status);
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f64[j] = float64_maxnum(
+ env->fpr[rs1],
+ env->vfp.vreg[src2].f64[j],
+ &env->fp_status);
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ } else {
+            switch (width) {
+            case 16:
+                env->vfp.vreg[dest].f16[j] = 0;
+                break;
+            case 32:
+                env->vfp.vreg[dest].f32[j] = 0;
+                break;
+            case 64:
+                env->vfp.vreg[dest].f64[j] = 0;
+                break;
+            default:
+                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+                return;
+            }
+ }
+ }
+    env->vfp.vstart = 0;
+}
+
+void VECTOR_HELPER(vmax_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int i, j, vl;
+ uint32_t lmul, width, src1, src2, dest, vlmax;
+
+ vl = env->vfp.vl;
+ lmul = vector_get_lmul(env);
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ vector_lmul_check_reg(env, lmul, rs1, false);
+ vector_lmul_check_reg(env, lmul, rs2, false);
+ vector_lmul_check_reg(env, lmul, rd, false);
+
+ for (i = 0; i < vlmax; i++) {
+ src1 = rs1 + (i / (VLEN / width));
+ src2 = rs2 + (i / (VLEN / width));
+ dest = rd + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 8:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ if (env->vfp.vreg[src1].s8[j] >=
+ env->vfp.vreg[src2].s8[j]) {
+ env->vfp.vreg[dest].s8[j] =
+ env->vfp.vreg[src1].s8[j];
+ } else {
+ env->vfp.vreg[dest].s8[j] =
+ env->vfp.vreg[src2].s8[j];
+ }
+ }
+ break;
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ if (env->vfp.vreg[src1].s16[j] >=
+ env->vfp.vreg[src2].s16[j]) {
+ env->vfp.vreg[dest].s16[j] =
+ env->vfp.vreg[src1].s16[j];
+ } else {
+ env->vfp.vreg[dest].s16[j] =
+ env->vfp.vreg[src2].s16[j];
+ }
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ if (env->vfp.vreg[src1].s32[j] >=
+ env->vfp.vreg[src2].s32[j]) {
+ env->vfp.vreg[dest].s32[j] =
+ env->vfp.vreg[src1].s32[j];
+ } else {
+ env->vfp.vreg[dest].s32[j] =
+ env->vfp.vreg[src2].s32[j];
+ }
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ if (env->vfp.vreg[src1].s64[j] >=
+ env->vfp.vreg[src2].s64[j]) {
+ env->vfp.vreg[dest].s64[j] =
+ env->vfp.vreg[src1].s64[j];
+ } else {
+ env->vfp.vreg[dest].s64[j] =
+ env->vfp.vreg[src2].s64[j];
+ }
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ break;
+ }
+ } else {
+ vector_tail_common(env, dest, j, width);
+ }
+ }
+ env->vfp.vstart = 0;
+}
+
+void VECTOR_HELPER(vmax_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int i, j, vl;
+ uint32_t lmul, width, src2, dest, vlmax;
+
+ vl = env->vfp.vl;
+ lmul = vector_get_lmul(env);
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ vector_lmul_check_reg(env, lmul, rs2, false);
+ vector_lmul_check_reg(env, lmul, rd, false);
+
+ for (i = 0; i < vlmax; i++) {
+ src2 = rs2 + (i / (VLEN / width));
+ dest = rd + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 8:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ if ((int8_t)env->gpr[rs1] >=
+ env->vfp.vreg[src2].s8[j]) {
+ env->vfp.vreg[dest].s8[j] =
+ env->gpr[rs1];
+ } else {
+ env->vfp.vreg[dest].s8[j] =
+ env->vfp.vreg[src2].s8[j];
+ }
+ }
+ break;
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ if ((int16_t)env->gpr[rs1] >=
+ env->vfp.vreg[src2].s16[j]) {
+ env->vfp.vreg[dest].s16[j] =
+ env->gpr[rs1];
+ } else {
+ env->vfp.vreg[dest].s16[j] =
+ env->vfp.vreg[src2].s16[j];
+ }
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ if ((int32_t)env->gpr[rs1] >=
+ env->vfp.vreg[src2].s32[j]) {
+ env->vfp.vreg[dest].s32[j] =
+ env->gpr[rs1];
+ } else {
+ env->vfp.vreg[dest].s32[j] =
+ env->vfp.vreg[src2].s32[j];
+ }
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ if ((int64_t)extend_gpr(env->gpr[rs1]) >=
+ env->vfp.vreg[src2].s64[j]) {
+ env->vfp.vreg[dest].s64[j] =
+ (int64_t)extend_gpr(env->gpr[rs1]);
+ } else {
+ env->vfp.vreg[dest].s64[j] =
+ env->vfp.vreg[src2].s64[j];
+ }
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ break;
+ }
+ } else {
+ vector_tail_common(env, dest, j, width);
+ }
+ }
+ env->vfp.vstart = 0;
+}
+
+/* vredmax.vs vd, vs2, vs1, vm # vd[0] = max( vs1[0] , vs2[*] ) */
+void VECTOR_HELPER(vredmax_vs)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int width, lmul, vl, vlmax;
+ int i, j, src2;
+ int64_t max = 0;
+
+    lmul = vector_get_lmul(env);
+
+    if (vector_vtype_ill(env)) {
+        riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+        return;
+    }
+    vector_lmul_check_reg(env, lmul, rs2, false);
+ if (env->vfp.vstart != 0) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ vl = env->vfp.vl;
+ if (vl == 0) {
+ return;
+ }
+
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ for (i = 0; i < VLEN / 64; i++) {
+ env->vfp.vreg[rd].u64[i] = 0;
+ }
+
+ for (i = 0; i < vlmax; i++) {
+ src2 = rs2 + (i / (VLEN / width));
+ j = i % (VLEN / width);
+
+ if (i < vl) {
+ switch (width) {
+ case 8:
+ if (i == 0) {
+ max = env->vfp.vreg[rs1].s8[0];
+ }
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ if (max < env->vfp.vreg[src2].s8[j]) {
+ max = env->vfp.vreg[src2].s8[j];
+ }
+ }
+ if (i == vl - 1) {
+ env->vfp.vreg[rd].s8[0] = max;
+ }
+ break;
+ case 16:
+ if (i == 0) {
+ max = env->vfp.vreg[rs1].s16[0];
+ }
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ if (max < env->vfp.vreg[src2].s16[j]) {
+ max = env->vfp.vreg[src2].s16[j];
+ }
+ }
+ if (i == vl - 1) {
+ env->vfp.vreg[rd].s16[0] = max;
+ }
+ break;
+ case 32:
+ if (i == 0) {
+ max = env->vfp.vreg[rs1].s32[0];
+ }
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ if (max < env->vfp.vreg[src2].s32[j]) {
+ max = env->vfp.vreg[src2].s32[j];
+ }
+ }
+ if (i == vl - 1) {
+ env->vfp.vreg[rd].s32[0] = max;
+ }
+ break;
+ case 64:
+ if (i == 0) {
+ max = env->vfp.vreg[rs1].s64[0];
+ }
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ if (max < env->vfp.vreg[src2].s64[j]) {
+ max = env->vfp.vreg[src2].s64[j];
+ }
+ }
+ if (i == vl - 1) {
+ env->vfp.vreg[rd].s64[0] = max;
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ }
+ }
+    env->vfp.vstart = 0;
+}
+
+/* vfredmax.vs vd, vs2, vs1, vm # Maximum value */
+void VECTOR_HELPER(vfredmax_vs)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int width, lmul, vl, vlmax;
+ int i, j, src2;
+    float16 max16 = 0;
+    float32 max32 = 0;
+    float64 max64 = 0;
+
+    lmul = vector_get_lmul(env);
+
+    if (vector_vtype_ill(env)) {
+        riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+        return;
+    }
+    vector_lmul_check_reg(env, lmul, rs2, false);
+ if (env->vfp.vstart != 0) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ vl = env->vfp.vl;
+ if (vl == 0) {
+ return;
+ }
+
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ for (i = 0; i < VLEN / 64; i++) {
+ env->vfp.vreg[rd].u64[i] = 0;
+ }
+
+ for (i = 0; i < vlmax; i++) {
+ src2 = rs2 + (i / (VLEN / width));
+ j = i % (VLEN / width);
+
+ if (i < vl) {
+ switch (width) {
+ case 16:
+ if (i == 0) {
+ max16 = env->vfp.vreg[rs1].f16[0];
+ }
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ max16 = float16_maxnum(max16, env->vfp.vreg[src2].f16[j],
+ &env->fp_status);
+ }
+ if (i == vl - 1) {
+ env->vfp.vreg[rd].f16[0] = max16;
+ }
+ break;
+ case 32:
+ if (i == 0) {
+ max32 = env->vfp.vreg[rs1].f32[0];
+ }
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ max32 = float32_maxnum(max32, env->vfp.vreg[src2].f32[j],
+ &env->fp_status);
+ }
+ if (i == vl - 1) {
+ env->vfp.vreg[rd].f32[0] = max32;
+ }
+ break;
+ case 64:
+ if (i == 0) {
+ max64 = env->vfp.vreg[rs1].f64[0];
+ }
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ max64 = float64_maxnum(max64, env->vfp.vreg[src2].f64[j],
+ &env->fp_status);
+ }
+ if (i == vl - 1) {
+ env->vfp.vreg[rd].f64[0] = max64;
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ }
+ }
+    env->vfp.vstart = 0;
+}
+
+/* vfsgnj.vv vd, vs2, vs1, vm # Vector-vector */
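+/*
+ * Sign injection via deposit*(): bits [SEW-2:0] (the magnitude) are
+ * taken from vs2 while the surviving top bit of the first operand
+ * supplies the sign, i.e. vd[i] = {sign(vs1[i]), magnitude(vs2[i])}.
+ */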
+void VECTOR_HELPER(vfsgnj_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int width, lmul, vl, vlmax;
+ int i, j, dest, src1, src2;
+
+ lmul = vector_get_lmul(env);
+ vl = env->vfp.vl;
+
+ if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ vector_lmul_check_reg(env, lmul, rs1, false);
+ vector_lmul_check_reg(env, lmul, rs2, false);
+ vector_lmul_check_reg(env, lmul, rd, false);
+ if (env->vfp.vstart >= vl) {
+ return;
+ }
+
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ for (i = 0; i < vlmax; i++) {
+ dest = rd + (i / (VLEN / width));
+ src2 = rs2 + (i / (VLEN / width));
+ src1 = rs1 + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f16[j] = deposit16(
+ env->vfp.vreg[src1].f16[j],
+ 0,
+ 15,
+ env->vfp.vreg[src2].f16[j]);
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f32[j] = deposit32(
+ env->vfp.vreg[src1].f32[j],
+ 0,
+ 31,
+ env->vfp.vreg[src2].f32[j]);
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f64[j] = deposit64(
+ env->vfp.vreg[src1].f64[j],
+ 0,
+ 63,
+ env->vfp.vreg[src2].f64[j]);
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ } else {
+            switch (width) {
+            case 16:
+                env->vfp.vreg[dest].f16[j] = 0;
+                break;
+            case 32:
+                env->vfp.vreg[dest].f32[j] = 0;
+                break;
+            case 64:
+                env->vfp.vreg[dest].f64[j] = 0;
+                break;
+            default:
+                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+                return;
+            }
+ }
+ }
+    env->vfp.vstart = 0;
+}
+
+/* vfsgnj.vf vd, vs2, rs1, vm # vector-scalar */
+void VECTOR_HELPER(vfsgnj_vf)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int width, lmul, vl, vlmax;
+ int i, j, dest, src2;
+
+ lmul = vector_get_lmul(env);
+ vl = env->vfp.vl;
+
+ if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ vector_lmul_check_reg(env, lmul, rs2, false);
+ vector_lmul_check_reg(env, lmul, rd, false);
+
+ if (env->vfp.vstart >= vl) {
+ return;
+ }
+
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ for (i = 0; i < vlmax; i++) {
+ dest = rd + (i / (VLEN / width));
+ src2 = rs2 + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f16[j] = deposit16(
+ env->fpr[rs1],
+ 0,
+ 15,
+ env->vfp.vreg[src2].f16[j]);
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f32[j] = deposit32(
+ env->fpr[rs1],
+ 0,
+ 31,
+ env->vfp.vreg[src2].f32[j]);
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f64[j] = deposit64(
+ env->fpr[rs1],
+ 0,
+ 63,
+ env->vfp.vreg[src2].f64[j]);
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ } else {
+            switch (width) {
+            case 16:
+                env->vfp.vreg[dest].f16[j] = 0;
+                break;
+            case 32:
+                env->vfp.vreg[dest].f32[j] = 0;
+                break;
+            case 64:
+                env->vfp.vreg[dest].f64[j] = 0;
+                break;
+            default:
+                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+                return;
+            }
+ }
+ }
+    env->vfp.vstart = 0;
+}
+
+void VECTOR_HELPER(vand_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int i, j, vl;
+ uint32_t lmul, width, src1, src2, dest, vlmax;
+
+ vl = env->vfp.vl;
+ lmul = vector_get_lmul(env);
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ vector_lmul_check_reg(env, lmul, rs1, false);
+ vector_lmul_check_reg(env, lmul, rs2, false);
+ vector_lmul_check_reg(env, lmul, rd, false);
+
+ for (i = 0; i < vlmax; i++) {
+ src1 = rs1 + (i / (VLEN / width));
+ src2 = rs2 + (i / (VLEN / width));
+ dest = rd + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 8:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].u8[j] = env->vfp.vreg[src1].u8[j]
+ & env->vfp.vreg[src2].u8[j];
+ }
+ break;
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].u16[j] = env->vfp.vreg[src1].u16[j]
+ & env->vfp.vreg[src2].u16[j];
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].u32[j] = env->vfp.vreg[src1].u32[j]
+ & env->vfp.vreg[src2].u32[j];
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].u64[j] = env->vfp.vreg[src1].u64[j]
+ & env->vfp.vreg[src2].u64[j];
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ break;
+ }
+ } else {
+ vector_tail_common(env, dest, j, width);
+ }
+ }
+
+ env->vfp.vstart = 0;
+}
+
+void VECTOR_HELPER(vand_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int i, j, vl;
+ uint32_t lmul, width, src2, dest, vlmax;
+
+ vl = env->vfp.vl;
+ lmul = vector_get_lmul(env);
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ vector_lmul_check_reg(env, lmul, rs2, false);
+ vector_lmul_check_reg(env, lmul, rd, false);
+
+ for (i = 0; i < vlmax; i++) {
+ src2 = rs2 + (i / (VLEN / width));
+ dest = rd + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 8:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].u8[j] = env->gpr[rs1]
+ & env->vfp.vreg[src2].u8[j];
+ }
+ break;
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].u16[j] = env->gpr[rs1]
+ & env->vfp.vreg[src2].u16[j];
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].u32[j] = env->gpr[rs1]
+ & env->vfp.vreg[src2].u32[j];
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].u64[j] =
+ (uint64_t)extend_gpr(env->gpr[rs1])
+ & env->vfp.vreg[src2].u64[j];
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ break;
+ }
+ } else {
+ vector_tail_common(env, dest, j, width);
+ }
+ }
+ env->vfp.vstart = 0;
+}
+
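+/*
+ * vand.vi: for the immediate forms, rs1 carries the 5-bit simm from the
+ * encoding rather than a register index; sign_extend(rs1, 5) widens it
+ * to the element type.
+ */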
+void VECTOR_HELPER(vand_vi)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int i, j, vl;
+ uint32_t lmul, width, src2, dest, vlmax;
+
+ vl = env->vfp.vl;
+ lmul = vector_get_lmul(env);
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ vector_lmul_check_reg(env, lmul, rs2, false);
+ vector_lmul_check_reg(env, lmul, rd, false);
+
+ for (i = 0; i < vlmax; i++) {
+ src2 = rs2 + (i / (VLEN / width));
+ dest = rd + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 8:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s8[j] = sign_extend(rs1, 5)
+ & env->vfp.vreg[src2].s8[j];
+ }
+ break;
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s16[j] = sign_extend(rs1, 5)
+ & env->vfp.vreg[src2].s16[j];
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s32[j] = sign_extend(rs1, 5)
+ & env->vfp.vreg[src2].s32[j];
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s64[j] = sign_extend(rs1, 5)
+ & env->vfp.vreg[src2].s64[j];
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ break;
+ }
+ } else {
+ vector_tail_common(env, dest, j, width);
+ }
+ }
+ env->vfp.vstart = 0;
+}
+
+/* vfsgnjn.vv vd, vs2, vs1, vm # Vector-vector */
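+/*
+ * vfsgnjn: as vfsgnj but with the injected sign inverted; ~x flips
+ * every bit of the first operand, and deposit*() then keeps only its
+ * top (sign) bit, restoring vs2's magnitude in the low bits.
+ */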
+void VECTOR_HELPER(vfsgnjn_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int width, lmul, vl, vlmax;
+ int i, j, dest, src1, src2;
+
+ lmul = vector_get_lmul(env);
+ vl = env->vfp.vl;
+ if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ vector_lmul_check_reg(env, lmul, rs1, false);
+ vector_lmul_check_reg(env, lmul, rs2, false);
+ vector_lmul_check_reg(env, lmul, rd, false);
+
+ if (env->vfp.vstart >= vl) {
+ return;
+ }
+
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ for (i = 0; i < vlmax; i++) {
+ dest = rd + (i / (VLEN / width));
+ src2 = rs2 + (i / (VLEN / width));
+ src1 = rs1 + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f16[j] = deposit16(
+ ~env->vfp.vreg[src1].f16[j],
+ 0,
+ 15,
+ env->vfp.vreg[src2].f16[j]);
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f32[j] = deposit32(
+ ~env->vfp.vreg[src1].f32[j],
+ 0,
+ 31,
+ env->vfp.vreg[src2].f32[j]);
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f64[j] = deposit64(
+ ~env->vfp.vreg[src1].f64[j],
+ 0,
+ 63,
+ env->vfp.vreg[src2].f64[j]);
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ } else {
+            switch (width) {
+            case 16:
+                env->vfp.vreg[dest].f16[j] = 0;
+                break;
+            case 32:
+                env->vfp.vreg[dest].f32[j] = 0;
+                break;
+            case 64:
+                env->vfp.vreg[dest].f64[j] = 0;
+                break;
+            default:
+                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+                return;
+            }
+ }
+ }
+    env->vfp.vstart = 0;
+}
+
+/* vfsgnjn.vf vd, vs2, rs1, vm # vector-scalar */
+void VECTOR_HELPER(vfsgnjn_vf)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int width, lmul, vl, vlmax;
+ int i, j, dest, src2;
+
+ lmul = vector_get_lmul(env);
+ vl = env->vfp.vl;
+
+ if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ vector_lmul_check_reg(env, lmul, rs2, false);
+ vector_lmul_check_reg(env, lmul, rd, false);
+
+ if (env->vfp.vstart >= vl) {
+ return;
+ }
+
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ for (i = 0; i < vlmax; i++) {
+ dest = rd + (i / (VLEN / width));
+ src2 = rs2 + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f16[j] = deposit16(
+ ~env->fpr[rs1],
+ 0,
+ 15,
+ env->vfp.vreg[src2].f16[j]);
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f32[j] = deposit32(
+ ~env->fpr[rs1],
+ 0,
+ 31,
+ env->vfp.vreg[src2].f32[j]);
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f64[j] = deposit64(
+ ~env->fpr[rs1],
+ 0,
+ 63,
+ env->vfp.vreg[src2].f64[j]);
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ } else {
+            switch (width) {
+            case 16:
+                env->vfp.vreg[dest].f16[j] = 0;
+                break;
+            case 32:
+                env->vfp.vreg[dest].f32[j] = 0;
+                break;
+            case 64:
+                env->vfp.vreg[dest].f64[j] = 0;
+                break;
+            default:
+                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+                return;
+            }
+ }
+ }
+    env->vfp.vstart = 0;
+}
+
+void VECTOR_HELPER(vor_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int i, j, vl;
+ uint32_t lmul, width, src1, src2, dest, vlmax;
+
+ vl = env->vfp.vl;
+ lmul = vector_get_lmul(env);
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ vector_lmul_check_reg(env, lmul, rs1, false);
+ vector_lmul_check_reg(env, lmul, rs2, false);
+ vector_lmul_check_reg(env, lmul, rd, false);
+
+ for (i = 0; i < vlmax; i++) {
+ src1 = rs1 + (i / (VLEN / width));
+ src2 = rs2 + (i / (VLEN / width));
+ dest = rd + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 8:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].u8[j] = env->vfp.vreg[src1].u8[j]
+ | env->vfp.vreg[src2].u8[j];
+ }
+ break;
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].u16[j] = env->vfp.vreg[src1].u16[j]
+ | env->vfp.vreg[src2].u16[j];
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].u32[j] = env->vfp.vreg[src1].u32[j]
+ | env->vfp.vreg[src2].u32[j];
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].u64[j] = env->vfp.vreg[src1].u64[j]
+ | env->vfp.vreg[src2].u64[j];
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ break;
+ }
+ } else {
+ vector_tail_common(env, dest, j, width);
+ }
+ }
+ env->vfp.vstart = 0;
+}
+
+void VECTOR_HELPER(vor_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int i, j, vl;
+ uint32_t lmul, width, src2, dest, vlmax;
+
+ vl = env->vfp.vl;
+ lmul = vector_get_lmul(env);
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ vector_lmul_check_reg(env, lmul, rs2, false);
+ vector_lmul_check_reg(env, lmul, rd, false);
+
+ for (i = 0; i < vlmax; i++) {
+ src2 = rs2 + (i / (VLEN / width));
+ dest = rd + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 8:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].u8[j] = env->gpr[rs1]
+ | env->vfp.vreg[src2].u8[j];
+ }
+ break;
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].u16[j] = env->gpr[rs1]
+ | env->vfp.vreg[src2].u16[j];
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].u32[j] = env->gpr[rs1]
+ | env->vfp.vreg[src2].u32[j];
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].u64[j] =
+ (uint64_t)extend_gpr(env->gpr[rs1])
+ | env->vfp.vreg[src2].u64[j];
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ break;
+ }
+ } else {
+ vector_tail_common(env, dest, j, width);
+ }
+ }
+ env->vfp.vstart = 0;
+}
+
+void VECTOR_HELPER(vor_vi)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int i, j, vl;
+ uint32_t lmul, width, src2, dest, vlmax;
+
+ vl = env->vfp.vl;
+ lmul = vector_get_lmul(env);
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ vector_lmul_check_reg(env, lmul, rs2, false);
+ vector_lmul_check_reg(env, lmul, rd, false);
+
+ for (i = 0; i < vlmax; i++) {
+ src2 = rs2 + (i / (VLEN / width));
+ dest = rd + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 8:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s8[j] = sign_extend(rs1, 5)
+ | env->vfp.vreg[src2].s8[j];
+ }
+ break;
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s16[j] = sign_extend(rs1, 5)
+ | env->vfp.vreg[src2].s16[j];
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s32[j] = sign_extend(rs1, 5)
+ | env->vfp.vreg[src2].s32[j];
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s64[j] = sign_extend(rs1, 5)
+ | env->vfp.vreg[src2].s64[j];
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ break;
+ }
+ } else {
+ vector_tail_common(env, dest, j, width);
+ }
+ }
+ env->vfp.vstart = 0;
+}
+
+/* vfsgnjx.vv vd, vs2, vs1, vm # Vector-vector */
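+/*
+ * vfsgnjx: the injected sign is sign(vs1) XOR sign(vs2); XOR-ing the
+ * full operands computes that in the top bit, and deposit*() restores
+ * vs2's magnitude below it.
+ */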
+void VECTOR_HELPER(vfsgnjx_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int width, lmul, vl, vlmax;
+ int i, j, dest, src1, src2;
+
+ lmul = vector_get_lmul(env);
+ vl = env->vfp.vl;
+
+ if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ vector_lmul_check_reg(env, lmul, rs1, false);
+ vector_lmul_check_reg(env, lmul, rs2, false);
+ vector_lmul_check_reg(env, lmul, rd, false);
+ if (env->vfp.vstart >= vl) {
+ return;
+ }
+
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ for (i = 0; i < vlmax; i++) {
+ dest = rd + (i / (VLEN / width));
+ src2 = rs2 + (i / (VLEN / width));
+ src1 = rs1 + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f16[j] = deposit16(
+ env->vfp.vreg[src1].f16[j] ^
+ env->vfp.vreg[src2].f16[j],
+ 0,
+ 15,
+ env->vfp.vreg[src2].f16[j]);
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f32[j] = deposit32(
+ env->vfp.vreg[src1].f32[j] ^
+ env->vfp.vreg[src2].f32[j],
+ 0,
+ 31,
+ env->vfp.vreg[src2].f32[j]);
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f64[j] = deposit64(
+ env->vfp.vreg[src1].f64[j] ^
+ env->vfp.vreg[src2].f64[j],
+ 0,
+ 63,
+ env->vfp.vreg[src2].f64[j]);
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ } else {
+            switch (width) {
+            case 16:
+                env->vfp.vreg[dest].f16[j] = 0;
+                break;
+            case 32:
+                env->vfp.vreg[dest].f32[j] = 0;
+                break;
+            case 64:
+                env->vfp.vreg[dest].f64[j] = 0;
+                break;
+            default:
+                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+                return;
+            }
+ }
+ }
+    env->vfp.vstart = 0;
+}
+
+/* vfsgnjx.vf vd, vs2, rs1, vm # vector-scalar */
+void VECTOR_HELPER(vfsgnjx_vf)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int width, lmul, vl, vlmax;
+ int i, j, dest, src2;
+
+ lmul = vector_get_lmul(env);
+ vl = env->vfp.vl;
+
+ if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ vector_lmul_check_reg(env, lmul, rs2, false);
+ vector_lmul_check_reg(env, lmul, rd, false);
+ if (env->vfp.vstart >= vl) {
+ return;
+ }
+
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ for (i = 0; i < vlmax; i++) {
+ dest = rd + (i / (VLEN / width));
+ src2 = rs2 + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f16[j] = deposit16(
+ env->fpr[rs1] ^
+ env->vfp.vreg[src2].f16[j],
+ 0,
+ 15,
+ env->vfp.vreg[src2].f16[j]);
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f32[j] = deposit32(
+ env->fpr[rs1] ^
+ env->vfp.vreg[src2].f32[j],
+ 0,
+ 31,
+ env->vfp.vreg[src2].f32[j]);
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f64[j] = deposit64(
+ env->fpr[rs1] ^
+ env->vfp.vreg[src2].f64[j],
+ 0,
+ 63,
+ env->vfp.vreg[src2].f64[j]);
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ } else {
+            switch (width) {
+            case 16:
+                env->vfp.vreg[dest].f16[j] = 0;
+                break;
+            case 32:
+                env->vfp.vreg[dest].f32[j] = 0;
+                break;
+            case 64:
+                env->vfp.vreg[dest].f64[j] = 0;
+                break;
+            default:
+                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+                return;
+            }
+ }
+ }
+    env->vfp.vstart = 0;
+}
+
+void VECTOR_HELPER(vxor_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int i, j, vl;
+ uint32_t lmul, width, src1, src2, dest, vlmax;
+
+ vl = env->vfp.vl;
+ lmul = vector_get_lmul(env);
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ vector_lmul_check_reg(env, lmul, rs1, false);
+ vector_lmul_check_reg(env, lmul, rs2, false);
+ vector_lmul_check_reg(env, lmul, rd, false);
+
+ for (i = 0; i < vlmax; i++) {
+ src1 = rs1 + (i / (VLEN / width));
+ src2 = rs2 + (i / (VLEN / width));
+ dest = rd + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 8:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].u8[j] = env->vfp.vreg[src1].u8[j]
+ ^ env->vfp.vreg[src2].u8[j];
+ }
+ break;
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].u16[j] = env->vfp.vreg[src1].u16[j]
+ ^ env->vfp.vreg[src2].u16[j];
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].u32[j] = env->vfp.vreg[src1].u32[j]
+ ^ env->vfp.vreg[src2].u32[j];
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].u64[j] = env->vfp.vreg[src1].u64[j]
+ ^ env->vfp.vreg[src2].u64[j];
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ break;
+ }
+ } else {
+ vector_tail_common(env, dest, j, width);
+ }
+ }
+ env->vfp.vstart = 0;
+}
+
+void VECTOR_HELPER(vxor_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int i, j, vl;
+ uint32_t lmul, width, src2, dest, vlmax;
+
+ vl = env->vfp.vl;
+ lmul = vector_get_lmul(env);
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ vector_lmul_check_reg(env, lmul, rs2, false);
+ vector_lmul_check_reg(env, lmul, rd, false);
+
+ for (i = 0; i < vlmax; i++) {
+ src2 = rs2 + (i / (VLEN / width));
+ dest = rd + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 8:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].u8[j] = env->gpr[rs1]
+ ^ env->vfp.vreg[src2].u8[j];
+ }
+ break;
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].u16[j] = env->gpr[rs1]
+ ^ env->vfp.vreg[src2].u16[j];
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].u32[j] = env->gpr[rs1]
+ ^ env->vfp.vreg[src2].u32[j];
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].u64[j] =
+ (uint64_t)extend_gpr(env->gpr[rs1])
+ ^ env->vfp.vreg[src2].u64[j];
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ break;
+ }
+ } else {
+ vector_tail_common(env, dest, j, width);
+ }
+ }
+ env->vfp.vstart = 0;
+}
+
+void VECTOR_HELPER(vxor_vi)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int i, j, vl;
+ uint32_t lmul, width, src2, dest, vlmax;
+
+ vl = env->vfp.vl;
+ lmul = vector_get_lmul(env);
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ vector_lmul_check_reg(env, lmul, rs2, false);
+ vector_lmul_check_reg(env, lmul, rd, false);
+
+ for (i = 0; i < vlmax; i++) {
+ src2 = rs2 + (i / (VLEN / width));
+ dest = rd + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 8:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s8[j] = sign_extend(rs1, 5)
+ ^ env->vfp.vreg[src2].s8[j];
+ }
+ break;
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s16[j] = sign_extend(rs1, 5)
+ ^ env->vfp.vreg[src2].s16[j];
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s32[j] = sign_extend(rs1, 5)
+ ^ env->vfp.vreg[src2].s32[j];
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s64[j] = sign_extend(rs1, 5)
+ ^ env->vfp.vreg[src2].s64[j];
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ break;
+ }
+ } else {
+ vector_tail_common(env, dest, j, width);
+ }
+ }
+ env->vfp.vstart = 0;
+}
+
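+/*
+ * vadc.vvm: unmasked add-with-carry.  The per-element carry-in bit is
+ * read from mask register v0 via vector_get_carry(); the destination
+ * may not overlap the carry mask v0 (vector_overlap_carry() check).
+ */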
+void VECTOR_HELPER(vadc_vvm)(CPURISCVState *env, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int i, j, vl;
+ uint32_t lmul, width, src1, src2, dest, vlmax, carry;
+
+ vl = env->vfp.vl;
+ lmul = vector_get_lmul(env);
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ if (vector_vtype_ill(env) || vector_overlap_carry(lmul, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ vector_lmul_check_reg(env, lmul, rs1, false);
+ vector_lmul_check_reg(env, lmul, rs2, false);
+ vector_lmul_check_reg(env, lmul, rd, false);
+
+ for (i = 0; i < vlmax; i++) {
+ src1 = rs1 + (i / (VLEN / width));
+ src2 = rs2 + (i / (VLEN / width));
+ dest = rd + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 8:
+ carry = vector_get_carry(env, width, lmul, i);
+ env->vfp.vreg[dest].u8[j] = env->vfp.vreg[src1].u8[j]
+ + env->vfp.vreg[src2].u8[j] + carry;
+ break;
+ case 16:
+ carry = vector_get_carry(env, width, lmul, i);
+ env->vfp.vreg[dest].u16[j] = env->vfp.vreg[src1].u16[j]
+ + env->vfp.vreg[src2].u16[j] + carry;
+ break;
+ case 32:
+ carry = vector_get_carry(env, width, lmul, i);
+ env->vfp.vreg[dest].u32[j] = env->vfp.vreg[src1].u32[j]
+ + env->vfp.vreg[src2].u32[j] + carry;
+ break;
+ case 64:
+ carry = vector_get_carry(env, width, lmul, i);
+ env->vfp.vreg[dest].u64[j] = env->vfp.vreg[src1].u64[j]
+ + env->vfp.vreg[src2].u64[j] + carry;
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ break;
+ }
+ } else {
+ vector_tail_common(env, dest, j, width);
+ }
+ }
+ env->vfp.vstart = 0;
+}
+
+void VECTOR_HELPER(vadc_vxm)(CPURISCVState *env, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int i, j, vl;
+ uint32_t lmul, width, src2, dest, vlmax, carry;
+ vl = env->vfp.vl;
+ lmul = vector_get_lmul(env);
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ if (vector_vtype_ill(env) || vector_overlap_carry(lmul, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ vector_lmul_check_reg(env, lmul, rs2, false);
+ vector_lmul_check_reg(env, lmul, rd, false);
+
+ for (i = 0; i < vlmax; i++) {
+ src2 = rs2 + (i / (VLEN / width));
+ dest = rd + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 8:
+ carry = vector_get_carry(env, width, lmul, i);
+ env->vfp.vreg[dest].u8[j] = env->gpr[rs1]
+ + env->vfp.vreg[src2].u8[j] + carry;
+ break;
+ case 16:
+ carry = vector_get_carry(env, width, lmul, i);
+ env->vfp.vreg[dest].u16[j] = env->gpr[rs1]
+ + env->vfp.vreg[src2].u16[j] + carry;
+ break;
+ case 32:
+ carry = vector_get_carry(env, width, lmul, i);
+ env->vfp.vreg[dest].u32[j] = env->gpr[rs1]
+ + env->vfp.vreg[src2].u32[j] + carry;
+ break;
+ case 64:
+ carry = vector_get_carry(env, width, lmul, i);
+ env->vfp.vreg[dest].u64[j] = (uint64_t)extend_gpr(env->gpr[rs1])
+ + env->vfp.vreg[src2].u64[j] + carry;
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ break;
+ }
+ } else {
+ vector_tail_common(env, dest, j, width);
+ }
+ }
+ env->vfp.vstart = 0;
+}
+
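+/*
+ * vadc.vim: as vadc.vxm, but the addend is the sign-extended 5-bit
+ * immediate carried in the rs1 field.
+ */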
+void VECTOR_HELPER(vadc_vim)(CPURISCVState *env, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int i, j, vl;
+ uint32_t lmul, width, src2, dest, vlmax, carry;
+
+ vl = env->vfp.vl;
+ lmul = vector_get_lmul(env);
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ if (vector_vtype_ill(env) || vector_overlap_carry(lmul, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ vector_lmul_check_reg(env, lmul, rs2, false);
+ vector_lmul_check_reg(env, lmul, rd, false);
+
+ for (i = 0; i < vlmax; i++) {
+ src2 = rs2 + (i / (VLEN / width));
+ dest = rd + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 8:
+ carry = vector_get_carry(env, width, lmul, i);
+ env->vfp.vreg[dest].u8[j] = sign_extend(rs1, 5)
+ + env->vfp.vreg[src2].u8[j] + carry;
+ break;
+ case 16:
+ carry = vector_get_carry(env, width, lmul, i);
+ env->vfp.vreg[dest].u16[j] = sign_extend(rs1, 5)
+ + env->vfp.vreg[src2].u16[j] + carry;
+ break;
+ case 32:
+ carry = vector_get_carry(env, width, lmul, i);
+ env->vfp.vreg[dest].u32[j] = sign_extend(rs1, 5)
+ + env->vfp.vreg[src2].u32[j] + carry;
+ break;
+ case 64:
+ carry = vector_get_carry(env, width, lmul, i);
+ env->vfp.vreg[dest].u64[j] = sign_extend(rs1, 5)
+ + env->vfp.vreg[src2].u64[j] + carry;
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ break;
+ }
+ } else {
+ vector_tail_common(env, dest, j, width);
+ }
+ }
+ env->vfp.vstart = 0;
+}
+
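+/* vmadc.vvm vd, vs2, vs1, v0 # vd[i] = carry_out(vs2[i] + vs1[i] + v0[i]) */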
+void VECTOR_HELPER(vmadc_vvm)(CPURISCVState *env, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int i, j, vl;
+ uint32_t lmul, width, src1, src2, vlmax, carry;
+ uint64_t tmp;
+
+ vl = env->vfp.vl;
+ lmul = vector_get_lmul(env);
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ if (vector_vtype_ill(env)
+ || vector_overlap_dstgp_srcgp(rd, 1, rs1, lmul)
+ || vector_overlap_dstgp_srcgp(rd, 1, rs2, lmul)
+ || (rd == 0)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ vector_lmul_check_reg(env, lmul, rs1, false);
+ vector_lmul_check_reg(env, lmul, rs2, false);
+
+ for (i = 0; i < vlmax; i++) {
+ src1 = rs1 + (i / (VLEN / width));
+ src2 = rs2 + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 8:
+ carry = vector_get_carry(env, width, lmul, i);
+ tmp = env->vfp.vreg[src1].u8[j]
+ + env->vfp.vreg[src2].u8[j] + carry;
+                tmp = tmp >> width;
+                vector_mask_result(env, rd, width, lmul, i, tmp);
+ break;
+ case 16:
+ carry = vector_get_carry(env, width, lmul, i);
+ tmp = env->vfp.vreg[src1].u16[j]
+ + env->vfp.vreg[src2].u16[j] + carry;
+ tmp = tmp >> width;
+ vector_mask_result(env, rd, width, lmul, i, tmp);
+ break;
+ case 32:
+ carry = vector_get_carry(env, width, lmul, i);
+ tmp = (uint64_t)env->vfp.vreg[src1].u32[j]
+ + (uint64_t)env->vfp.vreg[src2].u32[j] + carry;
+ tmp = tmp >> width;
+ vector_mask_result(env, rd, width, lmul, i, tmp);
+ break;
+ case 64:
+ carry = vector_get_carry(env, width, lmul, i);
+ tmp = env->vfp.vreg[src1].u64[j]
+ + env->vfp.vreg[src2].u64[j] + carry;
+
+ if ((tmp < env->vfp.vreg[src1].u64[j] ||
+ tmp < env->vfp.vreg[src2].u64[j])
+ || (env->vfp.vreg[src1].u64[j] == MAX_U64 &&
+ env->vfp.vreg[src2].u64[j] == MAX_U64)) {
+ tmp = 1;
+ } else {
+ tmp = 0;
+ }
+ vector_mask_result(env, rd, width, lmul, i, tmp);
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ break;
+ }
+ } else {
+ if (width <= 64) {
+ vector_mask_result(env, rd, width, lmul, i, 0);
+ } else {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ }
+ }
+ env->vfp.vstart = 0;
+}
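+
+/* vmadc.vxm vd, vs2, rs1, v0 # vd[i] = carry_out(vs2[i] + x[rs1] + v0[i]) */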
+void VECTOR_HELPER(vmadc_vxm)(CPURISCVState *env, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int i, j, vl;
+ uint32_t lmul, width, src2, vlmax, carry;
+ uint64_t tmp, extend_rs1;
+
+ vl = env->vfp.vl;
+ lmul = vector_get_lmul(env);
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ if (vector_vtype_ill(env)
+ || vector_overlap_dstgp_srcgp(rd, 1, rs2, lmul)
+ || (rd == 0)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ vector_lmul_check_reg(env, lmul, rs2, false);
+
+ for (i = 0; i < vlmax; i++) {
+ src2 = rs2 + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 8:
+ carry = vector_get_carry(env, width, lmul, i);
+ tmp = (uint8_t)env->gpr[rs1]
+ + env->vfp.vreg[src2].u8[j] + carry;
+                tmp = tmp >> width;
+                vector_mask_result(env, rd, width, lmul, i, tmp);
+ break;
+ case 16:
+ carry = vector_get_carry(env, width, lmul, i);
+ tmp = (uint16_t)env->gpr[rs1]
+ + env->vfp.vreg[src2].u16[j] + carry;
+ tmp = tmp >> width;
+ vector_mask_result(env, rd, width, lmul, i, tmp);
+ break;
+ case 32:
+ carry = vector_get_carry(env, width, lmul, i);
+ tmp = (uint64_t)((uint32_t)env->gpr[rs1])
+ + (uint64_t)env->vfp.vreg[src2].u32[j] + carry;
+ tmp = tmp >> width;
+ vector_mask_result(env, rd, width, lmul, i, tmp);
+ break;
+ case 64:
+ carry = vector_get_carry(env, width, lmul, i);
+
+ extend_rs1 = (uint64_t)extend_gpr(env->gpr[rs1]);
+ tmp = extend_rs1 + env->vfp.vreg[src2].u64[j] + carry;
+ if ((tmp < extend_rs1) ||
+ (carry && (env->vfp.vreg[src2].u64[j] == MAX_U64))) {
+ tmp = 1;
+ } else {
+ tmp = 0;
+ }
+ vector_mask_result(env, rd, width, lmul, i, tmp);
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ break;
+ }
+ } else {
+ if (width <= 64) {
+ vector_mask_result(env, rd, width, lmul, i, 0);
+ } else {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ }
+ }
+ env->vfp.vstart = 0;
+}
+
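+/* vmadc.vim vd, vs2, imm, v0 # vd[i] = carry_out(vs2[i] + sext(imm) + v0[i]) */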
+void VECTOR_HELPER(vmadc_vim)(CPURISCVState *env, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int i, j, vl;
+ uint32_t lmul, width, src2, vlmax, carry;
+ uint64_t tmp;
+
+ vl = env->vfp.vl;
+ lmul = vector_get_lmul(env);
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ if (vector_vtype_ill(env)
+ || vector_overlap_dstgp_srcgp(rd, 1, rs2, lmul)
+ || (rd == 0)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ vector_lmul_check_reg(env, lmul, rs2, false);
+
+ for (i = 0; i < vlmax; i++) {
+ src2 = rs2 + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 8:
+ carry = vector_get_carry(env, width, lmul, i);
+ tmp = (uint8_t)sign_extend(rs1, 5)
+ + env->vfp.vreg[src2].u8[j] + carry;
+                tmp = tmp >> width;
+                vector_mask_result(env, rd, width, lmul, i, tmp);
+ break;
+ case 16:
+ carry = vector_get_carry(env, width, lmul, i);
+ tmp = (uint16_t)sign_extend(rs1, 5)
+ + env->vfp.vreg[src2].u16[j] + carry;
+ tmp = tmp >> width;
+ vector_mask_result(env, rd, width, lmul, i, tmp);
+ break;
+ case 32:
+ carry = vector_get_carry(env, width, lmul, i);
+ tmp = (uint64_t)((uint32_t)sign_extend(rs1, 5))
+ + (uint64_t)env->vfp.vreg[src2].u32[j] + carry;
+ tmp = tmp >> width;
+ vector_mask_result(env, rd, width, lmul, i, tmp);
+ break;
+ case 64:
+ carry = vector_get_carry(env, width, lmul, i);
+ tmp = (uint64_t)sign_extend(rs1, 5)
+ + env->vfp.vreg[src2].u64[j] + carry;
+
+ if ((tmp < (uint64_t)sign_extend(rs1, 5) ||
+ tmp < env->vfp.vreg[src2].u64[j])
+ || ((uint64_t)sign_extend(rs1, 5) == MAX_U64 &&
+ env->vfp.vreg[src2].u64[j] == MAX_U64)) {
+ tmp = 1;
+ } else {
+ tmp = 0;
+ }
+ vector_mask_result(env, rd, width, lmul, i, tmp);
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ break;
+ }
+ } else {
+ if (width <= 64) {
+ vector_mask_result(env, rd, width, lmul, i, 0);
+ } else {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ }
+ }
+ env->vfp.vstart = 0;
+}
+
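+/* vsbc.vvm vd, vs2, vs1, v0 # vd[i] = vs2[i] - vs1[i] - v0[i].LSB */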
+void VECTOR_HELPER(vsbc_vvm)(CPURISCVState *env, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int i, j, vl;
+ uint32_t lmul, width, src1, src2, dest, vlmax, carry;
+
+ vl = env->vfp.vl;
+ lmul = vector_get_lmul(env);
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ if (vector_vtype_ill(env) || vector_overlap_carry(lmul, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ vector_lmul_check_reg(env, lmul, rs1, false);
+ vector_lmul_check_reg(env, lmul, rs2, false);
+ vector_lmul_check_reg(env, lmul, rd, false);
+
+ for (i = 0; i < vlmax; i++) {
+ src1 = rs1 + (i / (VLEN / width));
+ src2 = rs2 + (i / (VLEN / width));
+ dest = rd + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 8:
+ carry = vector_get_carry(env, width, lmul, i);
+ env->vfp.vreg[dest].u8[j] = env->vfp.vreg[src2].u8[j]
+ - env->vfp.vreg[src1].u8[j] - carry;
+ break;
+ case 16:
+ carry = vector_get_carry(env, width, lmul, i);
+ env->vfp.vreg[dest].u16[j] = env->vfp.vreg[src2].u16[j]
+ - env->vfp.vreg[src1].u16[j] - carry;
+ break;
+ case 32:
+ carry = vector_get_carry(env, width, lmul, i);
+ env->vfp.vreg[dest].u32[j] = env->vfp.vreg[src2].u32[j]
+ - env->vfp.vreg[src1].u32[j] - carry;
+ break;
+ case 64:
+ carry = vector_get_carry(env, width, lmul, i);
+ env->vfp.vreg[dest].u64[j] = env->vfp.vreg[src2].u64[j]
+ - env->vfp.vreg[src1].u64[j] - carry;
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ break;
+ }
+ } else {
+ vector_tail_common(env, dest, j, width);
+ }
+ }
+ env->vfp.vstart = 0;
+}
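+
+/* vsbc.vxm vd, vs2, rs1, v0 # vd[i] = vs2[i] - x[rs1] - v0[i].LSB */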
+void VECTOR_HELPER(vsbc_vxm)(CPURISCVState *env, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int i, j, vl;
+ uint32_t lmul, width, src2, dest, vlmax, carry;
+
+ vl = env->vfp.vl;
+ lmul = vector_get_lmul(env);
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ if (vector_vtype_ill(env) || vector_overlap_carry(lmul, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ vector_lmul_check_reg(env, lmul, rs2, false);
+ vector_lmul_check_reg(env, lmul, rd, false);
+
+ for (i = 0; i < vlmax; i++) {
+ src2 = rs2 + (i / (VLEN / width));
+ dest = rd + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 8:
+ carry = vector_get_carry(env, width, lmul, i);
+ env->vfp.vreg[dest].u8[j] = env->vfp.vreg[src2].u8[j]
+ - env->gpr[rs1] - carry;
+ break;
+ case 16:
+ carry = vector_get_carry(env, width, lmul, i);
+ env->vfp.vreg[dest].u16[j] = env->vfp.vreg[src2].u16[j]
+ - env->gpr[rs1] - carry;
+ break;
+ case 32:
+ carry = vector_get_carry(env, width, lmul, i);
+ env->vfp.vreg[dest].u32[j] = env->vfp.vreg[src2].u32[j]
+ - env->gpr[rs1] - carry;
+ break;
+ case 64:
+ carry = vector_get_carry(env, width, lmul, i);
+ env->vfp.vreg[dest].u64[j] = env->vfp.vreg[src2].u64[j]
+ - (uint64_t)extend_gpr(env->gpr[rs1]) - carry;
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ break;
+ }
+ } else {
+ vector_tail_common(env, dest, j, width);
+ }
+ }
+ env->vfp.vstart = 0;
+}
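+
+/* vmsbc.vvm vd, vs2, vs1, v0 # vd[i] = borrow_out(vs2[i] - vs1[i] - v0[i]) */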
+void VECTOR_HELPER(vmsbc_vvm)(CPURISCVState *env, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int i, j, vl;
+ uint32_t lmul, width, src1, src2, vlmax, carry;
+ uint64_t tmp;
+
+ vl = env->vfp.vl;
+ lmul = vector_get_lmul(env);
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ if (vector_vtype_ill(env)
+ || vector_overlap_dstgp_srcgp(rd, 1, rs1, lmul)
+ || vector_overlap_dstgp_srcgp(rd, 1, rs2, lmul)
+ || (rd == 0)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ vector_lmul_check_reg(env, lmul, rs1, false);
+ vector_lmul_check_reg(env, lmul, rs2, false);
+
+ for (i = 0; i < vlmax; i++) {
+ src1 = rs1 + (i / (VLEN / width));
+ src2 = rs2 + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 8:
+ carry = vector_get_carry(env, width, lmul, i);
+ tmp = env->vfp.vreg[src2].u8[j]
+ - env->vfp.vreg[src1].u8[j] - carry;
+                tmp = (tmp >> width) & 0x1;
+                vector_mask_result(env, rd, width, lmul, i, tmp);
+ break;
+ case 16:
+ carry = vector_get_carry(env, width, lmul, i);
+ tmp = env->vfp.vreg[src2].u16[j]
+ - env->vfp.vreg[src1].u16[j] - carry;
+ tmp = (tmp >> width) & 0x1;
+ vector_mask_result(env, rd, width, lmul, i, tmp);
+ break;
+ case 32:
+ carry = vector_get_carry(env, width, lmul, i);
+ tmp = (uint64_t)env->vfp.vreg[src2].u32[j]
+ - (uint64_t)env->vfp.vreg[src1].u32[j] - carry;
+ tmp = (tmp >> width) & 0x1;
+ vector_mask_result(env, rd, width, lmul, i, tmp);
+ break;
+ case 64:
+ carry = vector_get_carry(env, width, lmul, i);
+ tmp = env->vfp.vreg[src2].u64[j]
+ - env->vfp.vreg[src1].u64[j] - carry;
+
+ if (((env->vfp.vreg[src1].u64[j] == MAX_U64) && carry) ||
+ env->vfp.vreg[src2].u64[j] <
+ (env->vfp.vreg[src1].u64[j] + carry)) {
+ tmp = 1;
+ } else {
+ tmp = 0;
+ }
+ vector_mask_result(env, rd, width, lmul, i, tmp);
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ break;
+ }
+ } else {
+ if (width <= 64) {
+ vector_mask_result(env, rd, width, lmul, i, 0);
+ } else {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ }
+ }
+ env->vfp.vstart = 0;
+}
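+
+/* vmsbc.vxm vd, vs2, rs1, v0 # vd[i] = borrow_out(vs2[i] - x[rs1] - v0[i]) */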
+void VECTOR_HELPER(vmsbc_vxm)(CPURISCVState *env, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int i, j, vl;
+ uint32_t lmul, width, src2, vlmax, carry;
+ uint64_t tmp, extend_rs1;
+
+ vl = env->vfp.vl;
+ lmul = vector_get_lmul(env);
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ if (vector_vtype_ill(env)
+ || vector_overlap_dstgp_srcgp(rd, 1, rs2, lmul)
+ || (rd == 0)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ vector_lmul_check_reg(env, lmul, rs2, false);
+
+ for (i = 0; i < vlmax; i++) {
+ src2 = rs2 + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 8:
+ carry = vector_get_carry(env, width, lmul, i);
+ tmp = env->vfp.vreg[src2].u8[j]
+ - (uint8_t)env->gpr[rs1] - carry;
+ tmp = (tmp >> width) & 0x1;
+ vector_mask_result(env, rd, width, lmul, i, tmp);
+ break;
+ case 16:
+ carry = vector_get_carry(env, width, lmul, i);
+ tmp = env->vfp.vreg[src2].u16[j]
+ - (uint16_t)env->gpr[rs1] - carry;
+ tmp = (tmp >> width) & 0x1;
+ vector_mask_result(env, rd, width, lmul, i, tmp);
+ break;
+ case 32:
+ carry = vector_get_carry(env, width, lmul, i);
+ tmp = (uint64_t)env->vfp.vreg[src2].u32[j]
+ - (uint64_t)((uint32_t)env->gpr[rs1]) - carry;
+ tmp = (tmp >> width) & 0x1;
+ vector_mask_result(env, rd, width, lmul, i, tmp);
+ break;
+ case 64:
+ carry = vector_get_carry(env, width, lmul, i);
+
+ extend_rs1 = (uint64_t)extend_gpr(env->gpr[rs1]);
+ tmp = env->vfp.vreg[src2].u64[j] - extend_rs1 - carry;
+
+ if ((tmp > env->vfp.vreg[src2].u64[j]) ||
+ ((extend_rs1 == MAX_U64) && carry)) {
+ tmp = 1;
+ } else {
+ tmp = 0;
+ }
+ vector_mask_result(env, rd, width, lmul, i, tmp);
+ break;
+
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ break;
+ }
+ } else {
+ if (width <= 64) {
+ vector_mask_result(env, rd, width, lmul, i, 0);
+ } else {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ }
+ }
+ env->vfp.vstart = 0;
+}
+
+/* vmpopc.m rd, vs2, v0.t # x[rd] = sum_i ( vs2[i].LSB && v0[i].LSB ) */
+void VECTOR_HELPER(vmpopc_m)(CPURISCVState *env, uint32_t vm,
+ uint32_t rs2, uint32_t rd)
+{
+ int width, lmul, vl, vlmax;
+ int i;
+ if (vector_vtype_ill(env)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ lmul = vector_get_lmul(env);
+ vl = env->vfp.vl;
+
+ if (env->vfp.vstart != 0) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+ env->gpr[rd] = 0;
+
+ for (i = 0; i < vlmax; i++) {
+ if (i < vl) {
+ if (vector_mask_reg(env, rs2, width, lmul, i) &&
+ vector_elem_mask(env, vm, width, lmul, i)) {
+ env->gpr[rd]++;
+ }
+ }
+ }
+    env->vfp.vstart = 0;
+}
+
+/* vmfirst.m rd, vs2, vm */
+void VECTOR_HELPER(vmfirst_m)(CPURISCVState *env, uint32_t vm,
+ uint32_t rs2, uint32_t rd)
+{
+ int width, lmul, vl, vlmax;
+ int i;
+ if (vector_vtype_ill(env)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ lmul = vector_get_lmul(env);
+ vl = env->vfp.vl;
+
+ if (env->vfp.vstart != 0) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+    /* x[rd] = -1 if no active element of vs2 has its LSB set */
+    env->gpr[rd] = -1;
+    for (i = 0; i < vlmax; i++) {
+        if (i < vl) {
+            if (vector_mask_reg(env, rs2, width, lmul, i) &&
+                vector_elem_mask(env, vm, width, lmul, i)) {
+                env->gpr[rd] = i;
+                break;
+            }
+        }
+    }
+
+    env->vfp.vstart = 0;
+}
+
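+/* vmerge.vvm vd, vs2, vs1, v0 # vd[i] = v0[i].LSB ? vs1[i] : vs2[i] */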
+void VECTOR_HELPER(vmerge_vvm)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int i, j, vl, idx, pos;
+ uint32_t lmul, width, src1, src2, dest, vlmax;
+
+ vl = env->vfp.vl;
+ lmul = vector_get_lmul(env);
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ vector_lmul_check_reg(env, lmul, rs1, false);
+ vector_lmul_check_reg(env, lmul, rs2, false);
+ vector_lmul_check_reg(env, lmul, rd, false);
+
+ for (i = 0; i < vlmax; i++) {
+ src1 = rs1 + (i / (VLEN / width));
+ src2 = rs2 + (i / (VLEN / width));
+ dest = rd + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 8:
+ if (vm == 0) {
+ vector_get_layout(env, width, lmul, i, &idx, &pos);
+ if (((env->vfp.vreg[0].u8[idx] >> pos) & 0x1) == 0) {
+ env->vfp.vreg[dest].u8[j] =
+ env->vfp.vreg[src2].u8[j];
+ } else {
+ env->vfp.vreg[dest].u8[j] =
+ env->vfp.vreg[src1].u8[j];
+ }
+ } else {
+ if (rs2 != 0) {
+ riscv_raise_exception(env,
+ RISCV_EXCP_ILLEGAL_INST, GETPC());
+ }
+ env->vfp.vreg[dest].u8[j] = env->vfp.vreg[src1].u8[j];
+ }
+ break;
+ case 16:
+ if (vm == 0) {
+ vector_get_layout(env, width, lmul, i, &idx, &pos);
+ if (((env->vfp.vreg[0].u8[idx] >> pos) & 0x1) == 0) {
+ env->vfp.vreg[dest].u16[j] =
+ env->vfp.vreg[src2].u16[j];
+ } else {
+ env->vfp.vreg[dest].u16[j] =
+ env->vfp.vreg[src1].u16[j];
+ }
+ } else {
+ if (rs2 != 0) {
+ riscv_raise_exception(env,
+ RISCV_EXCP_ILLEGAL_INST, GETPC());
+ }
+ env->vfp.vreg[dest].u16[j] = env->vfp.vreg[src1].u16[j];
+ }
+ break;
+ case 32:
+ if (vm == 0) {
+ vector_get_layout(env, width, lmul, i, &idx, &pos);
+ if (((env->vfp.vreg[0].u8[idx] >> pos) & 0x1) == 0) {
+ env->vfp.vreg[dest].u32[j] =
+ env->vfp.vreg[src2].u32[j];
+ } else {
+ env->vfp.vreg[dest].u32[j] =
+ env->vfp.vreg[src1].u32[j];
+ }
+ } else {
+ if (rs2 != 0) {
+ riscv_raise_exception(env,
+ RISCV_EXCP_ILLEGAL_INST, GETPC());
+ }
+ env->vfp.vreg[dest].u32[j] = env->vfp.vreg[src1].u32[j];
+ }
+ break;
+ case 64:
+ if (vm == 0) {
+ vector_get_layout(env, width, lmul, i, &idx, &pos);
+ if (((env->vfp.vreg[0].u8[idx] >> pos) & 0x1) == 0) {
+ env->vfp.vreg[dest].u64[j] =
+ env->vfp.vreg[src2].u64[j];
+ } else {
+ env->vfp.vreg[dest].u64[j] =
+ env->vfp.vreg[src1].u64[j];
+ }
+ } else {
+ if (rs2 != 0) {
+ riscv_raise_exception(env,
+ RISCV_EXCP_ILLEGAL_INST, GETPC());
+ }
+ env->vfp.vreg[dest].u64[j] = env->vfp.vreg[src1].u64[j];
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ break;
+ }
+ } else {
+ vector_tail_common(env, dest, j, width);
+ }
+ }
+ env->vfp.vstart = 0;
+}
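+
+/* vmerge.vxm vd, vs2, rs1, v0 # vd[i] = v0[i].LSB ? x[rs1] : vs2[i] */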
+void VECTOR_HELPER(vmerge_vxm)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int i, j, vl, idx, pos;
+ uint32_t lmul, width, src2, dest, vlmax;
+
+ vl = env->vfp.vl;
+ lmul = vector_get_lmul(env);
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+    if (vector_vtype_ill(env)) {
+        riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+        return;
+    }
+
+    vector_lmul_check_reg(env, lmul, rs2, false);
+ vector_lmul_check_reg(env, lmul, rd, false);
+
+ for (i = 0; i < vlmax; i++) {
+ src2 = rs2 + (i / (VLEN / width));
+ dest = rd + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 8:
+ if (vm == 0) {
+ vector_get_layout(env, width, lmul, i, &idx, &pos);
+ if (((env->vfp.vreg[0].u8[idx] >> pos) & 0x1) == 0) {
+ env->vfp.vreg[dest].u8[j] =
+ env->vfp.vreg[src2].u8[j];
+ } else {
+ env->vfp.vreg[dest].u8[j] = env->gpr[rs1];
+ }
+ } else {
+ if (rs2 != 0) {
+ riscv_raise_exception(env,
+ RISCV_EXCP_ILLEGAL_INST, GETPC());
+ }
+ env->vfp.vreg[dest].u8[j] = env->gpr[rs1];
+ }
+ break;
+ case 16:
+ if (vm == 0) {
+ vector_get_layout(env, width, lmul, i, &idx, &pos);
+ if (((env->vfp.vreg[0].u8[idx] >> pos) & 0x1) == 0) {
+ env->vfp.vreg[dest].u16[j] =
+ env->vfp.vreg[src2].u16[j];
+ } else {
+ env->vfp.vreg[dest].u16[j] = env->gpr[rs1];
+ }
+ } else {
+ if (rs2 != 0) {
+ riscv_raise_exception(env,
+ RISCV_EXCP_ILLEGAL_INST, GETPC());
+ }
+ env->vfp.vreg[dest].u16[j] = env->gpr[rs1];
+ }
+ break;
+ case 32:
+ if (vm == 0) {
+ vector_get_layout(env, width, lmul, i, &idx, &pos);
+ if (((env->vfp.vreg[0].u8[idx] >> pos) & 0x1) == 0) {
+ env->vfp.vreg[dest].u32[j] =
+ env->vfp.vreg[src2].u32[j];
+ } else {
+ env->vfp.vreg[dest].u32[j] = env->gpr[rs1];
+ }
+ } else {
+ if (rs2 != 0) {
+ riscv_raise_exception(env,
+ RISCV_EXCP_ILLEGAL_INST, GETPC());
+ }
+ env->vfp.vreg[dest].u32[j] = env->gpr[rs1];
+ }
+ break;
+ case 64:
+ if (vm == 0) {
+ vector_get_layout(env, width, lmul, i, &idx, &pos);
+ if (((env->vfp.vreg[0].u8[idx] >> pos) & 0x1) == 0) {
+ env->vfp.vreg[dest].u64[j] =
+ env->vfp.vreg[src2].u64[j];
+ } else {
+ env->vfp.vreg[dest].u64[j] =
+ (uint64_t)extend_gpr(env->gpr[rs1]);
+ }
+ } else {
+ if (rs2 != 0) {
+ riscv_raise_exception(env,
+ RISCV_EXCP_ILLEGAL_INST, GETPC());
+ }
+ env->vfp.vreg[dest].u64[j] =
+ (uint64_t)extend_gpr(env->gpr[rs1]);
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ break;
+ }
+ } else {
+ vector_tail_common(env, dest, j, width);
+ }
+ }
+ env->vfp.vstart = 0;
+}
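+
+/* vmerge.vim vd, vs2, imm, v0 # vd[i] = v0[i].LSB ? sext(imm) : vs2[i] */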
+void VECTOR_HELPER(vmerge_vim)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int i, j, vl, idx, pos;
+ uint32_t lmul, width, src2, dest, vlmax;
+
+ vl = env->vfp.vl;
+ lmul = vector_get_lmul(env);
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ if (vector_vtype_ill(env)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ vector_lmul_check_reg(env, lmul, rs2, false);
+ vector_lmul_check_reg(env, lmul, rd, false);
+
+ for (i = 0; i < vlmax; i++) {
+ src2 = rs2 + (i / (VLEN / width));
+ dest = rd + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 8:
+ if (vm == 0) {
+ vector_get_layout(env, width, lmul, i, &idx, &pos);
+ if (((env->vfp.vreg[0].u8[idx] >> pos) & 0x1) == 0) {
+ env->vfp.vreg[dest].u8[j] =
+ env->vfp.vreg[src2].u8[j];
+ } else {
+ env->vfp.vreg[dest].u8[j] =
+ (uint8_t)sign_extend(rs1, 5);
+ }
+ } else {
+ if (rs2 != 0) {
+ riscv_raise_exception(env,
+ RISCV_EXCP_ILLEGAL_INST, GETPC());
+ }
+ env->vfp.vreg[dest].u8[j] = (uint8_t)sign_extend(rs1, 5);
+ }
+ break;
+ case 16:
+ if (vm == 0) {
+ vector_get_layout(env, width, lmul, i, &idx, &pos);
+ if (((env->vfp.vreg[0].u8[idx] >> pos) & 0x1) == 0) {
+ env->vfp.vreg[dest].u16[j] =
+ env->vfp.vreg[src2].u16[j];
+ } else {
+ env->vfp.vreg[dest].u16[j] =
+ (uint16_t)sign_extend(rs1, 5);
+ }
+ } else {
+ if (rs2 != 0) {
+ riscv_raise_exception(env,
+ RISCV_EXCP_ILLEGAL_INST, GETPC());
+ }
+ env->vfp.vreg[dest].u16[j] = (uint16_t)sign_extend(rs1, 5);
+ }
+ break;
+ case 32:
+ if (vm == 0) {
+ vector_get_layout(env, width, lmul, i, &idx, &pos);
+ if (((env->vfp.vreg[0].u8[idx] >> pos) & 0x1) == 0) {
+ env->vfp.vreg[dest].u32[j] =
+ env->vfp.vreg[src2].u32[j];
+ } else {
+ env->vfp.vreg[dest].u32[j] =
+ (uint32_t)sign_extend(rs1, 5);
+ }
+ } else {
+ if (rs2 != 0) {
+ riscv_raise_exception(env,
+ RISCV_EXCP_ILLEGAL_INST, GETPC());
+ }
+ env->vfp.vreg[dest].u32[j] = (uint32_t)sign_extend(rs1, 5);
+ }
+ break;
+ case 64:
+ if (vm == 0) {
+ vector_get_layout(env, width, lmul, i, &idx, &pos);
+ if (((env->vfp.vreg[0].u8[idx] >> pos) & 0x1) == 0) {
+ env->vfp.vreg[dest].u64[j] =
+ env->vfp.vreg[src2].u64[j];
+ } else {
+ env->vfp.vreg[dest].u64[j] =
+ (uint64_t)sign_extend(rs1, 5);
+ }
+ } else {
+ if (rs2 != 0) {
+ riscv_raise_exception(env,
+ RISCV_EXCP_ILLEGAL_INST, GETPC());
+ }
+ env->vfp.vreg[dest].u64[j] = (uint64_t)sign_extend(rs1, 5);
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ break;
+ }
+ } else {
+ vector_tail_common(env, dest, j, width);
+ }
+ }
+ env->vfp.vstart = 0;
+}
+
+/* vfmerge.vfm vd, vs2, rs1, v0 # vd[i] = v0[i].LSB ? f[rs1] : vs2[i] */
+void VECTOR_HELPER(vfmerge_vfm)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int width, lmul, vl, vlmax;
+ int i, j, dest, src2;
+
+ lmul = vector_get_lmul(env);
+ vl = env->vfp.vl;
+
+ if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+    /* vm=1 encodes vfmv.v.f vd, rs1 # vd[i] = f[rs1]; requires vs2 == 0 */
+ if (vm && (rs2 != 0)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ for (i = 0; i < vlmax; i++) {
+ dest = rd + (i / (VLEN / width));
+ src2 = rs2 + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f16[j] = env->fpr[rs1];
+ } else {
+ env->vfp.vreg[dest].f16[j] = env->vfp.vreg[src2].f16[j];
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f32[j] = env->fpr[rs1];
+ } else {
+ env->vfp.vreg[dest].f32[j] = env->vfp.vreg[src2].f32[j];
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f64[j] = env->fpr[rs1];
+ } else {
+ env->vfp.vreg[dest].f64[j] = env->vfp.vreg[src2].f64[j];
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ } else {
+ vector_tail_fcommon(env, dest, j, width);
+ }
+ }
+    env->vfp.vstart = 0;
+}
+
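+/* vmseq.vv vd, vs2, vs1, vm # vd[i] = (vs2[i] == vs1[i]) */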
+void VECTOR_HELPER(vmseq_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int i, j, vl;
+ uint32_t lmul, width, src1, src2, vlmax;
+
+ vl = env->vfp.vl;
+ lmul = vector_get_lmul(env);
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ vector_lmul_check_reg(env, lmul, rs1, false);
+ vector_lmul_check_reg(env, lmul, rs2, false);
+
+ for (i = 0; i < vlmax; i++) {
+ src1 = rs1 + (i / (VLEN / width));
+ src2 = rs2 + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 8:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ if (env->vfp.vreg[src1].u8[j] ==
+ env->vfp.vreg[src2].u8[j]) {
+ vector_mask_result(env, rd, width, lmul, i, 1);
+ } else {
+ vector_mask_result(env, rd, width, lmul, i, 0);
+ }
+ }
+ break;
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ if (env->vfp.vreg[src1].u16[j] ==
+ env->vfp.vreg[src2].u16[j]) {
+ vector_mask_result(env, rd, width, lmul, i, 1);
+ } else {
+ vector_mask_result(env, rd, width, lmul, i, 0);
+ }
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ if (env->vfp.vreg[src1].u32[j] ==
+ env->vfp.vreg[src2].u32[j]) {
+ vector_mask_result(env, rd, width, lmul, i, 1);
+ } else {
+ vector_mask_result(env, rd, width, lmul, i, 0);
+ }
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ if (env->vfp.vreg[src1].u64[j] ==
+ env->vfp.vreg[src2].u64[j]) {
+ vector_mask_result(env, rd, width, lmul, i, 1);
+ } else {
+ vector_mask_result(env, rd, width, lmul, i, 0);
+ }
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ break;
+ }
+ } else {
+ if (width <= 64) {
+ vector_mask_result(env, rd, width, lmul, i, 0);
+ } else {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ }
+ }
+ env->vfp.vstart = 0;
+}
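+
+/* vmseq.vx vd, vs2, rs1, vm # vd[i] = (vs2[i] == x[rs1]) */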
+void VECTOR_HELPER(vmseq_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int i, j, vl;
+ uint32_t lmul, width, src2, vlmax;
+
+ vl = env->vfp.vl;
+ lmul = vector_get_lmul(env);
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ vector_lmul_check_reg(env, lmul, rs2, false);
+
+ for (i = 0; i < vlmax; i++) {
+ src2 = rs2 + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 8:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ if ((uint8_t)env->gpr[rs1] == env->vfp.vreg[src2].u8[j]) {
+ vector_mask_result(env, rd, width, lmul, i, 1);
+ } else {
+ vector_mask_result(env, rd, width, lmul, i, 0);
+ }
+ }
+ break;
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ if ((uint16_t)env->gpr[rs1] == env->vfp.vreg[src2].u16[j]) {
+ vector_mask_result(env, rd, width, lmul, i, 1);
+ } else {
+ vector_mask_result(env, rd, width, lmul, i, 0);
+ }
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ if ((uint32_t)env->gpr[rs1] == env->vfp.vreg[src2].u32[j]) {
+ vector_mask_result(env, rd, width, lmul, i, 1);
+ } else {
+ vector_mask_result(env, rd, width, lmul, i, 0);
+ }
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ if ((uint64_t)extend_gpr(env->gpr[rs1]) ==
+ env->vfp.vreg[src2].u64[j]) {
+ vector_mask_result(env, rd, width, lmul, i, 1);
+ } else {
+ vector_mask_result(env, rd, width, lmul, i, 0);
+ }
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ break;
+ }
+ } else {
+ if (width <= 64) {
+ vector_mask_result(env, rd, width, lmul, i, 0);
+ } else {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ }
+ }
+ env->vfp.vstart = 0;
+}
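+
+/* vmseq.vi vd, vs2, imm, vm # vd[i] = (vs2[i] == sext(imm)) */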
+void VECTOR_HELPER(vmseq_vi)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int i, j, vl;
+ uint32_t lmul, width, src2, vlmax;
+
+ vl = env->vfp.vl;
+ lmul = vector_get_lmul(env);
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ vector_lmul_check_reg(env, lmul, rs2, false);
+
+ for (i = 0; i < vlmax; i++) {
+ src2 = rs2 + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 8:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ if ((uint8_t)sign_extend(rs1, 5)
+ == env->vfp.vreg[src2].u8[j]) {
+ vector_mask_result(env, rd, width, lmul, i, 1);
+ } else {
+ vector_mask_result(env, rd, width, lmul, i, 0);
+ }
+ }
+ break;
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ if ((uint16_t)sign_extend(rs1, 5)
+ == env->vfp.vreg[src2].u16[j]) {
+ vector_mask_result(env, rd, width, lmul, i, 1);
+ } else {
+ vector_mask_result(env, rd, width, lmul, i, 0);
+ }
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ if ((uint32_t)sign_extend(rs1, 5)
+ == env->vfp.vreg[src2].u32[j]) {
+ vector_mask_result(env, rd, width, lmul, i, 1);
+ } else {
+ vector_mask_result(env, rd, width, lmul, i, 0);
+ }
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ if ((uint64_t)sign_extend(rs1, 5) ==
+ env->vfp.vreg[src2].u64[j]) {
+ vector_mask_result(env, rd, width, lmul, i, 1);
+ } else {
+ vector_mask_result(env, rd, width, lmul, i, 0);
+ }
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ break;
+ }
+ } else {
+ if (width <= 64) {
+ vector_mask_result(env, rd, width, lmul, i, 0);
+ } else {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ }
+ }
+ env->vfp.vstart = 0;
+}
+
+/* vmandnot.mm vd, vs2, vs1 # vd = vs2 & ~vs1 */
+void VECTOR_HELPER(vmandnot_mm)(CPURISCVState *env, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int width, lmul, vl, i, vlmax;
+ uint32_t tmp;
+
+ if (vector_vtype_ill(env)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ lmul = vector_get_lmul(env);
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ vl = env->vfp.vl;
+ if (env->vfp.vstart >= vl) {
+ return;
+ }
+
+ for (i = 0; i < vlmax; i++) {
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ tmp = ~vector_mask_reg(env, rs1, width, lmul, i) &
+ vector_mask_reg(env, rs2, width, lmul, i);
+ vector_mask_result(env, rd, width, lmul, i, tmp);
+ } else {
+ vector_mask_result(env, rd, width, lmul, i, 0);
+ }
+ }
+
+    env->vfp.vstart = 0;
+}
+
+/* vmfeq.vv vd, vs2, vs1, vm # Vector-vector */
+void VECTOR_HELPER(vmfeq_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int width, lmul, vl, vlmax;
+ int i, j, src1, src2, result;
+
+ lmul = vector_get_lmul(env);
+ vl = env->vfp.vl;
+
+ if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ vector_lmul_check_reg(env, lmul, rs1, false);
+ vector_lmul_check_reg(env, lmul, rs2, false);
+
+ if (env->vfp.vstart >= vl) {
+ return;
+ }
+
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ for (i = 0; i < vlmax; i++) {
+ src2 = rs2 + (i / (VLEN / width));
+ src1 = rs1 + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ result = float16_eq_quiet(env->vfp.vreg[src1].f16[j],
+ env->vfp.vreg[src2].f16[j],
+ &env->fp_status);
+ vector_mask_result(env, rd, width, lmul, i, result);
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ result = float32_eq_quiet(env->vfp.vreg[src1].f32[j],
+ env->vfp.vreg[src2].f32[j],
+ &env->fp_status);
+ vector_mask_result(env, rd, width, lmul, i, result);
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ result = float64_eq_quiet(env->vfp.vreg[src1].f64[j],
+ env->vfp.vreg[src2].f64[j],
+ &env->fp_status);
+ vector_mask_result(env, rd, width, lmul, i, result);
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ } else {
+ switch (width) {
+ case 16:
+ case 32:
+ case 64:
+ vector_mask_result(env, rd, width, lmul, i, 0);
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ }
+ }
+    env->vfp.vstart = 0;
+}
+
+/* vmfeq.vf vd, vs2, rs1, vm # vector-scalar */
+void VECTOR_HELPER(vmfeq_vf)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int width, lmul, vl, vlmax;
+ int i, j, src2, result;
+
+ lmul = vector_get_lmul(env);
+ vl = env->vfp.vl;
+ if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ vector_lmul_check_reg(env, lmul, rs2, false);
+ if (env->vfp.vstart >= vl) {
+ return;
+ }
+
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ for (i = 0; i < vlmax; i++) {
+ src2 = rs2 + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ result = float16_eq_quiet(env->fpr[rs1],
+ env->vfp.vreg[src2].f16[j],
+ &env->fp_status);
+ vector_mask_result(env, rd, width, lmul, i, result);
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ result = float32_eq_quiet(env->fpr[rs1],
+ env->vfp.vreg[src2].f32[j],
+ &env->fp_status);
+ vector_mask_result(env, rd, width, lmul, i, result);
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ result = float64_eq_quiet(env->fpr[rs1],
+ env->vfp.vreg[src2].f64[j],
+ &env->fp_status);
+ vector_mask_result(env, rd, width, lmul, i, result);
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ } else {
+ switch (width) {
+ case 16:
+ case 32:
+ case 64:
+ vector_mask_result(env, rd, width, lmul, i, 0);
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ }
+ }
+    env->vfp.vstart = 0;
+}
+
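+/* vmsne.vv vd, vs2, vs1, vm # vd[i] = (vs2[i] != vs1[i]) */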
+void VECTOR_HELPER(vmsne_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int i, j, vl;
+ uint32_t lmul, width, src1, src2, vlmax;
+
+ vl = env->vfp.vl;
+ lmul = vector_get_lmul(env);
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ vector_lmul_check_reg(env, lmul, rs1, false);
+ vector_lmul_check_reg(env, lmul, rs2, false);
+
+ for (i = 0; i < vlmax; i++) {
+ src1 = rs1 + (i / (VLEN / width));
+ src2 = rs2 + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 8:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ if (env->vfp.vreg[src1].u8[j] !=
+ env->vfp.vreg[src2].u8[j]) {
+ vector_mask_result(env, rd, width, lmul, i, 1);
+ } else {
+ vector_mask_result(env, rd, width, lmul, i, 0);
+ }
+ }
+ break;
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ if (env->vfp.vreg[src1].u16[j] !=
+ env->vfp.vreg[src2].u16[j]) {
+ vector_mask_result(env, rd, width, lmul, i, 1);
+ } else {
+ vector_mask_result(env, rd, width, lmul, i, 0);
+ }
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ if (env->vfp.vreg[src1].u32[j] !=
+ env->vfp.vreg[src2].u32[j]) {
+ vector_mask_result(env, rd, width, lmul, i, 1);
+ } else {
+ vector_mask_result(env, rd, width, lmul, i, 0);
+ }
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ if (env->vfp.vreg[src1].u64[j] !=
+ env->vfp.vreg[src2].u64[j]) {
+ vector_mask_result(env, rd, width, lmul, i, 1);
+ } else {
+ vector_mask_result(env, rd, width, lmul, i, 0);
+ }
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ break;
+ }
+ } else {
+ if (width <= 64) {
+ vector_mask_result(env, rd, width, lmul, i, 0);
+ } else {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ }
+ }
+ env->vfp.vstart = 0;
+}
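+
+/* vmsne.vx vd, vs2, rs1, vm # vd[i] = (vs2[i] != x[rs1]) */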
+void VECTOR_HELPER(vmsne_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int i, j, vl;
+ uint32_t lmul, width, src2, vlmax;
+
+ vl = env->vfp.vl;
+ lmul = vector_get_lmul(env);
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ vector_lmul_check_reg(env, lmul, rs2, false);
+
+ for (i = 0; i < vlmax; i++) {
+ src2 = rs2 + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 8:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ if ((uint8_t)env->gpr[rs1] != env->vfp.vreg[src2].u8[j]) {
+ vector_mask_result(env, rd, width, lmul, i, 1);
+ } else {
+ vector_mask_result(env, rd, width, lmul, i, 0);
+ }
+ }
+ break;
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ if ((uint16_t)env->gpr[rs1] != env->vfp.vreg[src2].u16[j]) {
+ vector_mask_result(env, rd, width, lmul, i, 1);
+ } else {
+ vector_mask_result(env, rd, width, lmul, i, 0);
+ }
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ if ((uint32_t)env->gpr[rs1] != env->vfp.vreg[src2].u32[j]) {
+ vector_mask_result(env, rd, width, lmul, i, 1);
+ } else {
+ vector_mask_result(env, rd, width, lmul, i, 0);
+ }
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ if ((uint64_t)extend_gpr(env->gpr[rs1]) !=
+ env->vfp.vreg[src2].u64[j]) {
+ vector_mask_result(env, rd, width, lmul, i, 1);
+ } else {
+ vector_mask_result(env, rd, width, lmul, i, 0);
+ }
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ break;
+ }
+ } else {
+ if (width <= 64) {
+ vector_mask_result(env, rd, width, lmul, i, 0);
+ } else {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ }
+ }
+ env->vfp.vstart = 0;
+}
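+
+/* vmsne.vi vd, vs2, imm, vm # vd[i] = (vs2[i] != sext(imm)) */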
+void VECTOR_HELPER(vmsne_vi)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int i, j, vl;
+ uint32_t lmul, width, src2, vlmax;
+
+ vl = env->vfp.vl;
+ lmul = vector_get_lmul(env);
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ vector_lmul_check_reg(env, lmul, rs2, false);
+
+ for (i = 0; i < vlmax; i++) {
+ src2 = rs2 + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 8:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ if ((uint8_t)sign_extend(rs1, 5)
+ != env->vfp.vreg[src2].u8[j]) {
+ vector_mask_result(env, rd, width, lmul, i, 1);
+ } else {
+ vector_mask_result(env, rd, width, lmul, i, 0);
+ }
+ }
+ break;
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ if ((uint16_t)sign_extend(rs1, 5)
+ != env->vfp.vreg[src2].u16[j]) {
+ vector_mask_result(env, rd, width, lmul, i, 1);
+ } else {
+ vector_mask_result(env, rd, width, lmul, i, 0);
+ }
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ if ((uint32_t)sign_extend(rs1, 5)
+ != env->vfp.vreg[src2].u32[j]) {
+ vector_mask_result(env, rd, width, lmul, i, 1);
+ } else {
+ vector_mask_result(env, rd, width, lmul, i, 0);
+ }
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ if ((uint64_t)sign_extend(rs1, 5) !=
+ env->vfp.vreg[src2].u64[j]) {
+ vector_mask_result(env, rd, width, lmul, i, 1);
+ } else {
+ vector_mask_result(env, rd, width, lmul, i, 0);
+ }
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ break;
+ }
+ } else {
+ if (width <= 64) {
+ vector_mask_result(env, rd, width, lmul, i, 0);
+ } else {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ }
+ }
+ env->vfp.vstart = 0;
+}
+
+/* vmand.mm vd, vs2, vs1 # vd = vs2 & vs1 */
+void VECTOR_HELPER(vmand_mm)(CPURISCVState *env, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int width, lmul, vl, i, vlmax;
+ uint32_t tmp;
+
+ if (vector_vtype_ill(env)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ lmul = vector_get_lmul(env);
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ vl = env->vfp.vl;
+ if (env->vfp.vstart >= vl) {
+ return;
+ }
+
+ for (i = 0; i < vlmax; i++) {
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ tmp = vector_mask_reg(env, rs1, width, lmul, i) &
+ vector_mask_reg(env, rs2, width, lmul, i);
+ vector_mask_result(env, rd, width, lmul, i, tmp);
+ } else {
+ vector_mask_result(env, rd, width, lmul, i, 0);
+ }
+ }
+
+    env->vfp.vstart = 0;
+}
+
+/* vmfle.vv vd, vs2, vs1, vm # Vector-vector */
+void VECTOR_HELPER(vmfle_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int width, lmul, vl, vlmax;
+ int i, j, src1, src2, result;
+
+ lmul = vector_get_lmul(env);
+ vl = env->vfp.vl;
+
+ if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ vector_lmul_check_reg(env, lmul, rs1, false);
+ vector_lmul_check_reg(env, lmul, rs2, false);
+ if (env->vfp.vstart >= vl) {
+ return;
+ }
+
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ for (i = 0; i < vlmax; i++) {
+ src2 = rs2 + (i / (VLEN / width));
+ src1 = rs1 + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ result = float16_le(env->vfp.vreg[src2].f16[j],
+ env->vfp.vreg[src1].f16[j],
+ &env->fp_status);
+ vector_mask_result(env, rd, width, lmul, i, result);
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ result = float32_le(env->vfp.vreg[src2].f32[j],
+ env->vfp.vreg[src1].f32[j],
+ &env->fp_status);
+ vector_mask_result(env, rd, width, lmul, i, result);
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ result = float64_le(env->vfp.vreg[src2].f64[j],
+ env->vfp.vreg[src1].f64[j],
+ &env->fp_status);
+ vector_mask_result(env, rd, width, lmul, i, result);
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ } else {
+ switch (width) {
+ case 16:
+ case 32:
+ case 64:
+ vector_mask_result(env, rd, width, lmul, i, 0);
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ }
+ }
+    env->vfp.vstart = 0;
+}
+
+/* vmfle.vf vd, vs2, rs1, vm # vector-scalar */
+void VECTOR_HELPER(vmfle_vf)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int width, lmul, vl, vlmax;
+ int i, j, src2, result;
+
+ lmul = vector_get_lmul(env);
+ vl = env->vfp.vl;
+
+ if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ vector_lmul_check_reg(env, lmul, rs2, false);
+
+ if (env->vfp.vstart >= vl) {
+ return;
+ }
+
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ for (i = 0; i < vlmax; i++) {
+ src2 = rs2 + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ result = float16_le(env->vfp.vreg[src2].f16[j],
+ env->fpr[rs1],
+ &env->fp_status);
+ vector_mask_result(env, rd, width, lmul, i, result);
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ result = float32_le(env->vfp.vreg[src2].f32[j],
+ env->fpr[rs1],
+ &env->fp_status);
+ vector_mask_result(env, rd, width, lmul, i, result);
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ result = float64_le(env->vfp.vreg[src2].f64[j],
+ env->fpr[rs1],
+ &env->fp_status);
+ vector_mask_result(env, rd, width, lmul, i, result);
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ } else {
+ switch (width) {
+ case 16:
+ case 32:
+ case 64:
+ vector_mask_result(env, rd, width, lmul, i, 0);
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ }
+ }
+    env->vfp.vstart = 0;
+}
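+
+/* vmsltu.vv vd, vs2, vs1, vm # vd[i] = (vs2[i] < vs1[i]), unsigned */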
+void VECTOR_HELPER(vmsltu_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int i, j, vl;
+ uint32_t lmul, width, src1, src2, vlmax;
+
+ vl = env->vfp.vl;
+ lmul = vector_get_lmul(env);
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ vector_lmul_check_reg(env, lmul, rs1, false);
+ vector_lmul_check_reg(env, lmul, rs2, false);
+
+ for (i = 0; i < vlmax; i++) {
+ src1 = rs1 + (i / (VLEN / width));
+ src2 = rs2 + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 8:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ if (env->vfp.vreg[src2].u8[j] <
+ env->vfp.vreg[src1].u8[j]) {
+ vector_mask_result(env, rd, width, lmul, i, 1);
+ } else {
+ vector_mask_result(env, rd, width, lmul, i, 0);
+ }
+ }
+ break;
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ if (env->vfp.vreg[src2].u16[j] <
+ env->vfp.vreg[src1].u16[j]) {
+ vector_mask_result(env, rd, width, lmul, i, 1);
+ } else {
+ vector_mask_result(env, rd, width, lmul, i, 0);
+ }
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ if (env->vfp.vreg[src2].u32[j] <
+ env->vfp.vreg[src1].u32[j]) {
+ vector_mask_result(env, rd, width, lmul, i, 1);
+ } else {
+ vector_mask_result(env, rd, width, lmul, i, 0);
+ }
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ if (env->vfp.vreg[src2].u64[j] <
+ env->vfp.vreg[src1].u64[j]) {
+ vector_mask_result(env, rd, width, lmul, i, 1);
+ } else {
+ vector_mask_result(env, rd, width, lmul, i, 0);
+ }
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ break;
+ }
+ } else {
+ if (width <= 64) {
+ vector_mask_result(env, rd, width, lmul, i, 0);
+ } else {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ }
+ }
+ env->vfp.vstart = 0;
+}
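+
+/* vmsltu.vx vd, vs2, rs1, vm # vd[i] = (vs2[i] < x[rs1]), unsigned */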
+void VECTOR_HELPER(vmsltu_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int i, j, vl;
+ uint32_t lmul, width, src2, vlmax;
+
+ vl = env->vfp.vl;
+ lmul = vector_get_lmul(env);
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ vector_lmul_check_reg(env, lmul, rs2, false);
+
+ for (i = 0; i < vlmax; i++) {
+ src2 = rs2 + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 8:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ if (env->vfp.vreg[src2].u8[j] < (uint8_t)env->gpr[rs1]) {
+ vector_mask_result(env, rd, width, lmul, i, 1);
+ } else {
+ vector_mask_result(env, rd, width, lmul, i, 0);
+ }
+ }
+ break;
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ if (env->vfp.vreg[src2].u16[j] < (uint16_t)env->gpr[rs1]) {
+ vector_mask_result(env, rd, width, lmul, i, 1);
+ } else {
+ vector_mask_result(env, rd, width, lmul, i, 0);
+ }
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ if (env->vfp.vreg[src2].u32[j] < (uint32_t)env->gpr[rs1]) {
+ vector_mask_result(env, rd, width, lmul, i, 1);
+ } else {
+ vector_mask_result(env, rd, width, lmul, i, 0);
+ }
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ if (env->vfp.vreg[src2].u64[j] <
+ (uint64_t)extend_gpr(env->gpr[rs1])) {
+ vector_mask_result(env, rd, width, lmul, i, 1);
+ } else {
+ vector_mask_result(env, rd, width, lmul, i, 0);
+ }
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ break;
+ }
+ } else {
+ if (width <= 64) {
+ vector_mask_result(env, rd, width, lmul, i, 0);
+ } else {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ }
+ }
+ env->vfp.vstart = 0;
+}
+
+/* vmor.mm vd, vs2, vs1 # vd = vs2 | vs1 */
+void VECTOR_HELPER(vmor_mm)(CPURISCVState *env, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int width, lmul, vl, i, vlmax;
+ uint32_t tmp;
+
+ if (vector_vtype_ill(env)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ lmul = vector_get_lmul(env);
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ vl = env->vfp.vl;
+ if (env->vfp.vstart >= vl) {
+ return;
+ }
+
+ for (i = 0; i < vlmax; i++) {
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ tmp = vector_mask_reg(env, rs1, width, lmul, i) |
+ vector_mask_reg(env, rs2, width, lmul, i);
+ vector_mask_result(env, rd, width, lmul, i, tmp & 0x1);
+ } else {
+ vector_mask_result(env, rd, width, lmul, i, 0);
+ }
+ }
+
+    env->vfp.vstart = 0;
+}
+
+/* vmford.vv vd, vs2, vs1, vm # Vector-vector */
+void VECTOR_HELPER(vmford_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int width, lmul, vl, vlmax;
+ int i, j, src1, src2, result;
+
+ lmul = vector_get_lmul(env);
+ vl = env->vfp.vl;
+
+ if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ vector_lmul_check_reg(env, lmul, rs1, false);
+ vector_lmul_check_reg(env, lmul, rs2, false);
+
+ if (env->vfp.vstart >= vl) {
+ return;
+ }
+
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ for (i = 0; i < vlmax; i++) {
+ src2 = rs2 + (i / (VLEN / width));
+ src1 = rs1 + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ result = float16_unordered_quiet(env->vfp.vreg[src1].f16[j],
+ env->vfp.vreg[src2].f16[j],
+ &env->fp_status);
+ vector_mask_result(env, rd, width, lmul, i, !result);
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ result = float32_unordered_quiet(env->vfp.vreg[src1].f32[j],
+ env->vfp.vreg[src2].f32[j],
+ &env->fp_status);
+ vector_mask_result(env, rd, width, lmul, i, !result);
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ result = float64_unordered_quiet(env->vfp.vreg[src1].f64[j],
+ env->vfp.vreg[src2].f64[j],
+ &env->fp_status);
+ vector_mask_result(env, rd, width, lmul, i, !result);
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ } else {
+ switch (width) {
+ case 16:
+ case 32:
+ case 64:
+ vector_mask_result(env, rd, width, lmul, i, 0);
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ }
+ }
+    env->vfp.vstart = 0;
+}
+
+/* vmford.vf vd, vs2, rs1, vm # Vector-scalar */
+void VECTOR_HELPER(vmford_vf)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int width, lmul, vl, vlmax;
+ int i, j, src2, result;
+
+ lmul = vector_get_lmul(env);
+ vl = env->vfp.vl;
+
+ if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ vector_lmul_check_reg(env, lmul, rs2, false);
+
+ if (env->vfp.vstart >= vl) {
+ return;
+ }
+
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ for (i = 0; i < vlmax; i++) {
+ src2 = rs2 + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ result = float16_unordered_quiet(env->vfp.vreg[src2].f16[j],
+ env->fpr[rs1],
+ &env->fp_status);
+ vector_mask_result(env, rd, width, lmul, i, !result);
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ result = float32_unordered_quiet(env->vfp.vreg[src2].f32[j],
+ env->fpr[rs1],
+ &env->fp_status);
+ vector_mask_result(env, rd, width, lmul, i, !result);
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ result = float64_unordered_quiet(env->vfp.vreg[src2].f64[j],
+ env->fpr[rs1],
+ &env->fp_status);
+ vector_mask_result(env, rd, width, lmul, i, !result);
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ } else {
+ switch (width) {
+ case 16:
+ case 32:
+ case 64:
+ vector_mask_result(env, rd, width, lmul, i, 0);
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ }
+ }
+    env->vfp.vstart = 0;
+}
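+
+/* vmslt.vv vd, vs2, vs1, vm # vd[i] = (vs2[i] < vs1[i]), signed */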
+void VECTOR_HELPER(vmslt_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int i, j, vl;
+ uint32_t lmul, width, src1, src2, vlmax;
+
+ vl = env->vfp.vl;
+ lmul = vector_get_lmul(env);
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ vector_lmul_check_reg(env, lmul, rs1, false);
+ vector_lmul_check_reg(env, lmul, rs2, false);
+
+ for (i = 0; i < vlmax; i++) {
+ src1 = rs1 + (i / (VLEN / width));
+ src2 = rs2 + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 8:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ if (env->vfp.vreg[src2].s8[j] <
+ env->vfp.vreg[src1].s8[j]) {
+ vector_mask_result(env, rd, width, lmul, i, 1);
+ } else {
+ vector_mask_result(env, rd, width, lmul, i, 0);
+ }
+ }
+ break;
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ if (env->vfp.vreg[src2].s16[j] <
+ env->vfp.vreg[src1].s16[j]) {
+ vector_mask_result(env, rd, width, lmul, i, 1);
+ } else {
+ vector_mask_result(env, rd, width, lmul, i, 0);
+ }
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ if (env->vfp.vreg[src2].s32[j] <
+ env->vfp.vreg[src1].s32[j]) {
+ vector_mask_result(env, rd, width, lmul, i, 1);
+ } else {
+ vector_mask_result(env, rd, width, lmul, i, 0);
+ }
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ if (env->vfp.vreg[src2].s64[j] <
+ env->vfp.vreg[src1].s64[j]) {
+ vector_mask_result(env, rd, width, lmul, i, 1);
+ } else {
+ vector_mask_result(env, rd, width, lmul, i, 0);
+ }
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ break;
+ }
+ } else {
+ if (width <= 64) {
+ vector_mask_result(env, rd, width, lmul, i, 0);
+ } else {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ }
+ }
+ env->vfp.vstart = 0;
+}
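+
+/* vmslt.vx vd, vs2, rs1, vm # vector-scalar, vd[i] = (vs2[i] < x[rs1]) signed */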
+void VECTOR_HELPER(vmslt_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int i, j, vl;
+ uint32_t lmul, width, src2, vlmax;
+
+ vl = env->vfp.vl;
+ lmul = vector_get_lmul(env);
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ vector_lmul_check_reg(env, lmul, rs2, false);
+
+ for (i = 0; i < vlmax; i++) {
+ src2 = rs2 + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 8:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ if (env->vfp.vreg[src2].s8[j] < (int8_t)env->gpr[rs1]) {
+ vector_mask_result(env, rd, width, lmul, i, 1);
+ } else {
+ vector_mask_result(env, rd, width, lmul, i, 0);
+ }
+ }
+ break;
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ if (env->vfp.vreg[src2].s16[j] < (int16_t)env->gpr[rs1]) {
+ vector_mask_result(env, rd, width, lmul, i, 1);
+ } else {
+ vector_mask_result(env, rd, width, lmul, i, 0);
+ }
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ if (env->vfp.vreg[src2].s32[j] < (int32_t)env->gpr[rs1]) {
+ vector_mask_result(env, rd, width, lmul, i, 1);
+ } else {
+ vector_mask_result(env, rd, width, lmul, i, 0);
+ }
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ if (env->vfp.vreg[src2].s64[j] <
+ (int64_t)extend_gpr(env->gpr[rs1])) {
+ vector_mask_result(env, rd, width, lmul, i, 1);
+ } else {
+ vector_mask_result(env, rd, width, lmul, i, 0);
+ }
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ break;
+ }
+ } else {
+ if (width <= 64) {
+ vector_mask_result(env, rd, width, lmul, i, 0);
+ } else {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ }
+ }
+ env->vfp.vstart = 0;
+}
+
+/* vmxor.mm vd, vs2, vs1 # vd = vs2 ^ vs1 */
+void VECTOR_HELPER(vmxor_mm)(CPURISCVState *env, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int width, lmul, vl, i, vlmax;
+ uint32_t tmp;
+
+ if (vector_vtype_ill(env)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ lmul = vector_get_lmul(env);
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ vl = env->vfp.vl;
+ if (env->vfp.vstart >= vl) {
+ return;
+ }
+
+ for (i = 0; i < vlmax; i++) {
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ tmp = vector_mask_reg(env, rs1, width, lmul, i) ^
+ vector_mask_reg(env, rs2, width, lmul, i);
+ vector_mask_result(env, rd, width, lmul, i, tmp & 0x1);
+ } else {
+ vector_mask_result(env, rd, width, lmul, i, 0);
+ }
+ }
+
+ env->vfp.vstart = 0;
+}
+
+/* vmflt.vv vd, vs2, vs1, vm # Vector-vector */
+void VECTOR_HELPER(vmflt_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int width, lmul, vl, vlmax;
+ int i, j, src1, src2, result;
+
+ lmul = vector_get_lmul(env);
+ vl = env->vfp.vl;
+
+ if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ vector_lmul_check_reg(env, lmul, rs1, false);
+ vector_lmul_check_reg(env, lmul, rs2, false);
+
+ if (env->vfp.vstart >= vl) {
+ return;
+ }
+
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ for (i = 0; i < vlmax; i++) {
+ src2 = rs2 + (i / (VLEN / width));
+ src1 = rs1 + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ result = float16_lt(env->vfp.vreg[src2].f16[j],
+ env->vfp.vreg[src1].f16[j],
+ &env->fp_status);
+ vector_mask_result(env, rd, width, lmul, i, result);
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ result = float32_lt(env->vfp.vreg[src2].f32[j],
+ env->vfp.vreg[src1].f32[j],
+ &env->fp_status);
+ vector_mask_result(env, rd, width, lmul, i, result);
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ result = float64_lt(env->vfp.vreg[src2].f64[j],
+ env->vfp.vreg[src1].f64[j],
+ &env->fp_status);
+ vector_mask_result(env, rd, width, lmul, i, result);
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ } else {
+ switch (width) {
+ case 16:
+ case 32:
+ case 64:
+ vector_mask_result(env, rd, width, lmul, i, 0);
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ }
+ }
+    env->vfp.vstart = 0;
+}
+
+/* vmflt.vf vd, vs2, rs1, vm # vector-scalar */
+void VECTOR_HELPER(vmflt_vf)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int width, lmul, vl, vlmax;
+ int i, j, src2, result;
+
+ lmul = vector_get_lmul(env);
+ vl = env->vfp.vl;
+
+ if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ vector_lmul_check_reg(env, lmul, rs2, false);
+
+ if (env->vfp.vstart >= vl) {
+ return;
+ }
+
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ for (i = 0; i < vlmax; i++) {
+ src2 = rs2 + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ result = float16_lt(env->vfp.vreg[src2].f16[j],
+ env->fpr[rs1],
+ &env->fp_status);
+ vector_mask_result(env, rd, width, lmul, i, result);
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ result = float32_lt(env->vfp.vreg[src2].f32[j],
+ env->fpr[rs1],
+ &env->fp_status);
+ vector_mask_result(env, rd, width, lmul, i, result);
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ result = float64_lt(env->vfp.vreg[src2].f64[j],
+ env->fpr[rs1],
+ &env->fp_status);
+ vector_mask_result(env, rd, width, lmul, i, result);
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ } else {
+ switch (width) {
+ case 16:
+ case 32:
+ case 64:
+ vector_mask_result(env, rd, width, lmul, i, 0);
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ }
+ }
+    env->vfp.vstart = 0;
+}
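+
+/* vmsleu.vv vd, vs2, vs1, vm # Vector-vector, vd[i] = (vs2[i] <= vs1[i]) unsigned */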
+void VECTOR_HELPER(vmsleu_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int i, j, vl;
+ uint32_t lmul, width, src1, src2, vlmax;
+
+ vl = env->vfp.vl;
+ lmul = vector_get_lmul(env);
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ vector_lmul_check_reg(env, lmul, rs1, false);
+ vector_lmul_check_reg(env, lmul, rs2, false);
+
+ for (i = 0; i < vlmax; i++) {
+ src1 = rs1 + (i / (VLEN / width));
+ src2 = rs2 + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 8:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ if (env->vfp.vreg[src2].u8[j] <=
+ env->vfp.vreg[src1].u8[j]) {
+ vector_mask_result(env, rd, width, lmul, i, 1);
+ } else {
+ vector_mask_result(env, rd, width, lmul, i, 0);
+ }
+ }
+ break;
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ if (env->vfp.vreg[src2].u16[j] <=
+ env->vfp.vreg[src1].u16[j]) {
+ vector_mask_result(env, rd, width, lmul, i, 1);
+ } else {
+ vector_mask_result(env, rd, width, lmul, i, 0);
+ }
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ if (env->vfp.vreg[src2].u32[j] <=
+ env->vfp.vreg[src1].u32[j]) {
+ vector_mask_result(env, rd, width, lmul, i, 1);
+ } else {
+ vector_mask_result(env, rd, width, lmul, i, 0);
+ }
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ if (env->vfp.vreg[src2].u64[j] <=
+ env->vfp.vreg[src1].u64[j]) {
+ vector_mask_result(env, rd, width, lmul, i, 1);
+ } else {
+ vector_mask_result(env, rd, width, lmul, i, 0);
+ }
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ break;
+ }
+ } else {
+ if (width <= 64) {
+ vector_mask_result(env, rd, width, lmul, i, 0);
+ } else {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ }
+ }
+ env->vfp.vstart = 0;
+}
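+
+/* vmsleu.vx vd, vs2, rs1, vm # vector-scalar, vd[i] = (vs2[i] <= x[rs1]) unsigned */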
+void VECTOR_HELPER(vmsleu_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int i, j, vl;
+ uint32_t lmul, width, src2, vlmax;
+
+ vl = env->vfp.vl;
+ lmul = vector_get_lmul(env);
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ vector_lmul_check_reg(env, lmul, rs2, false);
+
+ for (i = 0; i < vlmax; i++) {
+ src2 = rs2 + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 8:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ if (env->vfp.vreg[src2].u8[j] <= (uint8_t)env->gpr[rs1]) {
+ vector_mask_result(env, rd, width, lmul, i, 1);
+ } else {
+ vector_mask_result(env, rd, width, lmul, i, 0);
+ }
+ }
+ break;
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ if (env->vfp.vreg[src2].u16[j] <= (uint16_t)env->gpr[rs1]) {
+ vector_mask_result(env, rd, width, lmul, i, 1);
+ } else {
+ vector_mask_result(env, rd, width, lmul, i, 0);
+ }
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ if (env->vfp.vreg[src2].u32[j] <= (uint32_t)env->gpr[rs1]) {
+ vector_mask_result(env, rd, width, lmul, i, 1);
+ } else {
+ vector_mask_result(env, rd, width, lmul, i, 0);
+ }
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ if (env->vfp.vreg[src2].u64[j] <=
+ (uint64_t)extend_gpr(env->gpr[rs1])) {
+ vector_mask_result(env, rd, width, lmul, i, 1);
+ } else {
+ vector_mask_result(env, rd, width, lmul, i, 0);
+ }
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ break;
+ }
+ } else {
+ if (width <= 64) {
+ vector_mask_result(env, rd, width, lmul, i, 0);
+ } else {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ }
+ }
+ env->vfp.vstart = 0;
+}
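+
+/* vmsleu.vi vd, vs2, imm, vm # vector-immediate, vd[i] = (vs2[i] <= imm) unsigned */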
+void VECTOR_HELPER(vmsleu_vi)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int i, j, vl;
+ uint32_t lmul, width, src2, vlmax;
+
+ vl = env->vfp.vl;
+ lmul = vector_get_lmul(env);
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ vector_lmul_check_reg(env, lmul, rs2, false);
+
+ for (i = 0; i < vlmax; i++) {
+ src2 = rs2 + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 8:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ if (env->vfp.vreg[src2].u8[j] <= (uint8_t)rs1) {
+ vector_mask_result(env, rd, width, lmul, i, 1);
+ } else {
+ vector_mask_result(env, rd, width, lmul, i, 0);
+ }
+ }
+ break;
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ if (env->vfp.vreg[src2].u16[j] <= (uint16_t)rs1) {
+ vector_mask_result(env, rd, width, lmul, i, 1);
+ } else {
+ vector_mask_result(env, rd, width, lmul, i, 0);
+ }
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ if (env->vfp.vreg[src2].u32[j] <= (uint32_t)rs1) {
+ vector_mask_result(env, rd, width, lmul, i, 1);
+ } else {
+ vector_mask_result(env, rd, width, lmul, i, 0);
+ }
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ if (env->vfp.vreg[src2].u64[j] <=
+ (uint64_t)rs1) {
+ vector_mask_result(env, rd, width, lmul, i, 1);
+ } else {
+ vector_mask_result(env, rd, width, lmul, i, 0);
+ }
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ break;
+ }
+ } else {
+ if (width <= 64) {
+ vector_mask_result(env, rd, width, lmul, i, 0);
+ } else {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ }
+ }
+ env->vfp.vstart = 0;
+}
+
+/* vmornot.mm vd, vs2, vs1 # vd = vs2 | ~vs1 */
+void VECTOR_HELPER(vmornot_mm)(CPURISCVState *env, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int width, lmul, vl, i, vlmax;
+ uint32_t tmp;
+
+ if (vector_vtype_ill(env)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ lmul = vector_get_lmul(env);
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ vl = env->vfp.vl;
+ if (env->vfp.vstart >= vl) {
+ return;
+ }
+
+ for (i = 0; i < vlmax; i++) {
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ tmp = ~vector_mask_reg(env, rs1, width, lmul, i) |
+ vector_mask_reg(env, rs2, width, lmul, i);
+ vector_mask_result(env, rd, width, lmul, i, tmp & 0x1);
+ } else {
+ vector_mask_result(env, rd, width, lmul, i, 0);
+ }
+ }
+
+ env->vfp.vstart = 0;
+}
+
+/* vmfne.vv vd, vs2, vs1, vm # Vector-vector */
+void VECTOR_HELPER(vmfne_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int width, lmul, vl, vlmax;
+ int i, j, src1, src2, result;
+
+ lmul = vector_get_lmul(env);
+ vl = env->vfp.vl;
+
+ if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ vector_lmul_check_reg(env, lmul, rs1, false);
+ vector_lmul_check_reg(env, lmul, rs2, false);
+
+ if (env->vfp.vstart >= vl) {
+ return;
+ }
+
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ for (i = 0; i < vlmax; i++) {
+ src2 = rs2 + (i / (VLEN / width));
+ src1 = rs1 + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ result = float16_eq_quiet(env->vfp.vreg[src1].f16[j],
+ env->vfp.vreg[src2].f16[j],
+ &env->fp_status);
+ vector_mask_result(env, rd, width, lmul, i, !result);
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ result = float32_eq_quiet(env->vfp.vreg[src1].f32[j],
+ env->vfp.vreg[src2].f32[j],
+ &env->fp_status);
+ vector_mask_result(env, rd, width, lmul, i, !result);
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ result = float64_eq_quiet(env->vfp.vreg[src1].f64[j],
+ env->vfp.vreg[src2].f64[j],
+ &env->fp_status);
+ vector_mask_result(env, rd, width, lmul, i, !result);
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ } else {
+ switch (width) {
+ case 16:
+ case 32:
+ case 64:
+ vector_mask_result(env, rd, width, lmul, i, 0);
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ }
+ }
+    env->vfp.vstart = 0;
+}
+
+/* vmfne.vf vd, vs2, rs1, vm # vector-scalar */
+void VECTOR_HELPER(vmfne_vf)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int width, lmul, vl, vlmax;
+ int i, j, src2, result;
+
+ lmul = vector_get_lmul(env);
+ vl = env->vfp.vl;
+ if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ vector_lmul_check_reg(env, lmul, rs2, false);
+
+ if (env->vfp.vstart >= vl) {
+ return;
+ }
+
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ for (i = 0; i < vlmax; i++) {
+ src2 = rs2 + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ result = float16_eq_quiet(env->fpr[rs1],
+ env->vfp.vreg[src2].f16[j],
+ &env->fp_status);
+ vector_mask_result(env, rd, width, lmul, i, !result);
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ result = float32_eq_quiet(env->fpr[rs1],
+ env->vfp.vreg[src2].f32[j],
+ &env->fp_status);
+ vector_mask_result(env, rd, width, lmul, i, !result);
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ result = float64_eq_quiet(env->fpr[rs1],
+ env->vfp.vreg[src2].f64[j],
+ &env->fp_status);
+ vector_mask_result(env, rd, width, lmul, i, !result);
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ } else {
+ switch (width) {
+ case 16:
+ case 32:
+ case 64:
+ vector_mask_result(env, rd, width, lmul, i, 0);
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ }
+ }
+    env->vfp.vstart = 0;
+}
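+
+/* vmsle.vv vd, vs2, vs1, vm # Vector-vector, vd[i] = (vs2[i] <= vs1[i]) signed */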
+void VECTOR_HELPER(vmsle_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int i, j, vl;
+ uint32_t lmul, width, src1, src2, vlmax;
+
+ vl = env->vfp.vl;
+ lmul = vector_get_lmul(env);
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ vector_lmul_check_reg(env, lmul, rs1, false);
+ vector_lmul_check_reg(env, lmul, rs2, false);
+
+ for (i = 0; i < vlmax; i++) {
+ src1 = rs1 + (i / (VLEN / width));
+ src2 = rs2 + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 8:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ if (env->vfp.vreg[src2].s8[j] <=
+ env->vfp.vreg[src1].s8[j]) {
+ vector_mask_result(env, rd, width, lmul, i, 1);
+ } else {
+ vector_mask_result(env, rd, width, lmul, i, 0);
+ }
+ }
+ break;
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ if (env->vfp.vreg[src2].s16[j] <=
+ env->vfp.vreg[src1].s16[j]) {
+ vector_mask_result(env, rd, width, lmul, i, 1);
+ } else {
+ vector_mask_result(env, rd, width, lmul, i, 0);
+ }
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ if (env->vfp.vreg[src2].s32[j] <=
+ env->vfp.vreg[src1].s32[j]) {
+ vector_mask_result(env, rd, width, lmul, i, 1);
+ } else {
+ vector_mask_result(env, rd, width, lmul, i, 0);
+ }
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ if (env->vfp.vreg[src2].s64[j] <=
+ env->vfp.vreg[src1].s64[j]) {
+ vector_mask_result(env, rd, width, lmul, i, 1);
+ } else {
+ vector_mask_result(env, rd, width, lmul, i, 0);
+ }
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ break;
+ }
+ } else {
+ if (width <= 64) {
+ vector_mask_result(env, rd, width, lmul, i, 0);
+ } else {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ }
+ }
+ env->vfp.vstart = 0;
+}
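+
+/* vmsle.vx vd, vs2, rs1, vm # vector-scalar, vd[i] = (vs2[i] <= x[rs1]) signed */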
+void VECTOR_HELPER(vmsle_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int i, j, vl;
+ uint32_t lmul, width, src2, vlmax;
+
+ vl = env->vfp.vl;
+ lmul = vector_get_lmul(env);
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ vector_lmul_check_reg(env, lmul, rs2, false);
+
+ for (i = 0; i < vlmax; i++) {
+ src2 = rs2 + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 8:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ if (env->vfp.vreg[src2].s8[j] <= (int8_t)env->gpr[rs1]) {
+ vector_mask_result(env, rd, width, lmul, i, 1);
+ } else {
+ vector_mask_result(env, rd, width, lmul, i, 0);
+ }
+ }
+ break;
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ if (env->vfp.vreg[src2].s16[j] <= (int16_t)env->gpr[rs1]) {
+ vector_mask_result(env, rd, width, lmul, i, 1);
+ } else {
+ vector_mask_result(env, rd, width, lmul, i, 0);
+ }
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ if (env->vfp.vreg[src2].s32[j] <= (int32_t)env->gpr[rs1]) {
+ vector_mask_result(env, rd, width, lmul, i, 1);
+ } else {
+ vector_mask_result(env, rd, width, lmul, i, 0);
+ }
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ if (env->vfp.vreg[src2].s64[j] <=
+ (int64_t)extend_gpr(env->gpr[rs1])) {
+ vector_mask_result(env, rd, width, lmul, i, 1);
+ } else {
+ vector_mask_result(env, rd, width, lmul, i, 0);
+ }
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ break;
+ }
+ } else {
+ if (width <= 64) {
+ vector_mask_result(env, rd, width, lmul, i, 0);
+ } else {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ }
+ }
+ env->vfp.vstart = 0;
+}
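+
+/* vmsle.vi vd, vs2, simm, vm # vector-immediate, vd[i] = (vs2[i] <= sign_extend(simm5)) */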
+void VECTOR_HELPER(vmsle_vi)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int i, j, vl;
+ uint32_t lmul, width, src2, vlmax;
+
+ vl = env->vfp.vl;
+ lmul = vector_get_lmul(env);
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ vector_lmul_check_reg(env, lmul, rs2, false);
+
+ for (i = 0; i < vlmax; i++) {
+ src2 = rs2 + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 8:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ if (env->vfp.vreg[src2].s8[j] <=
+ (int8_t)sign_extend(rs1, 5)) {
+ vector_mask_result(env, rd, width, lmul, i, 1);
+ } else {
+ vector_mask_result(env, rd, width, lmul, i, 0);
+ }
+ }
+ break;
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ if (env->vfp.vreg[src2].s16[j] <=
+ (int16_t)sign_extend(rs1, 5)) {
+ vector_mask_result(env, rd, width, lmul, i, 1);
+ } else {
+ vector_mask_result(env, rd, width, lmul, i, 0);
+ }
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ if (env->vfp.vreg[src2].s32[j] <=
+ (int32_t)sign_extend(rs1, 5)) {
+ vector_mask_result(env, rd, width, lmul, i, 1);
+ } else {
+ vector_mask_result(env, rd, width, lmul, i, 0);
+ }
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ if (env->vfp.vreg[src2].s64[j] <=
+ sign_extend(rs1, 5)) {
+ vector_mask_result(env, rd, width, lmul, i, 1);
+ } else {
+ vector_mask_result(env, rd, width, lmul, i, 0);
+ }
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ break;
+ }
+ } else {
+ if (width <= 64) {
+ vector_mask_result(env, rd, width, lmul, i, 0);
+ } else {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ }
+ }
+ env->vfp.vstart = 0;
+}
+
+/* vmnand.mm vd, vs2, vs1 # vd = ~(vs2 & vs1) */
+void VECTOR_HELPER(vmnand_mm)(CPURISCVState *env, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int width, lmul, vl, i, vlmax;
+ uint32_t tmp;
+
+ if (vector_vtype_ill(env)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ lmul = vector_get_lmul(env);
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ vl = env->vfp.vl;
+ if (env->vfp.vstart >= vl) {
+ return;
+ }
+
+ for (i = 0; i < vlmax; i++) {
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ tmp = vector_mask_reg(env, rs1, width, lmul, i) &
+ vector_mask_reg(env, rs2, width, lmul, i);
+ vector_mask_result(env, rd, width, lmul, i, (~tmp & 0x1));
+ } else {
+ vector_mask_result(env, rd, width, lmul, i, 0);
+ }
+ }
+
+ env->vfp.vstart = 0;
+}
+
+/* vmfgt.vf vd, vs2, rs1, vm # vector-scalar */
+void VECTOR_HELPER(vmfgt_vf)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int width, lmul, vl, vlmax;
+ int i, j, src2, result;
+
+ lmul = vector_get_lmul(env);
+ vl = env->vfp.vl;
+
+ if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ vector_lmul_check_reg(env, lmul, rs2, false);
+
+ if (env->vfp.vstart >= vl) {
+ return;
+ }
+
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ for (i = 0; i < vlmax; i++) {
+ src2 = rs2 + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+                    result = float16_lt(env->fpr[rs1],
+                                        env->vfp.vreg[src2].f16[j],
+                                        &env->fp_status);
+                    vector_mask_result(env, rd, width, lmul, i, result);
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+                    result = float32_lt(env->fpr[rs1],
+                                        env->vfp.vreg[src2].f32[j],
+                                        &env->fp_status);
+                    vector_mask_result(env, rd, width, lmul, i, result);
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+                    result = float64_lt(env->fpr[rs1],
+                                        env->vfp.vreg[src2].f64[j],
+                                        &env->fp_status);
+                    vector_mask_result(env, rd, width, lmul, i, result);
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ } else {
+ switch (width) {
+ case 16:
+ case 32:
+ case 64:
+ vector_mask_result(env, rd, width, lmul, i, 0);
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ }
+ }
+    env->vfp.vstart = 0;
+}
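+
+/* vmsgtu.vx vd, vs2, rs1, vm # vector-scalar, vd[i] = (vs2[i] > x[rs1]) unsigned */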
+void VECTOR_HELPER(vmsgtu_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int i, j, vl;
+ uint32_t lmul, width, src2, vlmax;
+
+ vl = env->vfp.vl;
+ lmul = vector_get_lmul(env);
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ vector_lmul_check_reg(env, lmul, rs2, false);
+
+ for (i = 0; i < vlmax; i++) {
+ src2 = rs2 + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 8:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ if (env->vfp.vreg[src2].u8[j] > (uint8_t)env->gpr[rs1]) {
+ vector_mask_result(env, rd, width, lmul, i, 1);
+ } else {
+ vector_mask_result(env, rd, width, lmul, i, 0);
+ }
+ }
+ break;
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ if (env->vfp.vreg[src2].u16[j] > (uint16_t)env->gpr[rs1]) {
+ vector_mask_result(env, rd, width, lmul, i, 1);
+ } else {
+ vector_mask_result(env, rd, width, lmul, i, 0);
+ }
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ if (env->vfp.vreg[src2].u32[j] > (uint32_t)env->gpr[rs1]) {
+ vector_mask_result(env, rd, width, lmul, i, 1);
+ } else {
+ vector_mask_result(env, rd, width, lmul, i, 0);
+ }
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ if (env->vfp.vreg[src2].u64[j] >
+ (uint64_t)extend_gpr(env->gpr[rs1])) {
+ vector_mask_result(env, rd, width, lmul, i, 1);
+ } else {
+ vector_mask_result(env, rd, width, lmul, i, 0);
+ }
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ break;
+ }
+ } else {
+ if (width <= 64) {
+ vector_mask_result(env, rd, width, lmul, i, 0);
+ } else {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ }
+ }
+
+ env->vfp.vstart = 0;
+}
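+
+/* vmsgtu.vi vd, vs2, imm, vm # vector-immediate, vd[i] = (vs2[i] > imm) unsigned */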
+void VECTOR_HELPER(vmsgtu_vi)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int i, j, vl;
+ uint32_t lmul, width, src2, vlmax;
+
+ vl = env->vfp.vl;
+ lmul = vector_get_lmul(env);
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ vector_lmul_check_reg(env, lmul, rs2, false);
+
+ for (i = 0; i < vlmax; i++) {
+ src2 = rs2 + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 8:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ if (env->vfp.vreg[src2].u8[j] > (uint8_t)rs1) {
+ vector_mask_result(env, rd, width, lmul, i, 1);
+ } else {
+ vector_mask_result(env, rd, width, lmul, i, 0);
+ }
+ }
+ break;
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ if (env->vfp.vreg[src2].u16[j] > (uint16_t)rs1) {
+ vector_mask_result(env, rd, width, lmul, i, 1);
+ } else {
+ vector_mask_result(env, rd, width, lmul, i, 0);
+ }
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ if (env->vfp.vreg[src2].u32[j] > (uint32_t)rs1) {
+ vector_mask_result(env, rd, width, lmul, i, 1);
+ } else {
+ vector_mask_result(env, rd, width, lmul, i, 0);
+ }
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ if (env->vfp.vreg[src2].u64[j] >
+ (uint64_t)rs1) {
+ vector_mask_result(env, rd, width, lmul, i, 1);
+ } else {
+ vector_mask_result(env, rd, width, lmul, i, 0);
+ }
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ break;
+ }
+ } else {
+ if (width <= 64) {
+ vector_mask_result(env, rd, width, lmul, i, 0);
+ } else {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ }
+ }
+ env->vfp.vstart = 0;
+}
+
+/* vmnor.mm vd, vs2, vs1 # vd = ~(vs2 | vs1) */
+void VECTOR_HELPER(vmnor_mm)(CPURISCVState *env, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int width, lmul, vl, i, vlmax;
+ uint32_t tmp;
+
+ if (vector_vtype_ill(env)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ lmul = vector_get_lmul(env);
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ vl = env->vfp.vl;
+ if (env->vfp.vstart >= vl) {
+ return;
+ }
+
+ for (i = 0; i < vlmax; i++) {
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ tmp = vector_mask_reg(env, rs1, width, lmul, i) |
+ vector_mask_reg(env, rs2, width, lmul, i);
+ vector_mask_result(env, rd, width, lmul, i, ~tmp & 0x1);
+ } else {
+ vector_mask_result(env, rd, width, lmul, i, 0);
+ }
+ }
+
+ env->vfp.vstart = 0;
+ return;
+ env->vfp.vstart = 0;
+}
+
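+/* vmsgt.vx vd, vs2, rs1, vm # vector-scalar, vd[i] = (vs2[i] > x[rs1]) signed */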
+void VECTOR_HELPER(vmsgt_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int i, j, vl;
+ uint32_t lmul, width, src2, vlmax;
+
+ vl = env->vfp.vl;
+ lmul = vector_get_lmul(env);
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ vector_lmul_check_reg(env, lmul, rs2, false);
+
+ for (i = 0; i < vlmax; i++) {
+ src2 = rs2 + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 8:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ if (env->vfp.vreg[src2].s8[j] > (int8_t)env->gpr[rs1]) {
+ vector_mask_result(env, rd, width, lmul, i, 1);
+ } else {
+ vector_mask_result(env, rd, width, lmul, i, 0);
+ }
+ }
+ break;
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ if (env->vfp.vreg[src2].s16[j] > (int16_t)env->gpr[rs1]) {
+ vector_mask_result(env, rd, width, lmul, i, 1);
+ } else {
+ vector_mask_result(env, rd, width, lmul, i, 0);
+ }
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ if (env->vfp.vreg[src2].s32[j] > (int32_t)env->gpr[rs1]) {
+ vector_mask_result(env, rd, width, lmul, i, 1);
+ } else {
+ vector_mask_result(env, rd, width, lmul, i, 0);
+ }
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ if (env->vfp.vreg[src2].s64[j] >
+ (int64_t)extend_gpr(env->gpr[rs1])) {
+ vector_mask_result(env, rd, width, lmul, i, 1);
+ } else {
+ vector_mask_result(env, rd, width, lmul, i, 0);
+ }
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ break;
+ }
+ } else {
+ if (width <= 64) {
+ vector_mask_result(env, rd, width, lmul, i, 0);
+ } else {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ }
+ }
+ env->vfp.vstart = 0;
+}
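+
+/* vmsgt.vi vd, vs2, simm, vm # vector-immediate, vd[i] = (vs2[i] > sign_extend(simm5)) */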
+void VECTOR_HELPER(vmsgt_vi)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int i, j, vl;
+ uint32_t lmul, width, src2, vlmax;
+
+ vl = env->vfp.vl;
+ lmul = vector_get_lmul(env);
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ vector_lmul_check_reg(env, lmul, rs2, false);
+
+ for (i = 0; i < vlmax; i++) {
+ src2 = rs2 + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 8:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ if (env->vfp.vreg[src2].s8[j] >
+ (int8_t)sign_extend(rs1, 5)) {
+ vector_mask_result(env, rd, width, lmul, i, 1);
+ } else {
+ vector_mask_result(env, rd, width, lmul, i, 0);
+ }
+ }
+ break;
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ if (env->vfp.vreg[src2].s16[j] >
+ (int16_t)sign_extend(rs1, 5)) {
+ vector_mask_result(env, rd, width, lmul, i, 1);
+ } else {
+ vector_mask_result(env, rd, width, lmul, i, 0);
+ }
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ if (env->vfp.vreg[src2].s32[j] >
+ (int32_t)sign_extend(rs1, 5)) {
+ vector_mask_result(env, rd, width, lmul, i, 1);
+ } else {
+ vector_mask_result(env, rd, width, lmul, i, 0);
+ }
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ if (env->vfp.vreg[src2].s64[j] >
+ sign_extend(rs1, 5)) {
+ vector_mask_result(env, rd, width, lmul, i, 1);
+ } else {
+ vector_mask_result(env, rd, width, lmul, i, 0);
+ }
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ break;
+ }
+ } else {
+ if (width <= 64) {
+ vector_mask_result(env, rd, width, lmul, i, 0);
+ } else {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ }
+ }
+ env->vfp.vstart = 0;
+}
+
+/* vmxnor.mm vd, vs2, vs1 # vd = ~(vs2 ^ vs1) */
+void VECTOR_HELPER(vmxnor_mm)(CPURISCVState *env, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int width, lmul, vl, i, vlmax;
+ uint32_t tmp;
+
+ if (vector_vtype_ill(env)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ lmul = vector_get_lmul(env);
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ vl = env->vfp.vl;
+ if (env->vfp.vstart >= vl) {
+ return;
+ }
+
+ for (i = 0; i < vlmax; i++) {
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ tmp = vector_mask_reg(env, rs1, width, lmul, i) ^
+ vector_mask_reg(env, rs2, width, lmul, i);
+ vector_mask_result(env, rd, width, lmul, i, ~tmp & 0x1);
+ } else {
+ vector_mask_result(env, rd, width, lmul, i, 0);
+ }
+ }
+
+ env->vfp.vstart = 0;
+}
+
+/* vmfge.vf vd, vs2, rs1, vm # vector-scalar */
+void VECTOR_HELPER(vmfge_vf)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int width, lmul, vl, vlmax;
+ int i, j, src2, result;
+
+ lmul = vector_get_lmul(env);
+ vl = env->vfp.vl;
+
+ if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ vector_lmul_check_reg(env, lmul, rs2, false);
+
+ if (env->vfp.vstart >= vl) {
+ return;
+ }
+
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ for (i = 0; i < vlmax; i++) {
+ src2 = rs2 + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+                    result = float16_le(env->fpr[rs1],
+                                        env->vfp.vreg[src2].f16[j],
+                                        &env->fp_status);
+                    vector_mask_result(env, rd, width, lmul, i, result);
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+                    result = float32_le(env->fpr[rs1],
+                                        env->vfp.vreg[src2].f32[j],
+                                        &env->fp_status);
+                    vector_mask_result(env, rd, width, lmul, i, result);
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+                    result = float64_le(env->fpr[rs1],
+                                        env->vfp.vreg[src2].f64[j],
+                                        &env->fp_status);
+                    vector_mask_result(env, rd, width, lmul, i, result);
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ } else {
+ switch (width) {
+ case 16:
+ case 32:
+ case 64:
+ vector_mask_result(env, rd, width, lmul, i, 0);
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ }
+ }
+    env->vfp.vstart = 0;
+}
+
+/* vsaddu.vv vd, vs2, vs1, vm # Vector-vector */
+void VECTOR_HELPER(vsaddu_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int width, lmul, vl, vlmax;
+ int i, j, dest, src1, src2;
+
+ lmul = vector_get_lmul(env);
+
+ if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ vector_lmul_check_reg(env, lmul, rs1, false);
+ vector_lmul_check_reg(env, lmul, rs2, false);
+ vector_lmul_check_reg(env, lmul, rd, false);
+
+ vl = env->vfp.vl;
+ if (env->vfp.vstart >= vl) {
+ return;
+ }
+
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ for (i = 0; i < vlmax; i++) {
+ dest = rd + (i / (VLEN / width));
+ src2 = rs2 + (i / (VLEN / width));
+ src1 = rs1 + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 8:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].u8[j] = sat_add_u8(env,
+ env->vfp.vreg[src1].u8[j], env->vfp.vreg[src2].u8[j]);
+ }
+ break;
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].u16[j] = sat_add_u16(env,
+ env->vfp.vreg[src1].u16[j], env->vfp.vreg[src2].u16[j]);
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].u32[j] = sat_add_u32(env,
+ env->vfp.vreg[src1].u32[j], env->vfp.vreg[src2].u32[j]);
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].u64[j] = sat_add_u64(env,
+ env->vfp.vreg[src1].u64[j], env->vfp.vreg[src2].u64[j]);
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ } else {
+ vector_tail_common(env, dest, j, width);
+ }
+ }
+    env->vfp.vstart = 0;
+}
+
+/* vsaddu.vx vd, vs2, rs1, vm # vector-scalar */
+void VECTOR_HELPER(vsaddu_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int width, lmul, vl, vlmax;
+ int i, j, dest, src2;
+
+ lmul = vector_get_lmul(env);
+
+ if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ vector_lmul_check_reg(env, lmul, rs2, false);
+ vector_lmul_check_reg(env, lmul, rd, false);
+
+ vl = env->vfp.vl;
+ if (env->vfp.vstart >= vl) {
+ return;
+ }
+
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ for (i = 0; i < vlmax; i++) {
+ dest = rd + (i / (VLEN / width));
+ src2 = rs2 + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 8:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].u8[j] = sat_add_u8(env,
+ env->vfp.vreg[src2].u8[j], env->gpr[rs1]);
+ }
+ break;
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].u16[j] = sat_add_u16(env,
+ env->vfp.vreg[src2].u16[j], env->gpr[rs1]);
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].u32[j] = sat_add_u32(env,
+ env->vfp.vreg[src2].u32[j], env->gpr[rs1]);
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].u64[j] = sat_add_u64(env,
+ env->vfp.vreg[src2].u64[j], env->gpr[rs1]);
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ } else {
+ vector_tail_common(env, dest, j, width);
+ }
+ }
+    env->vfp.vstart = 0;
+}
+
+/* vsaddu.vi vd, vs2, imm, vm # vector-immediate */
+void VECTOR_HELPER(vsaddu_vi)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int width, lmul, vl, vlmax;
+ int i, j, dest, src2;
+
+ lmul = vector_get_lmul(env);
+
+ if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ vector_lmul_check_reg(env, lmul, rs2, false);
+ vector_lmul_check_reg(env, lmul, rd, false);
+
+ vl = env->vfp.vl;
+ if (env->vfp.vstart >= vl) {
+ return;
+ }
+
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ for (i = 0; i < vlmax; i++) {
+ dest = rd + (i / (VLEN / width));
+ src2 = rs2 + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 8:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].u8[j] = sat_add_u8(env,
+ env->vfp.vreg[src2].u8[j], rs1);
+ }
+ break;
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].u16[j] = sat_add_u16(env,
+ env->vfp.vreg[src2].u16[j], rs1);
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].u32[j] = sat_add_u32(env,
+ env->vfp.vreg[src2].u32[j], rs1);
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].u64[j] = sat_add_u64(env,
+ env->vfp.vreg[src2].u64[j], rs1);
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ } else {
+ vector_tail_common(env, dest, j, width);
+ }
+ }
+    env->vfp.vstart = 0;
+}
+
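+/* vdivu.vv vd, vs2, vs1, vm # Vector-vector, unsigned divide; division by zero yields all ones */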
+void VECTOR_HELPER(vdivu_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int i, j, vl;
+ uint32_t lmul, width, src1, src2, dest, vlmax;
+
+ vl = env->vfp.vl;
+ lmul = vector_get_lmul(env);
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ vector_lmul_check_reg(env, lmul, rs1, false);
+ vector_lmul_check_reg(env, lmul, rs2, false);
+ vector_lmul_check_reg(env, lmul, rd, false);
+
+ for (i = 0; i < vlmax; i++) {
+ src1 = rs1 + (i / (VLEN / width));
+ src2 = rs2 + (i / (VLEN / width));
+ dest = rd + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 8:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ if (env->vfp.vreg[src1].u8[j] == 0) {
+ env->vfp.vreg[dest].u8[j] = MAX_U8;
+ } else {
+ env->vfp.vreg[dest].u8[j] = env->vfp.vreg[src2].u8[j] /
+ env->vfp.vreg[src1].u8[j];
+ }
+ }
+ break;
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ if (env->vfp.vreg[src1].u16[j] == 0) {
+ env->vfp.vreg[dest].u16[j] = MAX_U16;
+ } else {
+ env->vfp.vreg[dest].u16[j] = env->vfp.vreg[src2].u16[j]
+ / env->vfp.vreg[src1].u16[j];
+ }
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ if (env->vfp.vreg[src1].u32[j] == 0) {
+ env->vfp.vreg[dest].u32[j] = MAX_U32;
+ } else {
+ env->vfp.vreg[dest].u32[j] = env->vfp.vreg[src2].u32[j]
+ / env->vfp.vreg[src1].u32[j];
+ }
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ if (env->vfp.vreg[src1].u64[j] == 0) {
+ env->vfp.vreg[dest].u64[j] = MAX_U64;
+ } else {
+ env->vfp.vreg[dest].u64[j] = env->vfp.vreg[src2].u64[j]
+ / env->vfp.vreg[src1].u64[j];
+ }
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ break;
+ }
+ } else {
+ vector_tail_common(env, dest, j, width);
+ }
+ }
+ env->vfp.vstart = 0;
+}
+
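+/* vdivu.vx vd, vs2, rs1, vm # vector-scalar, unsigned divide; division by zero yields all ones */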
+void VECTOR_HELPER(vdivu_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int i, j, vl;
+ uint32_t lmul, width, src2, dest, vlmax;
+
+ vl = env->vfp.vl;
+ lmul = vector_get_lmul(env);
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ vector_lmul_check_reg(env, lmul, rs2, false);
+ vector_lmul_check_reg(env, lmul, rd, false);
+
+ for (i = 0; i < vlmax; i++) {
+ src2 = rs2 + (i / (VLEN / width));
+ dest = rd + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 8:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ if ((uint8_t)env->gpr[rs1] == 0) {
+ env->vfp.vreg[dest].u8[j] = MAX_U8;
+ } else {
+ env->vfp.vreg[dest].u8[j] = env->vfp.vreg[src2].u8[j] /
+ (uint8_t)env->gpr[rs1];
+ }
+ }
+ break;
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ if ((uint16_t)env->gpr[rs1] == 0) {
+ env->vfp.vreg[dest].u16[j] = MAX_U16;
+ } else {
+ env->vfp.vreg[dest].u16[j] = env->vfp.vreg[src2].u16[j]
+ / (uint16_t)env->gpr[rs1];
+ }
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ if ((uint32_t)env->gpr[rs1] == 0) {
+ env->vfp.vreg[dest].u32[j] = MAX_U32;
+ } else {
+ env->vfp.vreg[dest].u32[j] = env->vfp.vreg[src2].u32[j]
+ / (uint32_t)env->gpr[rs1];
+ }
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ if ((uint64_t)extend_gpr(env->gpr[rs1]) == 0) {
+ env->vfp.vreg[dest].u64[j] = MAX_U64;
+ } else {
+ env->vfp.vreg[dest].u64[j] = env->vfp.vreg[src2].u64[j]
+ / (uint64_t)extend_gpr(env->gpr[rs1]);
+ }
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ break;
+ }
+ } else {
+ vector_tail_common(env, dest, j, width);
+ }
+ }
+ env->vfp.vstart = 0;
+}
+
+/* vfdiv.vv vd, vs2, vs1, vm # Vector-vector */
+void VECTOR_HELPER(vfdiv_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int width, lmul, vl, vlmax;
+ int i, j, dest, src1, src2;
+
+ lmul = vector_get_lmul(env);
+ vl = env->vfp.vl;
+
+ if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ vector_lmul_check_reg(env, lmul, rs1, false);
+ vector_lmul_check_reg(env, lmul, rs2, false);
+ vector_lmul_check_reg(env, lmul, rd, false);
+
+ if (env->vfp.vstart >= vl) {
+ return;
+ }
+
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ for (i = 0; i < vlmax; i++) {
+ dest = rd + (i / (VLEN / width));
+ src2 = rs2 + (i / (VLEN / width));
+ src1 = rs1 + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f16[j] = float16_div(
+ env->vfp.vreg[src2].f16[j],
+ env->vfp.vreg[src1].f16[j],
+ &env->fp_status);
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f32[j] = float32_div(
+ env->vfp.vreg[src2].f32[j],
+ env->vfp.vreg[src1].f32[j],
+ &env->fp_status);
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f64[j] = float64_div(
+ env->vfp.vreg[src2].f64[j],
+ env->vfp.vreg[src1].f64[j],
+ &env->fp_status);
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ } else {
+ vector_tail_fcommon(env, dest, j, width);
+ }
+ }
+    env->vfp.vstart = 0;
+}
+
+/* vfdiv.vf vd, vs2, rs1, vm # vector-scalar */
+void VECTOR_HELPER(vfdiv_vf)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int width, lmul, vl, vlmax;
+ int i, j, dest, src2;
+
+ lmul = vector_get_lmul(env);
+ vl = env->vfp.vl;
+
+ if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ vector_lmul_check_reg(env, lmul, rs2, false);
+ vector_lmul_check_reg(env, lmul, rd, false);
+
+ if (env->vfp.vstart >= vl) {
+ return;
+ }
+
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ for (i = 0; i < vlmax; i++) {
+ dest = rd + (i / (VLEN / width));
+ src2 = rs2 + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f16[j] = float16_div(
+ env->vfp.vreg[src2].f16[j],
+ env->fpr[rs1],
+ &env->fp_status);
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f32[j] = float32_div(
+ env->vfp.vreg[src2].f32[j],
+ env->fpr[rs1],
+ &env->fp_status);
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f64[j] = float64_div(
+ env->vfp.vreg[src2].f64[j],
+ env->fpr[rs1],
+ &env->fp_status);
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ } else {
+ vector_tail_fcommon(env, dest, j, width);
+ }
+ }
+    env->vfp.vstart = 0;
+}
+
+/* vsadd.vv vd, vs2, vs1, vm # Vector-vector */
+void VECTOR_HELPER(vsadd_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int width, lmul, vl, vlmax;
+ int i, j, dest, src1, src2;
+
+ lmul = vector_get_lmul(env);
+
+ if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ vector_lmul_check_reg(env, lmul, rs1, false);
+ vector_lmul_check_reg(env, lmul, rs2, false);
+ vector_lmul_check_reg(env, lmul, rd, false);
+
+ vl = env->vfp.vl;
+
+ if (env->vfp.vstart >= vl) {
+ return;
+ }
+
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ for (i = 0; i < vlmax; i++) {
+ dest = rd + (i / (VLEN / width));
+ src2 = rs2 + (i / (VLEN / width));
+ src1 = rs1 + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 8:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s8[j] = sat_add_s8(env,
+ env->vfp.vreg[src1].s8[j], env->vfp.vreg[src2].s8[j]);
+ }
+ break;
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s16[j] = sat_add_s16(env,
+ env->vfp.vreg[src1].s16[j], env->vfp.vreg[src2].s16[j]);
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s32[j] = sat_add_s32(env,
+ env->vfp.vreg[src1].s32[j], env->vfp.vreg[src2].s32[j]);
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s64[j] = sat_add_s64(env,
+ env->vfp.vreg[src1].s64[j], env->vfp.vreg[src2].s64[j]);
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ } else {
+ vector_tail_common(env, dest, j, width);
+ }
+ }
+    env->vfp.vstart = 0;
+}
+
+/* vsadd.vx vd, vs2, rs1, vm # vector-scalar */
+void VECTOR_HELPER(vsadd_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int width, lmul, vl, vlmax;
+ int i, j, dest, src2;
+
+ lmul = vector_get_lmul(env);
+
+ if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ vector_lmul_check_reg(env, lmul, rs2, false);
+ vector_lmul_check_reg(env, lmul, rd, false);
+
+ vl = env->vfp.vl;
+ if (env->vfp.vstart >= vl) {
+ return;
+ }
+
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ for (i = 0; i < vlmax; i++) {
+ dest = rd + (i / (VLEN / width));
+ src2 = rs2 + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 8:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s8[j] = sat_add_s8(env,
+ env->vfp.vreg[src2].s8[j], env->gpr[rs1]);
+ }
+ break;
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s16[j] = sat_add_s16(env,
+ env->vfp.vreg[src2].s16[j], env->gpr[rs1]);
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s32[j] = sat_add_s32(env,
+ env->vfp.vreg[src2].s32[j], env->gpr[rs1]);
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s64[j] = sat_add_s64(env,
+ env->vfp.vreg[src2].s64[j], env->gpr[rs1]);
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ } else {
+ vector_tail_common(env, dest, j, width);
+ }
+ }
+    env->vfp.vstart = 0;
+}
+
+/* vsadd.vi vd, vs2, imm, vm # vector-immediate */
+void VECTOR_HELPER(vsadd_vi)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int width, lmul, vl, vlmax;
+ int i, j, dest, src2;
+
+ lmul = vector_get_lmul(env);
+
+ if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ vector_lmul_check_reg(env, lmul, rs2, false);
+ vector_lmul_check_reg(env, lmul, rd, false);
+
+ vl = env->vfp.vl;
+ if (env->vfp.vstart >= vl) {
+ return;
+ }
+
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ for (i = 0; i < vlmax; i++) {
+ dest = rd + (i / (VLEN / width));
+ src2 = rs2 + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 8:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s8[j] = sat_add_s8(env,
+ env->vfp.vreg[src2].s8[j], sign_extend(rs1, 5));
+ }
+ break;
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s16[j] = sat_add_s16(env,
+ env->vfp.vreg[src2].s16[j], sign_extend(rs1, 5));
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s32[j] = sat_add_s32(env,
+ env->vfp.vreg[src2].s32[j], sign_extend(rs1, 5));
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s64[j] = sat_add_s64(env,
+ env->vfp.vreg[src2].s64[j], sign_extend(rs1, 5));
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ } else {
+ vector_tail_common(env, dest, j, width);
+ }
+ }
+    env->vfp.vstart = 0;
+}
+
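+/* vdiv.vv vd, vs2, vs1, vm # Vector-vector, signed divide; x/0 = -1, MIN_S/-1 = MIN_S */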
+void VECTOR_HELPER(vdiv_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int i, j, vl;
+ uint32_t lmul, width, src1, src2, dest, vlmax;
+
+ vl = env->vfp.vl;
+ lmul = vector_get_lmul(env);
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ vector_lmul_check_reg(env, lmul, rs1, false);
+ vector_lmul_check_reg(env, lmul, rs2, false);
+ vector_lmul_check_reg(env, lmul, rd, false);
+
+ for (i = 0; i < vlmax; i++) {
+ src1 = rs1 + (i / (VLEN / width));
+ src2 = rs2 + (i / (VLEN / width));
+ dest = rd + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 8:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ if (env->vfp.vreg[src1].s8[j] == 0) {
+ env->vfp.vreg[dest].s8[j] = -1;
+ } else if ((env->vfp.vreg[src2].s8[j] == MIN_S8) &&
+ (env->vfp.vreg[src1].s8[j] == (int8_t)(-1))) {
+ env->vfp.vreg[dest].s8[j] = MIN_S8;
+ } else {
+ env->vfp.vreg[dest].s8[j] = env->vfp.vreg[src2].s8[j] /
+ env->vfp.vreg[src1].s8[j];
+ }
+ }
+ break;
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ if (env->vfp.vreg[src1].s16[j] == 0) {
+ env->vfp.vreg[dest].s16[j] = -1;
+ } else if ((env->vfp.vreg[src2].s16[j] == MIN_S16) &&
+ (env->vfp.vreg[src1].s16[j] == (int16_t)(-1))) {
+ env->vfp.vreg[dest].s16[j] = MIN_S16;
+ } else {
+ env->vfp.vreg[dest].s16[j] = env->vfp.vreg[src2].s16[j]
+ / env->vfp.vreg[src1].s16[j];
+ }
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ if (env->vfp.vreg[src1].s32[j] == 0) {
+ env->vfp.vreg[dest].s32[j] = -1;
+ } else if ((env->vfp.vreg[src2].s32[j] == MIN_S32) &&
+ (env->vfp.vreg[src1].s32[j] == (int32_t)(-1))) {
+ env->vfp.vreg[dest].s32[j] = MIN_S32;
+ } else {
+ env->vfp.vreg[dest].s32[j] = env->vfp.vreg[src2].s32[j]
+ / env->vfp.vreg[src1].s32[j];
+ }
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ if (env->vfp.vreg[src1].s64[j] == 0) {
+ env->vfp.vreg[dest].s64[j] = -1;
+ } else if ((env->vfp.vreg[src2].s64[j] == MIN_S64) &&
+ (env->vfp.vreg[src1].s64[j] == (int64_t)(-1))) {
+ env->vfp.vreg[dest].s64[j] = MIN_S64;
+ } else {
+ env->vfp.vreg[dest].s64[j] = env->vfp.vreg[src2].s64[j]
+ / env->vfp.vreg[src1].s64[j];
+ }
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ break;
+ }
+ } else {
+ vector_tail_common(env, dest, j, width);
+ }
+ }
+ env->vfp.vstart = 0;
+}
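+
+/* vdiv.vx vd, vs2, rs1, vm # vector-scalar, vd[i] = vs2[i] / x[rs1] */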
+void VECTOR_HELPER(vdiv_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int i, j, vl;
+ uint32_t lmul, width, src2, dest, vlmax;
+
+ vl = env->vfp.vl;
+ lmul = vector_get_lmul(env);
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ vector_lmul_check_reg(env, lmul, rs2, false);
+ vector_lmul_check_reg(env, lmul, rd, false);
+
+ for (i = 0; i < vlmax; i++) {
+ src2 = rs2 + (i / (VLEN / width));
+ dest = rd + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 8:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ if ((int8_t)env->gpr[rs1] == 0) {
+ env->vfp.vreg[dest].s8[j] = -1;
+ } else if ((env->vfp.vreg[src2].s8[j] == MIN_S8) &&
+ ((int8_t)env->gpr[rs1] == (int8_t)(-1))) {
+ env->vfp.vreg[dest].s8[j] = MIN_S8;
+ } else {
+ env->vfp.vreg[dest].s8[j] = env->vfp.vreg[src2].s8[j] /
+ (int8_t)env->gpr[rs1];
+ }
+ }
+ break;
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ if ((int16_t)env->gpr[rs1] == 0) {
+ env->vfp.vreg[dest].s16[j] = -1;
+ } else if ((env->vfp.vreg[src2].s16[j] == MIN_S16) &&
+ ((int16_t)env->gpr[rs1] == (int16_t)(-1))) {
+ env->vfp.vreg[dest].s16[j] = MIN_S16;
+ } else {
+ env->vfp.vreg[dest].s16[j] = env->vfp.vreg[src2].s16[j]
+ / (int16_t)env->gpr[rs1];
+ }
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ if ((int32_t)env->gpr[rs1] == 0) {
+ env->vfp.vreg[dest].s32[j] = -1;
+ } else if ((env->vfp.vreg[src2].s32[j] == MIN_S32) &&
+ ((int32_t)env->gpr[rs1] == (int32_t)(-1))) {
+ env->vfp.vreg[dest].s32[j] = MIN_S32;
+ } else {
+ env->vfp.vreg[dest].s32[j] = env->vfp.vreg[src2].s32[j]
+ / (int32_t)env->gpr[rs1];
+ }
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ if ((int64_t)extend_gpr(env->gpr[rs1]) == 0) {
+ env->vfp.vreg[dest].s64[j] = -1;
+ } else if ((env->vfp.vreg[src2].s64[j] == MIN_S64) &&
+ ((int64_t)extend_gpr(env->gpr[rs1]) == (int64_t)(-1))) {
+ env->vfp.vreg[dest].s64[j] = MIN_S64;
+ } else {
+ env->vfp.vreg[dest].s64[j] = env->vfp.vreg[src2].s64[j]
+ / (int64_t)extend_gpr(env->gpr[rs1]);
+ }
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ break;
+ }
+ } else {
+ vector_tail_common(env, dest, j, width);
+ }
+ }
+ env->vfp.vstart = 0;
+}
+
+/* vfrdiv.vf vd, vs2, rs1, vm # scalar-vector, vd[i] = f[rs1]/vs2[i] */
+void VECTOR_HELPER(vfrdiv_vf)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int width, lmul, vl, vlmax;
+ int i, j, dest, src2;
+
+ lmul = vector_get_lmul(env);
+ vl = env->vfp.vl;
+
+ if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ vector_lmul_check_reg(env, lmul, rs2, false);
+ vector_lmul_check_reg(env, lmul, rd, false);
+
+ if (env->vfp.vstart >= vl) {
+ return;
+ }
+
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ for (i = 0; i < vlmax; i++) {
+ dest = rd + (i / (VLEN / width));
+ src2 = rs2 + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f16[j] = float16_div(
+ env->fpr[rs1],
+ env->vfp.vreg[src2].f16[j],
+ &env->fp_status);
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f32[j] = float32_div(
+ env->fpr[rs1],
+ env->vfp.vreg[src2].f32[j],
+ &env->fp_status);
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f64[j] = float64_div(
+ env->fpr[rs1],
+ env->vfp.vreg[src2].f64[j],
+ &env->fp_status);
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ } else {
+ vector_tail_fcommon(env, dest, j, width);
+ }
+ }
+ env->vfp.vstart = 0;
+}
+
+/* vssubu.vv vd, vs2, vs1, vm # Vector-vector */
+void VECTOR_HELPER(vssubu_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int width, lmul, vl, vlmax;
+ int i, j, dest, src1, src2;
+
+ lmul = vector_get_lmul(env);
+
+ if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ vector_lmul_check_reg(env, lmul, rs1, false);
+ vector_lmul_check_reg(env, lmul, rs2, false);
+ vector_lmul_check_reg(env, lmul, rd, false);
+
+ vl = env->vfp.vl;
+ if (env->vfp.vstart >= vl) {
+ return;
+ }
+
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ for (i = 0; i < vlmax; i++) {
+ dest = rd + (i / (VLEN / width));
+ src2 = rs2 + (i / (VLEN / width));
+ src1 = rs1 + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 8:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].u8[j] = sat_sub_u8(env,
+ env->vfp.vreg[src2].u8[j], env->vfp.vreg[src1].u8[j]);
+ }
+ break;
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].u16[j] = sat_sub_u16(env,
+ env->vfp.vreg[src2].u16[j], env->vfp.vreg[src1].u16[j]);
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].u32[j] = sat_sub_u32(env,
+ env->vfp.vreg[src2].u32[j], env->vfp.vreg[src1].u32[j]);
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].u64[j] = sat_sub_u64(env,
+ env->vfp.vreg[src2].u64[j], env->vfp.vreg[src1].u64[j]);
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ } else {
+ vector_tail_common(env, dest, j, width);
+ }
+ }
+ env->vfp.vstart = 0;
+}
+
+/* vssubu.vx vd, vs2, rs1, vm # vector-scalar */
+void VECTOR_HELPER(vssubu_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int width, lmul, vl, vlmax;
+ int i, j, dest, src2;
+
+ lmul = vector_get_lmul(env);
+
+ if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ vector_lmul_check_reg(env, lmul, rs2, false);
+ vector_lmul_check_reg(env, lmul, rd, false);
+
+ vl = env->vfp.vl;
+ if (env->vfp.vstart >= vl) {
+ return;
+ }
+
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ for (i = 0; i < vlmax; i++) {
+ dest = rd + (i / (VLEN / width));
+ src2 = rs2 + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 8:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].u8[j] = sat_sub_u8(env,
+ env->vfp.vreg[src2].u8[j], env->gpr[rs1]);
+ }
+ break;
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].u16[j] = sat_sub_u16(env,
+ env->vfp.vreg[src2].u16[j], env->gpr[rs1]);
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].u32[j] = sat_sub_u32(env,
+ env->vfp.vreg[src2].u32[j], env->gpr[rs1]);
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].u64[j] = sat_sub_u64(env,
+ env->vfp.vreg[src2].u64[j], env->gpr[rs1]);
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ } else {
+ vector_tail_common(env, dest, j, width);
+ }
+ }
+ env->vfp.vstart = 0;
+}
+
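+/* vremu.vv vd, vs2, vs1, vm # Vector-vector, unsigned remainder */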
+void VECTOR_HELPER(vremu_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int i, j, vl;
+ uint32_t lmul, width, src1, src2, dest, vlmax;
+
+ vl = env->vfp.vl;
+ lmul = vector_get_lmul(env);
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ vector_lmul_check_reg(env, lmul, rs1, false);
+ vector_lmul_check_reg(env, lmul, rs2, false);
+ vector_lmul_check_reg(env, lmul, rd, false);
+
+ for (i = 0; i < vlmax; i++) {
+ src1 = rs1 + (i / (VLEN / width));
+ src2 = rs2 + (i / (VLEN / width));
+ dest = rd + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 8:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ if (env->vfp.vreg[src1].u8[j] == 0) {
+ env->vfp.vreg[dest].u8[j] = env->vfp.vreg[src2].u8[j];
+ } else {
+ env->vfp.vreg[dest].u8[j] = env->vfp.vreg[src2].u8[j] %
+ env->vfp.vreg[src1].u8[j];
+ }
+ }
+ break;
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ if (env->vfp.vreg[src1].u16[j] == 0) {
+ env->vfp.vreg[dest].u16[j] = env->vfp.vreg[src2].u16[j];
+ } else {
+ env->vfp.vreg[dest].u16[j] = env->vfp.vreg[src2].u16[j]
+ % env->vfp.vreg[src1].u16[j];
+ }
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ if (env->vfp.vreg[src1].u32[j] == 0) {
+ env->vfp.vreg[dest].u32[j] = env->vfp.vreg[src2].u32[j];
+ } else {
+ env->vfp.vreg[dest].u32[j] = env->vfp.vreg[src2].u32[j]
+ % env->vfp.vreg[src1].u32[j];
+ }
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ if (env->vfp.vreg[src1].u64[j] == 0) {
+ env->vfp.vreg[dest].u64[j] = env->vfp.vreg[src2].u64[j];
+ } else {
+ env->vfp.vreg[dest].u64[j] = env->vfp.vreg[src2].u64[j]
+ % env->vfp.vreg[src1].u64[j];
+ }
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ break;
+ }
+ } else {
+ vector_tail_common(env, dest, j, width);
+ }
+ }
+
+ env->vfp.vstart = 0;
+}
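+
+/* vremu.vx vd, vs2, rs1, vm # vector-scalar, unsigned remainder */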
+void VECTOR_HELPER(vremu_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int i, j, vl;
+ uint32_t lmul, width, src2, dest, vlmax;
+
+ vl = env->vfp.vl;
+ lmul = vector_get_lmul(env);
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ vector_lmul_check_reg(env, lmul, rs2, false);
+ vector_lmul_check_reg(env, lmul, rd, false);
+
+ for (i = 0; i < vlmax; i++) {
+ src2 = rs2 + (i / (VLEN / width));
+ dest = rd + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 8:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ if ((uint8_t)env->gpr[rs1] == 0) {
+ env->vfp.vreg[dest].u8[j] = env->vfp.vreg[src2].u8[j];
+ } else {
+ env->vfp.vreg[dest].u8[j] = env->vfp.vreg[src2].u8[j] %
+ (uint8_t)env->gpr[rs1];
+ }
+ }
+ break;
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ if ((uint16_t)env->gpr[rs1] == 0) {
+ env->vfp.vreg[dest].u16[j] = env->vfp.vreg[src2].u16[j];
+ } else {
+ env->vfp.vreg[dest].u16[j] = env->vfp.vreg[src2].u16[j]
+ % (uint16_t)env->gpr[rs1];
+ }
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ if ((uint32_t)env->gpr[rs1] == 0) {
+ env->vfp.vreg[dest].u32[j] = env->vfp.vreg[src2].u32[j];
+ } else {
+ env->vfp.vreg[dest].u32[j] = env->vfp.vreg[src2].u32[j]
+ % (uint32_t)env->gpr[rs1];
+ }
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ if ((uint64_t)extend_gpr(env->gpr[rs1]) == 0) {
+ env->vfp.vreg[dest].u64[j] = env->vfp.vreg[src2].u64[j];
+ } else {
+ env->vfp.vreg[dest].u64[j] = env->vfp.vreg[src2].u64[j]
+ % (uint64_t)extend_gpr(env->gpr[rs1]);
+ }
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ break;
+ }
+ } else {
+ vector_tail_common(env, dest, j, width);
+ }
+ }
+ env->vfp.vstart = 0;
+}
+
+/* vmsbf.m vd, vs2, vm # set-before-first mask bit */
+void VECTOR_HELPER(vmsbf_m)(CPURISCVState *env, uint32_t vm,
+ uint32_t rs2, uint32_t rd)
+{
+ int width, lmul, vl, vlmax;
+ int i;
+ bool first_mask_bit = false;
+ if (vector_vtype_ill(env)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ lmul = vector_get_lmul(env);
+ vl = env->vfp.vl;
+
+ if (env->vfp.vstart != 0) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ for (i = 0; i < vlmax; i++) {
+ if (i < vl) {
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ if (first_mask_bit) {
+ vector_mask_result(env, rd, width, lmul, i, 0);
+ continue;
+ }
+ if (!vector_mask_reg(env, rs2, width, lmul, i)) {
+ vector_mask_result(env, rd, width, lmul, i, 1);
+ } else {
+ first_mask_bit = true;
+ vector_mask_result(env, rd, width, lmul, i, 0);
+ }
+ }
+ } else {
+ vector_mask_result(env, rd, width, lmul, i, 0);
+ }
+ }
+ env->vfp.vstart = 0;
+}
+
+/* vmsif.m vd, vs2, vm # set-including-first mask bit */
+void VECTOR_HELPER(vmsif_m)(CPURISCVState *env, uint32_t vm,
+ uint32_t rs2, uint32_t rd)
+{
+ int width, lmul, vl, vlmax;
+ int i;
+ bool first_mask_bit = false;
+ if (vector_vtype_ill(env)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ lmul = vector_get_lmul(env);
+ vl = env->vfp.vl;
+
+ if (env->vfp.vstart != 0) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ for (i = 0; i < vlmax; i++) {
+ if (i < vl) {
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ if (first_mask_bit) {
+ vector_mask_result(env, rd, width, lmul, i, 0);
+ continue;
+ }
+ if (!vector_mask_reg(env, rs2, width, lmul, i)) {
+ vector_mask_result(env, rd, width, lmul, i, 1);
+ } else {
+ first_mask_bit = true;
+ vector_mask_result(env, rd, width, lmul, i, 1);
+ }
+ }
+ } else {
+ vector_mask_result(env, rd, width, lmul, i, 0);
+ }
+ }
+ env->vfp.vstart = 0;
+}
+
+/* vmsof.m vd, vs2, vm # set-only-first mask bit */
+void VECTOR_HELPER(vmsof_m)(CPURISCVState *env, uint32_t vm,
+ uint32_t rs2, uint32_t rd)
+{
+ int width, lmul, vl, vlmax;
+ int i;
+ bool first_mask_bit = false;
+ if (vector_vtype_ill(env)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ lmul = vector_get_lmul(env);
+ vl = env->vfp.vl;
+
+ if (env->vfp.vstart != 0) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ for (i = 0; i < vlmax; i++) {
+ if (i < vl) {
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ if (first_mask_bit) {
+ vector_mask_result(env, rd, width, lmul, i, 0);
+ continue;
+ }
+ if (!vector_mask_reg(env, rs2, width, lmul, i)) {
+ vector_mask_result(env, rd, width, lmul, i, 0);
+ } else {
+ first_mask_bit = true;
+ vector_mask_result(env, rd, width, lmul, i, 1);
+ }
+ }
+ } else {
+ vector_mask_result(env, rd, width, lmul, i, 0);
+ }
+ }
+ env->vfp.vstart = 0;
+}
+
+/* viota.m vd, vs2, vm # Write exclusive prefix sum of mask bits to destination */
+void VECTOR_HELPER(viota_m)(CPURISCVState *env, uint32_t vm, uint32_t rs2,
+ uint32_t rd)
+{
+ int width, lmul, vl, vlmax;
+ int i, j, dest;
+ uint32_t sum = 0;
+
+ lmul = vector_get_lmul(env);
+ vl = env->vfp.vl;
+ if (vector_vtype_ill(env)
+ || vector_overlap_vm_force(vm, rd)
+ || vector_overlap_dstgp_srcgp(rd, lmul, rs2, 1)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ if (env->vfp.vstart != 0) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ for (i = 0; i < vlmax; i++) {
+ dest = rd + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ if (i < vl) {
+ switch (width) {
+ case 8:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].u8[j] = sum;
+ if (vector_mask_reg(env, rs2, width, lmul, i)) {
+ sum++;
+ }
+ }
+ break;
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].u16[j] = sum;
+ if (vector_mask_reg(env, rs2, width, lmul, i)) {
+ sum++;
+ }
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].u32[j] = sum;
+ if (vector_mask_reg(env, rs2, width, lmul, i)) {
+ sum++;
+ }
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].u64[j] = sum;
+ if (vector_mask_reg(env, rs2, width, lmul, i)) {
+ sum++;
+ }
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ } else {
+ vector_tail_common(env, dest, j, width);
+ }
+ }
+ env->vfp.vstart = 0;
+}
+
+/* vid.v vd, vm # Write element ID to destination. */
+void VECTOR_HELPER(vid_v)(CPURISCVState *env, uint32_t vm, uint32_t rd)
+{
+ int width, lmul, vl, vlmax;
+ int i, j, dest;
+
+ lmul = vector_get_lmul(env);
+ vl = env->vfp.vl;
+
+ if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ vector_lmul_check_reg(env, lmul, rd, false);
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ for (i = 0; i < vlmax; i++) {
+ dest = rd + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 8:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].u8[j] = i;
+ }
+ break;
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].u16[j] = i;
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].u32[j] = i;
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].u64[j] = i;
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ } else {
+ vector_tail_common(env, dest, j, width);
+ }
+ }
+ env->vfp.vstart = 0;
+}
+
+/* vssub.vv vd, vs2, vs1, vm # Vector-vector */
+void VECTOR_HELPER(vssub_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int width, lmul, vl, vlmax;
+ int i, j, dest, src1, src2;
+
+ lmul = vector_get_lmul(env);
+
+ if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ vector_lmul_check_reg(env, lmul, rs1, false);
+ vector_lmul_check_reg(env, lmul, rs2, false);
+ vector_lmul_check_reg(env, lmul, rd, false);
+
+ vl = env->vfp.vl;
+ if (env->vfp.vstart >= vl) {
+ return;
+ }
+
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ for (i = 0; i < vlmax; i++) {
+ dest = rd + (i / (VLEN / width));
+ src2 = rs2 + (i / (VLEN / width));
+ src1 = rs1 + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 8:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s8[j] = sat_sub_s8(env,
+ env->vfp.vreg[src2].s8[j], env->vfp.vreg[src1].s8[j]);
+ }
+ break;
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s16[j] = sat_sub_s16(env,
+ env->vfp.vreg[src2].s16[j], env->vfp.vreg[src1].s16[j]);
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s32[j] = sat_sub_s32(env,
+ env->vfp.vreg[src2].s32[j], env->vfp.vreg[src1].s32[j]);
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s64[j] = sat_sub_s64(env,
+ env->vfp.vreg[src2].s64[j], env->vfp.vreg[src1].s64[j]);
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ } else {
+ vector_tail_common(env, dest, j, width);
+ }
+ }
+ env->vfp.vstart = 0;
+}
+
+/* vssub.vx vd, vs2, rs1, vm # vector-scalar */
+void VECTOR_HELPER(vssub_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int width, lmul, vl, vlmax;
+ int i, j, dest, src2;
+
+ lmul = vector_get_lmul(env);
+
+ if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ vector_lmul_check_reg(env, lmul, rs2, false);
+ vector_lmul_check_reg(env, lmul, rd, false);
+
+ vl = env->vfp.vl;
+ if (env->vfp.vstart >= vl) {
+ return;
+ }
+
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ for (i = 0; i < vlmax; i++) {
+ dest = rd + (i / (VLEN / width));
+ src2 = rs2 + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 8:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s8[j] = sat_sub_s8(env,
+ env->vfp.vreg[src2].s8[j], env->gpr[rs1]);
+ }
+ break;
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s16[j] = sat_sub_s16(env,
+ env->vfp.vreg[src2].s16[j], env->gpr[rs1]);
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s32[j] = sat_sub_s32(env,
+ env->vfp.vreg[src2].s32[j], env->gpr[rs1]);
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s64[j] = sat_sub_s64(env,
+ env->vfp.vreg[src2].s64[j], env->gpr[rs1]);
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ } else {
+ vector_tail_common(env, dest, j, width);
+ }
+ }
+ env->vfp.vstart = 0;
+}
+
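+/* vrem.vv vd, vs2, vs1, vm # Vector-vector, signed remainder */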
+void VECTOR_HELPER(vrem_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int i, j, vl;
+ uint32_t lmul, width, src1, src2, dest, vlmax;
+
+ vl = env->vfp.vl;
+ lmul = vector_get_lmul(env);
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ vector_lmul_check_reg(env, lmul, rs1, false);
+ vector_lmul_check_reg(env, lmul, rs2, false);
+ vector_lmul_check_reg(env, lmul, rd, false);
+
+ for (i = 0; i < vlmax; i++) {
+ src1 = rs1 + (i / (VLEN / width));
+ src2 = rs2 + (i / (VLEN / width));
+ dest = rd + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 8:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ if (env->vfp.vreg[src1].s8[j] == 0) {
+ env->vfp.vreg[dest].s8[j] = env->vfp.vreg[src2].s8[j];
+ } else if ((env->vfp.vreg[src2].s8[j] == MIN_S8) &&
+ (env->vfp.vreg[src1].s8[j] == (int8_t)(-1))) {
+ env->vfp.vreg[dest].s8[j] = 0;
+ } else {
+ env->vfp.vreg[dest].s8[j] = env->vfp.vreg[src2].s8[j] %
+ env->vfp.vreg[src1].s8[j];
+ }
+ }
+ break;
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ if (env->vfp.vreg[src1].s16[j] == 0) {
+ env->vfp.vreg[dest].s16[j] = env->vfp.vreg[src2].s16[j];
+ } else if ((env->vfp.vreg[src2].s16[j] == MIN_S16) &&
+ (env->vfp.vreg[src1].s16[j] == (int16_t)(-1))) {
+ env->vfp.vreg[dest].s16[j] = 0;
+ } else {
+ env->vfp.vreg[dest].s16[j] = env->vfp.vreg[src2].s16[j]
+ % env->vfp.vreg[src1].s16[j];
+ }
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ if (env->vfp.vreg[src1].s32[j] == 0) {
+ env->vfp.vreg[dest].s32[j] = env->vfp.vreg[src2].s32[j];
+ } else if ((env->vfp.vreg[src2].s32[j] == MIN_S32) &&
+ (env->vfp.vreg[src1].s32[j] == (int32_t)(-1))) {
+ env->vfp.vreg[dest].s32[j] = 0;
+ } else {
+ env->vfp.vreg[dest].s32[j] = env->vfp.vreg[src2].s32[j]
+ % env->vfp.vreg[src1].s32[j];
+ }
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ if (env->vfp.vreg[src1].s64[j] == 0) {
+ env->vfp.vreg[dest].s64[j] = env->vfp.vreg[src2].s64[j];
+ } else if ((env->vfp.vreg[src2].s64[j] == MIN_S64) &&
+ (env->vfp.vreg[src1].s64[j] == (int64_t)(-1))) {
+ env->vfp.vreg[dest].s64[j] = 0;
+ } else {
+ env->vfp.vreg[dest].s64[j] = env->vfp.vreg[src2].s64[j]
+ % env->vfp.vreg[src1].s64[j];
+ }
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ break;
+ }
+ } else {
+ vector_tail_common(env, dest, j, width);
+ }
+ }
+ env->vfp.vstart = 0;
+}
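+
+/* vrem.vx vd, vs2, rs1, vm # vector-scalar, signed remainder */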
+void VECTOR_HELPER(vrem_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int i, j, vl;
+ uint32_t lmul, width, src2, dest, vlmax;
+
+ vl = env->vfp.vl;
+ lmul = vector_get_lmul(env);
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ vector_lmul_check_reg(env, lmul, rs2, false);
+ vector_lmul_check_reg(env, lmul, rd, false);
+
+ for (i = 0; i < vlmax; i++) {
+ src2 = rs2 + (i / (VLEN / width));
+ dest = rd + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 8:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ if ((int8_t)env->gpr[rs1] == 0) {
+ env->vfp.vreg[dest].s8[j] = env->vfp.vreg[src2].s8[j];
+ } else if ((env->vfp.vreg[src2].s8[j] == MIN_S8) &&
+ ((int8_t)env->gpr[rs1] == (int8_t)(-1))) {
+ env->vfp.vreg[dest].s8[j] = 0;
+ } else {
+ env->vfp.vreg[dest].s8[j] = env->vfp.vreg[src2].s8[j] %
+ (int8_t)env->gpr[rs1];
+ }
+ }
+ break;
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ if ((int16_t)env->gpr[rs1] == 0) {
+ env->vfp.vreg[dest].s16[j] = env->vfp.vreg[src2].s16[j];
+ } else if ((env->vfp.vreg[src2].s16[j] == MIN_S16) &&
+ ((int16_t)env->gpr[rs1] == (int16_t)(-1))) {
+ env->vfp.vreg[dest].s16[j] = 0;
+ } else {
+ env->vfp.vreg[dest].s16[j] = env->vfp.vreg[src2].s16[j]
+ % (int16_t)env->gpr[rs1];
+ }
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ if ((int32_t)env->gpr[rs1] == 0) {
+ env->vfp.vreg[dest].s32[j] = env->vfp.vreg[src2].s32[j];
+ } else if ((env->vfp.vreg[src2].s32[j] == MIN_S32) &&
+ ((int32_t)env->gpr[rs1] == (int32_t)(-1))) {
+ env->vfp.vreg[dest].s32[j] = 0;
+ } else {
+ env->vfp.vreg[dest].s32[j] = env->vfp.vreg[src2].s32[j]
+ % (int32_t)env->gpr[rs1];
+ }
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ if ((int64_t)extend_gpr(env->gpr[rs1]) == 0) {
+ env->vfp.vreg[dest].s64[j] = env->vfp.vreg[src2].s64[j];
+ } else if ((env->vfp.vreg[src2].s64[j] == MIN_S64) &&
+ ((int64_t)extend_gpr(env->gpr[rs1]) == (int64_t)(-1))) {
+ env->vfp.vreg[dest].s64[j] = 0;
+ } else {
+ env->vfp.vreg[dest].s64[j] = env->vfp.vreg[src2].s64[j]
+ % (int64_t)extend_gpr(env->gpr[rs1]);
+ }
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ break;
+ }
+ } else {
+ vector_tail_common(env, dest, j, width);
+ }
+ }
+
+ env->vfp.vstart = 0;
+}
+
+/* vaadd.vv vd, vs2, vs1, vm # Vector-vector */
+void VECTOR_HELPER(vaadd_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int width, lmul, vl, vlmax;
+ int i, j, dest, src1, src2;
+
+ lmul = vector_get_lmul(env);
+
+ if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ vector_lmul_check_reg(env, lmul, rs1, false);
+ vector_lmul_check_reg(env, lmul, rs2, false);
+ vector_lmul_check_reg(env, lmul, rd, false);
+
+ vl = env->vfp.vl;
+ if (env->vfp.vstart >= vl) {
+ return;
+ }
+
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ for (i = 0; i < vlmax; i++) {
+ dest = rd + (i / (VLEN / width));
+ src2 = rs2 + (i / (VLEN / width));
+ src1 = rs1 + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 8:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s8[j] = avg_round_s8(env,
+ env->vfp.vreg[src1].s8[j], env->vfp.vreg[src2].s8[j]);
+ }
+ break;
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s16[j] = avg_round_s16(env,
+ env->vfp.vreg[src1].s16[j], env->vfp.vreg[src2].s16[j]);
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s32[j] = avg_round_s32(env,
+ env->vfp.vreg[src1].s32[j], env->vfp.vreg[src2].s32[j]);
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s64[j] = avg_round_s64(env,
+ env->vfp.vreg[src1].s64[j], env->vfp.vreg[src2].s64[j]);
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ } else {
+ vector_tail_common(env, dest, j, width);
+ }
+ }
+ env->vfp.vstart = 0;
+}
+
+/* vaadd.vx vd, vs2, rs1, vm # vector-scalar */
+void VECTOR_HELPER(vaadd_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int width, lmul, vl, vlmax;
+ int i, j, dest, src2;
+
+ lmul = vector_get_lmul(env);
+
+ if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ vector_lmul_check_reg(env, lmul, rs2, false);
+ vector_lmul_check_reg(env, lmul, rd, false);
+
+ vl = env->vfp.vl;
+ if (env->vfp.vstart >= vl) {
+ return;
+ }
+
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ for (i = 0; i < vlmax; i++) {
+ dest = rd + (i / (VLEN / width));
+ src2 = rs2 + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 8:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s8[j] = avg_round_s8(env,
+ env->gpr[rs1], env->vfp.vreg[src2].s8[j]);
+ }
+ break;
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s16[j] = avg_round_s16(env,
+ env->gpr[rs1], env->vfp.vreg[src2].s16[j]);
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s32[j] = avg_round_s32(env,
+ env->gpr[rs1], env->vfp.vreg[src2].s32[j]);
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s64[j] = avg_round_s64(env,
+ env->gpr[rs1], env->vfp.vreg[src2].s64[j]);
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ } else {
+ vector_tail_common(env, dest, j, width);
+ }
+ }
+ env->vfp.vstart = 0;
+}
+
+/* vaadd.vi vd, vs2, imm, vm # vector-immediate */
+void VECTOR_HELPER(vaadd_vi)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int width, lmul, vl, vlmax;
+ int i, j, dest, src2;
+
+ lmul = vector_get_lmul(env);
+
+ if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ vector_lmul_check_reg(env, lmul, rs2, false);
+ vector_lmul_check_reg(env, lmul, rd, false);
+
+ vl = env->vfp.vl;
+
+ if (env->vfp.vstart >= vl) {
+ return;
+ }
+
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ for (i = 0; i < vlmax; i++) {
+ dest = rd + (i / (VLEN / width));
+ src2 = rs2 + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 8:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s8[j] = avg_round_s8(env,
+ sign_extend(rs1, 5), env->vfp.vreg[src2].s8[j]);
+ }
+ break;
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s16[j] = avg_round_s16(env,
+ sign_extend(rs1, 5), env->vfp.vreg[src2].s16[j]);
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s32[j] = avg_round_s32(env,
+ sign_extend(rs1, 5), env->vfp.vreg[src2].s32[j]);
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s64[j] = avg_round_s64(env,
+ sign_extend(rs1, 5), env->vfp.vreg[src2].s64[j]);
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ } else {
+ vector_tail_common(env, dest, j, width);
+ }
+ }
+ env->vfp.vstart = 0;
+}
+
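+/* vmulhu.vv vd, vs2, vs1, vm # Vector-vector, high bits of unsigned product */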
+void VECTOR_HELPER(vmulhu_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int i, j, vl;
+ uint32_t lmul, width, src1, src2, dest, vlmax;
+
+ vl = env->vfp.vl;
+ lmul = vector_get_lmul(env);
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ vector_lmul_check_reg(env, lmul, rs1, false);
+ vector_lmul_check_reg(env, lmul, rs2, false);
+ vector_lmul_check_reg(env, lmul, rd, false);
+
+ for (i = 0; i < vlmax; i++) {
+ src1 = rs1 + (i / (VLEN / width));
+ src2 = rs2 + (i / (VLEN / width));
+ dest = rd + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 8:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].u8[j] =
+ ((uint16_t)env->vfp.vreg[src1].u8[j]
+ * (uint16_t)env->vfp.vreg[src2].u8[j]) >> width;
+ }
+ break;
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].u16[j] =
+ ((uint32_t)env->vfp.vreg[src1].u16[j]
+ * (uint32_t)env->vfp.vreg[src2].u16[j]) >> width;
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].u32[j] =
+ ((uint64_t)env->vfp.vreg[src1].u32[j]
+ * (uint64_t)env->vfp.vreg[src2].u32[j]) >> width;
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].u64[j] = u64xu64_lh(
+ env->vfp.vreg[src1].u64[j], env->vfp.vreg[src2].u64[j]);
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ break;
+ }
+ } else {
+ vector_tail_common(env, dest, j, width);
+ }
+ }
+ env->vfp.vstart = 0;
+}
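+
+/* vmulhu.vx vd, vs2, rs1, vm # vector-scalar, high bits of unsigned product */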
+void VECTOR_HELPER(vmulhu_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int i, j, vl;
+ uint32_t lmul, width, src2, dest, vlmax;
+
+ vl = env->vfp.vl;
+ lmul = vector_get_lmul(env);
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ vector_lmul_check_reg(env, lmul, rs2, false);
+ vector_lmul_check_reg(env, lmul, rd, false);
+
+ for (i = 0; i < vlmax; i++) {
+ src2 = rs2 + (i / (VLEN / width));
+ dest = rd + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 8:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].u8[j] =
+ ((uint16_t)(uint8_t)env->gpr[rs1]
+ * (uint16_t)env->vfp.vreg[src2].u8[j]) >> width;
+ }
+ break;
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].u16[j] =
+ ((uint32_t)(uint16_t)env->gpr[rs1]
+ * (uint32_t)env->vfp.vreg[src2].u16[j]) >> width;
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].u32[j] =
+ ((uint64_t)(uint32_t)env->gpr[rs1]
+ * (uint64_t)env->vfp.vreg[src2].u32[j]) >> width;
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].u64[j] = u64xu64_lh(
+ (uint64_t)extend_gpr(env->gpr[rs1]),
+ env->vfp.vreg[src2].u64[j]);
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ break;
+ }
+ } else {
+ vector_tail_common(env, dest, j, width);
+ }
+ }
+
+ env->vfp.vstart = 0;
+}
+
+/* vfmul.vv vd, vs2, vs1, vm # Vector-vector */
+void VECTOR_HELPER(vfmul_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int width, lmul, vl, vlmax;
+ int i, j, dest, src1, src2;
+
+ lmul = vector_get_lmul(env);
+ vl = env->vfp.vl;
+
+ if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ vector_lmul_check_reg(env, lmul, rs1, false);
+ vector_lmul_check_reg(env, lmul, rs2, false);
+ vector_lmul_check_reg(env, lmul, rd, false);
+
+ if (env->vfp.vstart >= vl) {
+ return;
+ }
+
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ for (i = 0; i < vlmax; i++) {
+ dest = rd + (i / (VLEN / width));
+ src2 = rs2 + (i / (VLEN / width));
+ src1 = rs1 + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f16[j] = float16_mul(
+ env->vfp.vreg[src1].f16[j],
+ env->vfp.vreg[src2].f16[j],
+ &env->fp_status);
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f32[j] = float32_mul(
+ env->vfp.vreg[src1].f32[j],
+ env->vfp.vreg[src2].f32[j],
+ &env->fp_status);
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f64[j] = float64_mul(
+ env->vfp.vreg[src1].f64[j],
+ env->vfp.vreg[src2].f64[j],
+ &env->fp_status);
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ } else {
+ vector_tail_fcommon(env, dest, j, width);
+ }
+ }
+ env->vfp.vstart = 0;
+}
+
+/* vfmul.vf vd, vs2, rs1, vm # vector-scalar */
+void VECTOR_HELPER(vfmul_vf)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int width, lmul, vl, vlmax;
+ int i, j, dest, src2;
+
+ lmul = vector_get_lmul(env);
+ vl = env->vfp.vl;
+
+ if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ vector_lmul_check_reg(env, lmul, rs2, false);
+ vector_lmul_check_reg(env, lmul, rd, false);
+
+ if (env->vfp.vstart >= vl) {
+ return;
+ }
+
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ for (i = 0; i < vlmax; i++) {
+ dest = rd + (i / (VLEN / width));
+ src2 = rs2 + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f16[j] = float16_mul(
+ env->fpr[rs1],
+ env->vfp.vreg[src2].f16[j],
+ &env->fp_status);
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f32[j] = float32_mul(
+ env->fpr[rs1],
+ env->vfp.vreg[src2].f32[j],
+ &env->fp_status);
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f64[j] = float64_mul(
+ env->fpr[rs1],
+ env->vfp.vreg[src2].f64[j],
+ &env->fp_status);
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ } else {
+ vector_tail_fcommon(env, dest, j, width);
+ }
+ }
+ env->vfp.vstart = 0;
+}
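+
+/* vsll.vv vd, vs2, vs1, vm # Vector-vector, logical shift left */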
+void VECTOR_HELPER(vsll_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int i, j, vl;
+ uint32_t lmul, width, src1, src2, dest, vlmax;
+
+ vl = env->vfp.vl;
+ lmul = vector_get_lmul(env);
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ vector_lmul_check_reg(env, lmul, rs1, false);
+ vector_lmul_check_reg(env, lmul, rs2, false);
+ vector_lmul_check_reg(env, lmul, rd, false);
+
+ for (i = 0; i < vlmax; i++) {
+ src1 = rs1 + (i / (VLEN / width));
+ src2 = rs2 + (i / (VLEN / width));
+ dest = rd + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 8:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].u8[j] = env->vfp.vreg[src2].u8[j]
+ << (env->vfp.vreg[src1].u8[j] & 0x7);
+ }
+ break;
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].u16[j] = env->vfp.vreg[src2].u16[j]
+ << (env->vfp.vreg[src1].u16[j] & 0xf);
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].u32[j] = env->vfp.vreg[src2].u32[j]
+ << (env->vfp.vreg[src1].u32[j] & 0x1f);
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].u64[j] = env->vfp.vreg[src2].u64[j]
+ << (env->vfp.vreg[src1].u64[j] & 0x3f);
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ break;
+ }
+ } else {
+ vector_tail_common(env, dest, j, width);
+ }
+ }
+ env->vfp.vstart = 0;
+}
+
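+/* vsll.vx vd, vs2, rs1, vm # vector-scalar, logical shift left */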
+void VECTOR_HELPER(vsll_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int i, j, vl;
+ uint32_t lmul, width, src2, dest, vlmax;
+
+ vl = env->vfp.vl;
+ lmul = vector_get_lmul(env);
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ vector_lmul_check_reg(env, lmul, rs2, false);
+ vector_lmul_check_reg(env, lmul, rd, false);
+
+ for (i = 0; i < vlmax; i++) {
+ src2 = rs2 + (i / (VLEN / width));
+ dest = rd + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 8:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].u8[j] = env->vfp.vreg[src2].u8[j]
+ << (env->gpr[rs1] & 0x7);
+ }
+ break;
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].u16[j] = env->vfp.vreg[src2].u16[j]
+ << (env->gpr[rs1] & 0xf);
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].u32[j] = env->vfp.vreg[src2].u32[j]
+ << (env->gpr[rs1] & 0x1f);
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].u64[j] = env->vfp.vreg[src2].u64[j]
+ << ((uint64_t)extend_gpr(env->gpr[rs1]) & 0x3f);
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ break;
+ }
+ } else {
+ vector_tail_common(env, dest, j, width);
+ }
+ }
+ env->vfp.vstart = 0;
+}
+
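+/* vsll.vi vd, vs2, uimm, vm # vector-immediate, logical shift left */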
+void VECTOR_HELPER(vsll_vi)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int i, j, vl;
+ uint32_t lmul, width, src2, dest, vlmax;
+
+ vl = env->vfp.vl;
+ lmul = vector_get_lmul(env);
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ vector_lmul_check_reg(env, lmul, rs2, false);
+ vector_lmul_check_reg(env, lmul, rd, false);
+
+ for (i = 0; i < vlmax; i++) {
+ src2 = rs2 + (i / (VLEN / width));
+ dest = rd + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 8:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].u8[j] = env->vfp.vreg[src2].u8[j]
+ << (rs1);
+ }
+ break;
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].u16[j] = env->vfp.vreg[src2].u16[j]
+ << (rs1);
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].u32[j] = env->vfp.vreg[src2].u32[j]
+ << (rs1);
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].u64[j] = env->vfp.vreg[src2].u64[j]
+ << (rs1);
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ break;
+ }
+ } else {
+ vector_tail_common(env, dest, j, width);
+ }
+ }
+ env->vfp.vstart = 0;
+}
+
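+/* vmul.vv vd, vs2, vs1, vm # Vector-vector, low bits of product */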
+void VECTOR_HELPER(vmul_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int i, j, vl;
+ uint32_t lmul, width, src1, src2, dest, vlmax;
+
+ vl = env->vfp.vl;
+ lmul = vector_get_lmul(env);
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ vector_lmul_check_reg(env, lmul, rs1, false);
+ vector_lmul_check_reg(env, lmul, rs2, false);
+ vector_lmul_check_reg(env, lmul, rd, false);
+
+ for (i = 0; i < vlmax; i++) {
+ src1 = rs1 + (i / (VLEN / width));
+ src2 = rs2 + (i / (VLEN / width));
+ dest = rd + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 8:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s8[j] = env->vfp.vreg[src1].s8[j]
+ * env->vfp.vreg[src2].s8[j];
+ }
+ break;
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s16[j] = env->vfp.vreg[src1].s16[j]
+ * env->vfp.vreg[src2].s16[j];
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s32[j] = env->vfp.vreg[src1].s32[j]
+ * env->vfp.vreg[src2].s32[j];
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s64[j] = env->vfp.vreg[src1].s64[j]
+ * env->vfp.vreg[src2].s64[j];
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ break;
+ }
+ } else {
+ vector_tail_common(env, dest, j, width);
+ }
+ }
+ env->vfp.vstart = 0;
+}
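+
+/* vmul.vx vd, vs2, rs1, vm # vector-scalar, low bits of product */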
+void VECTOR_HELPER(vmul_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int i, j, vl;
+ uint32_t lmul, width, src2, dest, vlmax;
+
+ vl = env->vfp.vl;
+ lmul = vector_get_lmul(env);
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ vector_lmul_check_reg(env, lmul, rs2, false);
+ vector_lmul_check_reg(env, lmul, rd, false);
+
+ for (i = 0; i < vlmax; i++) {
+ src2 = rs2 + (i / (VLEN / width));
+ dest = rd + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 8:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s8[j] = env->gpr[rs1]
+ * env->vfp.vreg[src2].s8[j];
+ }
+ break;
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s16[j] = env->gpr[rs1]
+ * env->vfp.vreg[src2].s16[j];
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s32[j] = env->gpr[rs1]
+ * env->vfp.vreg[src2].s32[j];
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s64[j] =
+ (int64_t)extend_gpr(env->gpr[rs1])
+ * env->vfp.vreg[src2].s64[j];
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ break;
+ }
+ } else {
+ vector_tail_common(env, dest, j, width);
+ }
+ }
+ env->vfp.vstart = 0;
+}
+
+/* vasub.vv vd, vs2, vs1, vm # Vector-vector */
+void VECTOR_HELPER(vasub_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int width, lmul, vl, vlmax;
+ int i, j, dest, src1, src2;
+
+ lmul = vector_get_lmul(env);
+
+ if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ vector_lmul_check_reg(env, lmul, rs1, false);
+ vector_lmul_check_reg(env, lmul, rs2, false);
+ vector_lmul_check_reg(env, lmul, rd, false);
+
+ vl = env->vfp.vl;
+ if (env->vfp.vstart >= vl) {
+ return;
+ }
+
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ for (i = 0; i < vlmax; i++) {
+ dest = rd + (i / (VLEN / width));
+ src2 = rs2 + (i / (VLEN / width));
+ src1 = rs1 + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 8:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s8[j] = avg_round_s8(
+ env,
+ ~env->vfp.vreg[src1].s8[j] + 1,
+ env->vfp.vreg[src2].s8[j]);
+ }
+ break;
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s16[j] = avg_round_s16(
+ env,
+ ~env->vfp.vreg[src1].s16[j] + 1,
+ env->vfp.vreg[src2].s16[j]);
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s32[j] = avg_round_s32(
+ env,
+ ~env->vfp.vreg[src1].s32[j] + 1,
+ env->vfp.vreg[src2].s32[j]);
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s64[j] = avg_round_s64(
+ env,
+ ~env->vfp.vreg[src1].s64[j] + 1,
+ env->vfp.vreg[src2].s64[j]);
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ } else {
+ vector_tail_common(env, dest, j, width);
+ }
+ }
+ env->vfp.vstart = 0;
+}
+
+/* vasub.vx vd, vs2, rs1, vm # vector-scalar */
+void VECTOR_HELPER(vasub_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int width, lmul, vl, vlmax;
+ int i, j, dest, src2;
+
+ lmul = vector_get_lmul(env);
+
+ if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ vector_lmul_check_reg(env, lmul, rs2, false);
+ vector_lmul_check_reg(env, lmul, rd, false);
+
+ vl = env->vfp.vl;
+ if (env->vfp.vstart >= vl) {
+ return;
+ }
+
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ for (i = 0; i < vlmax; i++) {
+ dest = rd + (i / (VLEN / width));
+ src2 = rs2 + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 8:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s8[j] = avg_round_s8(
+ env, ~env->gpr[rs1] + 1, env->vfp.vreg[src2].s8[j]);
+ }
+ break;
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s16[j] = avg_round_s16(
+ env, ~env->gpr[rs1] + 1, env->vfp.vreg[src2].s16[j]);
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s32[j] = avg_round_s32(
+ env, ~env->gpr[rs1] + 1, env->vfp.vreg[src2].s32[j]);
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s64[j] = avg_round_s64(
+ env, ~env->gpr[rs1] + 1, env->vfp.vreg[src2].s64[j]);
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ } else {
+ vector_tail_common(env, dest, j, width);
+ }
+ }
+ env->vfp.vstart = 0;
+}
+
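+/* vmulhsu.vv vd, vs2, vs1, vm # Vector-vector, high bits of signed(vs2) * unsigned(vs1) */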
+void VECTOR_HELPER(vmulhsu_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int i, j, vl;
+ uint32_t lmul, width, src1, src2, dest, vlmax;
+
+ vl = env->vfp.vl;
+ lmul = vector_get_lmul(env);
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ vector_lmul_check_reg(env, lmul, rs1, false);
+ vector_lmul_check_reg(env, lmul, rs2, false);
+ vector_lmul_check_reg(env, lmul, rd, false);
+
+ for (i = 0; i < vlmax; i++) {
+ src1 = rs1 + (i / (VLEN / width));
+ src2 = rs2 + (i / (VLEN / width));
+ dest = rd + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 8:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s8[j] =
+ ((uint16_t)env->vfp.vreg[src1].u8[j]
+ * (int16_t)env->vfp.vreg[src2].s8[j]) >> width;
+ }
+ break;
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s16[j] =
+ ((uint32_t)env->vfp.vreg[src1].u16[j]
+ * (int32_t)env->vfp.vreg[src2].s16[j]) >> width;
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s32[j] =
+ ((uint64_t)env->vfp.vreg[src1].u32[j]
+ * (int64_t)env->vfp.vreg[src2].s32[j]) >> width;
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s64[j] = s64xu64_lh(
+ env->vfp.vreg[src2].s64[j], env->vfp.vreg[src1].u64[j]);
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ break;
+ }
+ } else {
+ vector_tail_common(env, dest, j, width);
+ }
+ }
+ env->vfp.vstart = 0;
+}
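+
+/* vmulhsu.vx vd, vs2, rs1, vm # vector-scalar, high bits of signed(vs2) * unsigned(x[rs1]) */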
+void VECTOR_HELPER(vmulhsu_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int i, j, vl;
+ uint32_t lmul, width, src2, dest, vlmax;
+
+ vl = env->vfp.vl;
+ lmul = vector_get_lmul(env);
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ vector_lmul_check_reg(env, lmul, rs2, false);
+ vector_lmul_check_reg(env, lmul, rd, false);
+
+ for (i = 0; i < vlmax; i++) {
+ src2 = rs2 + (i / (VLEN / width));
+ dest = rd + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 8:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s8[j] =
+ ((uint16_t)(uint8_t)env->gpr[rs1]
+ * (int16_t)env->vfp.vreg[src2].s8[j]) >> width;
+ }
+ break;
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s16[j] =
+ ((uint32_t)(uint16_t)env->gpr[rs1]
+ * (int32_t)env->vfp.vreg[src2].s16[j]) >> width;
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s32[j] =
+ ((uint64_t)(uint32_t)env->gpr[rs1]
+ * (int64_t)env->vfp.vreg[src2].s32[j]) >> width;
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s64[j] = s64xu64_lh(
+ env->vfp.vreg[src2].s64[j],
+ (uint64_t)extend_gpr(env->gpr[rs1]));
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ break;
+ }
+ } else {
+ vector_tail_common(env, dest, j, width);
+ }
+ }
+ env->vfp.vstart = 0;
+}
+
+/* vsmul.vv vd, vs2, vs1, vm # vd[i] = clip((vs2[i]*vs1[i]+round)>>(SEW-1)) */
+void VECTOR_HELPER(vsmul_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int width, lmul, vl, vlmax;
+ int i, j, dest, src1, src2;
+ if (vector_vtype_ill(env)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ lmul = vector_get_lmul(env);
+ vl = env->vfp.vl;
+
+ if ((!(vm)) && rd == 0) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ if (env->vfp.vstart >= vl) {
+ return;
+ }
+
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ for (i = 0; i < vlmax; i++) {
+ dest = rd + (i / (VLEN / width));
+ src2 = rs2 + (i / (VLEN / width));
+ src1 = rs1 + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 8:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s8[j] = vsmul_8(env,
+ env->vfp.vreg[src1].s8[j], env->vfp.vreg[src2].s8[j]);
+ }
+ break;
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s16[j] = vsmul_16(env,
+ env->vfp.vreg[src1].s16[j], env->vfp.vreg[src2].s16[j]);
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s32[j] = vsmul_32(env,
+ env->vfp.vreg[src1].s32[j], env->vfp.vreg[src2].s32[j]);
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s64[j] = vsmul_64(env,
+ env->vfp.vreg[src1].s64[j], env->vfp.vreg[src2].s64[j]);
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ } else {
+ vector_tail_common(env, dest, j, width);
+ }
+ }
+ env->vfp.vstart = 0;
+}
+
+/* vsmul.vx vd, vs2, rs1, vm # vd[i] = clip((vs2[i]*x[rs1]+round)>>(SEW-1)) */
+void VECTOR_HELPER(vsmul_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int width, lmul, vl, vlmax;
+ int i, j, dest, src2;
+ if (vector_vtype_ill(env)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ lmul = vector_get_lmul(env);
+ vl = env->vfp.vl;
+
+ if (!vm && rd == 0) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ if (env->vfp.vstart >= vl) {
+ return;
+ }
+
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ for (i = 0; i < vlmax; i++) {
+ dest = rd + (i / (VLEN / width));
+ src2 = rs2 + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 8:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s8[j] = vsmul_8(env,
+ env->vfp.vreg[src2].s8[j], env->gpr[rs1]);
+ }
+ break;
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s16[j] = vsmul_16(env,
+ env->vfp.vreg[src2].s16[j], env->gpr[rs1]);
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s32[j] = vsmul_32(env,
+ env->vfp.vreg[src2].s32[j], env->gpr[rs1]);
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s64[j] = vsmul_64(env,
+ env->vfp.vreg[src2].s64[j], env->gpr[rs1]);
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ } else {
+ vector_tail_common(env, dest, j, width);
+ }
+ }
+ env->vfp.vstart = 0;
+}
+
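+/*
+ * vmulh.vv vd, vs2, vs1, vm # vd[i] = (vs2[i] * vs1[i]) >> SEW (high half)
+ *
+ * For SEW < 64 the product is formed in a type twice as wide and the high
+ * half is taken with a shift; for SEW == 64 the s64xs64_lh() helper
+ * computes the high half directly, so no 128-bit type is needed.
+ */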
+void VECTOR_HELPER(vmulh_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int i, j, vl;
+ uint32_t lmul, width, src1, src2, dest, vlmax;
+
+ vl = env->vfp.vl;
+ lmul = vector_get_lmul(env);
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ vector_lmul_check_reg(env, lmul, rs1, false);
+ vector_lmul_check_reg(env, lmul, rs2, false);
+ vector_lmul_check_reg(env, lmul, rd, false);
+
+ for (i = 0; i < vlmax; i++) {
+ src1 = rs1 + (i / (VLEN / width));
+ src2 = rs2 + (i / (VLEN / width));
+ dest = rd + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 8:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s8[j] =
+ ((int16_t)env->vfp.vreg[src1].s8[j]
+ * (int16_t)env->vfp.vreg[src2].s8[j]) >> width;
+ }
+ break;
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s16[j] =
+ ((int32_t)env->vfp.vreg[src1].s16[j]
+ * (int32_t)env->vfp.vreg[src2].s16[j]) >> width;
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s32[j] =
+ ((int64_t)env->vfp.vreg[src1].s32[j]
+ * (int64_t)env->vfp.vreg[src2].s32[j]) >> width;
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s64[j] = s64xs64_lh(
+ env->vfp.vreg[src1].s64[j], env->vfp.vreg[src2].s64[j]);
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ break;
+ }
+ } else {
+ vector_tail_common(env, dest, j, width);
+ }
+ }
+ env->vfp.vstart = 0;
+}
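+
+/* vmulh.vx vd, vs2, rs1, vm # vd[i] = (vs2[i] * x[rs1]) >> SEW (high half) */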
+void VECTOR_HELPER(vmulh_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int i, j, vl;
+ uint32_t lmul, width, src2, dest, vlmax;
+
+ vl = env->vfp.vl;
+ lmul = vector_get_lmul(env);
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ vector_lmul_check_reg(env, lmul, rs2, false);
+ vector_lmul_check_reg(env, lmul, rd, false);
+
+ for (i = 0; i < vlmax; i++) {
+ src2 = rs2 + (i / (VLEN / width));
+ dest = rd + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 8:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s8[j] =
+ ((int16_t)(int8_t)env->gpr[rs1]
+ * (int16_t)env->vfp.vreg[src2].s8[j]) >> width;
+ }
+ break;
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s16[j] =
+ ((int32_t)(int16_t)env->gpr[rs1]
+ * (int32_t)env->vfp.vreg[src2].s16[j]) >> width;
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s32[j] =
+ ((int64_t)(int32_t)env->gpr[rs1]
+ * (int64_t)env->vfp.vreg[src2].s32[j]) >> width;
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s64[j] = s64xs64_lh(
+ (int64_t)extend_gpr(env->gpr[rs1]),
+ env->vfp.vreg[src2].s64[j]);
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ break;
+ }
+ } else {
+ vector_tail_common(env, dest, j, width);
+ }
+ }
+ env->vfp.vstart = 0;
+}
+
+/* vfrsub.vf vd, vs2, rs1, vm # Scalar-vector vd[i] = f[rs1] - vs2[i] */
+void VECTOR_HELPER(vfrsub_vf)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int width, lmul, vl, vlmax;
+ int i, j, dest, src2;
+
+ lmul = vector_get_lmul(env);
+ vl = env->vfp.vl;
+
+ if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ vector_lmul_check_reg(env, lmul, rs2, false);
+ vector_lmul_check_reg(env, lmul, rd, false);
+
+ if (env->vfp.vstart >= vl) {
+ return;
+ }
+
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ for (i = 0; i < vlmax; i++) {
+ dest = rd + (i / (VLEN / width));
+ src2 = rs2 + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f16[j] = float16_sub(
+ env->fpr[rs1],
+ env->vfp.vreg[src2].f16[j],
+ &env->fp_status);
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f32[j] = float32_sub(
+ env->fpr[rs1],
+ env->vfp.vreg[src2].f32[j],
+ &env->fp_status);
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f64[j] = float64_sub(
+ env->fpr[rs1],
+ env->vfp.vreg[src2].f64[j],
+ &env->fp_status);
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ } else {
+ vector_tail_fcommon(env, dest, j, width);
+ }
+ }
+ env->vfp.vstart = 0;
+}
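+
+/*
+ * vsrl.{vv,vx,vi} vd, vs2, shamt, vm # vd[i] = vs2[i] >> shamt (logical)
+ *
+ * The vv/vx forms mask the shift operand to log2(SEW) bits (0x7/0xf/0x1f/
+ * 0x3f below); the vi form uses the 5-bit immediate in rs1 directly.
+ */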
+void VECTOR_HELPER(vsrl_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int i, j, vl;
+ uint32_t lmul, width, src1, src2, dest, vlmax;
+
+ vl = env->vfp.vl;
+ lmul = vector_get_lmul(env);
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ vector_lmul_check_reg(env, lmul, rs1, false);
+ vector_lmul_check_reg(env, lmul, rs2, false);
+ vector_lmul_check_reg(env, lmul, rd, false);
+
+ for (i = 0; i < vlmax; i++) {
+ src1 = rs1 + (i / (VLEN / width));
+ src2 = rs2 + (i / (VLEN / width));
+ dest = rd + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 8:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].u8[j] = env->vfp.vreg[src2].u8[j]
+ >> (env->vfp.vreg[src1].u8[j] & 0x7);
+ }
+ break;
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].u16[j] = env->vfp.vreg[src2].u16[j]
+ >> (env->vfp.vreg[src1].u16[j] & 0xf);
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].u32[j] = env->vfp.vreg[src2].u32[j]
+ >> (env->vfp.vreg[src1].u32[j] & 0x1f);
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].u64[j] = env->vfp.vreg[src2].u64[j]
+ >> (env->vfp.vreg[src1].u64[j] & 0x3f);
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ break;
+ }
+ } else {
+ vector_tail_common(env, dest, j, width);
+ }
+ }
+ env->vfp.vstart = 0;
+}
+void VECTOR_HELPER(vsrl_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int i, j, vl;
+ uint32_t lmul, width, src2, dest, vlmax;
+
+ vl = env->vfp.vl;
+ lmul = vector_get_lmul(env);
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ vector_lmul_check_reg(env, lmul, rs2, false);
+ vector_lmul_check_reg(env, lmul, rd, false);
+
+ for (i = 0; i < vlmax; i++) {
+ src2 = rs2 + (i / (VLEN / width));
+ dest = rd + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 8:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].u8[j] = env->vfp.vreg[src2].u8[j]
+ >> (env->gpr[rs1] & 0x7);
+ }
+ break;
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].u16[j] = env->vfp.vreg[src2].u16[j]
+ >> (env->gpr[rs1] & 0xf);
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].u32[j] = env->vfp.vreg[src2].u32[j]
+ >> (env->gpr[rs1] & 0x1f);
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].u64[j] = env->vfp.vreg[src2].u64[j]
+ >> ((uint64_t)extend_gpr(env->gpr[rs1]) & 0x3f);
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ break;
+ }
+ } else {
+ vector_tail_common(env, dest, j, width);
+ }
+ }
+ env->vfp.vstart = 0;
+}
+void VECTOR_HELPER(vsrl_vi)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int i, j, vl;
+ uint32_t lmul, width, src2, dest, vlmax;
+
+ vl = env->vfp.vl;
+ lmul = vector_get_lmul(env);
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ vector_lmul_check_reg(env, lmul, rs2, false);
+ vector_lmul_check_reg(env, lmul, rd, false);
+
+ for (i = 0; i < vlmax; i++) {
+ src2 = rs2 + (i / (VLEN / width));
+ dest = rd + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 8:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].u8[j] = env->vfp.vreg[src2].u8[j]
+ >> (rs1);
+ }
+ break;
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].u16[j] = env->vfp.vreg[src2].u16[j]
+ >> (rs1);
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].u32[j] = env->vfp.vreg[src2].u32[j]
+ >> (rs1);
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].u64[j] = env->vfp.vreg[src2].u64[j]
+ >> (rs1);
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ break;
+ }
+ } else {
+ vector_tail_common(env, dest, j, width);
+ }
+ }
+ env->vfp.vstart = 0;
+}
+
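+/*
+ * The vfmadd/vfmsub/vfnmadd/vfnmsub helpers below all reduce to QEMU's
+ * fused float*_muladd(a, b, c, flags), so each element is computed with
+ * a single rounding:
+ *   vfmadd:  flags = 0
+ *   vfmsub:  flags = float_muladd_negate_c
+ *   vfnmadd: flags = float_muladd_negate_product | float_muladd_negate_c
+ *   vfnmsub: flags = float_muladd_negate_product
+ */
+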
+/* vfmadd.vv vd, vs1, vs2, vm # vd[i] = +(vs1[i] * vd[i]) + vs2[i] */
+void VECTOR_HELPER(vfmadd_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int width, lmul, vl, vlmax;
+ int i, j, dest, src1, src2;
+
+ lmul = vector_get_lmul(env);
+ vl = env->vfp.vl;
+
+ if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ vector_lmul_check_reg(env, lmul, rs1, false);
+ vector_lmul_check_reg(env, lmul, rs2, false);
+ vector_lmul_check_reg(env, lmul, rd, false);
+
+ if (env->vfp.vstart >= vl) {
+ return;
+ }
+
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ for (i = 0; i < vlmax; i++) {
+ dest = rd + (i / (VLEN / width));
+ src2 = rs2 + (i / (VLEN / width));
+ src1 = rs1 + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f16[j] = float16_muladd(
+ env->vfp.vreg[src1].f16[j],
+ env->vfp.vreg[dest].f16[j],
+ env->vfp.vreg[src2].f16[j],
+ 0,
+ &env->fp_status);
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f32[j] = float32_muladd(
+ env->vfp.vreg[src1].f32[j],
+ env->vfp.vreg[dest].f32[j],
+ env->vfp.vreg[src2].f32[j],
+ 0,
+ &env->fp_status);
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f64[j] = float64_muladd(
+ env->vfp.vreg[src1].f64[j],
+ env->vfp.vreg[dest].f64[j],
+ env->vfp.vreg[src2].f64[j],
+ 0,
+ &env->fp_status);
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ } else {
+ vector_tail_fcommon(env, dest, j, width);
+ }
+ }
+ env->vfp.vstart = 0;
+}
+
+/* vfmadd.vf vd, rs1, vs2, vm # vd[i] = +(f[rs1] * vd[i]) + vs2[i] */
+void VECTOR_HELPER(vfmadd_vf)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int width, lmul, vl, vlmax;
+ int i, j, dest, src2;
+
+ lmul = vector_get_lmul(env);
+ vl = env->vfp.vl;
+
+ if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ vector_lmul_check_reg(env, lmul, rs2, false);
+ vector_lmul_check_reg(env, lmul, rd, false);
+
+ if (env->vfp.vstart >= vl) {
+ return;
+ }
+
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ for (i = 0; i < vlmax; i++) {
+ dest = rd + (i / (VLEN / width));
+ src2 = rs2 + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f16[j] = float16_muladd(
+ env->fpr[rs1],
+ env->vfp.vreg[dest].f16[j],
+ env->vfp.vreg[src2].f16[j],
+ 0,
+ &env->fp_status);
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f32[j] = float32_muladd(
+ env->fpr[rs1],
+ env->vfp.vreg[dest].f32[j],
+ env->vfp.vreg[src2].f32[j],
+ 0,
+ &env->fp_status);
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f64[j] = float64_muladd(
+ env->fpr[rs1],
+ env->vfp.vreg[dest].f64[j],
+ env->vfp.vreg[src2].f64[j],
+ 0,
+ &env->fp_status);
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ } else {
+ vector_tail_fcommon(env, dest, j, width);
+ }
+ }
+ env->vfp.vstart = 0;
+}
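+
+/*
+ * vsra.{vv,vx,vi} vd, vs2, shamt, vm # vd[i] = vs2[i] >> shamt (arithmetic)
+ *
+ * Same structure as vsrl above, but on signed elements; `>>` on signed
+ * operands relies on the arithmetic right shift QEMU already assumes of
+ * its supported compilers.
+ */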
+void VECTOR_HELPER(vsra_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int i, j, vl;
+ uint32_t lmul, width, src1, src2, dest, vlmax;
+
+ vl = env->vfp.vl;
+ lmul = vector_get_lmul(env);
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ vector_lmul_check_reg(env, lmul, rs1, false);
+ vector_lmul_check_reg(env, lmul, rs2, false);
+ vector_lmul_check_reg(env, lmul, rd, false);
+
+ for (i = 0; i < vlmax; i++) {
+ src1 = rs1 + (i / (VLEN / width));
+ src2 = rs2 + (i / (VLEN / width));
+ dest = rd + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 8:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s8[j] = env->vfp.vreg[src2].s8[j]
+ >> (env->vfp.vreg[src1].s8[j] & 0x7);
+ }
+ break;
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s16[j] = env->vfp.vreg[src2].s16[j]
+ >> (env->vfp.vreg[src1].s16[j] & 0xf);
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s32[j] = env->vfp.vreg[src2].s32[j]
+ >> (env->vfp.vreg[src1].s32[j] & 0x1f);
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s64[j] = env->vfp.vreg[src2].s64[j]
+ >> (env->vfp.vreg[src1].s64[j] & 0x3f);
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ break;
+ }
+ } else {
+ vector_tail_common(env, dest, j, width);
+ }
+ }
+ env->vfp.vstart = 0;
+}
+
+void VECTOR_HELPER(vsra_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int i, j, vl;
+ uint32_t lmul, width, src2, dest, vlmax;
+
+ vl = env->vfp.vl;
+ lmul = vector_get_lmul(env);
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ vector_lmul_check_reg(env, lmul, rs2, false);
+ vector_lmul_check_reg(env, lmul, rd, false);
+
+ for (i = 0; i < vlmax; i++) {
+ src2 = rs2 + (i / (VLEN / width));
+ dest = rd + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 8:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s8[j] = env->vfp.vreg[src2].s8[j]
+ >> (env->gpr[rs1] & 0x7);
+ }
+ break;
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s16[j] = env->vfp.vreg[src2].s16[j]
+ >> (env->gpr[rs1] & 0xf);
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s32[j] = env->vfp.vreg[src2].s32[j]
+ >> (env->gpr[rs1] & 0x1f);
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s64[j] = env->vfp.vreg[src2].s64[j]
+ >> ((uint64_t)extend_gpr(env->gpr[rs1]) & 0x3f);
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ break;
+ }
+ } else {
+ vector_tail_common(env, dest, j, width);
+ }
+ }
+ env->vfp.vstart = 0;
+}
+
+void VECTOR_HELPER(vsra_vi)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int i, j, vl;
+ uint32_t lmul, width, src2, dest, vlmax;
+
+ vl = env->vfp.vl;
+ lmul = vector_get_lmul(env);
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ vector_lmul_check_reg(env, lmul, rs2, false);
+ vector_lmul_check_reg(env, lmul, rd, false);
+
+ for (i = 0; i < vlmax; i++) {
+ src2 = rs2 + (i / (VLEN / width));
+ dest = rd + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 8:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s8[j] = env->vfp.vreg[src2].s8[j]
+ >> (rs1);
+ }
+ break;
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s16[j] = env->vfp.vreg[src2].s16[j]
+ >> (rs1);
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s32[j] = env->vfp.vreg[src2].s32[j]
+ >> (rs1);
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s64[j] = env->vfp.vreg[src2].s64[j]
+ >> (rs1);
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ break;
+ }
+ } else {
+ vector_tail_common(env, dest, j, width);
+ }
+ }
+ env->vfp.vstart = 0;
+}
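+
+/* vmadd.vv vd, vs1, vs2, vm # vd[i] = (vs1[i] * vd[i]) + vs2[i] */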
+void VECTOR_HELPER(vmadd_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int i, j, vl;
+ uint32_t lmul, width, src1, src2, dest, vlmax;
+
+ vl = env->vfp.vl;
+ lmul = vector_get_lmul(env);
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ vector_lmul_check_reg(env, lmul, rs1, false);
+ vector_lmul_check_reg(env, lmul, rs2, false);
+ vector_lmul_check_reg(env, lmul, rd, false);
+
+ for (i = 0; i < vlmax; i++) {
+ src1 = rs1 + (i / (VLEN / width));
+ src2 = rs2 + (i / (VLEN / width));
+ dest = rd + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 8:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s8[j] = env->vfp.vreg[src1].s8[j]
+ * env->vfp.vreg[dest].s8[j]
+ + env->vfp.vreg[src2].s8[j];
+ }
+ break;
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s16[j] = env->vfp.vreg[src1].s16[j]
+ * env->vfp.vreg[dest].s16[j]
+ + env->vfp.vreg[src2].s16[j];
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s32[j] = env->vfp.vreg[src1].s32[j]
+ * env->vfp.vreg[dest].s32[j]
+ + env->vfp.vreg[src2].s32[j];
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s64[j] = env->vfp.vreg[src1].s64[j]
+ * env->vfp.vreg[dest].s64[j]
+ + env->vfp.vreg[src2].s64[j];
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ break;
+ }
+ } else {
+ vector_tail_common(env, dest, j, width);
+ }
+ }
+ env->vfp.vstart = 0;
+}
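+
+/* vmadd.vx vd, rs1, vs2, vm # vd[i] = (x[rs1] * vd[i]) + vs2[i] */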
+void VECTOR_HELPER(vmadd_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int i, j, vl;
+ uint32_t lmul, width, src2, dest, vlmax;
+
+ vl = env->vfp.vl;
+ lmul = vector_get_lmul(env);
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ vector_lmul_check_reg(env, lmul, rs2, false);
+ vector_lmul_check_reg(env, lmul, rd, false);
+
+ for (i = 0; i < vlmax; i++) {
+ src2 = rs2 + (i / (VLEN / width));
+ dest = rd + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 8:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s8[j] = env->gpr[rs1]
+ * env->vfp.vreg[dest].s8[j]
+ + env->vfp.vreg[src2].s8[j];
+ }
+ break;
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s16[j] = env->gpr[rs1]
+ * env->vfp.vreg[dest].s16[j]
+ + env->vfp.vreg[src2].s16[j];
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s32[j] = env->gpr[rs1]
+ * env->vfp.vreg[dest].s32[j]
+ + env->vfp.vreg[src2].s32[j];
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s64[j] =
+ (int64_t)extend_gpr(env->gpr[rs1])
+ * env->vfp.vreg[dest].s64[j]
+ + env->vfp.vreg[src2].s64[j];
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ break;
+ }
+ } else {
+ vector_tail_common(env, dest, j, width);
+ }
+ }
+
+ env->vfp.vstart = 0;
+}
+
+/* vfnmadd.vv vd, vs1, vs2, vm # vd[i] = -(vs1[i] * vd[i]) - vs2[i] */
+void VECTOR_HELPER(vfnmadd_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int width, lmul, vl, vlmax;
+ int i, j, dest, src1, src2;
+
+ lmul = vector_get_lmul(env);
+ vl = env->vfp.vl;
+
+ if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ vector_lmul_check_reg(env, lmul, rs1, false);
+ vector_lmul_check_reg(env, lmul, rs2, false);
+ vector_lmul_check_reg(env, lmul, rd, false);
+
+ if (env->vfp.vstart >= vl) {
+ return;
+ }
+
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ for (i = 0; i < vlmax; i++) {
+ dest = rd + (i / (VLEN / width));
+ src2 = rs2 + (i / (VLEN / width));
+ src1 = rs1 + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f16[j] = float16_muladd(
+ env->vfp.vreg[src1].f16[j],
+ env->vfp.vreg[dest].f16[j],
+ env->vfp.vreg[src2].f16[j],
+ float_muladd_negate_c |
+ float_muladd_negate_product,
+ &env->fp_status);
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f32[j] = float32_muladd(
+ env->vfp.vreg[src1].f32[j],
+ env->vfp.vreg[dest].f32[j],
+ env->vfp.vreg[src2].f32[j],
+ float_muladd_negate_c |
+ float_muladd_negate_product,
+ &env->fp_status);
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f64[j] = float64_muladd(
+ env->vfp.vreg[src1].f64[j],
+ env->vfp.vreg[dest].f64[j],
+ env->vfp.vreg[src2].f64[j],
+ float_muladd_negate_c |
+ float_muladd_negate_product,
+ &env->fp_status);
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ } else {
+ vector_tail_fcommon(env, dest, j, width);
+ }
+ }
+ env->vfp.vstart = 0;
+}
+
+/* vfnmadd.vf vd, rs1, vs2, vm # vd[i] = -(f[rs1] * vd[i]) - vs2[i] */
+void VECTOR_HELPER(vfnmadd_vf)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int width, lmul, vl, vlmax;
+ int i, j, dest, src2;
+
+ lmul = vector_get_lmul(env);
+ vl = env->vfp.vl;
+
+ if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ vector_lmul_check_reg(env, lmul, rs2, false);
+ vector_lmul_check_reg(env, lmul, rd, false);
+
+ if (env->vfp.vstart >= vl) {
+ return;
+ }
+
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ for (i = 0; i < vlmax; i++) {
+ dest = rd + (i / (VLEN / width));
+ src2 = rs2 + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f16[j] = float16_muladd(
+ env->fpr[rs1],
+ env->vfp.vreg[dest].f16[j],
+ env->vfp.vreg[src2].f16[j],
+ float_muladd_negate_c |
+ float_muladd_negate_product,
+ &env->fp_status);
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f32[j] = float32_muladd(
+ env->fpr[rs1],
+ env->vfp.vreg[dest].f32[j],
+ env->vfp.vreg[src2].f32[j],
+ float_muladd_negate_c |
+ float_muladd_negate_product,
+ &env->fp_status);
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f64[j] = float64_muladd(
+ env->fpr[rs1],
+ env->vfp.vreg[dest].f64[j],
+ env->vfp.vreg[src2].f64[j],
+ float_muladd_negate_c |
+ float_muladd_negate_product,
+ &env->fp_status);
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ } else {
+ vector_tail_fcommon(env, dest, j, width);
+ }
+ }
+ env->vfp.vstart = 0;
+}
+
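+/*
+ * The vssrl_* / vssrli_* helpers used below are defined earlier in this
+ * file.  As a rough sketch of the rounding step (assuming
+ * round-to-nearest-up, i.e. vxrm == 0), the 8-bit case amounts to:
+ *
+ *   static uint8_t vssrl_8(CPURISCVState *env, uint8_t v, uint8_t shift)
+ *   {
+ *       shift &= 0x7;              // shift amount taken mod SEW
+ *       if (shift == 0) {
+ *           return v;
+ *       }
+ *       // add back the last bit shifted out, i.e. round half up
+ *       return (v >> shift) + ((v >> (shift - 1)) & 0x1);
+ *   }
+ */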
+/* vssrl.vv vd, vs2, vs1, vm # vd[i] = ((vs2[i] + round) >> vs1[i]) */
+void VECTOR_HELPER(vssrl_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int width, lmul, vl, vlmax;
+ int i, j, dest, src1, src2;
+
+ lmul = vector_get_lmul(env);
+
+ if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ vector_lmul_check_reg(env, lmul, rs1, false);
+ vector_lmul_check_reg(env, lmul, rs2, false);
+ vector_lmul_check_reg(env, lmul, rd, false);
+
+ vl = env->vfp.vl;
+ if (env->vfp.vstart >= vl) {
+ return;
+ }
+
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ for (i = 0; i < vlmax; i++) {
+ dest = rd + (i / (VLEN / width));
+ src2 = rs2 + (i / (VLEN / width));
+ src1 = rs1 + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 8:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].u8[j] = vssrl_8(env,
+ env->vfp.vreg[src2].u8[j], env->vfp.vreg[src1].u8[j]);
+ }
+ break;
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].u16[j] = vssrl_16(env,
+ env->vfp.vreg[src2].u16[j], env->vfp.vreg[src1].u16[j]);
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].u32[j] = vssrl_32(env,
+ env->vfp.vreg[src2].u32[j], env->vfp.vreg[src1].u32[j]);
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].u64[j] = vssrl_64(env,
+ env->vfp.vreg[src2].u64[j], env->vfp.vreg[src1].u64[j]);
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ } else {
+ vector_tail_common(env, dest, j, width);
+ }
+ }
+ env->vfp.vstart = 0;
+}
+
+/* vssrl.vx vd, vs2, rs1, vm # vd[i] = ((vs2[i] + round)>>x[rs1]) */
+void VECTOR_HELPER(vssrl_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int width, lmul, vl, vlmax;
+ int i, j, dest, src2;
+
+ lmul = vector_get_lmul(env);
+
+ if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ vector_lmul_check_reg(env, lmul, rs2, false);
+ vector_lmul_check_reg(env, lmul, rd, false);
+
+ vl = env->vfp.vl;
+ if (env->vfp.vstart >= vl) {
+ return;
+ }
+
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ for (i = 0; i < vlmax; i++) {
+ dest = rd + (i / (VLEN / width));
+ src2 = rs2 + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 8:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].u8[j] = vssrl_8(env,
+ env->vfp.vreg[src2].u8[j], env->gpr[rs1]);
+ }
+ break;
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].u16[j] = vssrl_16(env,
+ env->vfp.vreg[src2].u16[j], env->gpr[rs1]);
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].u32[j] = vssrl_32(env,
+ env->vfp.vreg[src2].u32[j], env->gpr[rs1]);
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].u64[j] = vssrl_64(env,
+ env->vfp.vreg[src2].u64[j], env->gpr[rs1]);
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ } else {
+ vector_tail_common(env, dest, j, width);
+ }
+ }
+ env->vfp.vstart = 0;
+}
+
+/* vssrl.vi vd, vs2, imm, vm # vd[i] = ((vs2[i] + round)>>imm) */
+void VECTOR_HELPER(vssrl_vi)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int width, lmul, vl, vlmax;
+ int i, j, dest, src2;
+
+ lmul = vector_get_lmul(env);
+
+ if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ vector_lmul_check_reg(env, lmul, rs2, false);
+ vector_lmul_check_reg(env, lmul, rd, false);
+
+ vl = env->vfp.vl;
+ if (env->vfp.vstart >= vl) {
+ return;
+ }
+
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ for (i = 0; i < vlmax; i++) {
+ dest = rd + (i / (VLEN / width));
+ src2 = rs2 + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 8:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].u8[j] = vssrli_8(env,
+ env->vfp.vreg[src2].u8[j], rs1);
+ }
+ break;
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].u16[j] = vssrli_16(env,
+ env->vfp.vreg[src2].u16[j], rs1);
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].u32[j] = vssrli_32(env,
+ env->vfp.vreg[src2].u32[j], rs1);
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].u64[j] = vssrli_64(env,
+ env->vfp.vreg[src2].u64[j], rs1);
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ } else {
+ vector_tail_common(env, dest, j, width);
+ }
+ }
+ env->vfp.vstart = 0;
+}
+
+/* vfmsub.vv vd, vs1, vs2, vm # vd[i] = +(vs1[i] * vd[i]) - vs2[i] */
+void VECTOR_HELPER(vfmsub_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int width, lmul, vl, vlmax;
+ int i, j, dest, src1, src2;
+
+ lmul = vector_get_lmul(env);
+ vl = env->vfp.vl;
+
+ if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ vector_lmul_check_reg(env, lmul, rs1, false);
+ vector_lmul_check_reg(env, lmul, rs2, false);
+ vector_lmul_check_reg(env, lmul, rd, false);
+
+ if (env->vfp.vstart >= vl) {
+ return;
+ }
+
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ for (i = 0; i < vlmax; i++) {
+ dest = rd + (i / (VLEN / width));
+ src2 = rs2 + (i / (VLEN / width));
+ src1 = rs1 + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f16[j] = float16_muladd(
+ env->vfp.vreg[src1].f16[j],
+ env->vfp.vreg[dest].f16[j],
+ env->vfp.vreg[src2].f16[j],
+ float_muladd_negate_c,
+ &env->fp_status);
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f32[j] = float32_muladd(
+ env->vfp.vreg[src1].f32[j],
+ env->vfp.vreg[dest].f32[j],
+ env->vfp.vreg[src2].f32[j],
+ float_muladd_negate_c,
+ &env->fp_status);
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f64[j] = float64_muladd(
+ env->vfp.vreg[src1].f64[j],
+ env->vfp.vreg[dest].f64[j],
+ env->vfp.vreg[src2].f64[j],
+ float_muladd_negate_c,
+ &env->fp_status);
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ } else {
+ vector_tail_fcommon(env, dest, j, width);
+ }
+ }
+ env->vfp.vstart = 0;
+}
+
+/* vfmsub.vf vd, rs1, vs2, vm # vd[i] = +(f[rs1] * vd[i]) - vs2[i] */
+void VECTOR_HELPER(vfmsub_vf)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int width, lmul, vl, vlmax;
+ int i, j, dest, src2;
+
+ lmul = vector_get_lmul(env);
+ vl = env->vfp.vl;
+
+ if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ vector_lmul_check_reg(env, lmul, rs2, false);
+ vector_lmul_check_reg(env, lmul, rd, false);
+
+ if (env->vfp.vstart >= vl) {
+ return;
+ }
+
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ for (i = 0; i < vlmax; i++) {
+ dest = rd + (i / (VLEN / width));
+ src2 = rs2 + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f16[j] = float16_muladd(
+ env->fpr[rs1],
+ env->vfp.vreg[dest].f16[j],
+ env->vfp.vreg[src2].f16[j],
+ float_muladd_negate_c,
+ &env->fp_status);
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f32[j] = float32_muladd(
+ env->fpr[rs1],
+ env->vfp.vreg[dest].f32[j],
+ env->vfp.vreg[src2].f32[j],
+ float_muladd_negate_c,
+ &env->fp_status);
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f64[j] = float64_muladd(
+ env->fpr[rs1],
+ env->vfp.vreg[dest].f64[j],
+ env->vfp.vreg[src2].f64[j],
+ float_muladd_negate_c,
+ &env->fp_status);
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ } else {
+ vector_tail_fcommon(env, dest, j, width);
+ }
+ }
+ env->vfp.vstart = 0;
+}
+
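+/*
+ * The vssra_* and vssrai_* helpers are the signed counterparts of the
+ * vssrl_* helpers above: an arithmetic right shift with the same
+ * rounding step.
+ */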
+/* vssra.vv vd, vs2, vs1, vm # vd[i] = ((vs2[i] + round)>>vs1[i]) */
+void VECTOR_HELPER(vssra_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int width, lmul, vl, vlmax;
+ int i, j, dest, src1, src2;
+
+ lmul = vector_get_lmul(env);
+
+ if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ vector_lmul_check_reg(env, lmul, rs1, false);
+ vector_lmul_check_reg(env, lmul, rs2, false);
+ vector_lmul_check_reg(env, lmul, rd, false);
+
+ vl = env->vfp.vl;
+ if (env->vfp.vstart >= vl) {
+ return;
+ }
+
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ for (i = 0; i < vlmax; i++) {
+ dest = rd + (i / (VLEN / width));
+ src2 = rs2 + (i / (VLEN / width));
+ src1 = rs1 + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 8:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s8[j] = vssra_8(env,
+ env->vfp.vreg[src2].s8[j], env->vfp.vreg[src1].u8[j]);
+ }
+ break;
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s16[j] = vssra_16(env,
+ env->vfp.vreg[src2].s16[j], env->vfp.vreg[src1].u16[j]);
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s32[j] = vssra_32(env,
+ env->vfp.vreg[src2].s32[j], env->vfp.vreg[src1].u32[j]);
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s64[j] = vssra_64(env,
+ env->vfp.vreg[src2].s64[j], env->vfp.vreg[src1].u64[j]);
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ } else {
+ vector_tail_common(env, dest, j, width);
+ }
+ }
+ env->vfp.vstart = 0;
+}
+
+/* vssra.vx vd, vs2, rs1, vm # vd[i] = ((vs2[i] + round)>>x[rs1]) */
+void VECTOR_HELPER(vssra_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int width, lmul, vl, vlmax;
+ int i, j, dest, src2;
+
+ lmul = vector_get_lmul(env);
+
+ if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ vector_lmul_check_reg(env, lmul, rs2, false);
+ vector_lmul_check_reg(env, lmul, rd, false);
+
+ vl = env->vfp.vl;
+ if (env->vfp.vstart >= vl) {
+ return;
+ }
+
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ for (i = 0; i < vlmax; i++) {
+ dest = rd + (i / (VLEN / width));
+ src2 = rs2 + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 8:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s8[j] = vssra_8(env,
+ env->vfp.vreg[src2].s8[j], env->gpr[rs1]);
+ }
+ break;
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s16[j] = vssra_16(env,
+ env->vfp.vreg[src2].s16[j], env->gpr[rs1]);
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s32[j] = vssra_32(env,
+ env->vfp.vreg[src2].s32[j], env->gpr[rs1]);
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s64[j] = vssra_64(env,
+ env->vfp.vreg[src2].s64[j], env->gpr[rs1]);
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ } else {
+ vector_tail_common(env, dest, j, width);
+ }
+ }
+ env->vfp.vstart = 0;
+}
+
+/* vssra.vi vd, vs2, imm, vm # vd[i] = ((vs2[i] + round)>>imm) */
+void VECTOR_HELPER(vssra_vi)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int width, lmul, vl, vlmax;
+ int i, j, dest, src2;
+
+ lmul = vector_get_lmul(env);
+
+ if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ vector_lmul_check_reg(env, lmul, rs2, false);
+ vector_lmul_check_reg(env, lmul, rd, false);
+
+ vl = env->vfp.vl;
+ if (env->vfp.vstart >= vl) {
+ return;
+ }
+
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ for (i = 0; i < vlmax; i++) {
+ dest = rd + (i / (VLEN / width));
+ src2 = rs2 + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 8:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s8[j] = vssrai_8(env,
+ env->vfp.vreg[src2].s8[j], rs1);
+ }
+ break;
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s16[j] = vssrai_16(env,
+ env->vfp.vreg[src2].s16[j], rs1);
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s32[j] = vssrai_32(env,
+ env->vfp.vreg[src2].s32[j], rs1);
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s64[j] = vssrai_64(env,
+ env->vfp.vreg[src2].s64[j], rs1);
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ } else {
+ vector_tail_common(env, dest, j, width);
+ }
+ }
+ env->vfp.vstart = 0;
+}
+
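+/* vnmsub.vv vd, vs1, vs2, vm # vd[i] = -(vs1[i] * vd[i]) + vs2[i] */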
+void VECTOR_HELPER(vnmsub_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int i, j, vl;
+ uint32_t lmul, width, src1, src2, dest, vlmax;
+
+ vl = env->vfp.vl;
+ lmul = vector_get_lmul(env);
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ vector_lmul_check_reg(env, lmul, rs1, false);
+ vector_lmul_check_reg(env, lmul, rs2, false);
+ vector_lmul_check_reg(env, lmul, rd, false);
+
+ for (i = 0; i < vlmax; i++) {
+ src1 = rs1 + (i / (VLEN / width));
+ src2 = rs2 + (i / (VLEN / width));
+ dest = rd + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 8:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s8[j] = env->vfp.vreg[src2].s8[j]
+ - env->vfp.vreg[src1].s8[j]
+ * env->vfp.vreg[dest].s8[j];
+ }
+ break;
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s16[j] = env->vfp.vreg[src2].s16[j]
+ - env->vfp.vreg[src1].s16[j]
+ * env->vfp.vreg[dest].s16[j];
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s32[j] = env->vfp.vreg[src2].s32[j]
+ - env->vfp.vreg[src1].s32[j]
+ * env->vfp.vreg[dest].s32[j];
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s64[j] = env->vfp.vreg[src2].s64[j]
+ - env->vfp.vreg[src1].s64[j]
+ * env->vfp.vreg[dest].s64[j];
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ break;
+ }
+ } else {
+ vector_tail_common(env, dest, j, width);
+ }
+ }
+ env->vfp.vstart = 0;
+}
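+
+/* vnmsub.vx vd, rs1, vs2, vm # vd[i] = -(x[rs1] * vd[i]) + vs2[i] */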
+void VECTOR_HELPER(vnmsub_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int i, j, vl;
+ uint32_t lmul, width, src2, dest, vlmax;
+
+ vl = env->vfp.vl;
+ lmul = vector_get_lmul(env);
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ vector_lmul_check_reg(env, lmul, rs2, false);
+ vector_lmul_check_reg(env, lmul, rd, false);
+
+ for (i = 0; i < vlmax; i++) {
+ src2 = rs2 + (i / (VLEN / width));
+ dest = rd + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 8:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s8[j] = env->vfp.vreg[src2].s8[j]
+ - env->gpr[rs1]
+ * env->vfp.vreg[dest].s8[j];
+ }
+ break;
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s16[j] = env->vfp.vreg[src2].s16[j]
+ - env->gpr[rs1]
+ * env->vfp.vreg[dest].s16[j];
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s32[j] = env->vfp.vreg[src2].s32[j]
+ - env->gpr[rs1]
+ * env->vfp.vreg[dest].s32[j];
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s64[j] = env->vfp.vreg[src2].s64[j]
+ - (int64_t)extend_gpr(env->gpr[rs1])
+ * env->vfp.vreg[dest].s64[j];
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ break;
+ }
+ } else {
+ vector_tail_common(env, dest, j, width);
+ }
+ }
+ env->vfp.vstart = 0;
+}
+
+/* vfnmsub.vv vd, vs1, vs2, vm # vd[i] = -(vs1[i] * vd[i]) + vs2[i] */
+void VECTOR_HELPER(vfnmsub_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int width, lmul, vl, vlmax;
+ int i, j, dest, src1, src2;
+
+ lmul = vector_get_lmul(env);
+ vl = env->vfp.vl;
+
+ if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ vector_lmul_check_reg(env, lmul, rs1, false);
+ vector_lmul_check_reg(env, lmul, rs2, false);
+ vector_lmul_check_reg(env, lmul, rd, false);
+
+ if (env->vfp.vstart >= vl) {
+ return;
+ }
+
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ for (i = 0; i < vlmax; i++) {
+ dest = rd + (i / (VLEN / width));
+ src2 = rs2 + (i / (VLEN / width));
+ src1 = rs1 + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f16[j] = float16_muladd(
+ env->vfp.vreg[src1].f16[j],
+ env->vfp.vreg[dest].f16[j],
+ env->vfp.vreg[src2].f16[j],
+ float_muladd_negate_product,
+ &env->fp_status);
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f32[j] = float32_muladd(
+ env->vfp.vreg[src1].f32[j],
+ env->vfp.vreg[dest].f32[j],
+ env->vfp.vreg[src2].f32[j],
+ float_muladd_negate_product,
+ &env->fp_status);
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f64[j] = float64_muladd(
+ env->vfp.vreg[src1].f64[j],
+ env->vfp.vreg[dest].f64[j],
+ env->vfp.vreg[src2].f64[j],
+ float_muladd_negate_product,
+ &env->fp_status);
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ } else {
+ vector_tail_fcommon(env, dest, j, width);
+ }
+ }
+ env->vfp.vstart = 0;
+}
+
+/* vfnmsub.vf vd, rs1, vs2, vm # vd[i] = -(f[rs1] * vd[i]) + vs2[i] */
+void VECTOR_HELPER(vfnmsub_vf)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int width, lmul, vl, vlmax;
+ int i, j, dest, src2;
+
+ lmul = vector_get_lmul(env);
+ vl = env->vfp.vl;
+
+ if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ vector_lmul_check_reg(env, lmul, rs2, false);
+ vector_lmul_check_reg(env, lmul, rd, false);
+
+ if (env->vfp.vstart >= vl) {
+ return;
+ }
+
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ for (i = 0; i < vlmax; i++) {
+ dest = rd + (i / (VLEN / width));
+ src2 = rs2 + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f16[j] = float16_muladd(
+ env->fpr[rs1],
+ env->vfp.vreg[dest].f16[j],
+ env->vfp.vreg[src2].f16[j],
+ float_muladd_negate_product,
+ &env->fp_status);
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f32[j] = float32_muladd(
+ env->fpr[rs1],
+ env->vfp.vreg[dest].f32[j],
+ env->vfp.vreg[src2].f32[j],
+ float_muladd_negate_product,
+ &env->fp_status);
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f64[j] = float64_muladd(
+ env->fpr[rs1],
+ env->vfp.vreg[dest].f64[j],
+ env->vfp.vreg[src2].f64[j],
+ float_muladd_negate_product,
+ &env->fp_status);
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ } else {
+ vector_tail_fcommon(env, dest, j, width);
+ }
+ }
+ env->vfp.vstart = 0;
+}
+
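+/*
+ * vnsrl.{vv,vx,vi} vd, vs2, shamt, vm # vd[i] = (2*SEW)vs2[i] >> shamt
+ *
+ * Narrowing logical shift: the source group is read at 2*SEW (index k),
+ * the result is written back at SEW (index j), and the destination group
+ * must not overlap the wide source group (vector_overlap_dstgp_srcgp).
+ * The vv/vx shift operand is masked to log2(2*SEW) bits.
+ */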
+void VECTOR_HELPER(vnsrl_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int i, j, k, vl;
+ uint32_t lmul, width, src1, src2, dest, vlmax;
+
+ vl = env->vfp.vl;
+ lmul = vector_get_lmul(env);
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ if (vector_vtype_ill(env) ||
+ vector_overlap_vm_common(lmul, vm, rd) ||
+ vector_overlap_dstgp_srcgp(rd, lmul, rs2, 2 * lmul)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ vector_lmul_check_reg(env, lmul, rs1, false);
+ vector_lmul_check_reg(env, lmul, rs2, true);
+ vector_lmul_check_reg(env, lmul, rd, false);
+
+ for (i = 0; i < vlmax; i++) {
+ src1 = rs1 + (i / (VLEN / width));
+ src2 = rs2 + (i / (VLEN / (2 * width)));
+ dest = rd + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ k = i % (VLEN / (2 * width));
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 8:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].u8[j] = env->vfp.vreg[src2].u16[k]
+ >> (env->vfp.vreg[src1].u8[j] & 0xf);
+ }
+ break;
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].u16[j] = env->vfp.vreg[src2].u32[k]
+ >> (env->vfp.vreg[src1].u16[j] & 0x1f);
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].u32[j] = env->vfp.vreg[src2].u64[k]
+ >> (env->vfp.vreg[src1].u32[j] & 0x3f);
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ break;
+ }
+ } else {
+ vector_tail_narrow(env, dest, j, width);
+ }
+ }
+ env->vfp.vstart = 0;
+}
+void VECTOR_HELPER(vnsrl_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int i, j, k, vl;
+ uint32_t lmul, width, src2, dest, vlmax;
+
+ vl = env->vfp.vl;
+ lmul = vector_get_lmul(env);
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ if (vector_vtype_ill(env) ||
+ vector_overlap_vm_common(lmul, vm, rd) ||
+ vector_overlap_dstgp_srcgp(rd, lmul, rs2, 2 * lmul)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ vector_lmul_check_reg(env, lmul, rs2, true);
+ vector_lmul_check_reg(env, lmul, rd, false);
+
+ for (i = 0; i < vlmax; i++) {
+ src2 = rs2 + (i / (VLEN / (2 * width)));
+ dest = rd + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ k = i % (VLEN / (2 * width));
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 8:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].u8[j] = env->vfp.vreg[src2].u16[k]
+ >> (env->gpr[rs1] & 0xf);
+ }
+ break;
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].u16[j] = env->vfp.vreg[src2].u32[k]
+ >> (env->gpr[rs1] & 0x1f);
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].u32[j] = env->vfp.vreg[src2].u64[k]
+ >> (env->gpr[rs1] & 0x3f);
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ break;
+ }
+ } else {
+ vector_tail_narrow(env, dest, j, width);
+ }
+ }
+ env->vfp.vstart = 0;
+}
+void VECTOR_HELPER(vnsrl_vi)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int i, j, k, vl;
+ uint32_t lmul, width, src2, dest, vlmax;
+
+ vl = env->vfp.vl;
+ lmul = vector_get_lmul(env);
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ if (vector_vtype_ill(env) ||
+ vector_overlap_vm_common(lmul, vm, rd) ||
+ vector_overlap_dstgp_srcgp(rd, lmul, rs2, 2 * lmul)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ vector_lmul_check_reg(env, lmul, rs2, true);
+ vector_lmul_check_reg(env, lmul, rd, false);
+
+ for (i = 0; i < vlmax; i++) {
+ src2 = rs2 + (i / (VLEN / (2 * width)));
+ dest = rd + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ k = i % (VLEN / (2 * width));
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 8:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].u8[j] = env->vfp.vreg[src2].u16[k]
+ >> (rs1);
+ }
+ break;
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].u16[j] = env->vfp.vreg[src2].u32[k]
+ >> (rs1);
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].u32[j] = env->vfp.vreg[src2].u64[k]
+ >> (rs1);
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ break;
+ }
+ } else {
+ vector_tail_narrow(env, dest, j, width);
+ }
+ }
+ env->vfp.vstart = 0;
+}
+
+/* vfmacc.vv vd, vs1, vs2, vm # vd[i] = +(vs1[i] * vs2[i]) + vd[i] */
+void VECTOR_HELPER(vfmacc_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int width, lmul, vl, vlmax;
+ int i, j, dest, src1, src2;
+
+ lmul = vector_get_lmul(env);
+ vl = env->vfp.vl;
+
+ if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ vector_lmul_check_reg(env, lmul, rs1, false);
+ vector_lmul_check_reg(env, lmul, rs2, false);
+ vector_lmul_check_reg(env, lmul, rd, false);
+
+ if (env->vfp.vstart >= vl) {
+ return;
+ }
+
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ for (i = 0; i < vlmax; i++) {
+ dest = rd + (i / (VLEN / width));
+ src2 = rs2 + (i / (VLEN / width));
+ src1 = rs1 + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f16[j] = float16_muladd(
+ env->vfp.vreg[src1].f16[j],
+ env->vfp.vreg[src2].f16[j],
+ env->vfp.vreg[dest].f16[j],
+ 0,
+ &env->fp_status);
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f32[j] = float32_muladd(
+ env->vfp.vreg[src1].f32[j],
+ env->vfp.vreg[src2].f32[j],
+ env->vfp.vreg[dest].f32[j],
+ 0,
+ &env->fp_status);
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f64[j] = float64_muladd(
+ env->vfp.vreg[src1].f64[j],
+ env->vfp.vreg[src2].f64[j],
+ env->vfp.vreg[dest].f64[j],
+ 0,
+ &env->fp_status);
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ } else {
+ vector_tail_fcommon(env, dest, j, width);
+ }
+ }
+ env->vfp.vstart = 0;
+}
+
+/* vfmacc.vf vd, rs1, vs2, vm # vd[i] = +(f[rs1] * vs2[i]) + vd[i] */
+void VECTOR_HELPER(vfmacc_vf)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int width, lmul, vl, vlmax;
+ int i, j, dest, src2;
+
+ lmul = vector_get_lmul(env);
+ vl = env->vfp.vl;
+
+ if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ vector_lmul_check_reg(env, lmul, rs2, false);
+ vector_lmul_check_reg(env, lmul, rd, false);
+
+ if (env->vfp.vstart >= vl) {
+ return;
+ }
+
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ for (i = 0; i < vlmax; i++) {
+ dest = rd + (i / (VLEN / width));
+ src2 = rs2 + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f16[j] = float16_muladd(
+ env->fpr[rs1],
+ env->vfp.vreg[src2].f16[j],
+ env->vfp.vreg[dest].f16[j],
+ 0,
+ &env->fp_status);
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f32[j] = float32_muladd(
+ env->fpr[rs1],
+ env->vfp.vreg[src2].f32[j],
+ env->vfp.vreg[dest].f32[j],
+ 0,
+ &env->fp_status);
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f64[j] = float64_muladd(
+ env->fpr[rs1],
+ env->vfp.vreg[src2].f64[j],
+ env->vfp.vreg[dest].f64[j],
+ 0,
+ &env->fp_status);
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ } else {
+ vector_tail_fcommon(env, dest, j, width);
+ }
+ }
+    env->vfp.vstart = 0;
+}
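+
+/* vnsra.vv vd, vs2, vs1, vm # vd[i] = vs2[i] >> vs1[i], narrowing 2*SEW to SEW */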
+void VECTOR_HELPER(vnsra_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int i, j, k, vl;
+ uint32_t lmul, width, src1, src2, dest, vlmax;
+
+ vl = env->vfp.vl;
+ lmul = vector_get_lmul(env);
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ if (vector_vtype_ill(env) ||
+ vector_overlap_vm_common(lmul, vm, rd) ||
+ vector_overlap_dstgp_srcgp(rd, lmul, rs2, 2 * lmul)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ vector_lmul_check_reg(env, lmul, rs1, false);
+ vector_lmul_check_reg(env, lmul, rs2, true);
+ vector_lmul_check_reg(env, lmul, rd, false);
+
+ for (i = 0; i < vlmax; i++) {
+ src1 = rs1 + (i / (VLEN / width));
+ src2 = rs2 + (i / (VLEN / (2 * width)));
+ dest = rd + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ k = i % (VLEN / (2 * width));
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 8:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s8[j] = env->vfp.vreg[src2].s16[k]
+ >> (env->vfp.vreg[src1].s8[j] & 0xf);
+ }
+ break;
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s16[j] = env->vfp.vreg[src2].s32[k]
+ >> (env->vfp.vreg[src1].s16[j] & 0x1f);
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s32[j] = env->vfp.vreg[src2].s64[k]
+ >> (env->vfp.vreg[src1].s32[j] & 0x3f);
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ break;
+ }
+ } else {
+ vector_tail_narrow(env, dest, j, width);
+ }
+ }
+ env->vfp.vstart = 0;
+}
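+
+/* vnsra.vx vd, vs2, rs1, vm # vd[i] = vs2[i] >> x[rs1], narrowing 2*SEW to SEW */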
+void VECTOR_HELPER(vnsra_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int i, j, k, vl;
+ uint32_t lmul, width, src2, dest, vlmax;
+
+ vl = env->vfp.vl;
+ lmul = vector_get_lmul(env);
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ if (vector_vtype_ill(env) ||
+ vector_overlap_vm_common(lmul, vm, rd) ||
+ vector_overlap_dstgp_srcgp(rd, lmul, rs2, 2 * lmul)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ vector_lmul_check_reg(env, lmul, rs2, true);
+ vector_lmul_check_reg(env, lmul, rd, false);
+
+ for (i = 0; i < vlmax; i++) {
+ src2 = rs2 + (i / (VLEN / (2 * width)));
+ dest = rd + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ k = i % (VLEN / (2 * width));
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 8:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s8[j] = env->vfp.vreg[src2].s16[k]
+ >> (env->gpr[rs1] & 0xf);
+ }
+ break;
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s16[j] = env->vfp.vreg[src2].s32[k]
+ >> (env->gpr[rs1] & 0x1f);
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s32[j] = env->vfp.vreg[src2].s64[k]
+ >> (env->gpr[rs1] & 0x3f);
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ break;
+ }
+ } else {
+ vector_tail_narrow(env, dest, j, width);
+ }
+ }
+ env->vfp.vstart = 0;
+}
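+
+/* vnsra.vi vd, vs2, imm, vm # vd[i] = vs2[i] >> imm, narrowing 2*SEW to SEW */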
+void VECTOR_HELPER(vnsra_vi)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int i, j, k, vl;
+ uint32_t lmul, width, src2, dest, vlmax;
+
+ vl = env->vfp.vl;
+ lmul = vector_get_lmul(env);
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ if (vector_vtype_ill(env) ||
+ vector_overlap_vm_common(lmul, vm, rd) ||
+ vector_overlap_dstgp_srcgp(rd, lmul, rs2, 2 * lmul)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ vector_lmul_check_reg(env, lmul, rs2, true);
+ vector_lmul_check_reg(env, lmul, rd, false);
+
+ for (i = 0; i < vlmax; i++) {
+ src2 = rs2 + (i / (VLEN / (2 * width)));
+ dest = rd + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ k = i % (VLEN / (2 * width));
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 8:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s8[j] = env->vfp.vreg[src2].s16[k]
+ >> (rs1);
+ }
+ break;
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s16[j] = env->vfp.vreg[src2].s32[k]
+ >> (rs1);
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s32[j] = env->vfp.vreg[src2].s64[k]
+ >> (rs1);
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ break;
+ }
+ } else {
+ vector_tail_narrow(env, dest, j, width);
+ }
+ }
+ env->vfp.vstart = 0;
+}
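+
+/* vmacc.vv vd, vs1, vs2, vm # vd[i] = +(vs1[i] * vs2[i]) + vd[i] */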
+void VECTOR_HELPER(vmacc_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int i, j, vl;
+ uint32_t lmul, width, src1, src2, dest, vlmax;
+
+ vl = env->vfp.vl;
+ lmul = vector_get_lmul(env);
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ vector_lmul_check_reg(env, lmul, rs1, false);
+ vector_lmul_check_reg(env, lmul, rs2, false);
+ vector_lmul_check_reg(env, lmul, rd, false);
+
+ for (i = 0; i < vlmax; i++) {
+ src1 = rs1 + (i / (VLEN / width));
+ src2 = rs2 + (i / (VLEN / width));
+ dest = rd + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 8:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s8[j] += env->vfp.vreg[src1].s8[j]
+ * env->vfp.vreg[src2].s8[j];
+ }
+ break;
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s16[j] += env->vfp.vreg[src1].s16[j]
+ * env->vfp.vreg[src2].s16[j];
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s32[j] += env->vfp.vreg[src1].s32[j]
+ * env->vfp.vreg[src2].s32[j];
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s64[j] += env->vfp.vreg[src1].s64[j]
+ * env->vfp.vreg[src2].s64[j];
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ break;
+ }
+ } else {
+ vector_tail_common(env, dest, j, width);
+ }
+ }
+ env->vfp.vstart = 0;
+}
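+
+/* vmacc.vx vd, rs1, vs2, vm # vd[i] = +(x[rs1] * vs2[i]) + vd[i] */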
+void VECTOR_HELPER(vmacc_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int i, j, vl;
+ uint32_t lmul, width, src2, dest, vlmax;
+
+ vl = env->vfp.vl;
+ lmul = vector_get_lmul(env);
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ vector_lmul_check_reg(env, lmul, rs2, false);
+ vector_lmul_check_reg(env, lmul, rd, false);
+
+ for (i = 0; i < vlmax; i++) {
+ src2 = rs2 + (i / (VLEN / width));
+ dest = rd + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 8:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s8[j] += env->gpr[rs1]
+ * env->vfp.vreg[src2].s8[j];
+ }
+ break;
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s16[j] += env->gpr[rs1]
+ * env->vfp.vreg[src2].s16[j];
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s32[j] += env->gpr[rs1]
+ * env->vfp.vreg[src2].s32[j];
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s64[j] +=
+ (int64_t)extend_gpr(env->gpr[rs1])
+ * env->vfp.vreg[src2].s64[j];
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ break;
+ }
+ } else {
+ vector_tail_common(env, dest, j, width);
+ }
+ }
+ env->vfp.vstart = 0;
+}
+
+/* vfnmacc.vv vd, vs1, vs2, vm # vd[i] = -(vs1[i] * vs2[i]) - vd[i] */
+void VECTOR_HELPER(vfnmacc_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int width, lmul, vl, vlmax;
+ int i, j, dest, src1, src2;
+
+ lmul = vector_get_lmul(env);
+ vl = env->vfp.vl;
+
+ if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ vector_lmul_check_reg(env, lmul, rs1, false);
+ vector_lmul_check_reg(env, lmul, rs2, false);
+ vector_lmul_check_reg(env, lmul, rd, false);
+
+ if (env->vfp.vstart >= vl) {
+ return;
+ }
+
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ for (i = 0; i < vlmax; i++) {
+ dest = rd + (i / (VLEN / width));
+ src2 = rs2 + (i / (VLEN / width));
+ src1 = rs1 + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f16[j] = float16_muladd(
+ env->vfp.vreg[src1].f16[j],
+ env->vfp.vreg[src2].f16[j],
+ env->vfp.vreg[dest].f16[j],
+ float_muladd_negate_c |
+ float_muladd_negate_product,
+ &env->fp_status);
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f32[j] = float32_muladd(
+ env->vfp.vreg[src1].f32[j],
+ env->vfp.vreg[src2].f32[j],
+ env->vfp.vreg[dest].f32[j],
+ float_muladd_negate_c |
+ float_muladd_negate_product,
+ &env->fp_status);
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f64[j] = float64_muladd(
+ env->vfp.vreg[src1].f64[j],
+ env->vfp.vreg[src2].f64[j],
+ env->vfp.vreg[dest].f64[j],
+ float_muladd_negate_c |
+ float_muladd_negate_product,
+ &env->fp_status);
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ } else {
+ vector_tail_fcommon(env, dest, j, width);
+ }
+ }
+    env->vfp.vstart = 0;
+}
+
+/* vfnmacc.vf vd, rs1, vs2, vm # vd[i] = -(f[rs1] * vs2[i]) - vd[i] */
+void VECTOR_HELPER(vfnmacc_vf)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int width, lmul, vl, vlmax;
+ int i, j, dest, src2;
+
+ lmul = vector_get_lmul(env);
+ vl = env->vfp.vl;
+
+ if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ vector_lmul_check_reg(env, lmul, rs2, false);
+ vector_lmul_check_reg(env, lmul, rd, false);
+
+ if (env->vfp.vstart >= vl) {
+ return;
+ }
+
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ for (i = 0; i < vlmax; i++) {
+ dest = rd + (i / (VLEN / width));
+ src2 = rs2 + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f16[j] = float16_muladd(
+ env->fpr[rs1],
+ env->vfp.vreg[src2].f16[j],
+ env->vfp.vreg[dest].f16[j],
+ float_muladd_negate_c |
+ float_muladd_negate_product,
+ &env->fp_status);
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f32[j] = float32_muladd(
+ env->fpr[rs1],
+ env->vfp.vreg[src2].f32[j],
+ env->vfp.vreg[dest].f32[j],
+ float_muladd_negate_c |
+ float_muladd_negate_product,
+ &env->fp_status);
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f64[j] = float64_muladd(
+ env->fpr[rs1],
+ env->vfp.vreg[src2].f64[j],
+ env->vfp.vreg[dest].f64[j],
+ float_muladd_negate_c |
+ float_muladd_negate_product,
+ &env->fp_status);
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ } else {
+ vector_tail_fcommon(env, dest, j, width);
+ }
+ }
+    env->vfp.vstart = 0;
+}
+
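+/*
+ * A note on the vnclip(u)_* and vnclip(u)i_* helpers used below
+ * (defined elsewhere in this patch): they right-shift a 2*SEW-wide
+ * source by the given amount, round per the fixed-point rounding mode
+ * (vxrm), and saturate the result to SEW bits.  A minimal sketch of the
+ * unsigned 16-to-8-bit case, assuming truncation rounding for brevity:
+ *
+ *     uint8_t nclipu_sketch(uint16_t a, uint8_t shift)
+ *     {
+ *         uint16_t r = a >> (shift & 0xf);
+ *         return r > UINT8_MAX ? UINT8_MAX : (uint8_t)r;
+ *     }
+ */
+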
+/* vnclipu.vv vd, vs2, vs1, vm # vector-vector */
+void VECTOR_HELPER(vnclipu_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int width, lmul, vl, vlmax;
+ int i, j, dest, k, src1, src2;
+
+ lmul = vector_get_lmul(env);
+
+ if (vector_vtype_ill(env)
+ || vector_overlap_dstgp_srcgp(rd, lmul, rs2, 2 * lmul)
+ || vector_overlap_vm_common(lmul, vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ vector_lmul_check_reg(env, lmul, rs1, false);
+ vector_lmul_check_reg(env, lmul, rs2, true);
+ vector_lmul_check_reg(env, lmul, rd, false);
+
+ vl = env->vfp.vl;
+ if (env->vfp.vstart >= vl) {
+ return;
+ }
+
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ for (i = 0; i < vlmax; i++) {
+ dest = rd + (i / (VLEN / width));
+ src2 = rs2 + (i / (VLEN / (2 * width)));
+ src1 = rs1 + (i / (VLEN / width));
+ j = i % (VLEN / (2 * width));
+ k = i % (VLEN / width);
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 8:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].u8[k] = vnclipu_16(env,
+ env->vfp.vreg[src2].u16[j], env->vfp.vreg[src1].u8[k]);
+ }
+ break;
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].u16[k] = vnclipu_32(env,
+ env->vfp.vreg[src2].u32[j], env->vfp.vreg[src1].u16[k]);
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].u32[k] = vnclipu_64(env,
+ env->vfp.vreg[src2].u64[j], env->vfp.vreg[src1].u32[k]);
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ } else {
+ vector_tail_narrow(env, dest, k, width);
+ }
+ }
+    env->vfp.vstart = 0;
+}
+
+/* vnclipu.vx vd, vs2, rs1, vm # vector-scalar */
+void VECTOR_HELPER(vnclipu_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int width, lmul, vl, vlmax;
+ int i, j, k, dest, src2;
+
+ lmul = vector_get_lmul(env);
+
+ if (vector_vtype_ill(env)
+ || vector_overlap_dstgp_srcgp(rd, lmul, rs2, 2 * lmul)
+ || vector_overlap_vm_common(lmul, vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ vector_lmul_check_reg(env, lmul, rs2, true);
+ vector_lmul_check_reg(env, lmul, rd, false);
+
+ vl = env->vfp.vl;
+ if (env->vfp.vstart >= vl) {
+ return;
+ }
+
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ for (i = 0; i < vlmax; i++) {
+ dest = rd + (i / (VLEN / width));
+ src2 = rs2 + (i / (VLEN / (2 * width)));
+ j = i % (VLEN / (2 * width));
+ k = i % (VLEN / width);
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 8:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].u8[k] = vnclipu_16(env,
+ env->vfp.vreg[src2].u16[j], env->gpr[rs1]);
+ }
+ break;
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].u16[k] = vnclipu_32(env,
+ env->vfp.vreg[src2].u32[j], env->gpr[rs1]);
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].u32[k] = vnclipu_64(env,
+ env->vfp.vreg[src2].u64[j], env->gpr[rs1]);
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ } else {
+ vector_tail_narrow(env, dest, k, width);
+ }
+ }
+    env->vfp.vstart = 0;
+}
+
+/* vnclipu.vi vd, vs2, imm, vm # vector-immediate */
+void VECTOR_HELPER(vnclipu_vi)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int width, lmul, vl, vlmax;
+ int i, j, k, dest, src2;
+
+ lmul = vector_get_lmul(env);
+
+ if (vector_vtype_ill(env)
+ || vector_overlap_dstgp_srcgp(rd, lmul, rs2, 2 * lmul)
+ || vector_overlap_vm_common(lmul, vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ vector_lmul_check_reg(env, lmul, rs2, true);
+ vector_lmul_check_reg(env, lmul, rd, false);
+
+ vl = env->vfp.vl;
+ if (env->vfp.vstart >= vl) {
+ return;
+ }
+
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ for (i = 0; i < vlmax; i++) {
+ dest = rd + (i / (VLEN / width));
+ src2 = rs2 + (i / (VLEN / (2 * width)));
+ j = i % (VLEN / (2 * width));
+ k = i % (VLEN / width);
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 8:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].u8[k] = vnclipui_16(env,
+ env->vfp.vreg[src2].u16[j], rs1);
+ }
+ break;
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].u16[k] = vnclipui_32(env,
+ env->vfp.vreg[src2].u32[j], rs1);
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].u32[k] = vnclipui_64(env,
+ env->vfp.vreg[src2].u64[j], rs1);
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ } else {
+ vector_tail_narrow(env, dest, k, width);
+ }
+ }
+    env->vfp.vstart = 0;
+}
+
+/* vfmsac.vv vd, vs1, vs2, vm # vd[i] = +(vs1[i] * vs2[i]) - vd[i] */
+void VECTOR_HELPER(vfmsac_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int width, lmul, vl, vlmax;
+ int i, j, dest, src1, src2;
+
+ lmul = vector_get_lmul(env);
+ vl = env->vfp.vl;
+
+ if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ vector_lmul_check_reg(env, lmul, rs1, false);
+ vector_lmul_check_reg(env, lmul, rs2, false);
+ vector_lmul_check_reg(env, lmul, rd, false);
+
+ if (env->vfp.vstart >= vl) {
+ return;
+ }
+
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ for (i = 0; i < vlmax; i++) {
+ dest = rd + (i / (VLEN / width));
+ src2 = rs2 + (i / (VLEN / width));
+ src1 = rs1 + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f16[j] = float16_muladd(
+ env->vfp.vreg[src1].f16[j],
+ env->vfp.vreg[src2].f16[j],
+ env->vfp.vreg[dest].f16[j],
+ float_muladd_negate_c,
+ &env->fp_status);
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f32[j] = float32_muladd(
+ env->vfp.vreg[src1].f32[j],
+ env->vfp.vreg[src2].f32[j],
+ env->vfp.vreg[dest].f32[j],
+ float_muladd_negate_c,
+ &env->fp_status);
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f64[j] = float64_muladd(
+ env->vfp.vreg[src1].f64[j],
+ env->vfp.vreg[src2].f64[j],
+ env->vfp.vreg[dest].f64[j],
+ float_muladd_negate_c,
+ &env->fp_status);
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ } else {
+ vector_tail_fcommon(env, dest, j, width);
+ }
+ }
+    env->vfp.vstart = 0;
+}
+
+/* vfmsac.vf vd, rs1, vs2, vm # vd[i] = +(f[rs1] * vs2[i]) - vd[i] */
+void VECTOR_HELPER(vfmsac_vf)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int width, lmul, vl, vlmax;
+ int i, j, dest, src2;
+
+ lmul = vector_get_lmul(env);
+ vl = env->vfp.vl;
+
+ if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ vector_lmul_check_reg(env, lmul, rs2, false);
+ vector_lmul_check_reg(env, lmul, rd, false);
+
+ if (env->vfp.vstart >= vl) {
+ return;
+ }
+
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ for (i = 0; i < vlmax; i++) {
+ dest = rd + (i / (VLEN / width));
+ src2 = rs2 + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f16[j] = float16_muladd(
+ env->fpr[rs1],
+ env->vfp.vreg[src2].f16[j],
+ env->vfp.vreg[dest].f16[j],
+ float_muladd_negate_c,
+ &env->fp_status);
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f32[j] = float32_muladd(
+ env->fpr[rs1],
+ env->vfp.vreg[src2].f32[j],
+ env->vfp.vreg[dest].f32[j],
+ float_muladd_negate_c,
+ &env->fp_status);
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f64[j] = float64_muladd(
+ env->fpr[rs1],
+ env->vfp.vreg[src2].f64[j],
+ env->vfp.vreg[dest].f64[j],
+ float_muladd_negate_c,
+ &env->fp_status);
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ } else {
+ vector_tail_fcommon(env, dest, j, width);
+ }
+ }
+    env->vfp.vstart = 0;
+}
+
+/* vnclip.vv vd, vs2, vs1, vm # vector-vector */
+void VECTOR_HELPER(vnclip_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int width, lmul, vl, vlmax;
+ int i, j, dest, k, src1, src2;
+
+ lmul = vector_get_lmul(env);
+
+ if (vector_vtype_ill(env)
+ || vector_overlap_dstgp_srcgp(rd, lmul, rs2, 2 * lmul)
+ || vector_overlap_vm_common(lmul, vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ vector_lmul_check_reg(env, lmul, rs1, false);
+ vector_lmul_check_reg(env, lmul, rs2, true);
+ vector_lmul_check_reg(env, lmul, rd, false);
+
+ vl = env->vfp.vl;
+ if (env->vfp.vstart >= vl) {
+ return;
+ }
+
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ for (i = 0; i < vlmax; i++) {
+ dest = rd + (i / (VLEN / width));
+ src2 = rs2 + (i / (VLEN / (2 * width)));
+ src1 = rs1 + (i / (VLEN / width));
+ j = i % (VLEN / (2 * width));
+ k = i % (VLEN / width);
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 8:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s8[k] = vnclip_16(env,
+ env->vfp.vreg[src2].s16[j], env->vfp.vreg[src1].u8[k]);
+ }
+ break;
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s16[k] = vnclip_32(env,
+ env->vfp.vreg[src2].s32[j], env->vfp.vreg[src1].u16[k]);
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s32[k] = vnclip_64(env,
+ env->vfp.vreg[src2].s64[j], env->vfp.vreg[src1].u32[k]);
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ } else {
+ vector_tail_narrow(env, dest, k, width);
+ }
+ }
+    env->vfp.vstart = 0;
+}
+
+/* vnclip.vx vd, vs2, rs1, vm # vector-scalar */
+void VECTOR_HELPER(vnclip_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int width, lmul, vl, vlmax;
+ int i, j, dest, k, src2;
+
+ lmul = vector_get_lmul(env);
+
+ if (vector_vtype_ill(env)
+ || vector_overlap_dstgp_srcgp(rd, lmul, rs2, 2 * lmul)
+ || vector_overlap_vm_common(lmul, vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ vector_lmul_check_reg(env, lmul, rs2, true);
+ vector_lmul_check_reg(env, lmul, rd, false);
+
+ vl = env->vfp.vl;
+ if (env->vfp.vstart >= vl) {
+ return;
+ }
+
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ for (i = 0; i < vlmax; i++) {
+ dest = rd + (i / (VLEN / width));
+ src2 = rs2 + (i / (VLEN / (2 * width)));
+ j = i % (VLEN / (2 * width));
+ k = i % (VLEN / width);
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 8:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s8[k] = vnclip_16(env,
+ env->vfp.vreg[src2].s16[j], env->gpr[rs1]);
+ }
+ break;
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s16[k] = vnclip_32(env,
+ env->vfp.vreg[src2].s32[j], env->gpr[rs1]);
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s32[k] = vnclip_64(env,
+ env->vfp.vreg[src2].s64[j], env->gpr[rs1]);
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ } else {
+ vector_tail_narrow(env, dest, k, width);
+ }
+ }
+    env->vfp.vstart = 0;
+}
+
+/* vnclip.vi vd, vs2, imm, vm # vector-immediate */
+void VECTOR_HELPER(vnclip_vi)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int width, lmul, vl, vlmax;
+ int i, j, dest, k, src2;
+
+ lmul = vector_get_lmul(env);
+
+ if (vector_vtype_ill(env)
+ || vector_overlap_dstgp_srcgp(rd, lmul, rs2, 2 * lmul)
+ || vector_overlap_vm_common(lmul, vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ vector_lmul_check_reg(env, lmul, rs2, true);
+ vector_lmul_check_reg(env, lmul, rd, false);
+
+ vl = env->vfp.vl;
+ if (env->vfp.vstart >= vl) {
+ return;
+ }
+
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ for (i = 0; i < vlmax; i++) {
+ dest = rd + (i / (VLEN / width));
+ src2 = rs2 + (i / (VLEN / (2 * width)));
+ j = i % (VLEN / (2 * width));
+ k = i % (VLEN / width);
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 8:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s8[k] = vnclipi_16(env,
+ env->vfp.vreg[src2].s16[j], rs1);
+ }
+ break;
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s16[k] = vnclipi_32(env,
+ env->vfp.vreg[src2].s32[j], rs1);
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s32[k] = vnclipi_64(env,
+ env->vfp.vreg[src2].s64[j], rs1);
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ } else {
+ vector_tail_narrow(env, dest, k, width);
+ }
+ }
+    env->vfp.vstart = 0;
+}
+
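+/* vnmsac.vv vd, vs1, vs2, vm # vd[i] = -(vs1[i] * vs2[i]) + vd[i] */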
+void VECTOR_HELPER(vnmsac_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int i, j, vl;
+ uint32_t lmul, width, src1, src2, dest, vlmax;
+
+ vl = env->vfp.vl;
+ lmul = vector_get_lmul(env);
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ vector_lmul_check_reg(env, lmul, rs1, false);
+ vector_lmul_check_reg(env, lmul, rs2, false);
+ vector_lmul_check_reg(env, lmul, rd, false);
+
+ for (i = 0; i < vlmax; i++) {
+ src1 = rs1 + (i / (VLEN / width));
+ src2 = rs2 + (i / (VLEN / width));
+ dest = rd + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 8:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s8[j] -= env->vfp.vreg[src1].s8[j]
+ * env->vfp.vreg[src2].s8[j];
+ }
+ break;
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s16[j] -= env->vfp.vreg[src1].s16[j]
+ * env->vfp.vreg[src2].s16[j];
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s32[j] -= env->vfp.vreg[src1].s32[j]
+ * env->vfp.vreg[src2].s32[j];
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s64[j] -= env->vfp.vreg[src1].s64[j]
+ * env->vfp.vreg[src2].s64[j];
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ break;
+ }
+ } else {
+ vector_tail_common(env, dest, j, width);
+ }
+ }
+ env->vfp.vstart = 0;
+}
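+
+/* vnmsac.vx vd, rs1, vs2, vm # vd[i] = -(x[rs1] * vs2[i]) + vd[i] */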
+void VECTOR_HELPER(vnmsac_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int i, j, vl;
+ uint32_t lmul, width, src2, dest, vlmax;
+
+ vl = env->vfp.vl;
+ lmul = vector_get_lmul(env);
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ vector_lmul_check_reg(env, lmul, rs2, false);
+ vector_lmul_check_reg(env, lmul, rd, false);
+
+ for (i = 0; i < vlmax; i++) {
+ src2 = rs2 + (i / (VLEN / width));
+ dest = rd + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 8:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s8[j] -= env->gpr[rs1]
+ * env->vfp.vreg[src2].s8[j];
+ }
+ break;
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s16[j] -= env->gpr[rs1]
+ * env->vfp.vreg[src2].s16[j];
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s32[j] -= env->gpr[rs1]
+ * env->vfp.vreg[src2].s32[j];
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s64[j] -=
+ (int64_t)extend_gpr(env->gpr[rs1])
+ * env->vfp.vreg[src2].s64[j];
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ break;
+ }
+ } else {
+ vector_tail_common(env, dest, j, width);
+ }
+ }
+ env->vfp.vstart = 0;
+}
+
+/* vfnmsac.vv vd, vs1, vs2, vm # vd[i] = -(vs1[i] * vs2[i]) + vd[i] */
+void VECTOR_HELPER(vfnmsac_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int width, lmul, vl, vlmax;
+ int i, j, dest, src1, src2;
+
+ lmul = vector_get_lmul(env);
+ vl = env->vfp.vl;
+
+ if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ vector_lmul_check_reg(env, lmul, rs1, false);
+ vector_lmul_check_reg(env, lmul, rs2, false);
+ vector_lmul_check_reg(env, lmul, rd, false);
+
+ if (env->vfp.vstart >= vl) {
+ return;
+ }
+
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ for (i = 0; i < vlmax; i++) {
+ dest = rd + (i / (VLEN / width));
+ src2 = rs2 + (i / (VLEN / width));
+ src1 = rs1 + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f16[j] = float16_muladd(
+ env->vfp.vreg[src1].f16[j],
+ env->vfp.vreg[src2].f16[j],
+ env->vfp.vreg[dest].f16[j],
+ float_muladd_negate_product,
+ &env->fp_status);
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f32[j] = float32_muladd(
+ env->vfp.vreg[src1].f32[j],
+ env->vfp.vreg[src2].f32[j],
+ env->vfp.vreg[dest].f32[j],
+ float_muladd_negate_product,
+ &env->fp_status);
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f64[j] = float64_muladd(
+ env->vfp.vreg[src1].f64[j],
+ env->vfp.vreg[src2].f64[j],
+ env->vfp.vreg[dest].f64[j],
+ float_muladd_negate_product,
+ &env->fp_status);
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ } else {
+ vector_tail_fcommon(env, dest, j, width);
+ }
+ }
+    env->vfp.vstart = 0;
+}
+
+/* vfnmsac.vf vd, rs1, vs2, vm # vd[i] = -(f[rs1] * vs2[i]) + vd[i] */
+void VECTOR_HELPER(vfnmsac_vf)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int width, lmul, vl, vlmax;
+ int i, j, dest, src2;
+
+ lmul = vector_get_lmul(env);
+ vl = env->vfp.vl;
+
+ if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ vector_lmul_check_reg(env, lmul, rs2, false);
+ vector_lmul_check_reg(env, lmul, rd, false);
+
+ if (env->vfp.vstart >= vl) {
+ return;
+ }
+
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ for (i = 0; i < vlmax; i++) {
+ dest = rd + (i / (VLEN / width));
+ src2 = rs2 + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f16[j] = float16_muladd(
+ env->fpr[rs1],
+ env->vfp.vreg[src2].f16[j],
+ env->vfp.vreg[dest].f16[j],
+ float_muladd_negate_product,
+ &env->fp_status);
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f32[j] = float32_muladd(
+ env->fpr[rs1],
+ env->vfp.vreg[src2].f32[j],
+ env->vfp.vreg[dest].f32[j],
+ float_muladd_negate_product,
+ &env->fp_status);
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f64[j] = float64_muladd(
+ env->fpr[rs1],
+ env->vfp.vreg[src2].f64[j],
+ env->vfp.vreg[dest].f64[j],
+ float_muladd_negate_product,
+ &env->fp_status);
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ } else {
+ vector_tail_fcommon(env, dest, j, width);
+ }
+ }
+    env->vfp.vstart = 0;
+}
+
+/* vwredsumu.vs vd, vs2, vs1, vm # 2*SEW = 2*SEW + sum(zero-extend(SEW)) */
+void VECTOR_HELPER(vwredsumu_vs)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int width, lmul, vl, vlmax;
+ int i, j, src2;
+ uint64_t sum = 0;
+
+    if (vector_vtype_ill(env)) {
+        riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+        return;
+    }
+
+    lmul = vector_get_lmul(env);
+    vector_lmul_check_reg(env, lmul, rs2, false);
+ if (env->vfp.vstart != 0) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ vl = env->vfp.vl;
+ if (vl == 0) {
+ return;
+ }
+
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ for (i = 0; i < VLEN / 64; i++) {
+ env->vfp.vreg[rd].u64[i] = 0;
+ }
+
+ for (i = 0; i < vlmax; i++) {
+ src2 = rs2 + (i / (VLEN / width));
+ j = i % (VLEN / width);
+
+ if (i < vl) {
+ switch (width) {
+ case 8:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ sum += env->vfp.vreg[src2].u8[j];
+ }
+ if (i == 0) {
+ sum += env->vfp.vreg[rs1].u16[0];
+ }
+ if (i == vl - 1) {
+ env->vfp.vreg[rd].u16[0] = sum;
+ }
+ break;
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ sum += env->vfp.vreg[src2].u16[j];
+ }
+ if (i == 0) {
+ sum += env->vfp.vreg[rs1].u32[0];
+ }
+ if (i == vl - 1) {
+ env->vfp.vreg[rd].u32[0] = sum;
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ sum += env->vfp.vreg[src2].u32[j];
+ }
+ if (i == 0) {
+ sum += env->vfp.vreg[rs1].u64[0];
+ }
+ if (i == vl - 1) {
+ env->vfp.vreg[rd].u64[0] = sum;
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ }
+ }
+    env->vfp.vstart = 0;
+}
+
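+/* vwaddu.vv vd, vs2, vs1, vm # 2*SEW = SEW + SEW, unsigned */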
+void VECTOR_HELPER(vwaddu_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int i, j, k, vl;
+ uint32_t lmul, width, src1, src2, dest, vlmax;
+
+ vl = env->vfp.vl;
+
+ lmul = vector_get_lmul(env);
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ if (vector_vtype_ill(env)
+ || vector_overlap_vm_force(vm, rd)
+ || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs1, lmul)
+ || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)
+ ) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ vector_lmul_check_reg(env, lmul, rs1, false);
+ vector_lmul_check_reg(env, lmul, rs2, false);
+ vector_lmul_check_reg(env, lmul, rd, true);
+
+ for (i = 0; i < vlmax; i++) {
+ src1 = rs1 + (i / (VLEN / width));
+ src2 = rs2 + (i / (VLEN / width));
+ dest = rd + (i / (VLEN / (2 * width)));
+ j = i % (VLEN / width);
+ k = i % (VLEN / (2 * width));
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 8:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].u16[k] =
+ (uint16_t)env->vfp.vreg[src1].u8[j] +
+ (uint16_t)env->vfp.vreg[src2].u8[j];
+ }
+ break;
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].u32[k] =
+ (uint32_t)env->vfp.vreg[src1].u16[j] +
+ (uint32_t)env->vfp.vreg[src2].u16[j];
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].u64[k] =
+ (uint64_t)env->vfp.vreg[src1].u32[j] +
+ (uint64_t)env->vfp.vreg[src2].u32[j];
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ break;
+ }
+ } else {
+ vector_tail_widen(env, dest, k, width);
+ }
+ }
+ env->vfp.vstart = 0;
+}
+
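+/* vwaddu.vx vd, vs2, rs1, vm # 2*SEW = SEW + x[rs1], unsigned */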
+void VECTOR_HELPER(vwaddu_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int i, j, k, vl;
+ uint32_t lmul, width, src2, dest, vlmax;
+
+ vl = env->vfp.vl;
+
+ lmul = vector_get_lmul(env);
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ if (vector_vtype_ill(env)
+ || vector_overlap_vm_force(vm, rd)
+ || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)
+ ) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ vector_lmul_check_reg(env, lmul, rs2, false);
+ vector_lmul_check_reg(env, lmul, rd, true);
+
+ for (i = 0; i < vlmax; i++) {
+ src2 = rs2 + (i / (VLEN / width));
+ dest = rd + (i / (VLEN / (2 * width)));
+ j = i % (VLEN / width);
+ k = i % (VLEN / (2 * width));
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 8:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].u16[k] =
+ (uint16_t)env->vfp.vreg[src2].u8[j] +
+ (uint16_t)((uint8_t)env->gpr[rs1]);
+ }
+ break;
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].u32[k] =
+ (uint32_t)env->vfp.vreg[src2].u16[j] +
+ (uint32_t)((uint16_t)env->gpr[rs1]);
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].u64[k] =
+ (uint64_t)env->vfp.vreg[src2].u32[j] +
+ (uint64_t)((uint32_t)env->gpr[rs1]);
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ break;
+ }
+ } else {
+ vector_tail_widen(env, dest, k, width);
+ }
+ }
+ env->vfp.vstart = 0;
+}
+
+/* vfwadd.vv vd, vs2, vs1, vm # vector-vector */
+void VECTOR_HELPER(vfwadd_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int width, lmul, vl, vlmax;
+ int i, j, k, dest, src1, src2;
+
+ lmul = vector_get_lmul(env);
+ vl = env->vfp.vl;
+
+ if (vector_vtype_ill(env)
+ || vector_overlap_vm_force(vm, rd)
+ || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs1, lmul)
+ || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ vector_lmul_check_reg(env, lmul, rs1, false);
+ vector_lmul_check_reg(env, lmul, rs2, false);
+ vector_lmul_check_reg(env, lmul, rd, true);
+
+ if (env->vfp.vstart >= vl) {
+ return;
+ }
+
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ for (i = 0; i < vlmax; i++) {
+ dest = rd + (i / (VLEN / (2 * width)));
+ src2 = rs2 + (i / (VLEN / width));
+ src1 = rs1 + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ k = i % (VLEN / (2 * width));
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f32[k] = float32_add(
+ float16_to_float32(env->vfp.vreg[src2].f16[j], true,
+ &env->fp_status),
+ float16_to_float32(env->vfp.vreg[src1].f16[j], true,
+ &env->fp_status),
+ &env->fp_status);
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f64[k] = float64_add(
+ float32_to_float64(env->vfp.vreg[src2].f32[j],
+ &env->fp_status),
+ float32_to_float64(env->vfp.vreg[src1].f32[j],
+ &env->fp_status),
+ &env->fp_status);
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ } else {
+ vector_tail_fwiden(env, dest, k, width);
+ }
+ }
+    env->vfp.vstart = 0;
+}
+
+/* vfwadd.vf vd, vs2, rs1, vm # vector-scalar */
+void VECTOR_HELPER(vfwadd_vf)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int width, lmul, vl, vlmax;
+ int i, j, k, dest, src2;
+
+ lmul = vector_get_lmul(env);
+ vl = env->vfp.vl;
+
+ if (vector_vtype_ill(env)
+ || vector_overlap_vm_force(vm, rd)
+ || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ vector_lmul_check_reg(env, lmul, rs2, false);
+ vector_lmul_check_reg(env, lmul, rd, true);
+
+ if (env->vfp.vstart >= vl) {
+ return;
+ }
+
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ for (i = 0; i < vlmax; i++) {
+ dest = rd + (i / (VLEN / (2 * width)));
+ src2 = rs2 + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ k = i % (VLEN / (2 * width));
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f32[k] = float32_add(
+ float16_to_float32(env->vfp.vreg[src2].f16[j], true,
+ &env->fp_status),
+ float16_to_float32(env->fpr[rs1], true,
+ &env->fp_status),
+ &env->fp_status);
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f64[k] = float64_add(
+ float32_to_float64(env->vfp.vreg[src2].f32[j],
+ &env->fp_status),
+ float32_to_float64(env->fpr[rs1], &env->fp_status),
+ &env->fp_status);
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ } else {
+ vector_tail_fwiden(env, dest, k, width);
+ }
+ }
+    env->vfp.vstart = 0;
+}
+
+/* vwredsum.vs vd, vs2, vs1, vm # 2*SEW = 2*SEW + sum(sign-extend(SEW)) */
+void VECTOR_HELPER(vwredsum_vs)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int width, lmul, vl, vlmax;
+ int i, j, src2;
+ int64_t sum = 0;
+
+    if (vector_vtype_ill(env)) {
+        riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+        return;
+    }
+
+    lmul = vector_get_lmul(env);
+    vector_lmul_check_reg(env, lmul, rs2, false);
+ if (env->vfp.vstart != 0) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ vl = env->vfp.vl;
+ if (vl == 0) {
+ return;
+ }
+
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ for (i = 0; i < VLEN / 64; i++) {
+ env->vfp.vreg[rd].u64[i] = 0;
+ }
+
+ for (i = 0; i < vlmax; i++) {
+ src2 = rs2 + (i / (VLEN / width));
+ j = i % (VLEN / width);
+
+ if (i < vl) {
+ switch (width) {
+ case 8:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ sum += (int16_t)env->vfp.vreg[src2].s8[j] << 8 >> 8;
+ }
+ if (i == 0) {
+ sum += env->vfp.vreg[rs1].s16[0];
+ }
+ if (i == vl - 1) {
+ env->vfp.vreg[rd].s16[0] = sum;
+ }
+ break;
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ sum += (int32_t)env->vfp.vreg[src2].s16[j] << 16 >> 16;
+ }
+ if (i == 0) {
+ sum += env->vfp.vreg[rs1].s32[0];
+ }
+ if (i == vl - 1) {
+ env->vfp.vreg[rd].s32[0] = sum;
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ sum += (int64_t)env->vfp.vreg[src2].s32[j] << 32 >> 32;
+ }
+ if (i == 0) {
+ sum += env->vfp.vreg[rs1].s64[0];
+ }
+ if (i == vl - 1) {
+ env->vfp.vreg[rd].s64[0] = sum;
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ }
+ }
+    env->vfp.vstart = 0;
+}
+
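+/* vwadd.vv vd, vs2, vs1, vm # 2*SEW = SEW + SEW, signed */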
+void VECTOR_HELPER(vwadd_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int i, j, k, vl;
+ uint32_t lmul, width, src1, src2, dest, vlmax;
+
+ vl = env->vfp.vl;
+
+ lmul = vector_get_lmul(env);
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ if (vector_vtype_ill(env)
+ || vector_overlap_vm_force(vm, rd)
+ || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs1, lmul)
+ || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ vector_lmul_check_reg(env, lmul, rs1, false);
+ vector_lmul_check_reg(env, lmul, rs2, false);
+ vector_lmul_check_reg(env, lmul, rd, true);
+
+ for (i = 0; i < vlmax; i++) {
+ src1 = rs1 + (i / (VLEN / width));
+ src2 = rs2 + (i / (VLEN / width));
+ dest = rd + (i / (VLEN / (2 * width)));
+ j = i % (VLEN / width);
+ k = i % (VLEN / (2 * width));
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 8:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s16[k] =
+ (int16_t)env->vfp.vreg[src1].s8[j] +
+ (int16_t)env->vfp.vreg[src2].s8[j];
+ }
+ break;
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s32[k] =
+ (int32_t)env->vfp.vreg[src1].s16[j] +
+ (int32_t)env->vfp.vreg[src2].s16[j];
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s64[k] =
+ (int64_t)env->vfp.vreg[src1].s32[j] +
+ (int64_t)env->vfp.vreg[src2].s32[j];
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ break;
+ }
+ } else {
+ vector_tail_widen(env, dest, k, width);
+ }
+ }
+ env->vfp.vstart = 0;
+}
+
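+/* vwadd.vx vd, vs2, rs1, vm # 2*SEW = SEW + x[rs1], signed */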
+void VECTOR_HELPER(vwadd_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int i, j, k, vl;
+ uint32_t lmul, width, src2, dest, vlmax;
+
+ vl = env->vfp.vl;
+
+ lmul = vector_get_lmul(env);
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ if (vector_vtype_ill(env)
+ || vector_overlap_vm_force(vm, rd)
+ || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ vector_lmul_check_reg(env, lmul, rs2, false);
+ vector_lmul_check_reg(env, lmul, rd, true);
+
+ for (i = 0; i < vlmax; i++) {
+ src2 = rs2 + (i / (VLEN / width));
+ dest = rd + (i / (VLEN / (2 * width)));
+ j = i % (VLEN / width);
+ k = i % (VLEN / (2 * width));
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 8:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s16[k] =
+ (int16_t)((int8_t)env->vfp.vreg[src2].s8[j]) +
+ (int16_t)((int8_t)env->gpr[rs1]);
+ }
+ break;
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s32[k] =
+ (int32_t)((int16_t)env->vfp.vreg[src2].s16[j]) +
+ (int32_t)((int16_t)env->gpr[rs1]);
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s64[k] =
+ (int64_t)((int32_t)env->vfp.vreg[src2].s32[j]) +
+ (int64_t)((int32_t)env->gpr[rs1]);
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ break;
+ }
+ } else {
+ vector_tail_widen(env, dest, k, width);
+ }
+ }
+ env->vfp.vstart = 0;
+}
+
+/* vfwredsum.vs vd, vs2, vs1, vm # Unordered reduce 2*SEW = 2*SEW + sum(promote(SEW)) */
+void VECTOR_HELPER(vfwredsum_vs)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int width, lmul, vl, vlmax;
+ int i, j, src2;
+    float32 sum32 = float32_zero;
+    float64 sum64 = float64_zero;
+
+    if (vector_vtype_ill(env)) {
+        riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+        return;
+    }
+
+    lmul = vector_get_lmul(env);
+    vector_lmul_check_reg(env, lmul, rs2, false);
+ if (env->vfp.vstart != 0) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ vl = env->vfp.vl;
+ if (vl == 0) {
+ return;
+ }
+
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ for (i = 0; i < VLEN / 64; i++) {
+ env->vfp.vreg[rd].u64[i] = 0;
+ }
+
+ for (i = 0; i < vlmax; i++) {
+ src2 = rs2 + (i / (VLEN / width));
+ j = i % (VLEN / width);
+
+ if (i < vl) {
+ switch (width) {
+ case 16:
+ if (i == 0) {
+ sum32 = env->vfp.vreg[rs1].f32[0];
+ }
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ sum32 = float32_add(sum32,
+ float16_to_float32(env->vfp.vreg[src2].f16[j],
+ true, &env->fp_status),
+ &env->fp_status);
+ }
+ if (i == vl - 1) {
+ env->vfp.vreg[rd].f32[0] = sum32;
+ }
+ break;
+ case 32:
+ if (i == 0) {
+ sum64 = env->vfp.vreg[rs1].f64[0];
+ }
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ sum64 = float64_add(sum64,
+ float32_to_float64(env->vfp.vreg[src2].f32[j],
+ &env->fp_status),
+ &env->fp_status);
+ }
+ if (i == vl - 1) {
+ env->vfp.vreg[rd].f64[0] = sum64;
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ }
+ }
+    env->vfp.vstart = 0;
+}
+
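+/* vwsubu.vv vd, vs2, vs1, vm # 2*SEW = SEW - SEW, unsigned */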
+void VECTOR_HELPER(vwsubu_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int i, j, k, vl;
+ uint32_t lmul, width, src1, src2, dest, vlmax;
+
+ vl = env->vfp.vl;
+
+ lmul = vector_get_lmul(env);
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ if (vector_vtype_ill(env)
+ || vector_overlap_vm_force(vm, rd)
+ || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs1, lmul)
+ || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)
+ ) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ vector_lmul_check_reg(env, lmul, rs1, false);
+ vector_lmul_check_reg(env, lmul, rs2, false);
+ vector_lmul_check_reg(env, lmul, rd, true);
+
+ for (i = 0; i < vlmax; i++) {
+ src1 = rs1 + (i / (VLEN / width));
+ src2 = rs2 + (i / (VLEN / width));
+ dest = rd + (i / (VLEN / (2 * width)));
+ j = i % (VLEN / width);
+ k = i % (VLEN / (2 * width));
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 8:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].u16[k] =
+ (uint16_t)env->vfp.vreg[src2].u8[j] -
+ (uint16_t)env->vfp.vreg[src1].u8[j];
+ }
+ break;
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].u32[k] =
+ (uint32_t)env->vfp.vreg[src2].u16[j] -
+ (uint32_t)env->vfp.vreg[src1].u16[j];
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].u64[k] =
+ (uint64_t)env->vfp.vreg[src2].u32[j] -
+ (uint64_t)env->vfp.vreg[src1].u32[j];
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ break;
+ }
+ } else {
+ vector_tail_widen(env, dest, k, width);
+ }
+ }
+ env->vfp.vstart = 0;
+}
+
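+/* vwsubu.vx vd, vs2, rs1, vm # 2*SEW = SEW - x[rs1], unsigned */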
+void VECTOR_HELPER(vwsubu_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int i, j, k, vl;
+ uint32_t lmul, width, src2, dest, vlmax;
+
+ vl = env->vfp.vl;
+
+ lmul = vector_get_lmul(env);
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ if (vector_vtype_ill(env)
+ || vector_overlap_vm_force(vm, rd)
+ || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)
+ ) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ vector_lmul_check_reg(env, lmul, rs2, false);
+ vector_lmul_check_reg(env, lmul, rd, true);
+
+ for (i = 0; i < vlmax; i++) {
+ src2 = rs2 + (i / (VLEN / width));
+ dest = rd + (i / (VLEN / (2 * width)));
+ j = i % (VLEN / width);
+ k = i % (VLEN / (2 * width));
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 8:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].u16[k] =
+ (uint16_t)env->vfp.vreg[src2].u8[j] -
+ (uint16_t)((uint8_t)env->gpr[rs1]);
+ }
+ break;
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].u32[k] =
+ (uint32_t)env->vfp.vreg[src2].u16[j] -
+ (uint32_t)((uint16_t)env->gpr[rs1]);
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].u64[k] =
+ (uint64_t)env->vfp.vreg[src2].u32[j] -
+ (uint64_t)((uint32_t)env->gpr[rs1]);
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ break;
+ }
+ } else {
+ vector_tail_widen(env, dest, k, width);
+ }
+ }
+ env->vfp.vstart = 0;
+}
+
+/* vfwsub.vv vd, vs2, vs1, vm # vector-vector */
+void VECTOR_HELPER(vfwsub_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int width, lmul, vl, vlmax;
+ int i, j, k, dest, src1, src2;
+
+ lmul = vector_get_lmul(env);
+ vl = env->vfp.vl;
+
+ if (vector_vtype_ill(env)
+ || vector_overlap_vm_force(vm, rd)
+ || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs1, lmul)
+ || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ vector_lmul_check_reg(env, lmul, rs1, false);
+ vector_lmul_check_reg(env, lmul, rs2, false);
+ vector_lmul_check_reg(env, lmul, rd, true);
+
+ if (env->vfp.vstart >= vl) {
+ return;
+ }
+
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ for (i = 0; i < vlmax; i++) {
+ dest = rd + (i / (VLEN / (2 * width)));
+ src2 = rs2 + (i / (VLEN / width));
+ src1 = rs1 + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ k = i % (VLEN / (2 * width));
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f32[k] = float32_sub(
+ float16_to_float32(env->vfp.vreg[src2].f16[j], true,
+ &env->fp_status),
+ float16_to_float32(env->vfp.vreg[src1].f16[j], true,
+ &env->fp_status),
+ &env->fp_status);
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f64[k] = float64_sub(
+ float32_to_float64(env->vfp.vreg[src2].f32[j],
+ &env->fp_status),
+ float32_to_float64(env->vfp.vreg[src1].f32[j],
+ &env->fp_status),
+ &env->fp_status);
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ } else {
+ vector_tail_fwiden(env, dest, k, width);
+ }
+ }
+    env->vfp.vstart = 0;
+}
+
+/* vfwsub.vf vd, vs2, rs1, vm # vector-scalar */
+void VECTOR_HELPER(vfwsub_vf)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int width, lmul, vl, vlmax;
+ int i, j, k, dest, src2;
+
+ lmul = vector_get_lmul(env);
+ vl = env->vfp.vl;
+
+ if (vector_vtype_ill(env)
+ || vector_overlap_vm_force(vm, rd)
+ || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ vector_lmul_check_reg(env, lmul, rs2, false);
+ vector_lmul_check_reg(env, lmul, rd, true);
+
+ if (env->vfp.vstart >= vl) {
+ return;
+ }
+
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ for (i = 0; i < vlmax; i++) {
+ dest = rd + (i / (VLEN / (2 * width)));
+ src2 = rs2 + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ k = i % (VLEN / (2 * width));
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f32[k] = float32_sub(
+ float16_to_float32(env->vfp.vreg[src2].f16[j], true,
+ &env->fp_status),
+ float16_to_float32(env->fpr[rs1], true,
+ &env->fp_status),
+ &env->fp_status);
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f64[k] = float64_sub(
+ float32_to_float64(env->vfp.vreg[src2].f32[j],
+ &env->fp_status),
+ float32_to_float64(env->fpr[rs1], &env->fp_status),
+ &env->fp_status);
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ } else {
+ vector_tail_fwiden(env, dest, k, width);
+ }
+ }
+    env->vfp.vstart = 0;
+}
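+
+/* vwsub.vv vd, vs2, vs1, vm # 2*SEW = SEW - SEW, signed */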
+void VECTOR_HELPER(vwsub_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int i, j, k, vl;
+ uint32_t lmul, width, src1, src2, dest, vlmax;
+
+ vl = env->vfp.vl;
+
+ lmul = vector_get_lmul(env);
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ if (vector_vtype_ill(env)
+ || vector_overlap_vm_force(vm, rd)
+ || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs1, lmul)
+ || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)
+ ) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ vector_lmul_check_reg(env, lmul, rs1, false);
+ vector_lmul_check_reg(env, lmul, rs2, false);
+ vector_lmul_check_reg(env, lmul, rd, true);
+
+ for (i = 0; i < vlmax; i++) {
+ src1 = rs1 + (i / (VLEN / width));
+ src2 = rs2 + (i / (VLEN / width));
+ dest = rd + (i / (VLEN / (2 * width)));
+ j = i % (VLEN / width);
+ k = i % (VLEN / (2 * width));
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 8:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s16[k] =
+ (int16_t)env->vfp.vreg[src2].s8[j] -
+ (int16_t)env->vfp.vreg[src1].s8[j];
+ }
+ break;
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s32[k] =
+ (int32_t)env->vfp.vreg[src2].s16[j] -
+ (int32_t)env->vfp.vreg[src1].s16[j];
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s64[k] =
+ (int64_t)env->vfp.vreg[src2].s32[j] -
+ (int64_t)env->vfp.vreg[src1].s32[j];
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ break;
+ }
+ } else {
+ vector_tail_widen(env, dest, k, width);
+ }
+ }
+ env->vfp.vstart = 0;
+}
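+
+/* vwsub.vx vd, vs2, rs1, vm # 2*SEW = SEW - x[rs1], signed */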
+void VECTOR_HELPER(vwsub_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int i, j, k, vl;
+ uint32_t lmul, width, src2, dest, vlmax;
+
+ vl = env->vfp.vl;
+
+ lmul = vector_get_lmul(env);
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ if (vector_vtype_ill(env)
+ || vector_overlap_vm_force(vm, rd)
+ || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)
+ ) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ vector_lmul_check_reg(env, lmul, rs2, false);
+ vector_lmul_check_reg(env, lmul, rd, true);
+
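+    /*
+     * The scalar is truncated to SEW bits before being sign-extended
+     * to 2*SEW, so only the low SEW bits of x[rs1] take part; e.g.
+     * for SEW = 8, x[rs1] = 0x1ff contributes -1 to the subtraction.
+     */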
+ for (i = 0; i < vlmax; i++) {
+ src2 = rs2 + (i / (VLEN / width));
+ dest = rd + (i / (VLEN / (2 * width)));
+ j = i % (VLEN / width);
+ k = i % (VLEN / (2 * width));
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 8:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s16[k] =
+ (int16_t)((int8_t)env->vfp.vreg[src2].s8[j]) -
+ (int16_t)((int8_t)env->gpr[rs1]);
+ }
+ break;
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s32[k] =
+ (int32_t)((int16_t)env->vfp.vreg[src2].s16[j]) -
+ (int32_t)((int16_t)env->gpr[rs1]);
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s64[k] =
+ (int64_t)((int32_t)env->vfp.vreg[src2].s32[j]) -
+ (int64_t)((int32_t)env->gpr[rs1]);
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ break;
+ }
+ } else {
+ vector_tail_widen(env, dest, k, width);
+ }
+ }
+ env->vfp.vstart = 0;
+}
+
+/*
+ * vfwredosum.vs vd, vs2, vs1, vm #
+ * Ordered reduce 2*SEW = 2*SEW + sum(promote(SEW))
+ */
+void VECTOR_HELPER(vfwredosum_vs)(CPURISCVState *env, uint32_t vm,
+ uint32_t rs1, uint32_t rs2, uint32_t rd)
+{
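+    /*
+     * This implementation accumulates the unordered sum in element
+     * order as well, so the ordered form can simply reuse it.
+     */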
+ helper_vector_vfwredsum_vs(env, vm, rs1, rs2, rd);
+ env->vfp.vstart = 0;
+}
+
+void VECTOR_HELPER(vwaddu_wv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int i, j, k, vl;
+ uint32_t lmul, width, src1, src2, dest, vlmax;
+
+ vl = env->vfp.vl;
+
+ lmul = vector_get_lmul(env);
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ if (vector_vtype_ill(env)
+ || vector_overlap_vm_force(vm, rd)
+ || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs1, lmul)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ vector_lmul_check_reg(env, lmul, rs1, false);
+ vector_lmul_check_reg(env, lmul, rs2, true);
+ vector_lmul_check_reg(env, lmul, rd, true);
+
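+    /*
+     * In the .wv form vs2 is already a 2*SEW-wide operand, so it is
+     * indexed with the wide-element index k while vs1 uses j.
+     */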
+ for (i = 0; i < vlmax; i++) {
+ src1 = rs1 + (i / (VLEN / width));
+ src2 = rs2 + (i / (VLEN / (2 * width)));
+ dest = rd + (i / (VLEN / (2 * width)));
+ j = i % (VLEN / width);
+ k = i % (VLEN / (2 * width));
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 8:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].u16[k] =
+ (uint16_t)env->vfp.vreg[src1].u8[j] +
+ (uint16_t)env->vfp.vreg[src2].u16[k];
+ }
+ break;
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].u32[k] =
+ (uint32_t)env->vfp.vreg[src1].u16[j] +
+ (uint32_t)env->vfp.vreg[src2].u32[k];
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].u64[k] =
+ (uint64_t)env->vfp.vreg[src1].u32[j] +
+ (uint64_t)env->vfp.vreg[src2].u64[k];
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ break;
+ }
+ } else {
+ vector_tail_widen(env, dest, k, width);
+ }
+ }
+ env->vfp.vstart = 0;
+}
+
+void VECTOR_HELPER(vwaddu_wx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int i, k, vl;
+ uint32_t lmul, width, src2, dest, vlmax;
+
+ vl = env->vfp.vl;
+
+ lmul = vector_get_lmul(env);
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ if (vector_vtype_ill(env)
+ || vector_overlap_vm_force(vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ vector_lmul_check_reg(env, lmul, rs2, true);
+ vector_lmul_check_reg(env, lmul, rd, true);
+
+ for (i = 0; i < vlmax; i++) {
+ src2 = rs2 + (i / (VLEN / (2 * width)));
+ dest = rd + (i / (VLEN / (2 * width)));
+ k = i % (VLEN / (2 * width));
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 8:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].u16[k] =
+ (uint16_t)env->vfp.vreg[src2].u16[k] +
+ (uint16_t)((uint8_t)env->gpr[rs1]);
+ }
+ break;
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].u32[k] =
+ (uint32_t)env->vfp.vreg[src2].u32[k] +
+ (uint32_t)((uint16_t)env->gpr[rs1]);
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].u64[k] =
+ (uint64_t)env->vfp.vreg[src2].u64[k] +
+ (uint64_t)((uint32_t)env->gpr[rs1]);
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ break;
+ }
+ } else {
+ vector_tail_widen(env, dest, k, width);
+ }
+ }
+ env->vfp.vstart = 0;
+}
+
+/* vfwadd.wv vd, vs2, vs1, vm # vector-vector */
+void VECTOR_HELPER(vfwadd_wv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int width, lmul, vl, vlmax;
+ int i, j, k, dest, src1, src2;
+
+ lmul = vector_get_lmul(env);
+ vl = env->vfp.vl;
+
+ if (vector_vtype_ill(env)
+ || vector_overlap_vm_force(vm, rd)
+ || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs1, lmul)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ vector_lmul_check_reg(env, lmul, rs1, false);
+ vector_lmul_check_reg(env, lmul, rs2, true);
+ vector_lmul_check_reg(env, lmul, rd, true);
+
+ if (env->vfp.vstart >= vl) {
+ return;
+ }
+
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ for (i = 0; i < vlmax; i++) {
+ dest = rd + (i / (VLEN / (2 * width)));
+ src2 = rs2 + (i / (VLEN / (2 * width)));
+ src1 = rs1 + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ k = i % (VLEN / (2 * width));
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f32[k] = float32_add(
+ env->vfp.vreg[src2].f32[k],
+ float16_to_float32(env->vfp.vreg[src1].f16[j], true,
+ &env->fp_status),
+ &env->fp_status);
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f64[k] = float64_add(
+ env->vfp.vreg[src2].f64[k],
+ float32_to_float64(env->vfp.vreg[src1].f32[j],
+ &env->fp_status),
+ &env->fp_status);
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ } else {
+ vector_tail_widen(env, dest, k, width);
+ }
+ }
+ env->vfp.vstart = 0;
+}
+
+/* vfwadd.wf vd, vs2, rs1, vm # vector-scalar */
+void VECTOR_HELPER(vfwadd_wf)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int width, lmul, vl, vlmax;
+ int i, k, dest, src2;
+
+ lmul = vector_get_lmul(env);
+ vl = env->vfp.vl;
+
+ if (vector_vtype_ill(env)
+ || vector_overlap_vm_force(vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ vector_lmul_check_reg(env, lmul, rs2, true);
+ vector_lmul_check_reg(env, lmul, rd, true);
+
+ if (env->vfp.vstart >= vl) {
+ return;
+ }
+
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ for (i = 0; i < vlmax; i++) {
+ dest = rd + (i / (VLEN / (2 * width)));
+ src2 = rs2 + (i / (VLEN / (2 * width)));
+ k = i % (VLEN / (2 * width));
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f32[k] = float32_add(
+ env->vfp.vreg[src2].f32[k],
+ float16_to_float32(env->fpr[rs1], true,
+ &env->fp_status),
+ &env->fp_status);
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f64[k] = float64_add(
+ env->vfp.vreg[src2].f64[k],
+ float32_to_float64(env->fpr[rs1], &env->fp_status),
+ &env->fp_status);
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ } else {
+ vector_tail_widen(env, dest, k, width);
+ }
+ }
+ env->vfp.vstart = 0;
+}
+
+void VECTOR_HELPER(vwadd_wv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int i, j, k, vl;
+ uint32_t lmul, width, src1, src2, dest, vlmax;
+
+ vl = env->vfp.vl;
+
+ lmul = vector_get_lmul(env);
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ if (vector_vtype_ill(env)
+ || vector_overlap_vm_force(vm, rd)
+ || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs1, lmul)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ vector_lmul_check_reg(env, lmul, rs1, false);
+ vector_lmul_check_reg(env, lmul, rs2, true);
+ vector_lmul_check_reg(env, lmul, rd, true);
+
+ for (i = 0; i < vlmax; i++) {
+ src1 = rs1 + (i / (VLEN / width));
+ src2 = rs2 + (i / (VLEN / (2 * width)));
+ dest = rd + (i / (VLEN / (2 * width)));
+ j = i % (VLEN / width);
+ k = i % (VLEN / (2 * width));
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 8:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s16[k] =
+ (int16_t)((int8_t)env->vfp.vreg[src1].s8[j]) +
+ (int16_t)env->vfp.vreg[src2].s16[k];
+ }
+ break;
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s32[k] =
+ (int32_t)((int16_t)env->vfp.vreg[src1].s16[j]) +
+ (int32_t)env->vfp.vreg[src2].s32[k];
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s64[k] =
+ (int64_t)((int32_t)env->vfp.vreg[src1].s32[j]) +
+ (int64_t)env->vfp.vreg[src2].s64[k];
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ break;
+ }
+ } else {
+ vector_tail_widen(env, dest, k, width);
+ }
+ }
+ env->vfp.vstart = 0;
+}
+
+void VECTOR_HELPER(vwadd_wx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int i, k, vl;
+ uint32_t lmul, width, src2, dest, vlmax;
+
+ vl = env->vfp.vl;
+
+ lmul = vector_get_lmul(env);
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ if (vector_vtype_ill(env)
+ || vector_overlap_vm_force(vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ vector_lmul_check_reg(env, lmul, rs2, true);
+ vector_lmul_check_reg(env, lmul, rd, true);
+
+ for (i = 0; i < vlmax; i++) {
+ src2 = rs2 + (i / (VLEN / (2 * width)));
+ dest = rd + (i / (VLEN / (2 * width)));
+ k = i % (VLEN / (2 * width));
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 8:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s16[k] =
+ (int16_t)env->vfp.vreg[src2].s16[k] +
+ (int16_t)((int8_t)env->gpr[rs1]);
+ }
+ break;
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s32[k] =
+ (int32_t)env->vfp.vreg[src2].s32[k] +
+ (int32_t)((int16_t)env->gpr[rs1]);
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s64[k] =
+ (int64_t)env->vfp.vreg[src2].s64[k] +
+ (int64_t)((int32_t)env->gpr[rs1]);
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ break;
+ }
+ } else {
+ vector_tail_widen(env, dest, k, width);
+ }
+ }
+ env->vfp.vstart = 0;
+}
+
+void VECTOR_HELPER(vwsubu_wv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int i, j, k, vl;
+ uint32_t lmul, width, src1, src2, dest, vlmax;
+
+ vl = env->vfp.vl;
+
+ lmul = vector_get_lmul(env);
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ if (vector_vtype_ill(env)
+ || vector_overlap_vm_force(vm, rd)
+ || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs1, lmul)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ vector_lmul_check_reg(env, lmul, rs1, false);
+ vector_lmul_check_reg(env, lmul, rs2, true);
+ vector_lmul_check_reg(env, lmul, rd, true);
+
+ for (i = 0; i < vlmax; i++) {
+ src1 = rs1 + (i / (VLEN / width));
+ src2 = rs2 + (i / (VLEN / (2 * width)));
+ dest = rd + (i / (VLEN / (2 * width)));
+ j = i % (VLEN / width);
+ k = i % (VLEN / (2 * width));
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 8:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].u16[k] =
+ (uint16_t)env->vfp.vreg[src2].u16[k] -
+ (uint16_t)env->vfp.vreg[src1].u8[j];
+ }
+ break;
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].u32[k] =
+ (uint32_t)env->vfp.vreg[src2].u32[k] -
+ (uint32_t)env->vfp.vreg[src1].u16[j];
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].u64[k] =
+ (uint64_t)env->vfp.vreg[src2].u64[k] -
+ (uint64_t)env->vfp.vreg[src1].u32[j];
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ break;
+ }
+ } else {
+ vector_tail_widen(env, dest, k, width);
+ }
+ }
+ env->vfp.vstart = 0;
+}
+
+void VECTOR_HELPER(vwsubu_wx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int i, k, vl;
+ uint32_t lmul, width, src2, dest, vlmax;
+
+ vl = env->vfp.vl;
+
+ lmul = vector_get_lmul(env);
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ if (vector_vtype_ill(env)
+ || vector_overlap_vm_force(vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ vector_lmul_check_reg(env, lmul, rs2, true);
+ vector_lmul_check_reg(env, lmul, rd, true);
+
+ for (i = 0; i < vlmax; i++) {
+ src2 = rs2 + (i / (VLEN / (2 * width)));
+ dest = rd + (i / (VLEN / (2 * width)));
+ k = i % (VLEN / (2 * width));
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 8:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].u16[k] =
+ (uint16_t)env->vfp.vreg[src2].u16[k] -
+ (uint16_t)((uint8_t)env->gpr[rs1]);
+ }
+ break;
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].u32[k] =
+ (uint32_t)env->vfp.vreg[src2].u32[k] -
+ (uint32_t)((uint16_t)env->gpr[rs1]);
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].u64[k] =
+ (uint64_t)env->vfp.vreg[src2].u64[k] -
+ (uint64_t)((uint32_t)env->gpr[rs1]);
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ break;
+ }
+ } else {
+ vector_tail_widen(env, dest, k, width);
+ }
+ }
+ env->vfp.vstart = 0;
+}
+
+/* vfwsub.wv vd, vs2, vs1, vm # vector-vector */
+void VECTOR_HELPER(vfwsub_wv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int width, lmul, vl, vlmax;
+ int i, j, k, dest, src1, src2;
+
+ lmul = vector_get_lmul(env);
+ vl = env->vfp.vl;
+
+ if (vector_vtype_ill(env)
+ || vector_overlap_vm_force(vm, rd)
+ || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs1, lmul)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ vector_lmul_check_reg(env, lmul, rs1, false);
+ vector_lmul_check_reg(env, lmul, rs2, true);
+ vector_lmul_check_reg(env, lmul, rd, true);
+
+ if (env->vfp.vstart >= vl) {
+ return;
+ }
+
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ for (i = 0; i < vlmax; i++) {
+ dest = rd + (i / (VLEN / (2 * width)));
+ src2 = rs2 + (i / (VLEN / (2 * width)));
+ src1 = rs1 + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ k = i % (VLEN / (2 * width));
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f32[k] = float32_sub(
+ env->vfp.vreg[src2].f32[k],
+ float16_to_float32(env->vfp.vreg[src1].f16[j], true,
+ &env->fp_status),
+ &env->fp_status);
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f64[k] = float64_sub(
+ env->vfp.vreg[src2].f64[k],
+ float32_to_float64(env->vfp.vreg[src1].f32[j],
+ &env->fp_status),
+ &env->fp_status);
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ } else {
+ vector_tail_fwiden(env, dest, k, width);
+ }
+ }
+ env->vfp.vstart = 0;
+}
+
+/* vfwsub.wf vd, vs2, rs1, vm # vector-scalar */
+void VECTOR_HELPER(vfwsub_wf)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int width, lmul, vl, vlmax;
+ int i, k, dest, src2;
+
+ lmul = vector_get_lmul(env);
+ vl = env->vfp.vl;
+
+ if (vector_vtype_ill(env)
+ || vector_overlap_vm_force(vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ vector_lmul_check_reg(env, lmul, rs2, true);
+ vector_lmul_check_reg(env, lmul, rd, true);
+
+ if (env->vfp.vstart >= vl) {
+ return;
+ }
+
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ for (i = 0; i < vlmax; i++) {
+ dest = rd + (i / (VLEN / (2 * width)));
+ src2 = rs2 + (i / (VLEN / (2 * width)));
+ k = i % (VLEN / (2 * width));
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f32[k] = float32_sub(
+ env->vfp.vreg[src2].f32[k],
+ float16_to_float32(env->fpr[rs1], true,
+ &env->fp_status),
+ &env->fp_status);
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f64[k] = float64_sub(
+ env->vfp.vreg[src2].f64[k],
+ float32_to_float64(env->fpr[rs1], &env->fp_status),
+ &env->fp_status);
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ } else {
+ vector_tail_fwiden(env, dest, k, width);
+ }
+ }
+ env->vfp.vstart = 0;
+}
+
+void VECTOR_HELPER(vwsub_wv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int i, j, k, vl;
+ uint32_t lmul, width, src1, src2, dest, vlmax;
+
+ vl = env->vfp.vl;
+
+ lmul = vector_get_lmul(env);
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ if (vector_vtype_ill(env)
+ || vector_overlap_vm_force(vm, rd)
+ || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs1, lmul)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ vector_lmul_check_reg(env, lmul, rs1, false);
+ vector_lmul_check_reg(env, lmul, rs2, true);
+ vector_lmul_check_reg(env, lmul, rd, true);
+
+ for (i = 0; i < vlmax; i++) {
+ src1 = rs1 + (i / (VLEN / width));
+ src2 = rs2 + (i / (VLEN / (2 * width)));
+ dest = rd + (i / (VLEN / (2 * width)));
+ j = i % (VLEN / width);
+ k = i % (VLEN / (2 * width));
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 8:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s16[k] =
+ (int16_t)env->vfp.vreg[src2].s16[k] -
+ (int16_t)((int8_t)env->vfp.vreg[src1].s8[j]);
+ }
+ break;
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s32[k] =
+ (int32_t)env->vfp.vreg[src2].s32[k] -
+ (int32_t)((int16_t)env->vfp.vreg[src1].s16[j]);
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s64[k] =
+ (int64_t)env->vfp.vreg[src2].s64[k] -
+ (int64_t)((int32_t)env->vfp.vreg[src1].s32[j]);
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ break;
+ }
+ } else {
+ vector_tail_widen(env, dest, k, width);
+ }
+ }
+ env->vfp.vstart = 0;
+}
+
+void VECTOR_HELPER(vwsub_wx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int i, k, vl;
+ uint32_t lmul, width, src2, dest, vlmax;
+
+ vl = env->vfp.vl;
+
+ lmul = vector_get_lmul(env);
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ if (vector_vtype_ill(env)
+ || vector_overlap_vm_force(vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ vector_lmul_check_reg(env, lmul, rs2, true);
+ vector_lmul_check_reg(env, lmul, rd, true);
+
+ for (i = 0; i < vlmax; i++) {
+ src2 = rs2 + (i / (VLEN / (2 * width)));
+ dest = rd + (i / (VLEN / (2 * width)));
+ k = i % (VLEN / (2 * width));
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 8:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s16[k] =
+ (int16_t)env->vfp.vreg[src2].s16[k] -
+ (int16_t)((int8_t)env->gpr[rs1]);
+ }
+ break;
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s32[k] =
+ (int32_t)env->vfp.vreg[src2].s32[k] -
+ (int32_t)((int16_t)env->gpr[rs1]);
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s64[k] =
+ (int64_t)env->vfp.vreg[src2].s64[k] -
+ (int64_t)((int32_t)env->gpr[rs1]);
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ break;
+ }
+ } else {
+ vector_tail_widen(env, dest, k, width);
+ }
+ }
+ env->vfp.vstart = 0;
+}
+
+void VECTOR_HELPER(vwmulu_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int i, j, k, vl;
+ uint32_t lmul, width, src1, src2, dest, vlmax;
+
+ vl = env->vfp.vl;
+
+ lmul = vector_get_lmul(env);
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ if (vector_vtype_ill(env)
+ || vector_overlap_vm_force(vm, rd)
+ || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs1, lmul)
+ || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ vector_lmul_check_reg(env, lmul, rs1, false);
+ vector_lmul_check_reg(env, lmul, rs2, false);
+ vector_lmul_check_reg(env, lmul, rd, true);
+
+ for (i = 0; i < vlmax; i++) {
+ src1 = rs1 + (i / (VLEN / width));
+ src2 = rs2 + (i / (VLEN / width));
+ dest = rd + (i / (VLEN / (2 * width)));
+ j = i % (VLEN / width);
+ k = i % (VLEN / (2 * width));
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 8:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].u16[k] =
+ (uint16_t)env->vfp.vreg[src1].u8[j] *
+ (uint16_t)env->vfp.vreg[src2].u8[j];
+ }
+ break;
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].u32[k] =
+ (uint32_t)env->vfp.vreg[src1].u16[j] *
+ (uint32_t)env->vfp.vreg[src2].u16[j];
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].u64[k] =
+ (uint64_t)env->vfp.vreg[src1].u32[j] *
+ (uint64_t)env->vfp.vreg[src2].u32[j];
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ break;
+ }
+ } else {
+ vector_tail_widen(env, dest, k, width);
+ }
+ }
+ env->vfp.vstart = 0;
+}
+
+void VECTOR_HELPER(vwmulu_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int i, j, k, vl;
+ uint32_t lmul, width, src2, dest, vlmax;
+
+ vl = env->vfp.vl;
+
+ lmul = vector_get_lmul(env);
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ if (vector_vtype_ill(env)
+ || vector_overlap_vm_force(vm, rd)
+ || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ vector_lmul_check_reg(env, lmul, rs2, false);
+ vector_lmul_check_reg(env, lmul, rd, true);
+
+ for (i = 0; i < vlmax; i++) {
+ src2 = rs2 + (i / (VLEN / width));
+ dest = rd + (i / (VLEN / (2 * width)));
+ j = i % (VLEN / width);
+ k = i % (VLEN / (2 * width));
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 8:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].u16[k] =
+ (uint16_t)env->vfp.vreg[src2].u8[j] *
+ (uint16_t)((uint8_t)env->gpr[rs1]);
+ }
+ break;
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].u32[k] =
+ (uint32_t)env->vfp.vreg[src2].u16[j] *
+ (uint32_t)((uint16_t)env->gpr[rs1]);
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].u64[k] =
+ (uint64_t)env->vfp.vreg[src2].u32[j] *
+ (uint64_t)((uint32_t)env->gpr[rs1]);
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ break;
+ }
+ } else {
+ vector_tail_widen(env, dest, k, width);
+ }
+ }
+ env->vfp.vstart = 0;
+}
+
+/* vfwmul.vv vd, vs2, vs1, vm # vector-vector */
+void VECTOR_HELPER(vfwmul_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int width, lmul, vl, vlmax;
+ int i, j, k, dest, src1, src2;
+
+ lmul = vector_get_lmul(env);
+ vl = env->vfp.vl;
+
+ if (vector_vtype_ill(env)
+ || vector_overlap_vm_force(vm, rd)
+ || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs1, lmul)
+ || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ vector_lmul_check_reg(env, lmul, rs1, false);
+ vector_lmul_check_reg(env, lmul, rs2, false);
+ vector_lmul_check_reg(env, lmul, rd, true);
+
+ if (env->vfp.vstart >= vl) {
+ return;
+ }
+
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ for (i = 0; i < vlmax; i++) {
+ dest = rd + (i / (VLEN / (2 * width)));
+ src2 = rs2 + (i / (VLEN / width));
+ src1 = rs1 + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ k = i % (VLEN / (2 * width));
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f32[k] = float32_mul(
+ float16_to_float32(env->vfp.vreg[src2].f16[j], true,
+ &env->fp_status),
+ float16_to_float32(env->vfp.vreg[src1].f16[j], true,
+ &env->fp_status),
+ &env->fp_status);
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f64[k] = float64_mul(
+ float32_to_float64(env->vfp.vreg[src2].f32[j],
+ &env->fp_status),
+ float32_to_float64(env->vfp.vreg[src1].f32[j],
+ &env->fp_status),
+ &env->fp_status);
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ } else {
+ vector_tail_fwiden(env, dest, k, width);
+ }
+ }
+ env->vfp.vstart = 0;
+}
+
+/* vfwmul.vf vd, vs2, rs1, vm # vector-scalar */
+void VECTOR_HELPER(vfwmul_vf)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int width, lmul, vl, vlmax;
+ int i, j, k, dest, src2;
+
+ lmul = vector_get_lmul(env);
+ vl = env->vfp.vl;
+
+ if (vector_vtype_ill(env)
+ || vector_overlap_vm_force(vm, rd)
+ || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ vector_lmul_check_reg(env, lmul, rs2, false);
+ vector_lmul_check_reg(env, lmul, rd, true);
+
+ if (env->vfp.vstart >= vl) {
+ return;
+ }
+
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ for (i = 0; i < vlmax; i++) {
+ dest = rd + (i / (VLEN / (2 * width)));
+ src2 = rs2 + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ k = i % (VLEN / (2 * width));
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f32[k] = float32_mul(
+ float16_to_float32(env->vfp.vreg[src2].f16[j], true,
+ &env->fp_status),
+ float16_to_float32(env->fpr[rs1], true,
+ &env->fp_status),
+ &env->fp_status);
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f64[k] = float64_mul(
+ float32_to_float64(env->vfp.vreg[src2].f32[j],
+ &env->fp_status),
+ float32_to_float64(env->fpr[rs1], &env->fp_status),
+ &env->fp_status);
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ } else {
+ vector_tail_fwiden(env, dest, k, width);
+ }
+ }
+ env->vfp.vstart = 0;
+}
+
+void VECTOR_HELPER(vwmulsu_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int i, j, k, vl;
+ uint32_t lmul, width, src1, src2, dest, vlmax;
+
+ vl = env->vfp.vl;
+
+ lmul = vector_get_lmul(env);
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ if (vector_vtype_ill(env)
+ || vector_overlap_vm_force(vm, rd)
+ || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs1, lmul)
+ || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ vector_lmul_check_reg(env, lmul, rs1, false);
+ vector_lmul_check_reg(env, lmul, rs2, false);
+ vector_lmul_check_reg(env, lmul, rd, true);
+
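+    /*
+     * vwmulsu multiplies signed(vs2) by unsigned(vs1); the casts widen
+     * both operands first, so the stored 2*SEW value is the full
+     * signed-by-unsigned product.
+     */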
+ for (i = 0; i < vlmax; i++) {
+ src1 = rs1 + (i / (VLEN / width));
+ src2 = rs2 + (i / (VLEN / width));
+ dest = rd + (i / (VLEN / (2 * width)));
+ j = i % (VLEN / width);
+ k = i % (VLEN / (2 * width));
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 8:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s16[k] =
+ (int16_t)env->vfp.vreg[src2].s8[j] *
+ (uint16_t)env->vfp.vreg[src1].u8[j];
+ }
+ break;
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s32[k] =
+ (int32_t)env->vfp.vreg[src2].s16[j] *
+ (uint32_t)env->vfp.vreg[src1].u16[j];
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s64[k] =
+ (int64_t)env->vfp.vreg[src2].s32[j] *
+ (uint64_t)env->vfp.vreg[src1].u32[j];
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ break;
+ }
+ } else {
+ vector_tail_widen(env, dest, k, width);
+ }
+ }
+ env->vfp.vstart = 0;
+}
+
+void VECTOR_HELPER(vwmulsu_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int i, j, k, vl;
+ uint32_t lmul, width, src2, dest, vlmax;
+
+ vl = env->vfp.vl;
+
+ lmul = vector_get_lmul(env);
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ if (vector_vtype_ill(env)
+ || vector_overlap_vm_force(vm, rd)
+ || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ vector_lmul_check_reg(env, lmul, rs2, false);
+ vector_lmul_check_reg(env, lmul, rd, true);
+
+ for (i = 0; i < vlmax; i++) {
+ src2 = rs2 + (i / (VLEN / width));
+ dest = rd + (i / (VLEN / (2 * width)));
+ j = i % (VLEN / width);
+ k = i % (VLEN / (2 * width));
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 8:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s16[k] =
+ (int16_t)((int8_t)env->vfp.vreg[src2].s8[j]) *
+ (uint16_t)((uint8_t)env->gpr[rs1]);
+ }
+ break;
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s32[k] =
+ (int32_t)((int16_t)env->vfp.vreg[src2].s16[j]) *
+ (uint32_t)((uint16_t)env->gpr[rs1]);
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s64[k] =
+ (int64_t)((int32_t)env->vfp.vreg[src2].s32[j]) *
+ (uint64_t)((uint32_t)env->gpr[rs1]);
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ break;
+ }
+ } else {
+ vector_tail_widen(env, dest, k, width);
+ }
+ }
+ env->vfp.vstart = 0;
+}
+
+void VECTOR_HELPER(vwmul_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int i, j, k, vl;
+ uint32_t lmul, width, src1, src2, dest, vlmax;
+
+ vl = env->vfp.vl;
+
+ lmul = vector_get_lmul(env);
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ if (vector_vtype_ill(env)
+ || vector_overlap_vm_force(vm, rd)
+ || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs1, lmul)
+ || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ vector_lmul_check_reg(env, lmul, rs1, false);
+ vector_lmul_check_reg(env, lmul, rs2, false);
+ vector_lmul_check_reg(env, lmul, rd, true);
+
+ for (i = 0; i < vlmax; i++) {
+ src1 = rs1 + (i / (VLEN / width));
+ src2 = rs2 + (i / (VLEN / width));
+ dest = rd + (i / (VLEN / (2 * width)));
+ j = i % (VLEN / width);
+ k = i % (VLEN / (2 * width));
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 8:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s16[k] =
+ (int16_t)env->vfp.vreg[src1].s8[j] *
+ (int16_t)env->vfp.vreg[src2].s8[j];
+ }
+ break;
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s32[k] =
+ (int32_t)env->vfp.vreg[src1].s16[j] *
+ (int32_t)env->vfp.vreg[src2].s16[j];
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s64[k] =
+ (int64_t)env->vfp.vreg[src1].s32[j] *
+ (int64_t)env->vfp.vreg[src2].s32[j];
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ break;
+ }
+ } else {
+ vector_tail_widen(env, dest, k, width);
+ }
+ }
+ env->vfp.vstart = 0;
+}
+
+void VECTOR_HELPER(vwmul_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int i, j, k, vl;
+ uint32_t lmul, width, src2, dest, vlmax;
+
+ vl = env->vfp.vl;
+
+ lmul = vector_get_lmul(env);
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ if (vector_vtype_ill(env)
+ || vector_overlap_vm_force(vm, rd)
+ || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ vector_lmul_check_reg(env, lmul, rs2, false);
+ vector_lmul_check_reg(env, lmul, rd, true);
+
+ for (i = 0; i < vlmax; i++) {
+ src2 = rs2 + (i / (VLEN / width));
+ dest = rd + (i / (VLEN / (2 * width)));
+ j = i % (VLEN / width);
+ k = i % (VLEN / (2 * width));
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 8:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s16[k] =
+ (int16_t)((int8_t)env->vfp.vreg[src2].s8[j]) *
+ (int16_t)((int8_t)env->gpr[rs1]);
+ }
+ break;
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s32[k] =
+ (int32_t)((int16_t)env->vfp.vreg[src2].s16[j]) *
+ (int32_t)((int16_t)env->gpr[rs1]);
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s64[k] =
+ (int64_t)((int32_t)env->vfp.vreg[src2].s32[j]) *
+ (int64_t)((int32_t)env->gpr[rs1]);
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ break;
+ }
+ } else {
+ vector_tail_widen(env, dest, k, width);
+ }
+ }
+ env->vfp.vstart = 0;
+}
+
+/*
+ * vwsmaccu.vv vd, vs1, vs2, vm #
+ * vd[i] = clipu((+(vs1[i]*vs2[i]+round)>>SEW/2)+vd[i])
+ */
+void VECTOR_HELPER(vwsmaccu_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int width, lmul, vl, vlmax;
+ int i, j, k, dest, src1, src2;
+
+ lmul = vector_get_lmul(env);
+
+ if (vector_vtype_ill(env)
+ || vector_overlap_vm_force(vm, rd)
+ || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs1, lmul)
+ || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ vector_lmul_check_reg(env, lmul, rs1, false);
+ vector_lmul_check_reg(env, lmul, rs2, false);
+ vector_lmul_check_reg(env, lmul, rd, true);
+
+ vl = env->vfp.vl;
+ if (env->vfp.vstart >= vl) {
+ return;
+ }
+
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
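+    /*
+     * vwsmaccu_8/16/32 are expected to add the rounding increment
+     * selected by vxrm, shift the product right by SEW/2, and saturate
+     * the accumulated sum.  Sketch for SEW = 8 with round-to-nearest-up
+     * and vd = 0: vs1 = 200, vs2 = 100 gives (200 * 100 + 8) >> 4 = 1250.
+     */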
+ for (i = 0; i < vlmax; i++) {
+ dest = rd + (i / (VLEN / (2 * width)));
+ src2 = rs2 + (i / (VLEN / width));
+ src1 = rs1 + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ k = i % (VLEN / (2 * width));
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 8:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].u16[k] = vwsmaccu_8(env,
+ env->vfp.vreg[src2].u8[j],
+ env->vfp.vreg[src1].u8[j],
+ env->vfp.vreg[dest].u16[k]);
+ }
+ break;
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].u32[k] = vwsmaccu_16(env,
+ env->vfp.vreg[src2].u16[j],
+ env->vfp.vreg[src1].u16[j],
+ env->vfp.vreg[dest].u32[k]);
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].u64[k] = vwsmaccu_32(env,
+ env->vfp.vreg[src2].u32[j],
+ env->vfp.vreg[src1].u32[j],
+ env->vfp.vreg[dest].u64[k]);
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ } else {
+ vector_tail_widen(env, dest, k, width);
+ }
+ }
+ env->vfp.vstart = 0;
+}
+
+/*
+ * vwsmaccu.vx vd, rs1, vs2, vm #
+ * vd[i] = clipu((+(x[rs1]*vs2[i]+round)>>SEW/2)+vd[i])
+ */
+void VECTOR_HELPER(vwsmaccu_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int width, lmul, vl, vlmax;
+ int i, j, k, dest, src2;
+
+ lmul = vector_get_lmul(env);
+
+ if (vector_vtype_ill(env)
+ || vector_overlap_vm_force(vm, rd)
+ || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ vector_lmul_check_reg(env, lmul, rs2, false);
+ vector_lmul_check_reg(env, lmul, rd, true);
+
+ vl = env->vfp.vl;
+ if (env->vfp.vstart >= vl) {
+ return;
+ }
+
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ for (i = 0; i < vlmax; i++) {
+ dest = rd + (i / (VLEN / (2 * width)));
+ src2 = rs2 + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ k = i % (VLEN / (2 * width));
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 8:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].u16[k] = vwsmaccu_8(env,
+ env->vfp.vreg[src2].u8[j],
+ env->gpr[rs1],
+ env->vfp.vreg[dest].u16[k]);
+ }
+ break;
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].u32[k] = vwsmaccu_16(env,
+ env->vfp.vreg[src2].u16[j],
+ env->gpr[rs1],
+ env->vfp.vreg[dest].u32[k]);
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].u64[k] = vwsmaccu_32(env,
+ env->vfp.vreg[src2].u32[j],
+ env->gpr[rs1],
+ env->vfp.vreg[dest].u64[k]);
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ } else {
+ vector_tail_widen(env, dest, k, width);
+ }
+ }
+ env->vfp.vstart = 0;
+}
+
+void VECTOR_HELPER(vwmaccu_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int i, j, k, vl;
+ uint32_t lmul, width, src1, src2, dest, vlmax;
+
+ vl = env->vfp.vl;
+
+ lmul = vector_get_lmul(env);
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ if (vector_vtype_ill(env)
+ || vector_overlap_vm_force(vm, rd)
+ || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs1, lmul)
+ || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ vector_lmul_check_reg(env, lmul, rs1, false);
+ vector_lmul_check_reg(env, lmul, rs2, false);
+ vector_lmul_check_reg(env, lmul, rd, true);
+
+ for (i = 0; i < vlmax; i++) {
+ src1 = rs1 + (i / (VLEN / width));
+ src2 = rs2 + (i / (VLEN / width));
+ dest = rd + (i / (VLEN / (2 * width)));
+ j = i % (VLEN / width);
+ k = i % (VLEN / (2 * width));
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 8:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].u16[k] +=
+ (uint16_t)env->vfp.vreg[src1].u8[j] *
+ (uint16_t)env->vfp.vreg[src2].u8[j];
+ }
+ break;
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].u32[k] +=
+ (uint32_t)env->vfp.vreg[src1].u16[j] *
+ (uint32_t)env->vfp.vreg[src2].u16[j];
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].u64[k] +=
+ (uint64_t)env->vfp.vreg[src1].u32[j] *
+ (uint64_t)env->vfp.vreg[src2].u32[j];
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ break;
+ }
+ } else {
+ vector_tail_widen(env, dest, k, width);
+ }
+ }
+ env->vfp.vstart = 0;
+}
+
+void VECTOR_HELPER(vwmaccu_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int i, j, k, vl;
+ uint32_t lmul, width, src2, dest, vlmax;
+
+ vl = env->vfp.vl;
+
+ lmul = vector_get_lmul(env);
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ if (vector_vtype_ill(env)
+ || vector_overlap_vm_force(vm, rd)
+ || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ vector_lmul_check_reg(env, lmul, rs2, false);
+ vector_lmul_check_reg(env, lmul, rd, true);
+
+ for (i = 0; i < vlmax; i++) {
+ src2 = rs2 + (i / (VLEN / width));
+ dest = rd + (i / (VLEN / (2 * width)));
+ j = i % (VLEN / width);
+ k = i % (VLEN / (2 * width));
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 8:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].u16[k] +=
+ (uint16_t)env->vfp.vreg[src2].u8[j] *
+ (uint16_t)((uint8_t)env->gpr[rs1]);
+ }
+ break;
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].u32[k] +=
+ (uint32_t)env->vfp.vreg[src2].u16[j] *
+ (uint32_t)((uint16_t)env->gpr[rs1]);
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].u64[k] +=
+ (uint64_t)env->vfp.vreg[src2].u32[j] *
+ (uint64_t)((uint32_t)env->gpr[rs1]);
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ break;
+ }
+ } else {
+ vector_tail_widen(env, dest, k, width);
+ }
+ }
+ env->vfp.vstart = 0;
+}
+
+/* vfwmacc.vv vd, vs1, vs2, vm # vd[i] = +(vs1[i] * vs2[i]) + vd[i] */
+void VECTOR_HELPER(vfwmacc_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int width, lmul, vl, vlmax;
+ int i, j, k, dest, src1, src2;
+
+ lmul = vector_get_lmul(env);
+ vl = env->vfp.vl;
+
+ if (vector_vtype_ill(env)
+ || vector_overlap_vm_force(vm, rd)
+ || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs1, lmul)
+ || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ vector_lmul_check_reg(env, lmul, rs1, false);
+ vector_lmul_check_reg(env, lmul, rs2, false);
+ vector_lmul_check_reg(env, lmul, rd, true);
+
+ if (env->vfp.vstart >= vl) {
+ return;
+ }
+
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
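+    /*
+     * The accumulator vd is a 2*SEW operand, so it is read at the wide
+     * element index k rather than converted up from an SEW element.
+     */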
+ for (i = 0; i < vlmax; i++) {
+ dest = rd + (i / (VLEN / (2 * width)));
+ src2 = rs2 + (i / (VLEN / width));
+ src1 = rs1 + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ k = i % (VLEN / (2 * width));
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f32[k] = float32_muladd(
+ float16_to_float32(env->vfp.vreg[src1].f16[j], true,
+ &env->fp_status),
+ float16_to_float32(env->vfp.vreg[src2].f16[j], true,
+ &env->fp_status),
+                        env->vfp.vreg[dest].f32[k],
+ 0,
+ &env->fp_status);
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f64[k] = float64_muladd(
+ float32_to_float64(env->vfp.vreg[src1].f32[j],
+ &env->fp_status),
+ float32_to_float64(env->vfp.vreg[src2].f32[j],
+ &env->fp_status),
+                        env->vfp.vreg[dest].f64[k],
+ 0,
+ &env->fp_status);
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ } else {
+            switch (width) {
+            case 16:
+                env->vfp.vreg[dest].f32[k] = 0;
+                break;
+            case 32:
+                env->vfp.vreg[dest].f64[k] = 0;
+                break;
+            default:
+                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+                return;
+            }
+ }
+ }
+ env->vfp.vstart = 0;
+}
+
+/* vfwmacc.vf vd, rs1, vs2, vm # vd[i] = +(f[rs1] * vs2[i]) + vd[i] */
+void VECTOR_HELPER(vfwmacc_vf)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int width, lmul, vl, vlmax;
+ int i, j, k, dest, src2;
+
+ lmul = vector_get_lmul(env);
+ vl = env->vfp.vl;
+ if (vector_vtype_ill(env)
+ || vector_overlap_vm_force(vm, rd)
+ || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ vector_lmul_check_reg(env, lmul, rs2, false);
+ vector_lmul_check_reg(env, lmul, rd, true);
+
+ if (env->vfp.vstart >= vl) {
+ return;
+ }
+
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ for (i = 0; i < vlmax; i++) {
+ dest = rd + (i / (VLEN / (2 * width)));
+ src2 = rs2 + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ k = i % (VLEN / (2 * width));
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f32[k] = float32_muladd(
+                        float16_to_float32(env->fpr[rs1], true,
+                            &env->fp_status),
+                        float16_to_float32(env->vfp.vreg[src2].f16[j], true,
+                            &env->fp_status),
+                        env->vfp.vreg[dest].f32[k],
+ 0,
+ &env->fp_status);
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f64[k] = float64_muladd(
+                        float32_to_float64(env->fpr[rs1], &env->fp_status),
+                        float32_to_float64(env->vfp.vreg[src2].f32[j],
+                            &env->fp_status),
+                        env->vfp.vreg[dest].f64[k],
+ 0,
+ &env->fp_status);
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ } else {
+            switch (width) {
+            case 16:
+                env->vfp.vreg[dest].f32[k] = 0;
+                break;
+            case 32:
+                env->vfp.vreg[dest].f64[k] = 0;
+                break;
+            default:
+                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+                return;
+            }
+ }
+ }
+ env->vfp.vstart = 0;
+}
+
+/*
+ * vwsmacc.vv vd, vs1, vs2, vm #
+ * vd[i] = clip((+(vs1[i]*vs2[i]+round)>>SEW/2)+vd[i])
+ */
+void VECTOR_HELPER(vwsmacc_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int width, lmul, vl, vlmax;
+ int i, j, k, dest, src1, src2;
+
+ lmul = vector_get_lmul(env);
+
+ if (vector_vtype_ill(env)
+ || vector_overlap_vm_force(vm, rd)
+ || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs1, lmul)
+ || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ vector_lmul_check_reg(env, lmul, rs1, false);
+ vector_lmul_check_reg(env, lmul, rs2, false);
+ vector_lmul_check_reg(env, lmul, rd, true);
+
+ vl = env->vfp.vl;
+ if (env->vfp.vstart >= vl) {
+ return;
+ }
+
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ for (i = 0; i < vlmax; i++) {
+ dest = rd + (i / (VLEN / (2 * width)));
+ src2 = rs2 + (i / (VLEN / width));
+ src1 = rs1 + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ k = i % (VLEN / (2 * width));
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 8:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s16[k] = vwsmacc_8(env,
+ env->vfp.vreg[src2].s8[j],
+ env->vfp.vreg[src1].s8[j],
+ env->vfp.vreg[dest].s16[k]);
+ }
+ break;
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s32[k] = vwsmacc_16(env,
+ env->vfp.vreg[src2].s16[j],
+ env->vfp.vreg[src1].s16[j],
+ env->vfp.vreg[dest].s32[k]);
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s64[k] = vwsmacc_32(env,
+ env->vfp.vreg[src2].s32[j],
+ env->vfp.vreg[src1].s32[j],
+ env->vfp.vreg[dest].s64[k]);
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ } else {
+ vector_tail_widen(env, dest, k, width);
+ }
+ }
+ env->vfp.vstart = 0;
+}
+
+/*
+ * vwsmacc.vx vd, rs1, vs2, vm #
+ * vd[i] = clip((+(x[rs1]*vs2[i]+round)>>SEW/2)+vd[i])
+ */
+void VECTOR_HELPER(vwsmacc_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int width, lmul, vl, vlmax;
+ int i, j, k, dest, src2;
+
+ lmul = vector_get_lmul(env);
+
+ if (vector_vtype_ill(env)
+ || vector_overlap_vm_force(vm, rd)
+ || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ vector_lmul_check_reg(env, lmul, rs2, false);
+ vector_lmul_check_reg(env, lmul, rd, true);
+
+ vl = env->vfp.vl;
+ if (env->vfp.vstart >= vl) {
+ return;
+ }
+
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ for (i = 0; i < vlmax; i++) {
+ dest = rd + (i / (VLEN / (2 * width)));
+ src2 = rs2 + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ k = i % (VLEN / (2 * width));
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 8:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s16[k] = vwsmacc_8(env,
+ env->vfp.vreg[src2].s8[j],
+ env->gpr[rs1],
+ env->vfp.vreg[dest].s16[k]);
+ }
+ break;
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s32[k] = vwsmacc_16(env,
+ env->vfp.vreg[src2].s16[j],
+ env->gpr[rs1],
+ env->vfp.vreg[dest].s32[k]);
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s64[k] = vwsmacc_32(env,
+ env->vfp.vreg[src2].s32[j],
+ env->gpr[rs1],
+ env->vfp.vreg[dest].s64[k]);
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ } else {
+ vector_tail_widen(env, dest, k, width);
+ }
+ }
+ env->vfp.vstart = 0;
+}
+
+/*
+ * vwsmaccsu.vv vd, vs1, vs2, vm
+ * # vd[i] = clip(-((signed(vs1[i])*unsigned(vs2[i])+round)>>SEW/2)+vd[i])
+ */
+void VECTOR_HELPER(vwsmaccsu_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int width, lmul, vl, vlmax;
+ int i, j, k, dest, src1, src2;
+
+ lmul = vector_get_lmul(env);
+
+ if (vector_vtype_ill(env)
+ || vector_overlap_vm_force(vm, rd)
+ || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs1, lmul)
+ || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ vector_lmul_check_reg(env, lmul, rs1, false);
+ vector_lmul_check_reg(env, lmul, rs2, false);
+ vector_lmul_check_reg(env, lmul, rd, true);
+
+ vl = env->vfp.vl;
+ if (env->vfp.vstart >= vl) {
+ return;
+ }
+
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ for (i = 0; i < vlmax; i++) {
+ dest = rd + (i / (VLEN / (2 * width)));
+ src2 = rs2 + (i / (VLEN / width));
+ src1 = rs1 + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ k = i % (VLEN / (2 * width));
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 8:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s16[k] = vwsmaccsu_8(env,
+ env->vfp.vreg[src2].u8[j],
+ env->vfp.vreg[src1].s8[j],
+ env->vfp.vreg[dest].s16[k]);
+ }
+ break;
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s32[k] = vwsmaccsu_16(env,
+ env->vfp.vreg[src2].u16[j],
+ env->vfp.vreg[src1].s16[j],
+ env->vfp.vreg[dest].s32[k]);
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s64[k] = vwsmaccsu_32(env,
+ env->vfp.vreg[src2].u32[j],
+ env->vfp.vreg[src1].s32[j],
+ env->vfp.vreg[dest].s64[k]);
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ } else {
+ vector_tail_widen(env, dest, k, width);
+ }
+ }
+ env->vfp.vstart = 0;
+}
+
+/*
+ * vwsmaccsu.vx vd, rs1, vs2, vm
+ * # vd[i] = clip(-((signed(x[rs1])*unsigned(vs2[i])+round)>>SEW/2)+vd[i])
+ */
+void VECTOR_HELPER(vwsmaccsu_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int width, lmul, vl, vlmax;
+ int i, j, k, dest, src2;
+
+ lmul = vector_get_lmul(env);
+
+ if (vector_vtype_ill(env)
+ || vector_overlap_vm_force(vm, rd)
+ || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ vector_lmul_check_reg(env, lmul, rs2, false);
+ vector_lmul_check_reg(env, lmul, rd, true);
+
+ vl = env->vfp.vl;
+ if (env->vfp.vstart >= vl) {
+ return;
+ }
+
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ for (i = 0; i < vlmax; i++) {
+ dest = rd + (i / (VLEN / (2 * width)));
+ src2 = rs2 + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ k = i % (VLEN / (2 * width));
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 8:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s16[k] = vwsmaccsu_8(env,
+ env->vfp.vreg[src2].u8[j],
+ env->gpr[rs1],
+ env->vfp.vreg[dest].s16[k]);
+ }
+ break;
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s32[k] = vwsmaccsu_16(env,
+ env->vfp.vreg[src2].u16[j],
+ env->gpr[rs1],
+ env->vfp.vreg[dest].s32[k]);
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s64[k] = vwsmaccsu_32(env,
+ env->vfp.vreg[src2].u32[j],
+ env->gpr[rs1],
+ env->vfp.vreg[dest].s64[k]);
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ } else {
+ vector_tail_widen(env, dest, k, width);
+ }
+ }
+ env->vfp.vstart = 0;
+}
+
+/*
+ * vwsmaccus.vx vd, rs1, vs2, vm
+ * # vd[i] = clip(-((unsigned(x[rs1])*signed(vs2[i])+round)>>SEW/2)+vd[i])
+ */
+void VECTOR_HELPER(vwsmaccus_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int width, lmul, vl, vlmax;
+ int i, j, k, dest, src2;
+
+ lmul = vector_get_lmul(env);
+
+ if (vector_vtype_ill(env)
+ || vector_overlap_vm_force(vm, rd)
+ || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ vector_lmul_check_reg(env, lmul, rs2, false);
+ vector_lmul_check_reg(env, lmul, rd, true);
+
+ vl = env->vfp.vl;
+ if (env->vfp.vstart >= vl) {
+ return;
+ }
+
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ for (i = 0; i < vlmax; i++) {
+ dest = rd + (i / (VLEN / (2 * width)));
+ src2 = rs2 + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ k = i % (VLEN / (2 * width));
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 8:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s16[k] = vwsmaccus_8(env,
+ env->vfp.vreg[src2].s8[j],
+ env->gpr[rs1],
+ env->vfp.vreg[dest].s16[k]);
+ }
+ break;
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s32[k] = vwsmaccus_16(env,
+ env->vfp.vreg[src2].s16[j],
+ env->gpr[rs1],
+ env->vfp.vreg[dest].s32[k]);
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s64[k] = vwsmaccus_32(env,
+ env->vfp.vreg[src2].s32[j],
+ env->gpr[rs1],
+ env->vfp.vreg[dest].s64[k]);
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ } else {
+ vector_tail_widen(env, dest, k, width);
+ }
+ }
+ env->vfp.vstart = 0;
+}
+
+void VECTOR_HELPER(vwmacc_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int i, j, k, vl;
+ uint32_t lmul, width, src1, src2, dest, vlmax;
+
+ vl = env->vfp.vl;
+
+ lmul = vector_get_lmul(env);
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ if (vector_vtype_ill(env)
+ || vector_overlap_vm_force(vm, rd)
+ || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs1, lmul)
+ || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ vector_lmul_check_reg(env, lmul, rs1, false);
+ vector_lmul_check_reg(env, lmul, rs2, false);
+ vector_lmul_check_reg(env, lmul, rd, true);
+
+ for (i = 0; i < vlmax; i++) {
+ src1 = rs1 + (i / (VLEN / width));
+ src2 = rs2 + (i / (VLEN / width));
+ dest = rd + (i / (VLEN / (2 * width)));
+ j = i % (VLEN / width);
+ k = i % (VLEN / (2 * width));
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 8:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s16[k] +=
+ (int16_t)env->vfp.vreg[src1].s8[j]
+ * (int16_t)env->vfp.vreg[src2].s8[j];
+ }
+ break;
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s32[k] +=
+ (int32_t)env->vfp.vreg[src1].s16[j] *
+ (int32_t)env->vfp.vreg[src2].s16[j];
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s64[k] +=
+ (int64_t)env->vfp.vreg[src1].s32[j] *
+ (int64_t)env->vfp.vreg[src2].s32[j];
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ break;
+ }
+ } else {
+ vector_tail_widen(env, dest, k, width);
+ }
+ }
+ env->vfp.vstart = 0;
+}
+
+void VECTOR_HELPER(vwmacc_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int i, j, k, vl;
+ uint32_t lmul, width, src2, dest, vlmax;
+
+ vl = env->vfp.vl;
+
+ lmul = vector_get_lmul(env);
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ if (vector_vtype_ill(env)
+ || vector_overlap_vm_force(vm, rd)
+ || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ vector_lmul_check_reg(env, lmul, rs2, false);
+ vector_lmul_check_reg(env, lmul, rd, true);
+
+ for (i = 0; i < vlmax; i++) {
+ src2 = rs2 + (i / (VLEN / width));
+ dest = rd + (i / (VLEN / (2 * width)));
+ j = i % (VLEN / width);
+ k = i % (VLEN / (2 * width));
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 8:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s16[k] +=
+ (int16_t)((int8_t)env->vfp.vreg[src2].s8[j]) *
+ (int16_t)((int8_t)env->gpr[rs1]);
+ }
+ break;
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s32[k] +=
+ (int32_t)((int16_t)env->vfp.vreg[src2].s16[j]) *
+ (int32_t)((int16_t)env->gpr[rs1]);
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s64[k] +=
+ (int64_t)((int32_t)env->vfp.vreg[src2].s32[j]) *
+ (int64_t)((int32_t)env->gpr[rs1]);
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ break;
+ }
+ } else {
+ vector_tail_widen(env, dest, k, width);
+ }
+ }
+ env->vfp.vstart = 0;
+}
+
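+/*
+ * The widening FP multiply-add helpers below differ only in the softfloat
+ * muladd flags they pass (as written in this file):
+ *     vfwnmacc: float_muladd_negate_product | float_muladd_negate_c
+ *     vfwmsac:  float_muladd_negate_c
+ *     vfwnmsac: float_muladd_negate_product
+ * (the unnegated vfwmacc, earlier in the file, presumably passes 0).
+ * Operands are widened with float16_to_float32/float32_to_float64 first,
+ * so the fused operation rounds only once.
+ */
+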
+/* vfwnmacc.vv vd, vs1, vs2, vm # vd[i] = -(vs1[i] * vs2[i]) - vd[i] */
+void VECTOR_HELPER(vfwnmacc_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int width, lmul, vl, vlmax;
+ int i, j, k, dest, src1, src2;
+
+ lmul = vector_get_lmul(env);
+ vl = env->vfp.vl;
+
+ if (vector_vtype_ill(env)
+ || vector_overlap_vm_force(vm, rd)
+ || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs1, lmul)
+ || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ vector_lmul_check_reg(env, lmul, rs1, false);
+ vector_lmul_check_reg(env, lmul, rs2, false);
+ vector_lmul_check_reg(env, lmul, rd, true);
+
+ if (env->vfp.vstart >= vl) {
+ return;
+ }
+
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ for (i = 0; i < vlmax; i++) {
+ dest = rd + (i / (VLEN / (2 * width)));
+ src2 = rs2 + (i / (VLEN / width));
+ src1 = rs1 + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ k = i % (VLEN / (2 * width));
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f32[k] = float32_muladd(
+ float16_to_float32(env->vfp.vreg[src1].f16[j], true,
+ &env->fp_status),
+ float16_to_float32(env->vfp.vreg[src2].f16[j], true,
+ &env->fp_status),
+                        env->vfp.vreg[dest].f32[k],
+ float_muladd_negate_c |
+ float_muladd_negate_product,
+ &env->fp_status);
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f64[k] = float64_muladd(
+ float32_to_float64(env->vfp.vreg[src1].f32[j],
+ &env->fp_status),
+ float32_to_float64(env->vfp.vreg[src2].f32[j],
+ &env->fp_status),
+                        env->vfp.vreg[dest].f64[k],
+ float_muladd_negate_c |
+ float_muladd_negate_product,
+ &env->fp_status);
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ } else {
+ switch (width) {
+ case 16:
+                env->vfp.vreg[dest].f32[k] = 0;
+                break;
+            case 32:
+                env->vfp.vreg[dest].f64[k] = 0;
+                break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ }
+ }
+    env->vfp.vstart = 0;
+}
+
+/* vfwnmacc.vf vd, rs1, vs2, vm # vd[i] = -(f[rs1] * vs2[i]) - vd[i] */
+void VECTOR_HELPER(vfwnmacc_vf)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int width, lmul, vl, vlmax;
+ int i, j, k, dest, src2;
+
+ lmul = vector_get_lmul(env);
+ vl = env->vfp.vl;
+
+ if (vector_vtype_ill(env)
+ || vector_overlap_vm_force(vm, rd)
+ || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ vector_lmul_check_reg(env, lmul, rs2, false);
+ vector_lmul_check_reg(env, lmul, rd, true);
+
+ if (env->vfp.vstart >= vl) {
+ return;
+ }
+
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ for (i = 0; i < vlmax; i++) {
+ dest = rd + (i / (VLEN / (2 * width)));
+ src2 = rs2 + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ k = i % (VLEN / (2 * width));
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f32[k] = float32_muladd(
+ env->fpr[rs1],
+ float16_to_float32(env->vfp.vreg[src2].f16[j], true,
+ &env->fp_status),
+                        env->vfp.vreg[dest].f32[k],
+ float_muladd_negate_c |
+ float_muladd_negate_product,
+ &env->fp_status);
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f64[k] = float64_muladd(
+ env->fpr[rs1],
+ float32_to_float64(env->vfp.vreg[src2].f32[j],
+ &env->fp_status),
+                        env->vfp.vreg[dest].f64[k],
+ float_muladd_negate_c |
+ float_muladd_negate_product,
+ &env->fp_status);
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ } else {
+ switch (width) {
+ case 16:
+                env->vfp.vreg[dest].f32[k] = 0;
+                break;
+            case 32:
+                env->vfp.vreg[dest].f64[k] = 0;
+                break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ }
+ }
+    env->vfp.vstart = 0;
+}
+
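+/*
+ * vwmaccsu/vwmaccus mix operand signedness: each operand is widened
+ * according to its own signedness before the multiply, and the product is
+ * accumulated into the signed double-width destination. Illustrative case
+ * at SEW = 8: vs1 = -1 (0xff, signed) and vs2 = 200 (unsigned) give
+ * (int16_t)-1 * (uint16_t)200 = -200.
+ */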
+void VECTOR_HELPER(vwmaccsu_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int i, j, k, vl;
+ uint32_t lmul, width, src1, src2, dest, vlmax;
+
+ vl = env->vfp.vl;
+
+ lmul = vector_get_lmul(env);
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ if (vector_vtype_ill(env)
+ || vector_overlap_vm_force(vm, rd)
+ || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs1, lmul)
+ || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ vector_lmul_check_reg(env, lmul, rs1, false);
+ vector_lmul_check_reg(env, lmul, rs2, false);
+ vector_lmul_check_reg(env, lmul, rd, true);
+
+ for (i = 0; i < vlmax; i++) {
+ src1 = rs1 + (i / (VLEN / width));
+ src2 = rs2 + (i / (VLEN / width));
+ dest = rd + (i / (VLEN / (2 * width)));
+ j = i % (VLEN / width);
+ k = i % (VLEN / (2 * width));
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 8:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s16[k] +=
+ (int16_t)env->vfp.vreg[src1].s8[j]
+ * (uint16_t)env->vfp.vreg[src2].u8[j];
+ }
+ break;
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s32[k] +=
+ (int32_t)env->vfp.vreg[src1].s16[j] *
+ (uint32_t)env->vfp.vreg[src2].u16[j];
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s64[k] +=
+ (int64_t)env->vfp.vreg[src1].s32[j] *
+ (uint64_t)env->vfp.vreg[src2].u32[j];
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ break;
+ }
+ } else {
+ vector_tail_widen(env, dest, k, width);
+ }
+ }
+ env->vfp.vstart = 0;
+}
+
+void VECTOR_HELPER(vwmaccsu_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int i, j, k, vl;
+ uint32_t lmul, width, src2, dest, vlmax;
+
+ vl = env->vfp.vl;
+
+ lmul = vector_get_lmul(env);
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ if (vector_vtype_ill(env)
+ || vector_overlap_vm_force(vm, rd)
+ || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ vector_lmul_check_reg(env, lmul, rs2, false);
+ vector_lmul_check_reg(env, lmul, rd, true);
+
+ for (i = 0; i < vlmax; i++) {
+ src2 = rs2 + (i / (VLEN / width));
+ dest = rd + (i / (VLEN / (2 * width)));
+ j = i % (VLEN / width);
+ k = i % (VLEN / (2 * width));
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 8:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s16[k] +=
+ (uint16_t)((uint8_t)env->vfp.vreg[src2].u8[j]) *
+ (int16_t)((int8_t)env->gpr[rs1]);
+ }
+ break;
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s32[k] +=
+ (uint32_t)((uint16_t)env->vfp.vreg[src2].u16[j]) *
+ (int32_t)((int16_t)env->gpr[rs1]);
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s64[k] +=
+ (uint64_t)((uint32_t)env->vfp.vreg[src2].u32[j]) *
+ (int64_t)((int32_t)env->gpr[rs1]);
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ break;
+ }
+ } else {
+ vector_tail_widen(env, dest, k, width);
+ }
+ }
+ env->vfp.vstart = 0;
+}
+
+/* vfwmsac.vv vd, vs1, vs2, vm # vd[i] = +(vs1[i] * vs2[i]) - vd[i] */
+void VECTOR_HELPER(vfwmsac_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int width, lmul, vl, vlmax;
+ int i, j, k, dest, src1, src2;
+
+ lmul = vector_get_lmul(env);
+ vl = env->vfp.vl;
+ if (vector_vtype_ill(env)
+ || vector_overlap_vm_force(vm, rd)
+ || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs1, lmul)
+ || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ vector_lmul_check_reg(env, lmul, rs1, false);
+ vector_lmul_check_reg(env, lmul, rs2, false);
+ vector_lmul_check_reg(env, lmul, rd, true);
+
+ if (env->vfp.vstart >= vl) {
+ return;
+ }
+
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ for (i = 0; i < vlmax; i++) {
+ dest = rd + (i / (VLEN / (2 * width)));
+ src2 = rs2 + (i / (VLEN / width));
+ src1 = rs1 + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ k = i % (VLEN / (2 * width));
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f32[k] = float32_muladd(
+ float16_to_float32(env->vfp.vreg[src1].f16[j], true,
+ &env->fp_status),
+ float16_to_float32(env->vfp.vreg[src2].f16[j], true,
+ &env->fp_status),
+                        env->vfp.vreg[dest].f32[k],
+ float_muladd_negate_c,
+ &env->fp_status);
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f64[k] = float64_muladd(
+ float32_to_float64(env->vfp.vreg[src1].f32[j],
+ &env->fp_status),
+ float32_to_float64(env->vfp.vreg[src2].f32[j],
+ &env->fp_status),
+                        env->vfp.vreg[dest].f64[k],
+ float_muladd_negate_c,
+ &env->fp_status);
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ } else {
+ switch (width) {
+ case 16:
+                env->vfp.vreg[dest].f32[k] = 0;
+                break;
+            case 32:
+                env->vfp.vreg[dest].f64[k] = 0;
+                break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ }
+ }
+    env->vfp.vstart = 0;
+}
+
+/* vfwmsac.vf vd, rs1, vs2, vm # vd[i] = +(f[rs1] * vs2[i]) - vd[i] */
+void VECTOR_HELPER(vfwmsac_vf)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int width, lmul, vl, vlmax;
+ int i, j, k, dest, src2;
+
+ lmul = vector_get_lmul(env);
+ vl = env->vfp.vl;
+ if (vector_vtype_ill(env)
+ || vector_overlap_vm_force(vm, rd)
+ || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ vector_lmul_check_reg(env, lmul, rs2, false);
+ vector_lmul_check_reg(env, lmul, rd, true);
+
+ if (env->vfp.vstart >= vl) {
+ return;
+ }
+
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ for (i = 0; i < vlmax; i++) {
+ dest = rd + (i / (VLEN / (2 * width)));
+ src2 = rs2 + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ k = i % (VLEN / (2 * width));
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f32[k] = float32_muladd(
+ env->fpr[rs1],
+ float16_to_float32(env->vfp.vreg[src2].f16[j], true,
+ &env->fp_status),
+                        env->vfp.vreg[dest].f32[k],
+ float_muladd_negate_c,
+ &env->fp_status);
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f64[k] = float64_muladd(
+ env->fpr[rs1],
+ float32_to_float64(env->vfp.vreg[src2].f32[j],
+ &env->fp_status),
+                        env->vfp.vreg[dest].f64[k],
+ float_muladd_negate_c,
+ &env->fp_status);
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ } else {
+ switch (width) {
+ case 16:
+                env->vfp.vreg[dest].f32[k] = 0;
+                break;
+            case 32:
+                env->vfp.vreg[dest].f64[k] = 0;
+                break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ }
+ }
+    env->vfp.vstart = 0;
+}
+
+void VECTOR_HELPER(vwmaccus_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int i, j, k, vl;
+ uint32_t lmul, width, src2, dest, vlmax;
+
+ vl = env->vfp.vl;
+
+ lmul = vector_get_lmul(env);
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ if (vector_vtype_ill(env)
+ || vector_overlap_vm_force(vm, rd)
+ || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ vector_lmul_check_reg(env, lmul, rs2, false);
+ vector_lmul_check_reg(env, lmul, rd, true);
+
+ for (i = 0; i < vlmax; i++) {
+ src2 = rs2 + (i / (VLEN / width));
+ dest = rd + (i / (VLEN / (2 * width)));
+ j = i % (VLEN / width);
+ k = i % (VLEN / (2 * width));
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 8:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s16[k] +=
+ (int16_t)((int8_t)env->vfp.vreg[src2].s8[j]) *
+ (uint16_t)((uint8_t)env->gpr[rs1]);
+ }
+ break;
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s32[k] +=
+ (int32_t)((int16_t)env->vfp.vreg[src2].s16[j]) *
+ (uint32_t)((uint16_t)env->gpr[rs1]);
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s64[k] +=
+ (int64_t)((int32_t)env->vfp.vreg[src2].s32[j]) *
+ (uint64_t)((uint32_t)env->gpr[rs1]);
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ break;
+ }
+ } else {
+ vector_tail_widen(env, dest, k, width);
+ }
+ }
+ env->vfp.vstart = 0;
+}
+
+/* vfwnmsac.vv vd, vs1, vs2, vm # vd[i] = -(vs1[i] * vs2[i]) + vd[i] */
+void VECTOR_HELPER(vfwnmsac_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int width, lmul, vl, vlmax;
+ int i, j, k, dest, src1, src2;
+
+ lmul = vector_get_lmul(env);
+ vl = env->vfp.vl;
+ if (vector_vtype_ill(env)
+ || vector_overlap_vm_force(vm, rd)
+ || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs1, lmul)
+ || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ vector_lmul_check_reg(env, lmul, rs1, false);
+ vector_lmul_check_reg(env, lmul, rs2, false);
+ vector_lmul_check_reg(env, lmul, rd, true);
+
+ if (env->vfp.vstart >= vl) {
+ return;
+ }
+
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ for (i = 0; i < vlmax; i++) {
+ dest = rd + (i / (VLEN / (2 * width)));
+ src2 = rs2 + (i / (VLEN / width));
+ src1 = rs1 + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ k = i % (VLEN / (2 * width));
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f32[k] = float32_muladd(
+ float16_to_float32(env->vfp.vreg[src1].f16[j], true,
+ &env->fp_status),
+ float16_to_float32(env->vfp.vreg[src2].f16[j], true,
+ &env->fp_status),
+                        env->vfp.vreg[dest].f32[k],
+ float_muladd_negate_product,
+ &env->fp_status);
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f64[k] = float64_muladd(
+ float32_to_float64(env->vfp.vreg[src1].f32[j],
+ &env->fp_status),
+ float32_to_float64(env->vfp.vreg[src2].f32[j],
+ &env->fp_status),
+                        env->vfp.vreg[dest].f64[k],
+ float_muladd_negate_product,
+ &env->fp_status);
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ } else {
+ switch (width) {
+ case 16:
+                env->vfp.vreg[dest].f32[k] = 0;
+                break;
+            case 32:
+                env->vfp.vreg[dest].f64[k] = 0;
+                break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ }
+ }
+    env->vfp.vstart = 0;
+}
+
+/* vfwnmsac.vf vd, rs1, vs2, vm # vd[i] = -(f[rs1] * vs2[i]) + vd[i] */
+void VECTOR_HELPER(vfwnmsac_vf)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int width, lmul, vl, vlmax;
+ int i, j, k, dest, src2;
+
+ lmul = vector_get_lmul(env);
+ vl = env->vfp.vl;
+ if (vector_vtype_ill(env)
+ || vector_overlap_vm_force(vm, rd)
+ || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ vector_lmul_check_reg(env, lmul, rs2, false);
+ vector_lmul_check_reg(env, lmul, rd, true);
+
+ if (env->vfp.vstart >= vl) {
+ return;
+ }
+
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ for (i = 0; i < vlmax; i++) {
+ dest = rd + (i / (VLEN / (2 * width)));
+ src2 = rs2 + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ k = i % (VLEN / (2 * width));
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f32[k] = float32_muladd(
+ env->fpr[rs1],
+ float16_to_float32(env->vfp.vreg[src2].f16[j], true,
+ &env->fp_status),
+                        env->vfp.vreg[dest].f32[k],
+ float_muladd_negate_product,
+ &env->fp_status);
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f64[k] = float64_muladd(
+ env->fpr[rs1],
+ float32_to_float64(env->vfp.vreg[src2].f32[j],
+ &env->fp_status),
+                        env->vfp.vreg[dest].f64[k],
+ float_muladd_negate_product,
+ &env->fp_status);
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ } else {
+ switch (width) {
+ case 16:
+                env->vfp.vreg[dest].f32[k] = 0;
+                break;
+            case 32:
+                env->vfp.vreg[dest].f64[k] = 0;
+                break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ }
+ }
+    env->vfp.vstart = 0;
+}
+
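+/*
+ * The single-width FP unary helpers below delegate per element to
+ * softfloat (e.g. float16_sqrt/float32_sqrt/float64_sqrt), so IEEE
+ * exception flags such as invalid on sqrt of a negative accumulate in
+ * env->fp_status and reach fflags through the usual CSR path.
+ */
+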
+/* vfsqrt.v vd, vs2, vm # Vector-vector square root */
+void VECTOR_HELPER(vfsqrt_v)(CPURISCVState *env, uint32_t vm, uint32_t rs2,
+ uint32_t rd)
+{
+ int width, lmul, vl, vlmax;
+ int i, j, dest, src2;
+
+ lmul = vector_get_lmul(env);
+ vl = env->vfp.vl;
+ if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ vector_lmul_check_reg(env, lmul, rs2, false);
+ vector_lmul_check_reg(env, lmul, rd, false);
+
+ if (env->vfp.vstart >= vl) {
+ return;
+ }
+
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ for (i = 0; i < vlmax; i++) {
+ dest = rd + (i / (VLEN / width));
+ src2 = rs2 + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f16[j] = float16_sqrt(
+ env->vfp.vreg[src2].f16[j],
+ &env->fp_status);
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f32[j] = float32_sqrt(
+ env->vfp.vreg[src2].f32[j],
+ &env->fp_status);
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f64[j] = float64_sqrt(
+ env->vfp.vreg[src2].f64[j],
+ &env->fp_status);
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ } else {
+ switch (width) {
+ case 16:
+                env->vfp.vreg[dest].f16[j] = 0;
+                break;
+            case 32:
+                env->vfp.vreg[dest].f32[j] = 0;
+                break;
+            case 64:
+                env->vfp.vreg[dest].f64[j] = 0;
+                break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ }
+ }
+    env->vfp.vstart = 0;
+}
+
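+/*
+ * vfclass reuses the scalar helpers helper_fclass_h/s/d, which return the
+ * standard RISC-V 10-bit classification mask (one-hot among -inf, -normal,
+ * -subnormal, -0, +0, +subnormal, +normal, +inf, signaling NaN, quiet NaN).
+ */
+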
+/* vfclass.v vd, vs2, vm # Vector-vector */
+void VECTOR_HELPER(vfclass_v)(CPURISCVState *env, uint32_t vm, uint32_t rs2,
+ uint32_t rd)
+{
+ int width, lmul, vl, vlmax;
+ int i, j, dest, src2;
+
+ lmul = vector_get_lmul(env);
+ vl = env->vfp.vl;
+
+ if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ vector_lmul_check_reg(env, lmul, rs2, false);
+ vector_lmul_check_reg(env, lmul, rd, false);
+
+ if (env->vfp.vstart >= vl) {
+ return;
+ }
+
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ for (i = 0; i < vlmax; i++) {
+ dest = rd + (i / (VLEN / width));
+ src2 = rs2 + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].u16[j] = helper_fclass_h(
+ env->vfp.vreg[src2].f16[j]);
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].u32[j] = helper_fclass_s(
+ env->vfp.vreg[src2].f32[j]);
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].u64[j] = helper_fclass_d(
+ env->vfp.vreg[src2].f64[j]);
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ } else {
+ vector_tail_fcommon(env, dest, j, width);
+ }
+ }
+    env->vfp.vstart = 0;
+}
+
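+/*
+ * The vfcvt.* helpers convert in place at a single width, using the
+ * rounding mode already installed in env->fp_status; out-of-range inputs
+ * follow the softfloat convention of raising the invalid flag and
+ * returning a saturated result.
+ */
+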
+/* vfcvt.xu.f.v vd, vs2, vm # Convert float to unsigned integer. */
+void VECTOR_HELPER(vfcvt_xu_f_v)(CPURISCVState *env, uint32_t vm,
+ uint32_t rs2, uint32_t rd)
+{
+ int width, lmul, vl, vlmax;
+ int i, j, dest, src2;
+
+ lmul = vector_get_lmul(env);
+ vl = env->vfp.vl;
+
+ if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ vector_lmul_check_reg(env, lmul, rs2, false);
+ vector_lmul_check_reg(env, lmul, rd, false);
+
+ if (env->vfp.vstart >= vl) {
+ return;
+ }
+
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ for (i = 0; i < vlmax; i++) {
+ dest = rd + (i / (VLEN / width));
+ src2 = rs2 + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].u16[j] = float16_to_uint16(
+ env->vfp.vreg[src2].f16[j],
+ &env->fp_status);
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].u32[j] = float32_to_uint32(
+ env->vfp.vreg[src2].f32[j],
+ &env->fp_status);
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].u64[j] = float64_to_uint64(
+ env->vfp.vreg[src2].f64[j],
+ &env->fp_status);
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ } else {
+ vector_tail_fcommon(env, dest, j, width);
+ }
+ }
+    env->vfp.vstart = 0;
+}
+
+/* vfcvt.x.f.v vd, vs2, vm # Convert float to signed integer. */
+void VECTOR_HELPER(vfcvt_x_f_v)(CPURISCVState *env, uint32_t vm,
+ uint32_t rs2, uint32_t rd)
+{
+ int width, lmul, vl, vlmax;
+ int i, j, dest, src2;
+
+ lmul = vector_get_lmul(env);
+ vl = env->vfp.vl;
+
+ if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ vector_lmul_check_reg(env, lmul, rs2, false);
+ vector_lmul_check_reg(env, lmul, rd, false);
+
+ if (env->vfp.vstart >= vl) {
+ return;
+ }
+
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ for (i = 0; i < vlmax; i++) {
+ dest = rd + (i / (VLEN / width));
+ src2 = rs2 + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s16[j] = float16_to_int16(
+ env->vfp.vreg[src2].f16[j],
+ &env->fp_status);
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s32[j] = float32_to_int32(
+ env->vfp.vreg[src2].f32[j],
+ &env->fp_status);
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s64[j] = float64_to_int64(
+ env->vfp.vreg[src2].f64[j],
+ &env->fp_status);
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ } else {
+ vector_tail_fcommon(env, dest, j, width);
+ }
+ }
+    env->vfp.vstart = 0;
+}
+
+/* vfcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to float. */
+void VECTOR_HELPER(vfcvt_f_xu_v)(CPURISCVState *env, uint32_t vm,
+ uint32_t rs2, uint32_t rd)
+{
+ int width, lmul, vl, vlmax;
+ int i, j, dest, src2;
+
+ lmul = vector_get_lmul(env);
+ vl = env->vfp.vl;
+
+ if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ vector_lmul_check_reg(env, lmul, rs2, false);
+ vector_lmul_check_reg(env, lmul, rd, false);
+
+ if (env->vfp.vstart >= vl) {
+ return;
+ }
+
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ for (i = 0; i < vlmax; i++) {
+ dest = rd + (i / (VLEN / width));
+ src2 = rs2 + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f16[j] = uint16_to_float16(
+ env->vfp.vreg[src2].u16[j],
+ &env->fp_status);
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f32[j] = uint32_to_float32(
+ env->vfp.vreg[src2].u32[j],
+ &env->fp_status);
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f64[j] = uint64_to_float64(
+ env->vfp.vreg[src2].u64[j],
+ &env->fp_status);
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ } else {
+ vector_tail_fcommon(env, dest, j, width);
+ }
+ }
+    env->vfp.vstart = 0;
+}
+
+/* vfcvt.f.x.v vd, vs2, vm # Convert integer to float. */
+void VECTOR_HELPER(vfcvt_f_x_v)(CPURISCVState *env, uint32_t vm,
+ uint32_t rs2, uint32_t rd)
+{
+ int width, lmul, vl, vlmax;
+ int i, j, dest, src2;
+
+ lmul = vector_get_lmul(env);
+ vl = env->vfp.vl;
+
+ if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ vector_lmul_check_reg(env, lmul, rs2, false);
+ vector_lmul_check_reg(env, lmul, rd, false);
+
+ if (env->vfp.vstart >= vl) {
+ return;
+ }
+
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ for (i = 0; i < vlmax; i++) {
+ dest = rd + (i / (VLEN / width));
+ src2 = rs2 + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f16[j] = int16_to_float16(
+ env->vfp.vreg[src2].s16[j],
+ &env->fp_status);
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f32[j] = int32_to_float32(
+ env->vfp.vreg[src2].s32[j],
+ &env->fp_status);
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f64[j] = int64_to_float64(
+ env->vfp.vreg[src2].s64[j],
+ &env->fp_status);
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ } else {
+ vector_tail_fcommon(env, dest, j, width);
+ }
+ }
+    env->vfp.vstart = 0;
+}
+
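+/*
+ * The vfwcvt.* helpers read at SEW and write at 2 * SEW, so the
+ * destination occupies a 2 * lmul register group; the lmul > 4 check in
+ * each helper rejects configurations whose widened group would not fit in
+ * the 32-register file.
+ */
+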
+/* vfwcvt.xu.f.v vd, vs2, vm # Convert float to double-width unsigned integer. */
+void VECTOR_HELPER(vfwcvt_xu_f_v)(CPURISCVState *env, uint32_t vm,
+ uint32_t rs2, uint32_t rd)
+{
+ int width, lmul, vl, vlmax;
+ int i, j, k, dest, src2;
+
+ lmul = vector_get_lmul(env);
+ vl = env->vfp.vl;
+
+ if (vector_vtype_ill(env)
+ || vector_overlap_vm_force(vm, rd)
+ || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ vector_lmul_check_reg(env, lmul, rs2, false);
+ vector_lmul_check_reg(env, lmul, rd, true);
+
+ if (lmul > 4) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ if (env->vfp.vstart >= vl) {
+ return;
+ }
+
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ for (i = 0; i < vlmax; i++) {
+ dest = rd + (i / (VLEN / (2 * width)));
+ src2 = rs2 + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ k = i % (VLEN / (2 * width));
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].u32[k] = float16_to_uint32(
+ env->vfp.vreg[src2].f16[j],
+ &env->fp_status);
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].u64[k] = float32_to_uint64(
+ env->vfp.vreg[src2].f32[j],
+ &env->fp_status);
+ }
+ break;
+ default:
+                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+                return;
+            }
+        } else {
+            vector_tail_fwiden(env, dest, k, width);
+ }
+ }
+    env->vfp.vstart = 0;
+}
+
+/* vfwcvt.x.f.v vd, vs2, vm # Convert float to double-width signed integer. */
+void VECTOR_HELPER(vfwcvt_x_f_v)(CPURISCVState *env, uint32_t vm,
+ uint32_t rs2, uint32_t rd)
+{
+ int width, lmul, vl, vlmax;
+ int i, j, k, dest, src2;
+
+ lmul = vector_get_lmul(env);
+ vl = env->vfp.vl;
+
+ if (vector_vtype_ill(env)
+ || vector_overlap_vm_force(vm, rd)
+ || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ vector_lmul_check_reg(env, lmul, rs2, false);
+ vector_lmul_check_reg(env, lmul, rd, true);
+
+ if (lmul > 4) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ if (env->vfp.vstart >= vl) {
+ return;
+ }
+
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ for (i = 0; i < vlmax; i++) {
+ dest = rd + (i / (VLEN / (2 * width)));
+ src2 = rs2 + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ k = i % (VLEN / (2 * width));
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s32[k] = float16_to_int32(
+ env->vfp.vreg[src2].f16[j],
+ &env->fp_status);
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s64[k] = float32_to_int64(
+ env->vfp.vreg[src2].f32[j],
+ &env->fp_status);
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ } else {
+ vector_tail_fwiden(env, dest, k, width);
+ }
+ }
+    env->vfp.vstart = 0;
+}
+
+/* vfwcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to double-width float */
+void VECTOR_HELPER(vfwcvt_f_xu_v)(CPURISCVState *env, uint32_t vm,
+ uint32_t rs2, uint32_t rd)
+{
+ int width, lmul, vl, vlmax;
+ int i, j, k, dest, src2;
+
+ lmul = vector_get_lmul(env);
+ vl = env->vfp.vl;
+
+ if (vector_vtype_ill(env)
+ || vector_overlap_vm_force(vm, rd)
+ || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ vector_lmul_check_reg(env, lmul, rs2, false);
+ vector_lmul_check_reg(env, lmul, rd, true);
+
+ if (lmul > 4) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ if (env->vfp.vstart >= vl) {
+ return;
+ }
+
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ for (i = 0; i < vlmax; i++) {
+ dest = rd + (i / (VLEN / (2 * width)));
+ src2 = rs2 + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ k = i % (VLEN / (2 * width));
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f32[k] = uint16_to_float32(
+ env->vfp.vreg[src2].u16[j],
+ &env->fp_status);
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f64[k] = uint32_to_float64(
+ env->vfp.vreg[src2].u32[j],
+ &env->fp_status);
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ } else {
+ vector_tail_fwiden(env, dest, k, width);
+ }
+ }
+    env->vfp.vstart = 0;
+}
+
+/* vfwcvt.f.x.v vd, vs2, vm # Convert integer to double-width float. */
+void VECTOR_HELPER(vfwcvt_f_x_v)(CPURISCVState *env, uint32_t vm,
+ uint32_t rs2, uint32_t rd)
+{
+ int width, lmul, vl, vlmax;
+ int i, j, k, dest, src2;
+
+ lmul = vector_get_lmul(env);
+ vl = env->vfp.vl;
+ if (vector_vtype_ill(env)
+ || vector_overlap_vm_force(vm, rd)
+ || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ vector_lmul_check_reg(env, lmul, rs2, false);
+ vector_lmul_check_reg(env, lmul, rd, true);
+
+ if (lmul > 4) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ if (env->vfp.vstart >= vl) {
+ return;
+ }
+
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ for (i = 0; i < vlmax; i++) {
+ dest = rd + (i / (VLEN / (2 * width)));
+ src2 = rs2 + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ k = i % (VLEN / (2 * width));
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f32[k] = int16_to_float32(
+ env->vfp.vreg[src2].s16[j],
+ &env->fp_status);
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f64[k] = int32_to_float64(
+ env->vfp.vreg[src2].s32[j],
+ &env->fp_status);
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ } else {
+ vector_tail_fwiden(env, dest, k, width);
+ }
+ }
+    env->vfp.vstart = 0;
+}
+
+/*
+ * vfwcvt.f.f.v vd, vs2, vm #
+ * Convert single-width float to double-width float.
+ */
+void VECTOR_HELPER(vfwcvt_f_f_v)(CPURISCVState *env, uint32_t vm,
+ uint32_t rs2, uint32_t rd)
+{
+ int width, lmul, vl, vlmax;
+ int i, j, k, dest, src2;
+
+ lmul = vector_get_lmul(env);
+ vl = env->vfp.vl;
+ if (vector_vtype_ill(env)
+ || vector_overlap_vm_force(vm, rd)
+ || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ vector_lmul_check_reg(env, lmul, rs2, false);
+ vector_lmul_check_reg(env, lmul, rd, true);
+
+ if (lmul > 4) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ if (env->vfp.vstart >= vl) {
+ return;
+ }
+
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ for (i = 0; i < vlmax; i++) {
+ dest = rd + (i / (VLEN / (2 * width)));
+ src2 = rs2 + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ k = i % (VLEN / (2 * width));
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f32[k] = float16_to_float32(
+ env->vfp.vreg[src2].f16[j],
+ true,
+ &env->fp_status);
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f64[k] = float32_to_float64(
+ env->vfp.vreg[src2].f32[j],
+ &env->fp_status);
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ } else {
+ vector_tail_fwiden(env, dest, k, width);
+ }
+ }
+    env->vfp.vstart = 0;
+}
+
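+/*
+ * The vfncvt.* helpers are the mirror image of vfwcvt.*: the source is a
+ * 2 * lmul register group and the result is written at SEW, so here j
+ * indexes the wide source element and k the narrow destination element,
+ * the opposite of the widening helpers above.
+ */
+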
+/* vfncvt.xu.f.v vd, vs2, vm # Convert double-width float to unsigned integer. */
+void VECTOR_HELPER(vfncvt_xu_f_v)(CPURISCVState *env, uint32_t vm,
+ uint32_t rs2, uint32_t rd)
+{
+ int width, lmul, vl, vlmax;
+ int i, j, k, dest, src2;
+
+ lmul = vector_get_lmul(env);
+ vl = env->vfp.vl;
+ if (vector_vtype_ill(env) ||
+ vector_overlap_vm_common(lmul, vm, rd) ||
+ vector_overlap_dstgp_srcgp(rd, lmul, rs2, 2 * lmul)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ vector_lmul_check_reg(env, lmul, rs2, true);
+ vector_lmul_check_reg(env, lmul, rd, false);
+
+ if (lmul > 4) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ if (env->vfp.vstart >= vl) {
+ return;
+ }
+
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ for (i = 0; i < vlmax; i++) {
+ dest = rd + (i / (VLEN / width));
+ src2 = rs2 + (i / (VLEN / (2 * width)));
+ k = i % (VLEN / width);
+ j = i % (VLEN / (2 * width));
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].u16[k] = float32_to_uint16(
+ env->vfp.vreg[src2].f32[j],
+ &env->fp_status);
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].u32[k] = float64_to_uint32(
+ env->vfp.vreg[src2].f64[j],
+ &env->fp_status);
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ } else {
+ vector_tail_fnarrow(env, dest, k, width);
+ }
+ }
+    env->vfp.vstart = 0;
+}
+
+/* vfncvt.x.f.v vd, vs2, vm # Convert double-width float to signed integer. */
+void VECTOR_HELPER(vfncvt_x_f_v)(CPURISCVState *env, uint32_t vm,
+ uint32_t rs2, uint32_t rd)
+{
+ int width, lmul, vl, vlmax;
+ int i, j, k, dest, src2;
+
+ lmul = vector_get_lmul(env);
+ vl = env->vfp.vl;
+ if (vector_vtype_ill(env) ||
+ vector_overlap_vm_common(lmul, vm, rd) ||
+ vector_overlap_dstgp_srcgp(rd, lmul, rs2, 2 * lmul)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ vector_lmul_check_reg(env, lmul, rs2, true);
+ vector_lmul_check_reg(env, lmul, rd, false);
+
+ if (lmul > 4) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ if (env->vfp.vstart >= vl) {
+ return;
+ }
+
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ for (i = 0; i < vlmax; i++) {
+ dest = rd + (i / (VLEN / width));
+ src2 = rs2 + (i / (VLEN / (2 * width)));
+ k = i % (VLEN / width);
+ j = i % (VLEN / (2 * width));
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s16[k] = float32_to_int16(
+ env->vfp.vreg[src2].f32[j],
+ &env->fp_status);
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].s32[k] = float64_to_int32(
+ env->vfp.vreg[src2].f64[j],
+ &env->fp_status);
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ } else {
+ vector_tail_fnarrow(env, dest, k, width);
+ }
+ }
+    env->vfp.vstart = 0;
+}
+
+/* vfncvt.f.xu.v vd, vs2, vm # Convert double-width unsigned integer to float */
+void VECTOR_HELPER(vfncvt_f_xu_v)(CPURISCVState *env, uint32_t vm,
+ uint32_t rs2, uint32_t rd)
+{
+ int width, lmul, vl, vlmax;
+ int i, j, k, dest, src2;
+
+ lmul = vector_get_lmul(env);
+ vl = env->vfp.vl;
+
+ if (vector_vtype_ill(env) ||
+ vector_overlap_vm_common(lmul, vm, rd) ||
+ vector_overlap_dstgp_srcgp(rd, lmul, rs2, 2 * lmul)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ vector_lmul_check_reg(env, lmul, rs2, true);
+ vector_lmul_check_reg(env, lmul, rd, false);
+
+ if (lmul > 4) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ if (env->vfp.vstart >= vl) {
+ return;
+ }
+
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ for (i = 0; i < vlmax; i++) {
+ dest = rd + (i / (VLEN / width));
+ src2 = rs2 + (i / (VLEN / (2 * width)));
+ k = i % (VLEN / width);
+ j = i % (VLEN / (2 * width));
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f16[k] = uint32_to_float16(
+ env->vfp.vreg[src2].u32[j],
+ &env->fp_status);
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f32[k] = uint64_to_float32(
+ env->vfp.vreg[src2].u64[j],
+ &env->fp_status);
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ } else {
+ vector_tail_fnarrow(env, dest, k, width);
+ }
+ }
+    env->vfp.vstart = 0;
+}
+
+/* vfncvt.f.x.v vd, vs2, vm # Convert double-width integer to float. */
+void VECTOR_HELPER(vfncvt_f_x_v)(CPURISCVState *env, uint32_t vm,
+ uint32_t rs2, uint32_t rd)
+{
+ int width, lmul, vl, vlmax;
+ int i, j, k, dest, src2;
+
+ lmul = vector_get_lmul(env);
+ vl = env->vfp.vl;
+ if (vector_vtype_ill(env) ||
+ vector_overlap_vm_common(lmul, vm, rd) ||
+ vector_overlap_dstgp_srcgp(rd, lmul, rs2, 2 * lmul)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ vector_lmul_check_reg(env, lmul, rs2, true);
+ vector_lmul_check_reg(env, lmul, rd, false);
+
+ if (lmul > 4) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ if (env->vfp.vstart >= vl) {
+ return;
+ }
+
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ for (i = 0; i < vlmax; i++) {
+ dest = rd + (i / (VLEN / width));
+ src2 = rs2 + (i / (VLEN / (2 * width)));
+ k = i % (VLEN / width);
+ j = i % (VLEN / (2 * width));
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f16[k] = int32_to_float16(
+ env->vfp.vreg[src2].s32[j],
+ &env->fp_status);
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f32[k] = int64_to_float32(
+ env->vfp.vreg[src2].s64[j],
+ &env->fp_status);
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ } else {
+ vector_tail_fnarrow(env, dest, k, width);
+ }
+ }
+    env->vfp.vstart = 0;
+}
+
+/* vfncvt.f.f.v vd, vs2, vm # Convert double-width float to single-width float. */
+void VECTOR_HELPER(vfncvt_f_f_v)(CPURISCVState *env, uint32_t vm,
+ uint32_t rs2, uint32_t rd)
+{
+ int width, lmul, vl, vlmax;
+ int i, j, k, dest, src2;
+
+ lmul = vector_get_lmul(env);
+ vl = env->vfp.vl;
+ if (vector_vtype_ill(env) ||
+ vector_overlap_vm_common(lmul, vm, rd) ||
+ vector_overlap_dstgp_srcgp(rd, lmul, rs2, 2 * lmul)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ vector_lmul_check_reg(env, lmul, rs2, true);
+ vector_lmul_check_reg(env, lmul, rd, false);
+
+ if (lmul > 4) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ if (env->vfp.vstart >= vl) {
+ return;
+ }
+
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ for (i = 0; i < vlmax; i++) {
+ dest = rd + (i / (VLEN / width));
+ src2 = rs2 + (i / (VLEN / (2 * width)));
+ k = i % (VLEN / width);
+ j = i % (VLEN / (2 * width));
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f16[k] = float32_to_float16(
+ env->vfp.vreg[src2].f32[j],
+ true,
+ &env->fp_status);
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f32[k] = float64_to_float32(
+ env->vfp.vreg[src2].f64[j],
+ &env->fp_status);
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ } else {
+ vector_tail_fnarrow(env, dest, k, width);
+ }
+ }
+    env->vfp.vstart = 0;
+}
+
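+/*
+ * Byte segment loads: nf + 1 fields are loaded per element, so field k of
+ * element i sits at byte offset i * (nf + 1) + k from gpr[rs1], and field
+ * k is written to register group rd + k * lmul. Illustrative case: with
+ * nf = 2, the three fields of element 5 are read from offsets 15, 16
+ * and 17.
+ */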
+void VECTOR_HELPER(vlbu_v)(CPURISCVState *env, uint32_t nf, uint32_t vm,
+ uint32_t rs1, uint32_t rd)
+{
+ int i, j, k, vl, vlmax, lmul, width, dest, read;
+
+ vl = env->vfp.vl;
+
+ lmul = vector_get_lmul(env);
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ if (lmul * (nf + 1) > 32) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ vector_lmul_check_reg(env, lmul, rd, false);
+
+ for (i = 0; i < vlmax; i++) {
+ dest = rd + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ k = nf;
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 8:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ while (k >= 0) {
+ read = i * (nf + 1) + k;
+ env->vfp.vreg[dest + k * lmul].u8[j] =
+ cpu_ldub_data(env, env->gpr[rs1] + read);
+ k--;
+ }
+ env->vfp.vstart++;
+ }
+ break;
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ while (k >= 0) {
+ read = i * (nf + 1) + k;
+ env->vfp.vreg[dest + k * lmul].u16[j] =
+ cpu_ldub_data(env, env->gpr[rs1] + read);
+ k--;
+ }
+ env->vfp.vstart++;
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ while (k >= 0) {
+ read = i * (nf + 1) + k;
+ env->vfp.vreg[dest + k * lmul].u32[j] =
+ cpu_ldub_data(env, env->gpr[rs1] + read);
+ k--;
+ }
+ env->vfp.vstart++;
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ while (k >= 0) {
+ read = i * (nf + 1) + k;
+ env->vfp.vreg[dest + k * lmul].u64[j] =
+ cpu_ldub_data(env, env->gpr[rs1] + read);
+ k--;
+ }
+ env->vfp.vstart++;
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ } else {
+ vector_tail_segment(env, dest, j, width, k, lmul);
+ }
+ }
+ env->vfp.vstart = 0;
+}
+
+void VECTOR_HELPER(vlb_v)(CPURISCVState *env, uint32_t nf, uint32_t vm,
+ uint32_t rs1, uint32_t rd)
+{
+ int i, j, k, vl, vlmax, lmul, width, dest, read;
+
+ vl = env->vfp.vl;
+
+ lmul = vector_get_lmul(env);
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ if (lmul * (nf + 1) > 32) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ vector_lmul_check_reg(env, lmul, rd, false);
+
+ for (i = 0; i < vlmax; i++) {
+ dest = rd + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ k = nf;
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 8:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ while (k >= 0) {
+ read = i * (nf + 1) + k;
+ env->vfp.vreg[dest + k * lmul].s8[j] =
+ cpu_ldsb_data(env, env->gpr[rs1] + read);
+ k--;
+ }
+ env->vfp.vstart++;
+ }
+ break;
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ while (k >= 0) {
+ read = i * (nf + 1) + k;
+ env->vfp.vreg[dest + k * lmul].s16[j] = sign_extend(
+ cpu_ldsb_data(env, env->gpr[rs1] + read), 8);
+ k--;
+ }
+ env->vfp.vstart++;
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ while (k >= 0) {
+ read = i * (nf + 1) + k;
+ env->vfp.vreg[dest + k * lmul].s32[j] = sign_extend(
+ cpu_ldsb_data(env, env->gpr[rs1] + read), 8);
+ k--;
+ }
+ env->vfp.vstart++;
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ while (k >= 0) {
+ read = i * (nf + 1) + k;
+ env->vfp.vreg[dest + k * lmul].s64[j] = sign_extend(
+ cpu_ldsb_data(env, env->gpr[rs1] + read), 8);
+ k--;
+ }
+ env->vfp.vstart++;
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ } else {
+ vector_tail_segment(env, dest, j, width, k, lmul);
+ }
+ }
+ env->vfp.vstart = 0;
+}
+
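+/*
+ * Strided variants: the byte stride is taken from gpr[rs2], so field k of
+ * element i is loaded from gpr[rs1] + i * gpr[rs2] + k; the nf + 1 fields
+ * of one segment stay contiguous while consecutive segments sit one
+ * stride apart.
+ */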
+void VECTOR_HELPER(vlsbu_v)(CPURISCVState *env, uint32_t nf, uint32_t vm,
+ uint32_t rs1, uint32_t rs2, uint32_t rd)
+{
+ int i, j, k, vl, vlmax, lmul, width, dest, read;
+
+ vl = env->vfp.vl;
+
+ lmul = vector_get_lmul(env);
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ if (lmul * (nf + 1) > 32) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ vector_lmul_check_reg(env, lmul, rd, false);
+
+ for (i = 0; i < vlmax; i++) {
+ dest = rd + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ k = nf;
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 8:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ while (k >= 0) {
+ read = i * env->gpr[rs2] + k;
+ env->vfp.vreg[dest + k * lmul].u8[j] =
+ cpu_ldub_data(env, env->gpr[rs1] + read);
+ k--;
+ }
+ env->vfp.vstart++;
+ }
+ break;
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ while (k >= 0) {
+ read = i * env->gpr[rs2] + k;
+ env->vfp.vreg[dest + k * lmul].u16[j] =
+ cpu_ldub_data(env, env->gpr[rs1] + read);
+ k--;
+ }
+ env->vfp.vstart++;
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ while (k >= 0) {
+ read = i * env->gpr[rs2] + k;
+ env->vfp.vreg[dest + k * lmul].u32[j] =
+ cpu_ldub_data(env, env->gpr[rs1] + read);
+ k--;
+ }
+ env->vfp.vstart++;
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ while (k >= 0) {
+ read = i * env->gpr[rs2] + k;
+ env->vfp.vreg[dest + k * lmul].u64[j] =
+ cpu_ldub_data(env, env->gpr[rs1] + read);
+ k--;
+ }
+ env->vfp.vstart++;
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ } else {
+ vector_tail_segment(env, dest, j, width, k, lmul);
+ }
+ }
+ env->vfp.vstart = 0;
+}
+
+void VECTOR_HELPER(vlsb_v)(CPURISCVState *env, uint32_t nf, uint32_t vm,
+ uint32_t rs1, uint32_t rs2, uint32_t rd)
+{
+ int i, j, k, vl, vlmax, lmul, width, dest, read;
+
+ vl = env->vfp.vl;
+
+ lmul = vector_get_lmul(env);
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ if (lmul * (nf + 1) > 32) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ vector_lmul_check_reg(env, lmul, rd, false);
+
+ for (i = 0; i < vlmax; i++) {
+ dest = rd + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ k = nf;
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 8:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ while (k >= 0) {
+ read = i * env->gpr[rs2] + k;
+ env->vfp.vreg[dest + k * lmul].s8[j] =
+ cpu_ldsb_data(env, env->gpr[rs1] + read);
+ k--;
+ }
+ env->vfp.vstart++;
+ }
+ break;
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ while (k >= 0) {
+ read = i * env->gpr[rs2] + k;
+ env->vfp.vreg[dest + k * lmul].s16[j] = sign_extend(
+ cpu_ldsb_data(env, env->gpr[rs1] + read), 8);
+ k--;
+ }
+ env->vfp.vstart++;
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ while (k >= 0) {
+ read = i * env->gpr[rs2] + k;
+ env->vfp.vreg[dest + k * lmul].s32[j] = sign_extend(
+ cpu_ldsb_data(env, env->gpr[rs1] + read), 8);
+ k--;
+ }
+ env->vfp.vstart++;
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ while (k >= 0) {
+ read = i * env->gpr[rs2] + k;
+ env->vfp.vreg[dest + k * lmul].s64[j] = sign_extend(
+ cpu_ldsb_data(env, env->gpr[rs1] + read), 8);
+ k--;
+ }
+ env->vfp.vstart++;
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ } else {
+ vector_tail_segment(env, dest, j, width, k, lmul);
+ }
+ }
+ env->vfp.vstart = 0;
+}
+
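+/*
+ * Indexed variants: vector_get_index() folds the offset element taken
+ * from register group rs2 into the base address in gpr[rs1], so each
+ * element (and each of its nf + 1 fields) may land at an arbitrary
+ * address.
+ */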
+void VECTOR_HELPER(vlxbu_v)(CPURISCVState *env, uint32_t nf, uint32_t vm,
+ uint32_t rs1, uint32_t rs2, uint32_t rd)
+{
+ int i, j, k, vl, vlmax, lmul, width, dest, src2;
+ target_ulong addr;
+
+ vl = env->vfp.vl;
+
+ lmul = vector_get_lmul(env);
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ if (lmul * (nf + 1) > 32) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ vector_lmul_check_reg(env, lmul, rd, false);
+
+ for (i = 0; i < vlmax; i++) {
+ dest = rd + (i / (VLEN / width));
+ src2 = rs2 + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ k = nf;
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 8:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ while (k >= 0) {
+ addr = vector_get_index(env, rs1, src2, j, 1, width, k);
+ env->vfp.vreg[dest + k * lmul].u8[j] =
+ cpu_ldub_data(env, addr);
+ k--;
+ }
+ env->vfp.vstart++;
+ }
+ break;
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ while (k >= 0) {
+ addr = vector_get_index(env, rs1, src2, j, 1, width, k);
+ env->vfp.vreg[dest + k * lmul].u16[j] =
+ cpu_ldub_data(env, addr);
+ k--;
+ }
+ env->vfp.vstart++;
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ while (k >= 0) {
+ addr = vector_get_index(env, rs1, src2, j, 1, width, k);
+ env->vfp.vreg[dest + k * lmul].u32[j] =
+ cpu_ldub_data(env, addr);
+ k--;
+ }
+ env->vfp.vstart++;
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ while (k >= 0) {
+ addr = vector_get_index(env, rs1, src2, j, 1, width, k);
+ env->vfp.vreg[dest + k * lmul].u64[j] =
+ cpu_ldub_data(env, addr);
+ k--;
+ }
+ env->vfp.vstart++;
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ } else {
+ vector_tail_segment(env, dest, j, width, k, lmul);
+ }
+ }
+ env->vfp.vstart = 0;
+}
+
+void VECTOR_HELPER(vlxb_v)(CPURISCVState *env, uint32_t nf, uint32_t vm,
+ uint32_t rs1, uint32_t rs2, uint32_t rd)
+{
+ int i, j, k, vl, vlmax, lmul, width, dest, src2;
+ target_ulong addr;
+
+ vl = env->vfp.vl;
+
+ lmul = vector_get_lmul(env);
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ if (lmul * (nf + 1) > 32) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ vector_lmul_check_reg(env, lmul, rd, false);
+
+ for (i = 0; i < vlmax; i++) {
+ dest = rd + (i / (VLEN / width));
+ src2 = rs2 + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ k = nf;
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 8:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ while (k >= 0) {
+ addr = vector_get_index(env, rs1, src2, j, 1, width, k);
+ env->vfp.vreg[dest + k * lmul].s8[j] =
+ cpu_ldsb_data(env, addr);
+ k--;
+ }
+ env->vfp.vstart++;
+ }
+ break;
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ while (k >= 0) {
+ addr = vector_get_index(env, rs1, src2, j, 1, width, k);
+ env->vfp.vreg[dest + k * lmul].s16[j] = sign_extend(
+ cpu_ldsb_data(env, addr), 8);
+ k--;
+ }
+ env->vfp.vstart++;
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ while (k >= 0) {
+ addr = vector_get_index(env, rs1, src2, j, 1, width, k);
+ env->vfp.vreg[dest + k * lmul].s32[j] = sign_extend(
+ cpu_ldsb_data(env, addr), 8);
+ k--;
+ }
+ env->vfp.vstart++;
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ while (k >= 0) {
+ addr = vector_get_index(env, rs1, src2, j, 1, width, k);
+ env->vfp.vreg[dest + k * lmul].s64[j] = sign_extend(
+ cpu_ldsb_data(env, addr), 8);
+ k--;
+ }
+ env->vfp.vstart++;
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ } else {
+ vector_tail_segment(env, dest, j, width, k, lmul);
+ }
+ }
+ env->vfp.vstart = 0;
+}
+
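+/*
+ * Fault-only-first loads: env->foflag is set so that the fault path
+ * (elsewhere in this patch) can trim vl instead of trapping once element 0
+ * has completed, and env->vfp.vl is rebuilt incrementally as elements
+ * finish; on a clean run it is restored to the original vl at the end.
+ */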
+void VECTOR_HELPER(vlbuff_v)(CPURISCVState *env, uint32_t nf, uint32_t vm,
+ uint32_t rs1, uint32_t rd)
+{
+ int i, j, k, vl, vlmax, lmul, width, dest, read;
+
+ vl = env->vfp.vl;
+
+ lmul = vector_get_lmul(env);
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ if (lmul * (nf + 1) > 32) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ vector_lmul_check_reg(env, lmul, rd, false);
+
+ env->foflag = true;
+ env->vfp.vl = 0;
+ for (i = 0; i < vlmax; i++) {
+ dest = rd + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ k = nf;
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 8:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ while (k >= 0) {
+ read = i * (nf + 1) + k;
+ env->vfp.vreg[dest + k * lmul].u8[j] =
+ cpu_ldub_data(env, env->gpr[rs1] + read);
+ k--;
+ }
+ env->vfp.vstart++;
+ }
+ env->vfp.vl++;
+ break;
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ while (k >= 0) {
+ read = i * (nf + 1) + k;
+ env->vfp.vreg[dest + k * lmul].u16[j] =
+ cpu_ldub_data(env, env->gpr[rs1] + read);
+ k--;
+ }
+ env->vfp.vstart++;
+ }
+ env->vfp.vl++;
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ while (k >= 0) {
+ read = i * (nf + 1) + k;
+ env->vfp.vreg[dest + k * lmul].u32[j] =
+ cpu_ldub_data(env, env->gpr[rs1] + read);
+ k--;
+ }
+ env->vfp.vstart++;
+ }
+ env->vfp.vl++;
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ while (k >= 0) {
+ read = i * (nf + 1) + k;
+ env->vfp.vreg[dest + k * lmul].u64[j] =
+ cpu_ldub_data(env, env->gpr[rs1] + read);
+ k--;
+ }
+ env->vfp.vstart++;
+ }
+ env->vfp.vl++;
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ } else {
+ vector_tail_segment(env, dest, j, width, k, lmul);
+ }
+ }
+ env->foflag = false;
+ env->vfp.vl = vl;
+ env->vfp.vstart = 0;
+}
+
+void VECTOR_HELPER(vlbff_v)(CPURISCVState *env, uint32_t nf, uint32_t vm,
+ uint32_t rs1, uint32_t rd)
+{
+ int i, j, k, vl, vlmax, lmul, width, dest, read;
+
+ vl = env->vfp.vl;
+
+ lmul = vector_get_lmul(env);
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ if (lmul * (nf + 1) > 32) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ vector_lmul_check_reg(env, lmul, rd, false);
+ env->foflag = true;
+ env->vfp.vl = 0;
+ for (i = 0; i < vlmax; i++) {
+ dest = rd + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ k = nf;
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 8:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ while (k >= 0) {
+ read = i * (nf + 1) + k;
+ env->vfp.vreg[dest + k * lmul].s8[j] =
+ cpu_ldsb_data(env, env->gpr[rs1] + read);
+ k--;
+ }
+ env->vfp.vstart++;
+ }
+ env->vfp.vl++;
+ break;
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ while (k >= 0) {
+ read = i * (nf + 1) + k;
+ env->vfp.vreg[dest + k * lmul].s16[j] = sign_extend(
+ cpu_ldsb_data(env, env->gpr[rs1] + read), 8);
+ k--;
+ }
+ env->vfp.vstart++;
+ }
+ env->vfp.vl++;
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ while (k >= 0) {
+ read = i * (nf + 1) + k;
+ env->vfp.vreg[dest + k * lmul].s32[j] = sign_extend(
+ cpu_ldsb_data(env, env->gpr[rs1] + read), 8);
+ k--;
+ }
+ env->vfp.vstart++;
+ }
+ env->vfp.vl++;
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ while (k >= 0) {
+ read = i * (nf + 1) + k;
+ env->vfp.vreg[dest + k * lmul].s64[j] = sign_extend(
+ cpu_ldsb_data(env, env->gpr[rs1] + read), 8);
+ k--;
+ }
+ env->vfp.vstart++;
+ }
+ env->vfp.vl++;
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ } else {
+ vector_tail_segment(env, dest, j, width, k, lmul);
+ }
+ }
+ env->foflag = false;
+ env->vfp.vl = vl;
+ env->vfp.vstart = 0;
+}
+
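+/*
+ * vlhu.v: unit-stride segment load of zero-extended halfwords.  For
+ * element i, field k lives at byte offset (i * (nf + 1) + k) * 2 from
+ * the base address in GPR rs1.
+ */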
+void VECTOR_HELPER(vlhu_v)(CPURISCVState *env, uint32_t nf, uint32_t vm,
+ uint32_t rs1, uint32_t rd)
+{
+ int i, j, k, vl, vlmax, lmul, width, dest, read;
+
+ vl = env->vfp.vl;
+
+ lmul = vector_get_lmul(env);
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ if (lmul * (nf + 1) > 32) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ vector_lmul_check_reg(env, lmul, rd, false);
+
+ for (i = 0; i < vlmax; i++) {
+ dest = rd + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ k = nf;
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ while (k >= 0) {
+ read = (i * (nf + 1) + k) * 2;
+ env->vfp.vreg[dest + k * lmul].u16[j] =
+ cpu_lduw_data(env, env->gpr[rs1] + read);
+ k--;
+ }
+ env->vfp.vstart++;
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ while (k >= 0) {
+ read = (i * (nf + 1) + k) * 2;
+ env->vfp.vreg[dest + k * lmul].u32[j] =
+ cpu_lduw_data(env, env->gpr[rs1] + read);
+ k--;
+ }
+ env->vfp.vstart++;
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ while (k >= 0) {
+ read = (i * (nf + 1) + k) * 2;
+ env->vfp.vreg[dest + k * lmul].u64[j] =
+ cpu_lduw_data(env, env->gpr[rs1] + read);
+ k--;
+ }
+ env->vfp.vstart++;
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ } else {
+ vector_tail_segment(env, dest, j, width, k, lmul);
+ }
+ }
+ env->vfp.vstart = 0;
+}
+
+void VECTOR_HELPER(vlh_v)(CPURISCVState *env, uint32_t nf, uint32_t vm,
+ uint32_t rs1, uint32_t rd)
+{
+ int i, j, k, vl, vlmax, lmul, width, dest, read;
+
+ vl = env->vfp.vl;
+
+ lmul = vector_get_lmul(env);
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ if (lmul * (nf + 1) > 32) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ vector_lmul_check_reg(env, lmul, rd, false);
+
+ for (i = 0; i < vlmax; i++) {
+ dest = rd + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ k = nf;
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ while (k >= 0) {
+ read = (i * (nf + 1) + k) * 2;
+ env->vfp.vreg[dest + k * lmul].s16[j] =
+ cpu_ldsw_data(env, env->gpr[rs1] + read);
+ k--;
+ }
+ env->vfp.vstart++;
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ while (k >= 0) {
+ read = (i * (nf + 1) + k) * 2;
+ env->vfp.vreg[dest + k * lmul].s32[j] = sign_extend(
+ cpu_ldsw_data(env, env->gpr[rs1] + read), 16);
+ k--;
+ }
+ env->vfp.vstart++;
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ while (k >= 0) {
+ read = (i * (nf + 1) + k) * 2;
+ env->vfp.vreg[dest + k * lmul].s64[j] = sign_extend(
+ cpu_ldsw_data(env, env->gpr[rs1] + read), 16);
+ k--;
+ }
+ env->vfp.vstart++;
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ } else {
+ vector_tail_segment(env, dest, j, width, k, lmul);
+ }
+ }
+ env->vfp.vstart = 0;
+}
+
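+/*
+ * vlshu.v: strided load of zero-extended halfwords.  The byte stride
+ * between segments is read from GPR rs2, so field k of element i is
+ * loaded from rs1 + i * stride + k * 2.
+ */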
+void VECTOR_HELPER(vlshu_v)(CPURISCVState *env, uint32_t nf, uint32_t vm,
+ uint32_t rs1, uint32_t rs2, uint32_t rd)
+{
+ int i, j, k, vl, vlmax, lmul, width, dest, read;
+
+ vl = env->vfp.vl;
+
+ lmul = vector_get_lmul(env);
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ if (lmul * (nf + 1) > 32) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ vector_lmul_check_reg(env, lmul, rd, false);
+
+ for (i = 0; i < vlmax; i++) {
+ dest = rd + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ k = nf;
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ while (k >= 0) {
+ read = i * env->gpr[rs2] + k * 2;
+ env->vfp.vreg[dest + k * lmul].u16[j] =
+ cpu_lduw_data(env, env->gpr[rs1] + read);
+ k--;
+ }
+ env->vfp.vstart++;
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ while (k >= 0) {
+ read = i * env->gpr[rs2] + k * 2;
+ env->vfp.vreg[dest + k * lmul].u32[j] =
+ cpu_lduw_data(env, env->gpr[rs1] + read);
+ k--;
+ }
+ env->vfp.vstart++;
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ while (k >= 0) {
+ read = i * env->gpr[rs2] + k * 2;
+ env->vfp.vreg[dest + k * lmul].u64[j] =
+ cpu_lduw_data(env, env->gpr[rs1] + read);
+ k--;
+ }
+ env->vfp.vstart++;
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ } else {
+ vector_tail_segment(env, dest, j, width, k, lmul);
+ }
+ }
+ env->vfp.vstart = 0;
+}
+
+void VECTOR_HELPER(vlsh_v)(CPURISCVState *env, uint32_t nf, uint32_t vm,
+ uint32_t rs1, uint32_t rs2, uint32_t rd)
+{
+ int i, j, k, vl, vlmax, lmul, width, dest, read;
+
+ vl = env->vfp.vl;
+
+ lmul = vector_get_lmul(env);
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ if (lmul * (nf + 1) > 32) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ vector_lmul_check_reg(env, lmul, rd, false);
+
+ for (i = 0; i < vlmax; i++) {
+ dest = rd + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ k = nf;
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ while (k >= 0) {
+ read = i * env->gpr[rs2] + k * 2;
+ env->vfp.vreg[dest + k * lmul].s16[j] =
+ cpu_ldsw_data(env, env->gpr[rs1] + read);
+ k--;
+ }
+ env->vfp.vstart++;
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ while (k >= 0) {
+ read = i * env->gpr[rs2] + k * 2;
+ env->vfp.vreg[dest + k * lmul].s32[j] = sign_extend(
+ cpu_ldsw_data(env, env->gpr[rs1] + read), 16);
+ k--;
+ }
+ env->vfp.vstart++;
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ while (k >= 0) {
+ read = i * env->gpr[rs2] + k * 2;
+ env->vfp.vreg[dest + k * lmul].s64[j] = sign_extend(
+ cpu_ldsw_data(env, env->gpr[rs1] + read), 16);
+ k--;
+ }
+ env->vfp.vstart++;
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ } else {
+ vector_tail_segment(env, dest, j, width, k, lmul);
+ }
+ }
+ env->vfp.vstart = 0;
+}
+
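+/*
+ * vlxhu.v: indexed load of zero-extended halfwords; element addresses
+ * are formed by vector_get_index() from rs1 and index group rs2.
+ */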
+void VECTOR_HELPER(vlxhu_v)(CPURISCVState *env, uint32_t nf, uint32_t vm,
+ uint32_t rs1, uint32_t rs2, uint32_t rd)
+{
+ int i, j, k, vl, vlmax, lmul, width, dest, src2;
+ target_ulong addr;
+
+ vl = env->vfp.vl;
+
+ lmul = vector_get_lmul(env);
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ if (lmul * (nf + 1) > 32) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ vector_lmul_check_reg(env, lmul, rd, false);
+
+ for (i = 0; i < vlmax; i++) {
+ dest = rd + (i / (VLEN / width));
+ src2 = rs2 + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ k = nf;
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ while (k >= 0) {
+ addr = vector_get_index(env, rs1, src2, j, 2, width, k);
+ env->vfp.vreg[dest + k * lmul].u16[j] =
+ cpu_lduw_data(env, addr);
+ k--;
+ }
+ env->vfp.vstart++;
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ while (k >= 0) {
+ addr = vector_get_index(env, rs1, src2, j, 2, width, k);
+ env->vfp.vreg[dest + k * lmul].u32[j] =
+ cpu_lduw_data(env, addr);
+ k--;
+ }
+ env->vfp.vstart++;
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ while (k >= 0) {
+ addr = vector_get_index(env, rs1, src2, j, 2, width, k);
+ env->vfp.vreg[dest + k * lmul].u64[j] =
+ cpu_lduw_data(env, addr);
+ k--;
+ }
+ env->vfp.vstart++;
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ } else {
+ vector_tail_segment(env, dest, j, width, k, lmul);
+ }
+ }
+ env->vfp.vstart = 0;
+}
+
+void VECTOR_HELPER(vlxh_v)(CPURISCVState *env, uint32_t nf, uint32_t vm,
+ uint32_t rs1, uint32_t rs2, uint32_t rd)
+{
+ int i, j, k, vl, vlmax, lmul, width, dest, src2;
+ target_ulong addr;
+
+ vl = env->vfp.vl;
+
+ lmul = vector_get_lmul(env);
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ if (lmul * (nf + 1) > 32) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ vector_lmul_check_reg(env, lmul, rd, false);
+
+ for (i = 0; i < vlmax; i++) {
+ dest = rd + (i / (VLEN / width));
+ src2 = rs2 + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ k = nf;
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ while (k >= 0) {
+ addr = vector_get_index(env, rs1, src2, j, 2, width, k);
+ env->vfp.vreg[dest + k * lmul].s16[j] =
+ cpu_ldsw_data(env, addr);
+ k--;
+ }
+ env->vfp.vstart++;
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ while (k >= 0) {
+ addr = vector_get_index(env, rs1, src2, j, 2, width, k);
+ env->vfp.vreg[dest + k * lmul].s32[j] = sign_extend(
+ cpu_ldsw_data(env, addr), 16);
+ k--;
+ }
+ env->vfp.vstart++;
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ while (k >= 0) {
+ addr = vector_get_index(env, rs1, src2, j, 2, width, k);
+ env->vfp.vreg[dest + k * lmul].s64[j] = sign_extend(
+ cpu_ldsw_data(env, addr), 16);
+ k--;
+ }
+ env->vfp.vstart++;
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ } else {
+ vector_tail_segment(env, dest, j, width, k, lmul);
+ }
+ }
+ env->vfp.vstart = 0;
+}
+
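+/*
+ * vlhuff.v: fault-only-first variant of vlhu.v, using the same foflag
+ * protocol as vlbuff.v above.
+ */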
+void VECTOR_HELPER(vlhuff_v)(CPURISCVState *env, uint32_t nf, uint32_t vm,
+ uint32_t rs1, uint32_t rd)
+{
+ int i, j, k, vl, vlmax, lmul, width, dest, read;
+
+ vl = env->vfp.vl;
+
+ lmul = vector_get_lmul(env);
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ if (lmul * (nf + 1) > 32) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ vector_lmul_check_reg(env, lmul, rd, false);
+ env->foflag = true;
+ env->vfp.vl = 0;
+ for (i = 0; i < vlmax; i++) {
+ dest = rd + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ k = nf;
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ while (k >= 0) {
+ read = (i * (nf + 1) + k) * 2;
+ env->vfp.vreg[dest + k * lmul].u16[j] =
+ cpu_lduw_data(env, env->gpr[rs1] + read);
+ k--;
+ }
+ env->vfp.vstart++;
+ }
+ env->vfp.vl++;
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ while (k >= 0) {
+ read = (i * (nf + 1) + k) * 2;
+ env->vfp.vreg[dest + k * lmul].u32[j] =
+ cpu_lduw_data(env, env->gpr[rs1] + read);
+ k--;
+ }
+ env->vfp.vstart++;
+ }
+ env->vfp.vl++;
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ while (k >= 0) {
+ read = (i * (nf + 1) + k) * 2;
+ env->vfp.vreg[dest + k * lmul].u64[j] =
+ cpu_lduw_data(env, env->gpr[rs1] + read);
+ k--;
+ }
+ env->vfp.vstart++;
+ }
+ env->vfp.vl++;
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ } else {
+ vector_tail_segment(env, dest, j, width, k, lmul);
+ }
+ }
+ env->foflag = false;
+ env->vfp.vl = vl;
+ env->vfp.vstart = 0;
+}
+
+void VECTOR_HELPER(vlhff_v)(CPURISCVState *env, uint32_t nf, uint32_t vm,
+ uint32_t rs1, uint32_t rd)
+{
+ int i, j, k, vl, vlmax, lmul, width, dest, read;
+
+ vl = env->vfp.vl;
+
+ lmul = vector_get_lmul(env);
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ if (lmul * (nf + 1) > 32) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ vector_lmul_check_reg(env, lmul, rd, false);
+ env->foflag = true;
+ env->vfp.vl = 0;
+ for (i = 0; i < vlmax; i++) {
+ dest = rd + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ k = nf;
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ while (k >= 0) {
+ read = (i * (nf + 1) + k) * 2;
+ env->vfp.vreg[dest + k * lmul].s16[j] =
+ cpu_ldsw_data(env, env->gpr[rs1] + read);
+ k--;
+ }
+ env->vfp.vstart++;
+ }
+ env->vfp.vl++;
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ while (k >= 0) {
+ read = (i * (nf + 1) + k) * 2;
+ env->vfp.vreg[dest + k * lmul].s32[j] = sign_extend(
+ cpu_ldsw_data(env, env->gpr[rs1] + read), 16);
+ k--;
+ }
+ env->vfp.vstart++;
+ }
+ env->vfp.vl++;
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ while (k >= 0) {
+ read = (i * (nf + 1) + k) * 2;
+ env->vfp.vreg[dest + k * lmul].s64[j] = sign_extend(
+ cpu_ldsw_data(env, env->gpr[rs1] + read), 16);
+ k--;
+ }
+ env->vfp.vstart++;
+ }
+ env->vfp.vl++;
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ } else {
+ vector_tail_segment(env, dest, j, width, k, lmul);
+ }
+ }
+    env->foflag = false;
+    env->vfp.vl = vl;
+    env->vfp.vstart = 0;
+}
+
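+/*
+ * vlw.v: unit-stride segment load of signed words, sign-extended to
+ * SEW for the 64-bit element case.
+ */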
+void VECTOR_HELPER(vlw_v)(CPURISCVState *env, uint32_t nf, uint32_t vm,
+ uint32_t rs1, uint32_t rd)
+{
+ int i, j, k, vl, vlmax, lmul, width, dest, read;
+
+ vl = env->vfp.vl;
+
+ lmul = vector_get_lmul(env);
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ if (lmul * (nf + 1) > 32) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ vector_lmul_check_reg(env, lmul, rd, false);
+
+ for (i = 0; i < vlmax; i++) {
+ dest = rd + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ k = nf;
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ while (k >= 0) {
+ read = (i * (nf + 1) + k) * 4;
+ env->vfp.vreg[dest + k * lmul].s32[j] =
+ cpu_ldl_data(env, env->gpr[rs1] + read);
+ k--;
+ }
+ env->vfp.vstart++;
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ while (k >= 0) {
+ read = (i * (nf + 1) + k) * 4;
+ env->vfp.vreg[dest + k * lmul].s64[j] = sign_extend(
+ cpu_ldl_data(env, env->gpr[rs1] + read), 32);
+ k--;
+ }
+ env->vfp.vstart++;
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ } else {
+ vector_tail_segment(env, dest, j, width, k, lmul);
+ }
+ }
+ env->vfp.vstart = 0;
+}
+
+void VECTOR_HELPER(vlwu_v)(CPURISCVState *env, uint32_t nf, uint32_t vm,
+ uint32_t rs1, uint32_t rd)
+{
+ int i, j, k, vl, vlmax, lmul, width, dest, read;
+
+ vl = env->vfp.vl;
+
+ lmul = vector_get_lmul(env);
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ if (lmul * (nf + 1) > 32) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ vector_lmul_check_reg(env, lmul, rd, false);
+
+ for (i = 0; i < vlmax; i++) {
+ dest = rd + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ k = nf;
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ while (k >= 0) {
+ read = (i * (nf + 1) + k) * 4;
+ env->vfp.vreg[dest + k * lmul].u32[j] =
+ cpu_ldl_data(env, env->gpr[rs1] + read);
+ k--;
+ }
+ env->vfp.vstart++;
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ while (k >= 0) {
+ read = (i * (nf + 1) + k) * 4;
+ env->vfp.vreg[dest + k * lmul].u64[j] =
+ cpu_ldl_data(env, env->gpr[rs1] + read);
+ k--;
+ }
+ env->vfp.vstart++;
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ } else {
+ vector_tail_segment(env, dest, j, width, k, lmul);
+ }
+ }
+ env->vfp.vstart = 0;
+}
+
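+/*
+ * vlswu.v: strided load of zero-extended words; the segment stride in
+ * bytes is taken from GPR rs2.
+ */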
+void VECTOR_HELPER(vlswu_v)(CPURISCVState *env, uint32_t nf, uint32_t vm,
+ uint32_t rs1, uint32_t rs2, uint32_t rd)
+{
+ int i, j, k, vl, vlmax, lmul, width, dest, read;
+
+ vl = env->vfp.vl;
+
+ lmul = vector_get_lmul(env);
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ if (lmul * (nf + 1) > 32) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ vector_lmul_check_reg(env, lmul, rd, false);
+
+ for (i = 0; i < vlmax; i++) {
+ dest = rd + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ k = nf;
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ while (k >= 0) {
+ read = i * env->gpr[rs2] + k * 4;
+ env->vfp.vreg[dest + k * lmul].u32[j] =
+ cpu_ldl_data(env, env->gpr[rs1] + read);
+ k--;
+ }
+ env->vfp.vstart++;
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ while (k >= 0) {
+ read = i * env->gpr[rs2] + k * 4;
+ env->vfp.vreg[dest + k * lmul].u64[j] =
+ cpu_ldl_data(env, env->gpr[rs1] + read);
+ k--;
+ }
+ env->vfp.vstart++;
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ } else {
+ vector_tail_segment(env, dest, j, width, k, lmul);
+ }
+ }
+ env->vfp.vstart = 0;
+}
+
+void VECTOR_HELPER(vlsw_v)(CPURISCVState *env, uint32_t nf, uint32_t vm,
+ uint32_t rs1, uint32_t rs2, uint32_t rd)
+{
+ int i, j, k, vl, vlmax, lmul, width, dest, read;
+
+ vl = env->vfp.vl;
+ lmul = vector_get_lmul(env);
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ if (lmul * (nf + 1) > 32) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ vector_lmul_check_reg(env, lmul, rd, false);
+
+ for (i = 0; i < vlmax; i++) {
+ dest = rd + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ k = nf;
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ while (k >= 0) {
+ read = i * env->gpr[rs2] + k * 4;
+ env->vfp.vreg[dest + k * lmul].s32[j] =
+ cpu_ldl_data(env, env->gpr[rs1] + read);
+ k--;
+ }
+ env->vfp.vstart++;
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ while (k >= 0) {
+ read = i * env->gpr[rs2] + k * 4;
+ env->vfp.vreg[dest + k * lmul].s64[j] = sign_extend(
+ cpu_ldl_data(env, env->gpr[rs1] + read), 32);
+ k--;
+ }
+ env->vfp.vstart++;
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ } else {
+ vector_tail_segment(env, dest, j, width, k, lmul);
+ }
+ }
+ env->vfp.vstart = 0;
+}
+
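+/*
+ * vlxwu.v: indexed load of zero-extended words; element addresses are
+ * formed by vector_get_index() from rs1 and index group rs2.
+ */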
+void VECTOR_HELPER(vlxwu_v)(CPURISCVState *env, uint32_t nf, uint32_t vm,
+ uint32_t rs1, uint32_t rs2, uint32_t rd)
+{
+ int i, j, k, vl, vlmax, lmul, width, dest, src2;
+ target_ulong addr;
+
+ vl = env->vfp.vl;
+ lmul = vector_get_lmul(env);
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ if (lmul * (nf + 1) > 32) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ vector_lmul_check_reg(env, lmul, rd, false);
+
+ for (i = 0; i < vlmax; i++) {
+ dest = rd + (i / (VLEN / width));
+ src2 = rs2 + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ k = nf;
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ while (k >= 0) {
+ addr = vector_get_index(env, rs1, src2, j, 4, width, k);
+ env->vfp.vreg[dest + k * lmul].u32[j] =
+ cpu_ldl_data(env, addr);
+ k--;
+ }
+ env->vfp.vstart++;
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ while (k >= 0) {
+ addr = vector_get_index(env, rs1, src2, j, 4, width, k);
+ env->vfp.vreg[dest + k * lmul].u64[j] =
+ cpu_ldl_data(env, addr);
+ k--;
+ }
+ env->vfp.vstart++;
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ } else {
+ vector_tail_segment(env, dest, j, width, k, lmul);
+ }
+ }
+ env->vfp.vstart = 0;
+}
+
+void VECTOR_HELPER(vlxw_v)(CPURISCVState *env, uint32_t nf, uint32_t vm,
+ uint32_t rs1, uint32_t rs2, uint32_t rd)
+{
+ int i, j, k, vl, vlmax, lmul, width, dest, src2;
+ target_ulong addr;
+
+ vl = env->vfp.vl;
+
+ lmul = vector_get_lmul(env);
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ if (lmul * (nf + 1) > 32) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ vector_lmul_check_reg(env, lmul, rd, false);
+
+ for (i = 0; i < vlmax; i++) {
+ dest = rd + (i / (VLEN / width));
+ src2 = rs2 + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ k = nf;
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ while (k >= 0) {
+ addr = vector_get_index(env, rs1, src2, j, 4, width, k);
+ env->vfp.vreg[dest + k * lmul].s32[j] =
+ cpu_ldl_data(env, addr);
+ k--;
+ }
+ env->vfp.vstart++;
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ while (k >= 0) {
+ addr = vector_get_index(env, rs1, src2, j, 4, width, k);
+ env->vfp.vreg[dest + k * lmul].s64[j] = sign_extend(
+ cpu_ldl_data(env, addr), 32);
+ k--;
+ }
+ env->vfp.vstart++;
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ } else {
+ vector_tail_segment(env, dest, j, width, k, lmul);
+ }
+ }
+ env->vfp.vstart = 0;
+}
+
+void VECTOR_HELPER(vlwuff_v)(CPURISCVState *env, uint32_t nf, uint32_t vm,
+ uint32_t rs1, uint32_t rd)
+{
+ int i, j, k, vl, vlmax, lmul, width, dest, read;
+
+ vl = env->vfp.vl;
+
+ lmul = vector_get_lmul(env);
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ if (lmul * (nf + 1) > 32) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ vector_lmul_check_reg(env, lmul, rd, false);
+ env->foflag = true;
+ env->vfp.vl = 0;
+ for (i = 0; i < vlmax; i++) {
+ dest = rd + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ k = nf;
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ while (k >= 0) {
+ read = (i * (nf + 1) + k) * 4;
+ env->vfp.vreg[dest + k * lmul].u32[j] =
+ cpu_ldl_data(env, env->gpr[rs1] + read);
+ k--;
+ }
+ env->vfp.vstart++;
+ }
+ env->vfp.vl++;
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ while (k >= 0) {
+ read = (i * (nf + 1) + k) * 4;
+ env->vfp.vreg[dest + k * lmul].u64[j] =
+ cpu_ldl_data(env, env->gpr[rs1] + read);
+ k--;
+ }
+ env->vfp.vstart++;
+ }
+ env->vfp.vl++;
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ } else {
+ vector_tail_segment(env, dest, j, width, k, lmul);
+ }
+ }
+ env->foflag = false;
+ env->vfp.vl = vl;
+ env->vfp.vstart = 0;
+}
+
+void VECTOR_HELPER(vlwff_v)(CPURISCVState *env, uint32_t nf, uint32_t vm,
+ uint32_t rs1, uint32_t rd)
+{
+ int i, j, k, vl, vlmax, lmul, width, dest, read;
+
+ vl = env->vfp.vl;
+
+ lmul = vector_get_lmul(env);
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ if (lmul * (nf + 1) > 32) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ vector_lmul_check_reg(env, lmul, rd, false);
+ env->foflag = true;
+ env->vfp.vl = 0;
+ for (i = 0; i < vlmax; i++) {
+ dest = rd + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ k = nf;
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ while (k >= 0) {
+ read = (i * (nf + 1) + k) * 4;
+ env->vfp.vreg[dest + k * lmul].s32[j] =
+ cpu_ldl_data(env, env->gpr[rs1] + read);
+ k--;
+ }
+ env->vfp.vstart++;
+ }
+ env->vfp.vl++;
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ while (k >= 0) {
+ read = (i * (nf + 1) + k) * 4;
+ env->vfp.vreg[dest + k * lmul].s64[j] = sign_extend(
+ cpu_ldl_data(env, env->gpr[rs1] + read), 32);
+ k--;
+ }
+ env->vfp.vstart++;
+ }
+ env->vfp.vl++;
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ } else {
+ vector_tail_segment(env, dest, j, width, k, lmul);
+ }
+ }
+ env->foflag = false;
+ env->vfp.vl = vl;
+ env->vfp.vstart = 0;
+}
+
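+/*
+ * vle.v: unit-stride segment load of SEW-sized elements; the access
+ * width (8/16/32/64) follows the SEW currently programmed in vtype.
+ */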
+void VECTOR_HELPER(vle_v)(CPURISCVState *env, uint32_t nf, uint32_t vm,
+ uint32_t rs1, uint32_t rd)
+{
+ int i, j, k, vl, vlmax, lmul, width, dest, read;
+
+ vl = env->vfp.vl;
+
+ lmul = vector_get_lmul(env);
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ if (lmul * (nf + 1) > 32) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ vector_lmul_check_reg(env, lmul, rd, false);
+
+ for (i = 0; i < vlmax; i++) {
+ dest = rd + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ k = nf;
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 8:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ while (k >= 0) {
+ read = i * (nf + 1) + k;
+ env->vfp.vreg[dest + k * lmul].u8[j] =
+ cpu_ldub_data(env, env->gpr[rs1] + read);
+ k--;
+ }
+ env->vfp.vstart++;
+ }
+ break;
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ while (k >= 0) {
+ read = (i * (nf + 1) + k) * 2;
+ env->vfp.vreg[dest + k * lmul].u16[j] =
+ cpu_lduw_data(env, env->gpr[rs1] + read);
+ k--;
+ }
+ env->vfp.vstart++;
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ while (k >= 0) {
+ read = (i * (nf + 1) + k) * 4;
+ env->vfp.vreg[dest + k * lmul].u32[j] =
+ cpu_ldl_data(env, env->gpr[rs1] + read);
+ k--;
+ }
+ env->vfp.vstart++;
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ while (k >= 0) {
+ read = (i * (nf + 1) + k) * 8;
+ env->vfp.vreg[dest + k * lmul].u64[j] =
+ cpu_ldq_data(env, env->gpr[rs1] + read);
+ k--;
+ }
+ env->vfp.vstart++;
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ } else {
+ vector_tail_segment(env, dest, j, width, k, lmul);
+ }
+ }
+ env->vfp.vstart = 0;
+}
+
+void VECTOR_HELPER(vlse_v)(CPURISCVState *env, uint32_t nf, uint32_t vm,
+ uint32_t rs1, uint32_t rs2, uint32_t rd)
+{
+ int i, j, k, vl, vlmax, lmul, width, dest, read;
+
+ vl = env->vfp.vl;
+
+ lmul = vector_get_lmul(env);
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ if (lmul * (nf + 1) > 32) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ vector_lmul_check_reg(env, lmul, rd, false);
+
+ for (i = 0; i < vlmax; i++) {
+ dest = rd + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ k = nf;
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 8:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ while (k >= 0) {
+ read = i * env->gpr[rs2] + k;
+ env->vfp.vreg[dest + k * lmul].u8[j] =
+ cpu_ldub_data(env, env->gpr[rs1] + read);
+ k--;
+ }
+ env->vfp.vstart++;
+ }
+ break;
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ while (k >= 0) {
+ read = i * env->gpr[rs2] + k * 2;
+ env->vfp.vreg[dest + k * lmul].u16[j] =
+ cpu_lduw_data(env, env->gpr[rs1] + read);
+ k--;
+ }
+ env->vfp.vstart++;
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ while (k >= 0) {
+ read = i * env->gpr[rs2] + k * 4;
+ env->vfp.vreg[dest + k * lmul].u32[j] =
+ cpu_ldl_data(env, env->gpr[rs1] + read);
+ k--;
+ }
+ env->vfp.vstart++;
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ while (k >= 0) {
+ read = i * env->gpr[rs2] + k * 8;
+ env->vfp.vreg[dest + k * lmul].u64[j] =
+ cpu_ldq_data(env, env->gpr[rs1] + read);
+ k--;
+ }
+ env->vfp.vstart++;
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ } else {
+ vector_tail_segment(env, dest, j, width, k, lmul);
+ }
+ }
+ env->vfp.vstart = 0;
+}
+
+void VECTOR_HELPER(vlxe_v)(CPURISCVState *env, uint32_t nf, uint32_t vm,
+ uint32_t rs1, uint32_t rs2, uint32_t rd)
+{
+ int i, j, k, vl, vlmax, lmul, width, dest, src2;
+ target_ulong addr;
+
+ vl = env->vfp.vl;
+ lmul = vector_get_lmul(env);
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ if (lmul * (nf + 1) > 32) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ vector_lmul_check_reg(env, lmul, rd, false);
+
+ for (i = 0; i < vlmax; i++) {
+ dest = rd + (i / (VLEN / width));
+ src2 = rs2 + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ k = nf;
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 8:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ while (k >= 0) {
+ addr = vector_get_index(env, rs1, src2, j, 1, width, k);
+ env->vfp.vreg[dest + k * lmul].u8[j] =
+ cpu_ldub_data(env, addr);
+ k--;
+ }
+ env->vfp.vstart++;
+ }
+ break;
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ while (k >= 0) {
+ addr = vector_get_index(env, rs1, src2, j, 2, width, k);
+ env->vfp.vreg[dest + k * lmul].u16[j] =
+ cpu_lduw_data(env, addr);
+ k--;
+ }
+ env->vfp.vstart++;
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ while (k >= 0) {
+ addr = vector_get_index(env, rs1, src2, j, 4, width, k);
+ env->vfp.vreg[dest + k * lmul].u32[j] =
+ cpu_ldl_data(env, addr);
+ k--;
+ }
+ env->vfp.vstart++;
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ while (k >= 0) {
+ addr = vector_get_index(env, rs1, src2, j, 8, width, k);
+ env->vfp.vreg[dest + k * lmul].u64[j] =
+ cpu_ldq_data(env, addr);
+ k--;
+ }
+ env->vfp.vstart++;
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ } else {
+ vector_tail_segment(env, dest, j, width, k, lmul);
+ }
+ }
+ env->vfp.vstart = 0;
+}
+
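+/*
+ * vleff.v: fault-only-first load of SEW-sized elements, combining the
+ * vle.v addressing with the foflag protocol used by the other *ff
+ * helpers.
+ */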
+void VECTOR_HELPER(vleff_v)(CPURISCVState *env, uint32_t nf, uint32_t vm,
+ uint32_t rs1, uint32_t rd)
+{
+ int i, j, k, vl, vlmax, lmul, width, dest, read;
+
+ vl = env->vfp.vl;
+ lmul = vector_get_lmul(env);
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ if (lmul * (nf + 1) > 32) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ vector_lmul_check_reg(env, lmul, rd, false);
+ env->vfp.vl = 0;
+ env->foflag = true;
+ for (i = 0; i < vlmax; i++) {
+ dest = rd + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ k = nf;
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 8:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ while (k >= 0) {
+ read = i * (nf + 1) + k;
+ env->vfp.vreg[dest + k * lmul].u8[j] =
+ cpu_ldub_data(env, env->gpr[rs1] + read);
+ k--;
+ }
+ env->vfp.vstart++;
+ }
+ env->vfp.vl++;
+ break;
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ while (k >= 0) {
+ read = (i * (nf + 1) + k) * 2;
+ env->vfp.vreg[dest + k * lmul].u16[j] =
+ cpu_lduw_data(env, env->gpr[rs1] + read);
+ k--;
+ }
+ env->vfp.vstart++;
+ }
+ env->vfp.vl++;
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ while (k >= 0) {
+ read = (i * (nf + 1) + k) * 4;
+ env->vfp.vreg[dest + k * lmul].u32[j] =
+ cpu_ldl_data(env, env->gpr[rs1] + read);
+ k--;
+ }
+ env->vfp.vstart++;
+ }
+ env->vfp.vl++;
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ while (k >= 0) {
+ read = (i * (nf + 1) + k) * 8;
+ env->vfp.vreg[dest + k * lmul].u64[j] =
+ cpu_ldq_data(env, env->gpr[rs1] + read);
+ k--;
+ }
+ env->vfp.vstart++;
+ }
+ env->vfp.vl++;
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ } else {
+ vector_tail_segment(env, dest, j, width, k, lmul);
+ }
+ }
+ env->foflag = false;
+ env->vfp.vl = vl;
+ env->vfp.vstart = 0;
+}
+
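+/*
+ * vsb.v: unit-stride segment store of the low byte of each element.
+ * Stores need no tail handling: elements at or beyond vl, and masked
+ * ones, are simply skipped.
+ */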
+void VECTOR_HELPER(vsb_v)(CPURISCVState *env, uint32_t nf, uint32_t vm,
+ uint32_t rs1, uint32_t rd)
+{
+ int i, j, k, vl, vlmax, lmul, width, dest, wrote;
+
+ vl = env->vfp.vl;
+ lmul = vector_get_lmul(env);
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ if (lmul * (nf + 1) > 32) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ vector_lmul_check_reg(env, lmul, rd, false);
+
+ for (i = 0; i < vlmax; i++) {
+ dest = rd + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ k = nf;
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 8:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ while (k >= 0) {
+ wrote = i * (nf + 1) + k;
+ cpu_stb_data(env, env->gpr[rs1] + wrote,
+ env->vfp.vreg[dest + k * lmul].s8[j]);
+ k--;
+ }
+ env->vfp.vstart++;
+ }
+ break;
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ while (k >= 0) {
+ wrote = i * (nf + 1) + k;
+ cpu_stb_data(env, env->gpr[rs1] + wrote,
+ env->vfp.vreg[dest + k * lmul].s16[j]);
+ k--;
+ }
+ env->vfp.vstart++;
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ while (k >= 0) {
+ wrote = i * (nf + 1) + k;
+ cpu_stb_data(env, env->gpr[rs1] + wrote,
+ env->vfp.vreg[dest + k * lmul].s32[j]);
+ k--;
+ }
+ env->vfp.vstart++;
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ while (k >= 0) {
+ wrote = i * (nf + 1) + k;
+ cpu_stb_data(env, env->gpr[rs1] + wrote,
+ env->vfp.vreg[dest + k * lmul].s64[j]);
+ k--;
+ }
+ env->vfp.vstart++;
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ }
+ }
+ env->vfp.vstart = 0;
+}
+
+void VECTOR_HELPER(vssb_v)(CPURISCVState *env, uint32_t nf, uint32_t vm,
+ uint32_t rs1, uint32_t rs2, uint32_t rd)
+{
+ int i, j, k, vl, vlmax, lmul, width, dest, wrote;
+
+ vl = env->vfp.vl;
+ lmul = vector_get_lmul(env);
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ if (lmul * (nf + 1) > 32) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ vector_lmul_check_reg(env, lmul, rd, false);
+
+ for (i = 0; i < vlmax; i++) {
+ dest = rd + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ k = nf;
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 8:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ while (k >= 0) {
+ wrote = i * env->gpr[rs2] + k;
+ cpu_stb_data(env, env->gpr[rs1] + wrote,
+ env->vfp.vreg[dest + k * lmul].s8[j]);
+ k--;
+ }
+ env->vfp.vstart++;
+ }
+ break;
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ while (k >= 0) {
+ wrote = i * env->gpr[rs2] + k;
+ cpu_stb_data(env, env->gpr[rs1] + wrote,
+ env->vfp.vreg[dest + k * lmul].s16[j]);
+ k--;
+ }
+ env->vfp.vstart++;
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ while (k >= 0) {
+ wrote = i * env->gpr[rs2] + k;
+ cpu_stb_data(env, env->gpr[rs1] + wrote,
+ env->vfp.vreg[dest + k * lmul].s32[j]);
+ k--;
+ }
+ env->vfp.vstart++;
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ while (k >= 0) {
+ wrote = i * env->gpr[rs2] + k;
+ cpu_stb_data(env, env->gpr[rs1] + wrote,
+ env->vfp.vreg[dest + k * lmul].s64[j]);
+ k--;
+ }
+ env->vfp.vstart++;
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ }
+ }
+ env->vfp.vstart = 0;
+}
+
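+/*
+ * vsxb.v: indexed store of the low byte of each element, using the
+ * same vector_get_index() addressing as the indexed loads.
+ */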
+void VECTOR_HELPER(vsxb_v)(CPURISCVState *env, uint32_t nf, uint32_t vm,
+ uint32_t rs1, uint32_t rs2, uint32_t rd)
+{
+ int i, j, k, vl, vlmax, lmul, width, dest, src2;
+ target_ulong addr;
+
+ vl = env->vfp.vl;
+ lmul = vector_get_lmul(env);
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ if (lmul * (nf + 1) > 32) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ vector_lmul_check_reg(env, lmul, rd, false);
+
+ for (i = 0; i < vlmax; i++) {
+ dest = rd + (i / (VLEN / width));
+ src2 = rs2 + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ k = nf;
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 8:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ while (k >= 0) {
+ addr = vector_get_index(env, rs1, src2, j, 1, width, k);
+ cpu_stb_data(env, addr,
+ env->vfp.vreg[dest + k * lmul].s8[j]);
+ k--;
+ }
+ env->vfp.vstart++;
+ }
+ break;
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ while (k >= 0) {
+ addr = vector_get_index(env, rs1, src2, j, 1, width, k);
+ cpu_stb_data(env, addr,
+ env->vfp.vreg[dest + k * lmul].s16[j]);
+ k--;
+ }
+ env->vfp.vstart++;
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ while (k >= 0) {
+ addr = vector_get_index(env, rs1, src2, j, 1, width, k);
+ cpu_stb_data(env, addr,
+ env->vfp.vreg[dest + k * lmul].s32[j]);
+ k--;
+ }
+ env->vfp.vstart++;
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ while (k >= 0) {
+ addr = vector_get_index(env, rs1, src2, j, 1, width, k);
+ cpu_stb_data(env, addr,
+ env->vfp.vreg[dest + k * lmul].s64[j]);
+ k--;
+ }
+ env->vfp.vstart++;
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ }
+ }
+ env->vfp.vstart = 0;
+}
+
+void VECTOR_HELPER(vsuxb_v)(CPURISCVState *env, uint32_t nf, uint32_t vm,
+ uint32_t rs1, uint32_t rs2, uint32_t rd)
+{
+    /* The unordered-index store reuses the ordered implementation. */
+    VECTOR_HELPER(vsxb_v)(env, nf, vm, rs1, rs2, rd);
+}
+
+void VECTOR_HELPER(vsh_v)(CPURISCVState *env, uint32_t nf, uint32_t vm,
+ uint32_t rs1, uint32_t rd)
+{
+ int i, j, k, vl, vlmax, lmul, width, dest, wrote;
+
+ vl = env->vfp.vl;
+ lmul = vector_get_lmul(env);
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ if (lmul * (nf + 1) > 32) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ vector_lmul_check_reg(env, lmul, rd, false);
+
+ for (i = 0; i < vlmax; i++) {
+ dest = rd + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ k = nf;
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ while (k >= 0) {
+ wrote = (i * (nf + 1) + k) * 2;
+ cpu_stw_data(env, env->gpr[rs1] + wrote,
+ env->vfp.vreg[dest + k * lmul].s16[j]);
+ k--;
+ }
+ env->vfp.vstart++;
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ while (k >= 0) {
+ wrote = (i * (nf + 1) + k) * 2;
+ cpu_stw_data(env, env->gpr[rs1] + wrote,
+ env->vfp.vreg[dest + k * lmul].s32[j]);
+ k--;
+ }
+ env->vfp.vstart++;
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ while (k >= 0) {
+ wrote = (i * (nf + 1) + k) * 2;
+ cpu_stw_data(env, env->gpr[rs1] + wrote,
+ env->vfp.vreg[dest + k * lmul].s64[j]);
+ k--;
+ }
+ env->vfp.vstart++;
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ }
+ }
+ env->vfp.vstart = 0;
+}
+
+void VECTOR_HELPER(vssh_v)(CPURISCVState *env, uint32_t nf, uint32_t vm,
+ uint32_t rs1, uint32_t rs2, uint32_t rd)
+{
+ int i, j, k, vl, vlmax, lmul, width, dest, wrote;
+
+ vl = env->vfp.vl;
+ lmul = vector_get_lmul(env);
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ if (lmul * (nf + 1) > 32) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ vector_lmul_check_reg(env, lmul, rd, false);
+
+ for (i = 0; i < vlmax; i++) {
+ dest = rd + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ k = nf;
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ while (k >= 0) {
+ wrote = i * env->gpr[rs2] + k * 2;
+ cpu_stw_data(env, env->gpr[rs1] + wrote,
+ env->vfp.vreg[dest + k * lmul].s16[j]);
+ k--;
+ }
+ env->vfp.vstart++;
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ while (k >= 0) {
+ wrote = i * env->gpr[rs2] + k * 2;
+ cpu_stw_data(env, env->gpr[rs1] + wrote,
+ env->vfp.vreg[dest + k * lmul].s32[j]);
+ k--;
+ }
+ env->vfp.vstart++;
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ while (k >= 0) {
+ wrote = i * env->gpr[rs2] + k * 2;
+ cpu_stw_data(env, env->gpr[rs1] + wrote,
+ env->vfp.vreg[dest + k * lmul].s64[j]);
+ k--;
+ }
+ env->vfp.vstart++;
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ }
+ }
+ env->vfp.vstart = 0;
+}
+
+void VECTOR_HELPER(vsxh_v)(CPURISCVState *env, uint32_t nf, uint32_t vm,
+ uint32_t rs1, uint32_t rs2, uint32_t rd)
+{
+ int i, j, k, vl, vlmax, lmul, width, dest, src2;
+ target_ulong addr;
+
+ vl = env->vfp.vl;
+ lmul = vector_get_lmul(env);
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ if (lmul * (nf + 1) > 32) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ vector_lmul_check_reg(env, lmul, rd, false);
+
+ for (i = 0; i < vlmax; i++) {
+ dest = rd + (i / (VLEN / width));
+ src2 = rs2 + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ k = nf;
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ while (k >= 0) {
+ addr = vector_get_index(env, rs1, src2, j, 2, width, k);
+ cpu_stw_data(env, addr,
+ env->vfp.vreg[dest + k * lmul].s16[j]);
+ k--;
+ }
+ env->vfp.vstart++;
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ while (k >= 0) {
+ addr = vector_get_index(env, rs1, src2, j, 2, width, k);
+ cpu_stw_data(env, addr,
+ env->vfp.vreg[dest + k * lmul].s32[j]);
+ k--;
+ }
+ env->vfp.vstart++;
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ while (k >= 0) {
+ addr = vector_get_index(env, rs1, src2, j, 2, width, k);
+ cpu_stw_data(env, addr,
+ env->vfp.vreg[dest + k * lmul].s64[j]);
+ k--;
+ }
+ env->vfp.vstart++;
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ }
+ }
+ env->vfp.vstart = 0;
+}
+
+void VECTOR_HELPER(vsuxh_v)(CPURISCVState *env, uint32_t nf, uint32_t vm,
+ uint32_t rs1, uint32_t rs2, uint32_t rd)
+{
+    /* The unordered-index store reuses the ordered implementation. */
+    VECTOR_HELPER(vsxh_v)(env, nf, vm, rs1, rs2, rd);
+}
+
+void VECTOR_HELPER(vsw_v)(CPURISCVState *env, uint32_t nf, uint32_t vm,
+ uint32_t rs1, uint32_t rd)
+{
+ int i, j, k, vl, vlmax, lmul, width, dest, wrote;
+
+ vl = env->vfp.vl;
+
+ lmul = vector_get_lmul(env);
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ if (lmul * (nf + 1) > 32) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ vector_lmul_check_reg(env, lmul, rd, false);
+
+ for (i = 0; i < vlmax; i++) {
+ dest = rd + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ k = nf;
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ while (k >= 0) {
+ wrote = (i * (nf + 1) + k) * 4;
+ cpu_stl_data(env, env->gpr[rs1] + wrote,
+ env->vfp.vreg[dest + k * lmul].s32[j]);
+ k--;
+ }
+ env->vfp.vstart++;
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ while (k >= 0) {
+ wrote = (i * (nf + 1) + k) * 4;
+ cpu_stl_data(env, env->gpr[rs1] + wrote,
+ env->vfp.vreg[dest + k * lmul].s64[j]);
+ k--;
+ }
+ env->vfp.vstart++;
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ }
+ }
+ env->vfp.vstart = 0;
+}
+
+void VECTOR_HELPER(vssw_v)(CPURISCVState *env, uint32_t nf, uint32_t vm,
+ uint32_t rs1, uint32_t rs2, uint32_t rd)
+{
+ int i, j, k, vl, vlmax, lmul, width, dest, wrote;
+
+ vl = env->vfp.vl;
+
+ lmul = vector_get_lmul(env);
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ if (lmul * (nf + 1) > 32) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ vector_lmul_check_reg(env, lmul, rd, false);
+
+ for (i = 0; i < vlmax; i++) {
+ dest = rd + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ k = nf;
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ while (k >= 0) {
+ wrote = i * env->gpr[rs2] + k * 4;
+ cpu_stl_data(env, env->gpr[rs1] + wrote,
+ env->vfp.vreg[dest + k * lmul].s32[j]);
+ k--;
+ }
+ env->vfp.vstart++;
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ while (k >= 0) {
+ wrote = i * env->gpr[rs2] + k * 4;
+ cpu_stl_data(env, env->gpr[rs1] + wrote,
+ env->vfp.vreg[dest + k * lmul].s64[j]);
+ k--;
+ }
+ env->vfp.vstart++;
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ }
+ }
+ env->vfp.vstart = 0;
+}
+
+void VECTOR_HELPER(vsxw_v)(CPURISCVState *env, uint32_t nf, uint32_t vm,
+ uint32_t rs1, uint32_t rs2, uint32_t rd)
+{
+ int i, j, k, vl, vlmax, lmul, width, dest, src2;
+ target_ulong addr;
+
+ vl = env->vfp.vl;
+
+ lmul = vector_get_lmul(env);
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ if (lmul * (nf + 1) > 32) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ vector_lmul_check_reg(env, lmul, rd, false);
+
+ for (i = 0; i < vlmax; i++) {
+ dest = rd + (i / (VLEN / width));
+ src2 = rs2 + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ k = nf;
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ while (k >= 0) {
+ addr = vector_get_index(env, rs1, src2, j, 4, width, k);
+ cpu_stl_data(env, addr,
+ env->vfp.vreg[dest + k * lmul].s32[j]);
+ k--;
+ }
+ env->vfp.vstart++;
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ while (k >= 0) {
+ addr = vector_get_index(env, rs1, src2, j, 4, width, k);
+ cpu_stl_data(env, addr,
+ env->vfp.vreg[dest + k * lmul].s64[j]);
+ k--;
+ }
+ env->vfp.vstart++;
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ }
+ }
+ env->vfp.vstart = 0;
+}
+
+void VECTOR_HELPER(vsuxw_v)(CPURISCVState *env, uint32_t nf, uint32_t vm,
+ uint32_t rs1, uint32_t rs2, uint32_t rd)
+{
+    /* The unordered-index store reuses the ordered implementation. */
+    VECTOR_HELPER(vsxw_v)(env, nf, vm, rs1, rs2, rd);
+}
+
+void VECTOR_HELPER(vse_v)(CPURISCVState *env, uint32_t nf, uint32_t vm,
+ uint32_t rs1, uint32_t rd)
+{
+ int i, j, k, vl, vlmax, lmul, width, dest, wrote;
+
+ vl = env->vfp.vl;
+ lmul = vector_get_lmul(env);
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ if (lmul * (nf + 1) > 32) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ vector_lmul_check_reg(env, lmul, rd, false);
+
+ for (i = 0; i < vlmax; i++) {
+ dest = rd + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ k = nf;
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 8:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ while (k >= 0) {
+ wrote = i * (nf + 1) + k;
+ cpu_stb_data(env, env->gpr[rs1] + wrote,
+ env->vfp.vreg[dest + k * lmul].s8[j]);
+ k--;
+ }
+ env->vfp.vstart++;
+ }
+ break;
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ while (k >= 0) {
+ wrote = (i * (nf + 1) + k) * 2;
+ cpu_stw_data(env, env->gpr[rs1] + wrote,
+ env->vfp.vreg[dest + k * lmul].s16[j]);
+ k--;
+ }
+ env->vfp.vstart++;
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ while (k >= 0) {
+ wrote = (i * (nf + 1) + k) * 4;
+ cpu_stl_data(env, env->gpr[rs1] + wrote,
+ env->vfp.vreg[dest + k * lmul].s32[j]);
+ k--;
+ }
+ env->vfp.vstart++;
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ while (k >= 0) {
+ wrote = (i * (nf + 1) + k) * 8;
+ cpu_stq_data(env, env->gpr[rs1] + wrote,
+ env->vfp.vreg[dest + k * lmul].s64[j]);
+ k--;
+ }
+ env->vfp.vstart++;
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ }
+ }
+ env->vfp.vstart = 0;
+}
+
+void VECTOR_HELPER(vsse_v)(CPURISCVState *env, uint32_t nf, uint32_t vm,
+ uint32_t rs1, uint32_t rs2, uint32_t rd)
+{
+ int i, j, k, vl, vlmax, lmul, width, dest, wrote;
+
+ vl = env->vfp.vl;
+
+ lmul = vector_get_lmul(env);
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ if (lmul * (nf + 1) > 32) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ vector_lmul_check_reg(env, lmul, rd, false);
+
+ for (i = 0; i < vlmax; i++) {
+ dest = rd + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ k = nf;
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 8:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ while (k >= 0) {
+ wrote = i * env->gpr[rs2] + k;
+ cpu_stb_data(env, env->gpr[rs1] + wrote,
+ env->vfp.vreg[dest + k * lmul].s8[j]);
+ k--;
+ }
+ env->vfp.vstart++;
+ }
+ break;
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ while (k >= 0) {
+ wrote = i * env->gpr[rs2] + k * 2;
+ cpu_stw_data(env, env->gpr[rs1] + wrote,
+ env->vfp.vreg[dest + k * lmul].s16[j]);
+ k--;
+ }
+ env->vfp.vstart++;
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ while (k >= 0) {
+ wrote = i * env->gpr[rs2] + k * 4;
+ cpu_stl_data(env, env->gpr[rs1] + wrote,
+ env->vfp.vreg[dest + k * lmul].s32[j]);
+ k--;
+ }
+ env->vfp.vstart++;
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ while (k >= 0) {
+ wrote = i * env->gpr[rs2] + k * 8;
+ cpu_stq_data(env, env->gpr[rs1] + wrote,
+ env->vfp.vreg[dest + k * lmul].s64[j]);
+ k--;
+ }
+ env->vfp.vstart++;
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ }
+ }
+ env->vfp.vstart = 0;
+}
+
+void VECTOR_HELPER(vsxe_v)(CPURISCVState *env, uint32_t nf, uint32_t vm,
+ uint32_t rs1, uint32_t rs2, uint32_t rd)
+{
+ int i, j, k, vl, vlmax, lmul, width, dest, src2;
+ target_ulong addr;
+
+ vl = env->vfp.vl;
+ lmul = vector_get_lmul(env);
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ if (lmul * (nf + 1) > 32) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ vector_lmul_check_reg(env, lmul, rd, false);
+
+ for (i = 0; i < vlmax; i++) {
+ dest = rd + (i / (VLEN / width));
+ src2 = rs2 + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ k = nf;
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 8:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ while (k >= 0) {
+ addr = vector_get_index(env, rs1, src2, j, 1, width, k);
+ cpu_stb_data(env, addr,
+ env->vfp.vreg[dest + k * lmul].s8[j]);
+ k--;
+ }
+ env->vfp.vstart++;
+ }
+ break;
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ while (k >= 0) {
+ addr = vector_get_index(env, rs1, src2, j, 2, width, k);
+ cpu_stw_data(env, addr,
+ env->vfp.vreg[dest + k * lmul].s16[j]);
+ k--;
+ }
+ env->vfp.vstart++;
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ while (k >= 0) {
+ addr = vector_get_index(env, rs1, src2, j, 4, width, k);
+ cpu_stl_data(env, addr,
+ env->vfp.vreg[dest + k * lmul].s32[j]);
+ k--;
+ }
+ env->vfp.vstart++;
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ while (k >= 0) {
+ addr = vector_get_index(env, rs1, src2, j, 8, width, k);
+ cpu_stq_data(env, addr,
+ env->vfp.vreg[dest + k * lmul].s64[j]);
+ k--;
+ }
+ env->vfp.vstart++;
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ }
+ }
+ env->vfp.vstart = 0;
+}
+
+void VECTOR_HELPER(vsuxe_v)(CPURISCVState *env, uint32_t nf, uint32_t vm,
+ uint32_t rs1, uint32_t rs2, uint32_t rd)
+{
+ VECTOR_HELPER(vsxe_v)(env, nf, vm, rs1, rs2, rd);
+}
+
+void VECTOR_HELPER(vamoswapw_v)(CPURISCVState *env, uint32_t wd, uint32_t vm,
+ uint32_t rs1, uint32_t vs2, uint32_t vs3)
+{
+ int i, j, vl;
+ target_long idx;
+ uint32_t lmul, width, src2, src3, vlmax;
+ target_ulong addr;
+#ifdef CONFIG_SOFTMMU
+ int mem_idx = cpu_mmu_index(env, false);
+ TCGMemOp memop = MO_ALIGN | MO_TESL;
+#endif
+
+ vl = env->vfp.vl;
+ lmul = vector_get_lmul(env);
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+ /* MEM <= SEW <= XLEN */
+ if (width < 32 || (width > sizeof(target_ulong) * 8)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ /* if wd is set, vs3 is written with the old value */
+ if (vector_vtype_ill(env) ||
+ (vector_overlap_vm_common(lmul, vm, vs3) && wd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ vector_lmul_check_reg(env, lmul, vs2, false);
+ vector_lmul_check_reg(env, lmul, vs3, false);
+
+ for (i = 0; i < vlmax; i++) {
+ src2 = vs2 + (i / (VLEN / width));
+ src3 = vs3 + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ int32_t tmp;
+ idx = (target_long)env->vfp.vreg[src2].s32[j];
+ addr = idx + env->gpr[rs1];
+#ifdef CONFIG_SOFTMMU
+ tmp = helper_atomic_xchgl_le(env, addr,
+ env->vfp.vreg[src3].s32[j],
+ make_memop_idx(memop & ~MO_SIGN, mem_idx));
+#else
+ tmp = helper_atomic_xchgl_le(env, addr,
+ env->vfp.vreg[src3].s32[j]);
+#endif
+ if (wd) {
+ env->vfp.vreg[src3].s32[j] = tmp;
+ }
+ env->vfp.vstart++;
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ int64_t tmp;
+ idx = (target_long)env->vfp.vreg[src2].s64[j];
+ addr = idx + env->gpr[rs1];
+
+#ifdef CONFIG_SOFTMMU
+ tmp = (int64_t)(int32_t)helper_atomic_xchgl_le(env, addr,
+ env->vfp.vreg[src3].s64[j],
+ make_memop_idx(memop & ~MO_SIGN, mem_idx));
+#else
+ tmp = (int64_t)(int32_t)helper_atomic_xchgl_le(env, addr,
+ env->vfp.vreg[src3].s64[j]);
+#endif
+ if (wd) {
+ env->vfp.vreg[src3].s64[j] = tmp;
+ }
+ env->vfp.vstart++;
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ break;
+ }
+ } else {
+ vector_tail_amo(env, src3, j, width);
+ }
+ }
+ env->vfp.vstart = 0;
+}
+
+void VECTOR_HELPER(vamoswapd_v)(CPURISCVState *env, uint32_t wd, uint32_t vm,
+ uint32_t rs1, uint32_t vs2, uint32_t vs3)
+{
+ int i, j, vl;
+ target_long idx;
+ uint32_t lmul, width, src2, src3, vlmax;
+ target_ulong addr;
+#ifdef CONFIG_SOFTMMU
+ int mem_idx = cpu_mmu_index(env, false);
+ TCGMemOp memop = MO_ALIGN | MO_TEQ;
+#endif
+
+ vl = env->vfp.vl;
+ lmul = vector_get_lmul(env);
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+ /* MEM <= SEW <= XLEN */
+ if (width < 64 || (width > sizeof(target_ulong) * 8)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ /* if wd is set, vs3 is written with the old value */
+ if (vector_vtype_ill(env) ||
+ (vector_overlap_vm_common(lmul, vm, vs3) && wd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ vector_lmul_check_reg(env, lmul, vs2, false);
+ vector_lmul_check_reg(env, lmul, vs3, false);
+
+ for (i = 0; i < vlmax; i++) {
+ src2 = vs2 + (i / (VLEN / width));
+ src3 = vs3 + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ int64_t tmp;
+ idx = (target_long)env->vfp.vreg[src2].s64[j];
+ addr = idx + env->gpr[rs1];
+
+#ifdef CONFIG_SOFTMMU
+ tmp = helper_atomic_xchgq_le(env, addr,
+ env->vfp.vreg[src3].s64[j],
+ make_memop_idx(memop & ~MO_SIGN, mem_idx));
+#else
+ tmp = helper_atomic_xchgq_le(env, addr,
+ env->vfp.vreg[src3].s64[j]);
+#endif
+
+ if (wd) {
+ env->vfp.vreg[src3].s64[j] = tmp;
+ }
+ env->vfp.vstart++;
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ break;
+ }
+ } else {
+ vector_tail_amo(env, src3, j, width);
+ }
+ }
+ env->vfp.vstart = 0;
+}
+
+void VECTOR_HELPER(vamoaddw_v)(CPURISCVState *env, uint32_t wd, uint32_t vm,
+ uint32_t rs1, uint32_t vs2, uint32_t vs3)
+{
+ int i, j, vl;
+ target_long idx;
+ uint32_t lmul, width, src2, src3, vlmax;
+ target_ulong addr;
+#ifdef CONFIG_SOFTMMU
+ int mem_idx = cpu_mmu_index(env, false);
+ TCGMemOp memop = MO_ALIGN | MO_TESL;
+#endif
+
+ vl = env->vfp.vl;
+ lmul = vector_get_lmul(env);
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+ /* MEM <= SEW <= XLEN */
+ if (width < 32 || (width > sizeof(target_ulong) * 8)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ /* if wd is set, vs3 is written with the old value */
+ if (vector_vtype_ill(env) ||
+ (vector_overlap_vm_common(lmul, vm, vs3) && wd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ vector_lmul_check_reg(env, lmul, vs2, false);
+ vector_lmul_check_reg(env, lmul, vs3, false);
+
+ for (i = 0; i < vlmax; i++) {
+ src2 = vs2 + (i / (VLEN / width));
+ src3 = vs3 + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ int32_t tmp;
+ idx = (target_long)env->vfp.vreg[src2].s32[j];
+ addr = idx + env->gpr[rs1];
+#ifdef CONFIG_SOFTMMU
+ tmp = helper_atomic_fetch_addl_le(env, addr,
+ env->vfp.vreg[src3].s32[j],
+ make_memop_idx(memop & ~MO_SIGN, mem_idx));
+#else
+ tmp = helper_atomic_fetch_addl_le(env, addr,
+ env->vfp.vreg[src3].s32[j]);
+#endif
+ if (wd) {
+ env->vfp.vreg[src3].s32[j] = tmp;
+ }
+ env->vfp.vstart++;
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ int64_t tmp;
+ idx = (target_long)env->vfp.vreg[src2].s64[j];
+ addr = idx + env->gpr[rs1];
+
+#ifdef CONFIG_SOFTMMU
+ tmp = (int64_t)(int32_t)helper_atomic_fetch_addl_le(env,
+ addr, env->vfp.vreg[src3].s64[j],
+ make_memop_idx(memop & ~MO_SIGN, mem_idx));
+#else
+ tmp = (int64_t)(int32_t)helper_atomic_fetch_addl_le(env,
+ addr, env->vfp.vreg[src3].s64[j]);
+#endif
+ if (wd) {
+ env->vfp.vreg[src3].s64[j] = tmp;
+ }
+ env->vfp.vstart++;
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ break;
+ }
+ } else {
+ vector_tail_amo(env, src3, j, width);
+ }
+ }
+ env->vfp.vstart = 0;
+}
+
+void VECTOR_HELPER(vamoaddd_v)(CPURISCVState *env, uint32_t wd, uint32_t vm,
+ uint32_t rs1, uint32_t vs2, uint32_t vs3)
+{
+ int i, j, vl;
+ target_long idx;
+ uint32_t lmul, width, src2, src3, vlmax;
+ target_ulong addr;
+#ifdef CONFIG_SOFTMMU
+ int mem_idx = cpu_mmu_index(env, false);
+ TCGMemOp memop = MO_ALIGN | MO_TEQ;
+#endif
+
+ vl = env->vfp.vl;
+ lmul = vector_get_lmul(env);
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+ /* MEM <= SEW <= XLEN */
+ if (width < 64 || (width > sizeof(target_ulong) * 8)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ /* if wd is set, vs3 is written with the old value */
+ if (vector_vtype_ill(env) ||
+ (vector_overlap_vm_common(lmul, vm, vs3) && wd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ vector_lmul_check_reg(env, lmul, vs2, false);
+ vector_lmul_check_reg(env, lmul, vs3, false);
+
+ for (i = 0; i < vlmax; i++) {
+ src2 = vs2 + (i / (VLEN / width));
+ src3 = vs3 + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ int64_t tmp;
+ idx = (target_long)env->vfp.vreg[src2].s64[j];
+ addr = idx + env->gpr[rs1];
+
+#ifdef CONFIG_SOFTMMU
+ tmp = helper_atomic_fetch_addq_le(env, addr,
+ env->vfp.vreg[src3].s64[j],
+ make_memop_idx(memop & ~MO_SIGN, mem_idx));
+#else
+ tmp = helper_atomic_fetch_addq_le(env, addr,
+ env->vfp.vreg[src3].s64[j]);
+#endif
+ if (wd) {
+ env->vfp.vreg[src3].s64[j] = tmp;
+ }
+ env->vfp.vstart++;
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ break;
+ }
+ } else {
+ vector_tail_amo(env, src3, j, width);
+ }
+ }
+ env->vfp.vstart = 0;
+}
+
+void VECTOR_HELPER(vamoxorw_v)(CPURISCVState *env, uint32_t wd, uint32_t vm,
+ uint32_t rs1, uint32_t vs2, uint32_t vs3)
+{
+ int i, j, vl;
+ target_long idx;
+ uint32_t lmul, width, src2, src3, vlmax;
+ target_ulong addr;
+#ifdef CONFIG_SOFTMMU
+ int mem_idx = cpu_mmu_index(env, false);
+ TCGMemOp memop = MO_ALIGN | MO_TESL;
+#endif
+
+ vl = env->vfp.vl;
+ lmul = vector_get_lmul(env);
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+ /* MEM <= SEW <= XLEN */
+ if (width < 32 || (width > sizeof(target_ulong) * 8)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ /* if wd is set, vs3 is written with the old value */
+ if (vector_vtype_ill(env) ||
+ (vector_overlap_vm_common(lmul, vm, vs3) && wd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ vector_lmul_check_reg(env, lmul, vs2, false);
+ vector_lmul_check_reg(env, lmul, vs3, false);
+
+ for (i = 0; i < vlmax; i++) {
+ src2 = vs2 + (i / (VLEN / width));
+ src3 = vs3 + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ int32_t tmp;
+ idx = (target_long)env->vfp.vreg[src2].s32[j];
+ addr = idx + env->gpr[rs1];
+#ifdef CONFIG_SOFTMMU
+ tmp = helper_atomic_fetch_xorl_le(env, addr,
+ env->vfp.vreg[src3].s32[j],
+ make_memop_idx(memop & ~MO_SIGN, mem_idx));
+#else
+ tmp = helper_atomic_fetch_xorl_le(env, addr,
+ env->vfp.vreg[src3].s32[j]);
+#endif
+ if (wd) {
+ env->vfp.vreg[src3].s32[j] = tmp;
+ }
+ env->vfp.vstart++;
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ int64_t tmp;
+ idx = (target_long)env->vfp.vreg[src2].s64[j];
+ addr = idx + env->gpr[rs1];
+
+#ifdef CONFIG_SOFTMMU
+ tmp = (int64_t)(int32_t)helper_atomic_fetch_xorl_le(env,
+ addr, env->vfp.vreg[src3].s64[j],
+ make_memop_idx(memop & ~MO_SIGN, mem_idx));
+#else
+ tmp = (int64_t)(int32_t)helper_atomic_fetch_xorl_le(env,
+ addr, env->vfp.vreg[src3].s64[j]);
+#endif
+ if (wd) {
+ env->vfp.vreg[src3].s64[j] = tmp;
+ }
+ env->vfp.vstart++;
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ break;
+ }
+ } else {
+ vector_tail_amo(env, src3, j, width);
+ }
+ }
+ env->vfp.vstart = 0;
+}
+
+void VECTOR_HELPER(vamoxord_v)(CPURISCVState *env, uint32_t wd, uint32_t vm,
+ uint32_t rs1, uint32_t vs2, uint32_t vs3)
+{
+ int i, j, vl;
+ target_long idx;
+ uint32_t lmul, width, src2, src3, vlmax;
+ target_ulong addr;
+#ifdef CONFIG_SOFTMMU
+ int mem_idx = cpu_mmu_index(env, false);
+ TCGMemOp memop = MO_ALIGN | MO_TEQ;
+#endif
+
+ vl = env->vfp.vl;
+ lmul = vector_get_lmul(env);
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+ /* MEM <= SEW <= XLEN */
+ if (width < 64 || (width > sizeof(target_ulong) * 8)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ /* if wd is set, vs3 is written with the old value */
+ if (vector_vtype_ill(env) ||
+ (vector_overlap_vm_common(lmul, vm, vs3) && wd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ vector_lmul_check_reg(env, lmul, vs2, false);
+ vector_lmul_check_reg(env, lmul, vs3, false);
+
+ for (i = 0; i < vlmax; i++) {
+ src2 = vs2 + (i / (VLEN / width));
+ src3 = vs3 + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ int64_t tmp;
+ idx = (target_long)env->vfp.vreg[src2].s64[j];
+ addr = idx + env->gpr[rs1];
+
+#ifdef CONFIG_SOFTMMU
+ tmp = helper_atomic_fetch_xorq_le(env, addr,
+ env->vfp.vreg[src3].s64[j],
+ make_memop_idx(memop & ~MO_SIGN, mem_idx));
+#else
+ tmp = helper_atomic_fetch_xorq_le(env, addr,
+ env->vfp.vreg[src3].s64[j]);
+#endif
+ if (wd) {
+ env->vfp.vreg[src3].s64[j] = tmp;
+ }
+ env->vfp.vstart++;
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ break;
+ }
+ } else {
+ vector_tail_amo(env, src3, j, width);
+ }
+ }
+ env->vfp.vstart = 0;
+}
+
+void VECTOR_HELPER(vamoandw_v)(CPURISCVState *env, uint32_t wd, uint32_t vm,
+ uint32_t rs1, uint32_t vs2, uint32_t vs3)
+{
+ int i, j, vl;
+ target_long idx;
+ uint32_t lmul, width, src2, src3, vlmax;
+ target_ulong addr;
+#ifdef CONFIG_SOFTMMU
+ int mem_idx = cpu_mmu_index(env, false);
+ TCGMemOp memop = MO_ALIGN | MO_TESL;
+#endif
+
+ vl = env->vfp.vl;
+ lmul = vector_get_lmul(env);
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+ /* MEM <= SEW <= XLEN */
+ if (width < 32 || (width > sizeof(target_ulong) * 8)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ /* if wd is set, vs3 is written with the old value */
+ if (vector_vtype_ill(env) ||
+ (vector_overlap_vm_common(lmul, vm, vs3) && wd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ vector_lmul_check_reg(env, lmul, vs2, false);
+ vector_lmul_check_reg(env, lmul, vs3, false);
+
+ for (i = 0; i < vlmax; i++) {
+ src2 = vs2 + (i / (VLEN / width));
+ src3 = vs3 + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ int32_t tmp;
+ idx = (target_long)env->vfp.vreg[src2].s32[j];
+ addr = idx + env->gpr[rs1];
+#ifdef CONFIG_SOFTMMU
+ tmp = helper_atomic_fetch_andl_le(env, addr,
+ env->vfp.vreg[src3].s32[j],
+ make_memop_idx(memop & ~MO_SIGN, mem_idx));
+#else
+ tmp = helper_atomic_fetch_andl_le(env, addr,
+ env->vfp.vreg[src3].s32[j]);
+#endif
+ if (wd) {
+ env->vfp.vreg[src3].s32[j] = tmp;
+ }
+ env->vfp.vstart++;
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ int64_t tmp;
+ idx = (target_long)env->vfp.vreg[src2].s64[j];
+ addr = idx + env->gpr[rs1];
+
+#ifdef CONFIG_SOFTMMU
+ tmp = (int64_t)(int32_t)helper_atomic_fetch_andl_le(env,
+ addr, env->vfp.vreg[src3].s64[j],
+ make_memop_idx(memop & ~MO_SIGN, mem_idx));
+#else
+ tmp = (int64_t)(int32_t)helper_atomic_fetch_andl_le(env,
+ addr, env->vfp.vreg[src3].s64[j]);
+#endif
+ if (wd) {
+ env->vfp.vreg[src3].s64[j] = tmp;
+ }
+ env->vfp.vstart++;
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ break;
+ }
+ } else {
+ vector_tail_amo(env, src3, j, width);
+ }
+ }
+ env->vfp.vstart = 0;
+}
+
+void VECTOR_HELPER(vamoandd_v)(CPURISCVState *env, uint32_t wd, uint32_t vm,
+ uint32_t rs1, uint32_t vs2, uint32_t vs3)
+{
+ int i, j, vl;
+ target_long idx;
+ uint32_t lmul, width, src2, src3, vlmax;
+ target_ulong addr;
+#ifdef CONFIG_SOFTMMU
+ int mem_idx = cpu_mmu_index(env, false);
+ TCGMemOp memop = MO_ALIGN | MO_TEQ;
+#endif
+
+ vl = env->vfp.vl;
+ lmul = vector_get_lmul(env);
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+ /* MEM <= SEW <= XLEN */
+ if (width < 64 || (width > sizeof(target_ulong) * 8)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ /* if wd is set, vs3 is written with the old value */
+ if (vector_vtype_ill(env) ||
+ (vector_overlap_vm_common(lmul, vm, vs3) && wd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ vector_lmul_check_reg(env, lmul, vs2, false);
+ vector_lmul_check_reg(env, lmul, vs3, false);
+
+ for (i = 0; i < vlmax; i++) {
+ src2 = vs2 + (i / (VLEN / width));
+ src3 = vs3 + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ int64_t tmp;
+ idx = (target_long)env->vfp.vreg[src2].s64[j];
+ addr = idx + env->gpr[rs1];
+
+#ifdef CONFIG_SOFTMMU
+ tmp = helper_atomic_fetch_andq_le(env, addr,
+ env->vfp.vreg[src3].s64[j],
+ make_memop_idx(memop & ~MO_SIGN, mem_idx));
+#else
+ tmp = helper_atomic_fetch_andq_le(env, addr,
+ env->vfp.vreg[src3].s64[j]);
+#endif
+ if (wd) {
+ env->vfp.vreg[src3].s64[j] = tmp;
+ }
+ env->vfp.vstart++;
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ break;
+ }
+ } else {
+ vector_tail_amo(env, src3, j, width);
+ }
+ }
+ env->vfp.vstart = 0;
+}
+
+void VECTOR_HELPER(vamoorw_v)(CPURISCVState *env, uint32_t wd, uint32_t vm,
+ uint32_t rs1, uint32_t vs2, uint32_t vs3)
+{
+ int i, j, vl;
+ target_long idx;
+ uint32_t lmul, width, src2, src3, vlmax;
+ target_ulong addr;
+#ifdef CONFIG_SOFTMMU
+ int mem_idx = cpu_mmu_index(env, false);
+ TCGMemOp memop = MO_ALIGN | MO_TESL;
+#endif
+
+ vl = env->vfp.vl;
+ lmul = vector_get_lmul(env);
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+ /* MEM <= SEW <= XLEN */
+ if (width < 32 || (width > sizeof(target_ulong) * 8)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ /* if wd is set, vs3 is written with the old value */
+ if (vector_vtype_ill(env) ||
+ (vector_overlap_vm_common(lmul, vm, vs3) && wd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ vector_lmul_check_reg(env, lmul, vs2, false);
+ vector_lmul_check_reg(env, lmul, vs3, false);
+
+ for (i = 0; i < vlmax; i++) {
+ src2 = vs2 + (i / (VLEN / width));
+ src3 = vs3 + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ int32_t tmp;
+ idx = (target_long)env->vfp.vreg[src2].s32[j];
+ addr = idx + env->gpr[rs1];
+#ifdef CONFIG_SOFTMMU
+ tmp = helper_atomic_fetch_orl_le(env, addr,
+ env->vfp.vreg[src3].s32[j],
+ make_memop_idx(memop & ~MO_SIGN, mem_idx));
+#else
+ tmp = helper_atomic_fetch_orl_le(env, addr,
+ env->vfp.vreg[src3].s32[j]);
+#endif
+ if (wd) {
+ env->vfp.vreg[src3].s32[j] = tmp;
+ }
+ env->vfp.vstart++;
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ int64_t tmp;
+ idx = (target_long)env->vfp.vreg[src2].s64[j];
+ addr = idx + env->gpr[rs1];
+
+#ifdef CONFIG_SOFTMMU
+ tmp = (int64_t)(int32_t)helper_atomic_fetch_orl_le(env,
+ addr, env->vfp.vreg[src3].s64[j],
+ make_memop_idx(memop & ~MO_SIGN, mem_idx));
+#else
+ tmp = (int64_t)(int32_t)helper_atomic_fetch_orl_le(env,
+ addr, env->vfp.vreg[src3].s64[j]);
+#endif
+ if (wd) {
+ env->vfp.vreg[src3].s64[j] = tmp;
+ }
+ env->vfp.vstart++;
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ break;
+ }
+ } else {
+ vector_tail_amo(env, src3, j, width);
+ }
+ }
+ env->vfp.vstart = 0;
+}
+
+void VECTOR_HELPER(vamoord_v)(CPURISCVState *env, uint32_t wd, uint32_t vm,
+ uint32_t rs1, uint32_t vs2, uint32_t vs3)
+{
+ int i, j, vl;
+ target_long idx;
+ uint32_t lmul, width, src2, src3, vlmax;
+ target_ulong addr;
+#ifdef CONFIG_SOFTMMU
+ int mem_idx = cpu_mmu_index(env, false);
+ TCGMemOp memop = MO_ALIGN | MO_TEQ;
+#endif
+
+ vl = env->vfp.vl;
+ lmul = vector_get_lmul(env);
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+ /* MEM <= SEW <= XLEN */
+ if (width < 64 || (width > sizeof(target_ulong) * 8)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ /* if wd is set, vs3 is written with the old value */
+ if (vector_vtype_ill(env) ||
+ (vector_overlap_vm_common(lmul, vm, vs3) && wd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ vector_lmul_check_reg(env, lmul, vs2, false);
+ vector_lmul_check_reg(env, lmul, vs3, false);
+
+ for (i = 0; i < vlmax; i++) {
+ src2 = vs2 + (i / (VLEN / width));
+ src3 = vs3 + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ int64_t tmp;
+ idx = (target_long)env->vfp.vreg[src2].s64[j];
+ addr = idx + env->gpr[rs1];
+
+#ifdef CONFIG_SOFTMMU
+ tmp = helper_atomic_fetch_orq_le(env, addr,
+ env->vfp.vreg[src3].s64[j],
+ make_memop_idx(memop & ~MO_SIGN, mem_idx));
+#else
+ tmp = helper_atomic_fetch_orq_le(env, addr,
+ env->vfp.vreg[src3].s64[j]);
+#endif
+ if (wd) {
+ env->vfp.vreg[src3].s64[j] = tmp;
+ }
+ env->vfp.vstart++;
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ break;
+ }
+ } else {
+ vector_tail_amo(env, src3, j, width);
+ }
+ }
+ env->vfp.vstart = 0;
+}
+
+void VECTOR_HELPER(vamominw_v)(CPURISCVState *env, uint32_t wd, uint32_t vm,
+ uint32_t rs1, uint32_t vs2, uint32_t vs3)
+{
+ int i, j, vl;
+ target_long idx;
+ uint32_t lmul, width, src2, src3, vlmax;
+ target_ulong addr;
+#ifdef CONFIG_SOFTMMU
+ int mem_idx = cpu_mmu_index(env, false);
+ TCGMemOp memop = MO_ALIGN | MO_TESL;
+#endif
+
+ vl = env->vfp.vl;
+ lmul = vector_get_lmul(env);
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+ /* MEM <= SEW <= XLEN */
+ if (width < 32 || (width > sizeof(target_ulong) * 8)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ /* if wd is set, vs3 is written with the old value */
+ if (vector_vtype_ill(env) ||
+ (vector_overlap_vm_common(lmul, vm, vs3) && wd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ vector_lmul_check_reg(env, lmul, vs2, false);
+ vector_lmul_check_reg(env, lmul, vs3, false);
+
+ for (i = 0; i < vlmax; i++) {
+ src2 = vs2 + (i / (VLEN / width));
+ src3 = vs3 + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ int32_t tmp;
+ idx = (target_long)env->vfp.vreg[src2].s32[j];
+ addr = idx + env->gpr[rs1];
+#ifdef CONFIG_SOFTMMU
+ tmp = helper_atomic_fetch_sminl_le(env, addr,
+ env->vfp.vreg[src3].s32[j],
+ make_memop_idx(memop & ~MO_SIGN, mem_idx));
+#else
+ tmp = helper_atomic_fetch_sminl_le(env, addr,
+ env->vfp.vreg[src3].s32[j]);
+#endif
+ if (wd) {
+ env->vfp.vreg[src3].s32[j] = tmp;
+ }
+ env->vfp.vstart++;
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ int64_t tmp;
+ idx = (target_long)env->vfp.vreg[src2].s64[j];
+ addr = idx + env->gpr[rs1];
+
+#ifdef CONFIG_SOFTMMU
+ tmp = (int64_t)(int32_t)helper_atomic_fetch_sminl_le(env,
+ addr, env->vfp.vreg[src3].s64[j],
+ make_memop_idx(memop & ~MO_SIGN, mem_idx));
+#else
+ tmp = (int64_t)(int32_t)helper_atomic_fetch_sminl_le(env,
+ addr, env->vfp.vreg[src3].s64[j]);
+#endif
+ if (wd) {
+ env->vfp.vreg[src3].s64[j] = tmp;
+ }
+ env->vfp.vstart++;
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ break;
+ }
+ } else {
+ vector_tail_amo(env, src3, j, width);
+ }
+ }
+ env->vfp.vstart = 0;
+}
+
+void VECTOR_HELPER(vamomind_v)(CPURISCVState *env, uint32_t wd, uint32_t vm,
+ uint32_t rs1, uint32_t vs2, uint32_t vs3)
+{
+ int i, j, vl;
+ target_long idx;
+ uint32_t lmul, width, src2, src3, vlmax;
+ target_ulong addr;
+#ifdef CONFIG_SOFTMMU
+ int mem_idx = cpu_mmu_index(env, false);
+ TCGMemOp memop = MO_ALIGN | MO_TEQ;
+#endif
+
+ vl = env->vfp.vl;
+ lmul = vector_get_lmul(env);
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+ /* MEM <= SEW <= XLEN */
+ if (width < 64 || (width > sizeof(target_ulong) * 8)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ /* if wd is set, vs3 is written with the old value */
+ if (vector_vtype_ill(env) ||
+ (vector_overlap_vm_common(lmul, vm, vs3) && wd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ vector_lmul_check_reg(env, lmul, vs2, false);
+ vector_lmul_check_reg(env, lmul, vs3, false);
+
+ for (i = 0; i < vlmax; i++) {
+ src2 = vs2 + (i / (VLEN / width));
+ src3 = vs3 + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ int64_t tmp;
+ idx = (target_long)env->vfp.vreg[src2].s64[j];
+ addr = idx + env->gpr[rs1];
+
+#ifdef CONFIG_SOFTMMU
+ tmp = helper_atomic_fetch_sminq_le(env, addr,
+ env->vfp.vreg[src3].s64[j],
+ make_memop_idx(memop & ~MO_SIGN, mem_idx));
+#else
+ tmp = helper_atomic_fetch_sminq_le(env, addr,
+ env->vfp.vreg[src3].s64[j]);
+#endif
+ if (wd) {
+ env->vfp.vreg[src3].s64[j] = tmp;
+ }
+ env->vfp.vstart++;
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ break;
+ }
+ } else {
+ vector_tail_amo(env, src3, j, width);
+ }
+ }
+ env->vfp.vstart = 0;
+}
+
+void VECTOR_HELPER(vamomaxw_v)(CPURISCVState *env, uint32_t wd, uint32_t vm,
+ uint32_t rs1, uint32_t vs2, uint32_t vs3)
+{
+ int i, j, vl;
+ target_long idx;
+ uint32_t lmul, width, src2, src3, vlmax;
+ target_ulong addr;
+#ifdef CONFIG_SOFTMMU
+ int mem_idx = cpu_mmu_index(env, false);
+ TCGMemOp memop = MO_ALIGN | MO_TESL;
+#endif
+
+ vl = env->vfp.vl;
+ lmul = vector_get_lmul(env);
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+ /* MEM <= SEW <= XLEN */
+ if (width < 32 || (width > sizeof(target_ulong) * 8)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ /* if wd is set, vs3 is written with the old value */
+ if (vector_vtype_ill(env) ||
+ (vector_overlap_vm_common(lmul, vm, vs3) && wd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ vector_lmul_check_reg(env, lmul, vs2, false);
+ vector_lmul_check_reg(env, lmul, vs3, false);
+
+ for (i = 0; i < vlmax; i++) {
+ src2 = vs2 + (i / (VLEN / width));
+ src3 = vs3 + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ int32_t tmp;
+ idx = (target_long)env->vfp.vreg[src2].s32[j];
+ addr = idx + env->gpr[rs1];
+#ifdef CONFIG_SOFTMMU
+ tmp = helper_atomic_fetch_smaxl_le(env, addr,
+ env->vfp.vreg[src3].s32[j],
+ make_memop_idx(memop & ~MO_SIGN, mem_idx));
+#else
+ tmp = helper_atomic_fetch_smaxl_le(env, addr,
+ env->vfp.vreg[src3].s32[j]);
+#endif
+ if (wd) {
+ env->vfp.vreg[src3].s32[j] = tmp;
+ }
+ env->vfp.vstart++;
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ int64_t tmp;
+ idx = (target_long)env->vfp.vreg[src2].s64[j];
+ addr = idx + env->gpr[rs1];
+
+#ifdef CONFIG_SOFTMMU
+ tmp = (int64_t)(int32_t)helper_atomic_fetch_smaxl_le(env,
+ addr, env->vfp.vreg[src3].s64[j],
+ make_memop_idx(memop & ~MO_SIGN, mem_idx));
+#else
+ tmp = (int64_t)(int32_t)helper_atomic_fetch_smaxl_le(env,
+ addr, env->vfp.vreg[src3].s64[j]);
+#endif
+ if (wd) {
+ env->vfp.vreg[src3].s64[j] = tmp;
+ }
+ env->vfp.vstart++;
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ break;
+ }
+ } else {
+ vector_tail_amo(env, src3, j, width);
+ }
+ }
+ env->vfp.vstart = 0;
+}
+
+void VECTOR_HELPER(vamomaxd_v)(CPURISCVState *env, uint32_t wd, uint32_t vm,
+ uint32_t rs1, uint32_t vs2, uint32_t vs3)
+{
+ int i, j, vl;
+ target_long idx;
+ uint32_t lmul, width, src2, src3, vlmax;
+ target_ulong addr;
+#ifdef CONFIG_SOFTMMU
+ int mem_idx = cpu_mmu_index(env, false);
+ TCGMemOp memop = MO_ALIGN | MO_TEQ;
+#endif
+
+ vl = env->vfp.vl;
+ lmul = vector_get_lmul(env);
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+ /* MEM <= SEW <= XLEN */
+ if (width < 64 || (width > sizeof(target_ulong) * 8)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ /* if wd is set, vs3 is written with the old value */
+ if (vector_vtype_ill(env) ||
+ (vector_overlap_vm_common(lmul, vm, vs3) && wd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ vector_lmul_check_reg(env, lmul, vs2, false);
+ vector_lmul_check_reg(env, lmul, vs3, false);
+
+ for (i = 0; i < vlmax; i++) {
+ src2 = vs2 + (i / (VLEN / width));
+ src3 = vs3 + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ int64_t tmp;
+ idx = (target_long)env->vfp.vreg[src2].s64[j];
+ addr = idx + env->gpr[rs1];
+
+#ifdef CONFIG_SOFTMMU
+ tmp = helper_atomic_fetch_smaxq_le(env, addr,
+ env->vfp.vreg[src3].s64[j],
+ make_memop_idx(memop & ~MO_SIGN, mem_idx));
+#else
+ tmp = helper_atomic_fetch_smaxq_le(env, addr,
+ env->vfp.vreg[src3].s64[j]);
+#endif
+ if (wd) {
+ env->vfp.vreg[src3].s64[j] = tmp;
+ }
+ env->vfp.vstart++;
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ break;
+ }
+ } else {
+ vector_tail_amo(env, src3, j, width);
+ }
+ }
+ env->vfp.vstart = 0;
+}
+
+void VECTOR_HELPER(vamominuw_v)(CPURISCVState *env, uint32_t wd, uint32_t vm,
+ uint32_t rs1, uint32_t vs2, uint32_t vs3)
+{
+ int i, j, vl;
+ target_long idx;
+ uint32_t lmul, width, src2, src3, vlmax;
+ target_ulong addr;
+#ifdef CONFIG_SOFTMMU
+ int mem_idx = cpu_mmu_index(env, false);
+ TCGMemOp memop = MO_ALIGN | MO_TESL;
+#endif
+
+ vl = env->vfp.vl;
+ lmul = vector_get_lmul(env);
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+ /* MEM <= SEW <= XLEN */
+ if (width < 32 || (width > sizeof(target_ulong) * 8)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ /* if wd is set, vs3 is written with the old value */
+ if (vector_vtype_ill(env) ||
+ (vector_overlap_vm_common(lmul, vm, vs3) && wd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ vector_lmul_check_reg(env, lmul, vs2, false);
+ vector_lmul_check_reg(env, lmul, vs3, false);
+
+ for (i = 0; i < vlmax; i++) {
+ src2 = vs2 + (i / (VLEN / width));
+ src3 = vs3 + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ uint32_t tmp;
+ idx = (target_long)env->vfp.vreg[src2].s32[j];
+ addr = idx + env->gpr[rs1];
+#ifdef CONFIG_SOFTMMU
+ tmp = helper_atomic_fetch_uminl_le(env, addr,
+ env->vfp.vreg[src3].s32[j],
+ make_memop_idx(memop & ~MO_SIGN, mem_idx));
+#else
+ tmp = helper_atomic_fetch_uminl_le(env, addr,
+ env->vfp.vreg[src3].s32[j]);
+#endif
+ if (wd) {
+ env->vfp.vreg[src3].s32[j] = tmp;
+ }
+ env->vfp.vstart++;
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ uint64_t tmp;
+ idx = (target_long)env->vfp.vreg[src2].s64[j];
+ addr = idx + env->gpr[rs1];
+
+#ifdef CONFIG_SOFTMMU
+ tmp = (int64_t)(int32_t)helper_atomic_fetch_uminl_le(
+ env, addr, env->vfp.vreg[src3].s64[j],
+ make_memop_idx(memop & ~MO_SIGN, mem_idx));
+#else
+ tmp = (int64_t)(int32_t)helper_atomic_fetch_uminl_le(
+ env, addr, env->vfp.vreg[src3].s64[j]);
+#endif
+ if (wd) {
+ env->vfp.vreg[src3].s64[j] = tmp;
+ }
+ env->vfp.vstart++;
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ break;
+ }
+ } else {
+ vector_tail_amo(env, src3, j, width);
+ }
+ }
+ env->vfp.vstart = 0;
+}
+
+void VECTOR_HELPER(vamominud_v)(CPURISCVState *env, uint32_t wd, uint32_t vm,
+ uint32_t rs1, uint32_t vs2, uint32_t vs3)
+{
+ int i, j, vl;
+ target_long idx;
+ uint32_t lmul, width, src2, src3, vlmax;
+ target_ulong addr;
+#ifdef CONFIG_SOFTMMU
+ int mem_idx = cpu_mmu_index(env, false);
+ TCGMemOp memop = MO_ALIGN | MO_TEQ;
+#endif
+
+ vl = env->vfp.vl;
+ lmul = vector_get_lmul(env);
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+ /* MEM <= SEW <= XLEN */
+ if (width < 64 || (width > sizeof(target_ulong) * 8)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ /* if wd is set, vs3 is written with the old value */
+ if (vector_vtype_ill(env) ||
+ (vector_overlap_vm_common(lmul, vm, vs3) && wd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ vector_lmul_check_reg(env, lmul, vs2, false);
+ vector_lmul_check_reg(env, lmul, vs3, false);
+
+ for (i = 0; i < vlmax; i++) {
+ src2 = vs2 + (i / (VLEN / width));
+ src3 = vs3 + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ uint64_t tmp;
+ idx = (target_long)env->vfp.vreg[src2].s64[j];
+ addr = idx + env->gpr[rs1];
+
+#ifdef CONFIG_SOFTMMU
+ tmp = helper_atomic_fetch_uminq_le(
+ env, addr, env->vfp.vreg[src3].s64[j],
+ make_memop_idx(memop & ~MO_SIGN, mem_idx));
+#else
+ tmp = helper_atomic_fetch_uminq_le(env, addr,
+ env->vfp.vreg[src3].s64[j]);
+#endif
+ if (wd) {
+ env->vfp.vreg[src3].s64[j] = tmp;
+ }
+ env->vfp.vstart++;
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ break;
+ }
+ } else {
+ vector_tail_amo(env, src3, j, width);
+ }
+ }
+ env->vfp.vstart = 0;
+}
+
+void VECTOR_HELPER(vamomaxuw_v)(CPURISCVState *env, uint32_t wd, uint32_t vm,
+ uint32_t rs1, uint32_t vs2, uint32_t vs3)
+{
+ int i, j, vl;
+ target_long idx;
+ uint32_t lmul, width, src2, src3, vlmax;
+ target_ulong addr;
+#ifdef CONFIG_SOFTMMU
+ int mem_idx = cpu_mmu_index(env, false);
+ TCGMemOp memop = MO_ALIGN | MO_TESL;
+#endif
+
+ vl = env->vfp.vl;
+ lmul = vector_get_lmul(env);
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+ /* MEM <= SEW <= XLEN */
+ if (width < 32 || (width > sizeof(target_ulong) * 8)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ /* if wd is set, vs3 is written with the old value */
+ if (vector_vtype_ill(env) ||
+ (vector_overlap_vm_common(lmul, vm, vs3) && wd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ vector_lmul_check_reg(env, lmul, vs2, false);
+ vector_lmul_check_reg(env, lmul, vs3, false);
+
+ for (i = 0; i < vlmax; i++) {
+ src2 = vs2 + (i / (VLEN / width));
+ src3 = vs3 + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ uint32_t tmp;
+ idx = (target_long)env->vfp.vreg[src2].s32[j];
+ addr = idx + env->gpr[rs1];
+#ifdef CONFIG_SOFTMMU
+ tmp = helper_atomic_fetch_umaxl_le(env, addr,
+ env->vfp.vreg[src3].s32[j],
+ make_memop_idx(memop & ~MO_SIGN, mem_idx));
+#else
+ tmp = helper_atomic_fetch_umaxl_le(env, addr,
+ env->vfp.vreg[src3].s32[j]);
+#endif
+ if (wd) {
+ env->vfp.vreg[src3].s32[j] = tmp;
+ }
+ env->vfp.vstart++;
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ uint64_t tmp;
+ idx = (target_long)env->vfp.vreg[src2].s64[j];
+ addr = idx + env->gpr[rs1];
+
+#ifdef CONFIG_SOFTMMU
+ tmp = (int64_t)(int32_t)helper_atomic_fetch_umaxl_le(
+ env, addr, env->vfp.vreg[src3].s64[j],
+ make_memop_idx(memop & ~MO_SIGN, mem_idx));
+#else
+ tmp = (int64_t)(int32_t)helper_atomic_fetch_umaxl_le(
+ env, addr, env->vfp.vreg[src3].s64[j]);
+#endif
+ if (wd) {
+ env->vfp.vreg[src3].s64[j] = tmp;
+ }
+ env->vfp.vstart++;
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ break;
+ }
+ } else {
+ vector_tail_amo(env, src3, j, width);
+ }
+ }
+ env->vfp.vstart = 0;
+}
+
+void VECTOR_HELPER(vamomaxud_v)(CPURISCVState *env, uint32_t wd, uint32_t vm,
+ uint32_t rs1, uint32_t vs2, uint32_t vs3)
+{
+ int i, j, vl;
+ target_long idx;
+ uint32_t lmul, width, src2, src3, vlmax;
+ target_ulong addr;
+#ifdef CONFIG_SOFTMMU
+ int mem_idx = cpu_mmu_index(env, false);
+ TCGMemOp memop = MO_ALIGN | MO_TEQ;
+#endif
+
+ vl = env->vfp.vl;
+ lmul = vector_get_lmul(env);
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+ /* MEM <= SEW <= XLEN */
+ if (width < 64 || (width > sizeof(target_ulong) * 8)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ /* if wd is set, vs3 is written with the old value */
+ if (vector_vtype_ill(env) ||
+ (vector_overlap_vm_common(lmul, vm, vs3) && wd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ vector_lmul_check_reg(env, lmul, vs2, false);
+ vector_lmul_check_reg(env, lmul, vs3, false);
+
+ for (i = 0; i < vlmax; i++) {
+ src2 = vs2 + (i / (VLEN / width));
+ src3 = vs3 + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ uint64_t tmp;
+ idx = (target_long)env->vfp.vreg[src2].s64[j];
+ addr = idx + env->gpr[rs1];
+
+#ifdef CONFIG_SOFTMMU
+ tmp = helper_atomic_fetch_umaxq_le(
+ env, addr, env->vfp.vreg[src3].s64[j],
+ make_memop_idx(memop & ~MO_SIGN, mem_idx));
+#else
+ tmp = helper_atomic_fetch_umaxq_le(env, addr,
+ env->vfp.vreg[src3].s64[j]);
+#endif
+ if (wd) {
+ env->vfp.vreg[src3].s64[j] = tmp;
+ }
+ env->vfp.vstart++;
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ break;
+ }
+ } else {
+ vector_tail_amo(env, src3, j, width);
+ }
+ }
+ env->vfp.vstart = 0;
+}
+
--
2.7.4
^ permalink raw reply related [flat|nested] 52+ messages in thread
* Re: [Qemu-devel] [PATCH] RISCV: support riscv vector extension 0.7.1
2019-08-28 2:36 [Qemu-riscv] [PATCH] RISCV: support riscv vector extension 0.7.1 liuzhiwei
@ 2019-08-28 9:08 ` Alex Bennée
2019-08-28 18:54 ` [Qemu-riscv] " Richard Henderson
` (3 subsequent siblings)
4 siblings, 0 replies; 52+ messages in thread
From: Alex Bennée @ 2019-08-28 9:08 UTC (permalink / raw)
To: liuzhiwei
Cc: peter.maydell, palmer, qemu-riscv, sagark, kbastian, riku.voipio,
qemu-devel, laurent, Alistair.Francis, aurelien
liuzhiwei <zhiwei_liu@c-sky.com> writes:
> Change-Id: I3cf891bc400713b95f47ecca82b1bf773f3dcb25
> Signed-off-by: liuzhiwei <zhiwei_liu@c-sky.com>
> ---
> fpu/softfloat.c | 119 +
> include/fpu/softfloat.h | 4 +
Changes to softfloat should be in a separate patch, but see below.
> linux-user/riscv/cpu_loop.c | 8 +-
> target/riscv/Makefile.objs | 2 +-
> target/riscv/cpu.h | 30 +
> target/riscv/cpu_bits.h | 15 +
> target/riscv/cpu_helper.c | 7 +
> target/riscv/csr.c | 65 +-
> target/riscv/helper.h | 354 +
> target/riscv/insn32.decode | 374 +-
> target/riscv/insn_trans/trans_rvv.inc.c | 484 +
> target/riscv/translate.c | 1 +
> target/riscv/vector_helper.c | 26563 ++++++++++++++++++++++++++++++
This is likely too big to be reviewed. Is it possible to split the patch
up into more discrete chunks, for example support pieces and then maybe
a class of instructions at a time?
> 13 files changed, 28017 insertions(+), 9 deletions(-)
> create mode 100644 target/riscv/insn_trans/trans_rvv.inc.c
> create mode 100644 target/riscv/vector_helper.c
>
> diff --git a/fpu/softfloat.c b/fpu/softfloat.c
> index 2ba36ec..da155ea 100644
> --- a/fpu/softfloat.c
> +++ b/fpu/softfloat.c
> @@ -433,6 +433,16 @@ static inline int extractFloat16Exp(float16 a)
> }
>
> /*----------------------------------------------------------------------------
> +| Returns the sign bit of the half-precision floating-point value `a'.
> +*----------------------------------------------------------------------------*/
> +
> +static inline flag extractFloat16Sign(float16 a)
> +{
> + return float16_val(a) >> 0xf;
> +}
> +
We are trying to avoid this sort of bit fiddling for new code when we
already have generic decompose functions that can extract all the parts
into a common format.
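For instance (an untested sketch, using inline helpers that already
exist in include/fpu/softfloat.h rather than open-coding the layout):

    /* sign bit without a hand-rolled shift */
    bool sign = float16_is_neg(a);
    /* exponent all-ones with a non-zero fraction, i.e. any NaN */
    bool nan = float16_is_any_nan(a);

which keeps knowledge of the float16 encoding in one place.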
> +
> +/*----------------------------------------------------------------------------
> | Returns the fraction bits of the single-precision floating-point value `a'.
> *----------------------------------------------------------------------------*/
>
> @@ -4790,6 +4800,35 @@ int float32_eq(float32 a, float32 b, float_status *status)
> }
>
> /*----------------------------------------------------------------------------
> +| Returns 1 if the half-precision floating-point value `a' is less than
> +| or equal to the corresponding value `b', and 0 otherwise. The invalid
> +| exception is raised if either operand is a NaN. The comparison is performed
> +| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
> +*----------------------------------------------------------------------------*/
> +
> +int float16_le(float16 a, float16 b, float_status *status)
> +{
> + flag aSign, bSign;
> + uint16_t av, bv;
> + a = float16_squash_input_denormal(a, status);
> + b = float16_squash_input_denormal(b, status);
> +
> + if ( ( ( extractFloat16Exp( a ) == 0x1F ) && extractFloat16Frac( a ) )
> + || ( ( extractFloat16Exp( b ) == 0x1F ) && extractFloat16Frac( b ) )
> + ) {
> + float_raise(float_flag_invalid, status);
> + return 0;
> + }
> + aSign = extractFloat16Sign( a );
> + bSign = extractFloat16Sign( b );
> + av = float16_val(a);
> + bv = float16_val(b);
> + if ( aSign != bSign ) return aSign || ( (uint16_t) ( ( av | bv )<<1 ) == 0 );
> + return ( av == bv ) || ( aSign ^ ( av < bv ) );
> +
> +}
What does this provide that:
float16_compare(a, b, status) == float_relation_less;
doesn't?
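i.e. if the le/lt entry points are needed at all, they could be thin
(untested) wrappers:

    int float16_le(float16 a, float16 b, float_status *status)
    {
        int r = float16_compare(a, b, status);
        /* unordered and greater both compare false */
        return r == float_relation_less || r == float_relation_equal;
    }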
> +
> +/*----------------------------------------------------------------------------
> | Returns 1 if the single-precision floating-point value `a' is less than
> | or equal to the corresponding value `b', and 0 otherwise. The invalid
> | exception is raised if either operand is a NaN. The comparison is performed
> @@ -4825,6 +4864,35 @@ int float32_le(float32 a, float32 b, float_status *status)
> | to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
> *----------------------------------------------------------------------------*/
>
> +int float16_lt(float16 a, float16 b, float_status *status)
> +{
> + flag aSign, bSign;
> + uint16_t av, bv;
> + a = float16_squash_input_denormal(a, status);
> + b = float16_squash_input_denormal(b, status);
> +
> + if ( ( ( extractFloat16Exp( a ) == 0x1F ) && extractFloat16Frac( a ) )
> + || ( ( extractFloat16Exp( b ) == 0x1F ) && extractFloat16Frac( b ) )
> + ) {
> + float_raise(float_flag_invalid, status);
> + return 0;
> + }
> + aSign = extractFloat16Sign( a );
> + bSign = extractFloat16Sign( b );
> + av = float16_val(a);
> + bv = float16_val(b);
> + if ( aSign != bSign ) return aSign && ( (uint16_t) ( ( av | bv )<<1 ) != 0 );
> + return ( av != bv ) && ( aSign ^ ( av < bv ) );
> +
> +}
> +
> +/*----------------------------------------------------------------------------
> +| Returns 1 if the single-precision floating-point value `a' is less than
> +| the corresponding value `b', and 0 otherwise. The invalid exception is
> +| raised if either operand is a NaN. The comparison is performed according
> +| to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
> +*----------------------------------------------------------------------------*/
> +
> int float32_lt(float32 a, float32 b, float_status *status)
> {
> flag aSign, bSign;
> @@ -4869,6 +4937,32 @@ int float32_unordered(float32 a, float32 b, float_status *status)
> }
>
> /*----------------------------------------------------------------------------
> +| Returns 1 if the half-precision floating-point value `a' is equal to
> +| the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an
> +| exception. The comparison is performed according to the IEC/IEEE Standard
> +| for Binary Floating-Point Arithmetic.
> +*----------------------------------------------------------------------------*/
> +
> +int float16_eq_quiet(float16 a, float16 b, float_status *status)
> +{
> + a = float16_squash_input_denormal(a, status);
> + b = float16_squash_input_denormal(b, status);
> +
> + if ( ( ( extractFloat16Exp( a ) == 0x1F ) && extractFloat16Frac( a ) )
> + || ( ( extractFloat16Exp( b ) == 0x1F ) && extractFloat16Frac( b ) )
> + ) {
> + if (float16_is_signaling_nan(a, status)
> + || float16_is_signaling_nan(b, status)) {
> + float_raise(float_flag_invalid, status);
> + }
> + return 0;
> + }
> + return ( float16_val(a) == float16_val(b) ) ||
> + ( (uint16_t) ( ( float16_val(a) | float16_val(b) )<<1 ) == 0 );
> +}
> +
See also float16_compare_quiet.
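i.e. (untested sketch):

    int float16_eq_quiet(float16 a, float16 b, float_status *status)
    {
        return float16_compare_quiet(a, b, status) == float_relation_equal;
    }

and float16_unordered_quiet below falls out of the same call by testing
for float_relation_unordered instead.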
> +
> +/*----------------------------------------------------------------------------
> | Returns 1 if the single-precision floating-point value `a' is equal to
> | the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an
> | exception. The comparison is performed according to the IEC/IEEE Standard
> @@ -4958,6 +5052,31 @@ int float32_lt_quiet(float32 a, float32 b, float_status *status)
> }
>
> /*----------------------------------------------------------------------------
> +| Returns 1 if the half-precision floating-point values `a' and `b' cannot
> +| be compared, and 0 otherwise. Quiet NaNs do not cause an exception. The
> +| comparison is performed according to the IEC/IEEE Standard for Binary
> +| Floating-Point Arithmetic.
> +*----------------------------------------------------------------------------*/
> +
> +int float16_unordered_quiet(float16 a, float16 b, float_status *status)
> +{
> + a = float16_squash_input_denormal(a, status);
> + b = float16_squash_input_denormal(b, status);
> +
> + if ( ( ( extractFloat16Exp( a ) == 0x1F ) && extractFloat16Frac( a ) )
> + || ( ( extractFloat16Exp( b ) == 0x1F ) && extractFloat16Frac( b ) )
> + ) {
> + if (float16_is_signaling_nan(a, status)
> + || float16_is_signaling_nan(b, status)) {
> + float_raise(float_flag_invalid, status);
> + }
> + return 1;
> + }
> + return 0;
> +}
> +
> +
> +/*----------------------------------------------------------------------------
> | Returns 1 if the single-precision floating-point values `a' and `b' cannot
> | be compared, and 0 otherwise. Quiet NaNs do not cause an exception. The
> | comparison is performed according to the IEC/IEEE Standard for Binary
> diff --git a/include/fpu/softfloat.h b/include/fpu/softfloat.h
> index 3ff3fa5..3b0754c 100644
> --- a/include/fpu/softfloat.h
> +++ b/include/fpu/softfloat.h
> @@ -293,6 +293,10 @@ float16 float16_maxnummag(float16, float16, float_status *status);
> float16 float16_sqrt(float16, float_status *status);
> int float16_compare(float16, float16, float_status *status);
> int float16_compare_quiet(float16, float16, float_status *status);
> +int float16_unordered_quiet(float16, float16, float_status *status);
> +int float16_le(float16, float16, float_status *status);
> +int float16_lt(float16, float16, float_status *status);
> +int float16_eq_quiet(float16, float16, float_status *status);
>
> int float16_is_quiet_nan(float16, float_status *status);
> int float16_is_signaling_nan(float16, float_status *status);
> diff --git a/linux-user/riscv/cpu_loop.c b/linux-user/riscv/cpu_loop.c
> index 12aa3c0..b01548a 100644
> --- a/linux-user/riscv/cpu_loop.c
> +++ b/linux-user/riscv/cpu_loop.c
> @@ -40,7 +40,13 @@ void cpu_loop(CPURISCVState *env)
> signum = 0;
> sigcode = 0;
> sigaddr = 0;
> -
> + if (env->foflag) {
> + if (env->vfp.vl != 0) {
> + env->foflag = false;
> + env->pc += 4;
> + continue;
> + }
> + }
What is this trying to do?
> switch (trapnr) {
> case EXCP_INTERRUPT:
> /* just indicate that signals should be handled asap */
> diff --git a/target/riscv/Makefile.objs b/target/riscv/Makefile.objs
> index b1c79bc..d577cef 100644
> --- a/target/riscv/Makefile.objs
> +++ b/target/riscv/Makefile.objs
> @@ -1,4 +1,4 @@
> -obj-y += translate.o op_helper.o cpu_helper.o cpu.o csr.o fpu_helper.o gdbstub.o pmp.o
> +obj-y += translate.o op_helper.o cpu_helper.o cpu.o csr.o fpu_helper.o vector_helper.o gdbstub.o pmp.o
>
> DECODETREE = $(SRC_PATH)/scripts/decodetree.py
>
> diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h
> index 0adb307..5a93aa2 100644
> --- a/target/riscv/cpu.h
> +++ b/target/riscv/cpu.h
> @@ -67,6 +67,7 @@
> #define RVC RV('C')
> #define RVS RV('S')
> #define RVU RV('U')
> +#define RVV RV('V')
>
> /* S extension denotes that Supervisor mode exists, however it is possible
> to have a core that support S mode but does not have an MMU and there
> @@ -93,9 +94,38 @@ typedef struct CPURISCVState CPURISCVState;
>
> #include "pmp.h"
>
> +#define VLEN 128
> +#define VUNIT(x) (VLEN / x)
> +
If you want to do vectors I suggest you look at the TCGv_vec types for
passing pointers to vector registers to helpers. In this case you will
want to ensure your vector registers are properly aligned.
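One common pattern (rough sketch only, the helper name here is made
up) is to mark the register file as aligned:

    } vreg[32] QEMU_ALIGNED(16);

and then hand helpers a pointer into env at translate time:

    static TCGv_ptr vreg_ptr(int regno)
    {
        TCGv_ptr p = tcg_temp_new_ptr();
        /* pointer to the regno'th vector register inside env */
        tcg_gen_addi_ptr(p, cpu_env,
                         offsetof(CPURISCVState, vfp.vreg[regno]));
        return p;
    }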
> struct CPURISCVState {
> target_ulong gpr[32];
> uint64_t fpr[32]; /* assume both F and D extensions */
> +
> + /* vector coprocessor state. */
> + struct {
> + union VECTOR {
> + float64 f64[VUNIT(64)];
> + float32 f32[VUNIT(32)];
> + float16 f16[VUNIT(16)];
> + target_ulong ul[VUNIT(sizeof(target_ulong))];
> + uint64_t u64[VUNIT(64)];
> + int64_t s64[VUNIT(64)];
> + uint32_t u32[VUNIT(32)];
> + int32_t s32[VUNIT(32)];
> + uint16_t u16[VUNIT(16)];
> + int16_t s16[VUNIT(16)];
> + uint8_t u8[VUNIT(8)];
> + int8_t s8[VUNIT(8)];
> + } vreg[32];
> + target_ulong vxrm;
> + target_ulong vxsat;
> + target_ulong vl;
> + target_ulong vstart;
> + target_ulong vtype;
> + float_status fp_status;
> + } vfp;
> +
> + bool foflag;
Again I have no idea what foflag is here.
> target_ulong pc;
> target_ulong load_res;
> target_ulong load_val;
> diff --git a/target/riscv/cpu_bits.h b/target/riscv/cpu_bits.h
> index 11f971a..9eb43ec 100644
> --- a/target/riscv/cpu_bits.h
> +++ b/target/riscv/cpu_bits.h
> @@ -29,6 +29,14 @@
> #define FSR_NXA (FPEXC_NX << FSR_AEXC_SHIFT)
> #define FSR_AEXC (FSR_NVA | FSR_OFA | FSR_UFA | FSR_DZA | FSR_NXA)
>
> +/* Vector Fixed-Point round model */
> +#define FSR_VXRM_SHIFT 9
> +#define FSR_VXRM (0x3 << FSR_VXRM_SHIFT)
> +
> +/* Vector Fixed-Point saturation flag */
> +#define FSR_VXSAT_SHIFT 8
> +#define FSR_VXSAT (0x1 << FSR_VXSAT_SHIFT)
> +
> /* Control and Status Registers */
>
> /* User Trap Setup */
> @@ -48,6 +56,13 @@
> #define CSR_FRM 0x002
> #define CSR_FCSR 0x003
>
> +/* User Vector CSRs */
> +#define CSR_VSTART 0x008
> +#define CSR_VXSAT 0x009
> +#define CSR_VXRM 0x00a
> +#define CSR_VL 0xc20
> +#define CSR_VTYPE 0xc21
> +
> /* User Timers and Counters */
> #define CSR_CYCLE 0xc00
> #define CSR_TIME 0xc01
> diff --git a/target/riscv/cpu_helper.c b/target/riscv/cpu_helper.c
> index e32b612..405caf6 100644
> --- a/target/riscv/cpu_helper.c
> +++ b/target/riscv/cpu_helper.c
> @@ -521,6 +521,13 @@ void riscv_cpu_do_interrupt(CPUState *cs)
> [PRV_H] = RISCV_EXCP_H_ECALL,
> [PRV_M] = RISCV_EXCP_M_ECALL
> };
> + if (env->foflag) {
> + if (env->vfp.vl != 0) {
> + env->foflag = false;
> + env->pc += 4;
> + return;
> + }
> + }
>
> if (!async) {
> /* set tval to badaddr for traps with address information */
> diff --git a/target/riscv/csr.c b/target/riscv/csr.c
> index e0d4586..a6131ff 100644
> --- a/target/riscv/csr.c
> +++ b/target/riscv/csr.c
> @@ -87,12 +87,12 @@ static int ctr(CPURISCVState *env, int csrno)
> return 0;
> }
>
> -#if !defined(CONFIG_USER_ONLY)
> static int any(CPURISCVState *env, int csrno)
> {
> return 0;
> }
>
> +#if !defined(CONFIG_USER_ONLY)
> static int smode(CPURISCVState *env, int csrno)
> {
> return -!riscv_has_ext(env, RVS);
> @@ -158,8 +158,10 @@ static int read_fcsr(CPURISCVState *env, int csrno, target_ulong *val)
> return -1;
> }
> #endif
> - *val = (riscv_cpu_get_fflags(env) << FSR_AEXC_SHIFT)
> - | (env->frm << FSR_RD_SHIFT);
> + *val = (env->vfp.vxrm << FSR_VXRM_SHIFT)
> + | (env->vfp.vxsat << FSR_VXSAT_SHIFT)
> + | (riscv_cpu_get_fflags(env) << FSR_AEXC_SHIFT)
> + | (env->frm << FSR_RD_SHIFT);
> return 0;
> }
>
> @@ -172,10 +174,60 @@ static int write_fcsr(CPURISCVState *env, int csrno, target_ulong val)
> env->mstatus |= MSTATUS_FS;
> #endif
> env->frm = (val & FSR_RD) >> FSR_RD_SHIFT;
> + env->vfp.vxrm = (val & FSR_VXRM) >> FSR_VXRM_SHIFT;
> + env->vfp.vxsat = (val & FSR_VXSAT) >> FSR_VXSAT_SHIFT;
> riscv_cpu_set_fflags(env, (val & FSR_AEXC) >> FSR_AEXC_SHIFT);
> return 0;
> }
>
> +static int read_vtype(CPURISCVState *env, int csrno, target_ulong *val)
> +{
> + *val = env->vfp.vtype;
> + return 0;
> +}
> +
> +static int read_vl(CPURISCVState *env, int csrno, target_ulong *val)
> +{
> + *val = env->vfp.vl;
> + return 0;
> +}
> +
> +static int read_vxrm(CPURISCVState *env, int csrno, target_ulong *val)
> +{
> + *val = env->vfp.vxrm;
> + return 0;
> +}
> +
> +static int read_vxsat(CPURISCVState *env, int csrno, target_ulong *val)
> +{
> + *val = env->vfp.vxsat;
> + return 0;
> +}
> +
> +static int read_vstart(CPURISCVState *env, int csrno, target_ulong *val)
> +{
> + *val = env->vfp.vstart;
> + return 0;
> +}
> +
> +static int write_vxrm(CPURISCVState *env, int csrno, target_ulong val)
> +{
> + env->vfp.vxrm = val;
> + return 0;
> +}
> +
> +static int write_vxsat(CPURISCVState *env, int csrno, target_ulong val)
> +{
> + env->vfp.vxsat = val;
> + return 0;
> +}
> +
> +static int write_vstart(CPURISCVState *env, int csrno, target_ulong val)
> +{
> + env->vfp.vstart = val;
> + return 0;
> +}
A fixed return value makes me think these should be void functions.
> +
> /* User Timers and Counters */
> static int read_instret(CPURISCVState *env, int csrno, target_ulong *val)
> {
> @@ -873,7 +925,12 @@ static riscv_csr_operations csr_ops[CSR_TABLE_SIZE] = {
> [CSR_FFLAGS] = { fs, read_fflags, write_fflags },
> [CSR_FRM] = { fs, read_frm, write_frm },
> [CSR_FCSR] = { fs, read_fcsr, write_fcsr },
> -
> + /* Vector CSRs */
> + [CSR_VSTART] = { any, read_vstart, write_vstart },
> + [CSR_VXSAT] = { any, read_vxsat, write_vxsat },
> + [CSR_VXRM] = { any, read_vxrm, write_vxrm },
> + [CSR_VL] = { any, read_vl },
> + [CSR_VTYPE] = { any, read_vtype },
> /* User Timers and Counters */
> [CSR_CYCLE] = { ctr, read_instret },
> [CSR_INSTRET] = { ctr, read_instret },
> diff --git a/target/riscv/helper.h b/target/riscv/helper.h
> index debb22a..fee02c0 100644
> --- a/target/riscv/helper.h
> +++ b/target/riscv/helper.h
> @@ -76,3 +76,357 @@ DEF_HELPER_2(mret, tl, env, tl)
> DEF_HELPER_1(wfi, void, env)
> DEF_HELPER_1(tlb_flush, void, env)
> #endif
> +/* Vector functions */
Think about how you could split this patch up to introduce a group of
instructions at a time. This will make it a lot easier to review.
I'm going to leave review of the specifics to the RISCV maintainers but
I suspect they will want to wait until a v2 of the series. However it
looks like a good first pass at implementing vectors.
--
Alex Bennée
^ permalink raw reply [flat|nested] 52+ messages in thread
* Re: [Qemu-devel] [PATCH] RISCV: support riscv vector extension 0.7.1
2019-08-28 9:08 ` [Qemu-riscv] " Alex Bennée
@ 2019-08-28 16:39 ` Richard Henderson
-1 siblings, 0 replies; 52+ messages in thread
From: Richard Henderson @ 2019-08-28 16:39 UTC (permalink / raw)
To: Alex Bennée, liuzhiwei
Cc: peter.maydell, palmer, qemu-riscv, sagark, kbastian, riku.voipio,
qemu-devel, laurent, Alistair.Francis, aurelien
On 8/28/19 2:08 AM, Alex Bennée wrote:
> If you want to do vectors I suggest you look at the TCGvec types for
> passing pointers to vector registers to helpers. In this case you will
> want to ensure your vector registers are properly aligned.
The risc-v vector extension is very different from any other existing vector
extension. In particular, the locations of the vector elements vary
dynamically. Except for certain special cases I doubt that risc-v can make
direct use of the generic TCG vector support.
r~
^ permalink raw reply [flat|nested] 52+ messages in thread
* Re: [Qemu-devel] [PATCH] RISCV: support riscv vector extension 0.7.1
2019-08-28 2:36 [Qemu-riscv] [PATCH] RISCV: support riscv vector extension 0.7.1 liuzhiwei
@ 2019-08-28 18:54 ` Richard Henderson
2019-08-28 18:54 ` [Qemu-riscv] " Richard Henderson
` (3 subsequent siblings)
4 siblings, 0 replies; 52+ messages in thread
From: Richard Henderson @ 2019-08-28 18:54 UTC (permalink / raw)
To: liuzhiwei, qemu-devel, qemu-riscv
Cc: peter.maydell, palmer, sagark, kbastian, riku.voipio, laurent,
Alistair.Francis, alex.bennee, aurelien
On 8/27/19 7:36 PM, liuzhiwei wrote:
> Change-Id: I3cf891bc400713b95f47ecca82b1bf773f3dcb25
> Signed-off-by: liuzhiwei <zhiwei_liu@c-sky.com>
> ---
> fpu/softfloat.c | 119 +
> include/fpu/softfloat.h | 4 +
> linux-user/riscv/cpu_loop.c | 8 +-
> target/riscv/Makefile.objs | 2 +-
> target/riscv/cpu.h | 30 +
> target/riscv/cpu_bits.h | 15 +
> target/riscv/cpu_helper.c | 7 +
> target/riscv/csr.c | 65 +-
> target/riscv/helper.h | 354 +
> target/riscv/insn32.decode | 374 +-
> target/riscv/insn_trans/trans_rvv.inc.c | 484 +
> target/riscv/translate.c | 1 +
> target/riscv/vector_helper.c | 26563 ++++++++++++++++++++++++++++++
> 13 files changed, 28017 insertions(+), 9 deletions(-)
As Alex mentioned, this is *far* too big to be presented as a single patch.
> diff --git a/include/fpu/softfloat.h b/include/fpu/softfloat.h
> index 3ff3fa5..3b0754c 100644
> --- a/include/fpu/softfloat.h
> +++ b/include/fpu/softfloat.h
> @@ -293,6 +293,10 @@ float16 float16_maxnummag(float16, float16, float_status *status);
> float16 float16_sqrt(float16, float_status *status);
> int float16_compare(float16, float16, float_status *status);
> int float16_compare_quiet(float16, float16, float_status *status);
> +int float16_unordered_quiet(float16, float16, float_status *status);
> +int float16_le(float16, float16, float_status *status);
> +int float16_lt(float16, float16, float_status *status);
> +int float16_eq_quiet(float16, float16, float_status *status);
As Alex mentioned, none of these changes are required, as all
functionality is provided by float16_compare{,_quiet}.
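For instance, all four predicates reduce to one-liners over the existing
API (a sketch; float_relation_* are the softfloat comparison results, and
float16_compare is the signaling variant, so NaN operands raise invalid
exactly as the hand-rolled versions do):

    static inline int float16_le(float16 a, float16 b, float_status *s)
    {
        int r = float16_compare(a, b, s);
        return r == float_relation_less || r == float_relation_equal;
    }

    static inline int float16_lt(float16 a, float16 b, float_status *s)
    {
        return float16_compare(a, b, s) == float_relation_less;
    }

    static inline int float16_eq_quiet(float16 a, float16 b, float_status *s)
    {
        return float16_compare_quiet(a, b, s) == float_relation_equal;
    }

    static inline int float16_unordered_quiet(float16 a, float16 b,
                                              float_status *s)
    {
        return float16_compare_quiet(a, b, s) == float_relation_unordered;
    }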
> diff --git a/linux-user/riscv/cpu_loop.c b/linux-user/riscv/cpu_loop.c
> index 12aa3c0..b01548a 100644
> --- a/linux-user/riscv/cpu_loop.c
> +++ b/linux-user/riscv/cpu_loop.c
> @@ -40,7 +40,13 @@ void cpu_loop(CPURISCVState *env)
> signum = 0;
> sigcode = 0;
> sigaddr = 0;
> -
> + if (env->foflag) {
> + if (env->vfp.vl != 0) {
> + env->foflag = false;
> + env->pc += 4;
> + continue;
> + }
This is most definitely not the correct way to implement first-fault.
You need to have a look at target/arm/sve_helper.c, e.g. sve_ldff1_r,
where we test pages for validity with tlb_vaddr_to_host.
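The core of that pattern, transplanted here as a sketch (the helper name
and the vfp fields are illustrative, and a NULL result from
tlb_vaddr_to_host is treated pessimistically, where sve_ldff1_r probes
more carefully):

    /* First-fault sketch: only element 0 may really trap; a later
     * element whose page is not known-resident just clips vl. */
    static void vext_ldff_sketch(CPURISCVState *env, uint8_t *vd,
                                 target_ulong base, uintptr_t ra)
    {
        int mmu_idx = cpu_mmu_index(env, false);
        target_ulong i;

        for (i = 0; i < env->vfp.vl; i++) {
            target_ulong addr = base + i;
            if (i != 0 &&
                !tlb_vaddr_to_host(env, addr, MMU_DATA_LOAD, mmu_idx)) {
                env->vfp.vl = i;    /* no trap: shorten vl instead */
                return;
            }
            vd[i] = cpu_ldub_data_ra(env, addr, ra);
        }
    }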
> + /* vector coprocessor state. */
> + struct {
> + union VECTOR {
> + float64 f64[VUNIT(64)];
> + float32 f32[VUNIT(32)];
> + float16 f16[VUNIT(16)];
> + target_ulong ul[VUNIT(sizeof(target_ulong))];
> + uint64_t u64[VUNIT(64)];
> + int64_t s64[VUNIT(64)];
> + uint32_t u32[VUNIT(32)];
> + int32_t s32[VUNIT(32)];
> + uint16_t u16[VUNIT(16)];
> + int16_t s16[VUNIT(16)];
> + uint8_t u8[VUNIT(8)];
> + int8_t s8[VUNIT(8)];
> + } vreg[32];
> + target_ulong vxrm;
> + target_ulong vxsat;
> + target_ulong vl;
> + target_ulong vstart;
> + target_ulong vtype;
> + float_status fp_status;
> + } vfp;
You've obviously copied "vfp" from target/arm. Drop that. It makes no sense
in the context of risc-v.
I'm not sure that vreg[].element[] really makes the most sense in the context
of how risc-v rearranges its elements. It will almost certainly fail clang
validators, if enabled, since you'll be indexing beyond the end of vreg[n] into
vreg[n+1].
It might be best to have a single array:

    union {
        uint64_t u64[32 * VLEN / 64];
        ...
        uint8_t u8[32 * VLEN / 8];
    } velt;
This is clearer to the compiler that this is a single block of memory that we
can index as we please.
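With that layout, element i of v[n] becomes a single index computation,
e.g. (a sketch, ignoring the SLEN striping point below):

    uint32_t *e32 = &env->velt.u32[n * (VLEN / 32) + i];   /* SEW=32 */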
Note that float64/float32/float16 are legacy. They will always be equivalent
to the unsigned integer types of the same size.
Is there really any vector operation at all that is dependent on XLEN? If not,
then there is no reason to confuse things by including target_ulong.
> diff --git a/target/riscv/cpu_helper.c b/target/riscv/cpu_helper.c
> index e32b612..405caf6 100644
> --- a/target/riscv/cpu_helper.c
> +++ b/target/riscv/cpu_helper.c
> @@ -521,6 +521,13 @@ void riscv_cpu_do_interrupt(CPUState *cs)
> [PRV_H] = RISCV_EXCP_H_ECALL,
> [PRV_M] = RISCV_EXCP_M_ECALL
> };
> + if (env->foflag) {
> + if (env->vfp.vl != 0) {
> + env->foflag = false;
> + env->pc += 4;
> + return;
> + }
> + }
Again, not the way to implement first-fault.
In particular, you haven't even verified that do_interrupt has been called on
behalf of a RISCV_EXCP_LOAD_PAGE_FAULT. This could be a timer tick.
> +#define MAX_U8 ((uint8_t)0xff)
> +#define MIN_U8 ((uint8_t)0x0)
> +#define MAX_S8 ((int8_t)0x7f)
> +#define MIN_S8 ((int8_t)0x80)
> +#define SIGNBIT16 (1 << 15)
> +#define MAX_U16 ((uint16_t)0xffff)
> +#define MIN_U16 ((uint16_t)0x0)
> +#define MAX_S16 ((int16_t)0x7fff)
> +#define MIN_S16 ((int16_t)0x8000)
> +#define SIGNBIT32 (1 << 31)
> +#define MAX_U32 ((uint32_t)0xffffffff)
> +#define MIN_U32 ((uint32_t)0x0)
> +#define MAX_S32 ((int32_t)0x7fffffff)
> +#define MIN_S32 ((int32_t)0x80000000)
> +#define SIGNBIT64 ((uint64_t)1 << 63)
> +#define MAX_U64 ((uint64_t)0xffffffffffffffff)
> +#define MIN_U64 ((uint64_t)0x0)
> +#define MAX_S64 ((int64_t)0x7fffffffffffffff)
> +#define MIN_S64 ((int64_t)0x8000000000000000)
Why are you replicating INT8_MIN et al?
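These are just the <stdint.h> constants, which even a build-time check
confirms (a sketch; QEMU_BUILD_BUG_ON is from qemu/compiler.h):

    QEMU_BUILD_BUG_ON(MAX_S8 != INT8_MAX);
    QEMU_BUILD_BUG_ON(MIN_S64 != INT64_MIN);
    QEMU_BUILD_BUG_ON(MAX_U64 != UINT64_MAX);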
> +static target_ulong vector_get_index(CPURISCVState *env, int rs1, int rs2,
> + int index, int mem, int width, int nf)
> +{
> + target_ulong abs_off, base = env->gpr[rs1];
> + target_long offset;
> + switch (width) {
> + case 8:
> + offset = sign_extend(env->vfp.vreg[rs2].s8[index], 8) + nf * mem;
> + break;
> + case 16:
> + offset = sign_extend(env->vfp.vreg[rs2].s16[index], 16) + nf * mem;
> + break;
> + case 32:
> + offset = sign_extend(env->vfp.vreg[rs2].s32[index], 32) + nf * mem;
> + break;
> + case 64:
> + offset = env->vfp.vreg[rs2].s64[index] + nf * mem;
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
This is broken. You cannot use GETPC() anywhere except in the outermost
HELPER(). Otherwise you're not computing the return address back into the
code_gen_buffer, which is what is required to properly unwind the guest state.
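The usual pattern is to capture the return address once, in the
outermost helper, and thread it down as a plain uintptr_t (a sketch; the
argument names are guesses at the patch's encoding):

    static void do_index_load(CPURISCVState *env, uint32_t width,
                              uintptr_t ra)
    {
        if (width != 8 && width != 16 && width != 32 && width != 64) {
            /* unwinds correctly: ra came from the generated code */
            riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, ra);
        }
        /* ... do the accesses with the cpu_ld*_data_ra variants ... */
    }

    void HELPER(vector_vlxb_v)(CPURISCVState *env, uint32_t vd,
                               uint32_t rs1, uint32_t rs2,
                               uint32_t nf, uint32_t vm)
    {
        do_index_load(env, 8, GETPC());   /* GETPC() only valid here */
    }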
> +static inline bool vector_vtype_ill(CPURISCVState *env)
> +{
> + if ((env->vfp.vtype >> (sizeof(target_ulong) - 1)) & 0x1) {
> + return true;
> + }
> + return false;
> +}
> +
> +static inline void vector_vtype_set_ill(CPURISCVState *env)
> +{
> + env->vfp.vtype = ((target_ulong)1) << (sizeof(target_ulong) - 1);
> + return;
> +}
> +
> +static inline int vector_vtype_get_sew(CPURISCVState *env)
> +{
> + return (env->vfp.vtype >> 2) & 0x7;
> +}
> +
> +static inline int vector_get_width(CPURISCVState *env)
> +{
> + return 8 * (1 << vector_vtype_get_sew(env));
> +}
> +
> +static inline int vector_get_lmul(CPURISCVState *env)
> +{
> + return 1 << (env->vfp.vtype & 0x3);
> +}
> +
> +static inline int vector_get_vlmax(CPURISCVState *env)
> +{
> + return vector_get_lmul(env) * VLEN / vector_get_width(env);
> +}
> +
> +static inline int vector_elem_mask(CPURISCVState *env, uint32_t vm, int width,
> + int lmul, int index)
> +{
> + int mlen = width / lmul;
> + int idx = (index * mlen) / 8;
> + int pos = (index * mlen) % 8;
> +
> + return vm || ((env->vfp.vreg[0].u8[idx] >> pos) & 0x1);
> +}
I would strongly encourage you to place the components of vtype within tb_flags
via cpu_get_tb_cpu_state. This would allow you to move quite a few checks from
run-time to translation-time.
Recall that translation happens once (per configuration), whereas execution
happens many times. Obviously, the more configurations that we create, the
more translation that must happen.
But the vtypei argument to vsetvli is a good choice, because it is constant,
relates directly to the compiled code, and is unrelated to the length of the
data being processed.
With that, you can verify at translation:
(1) vill
(2) v[n], for (n % lmul) != 0
(3) v[n] overlapping v[0] for masked/carry operations, with lmul > 1
and
(4) you can arrange the helpers so that instead of 1 helper that has to
handle all SEW, you have N helpers, each handling a different SEW.
And with all of this done, I believe you no longer need to pass the register
number to the helper. You can pass the address of v[n], which is much more
like how the tcg generic vector support works.
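A minimal sketch of that packing, assuming the FIELD/FIELD_DP32 macros
from hw/registerfields.h (the bit layout is illustrative, and the
existing mstatus.FS handling is elided):

    FIELD(TB_FLAGS, VILL, 0, 1)
    FIELD(TB_FLAGS, SEW,  1, 3)
    FIELD(TB_FLAGS, LMUL, 4, 2)

    static inline void cpu_get_tb_cpu_state(CPURISCVState *env,
                                            target_ulong *pc,
                                            target_ulong *cs_base,
                                            uint32_t *flags)
    {
        uint32_t f = 0;

        *pc = env->pc;
        *cs_base = 0;
        f = FIELD_DP32(f, TB_FLAGS, VILL, vector_vtype_ill(env));
        f = FIELD_DP32(f, TB_FLAGS, SEW, vector_vtype_get_sew(env));
        f = FIELD_DP32(f, TB_FLAGS, LMUL, env->vfp.vtype & 0x3);
        *flags = f;
    }

The translator then reads SEW/LMUL out of tb->flags when it initializes
its DisasContext instead of consulting env at run time.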
Whether or not to include VL in tb_flags is a harder choice. Certainly not the
exact value of VL, as that would lead to different translations for every loop
tail. But it might be reasonable to include (VSTART == 0 && VL == VLMAX) as a
single bit. Knowing that this condition is true would allow some use of the
tcg generic vector support.
E.g. vadd.vv could be

    if (masked) {
        switch (SEW) {
        case MO_8:
            gen_helper_vadd8_mask(...);
            break;
        ...
        }
    } else if (vl_eq_vlmax) {
        tcg_gen_gvec_add(SEW, vreg_ofs(vd), vreg_ofs(vs2), vreg_ofs(vs1),
                         VLEN * LMUL, VLEN * LMUL);
    } else {
        switch (SEW) {
        case MO_8:
            gen_helper_vadd8(...);
            break;
        ...
        }
    }
Or, equivalently, pack pointers to the actual generator functions into a
structure so that this code structure can be shared between many instructions.
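E.g. (a sketch assuming the GVecGen3Fn typedef from tcg-op-gvec.h and
simd_desc() from tcg-gvec-desc.h; vreg_ofs/vreg_ptr and the DisasContext
fields are illustrative names):

    typedef void gen_helper_opivv(TCGv_ptr, TCGv_ptr, TCGv_ptr,
                                  TCGv_env, TCGv_i32);

    typedef struct {
        gen_helper_opivv *fn[4];        /* per-SEW helpers, MO_8..MO_64 */
        gen_helper_opivv *fn_mask[4];
        GVecGen3Fn *gvec;               /* e.g. tcg_gen_gvec_add */
    } opivv_info;

    static void do_opivv(DisasContext *s, const opivv_info *op,
                         int vd, int vs1, int vs2, bool masked)
    {
        if (!masked && s->vl_eq_vlmax) {
            op->gvec(s->sew, vreg_ofs(s, vd), vreg_ofs(s, vs2),
                     vreg_ofs(s, vs1), s->vlen * s->lmul,
                     s->vlen * s->lmul);
        } else {
            TCGv_ptr dst = vreg_ptr(s, vd);
            TCGv_ptr src1 = vreg_ptr(s, vs1);
            TCGv_ptr src2 = vreg_ptr(s, vs2);
            TCGv_i32 desc = tcg_const_i32(simd_desc(s->vlen, s->vlen, 0));

            (masked ? op->fn_mask : op->fn)[s->sew](dst, src1, src2,
                                                    cpu_env, desc);
            tcg_temp_free_ptr(dst);
            tcg_temp_free_ptr(src1);
            tcg_temp_free_ptr(src2);
            tcg_temp_free_i32(desc);
        }
    }

so that each trans_* function only fills in one opivv_info.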
Bear in mind that all tcg gvec operations operate strictly upon lanes. I.e.
vd[x] = vs1[x] op vs2[x]
thus the actual arrangement of the elements in storage is irrelevant and SLEN
need not be considered here.
r~
^ permalink raw reply [flat|nested] 52+ messages in thread
* Re: [Qemu-riscv] [Qemu-devel] [PATCH] RISCV: support riscv vector extension 0.7.1
2019-08-28 2:36 [Qemu-riscv] [PATCH] RISCV: support riscv vector extension 0.7.1 liuzhiwei
2019-08-28 9:08 ` [Qemu-riscv] " Alex Bennée
2019-08-28 18:54 ` [Qemu-riscv] " Richard Henderson
@ 2019-08-28 19:20 ` Aleksandar Markovic
2019-08-29 12:56 ` liuzhiwei
2019-08-28 21:34 ` [Qemu-riscv] " Alistair Francis
2019-08-29 14:06 ` [Qemu-riscv] " Chih-Min Chao
4 siblings, 1 reply; 52+ messages in thread
From: Aleksandar Markovic @ 2019-08-28 19:20 UTC (permalink / raw)
To: liuzhiwei
Cc: QEMU Developers, open list:RISC-V, Peter Maydell, Palmer Dabbelt,
Sagar Karandikar, Bastian Koppelmann, Riku Voipio,
Laurent Vivier, Alistair Francis, Alex Bennée,
Aurelien Jarno
> On Wed, Aug 28, 2019 at 9:04 AM liuzhiwei <zhiwei_liu@c-sky.com> wrote:
> Change-Id: I3cf891bc400713b95f47ecca82b1bf773f3dcb25
> Signed-off-by: liuzhiwei <zhiwei_liu@c-sky.com>
> ---
>
Such a large patch, and "Change-Id: I3cf891bc400713b95f47ecca82b1bf773f3dcb25"
is its entire commit message?? Horrible.
Aleksandar
> +DEF_HELPER_4(vector_vmsbf_m, void, env, i32, i32, i32)
> +DEF_HELPER_4(vector_vmsof_m, void, env, i32, i32, i32)
> +DEF_HELPER_4(vector_vmsif_m, void, env, i32, i32, i32)
> +DEF_HELPER_4(vector_viota_m, void, env, i32, i32, i32)
> +DEF_HELPER_3(vector_vid_v, void, env, i32, i32)
> +DEF_HELPER_4(vector_vfcvt_xu_f_v, void, env, i32, i32, i32)
> +DEF_HELPER_4(vector_vfcvt_x_f_v, void, env, i32, i32, i32)
> +DEF_HELPER_4(vector_vfcvt_f_xu_v, void, env, i32, i32, i32)
> +DEF_HELPER_4(vector_vfcvt_f_x_v, void, env, i32, i32, i32)
> +DEF_HELPER_4(vector_vfwcvt_xu_f_v, void, env, i32, i32, i32)
> +DEF_HELPER_4(vector_vfwcvt_x_f_v, void, env, i32, i32, i32)
> +DEF_HELPER_4(vector_vfwcvt_f_xu_v, void, env, i32, i32, i32)
> +DEF_HELPER_4(vector_vfwcvt_f_x_v, void, env, i32, i32, i32)
> +DEF_HELPER_4(vector_vfwcvt_f_f_v, void, env, i32, i32, i32)
> +DEF_HELPER_4(vector_vfncvt_xu_f_v, void, env, i32, i32, i32)
> +DEF_HELPER_4(vector_vfncvt_x_f_v, void, env, i32, i32, i32)
> +DEF_HELPER_4(vector_vfncvt_f_xu_v, void, env, i32, i32, i32)
> +DEF_HELPER_4(vector_vfncvt_f_x_v, void, env, i32, i32, i32)
> +DEF_HELPER_4(vector_vfncvt_f_f_v, void, env, i32, i32, i32)
> +DEF_HELPER_4(vector_vfsqrt_v, void, env, i32, i32, i32)
> +DEF_HELPER_4(vector_vfclass_v, void, env, i32, i32, i32)
> +DEF_HELPER_5(vector_vadd_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vadd_vx, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vadd_vi, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vredsum_vs, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vfadd_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vfadd_vf, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vredand_vs, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vfredsum_vs, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vsub_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vsub_vx, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vredor_vs, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vfsub_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vfsub_vf, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vrsub_vx, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vrsub_vi, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vredxor_vs, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vfredosum_vs, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vminu_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vminu_vx, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vredminu_vs, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vfmin_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vfmin_vf, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vmin_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vmin_vx, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vredmin_vs, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vfredmin_vs, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vmaxu_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vmaxu_vx, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vredmaxu_vs, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vfmax_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vfmax_vf, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vmax_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vmax_vx, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vredmax_vs, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vfredmax_vs, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vfsgnj_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vfsgnj_vf, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vand_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vand_vx, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vand_vi, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vfsgnjn_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vfsgnjn_vf, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vor_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vor_vx, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vor_vi, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vfsgnjx_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vfsgnjx_vf, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vxor_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vxor_vx, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vxor_vi, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vrgather_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vrgather_vx, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vrgather_vi, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vslideup_vx, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vslideup_vi, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vslide1up_vx, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vslidedown_vx, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vslidedown_vi, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vslide1down_vx, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vmerge_vvm, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vmerge_vxm, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vmerge_vim, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vfmerge_vfm, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vmseq_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vmseq_vx, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vmseq_vi, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vmfeq_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vmfeq_vf, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vmsne_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vmsne_vx, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vmsne_vi, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vmfle_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vmfle_vf, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vmsltu_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vmsltu_vx, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vmford_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vmford_vf, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vmslt_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vmslt_vx, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vmflt_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vmflt_vf, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vmsleu_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vmsleu_vx, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vmsleu_vi, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vmfne_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vmfne_vf, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vmsle_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vmsle_vx, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vmsle_vi, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vmfgt_vf, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vmsgtu_vx, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vmsgtu_vi, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vmsgt_vx, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vmsgt_vi, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vmfge_vf, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vsaddu_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vsaddu_vx, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vsaddu_vi, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vdivu_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vdivu_vx, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vfdiv_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vfdiv_vf, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vsadd_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vsadd_vx, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vsadd_vi, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vdiv_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vdiv_vx, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vfrdiv_vf, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vssubu_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vssubu_vx, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vremu_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vremu_vx, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vssub_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vssub_vx, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vrem_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vrem_vx, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vaadd_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vaadd_vx, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vaadd_vi, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vmulhu_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vmulhu_vx, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vfmul_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vfmul_vf, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vsll_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vsll_vx, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vsll_vi, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vmul_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vmul_vx, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vasub_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vasub_vx, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vmulhsu_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vmulhsu_vx, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vsmul_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vsmul_vx, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vmulh_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vmulh_vx, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vfrsub_vf, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vsrl_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vsrl_vx, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vsrl_vi, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vfmadd_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vfmadd_vf, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vsra_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vsra_vx, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vsra_vi, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vmadd_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vmadd_vx, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vfnmadd_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vfnmadd_vf, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vssrl_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vssrl_vx, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vssrl_vi, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vfmsub_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vfmsub_vf, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vssra_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vssra_vx, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vssra_vi, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vnmsub_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vnmsub_vx, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vfnmsub_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vfnmsub_vf, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vnsrl_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vnsrl_vx, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vnsrl_vi, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vfmacc_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vfmacc_vf, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vnsra_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vnsra_vx, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vnsra_vi, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vmacc_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vmacc_vx, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vfnmacc_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vfnmacc_vf, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vnclipu_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vnclipu_vx, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vnclipu_vi, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vfmsac_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vfmsac_vf, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vnclip_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vnclip_vx, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vnclip_vi, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vnmsac_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vnmsac_vx, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vfnmsac_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vfnmsac_vf, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vwredsumu_vs, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vwaddu_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vwaddu_vx, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vfwadd_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vfwadd_vf, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vwredsum_vs, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vwadd_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vwadd_vx, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vfwredsum_vs, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vwsubu_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vwsubu_vx, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vfwsub_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vfwsub_vf, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vwsub_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vwsub_vx, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vfwredosum_vs, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vwaddu_wv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vwaddu_wx, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vfwadd_wv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vfwadd_wf, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vwadd_wv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vwadd_wx, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vwsubu_wv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vwsubu_wx, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vfwsub_wv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vfwsub_wf, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vwsub_wv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vwsub_wx, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vwmulu_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vwmulu_vx, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vfwmul_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vfwmul_vf, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vwmulsu_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vwmulsu_vx, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vwmul_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vwmul_vx, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vwsmaccu_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vwsmaccu_vx, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vwmaccu_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vwmaccu_vx, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vfwmacc_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vfwmacc_vf, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vwsmacc_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vwsmacc_vx, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vwmacc_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vwmacc_vx, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vfwnmacc_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vfwnmacc_vf, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vwsmaccsu_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vwsmaccsu_vx, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vwmaccsu_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vwmaccsu_vx, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vfwmsac_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vfwmsac_vf, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vwsmaccus_vx, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vwmaccus_vx, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vfwnmsac_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vfwnmsac_vf, void, env, i32, i32, i32, i32)
> +DEF_HELPER_4(vector_vsetvli, void, env, i32, i32, i32)
> +DEF_HELPER_4(vector_vsetvl, void, env, i32, i32, i32)
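For anyone not fluent in DEF_HELPER_N: each line declares a helper callable from TCG-generated code, with 'env' mapping to CPURISCVState * and 'i32' to uint32_t (see exec/helper-proto.h). The first load helper, for instance, expands to roughly this prototype (parameter names are mine, taken from the call sites in trans_rvv.inc.c below):

void helper_vector_vlb_v(CPURISCVState *env, uint32_t nf, uint32_t vm,
                         uint32_t rs1, uint32_t rd);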
> diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode
> index 77f794e..d125ff9 100644
> --- a/target/riscv/insn32.decode
> +++ b/target/riscv/insn32.decode
> @@ -25,7 +25,7 @@
> %sh10 20:10
> %csr 20:12
> %rm 12:3
> -
> +%nf 29:3
> # immediates:
> %imm_i 20:s12
> %imm_s 25:s7 7:5
> @@ -43,7 +43,6 @@
> &u imm rd
> &shift shamt rs1 rd
> &atomic aq rl rs2 rs1 rd
> -
> # Formats 32:
> @r ....... ..... ..... ... ..... ....... &r %rs2 %rs1 %rd
> @i ............ ..... ... ..... ....... &i imm=%imm_i %rs1 %rd
> @@ -62,11 +61,17 @@
> @r_rm ....... ..... ..... ... ..... ....... %rs2 %rs1 %rm %rd
> @r2_rm ....... ..... ..... ... ..... ....... %rs1 %rm %rd
> @r2 ....... ..... ..... ... ..... ....... %rs1 %rd
> +@r_vm ...... vm:1 ..... ..... ... ..... ....... %rs2 %rs1 %rd
> +@r_wdvm ..... wd:1 vm:1 ..... ..... ... ..... ....... %rs2 %rs1 %rd
> +@r_nfvm nf:3 ... vm:1 ..... ..... ... ..... ....... %rs2 %rs1 %rd
> +@r2_nfvm nf:3 ... vm:1 ..... ..... ... ..... ....... %rs1 %rd
> +@r2_vm ...... vm:1 ..... ..... ... ..... ....... %rs2 %rd
> +@r1_vm ...... vm:1 ..... ..... ... ..... ....... %rd
> +@r2_zimm . zimm:11 ..... ... ..... ....... %rs1 %rd
>
> @sfence_vma ....... ..... ..... ... ..... ....... %rs2 %rs1
> @sfence_vm ....... ..... ..... ... ..... ....... %rs1
>
> -
> # *** Privileged Instructions ***
> ecall 000000000000 00000 000 00000 1110011
> ebreak 000000000001 00000 000 00000 1110011
> @@ -203,3 +208,366 @@ fcvt_w_d 1100001 00000 ..... ... ..... 1010011 @r2_rm
> fcvt_wu_d 1100001 00001 ..... ... ..... 1010011 @r2_rm
> fcvt_d_w 1101001 00000 ..... ... ..... 1010011 @r2_rm
> fcvt_d_wu 1101001 00001 ..... ... ..... 1010011 @r2_rm
> +
> +# *** RV32V Standard Extension ***
> +
> +# *** Vector loads and stores are encoded within LOADFP/STORE-FP ***
> +vlb_v ... 100 . 00000 ..... 000 ..... 0000111 @r2_nfvm
> +vlh_v ... 100 . 00000 ..... 101 ..... 0000111 @r2_nfvm
> +vlw_v ... 100 . 00000 ..... 110 ..... 0000111 @r2_nfvm
> +vle_v ... 000 . 00000 ..... 111 ..... 0000111 @r2_nfvm
> +vlbu_v ... 000 . 00000 ..... 000 ..... 0000111 @r2_nfvm
> +vlhu_v ... 000 . 00000 ..... 101 ..... 0000111 @r2_nfvm
> +vlwu_v ... 000 . 00000 ..... 110 ..... 0000111 @r2_nfvm
> +vlbff_v ... 100 . 10000 ..... 000 ..... 0000111 @r2_nfvm
> +vlhff_v ... 100 . 10000 ..... 101 ..... 0000111 @r2_nfvm
> +vlwff_v ... 100 . 10000 ..... 110 ..... 0000111 @r2_nfvm
> +vleff_v ... 000 . 10000 ..... 111 ..... 0000111 @r2_nfvm
> +vlbuff_v ... 000 . 10000 ..... 000 ..... 0000111 @r2_nfvm
> +vlhuff_v ... 000 . 10000 ..... 101 ..... 0000111 @r2_nfvm
> +vlwuff_v ... 000 . 10000 ..... 110 ..... 0000111 @r2_nfvm
> +vsb_v ... 000 . 00000 ..... 000 ..... 0100111 @r2_nfvm
> +vsh_v ... 000 . 00000 ..... 101 ..... 0100111 @r2_nfvm
> +vsw_v ... 000 . 00000 ..... 110 ..... 0100111 @r2_nfvm
> +vse_v ... 000 . 00000 ..... 111 ..... 0100111 @r2_nfvm
> +
> +vlsb_v ... 110 . ..... ..... 000 ..... 0000111 @r_nfvm
> +vlsh_v ... 110 . ..... ..... 101 ..... 0000111 @r_nfvm
> +vlsw_v ... 110 . ..... ..... 110 ..... 0000111 @r_nfvm
> +vlse_v ... 010 . ..... ..... 111 ..... 0000111 @r_nfvm
> +vlsbu_v ... 010 . ..... ..... 000 ..... 0000111 @r_nfvm
> +vlshu_v ... 010 . ..... ..... 101 ..... 0000111 @r_nfvm
> +vlswu_v ... 010 . ..... ..... 110 ..... 0000111 @r_nfvm
> +vssb_v ... 010 . ..... ..... 000 ..... 0100111 @r_nfvm
> +vssh_v ... 010 . ..... ..... 101 ..... 0100111 @r_nfvm
> +vssw_v ... 010 . ..... ..... 110 ..... 0100111 @r_nfvm
> +vsse_v ... 010 . ..... ..... 111 ..... 0100111 @r_nfvm
> +
> +vlxb_v ... 111 . ..... ..... 000 ..... 0000111 @r_nfvm
> +vlxh_v ... 111 . ..... ..... 101 ..... 0000111 @r_nfvm
> +vlxw_v ... 111 . ..... ..... 110 ..... 0000111 @r_nfvm
> +vlxe_v ... 011 . ..... ..... 111 ..... 0000111 @r_nfvm
> +vlxbu_v ... 011 . ..... ..... 000 ..... 0000111 @r_nfvm
> +vlxhu_v ... 011 . ..... ..... 101 ..... 0000111 @r_nfvm
> +vlxwu_v ... 011 . ..... ..... 110 ..... 0000111 @r_nfvm
> +vsxb_v ... 011 . ..... ..... 000 ..... 0100111 @r_nfvm
> +vsxh_v ... 011 . ..... ..... 101 ..... 0100111 @r_nfvm
> +vsxw_v ... 011 . ..... ..... 110 ..... 0100111 @r_nfvm
> +vsxe_v ... 011 . ..... ..... 111 ..... 0100111 @r_nfvm
> +vsuxb_v ... 111 . ..... ..... 000 ..... 0100111 @r_nfvm
> +vsuxh_v ... 111 . ..... ..... 101 ..... 0100111 @r_nfvm
> +vsuxw_v ... 111 . ..... ..... 110 ..... 0100111 @r_nfvm
> +vsuxe_v ... 111 . ..... ..... 111 ..... 0100111 @r_nfvm
> +
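To make the formats concrete: for an @r2_nfvm pattern such as vlb_v, the generated decoder extracts the operand fields from the 32-bit instruction word equivalently to this hand-written version (extract32() is QEMU's bitfield helper from qemu/bitops.h):

uint32_t nf  = extract32(insn, 29, 3);  /* %nf 29:3 */
uint32_t vm  = extract32(insn, 25, 1);  /* mask-enable bit */
uint32_t rs1 = extract32(insn, 15, 5);  /* base address register */
uint32_t rd  = extract32(insn, 7, 5);   /* destination vector register */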
> +#*** Vector AMO operations are encoded under the standard AMO major opcode.***
> +vamoswapw_v 00001 . . ..... ..... 110 ..... 0101111 @r_wdvm
> +vamoswapd_v 00001 . . ..... ..... 111 ..... 0101111 @r_wdvm
> +vamoaddw_v 00000 . . ..... ..... 110 ..... 0101111 @r_wdvm
> +vamoaddd_v 00000 . . ..... ..... 111 ..... 0101111 @r_wdvm
> +vamoxorw_v 00100 . . ..... ..... 110 ..... 0101111 @r_wdvm
> +vamoxord_v 00100 . . ..... ..... 111 ..... 0101111 @r_wdvm
> +vamoandw_v 01100 . . ..... ..... 110 ..... 0101111 @r_wdvm
> +vamoandd_v 01100 . . ..... ..... 111 ..... 0101111 @r_wdvm
> +vamoorw_v 01000 . . ..... ..... 110 ..... 0101111 @r_wdvm
> +vamoord_v 01000 . . ..... ..... 111 ..... 0101111 @r_wdvm
> +vamominw_v 10000 . . ..... ..... 110 ..... 0101111 @r_wdvm
> +vamomind_v 10000 . . ..... ..... 111 ..... 0101111 @r_wdvm
> +vamomaxw_v 10100 . . ..... ..... 110 ..... 0101111 @r_wdvm
> +vamomaxd_v 10100 . . ..... ..... 111 ..... 0101111 @r_wdvm
> +vamominuw_v 11000 . . ..... ..... 110 ..... 0101111 @r_wdvm
> +vamominud_v 11000 . . ..... ..... 111 ..... 0101111 @r_wdvm
> +vamomaxuw_v 11100 . . ..... ..... 110 ..... 0101111 @r_wdvm
> +vamomaxud_v 11100 . . ..... ..... 111 ..... 0101111 @r_wdvm
> +
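On the wd bit these encodings carry: in the 0.7.1 draft, wd=1 requests that the original memory value be written back to vd, while wd=0 leaves vd untouched. A sketch of one element of vamoaddw.v under that reading (cpu_ldl_data/cpu_stl_data are QEMU's guest-memory accessors; the element plumbing is illustrative only):

static void vamoaddw_elem(CPURISCVState *env, target_ulong addr,
                          int32_t src, int32_t *vd_elem, int wd)
{
    int32_t old = (int32_t)cpu_ldl_data(env, addr);

    cpu_stl_data(env, addr, old + src);
    if (wd) {
        *vd_elem = old;   /* wd=1: vd receives the pre-update value */
    }
}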
> +#*** new major opcode OP-V ***
> +vadd_vv 000000 . ..... ..... 000 ..... 1010111 @r_vm
> +vadd_vx 000000 . ..... ..... 100 ..... 1010111 @r_vm
> +vadd_vi 000000 . ..... ..... 011 ..... 1010111 @r_vm
> +vredsum_vs 000000 . ..... ..... 010 ..... 1010111 @r_vm
> +vfadd_vv 000000 . ..... ..... 001 ..... 1010111 @r_vm
> +vfadd_vf 000000 . ..... ..... 101 ..... 1010111 @r_vm
> +vredand_vs 000001 . ..... ..... 010 ..... 1010111 @r_vm
> +vfredsum_vs 000001 . ..... ..... 001 ..... 1010111 @r_vm
> +vsub_vv 000010 . ..... ..... 000 ..... 1010111 @r_vm
> +vsub_vx 000010 . ..... ..... 100 ..... 1010111 @r_vm
> +vredor_vs 000010 . ..... ..... 010 ..... 1010111 @r_vm
> +vfsub_vv 000010 . ..... ..... 001 ..... 1010111 @r_vm
> +vfsub_vf 000010 . ..... ..... 101 ..... 1010111 @r_vm
> +vrsub_vx 000011 . ..... ..... 100 ..... 1010111 @r_vm
> +vrsub_vi 000011 . ..... ..... 011 ..... 1010111 @r_vm
> +vredxor_vs 000011 . ..... ..... 010 ..... 1010111 @r_vm
> +vfredosum_vs 000011 . ..... ..... 001 ..... 1010111 @r_vm
> +vminu_vv 000100 . ..... ..... 000 ..... 1010111 @r_vm
> +vminu_vx 000100 . ..... ..... 100 ..... 1010111 @r_vm
> +vredminu_vs 000100 . ..... ..... 010 ..... 1010111 @r_vm
> +vfmin_vv 000100 . ..... ..... 001 ..... 1010111 @r_vm
> +vfmin_vf 000100 . ..... ..... 101 ..... 1010111 @r_vm
> +vmin_vv 000101 . ..... ..... 000 ..... 1010111 @r_vm
> +vmin_vx 000101 . ..... ..... 100 ..... 1010111 @r_vm
> +vredmin_vs 000101 . ..... ..... 010 ..... 1010111 @r_vm
> +vfredmin_vs 000101 . ..... ..... 001 ..... 1010111 @r_vm
> +vmaxu_vv 000110 . ..... ..... 000 ..... 1010111 @r_vm
> +vmaxu_vx 000110 . ..... ..... 100 ..... 1010111 @r_vm
> +vredmaxu_vs 000110 . ..... ..... 010 ..... 1010111 @r_vm
> +vfmax_vv 000110 . ..... ..... 001 ..... 1010111 @r_vm
> +vfmax_vf 000110 . ..... ..... 101 ..... 1010111 @r_vm
> +vmax_vv 000111 . ..... ..... 000 ..... 1010111 @r_vm
> +vmax_vx 000111 . ..... ..... 100 ..... 1010111 @r_vm
> +vredmax_vs 000111 . ..... ..... 010 ..... 1010111 @r_vm
> +vfredmax_vs 000111 . ..... ..... 001 ..... 1010111 @r_vm
> +vfsgnj_vv 001000 . ..... ..... 001 ..... 1010111 @r_vm
> +vfsgnj_vf 001000 . ..... ..... 101 ..... 1010111 @r_vm
> +vand_vv 001001 . ..... ..... 000 ..... 1010111 @r_vm
> +vand_vx 001001 . ..... ..... 100 ..... 1010111 @r_vm
> +vand_vi 001001 . ..... ..... 011 ..... 1010111 @r_vm
> +vfsgnjn_vv 001001 . ..... ..... 001 ..... 1010111 @r_vm
> +vfsgnjn_vf 001001 . ..... ..... 101 ..... 1010111 @r_vm
> +vor_vv 001010 . ..... ..... 000 ..... 1010111 @r_vm
> +vor_vx 001010 . ..... ..... 100 ..... 1010111 @r_vm
> +vor_vi 001010 . ..... ..... 011 ..... 1010111 @r_vm
> +vfsgnjx_vv 001010 . ..... ..... 001 ..... 1010111 @r_vm
> +vfsgnjx_vf 001010 . ..... ..... 101 ..... 1010111 @r_vm
> +vxor_vv 001011 . ..... ..... 000 ..... 1010111 @r_vm
> +vxor_vx 001011 . ..... ..... 100 ..... 1010111 @r_vm
> +vxor_vi 001011 . ..... ..... 011 ..... 1010111 @r_vm
> +vrgather_vv 001100 . ..... ..... 000 ..... 1010111 @r_vm
> +vrgather_vx 001100 . ..... ..... 100 ..... 1010111 @r_vm
> +vrgather_vi 001100 . ..... ..... 011 ..... 1010111 @r_vm
> +vext_x_v 001100 1 ..... ..... 010 ..... 1010111 @r
> +vfmv_f_s 001100 1 ..... ..... 001 ..... 1010111 @r
> +vmv_s_x 001101 1 ..... ..... 110 ..... 1010111 @r
> +vfmv_s_f 001101 1 ..... ..... 101 ..... 1010111 @r
> +vslideup_vx 001110 . ..... ..... 100 ..... 1010111 @r_vm
> +vslideup_vi 001110 . ..... ..... 011 ..... 1010111 @r_vm
> +vslide1up_vx 001110 . ..... ..... 110 ..... 1010111 @r_vm
> +vslidedown_vx 001111 . ..... ..... 100 ..... 1010111 @r_vm
> +vslidedown_vi 001111 . ..... ..... 011 ..... 1010111 @r_vm
> +vslide1down_vx 001111 . ..... ..... 110 ..... 1010111 @r_vm
> +vadc_vvm 010000 1 ..... ..... 000 ..... 1010111 @r
> +vadc_vxm 010000 1 ..... ..... 100 ..... 1010111 @r
> +vadc_vim 010000 1 ..... ..... 011 ..... 1010111 @r
> +vmadc_vvm 010001 1 ..... ..... 000 ..... 1010111 @r
> +vmadc_vxm 010001 1 ..... ..... 100 ..... 1010111 @r
> +vmadc_vim 010001 1 ..... ..... 011 ..... 1010111 @r
> +vsbc_vvm 010010 1 ..... ..... 000 ..... 1010111 @r
> +vsbc_vxm 010010 1 ..... ..... 100 ..... 1010111 @r
> +vmsbc_vvm 010011 1 ..... ..... 000 ..... 1010111 @r
> +vmsbc_vxm 010011 1 ..... ..... 100 ..... 1010111 @r
> +vmpopc_m 010100 . ..... ----- 010 ..... 1010111 @r2_vm
> +vmfirst_m 010101 . ..... ----- 010 ..... 1010111 @r2_vm
> +vmsbf_m 010110 . ..... 00001 010 ..... 1010111 @r2_vm
> +vmsof_m 010110 . ..... 00010 010 ..... 1010111 @r2_vm
> +vmsif_m 010110 . ..... 00011 010 ..... 1010111 @r2_vm
> +viota_m 010110 . ..... 10000 010 ..... 1010111 @r2_vm
> +vid_v 010110 . 00000 10001 010 ..... 1010111 @r1_vm
> +vmerge_vvm 010111 . ..... ..... 000 ..... 1010111 @r_vm
> +vmerge_vxm 010111 . ..... ..... 100 ..... 1010111 @r_vm
> +vmerge_vim 010111 . ..... ..... 011 ..... 1010111 @r_vm
> +vcompress_vm 010111 - ..... ..... 010 ..... 1010111 @r
> +vfmerge_vfm 010111 . ..... ..... 101 ..... 1010111 @r_vm
> +vmseq_vv 011000 . ..... ..... 000 ..... 1010111 @r_vm
> +vmseq_vx 011000 . ..... ..... 100 ..... 1010111 @r_vm
> +vmseq_vi 011000 . ..... ..... 011 ..... 1010111 @r_vm
> +vmandnot_mm 011000 - ..... ..... 010 ..... 1010111 @r
> +vmfeq_vv 011000 . ..... ..... 001 ..... 1010111 @r_vm
> +vmfeq_vf 011000 . ..... ..... 101 ..... 1010111 @r_vm
> +vmsne_vv 011001 . ..... ..... 000 ..... 1010111 @r_vm
> +vmsne_vx 011001 . ..... ..... 100 ..... 1010111 @r_vm
> +vmsne_vi 011001 . ..... ..... 011 ..... 1010111 @r_vm
> +vmand_mm 011001 - ..... ..... 010 ..... 1010111 @r
> +vmfle_vv 011001 . ..... ..... 001 ..... 1010111 @r_vm
> +vmfle_vf 011001 . ..... ..... 101 ..... 1010111 @r_vm
> +vmsltu_vv 011010 . ..... ..... 000 ..... 1010111 @r_vm
> +vmsltu_vx 011010 . ..... ..... 100 ..... 1010111 @r_vm
> +vmor_mm 011010 - ..... ..... 010 ..... 1010111 @r
> +vmford_vv 011010 . ..... ..... 001 ..... 1010111 @r_vm
> +vmford_vf 011010 . ..... ..... 101 ..... 1010111 @r_vm
> +vmslt_vv 011011 . ..... ..... 000 ..... 1010111 @r_vm
> +vmslt_vx 011011 . ..... ..... 100 ..... 1010111 @r_vm
> +vmxor_mm 011011 - ..... ..... 010 ..... 1010111 @r
> +vmflt_vv 011011 . ..... ..... 001 ..... 1010111 @r_vm
> +vmflt_vf 011011 . ..... ..... 101 ..... 1010111 @r_vm
> +vmsleu_vv 011100 . ..... ..... 000 ..... 1010111 @r_vm
> +vmsleu_vx 011100 . ..... ..... 100 ..... 1010111 @r_vm
> +vmsleu_vi 011100 . ..... ..... 011 ..... 1010111 @r_vm
> +vmornot_mm 011100 - ..... ..... 010 ..... 1010111 @r
> +vmfne_vv 011100 . ..... ..... 001 ..... 1010111 @r_vm
> +vmfne_vf 011100 . ..... ..... 101 ..... 1010111 @r_vm
> +vmsle_vv 011101 . ..... ..... 000 ..... 1010111 @r_vm
> +vmsle_vx 011101 . ..... ..... 100 ..... 1010111 @r_vm
> +vmsle_vi 011101 . ..... ..... 011 ..... 1010111 @r_vm
> +vmnand_mm 011101 - ..... ..... 010 ..... 1010111 @r
> +vmfgt_vf 011101 . ..... ..... 101 ..... 1010111 @r_vm
> +vmsgtu_vx 011110 . ..... ..... 100 ..... 1010111 @r_vm
> +vmsgtu_vi 011110 . ..... ..... 011 ..... 1010111 @r_vm
> +vmnor_mm 011110 - ..... ..... 010 ..... 1010111 @r
> +vmsgt_vx 011111 . ..... ..... 100 ..... 1010111 @r_vm
> +vmsgt_vi 011111 . ..... ..... 011 ..... 1010111 @r_vm
> +vmxnor_mm 011111 - ..... ..... 010 ..... 1010111 @r
> +vmfge_vf 011111 . ..... ..... 101 ..... 1010111 @r_vm
> +vsaddu_vv 100000 . ..... ..... 000 ..... 1010111 @r_vm
> +vsaddu_vx 100000 . ..... ..... 100 ..... 1010111 @r_vm
> +vsaddu_vi 100000 . ..... ..... 011 ..... 1010111 @r_vm
> +vdivu_vv 100000 . ..... ..... 010 ..... 1010111 @r_vm
> +vdivu_vx 100000 . ..... ..... 110 ..... 1010111 @r_vm
> +vfdiv_vv 100000 . ..... ..... 001 ..... 1010111 @r_vm
> +vfdiv_vf 100000 . ..... ..... 101 ..... 1010111 @r_vm
> +vsadd_vv 100001 . ..... ..... 000 ..... 1010111 @r_vm
> +vsadd_vx 100001 . ..... ..... 100 ..... 1010111 @r_vm
> +vsadd_vi 100001 . ..... ..... 011 ..... 1010111 @r_vm
> +vdiv_vv 100001 . ..... ..... 010 ..... 1010111 @r_vm
> +vdiv_vx 100001 . ..... ..... 110 ..... 1010111 @r_vm
> +vfrdiv_vf 100001 . ..... ..... 101 ..... 1010111 @r_vm
> +vssubu_vv 100010 . ..... ..... 000 ..... 1010111 @r_vm
> +vssubu_vx 100010 . ..... ..... 100 ..... 1010111 @r_vm
> +vremu_vv 100010 . ..... ..... 010 ..... 1010111 @r_vm
> +vremu_vx 100010 . ..... ..... 110 ..... 1010111 @r_vm
> +vfcvt_xu_f_v 100010 . ..... 00000 001 ..... 1010111 @r2_vm
> +vfcvt_x_f_v 100010 . ..... 00001 001 ..... 1010111 @r2_vm
> +vfcvt_f_xu_v 100010 . ..... 00010 001 ..... 1010111 @r2_vm
> +vfcvt_f_x_v 100010 . ..... 00011 001 ..... 1010111 @r2_vm
> +vfwcvt_xu_f_v 100010 . ..... 01000 001 ..... 1010111 @r2_vm
> +vfwcvt_x_f_v 100010 . ..... 01001 001 ..... 1010111 @r2_vm
> +vfwcvt_f_xu_v 100010 . ..... 01010 001 ..... 1010111 @r2_vm
> +vfwcvt_f_x_v 100010 . ..... 01011 001 ..... 1010111 @r2_vm
> +vfwcvt_f_f_v 100010 . ..... 01100 001 ..... 1010111 @r2_vm
> +vfncvt_xu_f_v 100010 . ..... 10000 001 ..... 1010111 @r2_vm
> +vfncvt_x_f_v 100010 . ..... 10001 001 ..... 1010111 @r2_vm
> +vfncvt_f_xu_v 100010 . ..... 10010 001 ..... 1010111 @r2_vm
> +vfncvt_f_x_v 100010 . ..... 10011 001 ..... 1010111 @r2_vm
> +vfncvt_f_f_v 100010 . ..... 10100 001 ..... 1010111 @r2_vm
> +vssub_vv 100011 . ..... ..... 000 ..... 1010111 @r_vm
> +vssub_vx 100011 . ..... ..... 100 ..... 1010111 @r_vm
> +vrem_vv 100011 . ..... ..... 010 ..... 1010111 @r_vm
> +vrem_vx 100011 . ..... ..... 110 ..... 1010111 @r_vm
> +vfsqrt_v 100011 . ..... 00000 001 ..... 1010111 @r2_vm
> +vfclass_v 100011 . ..... 10000 001 ..... 1010111 @r2_vm
> +vaadd_vv 100100 . ..... ..... 000 ..... 1010111 @r_vm
> +vaadd_vx 100100 . ..... ..... 100 ..... 1010111 @r_vm
> +vaadd_vi 100100 . ..... ..... 011 ..... 1010111 @r_vm
> +vmulhu_vv 100100 . ..... ..... 010 ..... 1010111 @r_vm
> +vmulhu_vx 100100 . ..... ..... 110 ..... 1010111 @r_vm
> +vfmul_vv 100100 . ..... ..... 001 ..... 1010111 @r_vm
> +vfmul_vf 100100 . ..... ..... 101 ..... 1010111 @r_vm
> +vsll_vv 100101 . ..... ..... 000 ..... 1010111 @r_vm
> +vsll_vx 100101 . ..... ..... 100 ..... 1010111 @r_vm
> +vsll_vi 100101 . ..... ..... 011 ..... 1010111 @r_vm
> +vmul_vv 100101 . ..... ..... 010 ..... 1010111 @r_vm
> +vmul_vx 100101 . ..... ..... 110 ..... 1010111 @r_vm
> +vasub_vv 100110 . ..... ..... 000 ..... 1010111 @r_vm
> +vasub_vx 100110 . ..... ..... 100 ..... 1010111 @r_vm
> +vmulhsu_vv 100110 . ..... ..... 010 ..... 1010111 @r_vm
> +vmulhsu_vx 100110 . ..... ..... 110 ..... 1010111 @r_vm
> +vsmul_vv 100111 . ..... ..... 000 ..... 1010111 @r_vm
> +vsmul_vx 100111 . ..... ..... 100 ..... 1010111 @r_vm
> +vmulh_vv 100111 . ..... ..... 010 ..... 1010111 @r_vm
> +vmulh_vx 100111 . ..... ..... 110 ..... 1010111 @r_vm
> +vfrsub_vf 100111 . ..... ..... 101 ..... 1010111 @r_vm
> +vsrl_vv 101000 . ..... ..... 000 ..... 1010111 @r_vm
> +vsrl_vx 101000 . ..... ..... 100 ..... 1010111 @r_vm
> +vsrl_vi 101000 . ..... ..... 011 ..... 1010111 @r_vm
> +vfmadd_vv 101000 . ..... ..... 001 ..... 1010111 @r_vm
> +vfmadd_vf 101000 . ..... ..... 101 ..... 1010111 @r_vm
> +vsra_vv 101001 . ..... ..... 000 ..... 1010111 @r_vm
> +vsra_vx 101001 . ..... ..... 100 ..... 1010111 @r_vm
> +vsra_vi 101001 . ..... ..... 011 ..... 1010111 @r_vm
> +vmadd_vv 101001 . ..... ..... 010 ..... 1010111 @r_vm
> +vmadd_vx 101001 . ..... ..... 110 ..... 1010111 @r_vm
> +vfnmadd_vv 101001 . ..... ..... 001 ..... 1010111 @r_vm
> +vfnmadd_vf 101001 . ..... ..... 101 ..... 1010111 @r_vm
> +vssrl_vv 101010 . ..... ..... 000 ..... 1010111 @r_vm
> +vssrl_vx 101010 . ..... ..... 100 ..... 1010111 @r_vm
> +vssrl_vi 101010 . ..... ..... 011 ..... 1010111 @r_vm
> +vfmsub_vv 101010 . ..... ..... 001 ..... 1010111 @r_vm
> +vfmsub_vf 101010 . ..... ..... 101 ..... 1010111 @r_vm
> +vssra_vv 101011 . ..... ..... 000 ..... 1010111 @r_vm
> +vssra_vx 101011 . ..... ..... 100 ..... 1010111 @r_vm
> +vssra_vi 101011 . ..... ..... 011 ..... 1010111 @r_vm
> +vnmsub_vv 101011 . ..... ..... 010 ..... 1010111 @r_vm
> +vnmsub_vx 101011 . ..... ..... 110 ..... 1010111 @r_vm
> +vfnmsub_vv 101011 . ..... ..... 001 ..... 1010111 @r_vm
> +vfnmsub_vf 101011 . ..... ..... 101 ..... 1010111 @r_vm
> +vnsrl_vv 101100 . ..... ..... 000 ..... 1010111 @r_vm
> +vnsrl_vx 101100 . ..... ..... 100 ..... 1010111 @r_vm
> +vnsrl_vi 101100 . ..... ..... 011 ..... 1010111 @r_vm
> +vfmacc_vv 101100 . ..... ..... 001 ..... 1010111 @r_vm
> +vfmacc_vf 101100 . ..... ..... 101 ..... 1010111 @r_vm
> +vnsra_vv 101101 . ..... ..... 000 ..... 1010111 @r_vm
> +vnsra_vx 101101 . ..... ..... 100 ..... 1010111 @r_vm
> +vnsra_vi 101101 . ..... ..... 011 ..... 1010111 @r_vm
> +vmacc_vv 101101 . ..... ..... 010 ..... 1010111 @r_vm
> +vmacc_vx 101101 . ..... ..... 110 ..... 1010111 @r_vm
> +vfnmacc_vv 101101 . ..... ..... 001 ..... 1010111 @r_vm
> +vfnmacc_vf 101101 . ..... ..... 101 ..... 1010111 @r_vm
> +vnclipu_vv 101110 . ..... ..... 000 ..... 1010111 @r_vm
> +vnclipu_vx 101110 . ..... ..... 100 ..... 1010111 @r_vm
> +vnclipu_vi 101110 . ..... ..... 011 ..... 1010111 @r_vm
> +vfmsac_vv 101110 . ..... ..... 001 ..... 1010111 @r_vm
> +vfmsac_vf 101110 . ..... ..... 101 ..... 1010111 @r_vm
> +vnclip_vv 101111 . ..... ..... 000 ..... 1010111 @r_vm
> +vnclip_vx 101111 . ..... ..... 100 ..... 1010111 @r_vm
> +vnclip_vi 101111 . ..... ..... 011 ..... 1010111 @r_vm
> +vnmsac_vv 101111 . ..... ..... 010 ..... 1010111 @r_vm
> +vnmsac_vx 101111 . ..... ..... 110 ..... 1010111 @r_vm
> +vfnmsac_vv 101111 . ..... ..... 001 ..... 1010111 @r_vm
> +vfnmsac_vf 101111 . ..... ..... 101 ..... 1010111 @r_vm
> +vwredsumu_vs 110000 . ..... ..... 000 ..... 1010111 @r_vm
> +vwaddu_vv 110000 . ..... ..... 010 ..... 1010111 @r_vm
> +vwaddu_vx 110000 . ..... ..... 110 ..... 1010111 @r_vm
> +vfwadd_vv 110000 . ..... ..... 001 ..... 1010111 @r_vm
> +vfwadd_vf 110000 . ..... ..... 101 ..... 1010111 @r_vm
> +vwredsum_vs 110001 . ..... ..... 000 ..... 1010111 @r_vm
> +vwadd_vv 110001 . ..... ..... 010 ..... 1010111 @r_vm
> +vwadd_vx 110001 . ..... ..... 110 ..... 1010111 @r_vm
> +vfwredsum_vs 110001 . ..... ..... 001 ..... 1010111 @r_vm
> +vwsubu_vv 110010 . ..... ..... 010 ..... 1010111 @r_vm
> +vwsubu_vx 110010 . ..... ..... 110 ..... 1010111 @r_vm
> +vfwsub_vv 110010 . ..... ..... 001 ..... 1010111 @r_vm
> +vfwsub_vf 110010 . ..... ..... 101 ..... 1010111 @r_vm
> +vwsub_vv 110011 . ..... ..... 010 ..... 1010111 @r_vm
> +vwsub_vx 110011 . ..... ..... 110 ..... 1010111 @r_vm
> +vfwredosum_vs 110011 . ..... ..... 001 ..... 1010111 @r_vm
> +vwaddu_wv 110100 . ..... ..... 010 ..... 1010111 @r_vm
> +vwaddu_wx 110100 . ..... ..... 110 ..... 1010111 @r_vm
> +vfwadd_wv 110100 . ..... ..... 001 ..... 1010111 @r_vm
> +vfwadd_wf 110100 . ..... ..... 101 ..... 1010111 @r_vm
> +vwadd_wv 110101 . ..... ..... 010 ..... 1010111 @r_vm
> +vwadd_wx 110101 . ..... ..... 110 ..... 1010111 @r_vm
> +vwsubu_wv 110110 . ..... ..... 010 ..... 1010111 @r_vm
> +vwsubu_wx 110110 . ..... ..... 110 ..... 1010111 @r_vm
> +vfwsub_wv 110110 . ..... ..... 001 ..... 1010111 @r_vm
> +vfwsub_wf 110110 . ..... ..... 101 ..... 1010111 @r_vm
> +vwsub_wv 110111 . ..... ..... 010 ..... 1010111 @r_vm
> +vwsub_wx 110111 . ..... ..... 110 ..... 1010111 @r_vm
> +vwmulu_vv 111000 . ..... ..... 010 ..... 1010111 @r_vm
> +vwmulu_vx 111000 . ..... ..... 110 ..... 1010111 @r_vm
> +vfwmul_vv 111000 . ..... ..... 001 ..... 1010111 @r_vm
> +vfwmul_vf 111000 . ..... ..... 101 ..... 1010111 @r_vm
> +vwmulsu_vv 111010 . ..... ..... 010 ..... 1010111 @r_vm
> +vwmulsu_vx 111010 . ..... ..... 110 ..... 1010111 @r_vm
> +vwmul_vv 111011 . ..... ..... 010 ..... 1010111 @r_vm
> +vwmul_vx 111011 . ..... ..... 110 ..... 1010111 @r_vm
> +vwsmaccu_vv 111100 . ..... ..... 000 ..... 1010111 @r_vm
> +vwsmaccu_vx 111100 . ..... ..... 100 ..... 1010111 @r_vm
> +vwmaccu_vv 111100 . ..... ..... 010 ..... 1010111 @r_vm
> +vwmaccu_vx 111100 . ..... ..... 110 ..... 1010111 @r_vm
> +vfwmacc_vv 111100 . ..... ..... 001 ..... 1010111 @r_vm
> +vfwmacc_vf 111100 . ..... ..... 101 ..... 1010111 @r_vm
> +vwsmacc_vv 111101 . ..... ..... 000 ..... 1010111 @r_vm
> +vwsmacc_vx 111101 . ..... ..... 100 ..... 1010111 @r_vm
> +vwmacc_vv 111101 . ..... ..... 010 ..... 1010111 @r_vm
> +vwmacc_vx 111101 . ..... ..... 110 ..... 1010111 @r_vm
> +vfwnmacc_vv 111101 . ..... ..... 001 ..... 1010111 @r_vm
> +vfwnmacc_vf 111101 . ..... ..... 101 ..... 1010111 @r_vm
> +vwsmaccsu_vv 111110 . ..... ..... 000 ..... 1010111 @r_vm
> +vwsmaccsu_vx 111110 . ..... ..... 100 ..... 1010111 @r_vm
> +vwmaccsu_vv 111110 . ..... ..... 010 ..... 1010111 @r_vm
> +vwmaccsu_vx 111110 . ..... ..... 110 ..... 1010111 @r_vm
> +vfwmsac_vv 111110 . ..... ..... 001 ..... 1010111 @r_vm
> +vfwmsac_vf 111110 . ..... ..... 101 ..... 1010111 @r_vm
> +vwsmaccus_vx 111111 . ..... ..... 100 ..... 1010111 @r_vm
> +vwmaccus_vx 111111 . ..... ..... 110 ..... 1010111 @r_vm
> +vfwnmsac_vv 111111 . ..... ..... 001 ..... 1010111 @r_vm
> +vfwnmsac_vf 111111 . ..... ..... 101 ..... 1010111 @r_vm
> +vsetvli 0 ........... ..... 111 ..... 1010111 @r2_zimm
> +vsetvl 1000000 ..... ..... 111 ..... 1010111 @r
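It may help to spell out how the vsetvli zimm is consumed. Assuming the 0.7.1 vtype layout (vlmul in bits [1:0], vsew in bits [4:2]), the new vl comes out of something like this sketch, where vlen is the implementation's vector register width in bits and avl is the application vector length from rs1:

static uint32_t vsetvli_vl_sketch(uint32_t zimm, uint32_t avl, uint32_t vlen)
{
    uint32_t vlmul = extract32(zimm, 0, 2);
    uint32_t vsew = extract32(zimm, 2, 3);
    uint32_t sew = 8 << vsew;                   /* 8/16/32/64-bit elements */
    uint32_t vlmax = (1 << vlmul) * vlen / sew; /* elements per register group */

    return avl < vlmax ? avl : vlmax;
}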
> diff --git a/target/riscv/insn_trans/trans_rvv.inc.c b/target/riscv/insn_trans/trans_rvv.inc.c
> new file mode 100644
> index 0000000..dc8e6ce
> --- /dev/null
> +++ b/target/riscv/insn_trans/trans_rvv.inc.c
> @@ -0,0 +1,484 @@
> +/*
> + * RISC-V translation routines for the RVV Standard Extension.
> + *
> + * Copyright (c) 2011-2019 C-SKY Limited. All rights reserved.
> + *
> + * This program is free software; you can redistribute it and/or modify it
> + * under the terms and conditions of the GNU General Public License,
> + * version 2 or later, as published by the Free Software Foundation.
> + *
> + * This program is distributed in the hope it will be useful, but WITHOUT
> + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
> + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
> + * more details.
> + *
> + * You should have received a copy of the GNU General Public License along with
> + * this program. If not, see <http://www.gnu.org/licenses/>.
> + */
> +
> +#define GEN_VECTOR_R2_NFVM(INSN) \
> +static bool trans_##INSN(DisasContext *ctx, arg_##INSN * a) \
> +{ \
> + TCGv_i32 s1 = tcg_const_i32(a->rs1); \
> + TCGv_i32 d = tcg_const_i32(a->rd); \
> + TCGv_i32 nf = tcg_const_i32(a->nf); \
> + TCGv_i32 vm = tcg_const_i32(a->vm); \
> + gen_helper_vector_##INSN(cpu_env, nf, vm, s1, d); \
> + tcg_temp_free_i32(s1); \
> + tcg_temp_free_i32(d); \
> + tcg_temp_free_i32(nf); \
> + tcg_temp_free_i32(vm); \
> + return true; \
> +}
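For reference, instantiating the first macro as GEN_VECTOR_R2_NFVM(vlb_v) stamps out exactly this translator:

static bool trans_vlb_v(DisasContext *ctx, arg_vlb_v *a)
{
    TCGv_i32 s1 = tcg_const_i32(a->rs1);
    TCGv_i32 d = tcg_const_i32(a->rd);
    TCGv_i32 nf = tcg_const_i32(a->nf);
    TCGv_i32 vm = tcg_const_i32(a->vm);
    gen_helper_vector_vlb_v(cpu_env, nf, vm, s1, d);
    tcg_temp_free_i32(s1);
    tcg_temp_free_i32(d);
    tcg_temp_free_i32(nf);
    tcg_temp_free_i32(vm);
    return true;
}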
> +#define GEN_VECTOR_R_NFVM(INSN) \
> +static bool trans_##INSN(DisasContext *ctx, arg_##INSN * a) \
> +{ \
> + TCGv_i32 s1 = tcg_const_i32(a->rs1); \
> + TCGv_i32 s2 = tcg_const_i32(a->rs2); \
> + TCGv_i32 d = tcg_const_i32(a->rd); \
> + TCGv_i32 nf = tcg_const_i32(a->nf); \
> + TCGv_i32 vm = tcg_const_i32(a->vm); \
> + gen_helper_vector_##INSN(cpu_env, nf, vm, s1, s2, d);\
> + tcg_temp_free_i32(s1); \
> + tcg_temp_free_i32(s2); \
> + tcg_temp_free_i32(d); \
> + tcg_temp_free_i32(nf); \
> + tcg_temp_free_i32(vm); \
> + return true; \
> +}
> +
> +#define GEN_VECTOR_R_WDVM(INSN) \
> +static bool trans_##INSN(DisasContext *ctx, arg_##INSN * a) \
> +{ \
> + TCGv_i32 s1 = tcg_const_i32(a->rs1); \
> + TCGv_i32 s2 = tcg_const_i32(a->rs2); \
> + TCGv_i32 d = tcg_const_i32(a->rd); \
> + TCGv_i32 wd = tcg_const_i32(a->wd); \
> + TCGv_i32 vm = tcg_const_i32(a->vm); \
> + gen_helper_vector_##INSN(cpu_env, wd, vm, s1, s2, d);\
> + tcg_temp_free_i32(s1); \
> + tcg_temp_free_i32(s2); \
> + tcg_temp_free_i32(d); \
> + tcg_temp_free_i32(wd); \
> + tcg_temp_free_i32(vm); \
> + return true; \
> +}
> +#define GEN_VECTOR_R(INSN) \
> +static bool trans_##INSN(DisasContext *ctx, arg_##INSN * a) \
> +{ \
> + TCGv_i32 s1 = tcg_const_i32(a->rs1); \
> + TCGv_i32 s2 = tcg_const_i32(a->rs2); \
> + TCGv_i32 d = tcg_const_i32(a->rd); \
> + gen_helper_vector_##INSN(cpu_env, s1, s2, d); \
> + tcg_temp_free_i32(s1); \
> + tcg_temp_free_i32(s2); \
> + tcg_temp_free_i32(d); \
> + return true; \
> +}
> +#define GEN_VECTOR_R2_VM(INSN) \
> +static bool trans_##INSN(DisasContext *ctx, arg_##INSN * a) \
> +{ \
> + TCGv_i32 s2 = tcg_const_i32(a->rs2); \
> + TCGv_i32 d = tcg_const_i32(a->rd); \
> + TCGv_i32 vm = tcg_const_i32(a->vm); \
> + gen_helper_vector_##INSN(cpu_env, vm, s2, d); \
> + tcg_temp_free_i32(s2); \
> + tcg_temp_free_i32(d); \
> + tcg_temp_free_i32(vm); \
> + return true; \
> +}
> +
> +#define GEN_VECTOR_R1_VM(INSN) \
> +static bool trans_##INSN(DisasContext *ctx, arg_##INSN * a) \
> +{ \
> + TCGv_i32 d = tcg_const_i32(a->rd); \
> + TCGv_i32 vm = tcg_const_i32(a->vm); \
> + gen_helper_vector_##INSN(cpu_env, vm, d); \
> + tcg_temp_free_i32(d); \
> + tcg_temp_free_i32(vm); \
> + return true; \
> +}
> +#define GEN_VECTOR_R_VM(INSN) \
> +static bool trans_##INSN(DisasContext *ctx, arg_##INSN * a) \
> +{ \
> + TCGv_i32 s1 = tcg_const_i32(a->rs1); \
> + TCGv_i32 s2 = tcg_const_i32(a->rs2); \
> + TCGv_i32 d = tcg_const_i32(a->rd); \
> + TCGv_i32 vm = tcg_const_i32(a->vm); \
> + gen_helper_vector_##INSN(cpu_env, vm, s1, s2, d); \
> + tcg_temp_free_i32(s1); \
> + tcg_temp_free_i32(s2); \
> + tcg_temp_free_i32(d); \
> + tcg_temp_free_i32(vm); \
> + return true; \
> +}
> +#define GEN_VECTOR_R2_ZIMM(INSN) \
> +static bool trans_##INSN(DisasContext *ctx, arg_##INSN * a) \
> +{ \
> + TCGv_i32 s1 = tcg_const_i32(a->rs1); \
> + TCGv_i32 zimm = tcg_const_i32(a->zimm); \
> + TCGv_i32 d = tcg_const_i32(a->rd); \
> + gen_helper_vector_##INSN(cpu_env, s1, zimm, d); \
> + tcg_temp_free_i32(s1); \
> + tcg_temp_free_i32(zimm); \
> + tcg_temp_free_i32(d); \
> + return true; \
> +}
> +
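One connective detail before the long instantiation list: decodetree synthesizes an argument struct per format, and arg_vlb_v and friends are typedefs for it. For the @r2_nfvm loads the field set looks roughly like this (the real type name and layout come from the generated decoder, so treat this as illustrative):

typedef struct {
    int nf;
    int vm;
    int rs1;
    int rd;
} arg_r2_nfvm_sketch;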
> +GEN_VECTOR_R2_NFVM(vlb_v)
> +GEN_VECTOR_R2_NFVM(vlh_v)
> +GEN_VECTOR_R2_NFVM(vlw_v)
> +GEN_VECTOR_R2_NFVM(vle_v)
> +GEN_VECTOR_R2_NFVM(vlbu_v)
> +GEN_VECTOR_R2_NFVM(vlhu_v)
> +GEN_VECTOR_R2_NFVM(vlwu_v)
> +GEN_VECTOR_R2_NFVM(vlbff_v)
> +GEN_VECTOR_R2_NFVM(vlhff_v)
> +GEN_VECTOR_R2_NFVM(vlwff_v)
> +GEN_VECTOR_R2_NFVM(vleff_v)
> +GEN_VECTOR_R2_NFVM(vlbuff_v)
> +GEN_VECTOR_R2_NFVM(vlhuff_v)
> +GEN_VECTOR_R2_NFVM(vlwuff_v)
> +GEN_VECTOR_R2_NFVM(vsb_v)
> +GEN_VECTOR_R2_NFVM(vsh_v)
> +GEN_VECTOR_R2_NFVM(vsw_v)
> +GEN_VECTOR_R2_NFVM(vse_v)
> +
> +GEN_VECTOR_R_NFVM(vlsb_v)
> +GEN_VECTOR_R_NFVM(vlsh_v)
> +GEN_VECTOR_R_NFVM(vlsw_v)
> +GEN_VECTOR_R_NFVM(vlse_v)
> +GEN_VECTOR_R_NFVM(vlsbu_v)
> +GEN_VECTOR_R_NFVM(vlshu_v)
> +GEN_VECTOR_R_NFVM(vlswu_v)
> +GEN_VECTOR_R_NFVM(vssb_v)
> +GEN_VECTOR_R_NFVM(vssh_v)
> +GEN_VECTOR_R_NFVM(vssw_v)
> +GEN_VECTOR_R_NFVM(vsse_v)
> +GEN_VECTOR_R_NFVM(vlxb_v)
> +GEN_VECTOR_R_NFVM(vlxh_v)
> +GEN_VECTOR_R_NFVM(vlxw_v)
> +GEN_VECTOR_R_NFVM(vlxe_v)
> +GEN_VECTOR_R_NFVM(vlxbu_v)
> +GEN_VECTOR_R_NFVM(vlxhu_v)
> +GEN_VECTOR_R_NFVM(vlxwu_v)
> +GEN_VECTOR_R_NFVM(vsxb_v)
> +GEN_VECTOR_R_NFVM(vsxh_v)
> +GEN_VECTOR_R_NFVM(vsxw_v)
> +GEN_VECTOR_R_NFVM(vsxe_v)
> +GEN_VECTOR_R_NFVM(vsuxb_v)
> +GEN_VECTOR_R_NFVM(vsuxh_v)
> +GEN_VECTOR_R_NFVM(vsuxw_v)
> +GEN_VECTOR_R_NFVM(vsuxe_v)
> +
> +GEN_VECTOR_R_WDVM(vamoswapw_v)
> +GEN_VECTOR_R_WDVM(vamoswapd_v)
> +GEN_VECTOR_R_WDVM(vamoaddw_v)
> +GEN_VECTOR_R_WDVM(vamoaddd_v)
> +GEN_VECTOR_R_WDVM(vamoxorw_v)
> +GEN_VECTOR_R_WDVM(vamoxord_v)
> +GEN_VECTOR_R_WDVM(vamoandw_v)
> +GEN_VECTOR_R_WDVM(vamoandd_v)
> +GEN_VECTOR_R_WDVM(vamoorw_v)
> +GEN_VECTOR_R_WDVM(vamoord_v)
> +GEN_VECTOR_R_WDVM(vamominw_v)
> +GEN_VECTOR_R_WDVM(vamomind_v)
> +GEN_VECTOR_R_WDVM(vamomaxw_v)
> +GEN_VECTOR_R_WDVM(vamomaxd_v)
> +GEN_VECTOR_R_WDVM(vamominuw_v)
> +GEN_VECTOR_R_WDVM(vamominud_v)
> +GEN_VECTOR_R_WDVM(vamomaxuw_v)
> +GEN_VECTOR_R_WDVM(vamomaxud_v)
> +
> +GEN_VECTOR_R(vext_x_v)
> +GEN_VECTOR_R(vfmv_f_s)
> +GEN_VECTOR_R(vmv_s_x)
> +GEN_VECTOR_R(vfmv_s_f)
> +GEN_VECTOR_R(vadc_vvm)
> +GEN_VECTOR_R(vadc_vxm)
> +GEN_VECTOR_R(vadc_vim)
> +GEN_VECTOR_R(vmadc_vvm)
> +GEN_VECTOR_R(vmadc_vxm)
> +GEN_VECTOR_R(vmadc_vim)
> +GEN_VECTOR_R(vsbc_vvm)
> +GEN_VECTOR_R(vsbc_vxm)
> +GEN_VECTOR_R(vmsbc_vvm)
> +GEN_VECTOR_R(vmsbc_vxm)
> +GEN_VECTOR_R2_VM(vmpopc_m)
> +GEN_VECTOR_R2_VM(vmfirst_m)
> +GEN_VECTOR_R(vcompress_vm)
> +GEN_VECTOR_R(vmandnot_mm)
> +GEN_VECTOR_R(vmand_mm)
> +GEN_VECTOR_R(vmor_mm)
> +GEN_VECTOR_R(vmxor_mm)
> +GEN_VECTOR_R(vmornot_mm)
> +GEN_VECTOR_R(vmnand_mm)
> +GEN_VECTOR_R(vmnor_mm)
> +GEN_VECTOR_R(vmxnor_mm)
> +GEN_VECTOR_R2_VM(vmsbf_m)
> +GEN_VECTOR_R2_VM(vmsof_m)
> +GEN_VECTOR_R2_VM(vmsif_m)
> +GEN_VECTOR_R2_VM(viota_m)
> +GEN_VECTOR_R1_VM(vid_v)
> +GEN_VECTOR_R2_VM(vfcvt_xu_f_v)
> +GEN_VECTOR_R2_VM(vfcvt_x_f_v)
> +GEN_VECTOR_R2_VM(vfcvt_f_xu_v)
> +GEN_VECTOR_R2_VM(vfcvt_f_x_v)
> +GEN_VECTOR_R2_VM(vfwcvt_xu_f_v)
> +GEN_VECTOR_R2_VM(vfwcvt_x_f_v)
> +GEN_VECTOR_R2_VM(vfwcvt_f_xu_v)
> +GEN_VECTOR_R2_VM(vfwcvt_f_x_v)
> +GEN_VECTOR_R2_VM(vfwcvt_f_f_v)
> +GEN_VECTOR_R2_VM(vfncvt_xu_f_v)
> +GEN_VECTOR_R2_VM(vfncvt_x_f_v)
> +GEN_VECTOR_R2_VM(vfncvt_f_xu_v)
> +GEN_VECTOR_R2_VM(vfncvt_f_x_v)
> +GEN_VECTOR_R2_VM(vfncvt_f_f_v)
> +GEN_VECTOR_R2_VM(vfsqrt_v)
> +GEN_VECTOR_R2_VM(vfclass_v)
> +
> +GEN_VECTOR_R_VM(vadd_vv)
> +GEN_VECTOR_R_VM(vadd_vx)
> +GEN_VECTOR_R_VM(vadd_vi)
> +GEN_VECTOR_R_VM(vredsum_vs)
> +GEN_VECTOR_R_VM(vfadd_vv)
> +GEN_VECTOR_R_VM(vfadd_vf)
> +GEN_VECTOR_R_VM(vredand_vs)
> +GEN_VECTOR_R_VM(vfredsum_vs)
> +GEN_VECTOR_R_VM(vsub_vv)
> +GEN_VECTOR_R_VM(vsub_vx)
> +GEN_VECTOR_R_VM(vredor_vs)
> +GEN_VECTOR_R_VM(vfsub_vv)
> +GEN_VECTOR_R_VM(vfsub_vf)
> +GEN_VECTOR_R_VM(vrsub_vx)
> +GEN_VECTOR_R_VM(vrsub_vi)
> +GEN_VECTOR_R_VM(vredxor_vs)
> +GEN_VECTOR_R_VM(vfredosum_vs)
> +GEN_VECTOR_R_VM(vminu_vv)
> +GEN_VECTOR_R_VM(vminu_vx)
> +GEN_VECTOR_R_VM(vredminu_vs)
> +GEN_VECTOR_R_VM(vfmin_vv)
> +GEN_VECTOR_R_VM(vfmin_vf)
> +GEN_VECTOR_R_VM(vmin_vv)
> +GEN_VECTOR_R_VM(vmin_vx)
> +GEN_VECTOR_R_VM(vredmin_vs)
> +GEN_VECTOR_R_VM(vfredmin_vs)
> +GEN_VECTOR_R_VM(vmaxu_vv)
> +GEN_VECTOR_R_VM(vmaxu_vx)
> +GEN_VECTOR_R_VM(vredmaxu_vs)
> +GEN_VECTOR_R_VM(vfmax_vv)
> +GEN_VECTOR_R_VM(vfmax_vf)
> +GEN_VECTOR_R_VM(vmax_vv)
> +GEN_VECTOR_R_VM(vmax_vx)
> +GEN_VECTOR_R_VM(vredmax_vs)
> +GEN_VECTOR_R_VM(vfredmax_vs)
> +GEN_VECTOR_R_VM(vfsgnj_vv)
> +GEN_VECTOR_R_VM(vfsgnj_vf)
> +GEN_VECTOR_R_VM(vand_vv)
> +GEN_VECTOR_R_VM(vand_vx)
> +GEN_VECTOR_R_VM(vand_vi)
> +GEN_VECTOR_R_VM(vfsgnjn_vv)
> +GEN_VECTOR_R_VM(vfsgnjn_vf)
> +GEN_VECTOR_R_VM(vor_vv)
> +GEN_VECTOR_R_VM(vor_vx)
> +GEN_VECTOR_R_VM(vor_vi)
> +GEN_VECTOR_R_VM(vfsgnjx_vv)
> +GEN_VECTOR_R_VM(vfsgnjx_vf)
> +GEN_VECTOR_R_VM(vxor_vv)
> +GEN_VECTOR_R_VM(vxor_vx)
> +GEN_VECTOR_R_VM(vxor_vi)
> +GEN_VECTOR_R_VM(vrgather_vv)
> +GEN_VECTOR_R_VM(vrgather_vx)
> +GEN_VECTOR_R_VM(vrgather_vi)
> +GEN_VECTOR_R_VM(vslideup_vx)
> +GEN_VECTOR_R_VM(vslideup_vi)
> +GEN_VECTOR_R_VM(vslide1up_vx)
> +GEN_VECTOR_R_VM(vslidedown_vx)
> +GEN_VECTOR_R_VM(vslidedown_vi)
> +GEN_VECTOR_R_VM(vslide1down_vx)
> +GEN_VECTOR_R_VM(vmerge_vvm)
> +GEN_VECTOR_R_VM(vmerge_vxm)
> +GEN_VECTOR_R_VM(vmerge_vim)
> +GEN_VECTOR_R_VM(vfmerge_vfm)
> +GEN_VECTOR_R_VM(vmseq_vv)
> +GEN_VECTOR_R_VM(vmseq_vx)
> +GEN_VECTOR_R_VM(vmseq_vi)
> +GEN_VECTOR_R_VM(vmfeq_vv)
> +GEN_VECTOR_R_VM(vmfeq_vf)
> +GEN_VECTOR_R_VM(vmsne_vv)
> +GEN_VECTOR_R_VM(vmsne_vx)
> +GEN_VECTOR_R_VM(vmsne_vi)
> +GEN_VECTOR_R_VM(vmfle_vv)
> +GEN_VECTOR_R_VM(vmfle_vf)
> +GEN_VECTOR_R_VM(vmsltu_vv)
> +GEN_VECTOR_R_VM(vmsltu_vx)
> +GEN_VECTOR_R_VM(vmford_vv)
> +GEN_VECTOR_R_VM(vmford_vf)
> +GEN_VECTOR_R_VM(vmslt_vv)
> +GEN_VECTOR_R_VM(vmslt_vx)
> +GEN_VECTOR_R_VM(vmflt_vv)
> +GEN_VECTOR_R_VM(vmflt_vf)
> +GEN_VECTOR_R_VM(vmsleu_vv)
> +GEN_VECTOR_R_VM(vmsleu_vx)
> +GEN_VECTOR_R_VM(vmsleu_vi)
> +GEN_VECTOR_R_VM(vmfne_vv)
> +GEN_VECTOR_R_VM(vmfne_vf)
> +GEN_VECTOR_R_VM(vmsle_vv)
> +GEN_VECTOR_R_VM(vmsle_vx)
> +GEN_VECTOR_R_VM(vmsle_vi)
> +GEN_VECTOR_R_VM(vmfgt_vf)
> +GEN_VECTOR_R_VM(vmsgtu_vx)
> +GEN_VECTOR_R_VM(vmsgtu_vi)
> +GEN_VECTOR_R_VM(vmsgt_vx)
> +GEN_VECTOR_R_VM(vmsgt_vi)
> +GEN_VECTOR_R_VM(vmfge_vf)
> +GEN_VECTOR_R_VM(vsaddu_vv)
> +GEN_VECTOR_R_VM(vsaddu_vx)
> +GEN_VECTOR_R_VM(vsaddu_vi)
> +GEN_VECTOR_R_VM(vdivu_vv)
> +GEN_VECTOR_R_VM(vdivu_vx)
> +GEN_VECTOR_R_VM(vfdiv_vv)
> +GEN_VECTOR_R_VM(vfdiv_vf)
> +GEN_VECTOR_R_VM(vsadd_vv)
> +GEN_VECTOR_R_VM(vsadd_vx)
> +GEN_VECTOR_R_VM(vsadd_vi)
> +GEN_VECTOR_R_VM(vdiv_vv)
> +GEN_VECTOR_R_VM(vdiv_vx)
> +GEN_VECTOR_R_VM(vfrdiv_vf)
> +GEN_VECTOR_R_VM(vssubu_vv)
> +GEN_VECTOR_R_VM(vssubu_vx)
> +GEN_VECTOR_R_VM(vremu_vv)
> +GEN_VECTOR_R_VM(vremu_vx)
> +GEN_VECTOR_R_VM(vssub_vv)
> +GEN_VECTOR_R_VM(vssub_vx)
> +GEN_VECTOR_R_VM(vrem_vv)
> +GEN_VECTOR_R_VM(vrem_vx)
> +GEN_VECTOR_R_VM(vaadd_vv)
> +GEN_VECTOR_R_VM(vaadd_vx)
> +GEN_VECTOR_R_VM(vaadd_vi)
> +GEN_VECTOR_R_VM(vmulhu_vv)
> +GEN_VECTOR_R_VM(vmulhu_vx)
> +GEN_VECTOR_R_VM(vfmul_vv)
> +GEN_VECTOR_R_VM(vfmul_vf)
> +GEN_VECTOR_R_VM(vsll_vv)
> +GEN_VECTOR_R_VM(vsll_vx)
> +GEN_VECTOR_R_VM(vsll_vi)
> +GEN_VECTOR_R_VM(vmul_vv)
> +GEN_VECTOR_R_VM(vmul_vx)
> +GEN_VECTOR_R_VM(vasub_vv)
> +GEN_VECTOR_R_VM(vasub_vx)
> +GEN_VECTOR_R_VM(vmulhsu_vv)
> +GEN_VECTOR_R_VM(vmulhsu_vx)
> +GEN_VECTOR_R_VM(vsmul_vv)
> +GEN_VECTOR_R_VM(vsmul_vx)
> +GEN_VECTOR_R_VM(vmulh_vv)
> +GEN_VECTOR_R_VM(vmulh_vx)
> +GEN_VECTOR_R_VM(vfrsub_vf)
> +GEN_VECTOR_R_VM(vsrl_vv)
> +GEN_VECTOR_R_VM(vsrl_vx)
> +GEN_VECTOR_R_VM(vsrl_vi)
> +GEN_VECTOR_R_VM(vfmadd_vv)
> +GEN_VECTOR_R_VM(vfmadd_vf)
> +GEN_VECTOR_R_VM(vsra_vv)
> +GEN_VECTOR_R_VM(vsra_vx)
> +GEN_VECTOR_R_VM(vsra_vi)
> +GEN_VECTOR_R_VM(vmadd_vv)
> +GEN_VECTOR_R_VM(vmadd_vx)
> +GEN_VECTOR_R_VM(vfnmadd_vv)
> +GEN_VECTOR_R_VM(vfnmadd_vf)
> +GEN_VECTOR_R_VM(vssrl_vv)
> +GEN_VECTOR_R_VM(vssrl_vx)
> +GEN_VECTOR_R_VM(vssrl_vi)
> +GEN_VECTOR_R_VM(vfmsub_vv)
> +GEN_VECTOR_R_VM(vfmsub_vf)
> +GEN_VECTOR_R_VM(vssra_vv)
> +GEN_VECTOR_R_VM(vssra_vx)
> +GEN_VECTOR_R_VM(vssra_vi)
> +GEN_VECTOR_R_VM(vnmsub_vv)
> +GEN_VECTOR_R_VM(vnmsub_vx)
> +GEN_VECTOR_R_VM(vfnmsub_vv)
> +GEN_VECTOR_R_VM(vfnmsub_vf)
> +GEN_VECTOR_R_VM(vnsrl_vv)
> +GEN_VECTOR_R_VM(vnsrl_vx)
> +GEN_VECTOR_R_VM(vnsrl_vi)
> +GEN_VECTOR_R_VM(vfmacc_vv)
> +GEN_VECTOR_R_VM(vfmacc_vf)
> +GEN_VECTOR_R_VM(vnsra_vv)
> +GEN_VECTOR_R_VM(vnsra_vx)
> +GEN_VECTOR_R_VM(vnsra_vi)
> +GEN_VECTOR_R_VM(vmacc_vv)
> +GEN_VECTOR_R_VM(vmacc_vx)
> +GEN_VECTOR_R_VM(vfnmacc_vv)
> +GEN_VECTOR_R_VM(vfnmacc_vf)
> +GEN_VECTOR_R_VM(vnclipu_vv)
> +GEN_VECTOR_R_VM(vnclipu_vx)
> +GEN_VECTOR_R_VM(vnclipu_vi)
> +GEN_VECTOR_R_VM(vfmsac_vv)
> +GEN_VECTOR_R_VM(vfmsac_vf)
> +GEN_VECTOR_R_VM(vnclip_vv)
> +GEN_VECTOR_R_VM(vnclip_vx)
> +GEN_VECTOR_R_VM(vnclip_vi)
> +GEN_VECTOR_R_VM(vnmsac_vv)
> +GEN_VECTOR_R_VM(vnmsac_vx)
> +GEN_VECTOR_R_VM(vfnmsac_vv)
> +GEN_VECTOR_R_VM(vfnmsac_vf)
> +GEN_VECTOR_R_VM(vwredsumu_vs)
> +GEN_VECTOR_R_VM(vwaddu_vv)
> +GEN_VECTOR_R_VM(vwaddu_vx)
> +GEN_VECTOR_R_VM(vfwadd_vv)
> +GEN_VECTOR_R_VM(vfwadd_vf)
> +GEN_VECTOR_R_VM(vwredsum_vs)
> +GEN_VECTOR_R_VM(vwadd_vv)
> +GEN_VECTOR_R_VM(vwadd_vx)
> +GEN_VECTOR_R_VM(vfwredsum_vs)
> +GEN_VECTOR_R_VM(vwsubu_vv)
> +GEN_VECTOR_R_VM(vwsubu_vx)
> +GEN_VECTOR_R_VM(vfwsub_vv)
> +GEN_VECTOR_R_VM(vfwsub_vf)
> +GEN_VECTOR_R_VM(vwsub_vv)
> +GEN_VECTOR_R_VM(vwsub_vx)
> +GEN_VECTOR_R_VM(vfwredosum_vs)
> +GEN_VECTOR_R_VM(vwaddu_wv)
> +GEN_VECTOR_R_VM(vwaddu_wx)
> +GEN_VECTOR_R_VM(vfwadd_wv)
> +GEN_VECTOR_R_VM(vfwadd_wf)
> +GEN_VECTOR_R_VM(vwadd_wv)
> +GEN_VECTOR_R_VM(vwadd_wx)
> +GEN_VECTOR_R_VM(vwsubu_wv)
> +GEN_VECTOR_R_VM(vwsubu_wx)
> +GEN_VECTOR_R_VM(vfwsub_wv)
> +GEN_VECTOR_R_VM(vfwsub_wf)
> +GEN_VECTOR_R_VM(vwsub_wv)
> +GEN_VECTOR_R_VM(vwsub_wx)
> +GEN_VECTOR_R_VM(vwmulu_vv)
> +GEN_VECTOR_R_VM(vwmulu_vx)
> +GEN_VECTOR_R_VM(vfwmul_vv)
> +GEN_VECTOR_R_VM(vfwmul_vf)
> +GEN_VECTOR_R_VM(vwmulsu_vv)
> +GEN_VECTOR_R_VM(vwmulsu_vx)
> +GEN_VECTOR_R_VM(vwmul_vv)
> +GEN_VECTOR_R_VM(vwmul_vx)
> +GEN_VECTOR_R_VM(vwsmaccu_vv)
> +GEN_VECTOR_R_VM(vwsmaccu_vx)
> +GEN_VECTOR_R_VM(vwmaccu_vv)
> +GEN_VECTOR_R_VM(vwmaccu_vx)
> +GEN_VECTOR_R_VM(vfwmacc_vv)
> +GEN_VECTOR_R_VM(vfwmacc_vf)
> +GEN_VECTOR_R_VM(vwsmacc_vv)
> +GEN_VECTOR_R_VM(vwsmacc_vx)
> +GEN_VECTOR_R_VM(vwmacc_vv)
> +GEN_VECTOR_R_VM(vwmacc_vx)
> +GEN_VECTOR_R_VM(vfwnmacc_vv)
> +GEN_VECTOR_R_VM(vfwnmacc_vf)
> +GEN_VECTOR_R_VM(vwsmaccsu_vv)
> +GEN_VECTOR_R_VM(vwsmaccsu_vx)
> +GEN_VECTOR_R_VM(vwmaccsu_vv)
> +GEN_VECTOR_R_VM(vwmaccsu_vx)
> +GEN_VECTOR_R_VM(vfwmsac_vv)
> +GEN_VECTOR_R_VM(vfwmsac_vf)
> +GEN_VECTOR_R_VM(vwsmaccus_vx)
> +GEN_VECTOR_R_VM(vwmaccus_vx)
> +GEN_VECTOR_R_VM(vfwnmsac_vv)
> +GEN_VECTOR_R_VM(vfwnmsac_vf)
> +GEN_VECTOR_R2_ZIMM(vsetvli)
> +GEN_VECTOR_R(vsetvl)
> diff --git a/target/riscv/translate.c b/target/riscv/translate.c
> index 8d6ab73..587c23e 100644
> --- a/target/riscv/translate.c
> +++ b/target/riscv/translate.c
> @@ -706,6 +706,7 @@ static bool gen_shift(DisasContext *ctx, arg_r *a,
> #include "insn_trans/trans_rva.inc.c"
> #include "insn_trans/trans_rvf.inc.c"
> #include "insn_trans/trans_rvd.inc.c"
> +#include "insn_trans/trans_rvv.inc.c"
> #include "insn_trans/trans_privileged.inc.c"
>
> /*
> diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c
> new file mode 100644
> index 0000000..1f8f1ec
> --- /dev/null
> +++ b/target/riscv/vector_helper.c
> @@ -0,0 +1,26563 @@
> +/*
> + * RISC-V Vectore Extension Helpers for QEMU.
> + *
> + * Copyright (c) 2011-2019 C-SKY Limited. All rights reserved.
> + *
> + * This program is free software; you can redistribute it and/or modify it
> + * under the terms and conditions of the GNU General Public License,
> + * version 2 or later, as published by the Free Software Foundation.
> + *
> + * This program is distributed in the hope it will be useful, but WITHOUT
> + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
> + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
> for
> + * more details.
> + *
> + * You should have received a copy of the GNU General Public License
> along with
> + * this program. If not, see <http://www.gnu.org/licenses/>.
> + */
> +
> +#include "qemu/osdep.h"
> +#include "qemu/log.h"
> +#include "cpu.h"
> +#include "qemu/main-loop.h"
> +#include "exec/exec-all.h"
> +#include "exec/helper-proto.h"
> +#include "exec/translator.h"
> +#include "exec/cpu_ldst.h"
> +#include <math.h>
> +#include "instmap.h"
> +
> +#define VECTOR_HELPER(name) HELPER(glue(vector_, name))
> +#define SIGNBIT8 (1 << 7)
> +#define MAX_U8 ((uint8_t)0xff)
> +#define MIN_U8 ((uint8_t)0x0)
> +#define MAX_S8 ((int8_t)0x7f)
> +#define MIN_S8 ((int8_t)0x80)
> +#define SIGNBIT16 (1 << 15)
> +#define MAX_U16 ((uint16_t)0xffff)
> +#define MIN_U16 ((uint16_t)0x0)
> +#define MAX_S16 ((int16_t)0x7fff)
> +#define MIN_S16 ((int16_t)0x8000)
> +#define SIGNBIT32 (1 << 31)
> +#define MAX_U32 ((uint32_t)0xffffffff)
> +#define MIN_U32 ((uint32_t)0x0)
> +#define MAX_S32 ((int32_t)0x7fffffff)
> +#define MIN_S32 ((int32_t)0x80000000)
> +#define SIGNBIT64 ((uint64_t)1 << 63)
> +#define MAX_U64 ((uint64_t)0xffffffffffffffff)
> +#define MIN_U64 ((uint64_t)0x0)
> +#define MAX_S64 ((int64_t)0x7fffffffffffffff)
> +#define MIN_S64 ((int64_t)0x8000000000000000)
> +
> +static int64_t sign_extend(int64_t a, int8_t width)
> +{
> + return a << (64 - width) >> (64 - width);
> +}
> +
> +static int64_t extend_gpr(target_ulong reg)
> +{
> + return sign_extend(reg, sizeof(target_ulong) * 8);
> +}
> +
> +static target_ulong vector_get_index(CPURISCVState *env, int rs1, int rs2,
> + int index, int mem, int width, int nf)
> +{
> + target_ulong abs_off, base = env->gpr[rs1];
> + target_long offset;
> + switch (width) {
> + case 8:
> + offset = sign_extend(env->vfp.vreg[rs2].s8[index], 8) + nf * mem;
> + break;
> + case 16:
> + offset = sign_extend(env->vfp.vreg[rs2].s16[index], 16) + nf *
> mem;
> + break;
> + case 32:
> + offset = sign_extend(env->vfp.vreg[rs2].s32[index], 32) + nf *
> mem;
> + break;
> + case 64:
> + offset = env->vfp.vreg[rs2].s64[index] + nf * mem;
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return 0;
> + }
> + if (offset < 0) {
> + abs_off = ~offset + 1;
> + if (base >= abs_off) {
> + return base - abs_off;
> + }
> + } else {
> + if ((target_ulong)((target_ulong)offset + base) >= base) {
> + return (target_ulong)offset + base;
> + }
> + }
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return 0;
> +}
> +
> +
> +
> +/* ADD/SUB/COMPARE instructions. */
> +static inline uint8_t sat_add_u8(CPURISCVState *env, uint8_t a, uint8_t b)
> +{
> + uint8_t res = a + b;
> + if (res < a) {
> + res = MAX_U8;
> + env->vfp.vxsat = 0x1;
> +
> + }
> + return res;
> +}
> +
> +static inline uint16_t sat_add_u16(CPURISCVState *env, uint16_t a,
> uint16_t b)
> +{
> + uint16_t res = a + b;
> + if (res < a) {
> + res = MAX_U16;
> + env->vfp.vxsat = 0x1;
> +
> + }
> + return res;
> +}
> +
> +static inline uint32_t sat_add_u32(CPURISCVState *env, uint32_t a,
> uint32_t b)
> +{
> + uint32_t res = a + b;
> + if (res < a) {
> + res = MAX_U32;
> + env->vfp.vxsat = 0x1;
> +
> + }
> + return res;
> +}
> +
> +static inline uint64_t sat_add_u64(CPURISCVState *env, uint64_t a,
> uint64_t b)
> +{
> + uint64_t res = a + b;
> + if (res < a) {
> + res = MAX_U64;
> + env->vfp.vxsat = 0x1;
> +
> + }
> + return res;
> +}
> +
> +static inline uint8_t sat_add_s8(CPURISCVState *env, uint8_t a, uint8_t b)
> +{
> + uint8_t res = a + b;
> + if (((res ^ a) & SIGNBIT8) && !((a ^ b) & SIGNBIT8)) {
> + res = ~(((int8_t)a >> 7) ^ SIGNBIT8);
> + env->vfp.vxsat = 0x1;
> +
> + }
> + return res;
> +}
> +
> +static inline uint16_t sat_add_s16(CPURISCVState *env, uint16_t a,
> uint16_t b)
> +{
> + uint16_t res = a + b;
> + if (((res ^ a) & SIGNBIT16) && !((a ^ b) & SIGNBIT16)) {
> + res = ~(((int16_t)a >> 15) ^ SIGNBIT16);
> + env->vfp.vxsat = 0x1;
> +
> + }
> + return res;
> +}
> +
> +static inline uint32_t sat_add_s32(CPURISCVState *env, uint32_t a,
> uint32_t b)
> +{
> + uint32_t res = a + b;
> + if (((res ^ a) & SIGNBIT32) && !((a ^ b) & SIGNBIT32)) {
> + res = ~(((int32_t)a >> 31) ^ SIGNBIT32);
> + env->vfp.vxsat = 0x1;
> +
> + }
> + return res;
> +}
> +
> +static inline uint64_t sat_add_s64(CPURISCVState *env, uint64_t a,
> uint64_t b)
> +{
> + uint64_t res = a + b;
> + if (((res ^ a) & SIGNBIT64) && !((a ^ b) & SIGNBIT64)) {
> + res = ~(((int64_t)a >> 63) ^ SIGNBIT64);
> + env->vfp.vxsat = 0x1;
> +
> + }
> + return res;
> +}
> +
> +static inline uint8_t sat_sub_u8(CPURISCVState *env, uint8_t a, uint8_t b)
> +{
> + uint8_t res = a - b;
> + if (res > a) {
> + res = 0;
> + env->vfp.vxsat = 0x1;
> +
> + }
> + return res;
> +}
> +
> +static inline uint16_t sat_sub_u16(CPURISCVState *env, uint16_t a,
> uint16_t b)
> +{
> + uint16_t res = a - b;
> + if (res > a) {
> + res = 0;
> + env->vfp.vxsat = 0x1;
> +
> + }
> + return res;
> +}
> +
> +static inline uint32_t sat_sub_u32(CPURISCVState *env, uint32_t a,
> uint32_t b)
> +{
> + uint32_t res = a - b;
> + if (res > a) {
> + res = 0;
> + env->vfp.vxsat = 0x1;
> +
> + }
> + return res;
> +}
> +
> +static inline uint64_t sat_sub_u64(CPURISCVState *env, uint64_t a,
> uint64_t b)
> +{
> + uint64_t res = a - b;
> + if (res > a) {
> + res = 0;
> + env->vfp.vxsat = 0x1;
> +
> + }
> + return res;
> +}
> +
> +static inline uint8_t sat_sub_s8(CPURISCVState *env, uint8_t a, uint8_t b)
> +{
> + uint8_t res = a - b;
> + if (((res ^ a) & SIGNBIT8) && ((a ^ b) & SIGNBIT8)) {
> + res = ~(((int8_t)a >> 7) ^ SIGNBIT8);
> + env->vfp.vxsat = 0x1;
> +
> + }
> + return res;
> +}
> +
> +static inline uint16_t sat_sub_s16(CPURISCVState *env, uint16_t a,
> uint16_t b)
> +{
> + uint16_t res = a - b;
> + if (((res ^ a) & SIGNBIT16) && ((a ^ b) & SIGNBIT16)) {
> + res = ~(((int16_t)a >> 15) ^ SIGNBIT16);
> + env->vfp.vxsat = 0x1;
> +
> + }
> + return res;
> +}
> +
> +static inline uint32_t sat_sub_s32(CPURISCVState *env, uint32_t a,
> uint32_t b)
> +{
> + uint32_t res = a - b;
> + if (((res ^ a) & SIGNBIT32) && ((a ^ b) & SIGNBIT32)) {
> + res = ~(((int32_t)a >> 31) ^ SIGNBIT32);
> + env->vfp.vxsat = 0x1;
> +
> + }
> + return res;
> +}
> +
> +static inline uint64_t sat_sub_s64(CPURISCVState *env, uint64_t a,
> uint64_t b)
> +{
> + uint64_t res = a - b;
> + if (((res ^ a) & SIGNBIT64) && ((a ^ b) & SIGNBIT64)) {
> + res = ~(((int64_t)a >> 63) ^ SIGNBIT64);
> + env->vfp.vxsat = 0x1;
> +
> + }
> + return res;
> +}
> +
> +static uint64_t fix_data_round(CPURISCVState *env, uint64_t result,
> + uint8_t shift)
> +{
> + uint64_t lsb_1 = (uint64_t)1 << shift;
> + int mod = env->vfp.vxrm;
> + int mask = ((uint64_t)1 << shift) - 1;
> +
> + if (mod == 0x0) { /* rnu */
> + return lsb_1 >> 1;
> + } else if (mod == 0x1) { /* rne */
> + if ((result & mask) > (lsb_1 >> 1) ||
> + (((result & mask) == (lsb_1 >> 1)) &&
> + (((result >> shift) & 0x1)) == 1)) {
> + return lsb_1 >> 1;
> + }
> + } else if (mod == 0x3) { /* rod */
> + if (((result & mask) >= 0x1) && (((result >> shift) & 0x1) == 0))
> {
> + return lsb_1;
> + }
> + }
> + return 0;
> +}
> +
> +static int8_t saturate_s8(CPURISCVState *env, int16_t res)
> +{
> + if (res > MAX_S8) {
> + env->vfp.vxsat = 0x1;
> + return MAX_S8;
> + } else if (res < MIN_S8) {
> + env->vfp.vxsat = 0x1;
> + return MIN_S8;
> + } else {
> + return res;
> + }
> +}
> +
> +static uint8_t saturate_u8(CPURISCVState *env, uint16_t res)
> +{
> + if (res > MAX_U8) {
> + env->vfp.vxsat = 0x1;
> + return MAX_U8;
> + } else {
> + return res;
> + }
> +}
> +
> +static uint16_t saturate_u16(CPURISCVState *env, uint32_t res)
> +{
> + if (res > MAX_U16) {
> + env->vfp.vxsat = 0x1;
> + return MAX_U16;
> + } else {
> + return res;
> + }
> +}
> +
> +static uint32_t saturate_u32(CPURISCVState *env, uint64_t res)
> +{
> + if (res > MAX_U32) {
> + env->vfp.vxsat = 0x1;
> + return MAX_U32;
> + } else {
> + return res;
> + }
> +}
> +
> +static int16_t saturate_s16(CPURISCVState *env, int32_t res)
> +{
> + if (res > MAX_S16) {
> + env->vfp.vxsat = 0x1;
> + return MAX_S16;
> + } else if (res < MIN_S16) {
> + env->vfp.vxsat = 0x1;
> + return MIN_S16;
> + } else {
> + return res;
> + }
> +}
> +
> +static int32_t saturate_s32(CPURISCVState *env, int64_t res)
> +{
> + if (res > MAX_S32) {
> + env->vfp.vxsat = 0x1;
> + return MAX_S32;
> + } else if (res < MIN_S32) {
> + env->vfp.vxsat = 0x1;
> + return MIN_S32;
> + } else {
> + return res;
> + }
> +}
> +static uint16_t vwsmaccu_8(CPURISCVState *env, uint8_t a, uint8_t b,
> + uint16_t c)
> +{
> + uint16_t round, res;
> + uint16_t product = (uint16_t)a * (uint16_t)b;
> +
> + round = (uint16_t)fix_data_round(env, (uint64_t)product, 4);
> + res = (round + product) >> 4;
> + return sat_add_u16(env, c, res);
> +}
> +
> +static uint32_t vwsmaccu_16(CPURISCVState *env, uint16_t a, uint16_t b,
> + uint32_t c)
> +{
> + uint32_t round, res;
> + uint32_t product = (uint32_t)a * (uint32_t)b;
> +
> + round = (uint32_t)fix_data_round(env, (uint64_t)product, 8);
> + res = (round + product) >> 8;
> + return sat_add_u32(env, c, res);
> +}
> +
> +static uint64_t vwsmaccu_32(CPURISCVState *env, uint32_t a, uint32_t b,
> + uint64_t c)
> +{
> + uint64_t round, res;
> + uint64_t product = (uint64_t)a * (uint64_t)b;
> +
> + round = (uint64_t)fix_data_round(env, (uint64_t)product, 16);
> + res = (round + product) >> 16;
> + return sat_add_u64(env, c, res);
> +}
> +
> +static int16_t vwsmacc_8(CPURISCVState *env, int8_t a, int8_t b,
> + int16_t c)
> +{
> + int16_t round, res;
> + int16_t product = (int16_t)a * (int16_t)b;
> +
> + round = (int16_t)fix_data_round(env, (uint64_t)product, 4);
> + res = (int16_t)(round + product) >> 4;
> + return sat_add_s16(env, c, res);
> +}
> +
> +static int32_t vwsmacc_16(CPURISCVState *env, int16_t a, int16_t b,
> + int32_t c)
> +{
> + int32_t round, res;
> + int32_t product = (int32_t)a * (int32_t)b;
> +
> + round = (int32_t)fix_data_round(env, (uint64_t)product, 8);
> + res = (int32_t)(round + product) >> 8;
> + return sat_add_s32(env, c, res);
> +}
> +
> +static int64_t vwsmacc_32(CPURISCVState *env, int32_t a, int32_t b,
> + int64_t c)
> +{
> + int64_t round, res;
> + int64_t product = (int64_t)a * (int64_t)b;
> +
> + round = (int64_t)fix_data_round(env, (uint64_t)product, 16);
> + res = (int64_t)(round + product) >> 16;
> + return sat_add_s64(env, c, res);
> +}
> +
> +static int16_t vwsmaccsu_8(CPURISCVState *env, uint8_t a, int8_t b,
> + int16_t c)
> +{
> + int16_t round, res;
> + int16_t product = (uint16_t)a * (int16_t)b;
> +
> + round = (int16_t)fix_data_round(env, (uint64_t)product, 4);
> + res = (round + product) >> 4;
> + return sat_sub_s16(env, c, res);
> +}
> +
> +static int32_t vwsmaccsu_16(CPURISCVState *env, uint16_t a, int16_t b,
> + uint32_t c)
> +{
> + int32_t round, res;
> + int32_t product = (uint32_t)a * (int32_t)b;
> +
> + round = (int32_t)fix_data_round(env, (uint64_t)product, 8);
> + res = (round + product) >> 8;
> + return sat_sub_s32(env, c, res);
> +}
> +
> +static int64_t vwsmaccsu_32(CPURISCVState *env, uint32_t a, int32_t b,
> + int64_t c)
> +{
> + int64_t round, res;
> + int64_t product = (uint64_t)a * (int64_t)b;
> +
> + round = (int64_t)fix_data_round(env, (uint64_t)product, 16);
> + res = (round + product) >> 16;
> + return sat_sub_s64(env, c, res);
> +}
> +
> +static int16_t vwsmaccus_8(CPURISCVState *env, int8_t a, uint8_t b,
> + int16_t c)
> +{
> + int16_t round, res;
> + int16_t product = (int16_t)a * (uint16_t)b;
> +
> + round = (int16_t)fix_data_round(env, (uint64_t)product, 4);
> + res = (round + product) >> 4;
> + return sat_sub_s16(env, c, res);
> +}
> +
> +static int32_t vwsmaccus_16(CPURISCVState *env, int16_t a, uint16_t b,
> + int32_t c)
> +{
> + int32_t round, res;
> + int32_t product = (int32_t)a * (uint32_t)b;
> +
> + round = (int32_t)fix_data_round(env, (uint64_t)product, 8);
> + res = (round + product) >> 8;
> + return sat_sub_s32(env, c, res);
> +}
> +
> +static uint64_t vwsmaccus_32(CPURISCVState *env, int32_t a, uint32_t b,
> + int64_t c)
> +{
> + int64_t round, res;
> + int64_t product = (int64_t)a * (uint64_t)b;
> +
> + round = (int64_t)fix_data_round(env, (uint64_t)product, 16);
> + res = (round + product) >> 16;
> + return sat_sub_s64(env, c, res);
> +}
> +
> +static int8_t vssra_8(CPURISCVState *env, int8_t a, uint8_t b)
> +{
> + int16_t round, res;
> + uint8_t shift = b & 0x7;
> +
> + round = (int16_t)fix_data_round(env, (uint64_t)a, shift);
> + res = (a + round) >> shift;
> +
> + return res;
> +}
> +
> +static int16_t vssra_16(CPURISCVState *env, int16_t a, uint16_t b)
> +{
> + int32_t round, res;
> + uint8_t shift = b & 0xf;
> +
> + round = (int32_t)fix_data_round(env, (uint64_t)a, shift);
> + res = (a + round) >> shift;
> + return res;
> +}
> +
> +static int32_t vssra_32(CPURISCVState *env, int32_t a, uint32_t b)
> +{
> + int64_t round, res;
> + uint8_t shift = b & 0x1f;
> +
> + round = (int64_t)fix_data_round(env, (uint64_t)a, shift);
> + res = (a + round) >> shift;
> + return res;
> +}
> +
> +static int64_t vssra_64(CPURISCVState *env, int64_t a, uint64_t b)
> +{
> + int64_t round, res;
> + uint8_t shift = b & 0x3f;
> +
> + round = (int64_t)fix_data_round(env, (uint64_t)a, shift);
> + res = (a >> (shift - 1)) + (round >> (shift - 1));
> + return res >> 1;
> +}
> +
> +static int8_t vssrai_8(CPURISCVState *env, int8_t a, uint8_t b)
> +{
> + int16_t round, res;
> +
> + round = (int16_t)fix_data_round(env, (uint64_t)a, b);
> + res = (a + round) >> b;
> + return res;
> +}
> +
> +static int16_t vssrai_16(CPURISCVState *env, int16_t a, uint8_t b)
> +{
> + int32_t round, res;
> +
> + round = (int32_t)fix_data_round(env, (uint64_t)a, b);
> + res = (a + round) >> b;
> + return res;
> +}
> +
> +static int32_t vssrai_32(CPURISCVState *env, int32_t a, uint8_t b)
> +{
> + int64_t round, res;
> +
> + round = (int64_t)fix_data_round(env, (uint64_t)a, b);
> + res = (a + round) >> b;
> + return res;
> +}
> +
> +static int64_t vssrai_64(CPURISCVState *env, int64_t a, uint8_t b)
> +{
> + int64_t round, res;
> +
> + round = (int64_t)fix_data_round(env, (uint64_t)a, b);
> + res = (a >> (b - 1)) + (round >> (b - 1));
> + return res >> 1;
> +}
> +
> +static int8_t vnclip_16(CPURISCVState *env, int16_t a, uint8_t b)
> +{
> + int16_t round, res;
> + uint8_t shift = b & 0xf;
> +
> + round = (int16_t)fix_data_round(env, (uint64_t)a, shift);
> + res = (a + round) >> shift;
> +
> + return saturate_s8(env, res);
> +}
> +
> +static int16_t vnclip_32(CPURISCVState *env, int32_t a, uint16_t b)
> +{
> + int32_t round, res;
> + uint8_t shift = b & 0x1f;
> +
> + round = (int32_t)fix_data_round(env, (uint64_t)a, shift);
> + res = (a + round) >> shift;
> + return saturate_s16(env, res);
> +}
> +
> +static int32_t vnclip_64(CPURISCVState *env, int64_t a, uint32_t b)
> +{
> + int64_t round, res;
> + uint8_t shift = b & 0x3f;
> +
> + round = (int64_t)fix_data_round(env, (uint64_t)a, shift);
> + res = (a + round) >> shift;
> +
> + return saturate_s32(env, res);
> +}
> +
> +static int8_t vnclipi_16(CPURISCVState *env, int16_t a, uint8_t b)
> +{
> + int16_t round, res;
> +
> + round = (int16_t)fix_data_round(env, (uint64_t)a, b);
> + res = (a + round) >> b;
> +
> + return saturate_s8(env, res);
> +}
> +
> +static int16_t vnclipi_32(CPURISCVState *env, int32_t a, uint8_t b)
> +{
> + int32_t round, res;
> +
> + round = (int32_t)fix_data_round(env, (uint64_t)a, b);
> + res = (a + round) >> b;
> +
> + return saturate_s16(env, res);
> +}
> +
> +static int32_t vnclipi_64(CPURISCVState *env, int64_t a, uint8_t b)
> +{
> + int32_t round, res;
> +
> + round = (int64_t)fix_data_round(env, (uint64_t)a, b);
> + res = (a + round) >> b;
> +
> + return saturate_s32(env, res);
> +}
> +
> +static uint8_t vnclipu_16(CPURISCVState *env, uint16_t a, uint8_t b)
> +{
> + uint16_t round, res;
> + uint8_t shift = b & 0xf;
> +
> + round = (uint16_t)fix_data_round(env, (uint64_t)a, shift);
> + res = (a + round) >> shift;
> +
> + return saturate_u8(env, res);
> +}
> +
> +static uint16_t vnclipu_32(CPURISCVState *env, uint32_t a, uint16_t b)
> +{
> + uint32_t round, res;
> + uint8_t shift = b & 0x1f;
> +
> + round = (uint32_t)fix_data_round(env, (uint64_t)a, shift);
> + res = (a + round) >> shift;
> +
> + return saturate_u16(env, res);
> +}
> +
> +static uint32_t vnclipu_64(CPURISCVState *env, uint64_t a, uint32_t b)
> +{
> + uint64_t round, res;
> + uint8_t shift = b & 0x3f;
> +
> + round = (uint64_t)fix_data_round(env, (uint64_t)a, shift);
> + res = (a + round) >> shift;
> +
> + return saturate_u32(env, res);
> +}
> +
> +static uint8_t vnclipui_16(CPURISCVState *env, uint16_t a, uint8_t b)
> +{
> + uint16_t round, res;
> +
> + round = (uint16_t)fix_data_round(env, (uint64_t)a, b);
> + res = (a + round) >> b;
> +
> + return saturate_u8(env, res);
> +}
> +
> +static uint16_t vnclipui_32(CPURISCVState *env, uint32_t a, uint8_t b)
> +{
> + uint32_t round, res;
> +
> + round = (uint32_t)fix_data_round(env, (uint64_t)a, b);
> + res = (a + round) >> b;
> +
> + return saturate_u16(env, res);
> +}
> +
> +static uint32_t vnclipui_64(CPURISCVState *env, uint64_t a, uint8_t b)
> +{
> + uint64_t round, res;
> +
> + round = (uint64_t)fix_data_round(env, (uint64_t)a, b);
> + res = (a + round) >> b;
> +
> + return saturate_u32(env, res);
> +}
> +
> +static uint8_t vssrl_8(CPURISCVState *env, uint8_t a, uint8_t b)
> +{
> + uint16_t round, res;
> + uint8_t shift = b & 0x7;
> +
> + round = (uint16_t)fix_data_round(env, (uint64_t)a, shift);
> + res = (a + round) >> shift;
> + return res;
> +}
> +
> +static uint16_t vssrl_16(CPURISCVState *env, uint16_t a, uint16_t b)
> +{
> + uint32_t round, res;
> + uint8_t shift = b & 0xf;
> +
> + round = (uint32_t)fix_data_round(env, (uint64_t)a, shift);
> + res = (a + round) >> shift;
> + return res;
> +}
> +
> +static uint32_t vssrl_32(CPURISCVState *env, uint32_t a, uint32_t b)
> +{
> + uint64_t round, res;
> + uint8_t shift = b & 0x1f;
> +
> + round = (uint64_t)fix_data_round(env, (uint64_t)a, shift);
> + res = (a + round) >> shift;
> + return res;
> +}
> +
> +static uint64_t vssrl_64(CPURISCVState *env, uint64_t a, uint64_t b)
> +{
> + uint64_t round, res;
> + uint8_t shift = b & 0x3f;
> +
> + round = (uint64_t)fix_data_round(env, (uint64_t)a, shift);
> + res = (a >> (shift - 1)) + (round >> (shift - 1));
> + return res >> 1;
> +}
> +
> +static uint8_t vssrli_8(CPURISCVState *env, uint8_t a, uint8_t b)
> +{
> + uint16_t round, res;
> +
> + round = (uint16_t)fix_data_round(env, (uint64_t)a, b);
> + res = (a + round) >> b;
> + return res;
> +}
> +
> +static uint16_t vssrli_16(CPURISCVState *env, uint16_t a, uint8_t b)
> +{
> + uint32_t round, res;
> +
> + round = (uint32_t)fix_data_round(env, (uint64_t)a, b);
> + res = (a + round) >> b;
> + return res;
> +}
> +
> +static uint32_t vssrli_32(CPURISCVState *env, uint32_t a, uint8_t b)
> +{
> + uint64_t round, res;
> +
> + round = (uint64_t)fix_data_round(env, (uint64_t)a, b);
> + res = (a + round) >> b;
> + return res;
> +}
> +
> +static uint64_t vssrli_64(CPURISCVState *env, uint64_t a, uint8_t b)
> +{
> + uint64_t round, res;
> +
> + round = (uint64_t)fix_data_round(env, (uint64_t)a, b);
> + res = (a >> (b - 1)) + (round >> (b - 1));
> + return res >> 1;
> +}
> +
> +static int8_t vsmul_8(CPURISCVState *env, int8_t a, int8_t b)
> +{
> + int16_t round;
> + int8_t res;
> + int16_t product = (int16_t)a * (int16_t)b;
> +
> + if (a == MIN_S8 && b == MIN_S8) {
> + env->vfp.vxsat = 1;
> +
> + return MAX_S8;
> + }
> +
> + round = (int16_t)fix_data_round(env, (uint64_t)product, 7);
> + res = sat_add_s16(env, product, round) >> 7;
> + return res;
> +}
> +
> +
> +static int16_t vsmul_16(CPURISCVState *env, int16_t a, int16_t b)
> +{
> + int32_t round;
> + int16_t res;
> + int32_t product = (int32_t)a * (int32_t)b;
> +
> + if (a == MIN_S16 && b == MIN_S16) {
> + env->vfp.vxsat = 1;
> +
> + return MAX_S16;
> + }
> +
> + round = (int32_t)fix_data_round(env, (uint64_t)product, 15);
> + res = sat_add_s32(env, product, round) >> 15;
> + return res;
> +}
> +
> +static int32_t vsmul_32(CPURISCVState *env, int32_t a, int32_t b)
> +{
> + int64_t round;
> + int32_t res;
> + int64_t product = (int64_t)a * (int64_t)b;
> +
> + if (a == MIN_S32 && b == MIN_S32) {
> + env->vfp.vxsat = 1;
> +
> + return MAX_S32;
> + }
> +
> + round = (int64_t)fix_data_round(env, (uint64_t)product, 31);
> + res = sat_add_s64(env, product, round) >> 31;
> + return res;
> +}
> +
> +
> +static int64_t vsmul_64(CPURISCVState *env, int64_t a, int64_t b)
> +{
> + int64_t res;
> + uint64_t abs_a = a, abs_b = b;
> + uint64_t lo_64, hi_64, carry, round;
> +
> + if (a == MIN_S64 && b == MIN_S64) {
> + env->vfp.vxsat = 1;
> +
> + return MAX_S64;
> + }
> +
> + if (a < 0) {
> + abs_a = ~a + 1;
> + }
> + if (b < 0) {
> + abs_b = ~b + 1;
> + }
> +
> + /* first get the whole product in {hi_64, lo_64} */
> + uint64_t a_hi = abs_a >> 32;
> + uint64_t a_lo = (uint32_t)abs_a;
> + uint64_t b_hi = abs_b >> 32;
> + uint64_t b_lo = (uint32_t)abs_b;
> +
> + /*
> + * abs_a * abs_b = (a_hi << 32 + a_lo) * (b_hi << 32 + b_lo)
> + * = (a_hi * b_hi) << 64 + (a_hi * b_lo) << 32 +
> + * (a_lo * b_hi) << 32 + a_lo * b_lo
> + * = {hi_64, lo_64}
> + * hi_64 = ((a_hi * b_lo) << 32 + (a_lo * b_hi) << 32 + (a_lo *
> b_lo)) >> 64
> + * = (a_hi * b_lo) >> 32 + (a_lo * b_hi) >> 32 + carry
> + * carry = ((uint64_t)(uint32_t)(a_hi * b_lo) +
> + * (uint64_t)(uint32_t)(a_lo * b_hi) + (a_lo * b_lo) >> 32)
> >> 32
> + */
> +
> + lo_64 = abs_a * abs_b;
> + carry = ((uint64_t)(uint32_t)(a_hi * b_lo) +
> + (uint64_t)(uint32_t)(a_lo * b_hi) +
> + ((a_lo * b_lo) >> 32)) >> 32;
> +
> + hi_64 = a_hi * b_hi +
> + ((a_hi * b_lo) >> 32) + ((a_lo * b_hi) >> 32) +
> + carry;
> +
> + if ((a ^ b) & SIGNBIT64) {
> + lo_64 = ~lo_64;
> + hi_64 = ~hi_64;
> + if (lo_64 == MAX_U64) {
> + lo_64 = 0;
> + hi_64 += 1;
> + } else {
> + lo_64 += 1;
> + }
> + }
> +
> + /* set rem and res */
> + round = fix_data_round(env, lo_64, 63);
> + if ((lo_64 + round) < lo_64) {
> + hi_64 += 1;
> + res = (hi_64 << 1);
> + } else {
> + res = (hi_64 << 1) | ((lo_64 + round) >> 63);
> + }
> +
> + return res;
> +}
> +static inline int8_t avg_round_s8(CPURISCVState *env, int8_t a, int8_t b)
> +{
> + int16_t round;
> + int8_t res;
> + int16_t sum = a + b;
> +
> + round = (int16_t)fix_data_round(env, (uint64_t)sum, 1);
> + res = (sum + round) >> 1;
> +
> + return res;
> +}
> +
> +static inline int16_t avg_round_s16(CPURISCVState *env, int16_t a,
> int16_t b)
> +{
> + int32_t round;
> + int16_t res;
> + int32_t sum = a + b;
> +
> + round = (int32_t)fix_data_round(env, (uint64_t)sum, 1);
> + res = (sum + round) >> 1;
> +
> + return res;
> +}
> +
> +static inline int32_t avg_round_s32(CPURISCVState *env, int32_t a,
> int32_t b)
> +{
> + int64_t round;
> + int32_t res;
> + int64_t sum = a + b;
> +
> + round = (int64_t)fix_data_round(env, (uint64_t)sum, 1);
> + res = (sum + round) >> 1;
> +
> + return res;
> +}
> +
> +static inline int64_t avg_round_s64(CPURISCVState *env, int64_t a,
> int64_t b)
> +{
> + int64_t rem = (a & 0x1) + (b & 0x1);
> + int64_t res = (a >> 1) + (b >> 1) + (rem >> 1);
> + int mod = env->vfp.vxrm;
> +
> + if (mod == 0x0) { /* rnu */
> + if (rem == 0x1) {
> + return res + 1;
> + }
> + } else if (mod == 0x1) { /* rne */
> + if ((rem & 0x1) == 1 && ((res & 0x1) == 1)) {
> + return res + 1;
> + }
> + } else if (mod == 0x3) { /* rod */
> + if (((rem & 0x1) >= 0x1) && (res & 0x1) == 0) {
> + return res + 1;
> + }
> + }
> + return res;
> +}
> +
> +static target_ulong helper_fclass_h(uint64_t frs1)
> +{
> + float16 f = frs1;
> + bool sign = float16_is_neg(f);
> +
> + if (float16_is_infinity(f)) {
> + return sign ? 1 << 0 : 1 << 7;
> + } else if (float16_is_zero(f)) {
> + return sign ? 1 << 3 : 1 << 4;
> + } else if (float16_is_zero_or_denormal(f)) {
> + return sign ? 1 << 2 : 1 << 5;
> + } else if (float16_is_any_nan(f)) {
> + float_status s = { }; /* for snan_bit_is_one */
> + return float16_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8;
> + } else {
> + return sign ? 1 << 1 : 1 << 6;
> + }
> +}
> +
> +static inline bool vector_vtype_ill(CPURISCVState *env)
> +{
> + if ((env->vfp.vtype >> (sizeof(target_ulong) - 1)) & 0x1) {
> + return true;
> + }
> + return false;
> +}
> +
> +static inline void vector_vtype_set_ill(CPURISCVState *env)
> +{
> + env->vfp.vtype = ((target_ulong)1) << (sizeof(target_ulong) - 1);
> + return;
> +}
> +
> +static inline int vector_vtype_get_sew(CPURISCVState *env)
> +{
> + return (env->vfp.vtype >> 2) & 0x7;
> +}
> +
> +static inline int vector_get_width(CPURISCVState *env)
> +{
> + return 8 * (1 << vector_vtype_get_sew(env));
> +}
> +
> +static inline int vector_get_lmul(CPURISCVState *env)
> +{
> + return 1 << (env->vfp.vtype & 0x3);
> +}
> +
> +static inline int vector_get_vlmax(CPURISCVState *env)
> +{
> + return vector_get_lmul(env) * VLEN / vector_get_width(env);
> +}
> +
> +static inline int vector_elem_mask(CPURISCVState *env, uint32_t vm, int
> width,
> + int lmul, int index)
> +{
> + int mlen = width / lmul;
> + int idx = (index * mlen) / 8;
> + int pos = (index * mlen) % 8;
> +
> + return vm || ((env->vfp.vreg[0].u8[idx] >> pos) & 0x1);
> +}
> +
> +static inline bool vector_overlap_vm_common(int lmul, int vm, int rd)
> +{
> + if (lmul > 1 && vm == 0 && rd == 0) {
> + return true;
> + }
> + return false;
> +}
> +
> +static inline bool vector_overlap_vm_force(int vm, int rd)
> +{
> + if (vm == 0 && rd == 0) {
> + return true;
> + }
> + return false;
> +}
> +
> +static inline bool vector_overlap_carry(int lmul, int rd)
> +{
> + if (lmul > 1 && rd == 0) {
> + return true;
> + }
> + return false;
> +}
> +
> +static inline bool vector_overlap_dstgp_srcgp(int rd, int dlen, int rs,
> + int slen)
> +{
> + if ((rd >= rs && rd < rs + slen) || (rs >= rd && rs < rd + dlen)) {
> + return true;
> + }
> + return false;
> +}
> +
> +static inline uint64_t vector_get_mask(int start, int end)
> +{
> + return ((uint64_t)(~((uint64_t)0))) << (63 - end + start) >> (63 -
> end);
> +}
> +
> +/* fetch unsigned element by width */
> +static inline uint64_t vector_get_iu_elem(CPURISCVState *env, uint32_t
> width,
> + uint32_t rs2, uint32_t index)
> +{
> + uint64_t elem;
> + if (width == 8) {
> + elem = env->vfp.vreg[rs2].u8[index];
> + } else if (width == 16) {
> + elem = env->vfp.vreg[rs2].u16[index];
> + } else if (width == 32) {
> + elem = env->vfp.vreg[rs2].u32[index];
> + } else if (width == 64) {
> + elem = env->vfp.vreg[rs2].u64[index];
> + } else { /* the max of (XLEN, FLEN) is no bigger than 64 */
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return 0;
> + }
> + return elem;
> +}
> +
> +static inline int vector_mask_reg(CPURISCVState *env, uint32_t reg, int
> width,
> + int lmul, int index)
> +{
> + int mlen = width / lmul;
> + int idx = (index * mlen) / 8;
> + int pos = (index * mlen) % 8;
> + return (env->vfp.vreg[reg].u8[idx] >> pos) & 0x1;
> +}
> +
> +static inline void vector_mask_result(CPURISCVState *env, uint32_t reg,
> + int width, int lmul, int index, uint32_t result)
> +{
> + int mlen = width / lmul;
> + int idx = (index * mlen) / width;
> + int pos = (index * mlen) % width;
> + uint64_t mask = ~((((uint64_t)1 << mlen) - 1) << pos);
> +
> + switch (width) {
> + case 8:
> + env->vfp.vreg[reg].u8[idx] = (env->vfp.vreg[reg].u8[idx] & mask)
> + | (result << pos);
> + break;
> + case 16:
> + env->vfp.vreg[reg].u16[idx] = (env->vfp.vreg[reg].u16[idx] & mask)
> + | (result << pos);
> + break;
> + case 32:
> + env->vfp.vreg[reg].u32[idx] = (env->vfp.vreg[reg].u32[idx] & mask)
> + | (result << pos);
> + break;
> + case 64:
> + env->vfp.vreg[reg].u64[idx] = (env->vfp.vreg[reg].u64[idx] & mask)
> + | ((uint64_t)result <<
> pos);
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> +
> + return;
> +}
> +
> +/**
> + * deposit16:
> + * @value: initial value to insert bit field into
> + * @start: the lowest bit in the bit field (numbered from 0)
> + * @length: the length of the bit field
> + * @fieldval: the value to insert into the bit field
> + *
> + * Deposit @fieldval into the 16 bit @value at the bit field specified
> + * by the @start and @length parameters, and return the modified
> + * @value. Bits of @value outside the bit field are not modified.
> + * Bits of @fieldval above the least significant @length bits are
> + * ignored. The bit field must lie entirely within the 16 bit word.
> + * It is valid to request that all 16 bits are modified (ie @length
> + * 16 and @start 0).
> + *
> + * Returns: the modified @value.
> + */
> +static inline uint16_t deposit16(uint16_t value, int start, int length,
> + uint16_t fieldval)
> +{
> + uint16_t mask;
> + assert(start >= 0 && length > 0 && length <= 16 - start);
> + mask = (~0U >> (16 - length)) << start;
> + return (value & ~mask) | ((fieldval << start) & mask);
> +}
> +
> +static void vector_tail_amo(CPURISCVState *env, int vreg, int index, int
> width)
> +{
> + switch (width) {
> + case 32:
> + env->vfp.vreg[vreg].u32[index] = 0;
> + break;
> + case 64:
> + env->vfp.vreg[vreg].u64[index] = 0;
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +}
> +
> +static void vector_tail_common(CPURISCVState *env, int vreg, int index,
> + int width)
> +{
> + switch (width) {
> + case 8:
> + env->vfp.vreg[vreg].u8[index] = 0;
> + break;
> + case 16:
> + env->vfp.vreg[vreg].u16[index] = 0;
> + break;
> + case 32:
> + env->vfp.vreg[vreg].u32[index] = 0;
> + break;
> + case 64:
> + env->vfp.vreg[vreg].u64[index] = 0;
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +}
> +
> +static void vector_tail_segment(CPURISCVState *env, int vreg, int index,
> + int width, int nf, int lmul)
> +{
> + switch (width) {
> + case 8:
> + while (nf >= 0) {
> + env->vfp.vreg[vreg + nf * lmul].u8[index] = 0;
> + nf--;
> + }
> + break;
> + case 16:
> + while (nf >= 0) {
> + env->vfp.vreg[vreg + nf * lmul].u16[index] = 0;
> + nf--;
> + }
> + break;
> + case 32:
> + while (nf >= 0) {
> + env->vfp.vreg[vreg + nf * lmul].u32[index] = 0;
> + nf--;
> + }
> + break;
> + case 64:
> + while (nf >= 0) {
> + env->vfp.vreg[vreg + nf * lmul].u64[index] = 0;
> + nf--;
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +}
> +
> +static void vector_tail_widen(CPURISCVState *env, int vreg, int index,
> + int width)
> +{
> + switch (width) {
> + case 8:
> + env->vfp.vreg[vreg].u16[index] = 0;
> + break;
> + case 16:
> + env->vfp.vreg[vreg].u32[index] = 0;
> + break;
> + case 32:
> + env->vfp.vreg[vreg].u64[index] = 0;
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +}
> +
> +static void vector_tail_narrow(CPURISCVState *env, int vreg, int index,
> + int width)
> +{
> + switch (width) {
> + case 8:
> + env->vfp.vreg[vreg].u8[index] = 0;
> + break;
> + case 16:
> + env->vfp.vreg[vreg].u16[index] = 0;
> + break;
> + case 32:
> + env->vfp.vreg[vreg].u32[index] = 0;
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +}
> +
> +static void vector_tail_fcommon(CPURISCVState *env, int vreg, int index,
> + int width)
> +{
> + switch (width) {
> + case 16:
> + env->vfp.vreg[vreg].u16[index] = 0;
> + break;
> + case 32:
> + env->vfp.vreg[vreg].u32[index] = 0;
> + break;
> + case 64:
> + env->vfp.vreg[vreg].u64[index] = 0;
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +}
> +
> +static void vector_tail_fwiden(CPURISCVState *env, int vreg, int index,
> + int width)
> +{
> + switch (width) {
> + case 16:
> + env->vfp.vreg[vreg].u32[index] = 0;
> + break;
> + case 32:
> + env->vfp.vreg[vreg].u64[index] = 0;
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +}
> +
> +static void vector_tail_fnarrow(CPURISCVState *env, int vreg, int index,
> + int width)
> +{
> + switch (width) {
> + case 16:
> + env->vfp.vreg[vreg].u16[index] = 0;
> + break;
> + case 32:
> + env->vfp.vreg[vreg].u32[index] = 0;
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +}
> +static inline int vector_get_carry(CPURISCVState *env, int width, int
> lmul,
> + int index)
> +{
> + int mlen = width / lmul;
> + int idx = (index * mlen) / 8;
> + int pos = (index * mlen) % 8;
> +
> + return (env->vfp.vreg[0].u8[idx] >> pos) & 0x1;
> +}
> +
> +static inline void vector_get_layout(CPURISCVState *env, int width, int
> lmul,
> + int index, int *idx, int *pos)
> +{
> + int mlen = width / lmul;
> + *idx = (index * mlen) / 8;
> + *pos = (index * mlen) % 8;
> +}
> +
> +static bool vector_lmul_check_reg(CPURISCVState *env, uint32_t lmul,
> + uint32_t reg, bool widen)
> +{
> + int legal = widen ? (lmul * 2) : lmul;
> +
> + if ((lmul != 1 && lmul != 2 && lmul != 4 && lmul != 8) ||
> + (lmul == 8 && widen)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return false;
> + }
> +
> + if (reg % legal != 0) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return false;
> + }
> + return true;
> +}
> +
> +static inline uint64_t u64xu64_lh(uint64_t a, uint64_t b)
> +{
> + uint64_t hi_64, carry;
> +
> +
> + /* first get the whole product in {hi_64, lo_64} */
> + uint64_t a_hi = a >> 32;
> + uint64_t a_lo = (uint32_t)a;
> + uint64_t b_hi = b >> 32;
> + uint64_t b_lo = (uint32_t)b;
> +
> + /*
> + * a * b = (a_hi << 32 + a_lo) * (b_hi << 32 + b_lo)
> + * = (a_hi * b_hi) << 64 + (a_hi * b_lo) << 32 +
> + * (a_lo * b_hi) << 32 + a_lo * b_lo
> + * = {hi_64, lo_64}
> + * hi_64 = ((a_hi * b_lo) << 32 + (a_lo * b_hi) << 32 + (a_lo *
> b_lo)) >> 64
> + * = (a_hi * b_lo) >> 32 + (a_lo * b_hi) >> 32 + carry
> + * carry = ((uint64_t)(uint32_t)(a_hi * b_lo) +
> + * (uint64_t)(uint32_t)(a_lo * b_hi) + (a_lo * b_lo) >> 32)
> >> 32
> + */
> +
> + carry = ((uint64_t)(uint32_t)(a_hi * b_lo) +
> + (uint64_t)(uint32_t)(a_lo * b_hi) +
> + ((a_lo * b_lo) >> 32)) >> 32;
> +
> + hi_64 = a_hi * b_hi +
> + ((a_hi * b_lo) >> 32) + ((a_lo * b_hi) >> 32) +
> + carry;
> +
> + return hi_64;
> +}
> +
> +
> +static inline int64_t s64xu64_lh(int64_t a, uint64_t b)
> +{
> + uint64_t abs_a = a;
> + uint64_t lo_64, hi_64, carry;
> +
> + if (a < 0) {
> + abs_a = ~a + 1;
> + }
> +
> + /* first get the whole product in {hi_64, lo_64} */
> + uint64_t a_hi = abs_a >> 32;
> + uint64_t a_lo = (uint32_t)abs_a;
> + uint64_t b_hi = b >> 32;
> + uint64_t b_lo = (uint32_t)b;
> +
> + /*
> + * abs_a * b = (a_hi << 32 + a_lo) * (b_hi << 32 + b_lo)
> + * = (a_hi * b_hi) << 64 + (a_hi * b_lo) << 32 +
> + * (a_lo * b_hi) << 32 + a_lo * b_lo
> + * = {hi_64, lo_64}
> + * hi_64 = ((a_hi * b_lo) << 32 + (a_lo * b_hi) << 32 + (a_lo *
> b_lo)) >> 64
> + * = (a_hi * b_lo) >> 32 + (a_lo * b_hi) >> 32 + carry
> + * carry = ((uint64_t)(uint32_t)(a_hi * b_lo) +
> + * (uint64_t)(uint32_t)(a_lo * b_hi) + (a_lo * b_lo) >> 32)
> >> 32
> + */
> +
> + lo_64 = abs_a * b;
> + carry = ((uint64_t)(uint32_t)(a_hi * b_lo) +
> + (uint64_t)(uint32_t)(a_lo * b_hi) +
> + ((a_lo * b_lo) >> 32)) >> 32;
> +
> + hi_64 = a_hi * b_hi +
> + ((a_hi * b_lo) >> 32) + ((a_lo * b_hi) >> 32) +
> + carry;
> + if ((a ^ b) & SIGNBIT64) {
> + lo_64 = ~lo_64;
> + hi_64 = ~hi_64;
> + if (lo_64 == MAX_U64) {
> + lo_64 = 0;
> + hi_64 += 1;
> + } else {
> + lo_64 += 1;
> + }
> + }
> + return hi_64;
> +}
> +
> +
> +static inline int64_t s64xs64_lh(int64_t a, int64_t b)
> +{
> + uint64_t abs_a = a, abs_b = b;
> + uint64_t lo_64, hi_64, carry;
> +
> + if (a < 0) {
> + abs_a = ~a + 1;
> + }
> + if (b < 0) {
> + abs_b = ~b + 1;
> + }
> +
> + /* first get the whole product in {hi_64, lo_64} */
> + uint64_t a_hi = abs_a >> 32;
> + uint64_t a_lo = (uint32_t)abs_a;
> + uint64_t b_hi = abs_b >> 32;
> + uint64_t b_lo = (uint32_t)abs_b;
> +
> + /*
> + * abs_a * abs_b = (a_hi << 32 + a_lo) * (b_hi << 32 + b_lo)
> + * = (a_hi * b_hi) << 64 + (a_hi * b_lo) << 32 +
> + * (a_lo * b_hi) << 32 + a_lo * b_lo
> + * = {hi_64, lo_64}
> + * hi_64 = ((a_hi * b_lo) << 32 + (a_lo * b_hi) << 32 + (a_lo *
> b_lo)) >> 64
> + * = (a_hi * b_lo) >> 32 + (a_lo * b_hi) >> 32 + carry
> + * carry = ((uint64_t)(uint32_t)(a_hi * b_lo) +
> + * (uint64_t)(uint32_t)(a_lo * b_hi) + (a_lo * b_lo) >> 32)
> >> 32
> + */
> +
> + lo_64 = abs_a * abs_b;
> + carry = ((uint64_t)(uint32_t)(a_hi * b_lo) +
> + (uint64_t)(uint32_t)(a_lo * b_hi) +
> + ((a_lo * b_lo) >> 32)) >> 32;
> +
> + hi_64 = a_hi * b_hi +
> + ((a_hi * b_lo) >> 32) + ((a_lo * b_hi) >> 32) +
> + carry;
> +
> + if ((a ^ b) & SIGNBIT64) {
> + lo_64 = ~lo_64;
> + hi_64 = ~hi_64;
> + if (lo_64 == MAX_U64) {
> + lo_64 = 0;
> + hi_64 += 1;
> + } else {
> + lo_64 += 1;
> + }
> + }
> + return hi_64;
> +}
> +
> +void VECTOR_HELPER(vsetvl)(CPURISCVState *env, uint32_t rs1, uint32_t rs2,
> + uint32_t rd)
> +{
> + int sew, max_sew, vlmax, vl;
> +
> + if (rs2 == 0) {
> + vector_vtype_set_ill(env);
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + env->vfp.vtype = env->gpr[rs2];
> + sew = 1 << vector_get_width(env) / 8;
> + max_sew = sizeof(target_ulong);
> +
> +
> + if (env->misa & RVD) {
> + max_sew = max_sew > 8 ? max_sew : 8;
> + } else if (env->misa & RVF) {
> + max_sew = max_sew > 4 ? max_sew : 4;
> + }
> + if (sew > max_sew) {
> + vector_vtype_set_ill(env);
> + return;
> + }
> +
> + vlmax = vector_get_vlmax(env);
> + if (rs1 == 0) {
> + vl = vlmax;
> + } else if (env->gpr[rs1] <= vlmax) {
> + vl = env->gpr[rs1];
> + } else if (env->gpr[rs1] < 2 * vlmax) {
> + vl = ceil(env->gpr[rs1] / 2);
> + } else {
> + vl = vlmax;
> + }
> + env->vfp.vl = vl;
> + env->gpr[rd] = vl;
> + return;
> + env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vsetvli)(CPURISCVState *env, uint32_t rs1, uint32_t
> zimm,
> + uint32_t rd)
> +{
> + int sew, max_sew, vlmax, vl;
> +
> + env->vfp.vtype = zimm;
> + sew = vector_get_width(env) / 8;
> + max_sew = sizeof(target_ulong);
> +
> + if (env->misa & RVD) {
> + max_sew = max_sew > 8 ? max_sew : 8;
> + } else if (env->misa & RVF) {
> + max_sew = max_sew > 4 ? max_sew : 4;
> + }
> + if (sew > max_sew) {
> + vector_vtype_set_ill(env);
> + return;
> + }
> +
> + vlmax = vector_get_vlmax(env);
> + if (rs1 == 0) {
> + vl = vlmax;
> + } else if (env->gpr[rs1] <= vlmax) {
> + vl = env->gpr[rs1];
> + } else if (env->gpr[rs1] < 2 * vlmax) {
> + vl = ceil(env->gpr[rs1] / 2);
> + } else {
> + vl = vlmax;
> + }
> + env->vfp.vl = vl;
> + env->gpr[rd] = vl;
> + return;
> + env->vfp.vstart = 0;
> +}
> +
> +/*
> + * vrgather.vv vd, vs2, vs1, vm #
> + * vd[i] = (vs1[i] >= VLMAX) ? 0 : vs2[vs1[i]];
> + */
> +void VECTOR_HELPER(vrgather_vv)(CPURISCVState *env, uint32_t vm, uint32_t
> rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest, src, src1;
> + uint32_t index;
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> + if (vector_vtype_ill(env)
> + || vector_overlap_vm_force(vm, rd)
> + || vector_overlap_dstgp_srcgp(rd, lmul, rs1, lmul)
> + || vector_overlap_dstgp_srcgp(rd, lmul, rs2, lmul)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src1 = rs1 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + index = env->vfp.vreg[src1].u8[j];
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (index >= vlmax) {
> + env->vfp.vreg[dest].u8[j] = 0;
> + } else {
> + src = rs2 + (index / (VLEN / width));
> + index = index % (VLEN / width);
> + env->vfp.vreg[dest].u8[j] =
> + env->vfp.vreg[src].u8[index];
> + }
> + }
> + break;
> + case 16:
> + index = env->vfp.vreg[src1].u16[j];
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (index >= vlmax) {
> + env->vfp.vreg[dest].u16[j] = 0;
> + } else {
> + src = rs2 + (index / (VLEN / width));
> + index = index % (VLEN / width);
> + env->vfp.vreg[dest].u16[j] =
> + env->vfp.vreg[src].u16[index];
> + }
> + }
> + break;
> + case 32:
> + index = env->vfp.vreg[src1].u32[j];
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (index >= vlmax) {
> + env->vfp.vreg[dest].u32[j] = 0;
> + } else {
> + src = rs2 + (index / (VLEN / width));
> + index = index % (VLEN / width);
> + env->vfp.vreg[dest].u32[j] =
> + env->vfp.vreg[src].u32[index];
> + }
> + }
> + break;
> + case 64:
> + index = env->vfp.vreg[src1].u64[j];
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (index >= vlmax) {
> + env->vfp.vreg[dest].u64[j] = 0;
> + } else {
> + src = rs2 + (index / (VLEN / width));
> + index = index % (VLEN / width);
> + env->vfp.vreg[dest].u64[j] =
> + env->vfp.vreg[src].u64[index];
> + }
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST,
> GETPC());
> + return;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + return;
> + env->vfp.vstart = 0;
> +}
> +
> +/* vrgather.vx vd, vs2, rs1, vm # vd[i] = (x[rs1] >= VLMAX) ? 0 :
> vs2[rs1] */
> +void VECTOR_HELPER(vrgather_vx)(CPURISCVState *env, uint32_t vm, uint32_t
> rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest, src;
> + uint32_t index;
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> + if (vector_vtype_ill(env)
> + || vector_overlap_vm_force(vm, rd)
> + || vector_overlap_dstgp_srcgp(rd, lmul, rs2, lmul)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + index = env->gpr[rs1];
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (index >= vlmax) {
> + env->vfp.vreg[dest].u8[j] = 0;
> + } else {
> + src = rs2 + (index / (VLEN / width));
> + index = index % (VLEN / width);
> + env->vfp.vreg[dest].u8[j] =
> + env->vfp.vreg[src].u8[index];
> + }
> + }
> + break;
> + case 16:
> + index = env->gpr[rs1];
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (index >= vlmax) {
> + env->vfp.vreg[dest].u16[j] = 0;
> + } else {
> + src = rs2 + (index / (VLEN / width));
> + index = index % (VLEN / width);
> + env->vfp.vreg[dest].u16[j] =
> + env->vfp.vreg[src].u16[index];
> + }
> + }
> + break;
> + case 32:
> + index = env->gpr[rs1];
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (index >= vlmax) {
> + env->vfp.vreg[dest].u32[j] = 0;
> + } else {
> + src = rs2 + (index / (VLEN / width));
> + index = index % (VLEN / width);
> + env->vfp.vreg[dest].u32[j] =
> + env->vfp.vreg[src].u32[index];
> + }
> + }
> + break;
> + case 64:
> + index = env->gpr[rs1];
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (index >= vlmax) {
> + env->vfp.vreg[dest].u64[j] = 0;
> + } else {
> + src = rs2 + (index / (VLEN / width));
> + index = index % (VLEN / width);
> + env->vfp.vreg[dest].u64[j] =
> + env->vfp.vreg[src].u64[index];
> + }
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST,
> GETPC());
> + return;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + return;
> + env->vfp.vstart = 0;
> +}
> +
> +/* vrgather.vi vd, vs2, imm, vm # vd[i] = (imm >= VLMAX) ? 0 : vs2[imm]
> */
> +void VECTOR_HELPER(vrgather_vi)(CPURISCVState *env, uint32_t vm, uint32_t
> rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest, src;
> + uint32_t index;
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> + if (vector_vtype_ill(env)
> + || vector_overlap_vm_force(vm, rd)
> + || vector_overlap_dstgp_srcgp(rd, lmul, rs2, lmul)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + index = rs1;
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (index >= vlmax) {
> + env->vfp.vreg[dest].u8[j] = 0;
> + } else {
> + src = rs2 + (index / (VLEN / width));
> + index = index % (VLEN / width);
> + env->vfp.vreg[dest].u8[j] =
> + env->vfp.vreg[src].u8[index];
> + }
> + }
> + break;
> + case 16:
> + index = rs1;
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (index >= vlmax) {
> + env->vfp.vreg[dest].u16[j] = 0;
> + } else {
> + src = rs2 + (index / (VLEN / width));
> + index = index % (VLEN / width);
> + env->vfp.vreg[dest].u16[j] =
> + env->vfp.vreg[src].u16[index];
> + }
> + }
> + break;
> + case 32:
> + index = rs1;
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (index >= vlmax) {
> + env->vfp.vreg[dest].u32[j] = 0;
> + } else {
> + src = rs2 + (index / (VLEN / width));
> + index = index % (VLEN / width);
> + env->vfp.vreg[dest].u32[j] =
> + env->vfp.vreg[src].u32[index];
> + }
> + }
> + break;
> + case 64:
> + index = rs1;
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (index >= vlmax) {
> + env->vfp.vreg[dest].u64[j] = 0;
> + } else {
> + src = rs2 + (index / (VLEN / width));
> + index = index % (VLEN / width);
> + env->vfp.vreg[dest].u64[j] =
> + env->vfp.vreg[src].u64[index];
> + }
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST,
> GETPC());
> + return;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + return;
> + env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vext_x_v)(CPURISCVState *env, uint32_t rs1, uint32_t
> rs2,
> + uint32_t rd)
> +{
> + int width;
> + uint64_t elem;
> + target_ulong index = env->gpr[rs1];
> +
> + if (vector_vtype_ill(env)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + width = vector_get_width(env);
> +
> + elem = vector_get_iu_elem(env, width, rs2, index);
> + if (index >= VLEN / width) { /* index is too big */
> + env->gpr[rd] = 0;
> + } else {
> + env->gpr[rd] = elem;
> + }
> + return;
> + env->vfp.vstart = 0;
> +}
> +
> +/* vfmv.f.s rd, vs2 # rd = vs2[0] (rs1=0) */
> +void VECTOR_HELPER(vfmv_f_s)(CPURISCVState *env, uint32_t rs1, uint32_t
> rs2,
> + uint32_t rd)
> +{
> + int width, flen;
> + uint64_t mask;
> +
> + if (vector_vtype_ill(env)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + if (env->misa & RVD) {
> + flen = 8;
> + } else if (env->misa & RVF) {
> + flen = 4;
> + } else {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + width = vector_get_width(env);
> + mask = (~((uint64_t)0)) << width;
> +
> + if (width == 8) {
> + env->fpr[rd] = (uint64_t)env->vfp.vreg[rs2].s8[0] | mask;
> + } else if (width == 16) {
> + env->fpr[rd] = (uint64_t)env->vfp.vreg[rs2].s16[0] | mask;
> + } else if (width == 32) {
> + env->fpr[rd] = (uint64_t)env->vfp.vreg[rs2].s32[0] | mask;
> + } else if (width == 64) {
> + if (flen == 4) {
> + env->fpr[rd] = env->vfp.vreg[rs2].s64[0] & 0xffffffff;
> + } else {
> + env->fpr[rd] = env->vfp.vreg[rs2].s64[0];
> + }
> + } else {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + return;
> + env->vfp.vstart = 0;
> +}
> +
> +/* vmv.s.x vd, rs1 # vd[0] = rs1 */
> +void VECTOR_HELPER(vmv_s_x)(CPURISCVState *env, uint32_t rs1, uint32_t
> rs2,
> + uint32_t rd)
> +{
> + int width;
> + if (vector_vtype_ill(env)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + if (env->vfp.vstart >= env->vfp.vl) {
> + return;
> + }
> +
> + memset(&env->vfp.vreg[rd].u8[0], 0, VLEN / 8);
> + width = vector_get_width(env);
> +
> + if (width == 8) {
> + env->vfp.vreg[rd].u8[0] = env->gpr[rs1];
> + } else if (width == 16) {
> + env->vfp.vreg[rd].u16[0] = env->gpr[rs1];
> + } else if (width == 32) {
> + env->vfp.vreg[rd].u32[0] = env->gpr[rs1];
> + } else if (width == 64) {
> + env->vfp.vreg[rd].u64[0] = env->gpr[rs1];
> + } else {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + return;
> + env->vfp.vstart = 0;
> +}
> +
> +/* vfmv.s.f vd, rs1 # vd[0] = rs1 (vs2 = 0) */
> +void VECTOR_HELPER(vfmv_s_f)(CPURISCVState *env, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, flen;
> +
> + if (vector_vtype_ill(env)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + if (env->vfp.vstart >= env->vfp.vl) {
> + return;
> + }
> + if (env->misa & RVD) {
> + flen = 8;
> + } else if (env->misa & RVF) {
> + flen = 4;
> + } else {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + width = vector_get_width(env);
> +
> + if (width == 8) {
> + env->vfp.vreg[rd].u8[0] = env->fpr[rs1];
> + } else if (width == 16) {
> + env->vfp.vreg[rd].u16[0] = env->fpr[rs1];
> + } else if (width == 32) {
> + env->vfp.vreg[rd].u32[0] = env->fpr[rs1];
> + } else if (width == 64) {
> + if (flen == 4) { /* 1-extended to FLEN bits */
> + env->vfp.vreg[rd].u64[0] = (uint64_t)env->fpr[rs1]
> + | 0xffffffff00000000;
> + } else {
> + env->vfp.vreg[rd].u64[0] = env->fpr[rs1];
> + }
> + } else {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + return;
> + env->vfp.vstart = 0;
> +}
> +
> +/* vslideup.vx vd, vs2, rs1, vm # vd[i+rs1] = vs2[i] */
> +void VECTOR_HELPER(vslideup_vx)(CPURISCVState *env, uint32_t vm, uint32_t
> rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax, offset;
> + int i, j, dest, src, k;
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> + if (vector_vtype_ill(env)
> + || vector_overlap_vm_force(vm, rd)
> + || vector_overlap_dstgp_srcgp(rd, lmul, rs2, lmul)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> + offset = env->gpr[rs1];
> +
> + if (offset < env->vfp.vstart) {
> + offset = env->vfp.vstart;
> + }
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src = rs2 + ((i - offset) / (VLEN / width));
> + j = i % (VLEN / width);
> + k = (i - offset) % (VLEN / width);
> + if (i < offset) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u8[j] =
> + env->vfp.vreg[src].u8[k];
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u16[j] =
> + env->vfp.vreg[src].u16[k];
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u32[j] =
> + env->vfp.vreg[src].u32[k];
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u64[j] =
> + env->vfp.vreg[src].u64[k];
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST,
> GETPC());
> + return;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + return;
> +
> + env->vfp.vstart = 0;
> +}
> +
> +/* vslideup.vi vd, vs2, rs1, vm # vd[i+rs1] = vs2[i] */
> +void VECTOR_HELPER(vslideup_vi)(CPURISCVState *env, uint32_t vm, uint32_t
> rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax, offset;
> + int i, j, dest, src, k;
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> + if (vector_vtype_ill(env)
> + || vector_overlap_vm_force(vm, rd)
> + || vector_overlap_dstgp_srcgp(rd, lmul, rs2, lmul)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> + offset = rs1;
> +
> + if (offset < env->vfp.vstart) {
> + offset = env->vfp.vstart;
> + }
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src = rs2 + ((i - offset) / (VLEN / width));
> + j = i % (VLEN / width);
> + k = (i - offset) % (VLEN / width);
> + if (i < offset) {
> + continue;
> + } else if (i < vl) {
> + if (width == 8) {
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u8[j] =
> + env->vfp.vreg[src].u8[k];
> + }
> + } else if (width == 16) {
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u16[j] =
> + env->vfp.vreg[src].u16[k];
> + }
> + } else if (width == 32) {
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u32[j] =
> + env->vfp.vreg[src].u32[k];
> + }
> + } else if (width == 64) {
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u64[j] =
> + env->vfp.vreg[src].u64[k];
> + }
> + } else {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST,
> GETPC());
> + return;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + return;
> + env->vfp.vstart = 0;
> +}
> +
> +/* vslide1up.vx vd, vs2, rs1, vm # vd[0]=x[rs1], vd[i+1] = vs2[i] */
> +void VECTOR_HELPER(vslide1up_vx)(CPURISCVState *env, uint32_t vm,
> uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest, src, k;
> + uint64_t s1;
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> + if (vector_vtype_ill(env)
> + || vector_overlap_vm_force(vm, rd)
> + || vector_overlap_dstgp_srcgp(rd, lmul, rs2, lmul)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> + s1 = env->gpr[rs1];
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src = rs2 + ((i - 1) / (VLEN / width));
> + j = i % (VLEN / width);
> + k = (i - 1) % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i == 0 && env->vfp.vstart == 0) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u8[j] = s1;
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u16[j] = s1;
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u32[j] = s1;
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u64[j] = s1;
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u8[j] = env->vfp.vreg[src].u8[k];
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u16[j] =
> + env->vfp.vreg[src].u16[k];
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u32[j] =
> + env->vfp.vreg[src].u32[k];
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u64[j] =
> + env->vfp.vreg[src].u64[k];
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vslidedown.vx vd, vs2, rs1, vm # vd[i] = vs2[i + rs1] */
> +void VECTOR_HELPER(vslidedown_vx)(CPURISCVState *env, uint32_t vm,
> + uint32_t rs1, uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax, offset;
> + int i, j, dest, src, k;
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_force(vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> + offset = env->gpr[rs1];
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src = rs2 + ((i + offset) / (VLEN / width));
> + j = i % (VLEN / width);
> + k = (i + offset) % (VLEN / width);
> + if (i < offset) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (i + offset < vlmax) {
> + env->vfp.vreg[dest].u8[j] =
> + env->vfp.vreg[src].u8[k];
> + } else {
> + env->vfp.vreg[dest].u8[j] = 0;
> + }
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (i + offset < vlmax) {
> + env->vfp.vreg[dest].u16[j] =
> + env->vfp.vreg[src].u16[k];
> + } else {
> + env->vfp.vreg[dest].u16[j] = 0;
> + }
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (i + offset < vlmax) {
> + env->vfp.vreg[dest].u32[j] =
> + env->vfp.vreg[src].u32[k];
> + } else {
> + env->vfp.vreg[dest].u32[j] = 0;
> + }
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (i + offset < vlmax) {
> + env->vfp.vreg[dest].u64[j] =
> + env->vfp.vreg[src].u64[k];
> + } else {
> + env->vfp.vreg[dest].u64[j] = 0;
> + }
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
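> +/* vslidedown.vi vd, vs2, rs1, vm # vd[i] = vs2[i + rs1] */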
> +void VECTOR_HELPER(vslidedown_vi)(CPURISCVState *env, uint32_t vm,
> + uint32_t rs1, uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax, offset;
> + int i, j, dest, src, k;
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_force(vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> + offset = rs1;
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src = rs2 + ((i + offset) / (VLEN / width));
> + j = i % (VLEN / width);
> + k = (i + offset) % (VLEN / width);
> + if (i < offset) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (i + offset < vlmax) {
> + env->vfp.vreg[dest].u8[j] =
> + env->vfp.vreg[src].u8[k];
> + } else {
> + env->vfp.vreg[dest].u8[j] = 0;
> + }
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (i + offset < vlmax) {
> + env->vfp.vreg[dest].u16[j] =
> + env->vfp.vreg[src].u16[k];
> + } else {
> + env->vfp.vreg[dest].u16[j] = 0;
> + }
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (i + offset < vlmax) {
> + env->vfp.vreg[dest].u32[j] =
> + env->vfp.vreg[src].u32[k];
> + } else {
> + env->vfp.vreg[dest].u32[j] = 0;
> + }
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (i + offset < vlmax) {
> + env->vfp.vreg[dest].u64[j] =
> + env->vfp.vreg[src].u64[k];
> + } else {
> + env->vfp.vreg[dest].u64[j] = 0;
> + }
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vslide1down.vx vd, vs2, rs1, vm # vd[vl - 1] = x[rs1], vd[i] = vs2[i + 1] */
> +void VECTOR_HELPER(vslide1down_vx)(CPURISCVState *env, uint32_t vm,
> + uint32_t rs1, uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest, src, k;
> + uint64_t s1;
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_force(vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> + s1 = env->gpr[rs1];
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src = rs2 + ((i + 1) / (VLEN / width));
> + j = i % (VLEN / width);
> + k = (i + 1) % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i == vl - 1 && i >= env->vfp.vstart) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u8[j] = s1;
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u16[j] = s1;
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u32[j] = s1;
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u64[j] = s1;
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else if (i < vl - 1) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u8[j] = env->vfp.vreg[src].u8[k];
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u16[j] =
> + env->vfp.vreg[src].u16[k];
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u32[j] =
> + env->vfp.vreg[src].u32[k];
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u64[j] =
> + env->vfp.vreg[src].u64[k];
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/*
> + * vcompress.vm vd, vs2, vs1
> + * Compress into vd elements of vs2 where vs1 is enabled
> + */
> +void VECTOR_HELPER(vcompress_vm)(CPURISCVState *env, uint32_t rs1, uint32_t rs2,
> + uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest, src;
> + uint32_t vd_idx, num = 0;
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> + if (vector_vtype_ill(env)
> + || vector_overlap_dstgp_srcgp(rd, lmul, rs1, 1)
> + || vector_overlap_dstgp_srcgp(rd, lmul, rs2, lmul)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + if (env->vfp.vstart != 0) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + /* zero all destination elements first */
> + for (i = 0; i < lmul; i++) {
> + memset(&env->vfp.vreg[rd + i].u64[0], 0, VLEN / 8);
> + }
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (num / (VLEN / width));
> + src = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + vd_idx = num % (VLEN / width);
> + if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_mask_reg(env, rs1, width, lmul, i)) {
> + env->vfp.vreg[dest].u8[vd_idx] =
> + env->vfp.vreg[src].u8[j];
> + num++;
> + }
> + break;
> + case 16:
> + if (vector_mask_reg(env, rs1, width, lmul, i)) {
> + env->vfp.vreg[dest].u16[vd_idx] =
> + env->vfp.vreg[src].u16[j];
> + num++;
> + }
> + break;
> + case 32:
> + if (vector_mask_reg(env, rs1, width, lmul, i)) {
> + env->vfp.vreg[dest].u32[vd_idx] =
> + env->vfp.vreg[src].u32[j];
> + num++;
> + }
> + break;
> + case 64:
> + if (vector_mask_reg(env, rs1, width, lmul, i)) {
> + env->vfp.vreg[dest].u64[vd_idx] =
> + env->vfp.vreg[src].u64[j];
> + num++;
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
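> +/* vadd.vv vd, vs2, vs1, vm # vd[i] = vs2[i] + vs1[i] */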
> +void VECTOR_HELPER(vadd_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src1, src2, dest, vlmax;
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src1 = rs1 + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u8[j] = env->vfp.vreg[src1].u8[j]
> + + env->vfp.vreg[src2].u8[j];
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u16[j] = env->vfp.vreg[src1].u16[j]
> + + env->vfp.vreg[src2].u16[j];
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u32[j] = env->vfp.vreg[src1].u32[j]
> + + env->vfp.vreg[src2].u32[j];
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u64[j] = env->vfp.vreg[src1].u64[j]
> + + env->vfp.vreg[src2].u64[j];
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
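> +/* vadd.vx vd, vs2, rs1, vm # vd[i] = vs2[i] + x[rs1] */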
> +void VECTOR_HELPER(vadd_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src2, dest, vlmax;
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u8[j] = env->gpr[rs1]
> + + env->vfp.vreg[src2].u8[j];
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u16[j] = env->gpr[rs1]
> + + env->vfp.vreg[src2].u16[j];
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u32[j] = env->gpr[rs1]
> + + env->vfp.vreg[src2].u32[j];
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u64[j] =
> + (uint64_t)extend_gpr(env->gpr[rs1])
> + + env->vfp.vreg[src2].u64[j];
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
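> +/* vadd.vi vd, vs2, imm, vm # vd[i] = vs2[i] + sign_extend(imm) */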
> +void VECTOR_HELPER(vadd_vi)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src2, dest, vlmax;
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s8[j] = sign_extend(rs1, 5)
> + + env->vfp.vreg[src2].s8[j];
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s16[j] = sign_extend(rs1, 5)
> + + env->vfp.vreg[src2].s16[j];
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s32[j] = sign_extend(rs1, 5)
> + + env->vfp.vreg[src2].s32[j];
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s64[j] = sign_extend(rs1, 5)
> + + env->vfp.vreg[src2].s64[j];
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vredsum.vs vd, vs2, vs1, vm # vd[0] = sum(vs1[0] , vs2[*]) */
> +void VECTOR_HELPER(vredsum_vs)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> +
> + int width, lmul, vl, vlmax;
> + int i, j, src2;
> + uint64_t sum = 0;
> +
> + lmul = vector_get_lmul(env);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + if (env->vfp.vstart != 0) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vl = env->vfp.vl;
> + if (vl == 0) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < VLEN / 64; i++) {
> + env->vfp.vreg[rd].u64[i] = 0;
> + }
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> +
> + if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + sum += env->vfp.vreg[src2].u8[j];
> + }
> + if (i == 0) {
> + sum += env->vfp.vreg[rs1].u8[0];
> + }
> + if (i == vl - 1) {
> + env->vfp.vreg[rd].u8[0] = sum;
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + sum += env->vfp.vreg[src2].u16[j];
> + }
> + if (i == 0) {
> + sum += env->vfp.vreg[rs1].u16[0];
> + }
> + if (i == vl - 1) {
> + env->vfp.vreg[rd].u16[0] = sum;
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + sum += env->vfp.vreg[src2].u32[j];
> + }
> + if (i == 0) {
> + sum += env->vfp.vreg[rs1].u32[0];
> + }
> + if (i == vl - 1) {
> + env->vfp.vreg[rd].u32[0] = sum;
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + sum += env->vfp.vreg[src2].u64[j];
> + }
> + if (i == 0) {
> + sum += env->vfp.vreg[rs1].u64[0];
> + }
> + if (i == vl - 1) {
> + env->vfp.vreg[rd].u64[0] = sum;
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vfadd.vv vd, vs2, vs1, vm # Vector-vector */
> +void VECTOR_HELPER(vfadd_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest, src1, src2;
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + src1 = rs1 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f16[j] = float16_add(
> + env->vfp.vreg[src1].f16[j],
> + env->vfp.vreg[src2].f16[j],
> + &env->fp_status);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f32[j] = float32_add(
> + env->vfp.vreg[src1].f32[j],
> + env->vfp.vreg[src2].f32[j],
> + &env->fp_status);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f64[j] = float64_add(
> + env->vfp.vreg[src1].f64[j],
> + env->vfp.vreg[src2].f64[j],
> + &env->fp_status);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_fcommon(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vfadd.vf vd, vs2, rs1, vm # vector-scalar */
> +void VECTOR_HELPER(vfadd_vf)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest, src2;
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f16[j] = float16_add(
> + env->fpr[rs1],
> + env->vfp.vreg[src2].f16[j],
> + &env->fp_status);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f32[j] = float32_add(
> + env->fpr[rs1],
> + env->vfp.vreg[src2].f32[j],
> + &env->fp_status);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f64[j] = float64_add(
> + env->fpr[rs1],
> + env->vfp.vreg[src2].f64[j],
> + &env->fp_status);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_fcommon(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vredand.vs vd, vs2, vs1, vm # vd[0] = and( vs1[0] , vs2[*] ) */
> +void VECTOR_HELPER(vredand_vs)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, src2;
> + uint64_t res = 0;
> +
> + lmul = vector_get_lmul(env);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + if (env->vfp.vstart != 0) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vl = env->vfp.vl;
> + if (vl == 0) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < VLEN / 64; i++) {
> + env->vfp.vreg[rd].u64[i] = 0;
> + }
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> +
> + if (i < vl) {
> + switch (width) {
> + case 8:
> + if (i == 0) {
> + res = env->vfp.vreg[rs1].u8[0];
> + }
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + res &= env->vfp.vreg[src2].u8[j];
> + }
> + if (i == vl - 1) {
> + env->vfp.vreg[rd].u8[0] = res;
> + }
> + break;
> + case 16:
> + if (i == 0) {
> + res = env->vfp.vreg[rs1].u16[0];
> + }
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + res &= env->vfp.vreg[src2].u16[j];
> + }
> + if (i == vl - 1) {
> + env->vfp.vreg[rd].u16[0] = res;
> + }
> + break;
> + case 32:
> + if (i == 0) {
> + res = env->vfp.vreg[rs1].u32[0];
> + }
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + res &= env->vfp.vreg[src2].u32[j];
> + }
> + if (i == vl - 1) {
> + env->vfp.vreg[rd].u32[0] = res;
> + }
> + break;
> + case 64:
> + if (i == 0) {
> + res = env->vfp.vreg[rs1].u64[0];
> + }
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + res &= env->vfp.vreg[src2].u64[j];
> + }
> + if (i == vl - 1) {
> + env->vfp.vreg[rd].u64[0] = res;
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vfredsum.vs vd, vs2, vs1, vm # Unordered sum */
> +void VECTOR_HELPER(vfredsum_vs)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, src2;
> + float16 sum16 = 0.0f;
> + float32 sum32 = 0.0f;
> + float64 sum64 = 0.0f;
> +
> + lmul = vector_get_lmul(env);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> +
> + if (vector_vtype_ill(env)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + if (env->vfp.vstart != 0) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vl = env->vfp.vl;
> + if (vl == 0) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < VLEN / 64; i++) {
> + env->vfp.vreg[rd].u64[i] = 0;
> + }
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> +
> + if (i < vl) {
> + switch (width) {
> + case 16:
> + if (i == 0) {
> + sum16 = env->vfp.vreg[rs1].f16[0];
> + }
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + sum16 = float16_add(sum16, env->vfp.vreg[src2].f16[j],
> + &env->fp_status);
> + }
> + if (i == vl - 1) {
> + env->vfp.vreg[rd].f16[0] = sum16;
> + }
> + break;
> + case 32:
> + if (i == 0) {
> + sum32 = env->vfp.vreg[rs1].f32[0];
> + }
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + sum32 = float32_add(sum32, env->vfp.vreg[src2].f32[j],
> + &env->fp_status);
> + }
> + if (i == vl - 1) {
> + env->vfp.vreg[rd].f32[0] = sum32;
> + }
> + break;
> + case 64:
> + if (i == 0) {
> + sum64 = env->vfp.vreg[rs1].f64[0];
> + }
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + sum64 = float64_add(sum64, env->vfp.vreg[src2].f64[j],
> + &env->fp_status);
> + }
> + if (i == vl - 1) {
> + env->vfp.vreg[rd].f64[0] = sum64;
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
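> +/* vsub.vv vd, vs2, vs1, vm # vd[i] = vs2[i] - vs1[i] */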
> +void VECTOR_HELPER(vsub_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src1, src2, dest, vlmax;
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src1 = rs1 + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u8[j] = env->vfp.vreg[src2].u8[j]
> + - env->vfp.vreg[src1].u8[j];
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u16[j] = env->vfp.vreg[src2].u16[j]
> + - env->vfp.vreg[src1].u16[j];
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u32[j] = env->vfp.vreg[src2].u32[j]
> + - env->vfp.vreg[src1].u32[j];
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u64[j] = env->vfp.vreg[src2].u64[j]
> + - env->vfp.vreg[src1].u64[j];
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> +
> + env->vfp.vstart = 0;
> +}
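> +
> +/* vsub.vx vd, vs2, rs1, vm # vd[i] = vs2[i] - x[rs1] */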
> +void VECTOR_HELPER(vsub_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src2, dest, vlmax;
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u8[j] = env->vfp.vreg[src2].u8[j]
> + - env->gpr[rs1];
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u16[j] = env->vfp.vreg[src2].u16[j]
> + - env->gpr[rs1];
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u32[j] = env->vfp.vreg[src2].u32[j]
> + - env->gpr[rs1];
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u64[j] = env->vfp.vreg[src2].u64[j]
> + - (uint64_t)extend_gpr(env->gpr[rs1]);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vredor.vs vd, vs2, vs1, vm # vd[0] = or( vs1[0] , vs2[*] ) */
> +void VECTOR_HELPER(vredor_vs)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, src2;
> + uint64_t res = 0;
> +
> + lmul = vector_get_lmul(env);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + if (env->vfp.vstart != 0) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vl = env->vfp.vl;
> + if (vl == 0) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < VLEN / 64; i++) {
> + env->vfp.vreg[rd].u64[i] = 0;
> + }
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> +
> + if (i < vl) {
> + switch (width) {
> + case 8:
> + if (i == 0) {
> + res = env->vfp.vreg[rs1].u8[0];
> + }
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + res |= env->vfp.vreg[src2].u8[j];
> + }
> + if (i == vl - 1) {
> + env->vfp.vreg[rd].u8[0] = res;
> + }
> + break;
> + case 16:
> + if (i == 0) {
> + res = env->vfp.vreg[rs1].u16[0];
> + }
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + res |= env->vfp.vreg[src2].u16[j];
> + }
> + if (i == vl - 1) {
> + env->vfp.vreg[rd].u16[0] = res;
> + }
> + break;
> + case 32:
> + if (i == 0) {
> + res = env->vfp.vreg[rs1].u32[0];
> + }
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + res |= env->vfp.vreg[src2].u32[j];
> + }
> + if (i == vl - 1) {
> + env->vfp.vreg[rd].u32[0] = res;
> + }
> + break;
> + case 64:
> + if (i == 0) {
> + res = env->vfp.vreg[rs1].u64[0];
> + }
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + res |= env->vfp.vreg[src2].u64[j];
> + }
> + if (i == vl - 1) {
> + env->vfp.vreg[rd].u64[0] = res;
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vfsub.vv vd, vs2, vs1, vm # Vector-vector */
> +void VECTOR_HELPER(vfsub_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest, src1, src2;
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + src1 = rs1 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f16[j] = float16_sub(
> + env->vfp.vreg[src2].f16[j],
> + env->vfp.vreg[src1].f16[j],
> + &env->fp_status);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f32[j] = float32_sub(
> + env->vfp.vreg[src2].f32[j],
> + env->vfp.vreg[src1].f32[j],
> + &env->fp_status);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f64[j] = float64_sub(
> + env->vfp.vreg[src2].f64[j],
> + env->vfp.vreg[src1].f64[j],
> + &env->fp_status);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_fcommon(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vfsub.vf vd, vs2, rs1, vm # Vector-scalar vd[i] = vs2[i] - f[rs1] */
> +void VECTOR_HELPER(vfsub_vf)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest, src2;
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f16[j] = float16_sub(
> + env->vfp.vreg[src2].f16[j],
> + env->fpr[rs1],
> + &env->fp_status);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f32[j] = float32_sub(
> + env->vfp.vreg[src2].f32[j],
> + env->fpr[rs1],
> + &env->fp_status);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f64[j] = float64_sub(
> + env->vfp.vreg[src2].f64[j],
> + env->fpr[rs1],
> + &env->fp_status);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_fcommon(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
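> +/* vrsub.vx vd, vs2, rs1, vm # vd[i] = x[rs1] - vs2[i] */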
> +void VECTOR_HELPER(vrsub_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src2, dest, vlmax;
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u8[j] = env->gpr[rs1]
> + - env->vfp.vreg[src2].u8[j];
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u16[j] = env->gpr[rs1]
> + - env->vfp.vreg[src2].u16[j];
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u32[j] = env->gpr[rs1]
> + - env->vfp.vreg[src2].u32[j];
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u64[j] =
> + (uint64_t)extend_gpr(env->gpr[rs1])
> + - env->vfp.vreg[src2].u64[j];
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
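> +/* vrsub.vi vd, vs2, imm, vm # vd[i] = sign_extend(imm) - vs2[i] */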
> +void VECTOR_HELPER(vrsub_vi)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src2, dest, vlmax;
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s8[j] = sign_extend(rs1, 5)
> + - env->vfp.vreg[src2].s8[j];
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s16[j] = sign_extend(rs1, 5)
> + - env->vfp.vreg[src2].s16[j];
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s32[j] = sign_extend(rs1, 5)
> + - env->vfp.vreg[src2].s32[j];
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s64[j] = sign_extend(rs1, 5)
> + - env->vfp.vreg[src2].s64[j];
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vredxor.vs vd, vs2, vs1, vm # vd[0] = xor( vs1[0] , vs2[*] ) */
> +void VECTOR_HELPER(vredxor_vs)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, src2;
> + uint64_t res = 0;
> +
> + lmul = vector_get_lmul(env);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + if (env->vfp.vstart != 0) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vl = env->vfp.vl;
> + if (vl == 0) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < VLEN / 64; i++) {
> + env->vfp.vreg[rd].u64[i] = 0;
> + }
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> +
> + if (i < vl) {
> + switch (width) {
> + case 8:
> + if (i == 0) {
> + res = env->vfp.vreg[rs1].u8[0];
> + }
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + res ^= env->vfp.vreg[src2].u8[j];
> + }
> + if (i == vl - 1) {
> + env->vfp.vreg[rd].u8[0] = res;
> + }
> + break;
> + case 16:
> + if (i == 0) {
> + res = env->vfp.vreg[rs1].u16[0];
> + }
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + res ^= env->vfp.vreg[src2].u16[j];
> + }
> + if (i == vl - 1) {
> + env->vfp.vreg[rd].u16[0] = res;
> + }
> + break;
> + case 32:
> + if (i == 0) {
> + res = env->vfp.vreg[rs1].u32[0];
> + }
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + res ^= env->vfp.vreg[src2].u32[j];
> + }
> + if (i == vl - 1) {
> + env->vfp.vreg[rd].u32[0] = res;
> + }
> + break;
> + case 64:
> + if (i == 0) {
> + res = env->vfp.vreg[rs1].u64[0];
> + }
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + res ^= env->vfp.vreg[src2].u64[j];
> + }
> + if (i == vl - 1) {
> + env->vfp.vreg[rd].u64[0] = res;
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vfredosum.vs vd, vs2, vs1, vm # Ordered sum */
> +void VECTOR_HELPER(vfredosum_vs)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + helper_vector_vfredsum_vs(env, vm, rs1, rs2, rd);
> +}
> +
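> +/* vminu.vv vd, vs2, vs1, vm # vd[i] = unsigned min(vs2[i], vs1[i]) */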
> +void VECTOR_HELPER(vminu_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src1, src2, dest, vlmax;
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src1 = rs1 + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src1].u8[j] <=
> + env->vfp.vreg[src2].u8[j]) {
> + env->vfp.vreg[dest].u8[j] =
> + env->vfp.vreg[src1].u8[j];
> + } else {
> + env->vfp.vreg[dest].u8[j] =
> + env->vfp.vreg[src2].u8[j];
> + }
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src1].u16[j] <=
> + env->vfp.vreg[src2].u16[j]) {
> + env->vfp.vreg[dest].u16[j] =
> + env->vfp.vreg[src1].u16[j];
> + } else {
> + env->vfp.vreg[dest].u16[j] =
> + env->vfp.vreg[src2].u16[j];
> + }
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src1].u32[j] <=
> + env->vfp.vreg[src2].u32[j]) {
> + env->vfp.vreg[dest].u32[j] =
> + env->vfp.vreg[src1].u32[j];
> + } else {
> + env->vfp.vreg[dest].u32[j] =
> + env->vfp.vreg[src2].u32[j];
> + }
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src1].u64[j] <=
> + env->vfp.vreg[src2].u64[j]) {
> + env->vfp.vreg[dest].u64[j] =
> + env->vfp.vreg[src1].u64[j];
> + } else {
> + env->vfp.vreg[dest].u64[j] =
> + env->vfp.vreg[src2].u64[j];
> + }
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
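> +/* vminu.vx vd, vs2, rs1, vm # vd[i] = unsigned min(vs2[i], x[rs1]) */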
> +void VECTOR_HELPER(vminu_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src2, dest, vlmax;
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if ((uint8_t)env->gpr[rs1] <=
> + env->vfp.vreg[src2].u8[j]) {
> + env->vfp.vreg[dest].u8[j] =
> + env->gpr[rs1];
> + } else {
> + env->vfp.vreg[dest].u8[j] =
> + env->vfp.vreg[src2].u8[j];
> + }
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if ((uint16_t)env->gpr[rs1] <=
> + env->vfp.vreg[src2].u16[j]) {
> + env->vfp.vreg[dest].u16[j] =
> + env->gpr[rs1];
> + } else {
> + env->vfp.vreg[dest].u16[j] =
> + env->vfp.vreg[src2].u16[j];
> + }
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if ((uint32_t)env->gpr[rs1] <=
> + env->vfp.vreg[src2].u32[j]) {
> + env->vfp.vreg[dest].u32[j] =
> + env->gpr[rs1];
> + } else {
> + env->vfp.vreg[dest].u32[j] =
> + env->vfp.vreg[src2].u32[j];
> + }
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if ((uint64_t)extend_gpr(env->gpr[rs1]) <=
> + env->vfp.vreg[src2].u64[j]) {
> + env->vfp.vreg[dest].u64[j] =
> + (uint64_t)extend_gpr(env->gpr[rs1]);
> + } else {
> + env->vfp.vreg[dest].u64[j] =
> + env->vfp.vreg[src2].u64[j];
> + }
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vredminu.vs vd, vs2, vs1, vm # vd[0] = minu( vs1[0] , vs2[*] ) */
> +void VECTOR_HELPER(vredminu_vs)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, src2;
> + uint64_t minu = 0;
> +
> + lmul = vector_get_lmul(env);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> +
> + if (vector_vtype_ill(env)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + if (env->vfp.vstart != 0) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vl = env->vfp.vl;
> + if (vl == 0) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < VLEN / 64; i++) {
> + env->vfp.vreg[rd].u64[i] = 0;
> + }
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> +
> + if (i < vl) {
> + switch (width) {
> + case 8:
> + if (i == 0) {
> + minu = env->vfp.vreg[rs1].u8[0];
> + }
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (minu > env->vfp.vreg[src2].u8[j]) {
> + minu = env->vfp.vreg[src2].u8[j];
> + }
> + }
> + if (i == vl - 1) {
> + env->vfp.vreg[rd].u8[0] = minu;
> + }
> + break;
> + case 16:
> + if (i == 0) {
> + minu = env->vfp.vreg[rs1].u16[0];
> + }
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (minu > env->vfp.vreg[src2].u16[j]) {
> + minu = env->vfp.vreg[src2].u16[j];
> + }
> + }
> + if (i == vl - 1) {
> + env->vfp.vreg[rd].u16[0] = minu;
> + }
> + break;
> + case 32:
> + if (i == 0) {
> + minu = env->vfp.vreg[rs1].u32[0];
> + }
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (minu > env->vfp.vreg[src2].u32[j]) {
> + minu = env->vfp.vreg[src2].u32[j];
> + }
> + }
> + if (i == vl - 1) {
> + env->vfp.vreg[rd].u32[0] = minu;
> + }
> + break;
> + case 64:
> + if (i == 0) {
> + minu = env->vfp.vreg[rs1].u64[0];
> + }
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (minu > env->vfp.vreg[src2].u64[j]) {
> + minu = env->vfp.vreg[src2].u64[j];
> + }
> + }
> + if (i == vl - 1) {
> + env->vfp.vreg[rd].u64[0] = minu;
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vfmin.vv vd, vs2, vs1, vm # Vector-vector */
> +void VECTOR_HELPER(vfmin_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest, src1, src2;
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + src1 = rs1 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f16[j] = float16_minnum(
> + env->vfp.vreg[src1].f16[j],
> + env->vfp.vreg[src2].f16[j],
> + &env->fp_status);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f32[j] = float32_minnum(
> + env->vfp.vreg[src1].f32[j],
> + env->vfp.vreg[src2].f32[j],
> + &env->fp_status);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f64[j] = float64_minnum(
> + env->vfp.vreg[src1].f64[j],
> + env->vfp.vreg[src2].f64[j],
> + &env->fp_status);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + switch (width) {
> + case 16:
> + env->vfp.vreg[dest].f16[j] = 0;
> + break;
> + case 32:
> + env->vfp.vreg[dest].f32[j] = 0;
> + break;
> + case 64:
> + env->vfp.vreg[dest].f64[j] = 0;
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vfmin.vf vd, vs2, rs1, vm # vector-scalar */
> +void VECTOR_HELPER(vfmin_vf)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest, src2;
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f16[j] = float16_minnum(
> + env->fpr[rs1],
> + env->vfp.vreg[src2].f16[j],
> + &env->fp_status);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f32[j] = float32_minnum(
> + env->fpr[rs1],
> + env->vfp.vreg[src2].f32[j],
> + &env->fp_status);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f64[j] = float64_minnum(
> + env->fpr[rs1],
> + env->vfp.vreg[src2].f64[j],
> + &env->fp_status);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + switch (width) {
> + case 16:
> + env->vfp.vreg[dest].f16[j] = 0;
> + break;
> + case 32:
> + env->vfp.vreg[dest].f32[j] = 0;
> + break;
> + case 64:
> + env->vfp.vreg[dest].f64[j] = 0;
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + }
> + }
> + env->vfp.vstart = 0;
> +}
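> +
> +/* vmin.vv vd, vs2, vs1, vm # vd[i] = signed min(vs2[i], vs1[i]) */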
> +void VECTOR_HELPER(vmin_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src1, src2, dest, vlmax;
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src1 = rs1 + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src1].s8[j] <=
> + env->vfp.vreg[src2].s8[j]) {
> + env->vfp.vreg[dest].s8[j] =
> + env->vfp.vreg[src1].s8[j];
> + } else {
> + env->vfp.vreg[dest].s8[j] =
> + env->vfp.vreg[src2].s8[j];
> + }
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src1].s16[j] <=
> + env->vfp.vreg[src2].s16[j]) {
> + env->vfp.vreg[dest].s16[j] =
> + env->vfp.vreg[src1].s16[j];
> + } else {
> + env->vfp.vreg[dest].s16[j] =
> + env->vfp.vreg[src2].s16[j];
> + }
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src1].s32[j] <=
> + env->vfp.vreg[src2].s32[j]) {
> + env->vfp.vreg[dest].s32[j] =
> + env->vfp.vreg[src1].s32[j];
> + } else {
> + env->vfp.vreg[dest].s32[j] =
> + env->vfp.vreg[src2].s32[j];
> + }
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src1].s64[j] <=
> + env->vfp.vreg[src2].s64[j]) {
> + env->vfp.vreg[dest].s64[j] =
> + env->vfp.vreg[src1].s64[j];
> + } else {
> + env->vfp.vreg[dest].s64[j] =
> + env->vfp.vreg[src2].s64[j];
> + }
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
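> +
> +/* vmin.vx vd, vs2, rs1, vm # vd[i] = signed min(vs2[i], x[rs1]) */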
> +void VECTOR_HELPER(vmin_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src2, dest, vlmax;
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if ((int8_t)env->gpr[rs1] <=
> + env->vfp.vreg[src2].s8[j]) {
> + env->vfp.vreg[dest].s8[j] =
> + env->gpr[rs1];
> + } else {
> + env->vfp.vreg[dest].s8[j] =
> + env->vfp.vreg[src2].s8[j];
> + }
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if ((int16_t)env->gpr[rs1] <=
> + env->vfp.vreg[src2].s16[j]) {
> + env->vfp.vreg[dest].s16[j] =
> + env->gpr[rs1];
> + } else {
> + env->vfp.vreg[dest].s16[j] =
> + env->vfp.vreg[src2].s16[j];
> + }
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if ((int32_t)env->gpr[rs1] <=
> + env->vfp.vreg[src2].s32[j]) {
> + env->vfp.vreg[dest].s32[j] =
> + env->gpr[rs1];
> + } else {
> + env->vfp.vreg[dest].s32[j] =
> + env->vfp.vreg[src2].s32[j];
> + }
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if ((int64_t)extend_gpr(env->gpr[rs1]) <=
> + env->vfp.vreg[src2].s64[j]) {
> + env->vfp.vreg[dest].s64[j] =
> + (int64_t)extend_gpr(env->gpr[rs1]);
> + } else {
> + env->vfp.vreg[dest].s64[j] =
> + env->vfp.vreg[src2].s64[j];
> + }
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vredmin.vs vd, vs2, vs1, vm # vd[0] = min( vs1[0] , vs2[*] ) */
> +void VECTOR_HELPER(vredmin_vs)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, src2;
> + int64_t min = 0;
> +
> + lmul = vector_get_lmul(env);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> +
> + if (vector_vtype_ill(env)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + if (env->vfp.vstart != 0) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vl = env->vfp.vl;
> + if (vl == 0) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < VLEN / 64; i++) {
> + env->vfp.vreg[rd].u64[i] = 0;
> + }
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> +
> + if (i < vl) {
> + switch (width) {
> + case 8:
> + if (i == 0) {
> + min = env->vfp.vreg[rs1].s8[0];
> + }
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (min > env->vfp.vreg[src2].s8[j]) {
> + min = env->vfp.vreg[src2].s8[j];
> + }
> + }
> + if (i == vl - 1) {
> + env->vfp.vreg[rd].s8[0] = min;
> + }
> + break;
> + case 16:
> + if (i == 0) {
> + min = env->vfp.vreg[rs1].s16[0];
> + }
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (min > env->vfp.vreg[src2].s16[j]) {
> + min = env->vfp.vreg[src2].s16[j];
> + }
> + }
> + if (i == vl - 1) {
> + env->vfp.vreg[rd].s16[0] = min;
> + }
> + break;
> + case 32:
> + if (i == 0) {
> + min = env->vfp.vreg[rs1].s32[0];
> + }
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (min > env->vfp.vreg[src2].s32[j]) {
> + min = env->vfp.vreg[src2].s32[j];
> + }
> + }
> + if (i == vl - 1) {
> + env->vfp.vreg[rd].s32[0] = min;
> + }
> + break;
> + case 64:
> + if (i == 0) {
> + min = env->vfp.vreg[rs1].s64[0];
> + }
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (min > env->vfp.vreg[src2].s64[j]) {
> + min = env->vfp.vreg[src2].s64[j];
> + }
> + }
> + if (i == vl - 1) {
> + env->vfp.vreg[rd].s64[0] = min;
> + }
> + break;
> + default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST,
> +                                      GETPC());
> + return;
> + }
> + }
> + }
> +    env->vfp.vstart = 0;
> +}
> +
> +/* vfredmin.vs vd, vs2, vs1, vm # Minimum value */
> +void VECTOR_HELPER(vfredmin_vs)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, src2;
> +    float16 min16 = float16_zero;
> +    float32 min32 = float32_zero;
> +    float64 min64 = float64_zero;
> +
> + lmul = vector_get_lmul(env);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> +
> + if (vector_vtype_ill(env)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + if (env->vfp.vstart != 0) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vl = env->vfp.vl;
> + if (vl == 0) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < VLEN / 64; i++) {
> + env->vfp.vreg[rd].u64[i] = 0;
> + }
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> +
> + if (i < vl) {
> + switch (width) {
> + case 16:
> + if (i == 0) {
> + min16 = env->vfp.vreg[rs1].f16[0];
> + }
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    min16 = float16_minnum(min16, env->vfp.vreg[src2].f16[j],
> +                                           &env->fp_status);
> +                }
> +                if (i == vl - 1) {
> +                    env->vfp.vreg[rd].f16[0] = min16;
> +                }
> +                break;
> +            case 32:
> +                if (i == 0) {
> +                    min32 = env->vfp.vreg[rs1].f32[0];
> +                }
> +                if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    min32 = float32_minnum(min32, env->vfp.vreg[src2].f32[j],
> +                                           &env->fp_status);
> +                }
> +                if (i == vl - 1) {
> +                    env->vfp.vreg[rd].f32[0] = min32;
> +                }
> +                break;
> +            case 64:
> +                if (i == 0) {
> +                    min64 = env->vfp.vreg[rs1].f64[0];
> +                }
> +                if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    min64 = float64_minnum(min64, env->vfp.vreg[src2].f64[j],
> +                                           &env->fp_status);
> +                }
> +                if (i == vl - 1) {
> +                    env->vfp.vreg[rd].f64[0] = min64;
> +                }
> +                break;
> +            default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST,
> +                                      GETPC());
> +                return;
> +            }
> +        }
> +    }
> +    env->vfp.vstart = 0;
> +}
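> +
> +/* vmaxu.vv vd, vs2, vs1, vm # vd[i] = maxu(vs1[i], vs2[i]) */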
> +void VECTOR_HELPER(vmaxu_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src1, src2, dest, vlmax;
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src1 = rs1 + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src1].u8[j] >=
> + env->vfp.vreg[src2].u8[j]) {
> + env->vfp.vreg[dest].u8[j] =
> + env->vfp.vreg[src1].u8[j];
> + } else {
> + env->vfp.vreg[dest].u8[j] =
> + env->vfp.vreg[src2].u8[j];
> + }
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src1].u16[j] >=
> + env->vfp.vreg[src2].u16[j]) {
> + env->vfp.vreg[dest].u16[j] =
> + env->vfp.vreg[src1].u16[j];
> + } else {
> + env->vfp.vreg[dest].u16[j] =
> + env->vfp.vreg[src2].u16[j];
> + }
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src1].u32[j] >=
> + env->vfp.vreg[src2].u32[j]) {
> + env->vfp.vreg[dest].u32[j] =
> + env->vfp.vreg[src1].u32[j];
> + } else {
> + env->vfp.vreg[dest].u32[j] =
> + env->vfp.vreg[src2].u32[j];
> + }
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src1].u64[j] >=
> + env->vfp.vreg[src2].u64[j]) {
> + env->vfp.vreg[dest].u64[j] =
> + env->vfp.vreg[src1].u64[j];
> + } else {
> + env->vfp.vreg[dest].u64[j] =
> + env->vfp.vreg[src2].u64[j];
> + }
> + }
> + break;
> + default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST,
> +                                      GETPC());
> + break;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> +
> + env->vfp.vstart = 0;
> +}
> +
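> +/* vmaxu.vx vd, vs2, rs1, vm # vd[i] = maxu(x[rs1], vs2[i]) */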
> +void VECTOR_HELPER(vmaxu_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src2, dest, vlmax;
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if ((uint8_t)env->gpr[rs1] >=
> + env->vfp.vreg[src2].u8[j]) {
> + env->vfp.vreg[dest].u8[j] =
> + env->gpr[rs1];
> + } else {
> + env->vfp.vreg[dest].u8[j] =
> + env->vfp.vreg[src2].u8[j];
> + }
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if ((uint16_t)env->gpr[rs1] >=
> + env->vfp.vreg[src2].u16[j]) {
> + env->vfp.vreg[dest].u16[j] =
> + env->gpr[rs1];
> + } else {
> + env->vfp.vreg[dest].u16[j] =
> + env->vfp.vreg[src2].u16[j];
> + }
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if ((uint32_t)env->gpr[rs1] >=
> + env->vfp.vreg[src2].u32[j]) {
> + env->vfp.vreg[dest].u32[j] =
> + env->gpr[rs1];
> + } else {
> + env->vfp.vreg[dest].u32[j] =
> + env->vfp.vreg[src2].u32[j];
> + }
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if ((uint64_t)extend_gpr(env->gpr[rs1]) >=
> + env->vfp.vreg[src2].u64[j]) {
> + env->vfp.vreg[dest].u64[j] =
> + (uint64_t)extend_gpr(env->gpr[rs1]);
> + } else {
> + env->vfp.vreg[dest].u64[j] =
> + env->vfp.vreg[src2].u64[j];
> + }
> + }
> + break;
> + default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST,
> +                                      GETPC());
> + break;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vredmaxu.vs vd, vs2, vs1, vm # vd[0] = maxu( vs1[0] , vs2[*] ) */
> +void VECTOR_HELPER(vredmaxu_vs)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, src2;
> + uint64_t maxu = 0;
> +
> + lmul = vector_get_lmul(env);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> +
> + if (vector_vtype_ill(env)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + if (env->vfp.vstart != 0) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vl = env->vfp.vl;
> + if (vl == 0) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < VLEN / 64; i++) {
> + env->vfp.vreg[rd].u64[i] = 0;
> + }
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> +
> + if (i < vl) {
> + switch (width) {
> + case 8:
> + if (i == 0) {
> + maxu = env->vfp.vreg[rs1].u8[0];
> + }
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (maxu < env->vfp.vreg[src2].u8[j]) {
> + maxu = env->vfp.vreg[src2].u8[j];
> + }
> + }
> + if (i == vl - 1) {
> + env->vfp.vreg[rd].u8[0] = maxu;
> + }
> + break;
> + case 16:
> + if (i == 0) {
> + maxu = env->vfp.vreg[rs1].u16[0];
> + }
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (maxu < env->vfp.vreg[src2].u16[j]) {
> + maxu = env->vfp.vreg[src2].u16[j];
> + }
> + }
> + if (i == vl - 1) {
> + env->vfp.vreg[rd].u16[0] = maxu;
> + }
> + break;
> + case 32:
> + if (i == 0) {
> + maxu = env->vfp.vreg[rs1].u32[0];
> + }
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (maxu < env->vfp.vreg[src2].u32[j]) {
> + maxu = env->vfp.vreg[src2].u32[j];
> + }
> + }
> + if (i == vl - 1) {
> + env->vfp.vreg[rd].u32[0] = maxu;
> + }
> + break;
> + case 64:
> + if (i == 0) {
> + maxu = env->vfp.vreg[rs1].u64[0];
> + }
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (maxu < env->vfp.vreg[src2].u64[j]) {
> + maxu = env->vfp.vreg[src2].u64[j];
> + }
> + }
> + if (i == vl - 1) {
> + env->vfp.vreg[rd].u64[0] = maxu;
> + }
> + break;
> + default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST,
> +                                      GETPC());
> + return;
> + }
> + }
> + }
> +    env->vfp.vstart = 0;
> +}
> +
> +/* vfmax.vv vd, vs2, vs1, vm # Vector-vector */
> +void VECTOR_HELPER(vfmax_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest, src1, src2;
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + src1 = rs1 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    env->vfp.vreg[dest].f16[j] = float16_maxnum(
> +                        env->vfp.vreg[src1].f16[j],
> +                        env->vfp.vreg[src2].f16[j],
> +                        &env->fp_status);
> +                }
> +                break;
> +            case 32:
> +                if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    env->vfp.vreg[dest].f32[j] = float32_maxnum(
> +                        env->vfp.vreg[src1].f32[j],
> +                        env->vfp.vreg[src2].f32[j],
> +                        &env->fp_status);
> +                }
> +                break;
> +            case 64:
> +                if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    env->vfp.vreg[dest].f64[j] = float64_maxnum(
> +                        env->vfp.vreg[src1].f64[j],
> +                        env->vfp.vreg[src2].f64[j],
> +                        &env->fp_status);
> +                }
> +                break;
> +            default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST,
> +                                      GETPC());
> +                return;
> +            }
> +        } else {
> +            switch (width) {
> +            case 16:
> +                env->vfp.vreg[dest].f16[j] = 0;
> +                break;
> +            case 32:
> +                env->vfp.vreg[dest].f32[j] = 0;
> +                break;
> +            case 64:
> +                env->vfp.vreg[dest].f64[j] = 0;
> +                break;
> +            default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST,
> +                                      GETPC());
> +                return;
> +            }
> +        }
> +    }
> +    env->vfp.vstart = 0;
> +}
> +
> +/* vfmax.vf vd, vs2, rs1, vm # vector-scalar */
> +void VECTOR_HELPER(vfmax_vf)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest, src2;
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    env->vfp.vreg[dest].f16[j] = float16_maxnum(
> +                        env->fpr[rs1],
> +                        env->vfp.vreg[src2].f16[j],
> +                        &env->fp_status);
> +                }
> +                break;
> +            case 32:
> +                if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    env->vfp.vreg[dest].f32[j] = float32_maxnum(
> +                        env->fpr[rs1],
> +                        env->vfp.vreg[src2].f32[j],
> +                        &env->fp_status);
> +                }
> +                break;
> +            case 64:
> +                if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    env->vfp.vreg[dest].f64[j] = float64_maxnum(
> +                        env->fpr[rs1],
> +                        env->vfp.vreg[src2].f64[j],
> +                        &env->fp_status);
> +                }
> +                break;
> +            default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST,
> +                                      GETPC());
> +                return;
> +            }
> +        } else {
> +            switch (width) {
> +            case 16:
> +                env->vfp.vreg[dest].f16[j] = 0;
> +                break;
> +            case 32:
> +                env->vfp.vreg[dest].f32[j] = 0;
> +                break;
> +            case 64:
> +                env->vfp.vreg[dest].f64[j] = 0;
> +                break;
> +            default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST,
> +                                      GETPC());
> +                return;
> +            }
> +        }
> +    }
> +    env->vfp.vstart = 0;
> +}
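> +
> +/* vmax.vv vd, vs2, vs1, vm # vd[i] = max(vs1[i], vs2[i]) */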
> +void VECTOR_HELPER(vmax_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src1, src2, dest, vlmax;
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src1 = rs1 + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src1].s8[j] >=
> + env->vfp.vreg[src2].s8[j]) {
> + env->vfp.vreg[dest].s8[j] =
> + env->vfp.vreg[src1].s8[j];
> + } else {
> + env->vfp.vreg[dest].s8[j] =
> + env->vfp.vreg[src2].s8[j];
> + }
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src1].s16[j] >=
> + env->vfp.vreg[src2].s16[j]) {
> + env->vfp.vreg[dest].s16[j] =
> + env->vfp.vreg[src1].s16[j];
> + } else {
> + env->vfp.vreg[dest].s16[j] =
> + env->vfp.vreg[src2].s16[j];
> + }
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src1].s32[j] >=
> + env->vfp.vreg[src2].s32[j]) {
> + env->vfp.vreg[dest].s32[j] =
> + env->vfp.vreg[src1].s32[j];
> + } else {
> + env->vfp.vreg[dest].s32[j] =
> + env->vfp.vreg[src2].s32[j];
> + }
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src1].s64[j] >=
> + env->vfp.vreg[src2].s64[j]) {
> + env->vfp.vreg[dest].s64[j] =
> + env->vfp.vreg[src1].s64[j];
> + } else {
> + env->vfp.vreg[dest].s64[j] =
> + env->vfp.vreg[src2].s64[j];
> + }
> + }
> + break;
> + default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST,
> +                                      GETPC());
> + break;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
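> +
> +/* vmax.vx vd, vs2, rs1, vm # vd[i] = max(x[rs1], vs2[i]) */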
> +void VECTOR_HELPER(vmax_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src2, dest, vlmax;
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if ((int8_t)env->gpr[rs1] >=
> + env->vfp.vreg[src2].s8[j]) {
> + env->vfp.vreg[dest].s8[j] =
> + env->gpr[rs1];
> + } else {
> + env->vfp.vreg[dest].s8[j] =
> + env->vfp.vreg[src2].s8[j];
> + }
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if ((int16_t)env->gpr[rs1] >=
> + env->vfp.vreg[src2].s16[j]) {
> + env->vfp.vreg[dest].s16[j] =
> + env->gpr[rs1];
> + } else {
> + env->vfp.vreg[dest].s16[j] =
> + env->vfp.vreg[src2].s16[j];
> + }
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if ((int32_t)env->gpr[rs1] >=
> + env->vfp.vreg[src2].s32[j]) {
> + env->vfp.vreg[dest].s32[j] =
> + env->gpr[rs1];
> + } else {
> + env->vfp.vreg[dest].s32[j] =
> + env->vfp.vreg[src2].s32[j];
> + }
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if ((int64_t)extend_gpr(env->gpr[rs1]) >=
> + env->vfp.vreg[src2].s64[j]) {
> + env->vfp.vreg[dest].s64[j] =
> + (int64_t)extend_gpr(env->gpr[rs1]);
> + } else {
> + env->vfp.vreg[dest].s64[j] =
> + env->vfp.vreg[src2].s64[j];
> + }
> + }
> + break;
> + default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST,
> +                                      GETPC());
> + break;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vredmax.vs vd, vs2, vs1, vm # vd[0] = max( vs1[0] , vs2[*] ) */
> +void VECTOR_HELPER(vredmax_vs)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, src2;
> + int64_t max = 0;
> +
> + lmul = vector_get_lmul(env);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> +
> + if (vector_vtype_ill(env)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + if (env->vfp.vstart != 0) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vl = env->vfp.vl;
> + if (vl == 0) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < VLEN / 64; i++) {
> + env->vfp.vreg[rd].u64[i] = 0;
> + }
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> +
> + if (i < vl) {
> + switch (width) {
> + case 8:
> + if (i == 0) {
> + max = env->vfp.vreg[rs1].s8[0];
> + }
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (max < env->vfp.vreg[src2].s8[j]) {
> + max = env->vfp.vreg[src2].s8[j];
> + }
> + }
> + if (i == vl - 1) {
> + env->vfp.vreg[rd].s8[0] = max;
> + }
> + break;
> + case 16:
> + if (i == 0) {
> + max = env->vfp.vreg[rs1].s16[0];
> + }
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (max < env->vfp.vreg[src2].s16[j]) {
> + max = env->vfp.vreg[src2].s16[j];
> + }
> + }
> + if (i == vl - 1) {
> + env->vfp.vreg[rd].s16[0] = max;
> + }
> + break;
> + case 32:
> + if (i == 0) {
> + max = env->vfp.vreg[rs1].s32[0];
> + }
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (max < env->vfp.vreg[src2].s32[j]) {
> + max = env->vfp.vreg[src2].s32[j];
> + }
> + }
> + if (i == vl - 1) {
> + env->vfp.vreg[rd].s32[0] = max;
> + }
> + break;
> + case 64:
> + if (i == 0) {
> + max = env->vfp.vreg[rs1].s64[0];
> + }
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (max < env->vfp.vreg[src2].s64[j]) {
> + max = env->vfp.vreg[src2].s64[j];
> + }
> + }
> + if (i == vl - 1) {
> + env->vfp.vreg[rd].s64[0] = max;
> + }
> + break;
> + default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST,
> +                                      GETPC());
> + return;
> + }
> + }
> + }
> +    env->vfp.vstart = 0;
> +}
> +
> +/* vfredmax.vs vd, vs2, vs1, vm # Maximum value */
> +void VECTOR_HELPER(vfredmax_vs)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, src2;
> +    float16 max16 = float16_zero;
> +    float32 max32 = float32_zero;
> +    float64 max64 = float64_zero;
> +
> + lmul = vector_get_lmul(env);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> +
> + if (vector_vtype_ill(env)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + if (env->vfp.vstart != 0) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vl = env->vfp.vl;
> + if (vl == 0) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < VLEN / 64; i++) {
> + env->vfp.vreg[rd].u64[i] = 0;
> + }
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> +
> + if (i < vl) {
> + switch (width) {
> + case 16:
> + if (i == 0) {
> + max16 = env->vfp.vreg[rs1].f16[0];
> + }
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    max16 = float16_maxnum(max16, env->vfp.vreg[src2].f16[j],
> +                                           &env->fp_status);
> +                }
> +                if (i == vl - 1) {
> +                    env->vfp.vreg[rd].f16[0] = max16;
> +                }
> +                break;
> +            case 32:
> +                if (i == 0) {
> +                    max32 = env->vfp.vreg[rs1].f32[0];
> +                }
> +                if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    max32 = float32_maxnum(max32, env->vfp.vreg[src2].f32[j],
> +                                           &env->fp_status);
> +                }
> +                if (i == vl - 1) {
> +                    env->vfp.vreg[rd].f32[0] = max32;
> +                }
> +                break;
> +            case 64:
> +                if (i == 0) {
> +                    max64 = env->vfp.vreg[rs1].f64[0];
> +                }
> +                if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    max64 = float64_maxnum(max64, env->vfp.vreg[src2].f64[j],
> +                                           &env->fp_status);
> +                }
> +                if (i == vl - 1) {
> +                    env->vfp.vreg[rd].f64[0] = max64;
> +                }
> +                break;
> +            default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST,
> +                                      GETPC());
> +                return;
> +            }
> +        }
> +    }
> +    env->vfp.vstart = 0;
> +}
> +
> +/* vfsgnj.vv vd, vs2, vs1, vm # Vector-vector */
> +void VECTOR_HELPER(vfsgnj_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest, src1, src2;
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + src1 = rs1 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    env->vfp.vreg[dest].f16[j] = deposit16(
> +                        env->vfp.vreg[src1].f16[j], 0, 15,
> +                        env->vfp.vreg[src2].f16[j]);
> +                }
> +                break;
> +            case 32:
> +                if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    env->vfp.vreg[dest].f32[j] = deposit32(
> +                        env->vfp.vreg[src1].f32[j], 0, 31,
> +                        env->vfp.vreg[src2].f32[j]);
> +                }
> +                break;
> +            case 64:
> +                if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    env->vfp.vreg[dest].f64[j] = deposit64(
> +                        env->vfp.vreg[src1].f64[j], 0, 63,
> +                        env->vfp.vreg[src2].f64[j]);
> +                }
> +                break;
> +            default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST,
> +                                      GETPC());
> +                return;
> +            }
> +        } else {
> +            switch (width) {
> +            case 16:
> +                env->vfp.vreg[dest].f16[j] = 0;
> +                break;
> +            case 32:
> +                env->vfp.vreg[dest].f32[j] = 0;
> +                break;
> +            case 64:
> +                env->vfp.vreg[dest].f64[j] = 0;
> +                break;
> +            default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST,
> +                                      GETPC());
> +                return;
> +            }
> +        }
> +    }
> +    env->vfp.vstart = 0;
> +}
> +
> +/* vfsgnj.vf vd, vs2, rs1, vm # vector-scalar */
> +void VECTOR_HELPER(vfsgnj_vf)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest, src2;
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    env->vfp.vreg[dest].f16[j] = deposit16(
> +                        env->fpr[rs1], 0, 15,
> +                        env->vfp.vreg[src2].f16[j]);
> +                }
> +                break;
> +            case 32:
> +                if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    env->vfp.vreg[dest].f32[j] = deposit32(
> +                        env->fpr[rs1], 0, 31,
> +                        env->vfp.vreg[src2].f32[j]);
> +                }
> +                break;
> +            case 64:
> +                if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    env->vfp.vreg[dest].f64[j] = deposit64(
> +                        env->fpr[rs1], 0, 63,
> +                        env->vfp.vreg[src2].f64[j]);
> +                }
> +                break;
> +            default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST,
> +                                      GETPC());
> +                return;
> +            }
> +        } else {
> +            switch (width) {
> +            case 16:
> +                env->vfp.vreg[dest].f16[j] = 0;
> +                break;
> +            case 32:
> +                env->vfp.vreg[dest].f32[j] = 0;
> +                break;
> +            case 64:
> +                env->vfp.vreg[dest].f64[j] = 0;
> +                break;
> +            default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST,
> +                                      GETPC());
> +                return;
> +            }
> +        }
> +    }
> +    env->vfp.vstart = 0;
> +}
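> +
> +/* vand.vv vd, vs2, vs1, vm # vd[i] = vs1[i] & vs2[i] */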
> +void VECTOR_HELPER(vand_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src1, src2, dest, vlmax;
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src1 = rs1 + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u8[j] = env->vfp.vreg[src1].u8[j]
> + & env->vfp.vreg[src2].u8[j];
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    env->vfp.vreg[dest].u16[j] = env->vfp.vreg[src1].u16[j]
> +                        & env->vfp.vreg[src2].u16[j];
> +                }
> +                break;
> +            case 32:
> +                if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    env->vfp.vreg[dest].u32[j] = env->vfp.vreg[src1].u32[j]
> +                        & env->vfp.vreg[src2].u32[j];
> +                }
> +                break;
> +            case 64:
> +                if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    env->vfp.vreg[dest].u64[j] = env->vfp.vreg[src1].u64[j]
> + & env->vfp.vreg[src2].u64[j];
> + }
> + break;
> + default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST,
> +                                      GETPC());
> + break;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> +
> + env->vfp.vstart = 0;
> +}
> +
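> +/* vand.vx vd, vs2, rs1, vm # vd[i] = x[rs1] & vs2[i] */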
> +void VECTOR_HELPER(vand_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src2, dest, vlmax;
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u8[j] = env->gpr[rs1]
> + & env->vfp.vreg[src2].u8[j];
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u16[j] = env->gpr[rs1]
> + & env->vfp.vreg[src2].u16[j];
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u32[j] = env->gpr[rs1]
> + & env->vfp.vreg[src2].u32[j];
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u64[j] =
> + (uint64_t)extend_gpr(env->gpr[rs1])
> + & env->vfp.vreg[src2].u64[j];
> + }
> + break;
> + default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST,
> +                                      GETPC());
> + break;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
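> +/* vand.vi vd, vs2, imm, vm # vd[i] = sign_extend(imm) & vs2[i] */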
> +void VECTOR_HELPER(vand_vi)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src2, dest, vlmax;
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s8[j] = sign_extend(rs1, 5)
> + & env->vfp.vreg[src2].s8[j];
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s16[j] = sign_extend(rs1, 5)
> + & env->vfp.vreg[src2].s16[j];
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s32[j] = sign_extend(rs1, 5)
> + & env->vfp.vreg[src2].s32[j];
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s64[j] = sign_extend(rs1, 5)
> + & env->vfp.vreg[src2].s64[j];
> + }
> + break;
> + default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST,
> +                                      GETPC());
> + break;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vfsgnjn.vv vd, vs2, vs1, vm # Vector-vector */
> +void VECTOR_HELPER(vfsgnjn_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest, src1, src2;
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + src1 = rs1 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    env->vfp.vreg[dest].f16[j] = deposit16(
> +                        ~env->vfp.vreg[src1].f16[j], 0, 15,
> +                        env->vfp.vreg[src2].f16[j]);
> +                }
> +                break;
> +            case 32:
> +                if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    env->vfp.vreg[dest].f32[j] = deposit32(
> +                        ~env->vfp.vreg[src1].f32[j], 0, 31,
> +                        env->vfp.vreg[src2].f32[j]);
> +                }
> +                break;
> +            case 64:
> +                if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    env->vfp.vreg[dest].f64[j] = deposit64(
> +                        ~env->vfp.vreg[src1].f64[j], 0, 63,
> +                        env->vfp.vreg[src2].f64[j]);
> +                }
> +                break;
> +            default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST,
> +                                      GETPC());
> +                return;
> +            }
> +        } else {
> +            switch (width) {
> +            case 16:
> +                env->vfp.vreg[dest].f16[j] = 0;
> +                break;
> +            case 32:
> +                env->vfp.vreg[dest].f32[j] = 0;
> +                break;
> +            case 64:
> +                env->vfp.vreg[dest].f64[j] = 0;
> +                break;
> +            default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST,
> +                                      GETPC());
> +                return;
> +            }
> +        }
> +    }
> +    env->vfp.vstart = 0;
> +}
> +/* vfsgnjn.vf vd, vs2, rs1, vm # vector-scalar */
> +void VECTOR_HELPER(vfsgnjn_vf)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest, src2;
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    env->vfp.vreg[dest].f16[j] = deposit16(
> +                        ~env->fpr[rs1], 0, 15,
> +                        env->vfp.vreg[src2].f16[j]);
> +                }
> +                break;
> +            case 32:
> +                if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    env->vfp.vreg[dest].f32[j] = deposit32(
> +                        ~env->fpr[rs1], 0, 31,
> +                        env->vfp.vreg[src2].f32[j]);
> +                }
> +                break;
> +            case 64:
> +                if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    env->vfp.vreg[dest].f64[j] = deposit64(
> +                        ~env->fpr[rs1], 0, 63,
> +                        env->vfp.vreg[src2].f64[j]);
> +                }
> +                break;
> +            default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST,
> +                                      GETPC());
> +                return;
> +            }
> +        } else {
> +            switch (width) {
> +            case 16:
> +                env->vfp.vreg[dest].f16[j] = 0;
> +                break;
> +            case 32:
> +                env->vfp.vreg[dest].f32[j] = 0;
> +                break;
> +            case 64:
> +                env->vfp.vreg[dest].f64[j] = 0;
> +                break;
> +            default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST,
> +                                      GETPC());
> +                return;
> +            }
> +        }
> +    }
> +    env->vfp.vstart = 0;
> +}
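> +
> +/* vor.vv vd, vs2, vs1, vm # vd[i] = vs1[i] | vs2[i] */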
> +void VECTOR_HELPER(vor_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src1, src2, dest, vlmax;
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src1 = rs1 + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u8[j] = env->vfp.vreg[src1].u8[j]
> + | env->vfp.vreg[src2].u8[j];
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    env->vfp.vreg[dest].u16[j] = env->vfp.vreg[src1].u16[j]
> +                        | env->vfp.vreg[src2].u16[j];
> +                }
> +                break;
> +            case 32:
> +                if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    env->vfp.vreg[dest].u32[j] = env->vfp.vreg[src1].u32[j]
> +                        | env->vfp.vreg[src2].u32[j];
> +                }
> +                break;
> +            case 64:
> +                if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    env->vfp.vreg[dest].u64[j] = env->vfp.vreg[src1].u64[j]
> + | env->vfp.vreg[src2].u64[j];
> + }
> + break;
> + default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST,
> +                                      GETPC());
> + break;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
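> +
> +/* vor.vx vd, vs2, rs1, vm # vd[i] = x[rs1] | vs2[i] */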
> +void VECTOR_HELPER(vor_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src2, dest, vlmax;
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u8[j] = env->gpr[rs1]
> + | env->vfp.vreg[src2].u8[j];
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u16[j] = env->gpr[rs1]
> + | env->vfp.vreg[src2].u16[j];
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u32[j] = env->gpr[rs1]
> + | env->vfp.vreg[src2].u32[j];
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u64[j] =
> + (uint64_t)extend_gpr(env->gpr[rs1])
> + | env->vfp.vreg[src2].u64[j];
> + }
> + break;
> + default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST,
> +                                      GETPC());
> + break;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
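> +
> +/* vor.vi vd, vs2, imm, vm # vd[i] = sign_extend(imm) | vs2[i] */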
> +void VECTOR_HELPER(vor_vi)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src2, dest, vlmax;
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s8[j] = sign_extend(rs1, 5)
> + | env->vfp.vreg[src2].s8[j];
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s16[j] = sign_extend(rs1, 5)
> + | env->vfp.vreg[src2].s16[j];
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s32[j] = sign_extend(rs1, 5)
> + | env->vfp.vreg[src2].s32[j];
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s64[j] = sign_extend(rs1, 5)
> + | env->vfp.vreg[src2].s64[j];
> + }
> + break;
> + default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST,
> +                                      GETPC());
> + break;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vfsgnjx.vv vd, vs2, vs1, vm # Vector-vector */
> +void VECTOR_HELPER(vfsgnjx_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest, src1, src2;
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + src1 = rs1 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    env->vfp.vreg[dest].f16[j] = deposit16(
> +                        env->vfp.vreg[src1].f16[j] ^
> +                        env->vfp.vreg[src2].f16[j],
> +                        0, 15,
> +                        env->vfp.vreg[src2].f16[j]);
> +                }
> +                break;
> +            case 32:
> +                if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    env->vfp.vreg[dest].f32[j] = deposit32(
> +                        env->vfp.vreg[src1].f32[j] ^
> +                        env->vfp.vreg[src2].f32[j],
> +                        0, 31,
> +                        env->vfp.vreg[src2].f32[j]);
> +                }
> +                break;
> +            case 64:
> +                if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    env->vfp.vreg[dest].f64[j] = deposit64(
> +                        env->vfp.vreg[src1].f64[j] ^
> +                        env->vfp.vreg[src2].f64[j],
> +                        0, 63,
> +                        env->vfp.vreg[src2].f64[j]);
> +                }
> +                break;
> +            default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST,
> +                                      GETPC());
> +                return;
> +            }
> +        } else {
> +            switch (width) {
> +            case 16:
> +                env->vfp.vreg[dest].f16[j] = 0;
> +                break;
> +            case 32:
> +                env->vfp.vreg[dest].f32[j] = 0;
> +                break;
> +            case 64:
> +                env->vfp.vreg[dest].f64[j] = 0;
> +                break;
> +            default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST,
> +                                      GETPC());
> +                return;
> +            }
> +        }
> +    }
> +    env->vfp.vstart = 0;
> +}
> +
> +/* vfsgnjx.vf vd, vs2, rs1, vm # vector-scalar */
> +void VECTOR_HELPER(vfsgnjx_vf)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest, src2;
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    env->vfp.vreg[dest].f16[j] = deposit16(
> +                        env->fpr[rs1] ^
> +                        env->vfp.vreg[src2].f16[j],
> +                        0, 15,
> +                        env->vfp.vreg[src2].f16[j]);
> +                }
> +                break;
> +            case 32:
> +                if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    env->vfp.vreg[dest].f32[j] = deposit32(
> +                        env->fpr[rs1] ^
> +                        env->vfp.vreg[src2].f32[j],
> +                        0, 31,
> +                        env->vfp.vreg[src2].f32[j]);
> +                }
> +                break;
> +            case 64:
> +                if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    env->vfp.vreg[dest].f64[j] = deposit64(
> +                        env->fpr[rs1] ^
> +                        env->vfp.vreg[src2].f64[j],
> +                        0, 63,
> +                        env->vfp.vreg[src2].f64[j]);
> +                }
> +                break;
> +            default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST,
> +                                      GETPC());
> +                return;
> +            }
> +        } else {
> +            switch (width) {
> +            case 16:
> +                env->vfp.vreg[dest].f16[j] = 0;
> +                break;
> +            case 32:
> +                env->vfp.vreg[dest].f32[j] = 0;
> +                break;
> +            case 64:
> +                env->vfp.vreg[dest].f64[j] = 0;
> +                break;
> +            default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST,
> +                                      GETPC());
> +                return;
> +            }
> +        }
> +    }
> +    env->vfp.vstart = 0;
> +}
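> +
> +/* vxor.vv vd, vs2, vs1, vm # vd[i] = vs1[i] ^ vs2[i] */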
> +void VECTOR_HELPER(vxor_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src1, src2, dest, vlmax;
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src1 = rs1 + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u8[j] = env->vfp.vreg[src1].u8[j]
> + ^ env->vfp.vreg[src2].u8[j];
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    env->vfp.vreg[dest].u16[j] = env->vfp.vreg[src1].u16[j]
> +                        ^ env->vfp.vreg[src2].u16[j];
> +                }
> +                break;
> +            case 32:
> +                if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    env->vfp.vreg[dest].u32[j] = env->vfp.vreg[src1].u32[j]
> +                        ^ env->vfp.vreg[src2].u32[j];
> +                }
> +                break;
> +            case 64:
> +                if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    env->vfp.vreg[dest].u64[j] = env->vfp.vreg[src1].u64[j]
> + ^ env->vfp.vreg[src2].u64[j];
> + }
> + break;
> + default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST,
> +                                      GETPC());
> + break;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
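> +
> +/* vxor.vx vd, vs2, rs1, vm # vd[i] = x[rs1] ^ vs2[i] */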
> +void VECTOR_HELPER(vxor_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src2, dest, vlmax;
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u8[j] = env->gpr[rs1]
> + ^ env->vfp.vreg[src2].u8[j];
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u16[j] = env->gpr[rs1]
> + ^ env->vfp.vreg[src2].u16[j];
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u32[j] = env->gpr[rs1]
> + ^ env->vfp.vreg[src2].u32[j];
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u64[j] =
> + (uint64_t)extend_gpr(env->gpr[rs1])
> + ^ env->vfp.vreg[src2].u64[j];
> + }
> + break;
> + default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST,
> +                                      GETPC());
> + break;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
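> +/* vxor.vi vd, vs2, imm, vm # vd[i] = sign_extend(imm) ^ vs2[i] */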
> +void VECTOR_HELPER(vxor_vi)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src2, dest, vlmax;
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s8[j] = sign_extend(rs1, 5)
> + ^ env->vfp.vreg[src2].s8[j];
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s16[j] = sign_extend(rs1, 5)
> + ^ env->vfp.vreg[src2].s16[j];
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s32[j] = sign_extend(rs1, 5)
> + ^ env->vfp.vreg[src2].s32[j];
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s64[j] = sign_extend(rs1, 5)
> + ^ env->vfp.vreg[src2].s64[j];
> + }
> + break;
> + default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST,
> +                                      GETPC());
> + break;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
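> +/* vadc.vvm vd, vs2, vs1, v0 # vd[i] = vs1[i] + vs2[i] + v0[i].LSB */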
> +void VECTOR_HELPER(vadc_vvm)(CPURISCVState *env, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src1, src2, dest, vlmax, carry;
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_carry(lmul, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src1 = rs1 + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + carry = vector_get_carry(env, width, lmul, i);
> + env->vfp.vreg[dest].u8[j] = env->vfp.vreg[src1].u8[j]
> + + env->vfp.vreg[src2].u8[j] + carry;
> + break;
> + case 16:
> + carry = vector_get_carry(env, width, lmul, i);
> + env->vfp.vreg[dest].u16[j] = env->vfp.vreg[src1].u16[j]
> + + env->vfp.vreg[src2].u16[j] + carry;
> + break;
> + case 32:
> + carry = vector_get_carry(env, width, lmul, i);
> + env->vfp.vreg[dest].u32[j] = env->vfp.vreg[src1].u32[j]
> + + env->vfp.vreg[src2].u32[j] + carry;
> + break;
> + case 64:
> + carry = vector_get_carry(env, width, lmul, i);
> + env->vfp.vreg[dest].u64[j] = env->vfp.vreg[src1].u64[j]
> + + env->vfp.vreg[src2].u64[j] + carry;
> + break;
> + default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST,
> +                                      GETPC());
> + break;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
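> +
> +/* vadc.vxm vd, vs2, rs1, v0 # vd[i] = x[rs1] + vs2[i] + v0[i].LSB */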
> +void VECTOR_HELPER(vadc_vxm)(CPURISCVState *env, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src2, dest, vlmax, carry;
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_carry(lmul, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + carry = vector_get_carry(env, width, lmul, i);
> + env->vfp.vreg[dest].u8[j] = env->gpr[rs1]
> + + env->vfp.vreg[src2].u8[j] + carry;
> + break;
> + case 16:
> + carry = vector_get_carry(env, width, lmul, i);
> + env->vfp.vreg[dest].u16[j] = env->gpr[rs1]
> + + env->vfp.vreg[src2].u16[j] + carry;
> + break;
> + case 32:
> + carry = vector_get_carry(env, width, lmul, i);
> + env->vfp.vreg[dest].u32[j] = env->gpr[rs1]
> + + env->vfp.vreg[src2].u32[j] + carry;
> + break;
> + case 64:
> + carry = vector_get_carry(env, width, lmul, i);
> +                env->vfp.vreg[dest].u64[j] =
> +                    (uint64_t)extend_gpr(env->gpr[rs1])
> + + env->vfp.vreg[src2].u64[j] + carry;
> + break;
> + default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST,
> +                                      GETPC());
> + break;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
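> +/* vadc.vim vd, vs2, imm, v0 # vd[i] = sign_extend(imm) + vs2[i] + v0[i].LSB */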
> +void VECTOR_HELPER(vadc_vim)(CPURISCVState *env, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src2, dest, vlmax, carry;
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_carry(lmul, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + carry = vector_get_carry(env, width, lmul, i);
> + env->vfp.vreg[dest].u8[j] = sign_extend(rs1, 5)
> + + env->vfp.vreg[src2].u8[j] + carry;
> + break;
> + case 16:
> + carry = vector_get_carry(env, width, lmul, i);
> + env->vfp.vreg[dest].u16[j] = sign_extend(rs1, 5)
> + + env->vfp.vreg[src2].u16[j] + carry;
> + break;
> + case 32:
> + carry = vector_get_carry(env, width, lmul, i);
> + env->vfp.vreg[dest].u32[j] = sign_extend(rs1, 5)
> + + env->vfp.vreg[src2].u32[j] + carry;
> + break;
> + case 64:
> + carry = vector_get_carry(env, width, lmul, i);
> + env->vfp.vreg[dest].u64[j] = sign_extend(rs1, 5)
> + + env->vfp.vreg[src2].u64[j] + carry;
> + break;
> + default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST,
> +                                      GETPC());
> + break;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
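> +/* vmadc.vvm vd, vs2, vs1, v0 # vd[i] = carry_out(vs1[i] + vs2[i] + v0[i].LSB) */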
> +void VECTOR_HELPER(vmadc_vvm)(CPURISCVState *env, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src1, src2, vlmax, carry;
> + uint64_t tmp;
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env)
> + || vector_overlap_dstgp_srcgp(rd, 1, rs1, lmul)
> + || vector_overlap_dstgp_srcgp(rd, 1, rs2, lmul)
> + || (rd == 0)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src1 = rs1 + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + carry = vector_get_carry(env, width, lmul, i);
> + tmp = env->vfp.vreg[src1].u8[j]
> + + env->vfp.vreg[src2].u8[j] + carry;
> + tmp = tmp >> width;
> +
> + vector_mask_result(env, rd, width, lmul, i, tmp);
> + break;
> + case 16:
> + carry = vector_get_carry(env, width, lmul, i);
> + tmp = env->vfp.vreg[src1].u16[j]
> + + env->vfp.vreg[src2].u16[j] + carry;
> + tmp = tmp >> width;
> + vector_mask_result(env, rd, width, lmul, i, tmp);
> + break;
> + case 32:
> + carry = vector_get_carry(env, width, lmul, i);
> + tmp = (uint64_t)env->vfp.vreg[src1].u32[j]
> + + (uint64_t)env->vfp.vreg[src2].u32[j] + carry;
> + tmp = tmp >> width;
> + vector_mask_result(env, rd, width, lmul, i, tmp);
> + break;
> + case 64:
> + carry = vector_get_carry(env, width, lmul, i);
> + tmp = env->vfp.vreg[src1].u64[j]
> + + env->vfp.vreg[src2].u64[j] + carry;
> +
> + if ((tmp < env->vfp.vreg[src1].u64[j] ||
> + tmp < env->vfp.vreg[src2].u64[j])
> + || (env->vfp.vreg[src1].u64[j] == MAX_U64 &&
> + env->vfp.vreg[src2].u64[j] == MAX_U64)) {
> + tmp = 1;
> + } else {
> + tmp = 0;
> + }
> + vector_mask_result(env, rd, width, lmul, i, tmp);
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST,
> +                    GETPC());
> + break;
> + }
> + } else {
> + if (width <= 64) {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + } else {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST,
> +                    GETPC());
> + return;
> + }
> + }
> + }
> + env->vfp.vstart = 0;
> +}
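> +
> +/* vmadc.vxm vd, vs2, rs1, v0 # vd[i] = carry(vs2[i] + x[rs1] + v0[i].LSB) */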
> +void VECTOR_HELPER(vmadc_vxm)(CPURISCVState *env, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src2, vlmax, carry;
> + uint64_t tmp, extend_rs1;
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env)
> + || vector_overlap_dstgp_srcgp(rd, 1, rs2, lmul)
> + || (rd == 0)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + carry = vector_get_carry(env, width, lmul, i);
> + tmp = (uint8_t)env->gpr[rs1]
> + + env->vfp.vreg[src2].u8[j] + carry;
> + tmp = tmp >> width;
> +
> + vector_mask_result(env, rd, width, lmul, i, tmp);
> + break;
> + case 16:
> + carry = vector_get_carry(env, width, lmul, i);
> + tmp = (uint16_t)env->gpr[rs1]
> + + env->vfp.vreg[src2].u16[j] + carry;
> + tmp = tmp >> width;
> + vector_mask_result(env, rd, width, lmul, i, tmp);
> + break;
> + case 32:
> + carry = vector_get_carry(env, width, lmul, i);
> + tmp = (uint64_t)((uint32_t)env->gpr[rs1])
> + + (uint64_t)env->vfp.vreg[src2].u32[j] + carry;
> + tmp = tmp >> width;
> + vector_mask_result(env, rd, width, lmul, i, tmp);
> + break;
> + case 64:
> + carry = vector_get_carry(env, width, lmul, i);
> +
> + extend_rs1 = (uint64_t)extend_gpr(env->gpr[rs1]);
> + tmp = extend_rs1 + env->vfp.vreg[src2].u64[j] + carry;
> + if ((tmp < extend_rs1) ||
> + (carry && (env->vfp.vreg[src2].u64[j] == MAX_U64))) {
> + tmp = 1;
> + } else {
> + tmp = 0;
> + }
> + vector_mask_result(env, rd, width, lmul, i, tmp);
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST,
> +                    GETPC());
> + break;
> + }
> + } else {
> + if (width <= 64) {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + } else {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST,
> +                    GETPC());
> + return;
> + }
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
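> +/* vmadc.vim vd, vs2, simm, v0 # vd[i] = carry(vs2[i] + simm + v0[i].LSB) */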
> +void VECTOR_HELPER(vmadc_vim)(CPURISCVState *env, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src2, vlmax, carry;
> + uint64_t tmp;
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env)
> + || vector_overlap_dstgp_srcgp(rd, 1, rs2, lmul)
> + || (rd == 0)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + carry = vector_get_carry(env, width, lmul, i);
> + tmp = (uint8_t)sign_extend(rs1, 5)
> + + env->vfp.vreg[src2].u8[j] + carry;
> + tmp = tmp >> width;
> +
> + vector_mask_result(env, rd, width, lmul, i, tmp);
> + break;
> + case 16:
> + carry = vector_get_carry(env, width, lmul, i);
> + tmp = (uint16_t)sign_extend(rs1, 5)
> + + env->vfp.vreg[src2].u16[j] + carry;
> + tmp = tmp >> width;
> + vector_mask_result(env, rd, width, lmul, i, tmp);
> + break;
> + case 32:
> + carry = vector_get_carry(env, width, lmul, i);
> + tmp = (uint64_t)((uint32_t)sign_extend(rs1, 5))
> + + (uint64_t)env->vfp.vreg[src2].u32[j] + carry;
> + tmp = tmp >> width;
> + vector_mask_result(env, rd, width, lmul, i, tmp);
> + break;
> + case 64:
> + carry = vector_get_carry(env, width, lmul, i);
> + tmp = (uint64_t)sign_extend(rs1, 5)
> + + env->vfp.vreg[src2].u64[j] + carry;
> +
> + if ((tmp < (uint64_t)sign_extend(rs1, 5) ||
> + tmp < env->vfp.vreg[src2].u64[j])
> + || ((uint64_t)sign_extend(rs1, 5) == MAX_U64 &&
> + env->vfp.vreg[src2].u64[j] == MAX_U64)) {
> + tmp = 1;
> + } else {
> + tmp = 0;
> + }
> + vector_mask_result(env, rd, width, lmul, i, tmp);
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST,
> +                    GETPC());
> + break;
> + }
> + } else {
> + if (width <= 64) {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + } else {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST,
> +                    GETPC());
> + return;
> + }
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
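> +/* vsbc.vvm vd, vs2, vs1, v0 # vd[i] = vs2[i] - vs1[i] - v0[i].LSB */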
> +void VECTOR_HELPER(vsbc_vvm)(CPURISCVState *env, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src1, src2, dest, vlmax, carry;
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_carry(lmul, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src1 = rs1 + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + carry = vector_get_carry(env, width, lmul, i);
> + env->vfp.vreg[dest].u8[j] = env->vfp.vreg[src2].u8[j]
> + - env->vfp.vreg[src1].u8[j] - carry;
> + break;
> + case 16:
> + carry = vector_get_carry(env, width, lmul, i);
> + env->vfp.vreg[dest].u16[j] = env->vfp.vreg[src2].u16[j]
> + - env->vfp.vreg[src1].u16[j] - carry;
> + break;
> + case 32:
> + carry = vector_get_carry(env, width, lmul, i);
> + env->vfp.vreg[dest].u32[j] = env->vfp.vreg[src2].u32[j]
> + - env->vfp.vreg[src1].u32[j] - carry;
> + break;
> + case 64:
> + carry = vector_get_carry(env, width, lmul, i);
> + env->vfp.vreg[dest].u64[j] = env->vfp.vreg[src2].u64[j]
> + - env->vfp.vreg[src1].u64[j] - carry;
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST,
> +                    GETPC());
> + break;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
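> +
> +/* vsbc.vxm vd, vs2, rs1, v0 # vd[i] = vs2[i] - x[rs1] - v0[i].LSB */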
> +void VECTOR_HELPER(vsbc_vxm)(CPURISCVState *env, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src2, dest, vlmax, carry;
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_carry(lmul, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + carry = vector_get_carry(env, width, lmul, i);
> + env->vfp.vreg[dest].u8[j] = env->vfp.vreg[src2].u8[j]
> + - env->gpr[rs1] - carry;
> + break;
> + case 16:
> + carry = vector_get_carry(env, width, lmul, i);
> + env->vfp.vreg[dest].u16[j] = env->vfp.vreg[src2].u16[j]
> + - env->gpr[rs1] - carry;
> + break;
> + case 32:
> + carry = vector_get_carry(env, width, lmul, i);
> + env->vfp.vreg[dest].u32[j] = env->vfp.vreg[src2].u32[j]
> + - env->gpr[rs1] - carry;
> + break;
> + case 64:
> + carry = vector_get_carry(env, width, lmul, i);
> + env->vfp.vreg[dest].u64[j] = env->vfp.vreg[src2].u64[j]
> + - (uint64_t)extend_gpr(env->gpr[rs1]) - carry;
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST,
> +                    GETPC());
> + break;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
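> +
> +/* vmsbc.vvm vd, vs2, vs1, v0 # vd[i] = borrow(vs2[i] - vs1[i] - v0[i].LSB) */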
> +void VECTOR_HELPER(vmsbc_vvm)(CPURISCVState *env, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src1, src2, vlmax, carry;
> + uint64_t tmp;
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env)
> + || vector_overlap_dstgp_srcgp(rd, 1, rs1, lmul)
> + || vector_overlap_dstgp_srcgp(rd, 1, rs2, lmul)
> + || (rd == 0)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src1 = rs1 + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + carry = vector_get_carry(env, width, lmul, i);
> + tmp = env->vfp.vreg[src2].u8[j]
> + - env->vfp.vreg[src1].u8[j] - carry;
> + tmp = (tmp >> width) & 0x1;
> +
> + vector_mask_result(env, rd, width, lmul, i, tmp);
> + break;
> + case 16:
> + carry = vector_get_carry(env, width, lmul, i);
> + tmp = env->vfp.vreg[src2].u16[j]
> + - env->vfp.vreg[src1].u16[j] - carry;
> + tmp = (tmp >> width) & 0x1;
> + vector_mask_result(env, rd, width, lmul, i, tmp);
> + break;
> + case 32:
> + carry = vector_get_carry(env, width, lmul, i);
> + tmp = (uint64_t)env->vfp.vreg[src2].u32[j]
> + - (uint64_t)env->vfp.vreg[src1].u32[j] - carry;
> + tmp = (tmp >> width) & 0x1;
> + vector_mask_result(env, rd, width, lmul, i, tmp);
> + break;
> + case 64:
> + carry = vector_get_carry(env, width, lmul, i);
> + tmp = env->vfp.vreg[src2].u64[j]
> + - env->vfp.vreg[src1].u64[j] - carry;
> +
> + if (((env->vfp.vreg[src1].u64[j] == MAX_U64) && carry) ||
> + env->vfp.vreg[src2].u64[j] <
> + (env->vfp.vreg[src1].u64[j] + carry)) {
> + tmp = 1;
> + } else {
> + tmp = 0;
> + }
> + vector_mask_result(env, rd, width, lmul, i, tmp);
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST,
> +                    GETPC());
> + break;
> + }
> + } else {
> + if (width <= 64) {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + } else {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST,
> +                    GETPC());
> + return;
> + }
> + }
> + }
> + env->vfp.vstart = 0;
> +}
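> +
> +/* vmsbc.vxm vd, vs2, rs1, v0 # vd[i] = borrow(vs2[i] - x[rs1] - v0[i].LSB) */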
> +void VECTOR_HELPER(vmsbc_vxm)(CPURISCVState *env, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src2, vlmax, carry;
> + uint64_t tmp, extend_rs1;
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env)
> + || vector_overlap_dstgp_srcgp(rd, 1, rs2, lmul)
> + || (rd == 0)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + carry = vector_get_carry(env, width, lmul, i);
> + tmp = env->vfp.vreg[src2].u8[j]
> + - (uint8_t)env->gpr[rs1] - carry;
> + tmp = (tmp >> width) & 0x1;
> + vector_mask_result(env, rd, width, lmul, i, tmp);
> + break;
> + case 16:
> + carry = vector_get_carry(env, width, lmul, i);
> + tmp = env->vfp.vreg[src2].u16[j]
> + - (uint16_t)env->gpr[rs1] - carry;
> + tmp = (tmp >> width) & 0x1;
> + vector_mask_result(env, rd, width, lmul, i, tmp);
> + break;
> + case 32:
> + carry = vector_get_carry(env, width, lmul, i);
> + tmp = (uint64_t)env->vfp.vreg[src2].u32[j]
> + - (uint64_t)((uint32_t)env->gpr[rs1]) - carry;
> + tmp = (tmp >> width) & 0x1;
> + vector_mask_result(env, rd, width, lmul, i, tmp);
> + break;
> + case 64:
> + carry = vector_get_carry(env, width, lmul, i);
> +
> + extend_rs1 = (uint64_t)extend_gpr(env->gpr[rs1]);
> + tmp = env->vfp.vreg[src2].u64[j] - extend_rs1 - carry;
> +
> + if ((tmp > env->vfp.vreg[src2].u64[j]) ||
> + ((extend_rs1 == MAX_U64) && carry)) {
> + tmp = 1;
> + } else {
> + tmp = 0;
> + }
> + vector_mask_result(env, rd, width, lmul, i, tmp);
> + break;
> +
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST,
> +                    GETPC());
> + break;
> + }
> + } else {
> + if (width <= 64) {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + } else {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST,
> +                    GETPC());
> + return;
> + }
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vmpopc.m rd, vs2, v0.t # x[rd] = sum_i ( vs2[i].LSB && v0[i].LSB ) */
> +void VECTOR_HELPER(vmpopc_m)(CPURISCVState *env, uint32_t vm,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i;
> + if (vector_vtype_ill(env)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> + if (env->vfp.vstart != 0) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> + env->gpr[rd] = 0;
> +
> + for (i = 0; i < vlmax; i++) {
> + if (i < vl) {
> + if (vector_mask_reg(env, rs2, width, lmul, i) &&
> + vector_elem_mask(env, vm, width, lmul, i)) {
> + env->gpr[rd]++;
> + }
> + }
> + }
> +    env->vfp.vstart = 0;
> +}
> +
> +/* vmfirst.m rd, vs2, vm */
> +void VECTOR_HELPER(vmfirst_m)(CPURISCVState *env, uint32_t vm,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i;
> + if (vector_vtype_ill(env)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> + if (env->vfp.vstart != 0) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> +    env->gpr[rd] = -1;
> +    for (i = 0; i < vlmax; i++) {
> +        if (i < vl &&
> +            vector_mask_reg(env, rs2, width, lmul, i) &&
> +            vector_elem_mask(env, vm, width, lmul, i)) {
> +            env->gpr[rd] = i;
> +            break;
> +        }
> +    }
> +
> +    env->vfp.vstart = 0;
> +}
> +
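> +/* vmerge.vvm vd, vs2, vs1, v0 # vd[i] = v0[i].LSB ? vs1[i] : vs2[i] */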
> +void VECTOR_HELPER(vmerge_vvm)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl, idx, pos;
> + uint32_t lmul, width, src1, src2, dest, vlmax;
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src1 = rs1 + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vm == 0) {
> + vector_get_layout(env, width, lmul, i, &idx, &pos);
> + if (((env->vfp.vreg[0].u8[idx] >> pos) & 0x1) == 0) {
> + env->vfp.vreg[dest].u8[j] =
> + env->vfp.vreg[src2].u8[j];
> + } else {
> + env->vfp.vreg[dest].u8[j] =
> + env->vfp.vreg[src1].u8[j];
> + }
> + } else {
> + if (rs2 != 0) {
> + riscv_raise_exception(env,
> + RISCV_EXCP_ILLEGAL_INST, GETPC());
> + }
> + env->vfp.vreg[dest].u8[j] = env->vfp.vreg[src1].u8[j];
> + }
> + break;
> + case 16:
> + if (vm == 0) {
> + vector_get_layout(env, width, lmul, i, &idx, &pos);
> + if (((env->vfp.vreg[0].u8[idx] >> pos) & 0x1) == 0) {
> + env->vfp.vreg[dest].u16[j] =
> + env->vfp.vreg[src2].u16[j];
> + } else {
> + env->vfp.vreg[dest].u16[j] =
> + env->vfp.vreg[src1].u16[j];
> + }
> + } else {
> + if (rs2 != 0) {
> + riscv_raise_exception(env,
> + RISCV_EXCP_ILLEGAL_INST, GETPC());
> + }
> + env->vfp.vreg[dest].u16[j] =
> +                        env->vfp.vreg[src1].u16[j];
> + }
> + break;
> + case 32:
> + if (vm == 0) {
> + vector_get_layout(env, width, lmul, i, &idx, &pos);
> + if (((env->vfp.vreg[0].u8[idx] >> pos) & 0x1) == 0) {
> + env->vfp.vreg[dest].u32[j] =
> + env->vfp.vreg[src2].u32[j];
> + } else {
> + env->vfp.vreg[dest].u32[j] =
> + env->vfp.vreg[src1].u32[j];
> + }
> + } else {
> + if (rs2 != 0) {
> + riscv_raise_exception(env,
> + RISCV_EXCP_ILLEGAL_INST, GETPC());
> + }
> + env->vfp.vreg[dest].u32[j] =
> +                        env->vfp.vreg[src1].u32[j];
> + }
> + break;
> + case 64:
> + if (vm == 0) {
> + vector_get_layout(env, width, lmul, i, &idx, &pos);
> + if (((env->vfp.vreg[0].u8[idx] >> pos) & 0x1) == 0) {
> + env->vfp.vreg[dest].u64[j] =
> + env->vfp.vreg[src2].u64[j];
> + } else {
> + env->vfp.vreg[dest].u64[j] =
> + env->vfp.vreg[src1].u64[j];
> + }
> + } else {
> + if (rs2 != 0) {
> + riscv_raise_exception(env,
> + RISCV_EXCP_ILLEGAL_INST, GETPC());
> + }
> + env->vfp.vreg[dest].u64[j] =
> +                        env->vfp.vreg[src1].u64[j];
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST,
> +                    GETPC());
> + break;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
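> +
> +/* vmerge.vxm vd, vs2, rs1, v0 # vd[i] = v0[i].LSB ? x[rs1] : vs2[i] */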
> +void VECTOR_HELPER(vmerge_vxm)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl, idx, pos;
> + uint32_t lmul, width, src2, dest, vlmax;
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> +    if (vector_vtype_ill(env)) {
> +        riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> +        return;
> +    }
> +
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vm == 0) {
> + vector_get_layout(env, width, lmul, i, &idx, &pos);
> + if (((env->vfp.vreg[0].u8[idx] >> pos) & 0x1) == 0) {
> + env->vfp.vreg[dest].u8[j] =
> + env->vfp.vreg[src2].u8[j];
> + } else {
> + env->vfp.vreg[dest].u8[j] = env->gpr[rs1];
> + }
> + } else {
> + if (rs2 != 0) {
> + riscv_raise_exception(env,
> + RISCV_EXCP_ILLEGAL_INST, GETPC());
> + }
> + env->vfp.vreg[dest].u8[j] = env->gpr[rs1];
> + }
> + break;
> + case 16:
> + if (vm == 0) {
> + vector_get_layout(env, width, lmul, i, &idx, &pos);
> + if (((env->vfp.vreg[0].u8[idx] >> pos) & 0x1) == 0) {
> + env->vfp.vreg[dest].u16[j] =
> + env->vfp.vreg[src2].u16[j];
> + } else {
> + env->vfp.vreg[dest].u16[j] = env->gpr[rs1];
> + }
> + } else {
> + if (rs2 != 0) {
> + riscv_raise_exception(env,
> + RISCV_EXCP_ILLEGAL_INST, GETPC());
> + }
> + env->vfp.vreg[dest].u16[j] = env->gpr[rs1];
> + }
> + break;
> + case 32:
> + if (vm == 0) {
> + vector_get_layout(env, width, lmul, i, &idx, &pos);
> + if (((env->vfp.vreg[0].u8[idx] >> pos) & 0x1) == 0) {
> + env->vfp.vreg[dest].u32[j] =
> + env->vfp.vreg[src2].u32[j];
> + } else {
> + env->vfp.vreg[dest].u32[j] = env->gpr[rs1];
> + }
> + } else {
> + if (rs2 != 0) {
> + riscv_raise_exception(env,
> + RISCV_EXCP_ILLEGAL_INST, GETPC());
> + }
> + env->vfp.vreg[dest].u32[j] = env->gpr[rs1];
> + }
> + break;
> + case 64:
> + if (vm == 0) {
> + vector_get_layout(env, width, lmul, i, &idx, &pos);
> + if (((env->vfp.vreg[0].u8[idx] >> pos) & 0x1) == 0) {
> + env->vfp.vreg[dest].u64[j] =
> + env->vfp.vreg[src2].u64[j];
> + } else {
> + env->vfp.vreg[dest].u64[j] =
> + (uint64_t)extend_gpr(env->gpr[rs1]);
> + }
> + } else {
> + if (rs2 != 0) {
> + riscv_raise_exception(env,
> + RISCV_EXCP_ILLEGAL_INST, GETPC());
> + }
> + env->vfp.vreg[dest].u64[j] =
> + (uint64_t)extend_gpr(env->gpr[rs1]);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST,
> +                    GETPC());
> + break;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
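> +
> +/* vmerge.vim vd, vs2, simm, v0 # vd[i] = v0[i].LSB ? simm : vs2[i] */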
> +void VECTOR_HELPER(vmerge_vim)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl, idx, pos;
> + uint32_t lmul, width, src2, dest, vlmax;
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vm == 0) {
> + vector_get_layout(env, width, lmul, i, &idx, &pos);
> + if (((env->vfp.vreg[0].u8[idx] >> pos) & 0x1) == 0) {
> + env->vfp.vreg[dest].u8[j] =
> + env->vfp.vreg[src2].u8[j];
> + } else {
> + env->vfp.vreg[dest].u8[j] =
> + (uint8_t)sign_extend(rs1, 5);
> + }
> + } else {
> + if (rs2 != 0) {
> + riscv_raise_exception(env,
> + RISCV_EXCP_ILLEGAL_INST, GETPC());
> + }
> +                    env->vfp.vreg[dest].u8[j] = (uint8_t)sign_extend(rs1, 5);
> + }
> + break;
> + case 16:
> + if (vm == 0) {
> + vector_get_layout(env, width, lmul, i, &idx, &pos);
> + if (((env->vfp.vreg[0].u8[idx] >> pos) & 0x1) == 0) {
> + env->vfp.vreg[dest].u16[j] =
> + env->vfp.vreg[src2].u16[j];
> + } else {
> + env->vfp.vreg[dest].u16[j] =
> + (uint16_t)sign_extend(rs1, 5);
> + }
> + } else {
> + if (rs2 != 0) {
> + riscv_raise_exception(env,
> + RISCV_EXCP_ILLEGAL_INST, GETPC());
> + }
> + env->vfp.vreg[dest].u16[j] =
> +                        (uint16_t)sign_extend(rs1, 5);
> + }
> + break;
> + case 32:
> + if (vm == 0) {
> + vector_get_layout(env, width, lmul, i, &idx, &pos);
> + if (((env->vfp.vreg[0].u8[idx] >> pos) & 0x1) == 0) {
> + env->vfp.vreg[dest].u32[j] =
> + env->vfp.vreg[src2].u32[j];
> + } else {
> + env->vfp.vreg[dest].u32[j] =
> + (uint32_t)sign_extend(rs1, 5);
> + }
> + } else {
> + if (rs2 != 0) {
> + riscv_raise_exception(env,
> + RISCV_EXCP_ILLEGAL_INST, GETPC());
> + }
> + env->vfp.vreg[dest].u32[j] =
> +                        (uint32_t)sign_extend(rs1, 5);
> + }
> + break;
> + case 64:
> + if (vm == 0) {
> + vector_get_layout(env, width, lmul, i, &idx, &pos);
> + if (((env->vfp.vreg[0].u8[idx] >> pos) & 0x1) == 0) {
> + env->vfp.vreg[dest].u64[j] =
> + env->vfp.vreg[src2].u64[j];
> + } else {
> + env->vfp.vreg[dest].u64[j] =
> + (uint64_t)sign_extend(rs1, 5);
> + }
> + } else {
> + if (rs2 != 0) {
> + riscv_raise_exception(env,
> + RISCV_EXCP_ILLEGAL_INST, GETPC());
> + }
> + env->vfp.vreg[dest].u64[j] =
> +                        (uint64_t)sign_extend(rs1, 5);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST,
> +                    GETPC());
> + break;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vfmerge.vfm vd, vs2, rs1, v0 # vd[i] = v0[i].LSB ? f[rs1] : vs2[i] */
> +void VECTOR_HELPER(vfmerge_vfm)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest, src2;
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + /* vfmv.v.f vd, rs1 # vd[i] = f[rs1]; */
> + if (vm && (rs2 != 0)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f16[j] = env->fpr[rs1];
> + } else {
> + env->vfp.vreg[dest].f16[j] =
> +                        env->vfp.vreg[src2].f16[j];
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f32[j] = env->fpr[rs1];
> + } else {
> + env->vfp.vreg[dest].f32[j] =
> +                        env->vfp.vreg[src2].f32[j];
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f64[j] = env->fpr[rs1];
> + } else {
> + env->vfp.vreg[dest].f64[j] =
> +                        env->vfp.vreg[src2].f64[j];
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST,
> +                    GETPC());
> + return;
> + }
> + } else {
> + vector_tail_fcommon(env, dest, j, width);
> + }
> + }
> +    env->vfp.vstart = 0;
> +}
> +
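> +/* vmseq.vv vd, vs2, vs1, vm # vd[i] = (vs2[i] == vs1[i]) */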
> +void VECTOR_HELPER(vmseq_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src1, src2, vlmax;
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src1 = rs1 + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src1].u8[j] ==
> + env->vfp.vreg[src2].u8[j]) {
> + vector_mask_result(env, rd, width, lmul, i, 1);
> + } else {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src1].u16[j] ==
> + env->vfp.vreg[src2].u16[j]) {
> + vector_mask_result(env, rd, width, lmul, i, 1);
> + } else {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src1].u32[j] ==
> + env->vfp.vreg[src2].u32[j]) {
> + vector_mask_result(env, rd, width, lmul, i, 1);
> + } else {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src1].u64[j] ==
> + env->vfp.vreg[src2].u64[j]) {
> + vector_mask_result(env, rd, width, lmul, i, 1);
> + } else {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST,
> +                    GETPC());
> + break;
> + }
> + } else {
> + if (width <= 64) {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + } else {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST,
> +                    GETPC());
> + return;
> + }
> + }
> + }
> + env->vfp.vstart = 0;
> +}
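> +
> +/* vmseq.vx vd, vs2, rs1, vm # vd[i] = (vs2[i] == x[rs1]) */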
> +void VECTOR_HELPER(vmseq_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src2, vlmax;
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs2, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if ((uint8_t)env->gpr[rs1] ==
> +                        env->vfp.vreg[src2].u8[j]) {
> + vector_mask_result(env, rd, width, lmul, i, 1);
> + } else {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if ((uint16_t)env->gpr[rs1] ==
> +                        env->vfp.vreg[src2].u16[j]) {
> + vector_mask_result(env, rd, width, lmul, i, 1);
> + } else {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if ((uint32_t)env->gpr[rs1] ==
> +                        env->vfp.vreg[src2].u32[j]) {
> + vector_mask_result(env, rd, width, lmul, i, 1);
> + } else {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if ((uint64_t)extend_gpr(env->gpr[rs1]) ==
> + env->vfp.vreg[src2].u64[j]) {
> + vector_mask_result(env, rd, width, lmul, i, 1);
> + } else {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST,
> +                    GETPC());
> + break;
> + }
> + } else {
> + if (width <= 64) {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + } else {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST,
> +                    GETPC());
> + return;
> + }
> + }
> + }
> + env->vfp.vstart = 0;
> +}
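> +
> +/* vmseq.vi vd, vs2, simm, vm # vd[i] = (vs2[i] == simm) */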
> +void VECTOR_HELPER(vmseq_vi)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src2, vlmax;
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs2, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if ((uint8_t)sign_extend(rs1, 5)
> + == env->vfp.vreg[src2].u8[j]) {
> + vector_mask_result(env, rd, width, lmul, i, 1);
> + } else {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if ((uint16_t)sign_extend(rs1, 5)
> + == env->vfp.vreg[src2].u16[j]) {
> + vector_mask_result(env, rd, width, lmul, i, 1);
> + } else {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if ((uint32_t)sign_extend(rs1, 5)
> + == env->vfp.vreg[src2].u32[j]) {
> + vector_mask_result(env, rd, width, lmul, i, 1);
> + } else {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if ((uint64_t)sign_extend(rs1, 5) ==
> + env->vfp.vreg[src2].u64[j]) {
> + vector_mask_result(env, rd, width, lmul, i, 1);
> + } else {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST,
> +                    GETPC());
> + break;
> + }
> + } else {
> + if (width <= 64) {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + } else {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST,
> +                    GETPC());
> + return;
> + }
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vmandnot.mm vd, vs2, vs1 # vd = vs2 & ~vs1 */
> +void VECTOR_HELPER(vmandnot_mm)(CPURISCVState *env, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, i, vlmax;
> + uint32_t tmp;
> +
> + if (vector_vtype_ill(env)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + vl = env->vfp.vl;
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + for (i = 0; i < vlmax; i++) {
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + tmp = ~vector_mask_reg(env, rs1, width, lmul, i) &
> + vector_mask_reg(env, rs2, width, lmul, i);
> + vector_mask_result(env, rd, width, lmul, i, tmp);
> + } else {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> +
> +    env->vfp.vstart = 0;
> +}
> +
> +/* vmfeq.vv vd, vs2, vs1, vm # Vector-vector */
> +void VECTOR_HELPER(vmfeq_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, src1, src2, result;
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + src1 = rs1 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + result = float16_eq_quiet(env->vfp.vreg[src1].f16[j],
> + env->vfp.vreg[src2].f16[j],
> + &env->fp_status);
> + vector_mask_result(env, rd, width, lmul, i, result);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + result = float32_eq_quiet(env->vfp.vreg[src1].f32[j],
> + env->vfp.vreg[src2].f32[j],
> + &env->fp_status);
> + vector_mask_result(env, rd, width, lmul, i, result);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + result = float64_eq_quiet(env->vfp.vreg[src1].f64[j],
> + env->vfp.vreg[src2].f64[j],
> + &env->fp_status);
> + vector_mask_result(env, rd, width, lmul, i, result);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST,
> +                    GETPC());
> + return;
> + }
> + } else {
> + switch (width) {
> + case 16:
> + case 32:
> + case 64:
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST,
> +                    GETPC());
> + return;
> + }
> + }
> + }
> +    env->vfp.vstart = 0;
> +}
> +
> +/* vmfeq.vf vd, vs2, rs1, vm # vector-scalar */
> +void VECTOR_HELPER(vmfeq_vf)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, src2, result;
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + result = float16_eq_quiet(env->fpr[rs1],
> + env->vfp.vreg[src2].f16[j],
> + &env->fp_status);
> + vector_mask_result(env, rd, width, lmul, i, result);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + result = float32_eq_quiet(env->fpr[rs1],
> + env->vfp.vreg[src2].f32[j],
> + &env->fp_status);
> + vector_mask_result(env, rd, width, lmul, i, result);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + result = float64_eq_quiet(env->fpr[rs1],
> + env->vfp.vreg[src2].f64[j],
> + &env->fp_status);
> + vector_mask_result(env, rd, width, lmul, i, result);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST,
> +                    GETPC());
> + return;
> + }
> + } else {
> + switch (width) {
> + case 16:
> + case 32:
> + case 64:
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST,
> +                    GETPC());
> + return;
> + }
> + }
> + }
> +    env->vfp.vstart = 0;
> +}
> +
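> +/* vmsne.vv vd, vs2, vs1, vm # vd[i] = (vs2[i] != vs1[i]) */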
> +void VECTOR_HELPER(vmsne_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src1, src2, vlmax;
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src1 = rs1 + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src1].u8[j] !=
> + env->vfp.vreg[src2].u8[j]) {
> + vector_mask_result(env, rd, width, lmul, i, 1);
> + } else {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src1].u16[j] !=
> + env->vfp.vreg[src2].u16[j]) {
> + vector_mask_result(env, rd, width, lmul, i, 1);
> + } else {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src1].u32[j] !=
> + env->vfp.vreg[src2].u32[j]) {
> + vector_mask_result(env, rd, width, lmul, i, 1);
> + } else {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src1].u64[j] !=
> + env->vfp.vreg[src2].u64[j]) {
> + vector_mask_result(env, rd, width, lmul, i, 1);
> + } else {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST,
> +                    GETPC());
> + break;
> + }
> + } else {
> + if (width <= 64) {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + } else {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST,
> +                    GETPC());
> + return;
> + }
> + }
> + }
> + env->vfp.vstart = 0;
> +}
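> +
> +/* vmsne.vx vd, vs2, rs1, vm # vd[i] = (vs2[i] != x[rs1]) */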
> +void VECTOR_HELPER(vmsne_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src2, vlmax;
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs2, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if ((uint8_t)env->gpr[rs1] !=
> +                        env->vfp.vreg[src2].u8[j]) {
> + vector_mask_result(env, rd, width, lmul, i, 1);
> + } else {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if ((uint16_t)env->gpr[rs1] !=
> +                        env->vfp.vreg[src2].u16[j]) {
> + vector_mask_result(env, rd, width, lmul, i, 1);
> + } else {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if ((uint32_t)env->gpr[rs1] !=
> +                        env->vfp.vreg[src2].u32[j]) {
> + vector_mask_result(env, rd, width, lmul, i, 1);
> + } else {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if ((uint64_t)extend_gpr(env->gpr[rs1]) !=
> + env->vfp.vreg[src2].u64[j]) {
> + vector_mask_result(env, rd, width, lmul, i, 1);
> + } else {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST,
> +                    GETPC());
> + break;
> + }
> + } else {
> + if (width <= 64) {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + } else {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST,
> +                    GETPC());
> + return;
> + }
> + }
> + }
> + env->vfp.vstart = 0;
> +}
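> +
> +/* vmsne.vi vd, vs2, simm, vm # vd[i] = (vs2[i] != simm) */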
> +void VECTOR_HELPER(vmsne_vi)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src2, vlmax;
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs2, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if ((uint8_t)sign_extend(rs1, 5)
> + != env->vfp.vreg[src2].u8[j]) {
> + vector_mask_result(env, rd, width, lmul, i, 1);
> + } else {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if ((uint16_t)sign_extend(rs1, 5)
> + != env->vfp.vreg[src2].u16[j]) {
> + vector_mask_result(env, rd, width, lmul, i, 1);
> + } else {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if ((uint32_t)sign_extend(rs1, 5)
> + != env->vfp.vreg[src2].u32[j]) {
> + vector_mask_result(env, rd, width, lmul, i, 1);
> + } else {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if ((uint64_t)sign_extend(rs1, 5) !=
> + env->vfp.vreg[src2].u64[j]) {
> + vector_mask_result(env, rd, width, lmul, i, 1);
> + } else {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST,
> +                    GETPC());
> + break;
> + }
> + } else {
> + if (width <= 64) {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + } else {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST,
> +                    GETPC());
> + return;
> + }
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vmand.mm vd, vs2, vs1 # vd = vs2 & vs1 */
> +void VECTOR_HELPER(vmand_mm)(CPURISCVState *env, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, i, vlmax;
> + uint32_t tmp;
> +
> + if (vector_vtype_ill(env)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + vl = env->vfp.vl;
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + for (i = 0; i < vlmax; i++) {
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + tmp = vector_mask_reg(env, rs1, width, lmul, i) &
> + vector_mask_reg(env, rs2, width, lmul, i);
> + vector_mask_result(env, rd, width, lmul, i, tmp);
> + } else {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> +
> +    env->vfp.vstart = 0;
> +}
> +
> +/* vmfle.vv vd, vs2, vs1, vm # Vector-vector */
> +void VECTOR_HELPER(vmfle_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, src1, src2, result;
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + src1 = rs1 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + result = float16_le(env->vfp.vreg[src2].f16[j],
> + env->vfp.vreg[src1].f16[j],
> + &env->fp_status);
> + vector_mask_result(env, rd, width, lmul, i, result);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + result = float32_le(env->vfp.vreg[src2].f32[j],
> + env->vfp.vreg[src1].f32[j],
> + &env->fp_status);
> + vector_mask_result(env, rd, width, lmul, i, result);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + result = float64_le(env->vfp.vreg[src2].f64[j],
> + env->vfp.vreg[src1].f64[j],
> + &env->fp_status);
> + vector_mask_result(env, rd, width, lmul, i, result);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST,
> +                    GETPC());
> + return;
> + }
> + } else {
> + switch (width) {
> + case 16:
> + case 32:
> + case 64:
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST,
> +                    GETPC());
> + return;
> + }
> + }
> + }
> +    env->vfp.vstart = 0;
> +}
> +
> +/* vmfle.vf vd, vs2, rs1, vm # vector-scalar */
> +void VECTOR_HELPER(vmfle_vf)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, src2, result;
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs2, false);
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + result = float16_le(env->vfp.vreg[src2].f16[j],
> + env->fpr[rs1],
> + &env->fp_status);
> + vector_mask_result(env, rd, width, lmul, i, result);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + result = float32_le(env->vfp.vreg[src2].f32[j],
> + env->fpr[rs1],
> + &env->fp_status);
> + vector_mask_result(env, rd, width, lmul, i, result);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + result = float64_le(env->vfp.vreg[src2].f64[j],
> + env->fpr[rs1],
> + &env->fp_status);
> + vector_mask_result(env, rd, width, lmul, i, result);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST,
> +                    GETPC());
> + return;
> + }
> + } else {
> + switch (width) {
> + case 16:
> + case 32:
> + case 64:
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST,
> +                    GETPC());
> + return;
> + }
> + }
> + }
> +    env->vfp.vstart = 0;
> +}
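> +
> +/* vmsltu.vv vd, vs2, vs1, vm # vd[i] = (vs2[i] < vs1[i]) unsigned */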
> +void VECTOR_HELPER(vmsltu_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src1, src2, vlmax;
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src1 = rs1 + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src2].u8[j] <
> + env->vfp.vreg[src1].u8[j]) {
> + vector_mask_result(env, rd, width, lmul, i, 1);
> + } else {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src2].u16[j] <
> + env->vfp.vreg[src1].u16[j]) {
> + vector_mask_result(env, rd, width, lmul, i, 1);
> + } else {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src2].u32[j] <
> + env->vfp.vreg[src1].u32[j]) {
> + vector_mask_result(env, rd, width, lmul, i, 1);
> + } else {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src2].u64[j] <
> + env->vfp.vreg[src1].u64[j]) {
> + vector_mask_result(env, rd, width, lmul, i, 1);
> + } else {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST,
> +                    GETPC());
> + break;
> + }
> + } else {
> + if (width <= 64) {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + } else {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST,
> +                    GETPC());
> + return;
> + }
> + }
> + }
> + env->vfp.vstart = 0;
> +}
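> +
> +/* vmsltu.vx vd, vs2, rs1, vm # vd[i] = (vs2[i] < x[rs1]) unsigned */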
> +void VECTOR_HELPER(vmsltu_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src2, vlmax;
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs2, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src2].u8[j] <
> +                        (uint8_t)env->gpr[rs1]) {
> + vector_mask_result(env, rd, width, lmul, i, 1);
> + } else {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src2].u16[j] <
> +                        (uint16_t)env->gpr[rs1]) {
> + vector_mask_result(env, rd, width, lmul, i, 1);
> + } else {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src2].u32[j] <
> +                        (uint32_t)env->gpr[rs1]) {
> + vector_mask_result(env, rd, width, lmul, i, 1);
> + } else {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src2].u64[j] <
> + (uint64_t)extend_gpr(env->gpr[rs1])) {
> + vector_mask_result(env, rd, width, lmul, i, 1);
> + } else {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST,
> +                    GETPC());
> + break;
> + }
> + } else {
> + if (width <= 64) {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + } else {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST,
> +                    GETPC());
> + return;
> + }
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vmor.mm vd, vs2, vs1 # vd = vs2 | vs1 */
> +void VECTOR_HELPER(vmor_mm)(CPURISCVState *env, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, i, vlmax;
> + uint32_t tmp;
> +
> + if (vector_vtype_ill(env)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + vl = env->vfp.vl;
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + for (i = 0; i < vlmax; i++) {
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + tmp = vector_mask_reg(env, rs1, width, lmul, i) |
> + vector_mask_reg(env, rs2, width, lmul, i);
> + vector_mask_result(env, rd, width, lmul, i, tmp & 0x1);
> + } else {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> +
> +    env->vfp.vstart = 0;
> +}
> +
> +/* vmford.vv vd, vs2, vs1, vm # Vector-vector */
> +void VECTOR_HELPER(vmford_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, src1, src2, result;
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + src1 = rs1 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    result =
> +                        float16_unordered_quiet(env->vfp.vreg[src1].f16[j],
> +                                                env->vfp.vreg[src2].f16[j],
> +                                                &env->fp_status);
> + vector_mask_result(env, rd, width, lmul, i, !result);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    result =
> +                        float32_unordered_quiet(env->vfp.vreg[src1].f32[j],
> +                                                env->vfp.vreg[src2].f32[j],
> +                                                &env->fp_status);
> + vector_mask_result(env, rd, width, lmul, i, !result);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    result =
> +                        float64_unordered_quiet(env->vfp.vreg[src1].f64[j],
> +                                                env->vfp.vreg[src2].f64[j],
> +                                                &env->fp_status);
> + vector_mask_result(env, rd, width, lmul, i, !result);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST,
> +                    GETPC());
> + return;
> + }
> + } else {
> + switch (width) {
> + case 16:
> + case 32:
> + case 64:
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
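
vmford is the IEEE "ordered" predicate: true iff neither operand is NaN.
The helper obtains it by negating softfloat's quiet unordered compare, so a
quiet NaN clears the mask bit without raising the invalid flag. One lane,
reduced to its essentials:

    #include "fpu/softfloat.h"

    static int f32_ordered_lane(float32 a, float32 b, float_status *st)
    {
        /* quiet: only a signaling NaN raises float_flag_invalid */
        return !float32_unordered_quiet(a, b, st);
    }
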
> +/* vmford.vf vd, vs2, rs1, vm # Vector-scalar */
> +void VECTOR_HELPER(vmford_vf)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, src2, result;
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs2, false);
> +
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + result = float16_unordered_quiet(env->vfp.vreg[src2].f16[j],
> + env->fpr[rs1],
> + &env->fp_status);
> + vector_mask_result(env, rd, width, lmul, i, !result);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + result = float32_unordered_quiet(env->vfp.vreg[src2].f32[j],
> + env->fpr[rs1],
> + &env->fp_status);
> + vector_mask_result(env, rd, width, lmul, i, !result);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + result = float64_unordered_quiet(env->vfp.vreg[src2].f64[j],
> + env->fpr[rs1],
> + &env->fp_status);
> + vector_mask_result(env, rd, width, lmul, i, !result);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + switch (width) {
> + case 16:
> + case 32:
> + case 64:
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +void VECTOR_HELPER(vmslt_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src1, src2, vlmax;
> +
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src1 = rs1 + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src2].s8[j] <
> + env->vfp.vreg[src1].s8[j]) {
> + vector_mask_result(env, rd, width, lmul, i, 1);
> + } else {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src2].s16[j] <
> + env->vfp.vreg[src1].s16[j]) {
> + vector_mask_result(env, rd, width, lmul, i, 1);
> + } else {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src2].s32[j] <
> + env->vfp.vreg[src1].s32[j]) {
> + vector_mask_result(env, rd, width, lmul, i, 1);
> + } else {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src2].s64[j] <
> + env->vfp.vreg[src1].s64[j]) {
> + vector_mask_result(env, rd, width, lmul, i, 1);
> + } else {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + if (width <= 64) {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + } else {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +void VECTOR_HELPER(vmslt_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src2, vlmax;
> +
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs2, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src2].s8[j] < (int8_t)env->gpr[rs1]) {
> + vector_mask_result(env, rd, width, lmul, i, 1);
> + } else {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src2].s16[j] < (int16_t)env->gpr[rs1]) {
> + vector_mask_result(env, rd, width, lmul, i, 1);
> + } else {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src2].s32[j] < (int32_t)env->gpr[rs1]) {
> + vector_mask_result(env, rd, width, lmul, i, 1);
> + } else {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src2].s64[j] <
> + (int64_t)extend_gpr(env->gpr[rs1])) {
> + vector_mask_result(env, rd, width, lmul, i, 1);
> + } else {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + if (width <= 64) {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + } else {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vmxor.mm vd, vs2, vs1 # vd = vs2 ^ vs1 */
> +void VECTOR_HELPER(vmxor_mm)(CPURISCVState *env, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, i, vlmax;
> + uint32_t tmp;
> +
> + if (vector_vtype_ill(env)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + vl = env->vfp.vl;
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + for (i = 0; i < vlmax; i++) {
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + tmp = vector_mask_reg(env, rs1, width, lmul, i) ^
> + vector_mask_reg(env, rs2, width, lmul, i);
> + vector_mask_result(env, rd, width, lmul, i, tmp & 0x1);
> + } else {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> +
> + env->vfp.vstart = 0;
> +}
> +
> +/* vmflt.vv vd, vs2, vs1, vm # Vector-vector */
> +void VECTOR_HELPER(vmflt_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, src1, src2, result;
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + src1 = rs1 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + result = float16_lt(env->vfp.vreg[src2].f16[j],
> + env->vfp.vreg[src1].f16[j],
> + &env->fp_status);
> + vector_mask_result(env, rd, width, lmul, i, result);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + result = float32_lt(env->vfp.vreg[src2].f32[j],
> + env->vfp.vreg[src1].f32[j],
> + &env->fp_status);
> + vector_mask_result(env, rd, width, lmul, i, result);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + result = float64_lt(env->vfp.vreg[src2].f64[j],
> + env->vfp.vreg[src1].f64[j],
> + &env->fp_status);
> + vector_mask_result(env, rd, width, lmul, i, result);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + switch (width) {
> + case 16:
> + case 32:
> + case 64:
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
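
Unlike the quiet compares used by vmfne/vmford, vmflt uses the signaling
predicate: float16/32/64_lt raise float_flag_invalid for any NaN operand
and return 0, so a NaN lane yields a 0 mask bit plus the invalid flag. A
one-lane sketch of the semantics the helper relies on:

    #include "fpu/softfloat.h"

    static int flt_lane(float32 vs2, float32 vs1, float_status *st)
    {
        /* signaling compare: NaN => invalid flag set, result 0 */
        return float32_lt(vs2, vs1, st);
    }
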
> +/* vmflt.vf vd, vs2, rs1, vm # vector-scalar */
> +void VECTOR_HELPER(vmflt_vf)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, src2, result;
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs2, false);
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + result = float16_lt(env->vfp.vreg[src2].f16[j],
> + env->fpr[rs1],
> + &env->fp_status);
> + vector_mask_result(env, rd, width, lmul, i, result);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + result = float32_lt(env->vfp.vreg[src2].f32[j],
> + env->fpr[rs1],
> + &env->fp_status);
> + vector_mask_result(env, rd, width, lmul, i, result);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + result = float64_lt(env->vfp.vreg[src2].f64[j],
> + env->fpr[rs1],
> + &env->fp_status);
> + vector_mask_result(env, rd, width, lmul, i, result);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + switch (width) {
> + case 16:
> + case 32:
> + case 64:
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +void VECTOR_HELPER(vmsleu_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src1, src2, vlmax;
> +
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src1 = rs1 + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src2].u8[j] <=
> + env->vfp.vreg[src1].u8[j]) {
> + vector_mask_result(env, rd, width, lmul, i, 1);
> + } else {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src2].u16[j] <=
> + env->vfp.vreg[src1].u16[j]) {
> + vector_mask_result(env, rd, width, lmul, i, 1);
> + } else {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src2].u32[j] <=
> + env->vfp.vreg[src1].u32[j]) {
> + vector_mask_result(env, rd, width, lmul, i, 1);
> + } else {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src2].u64[j] <=
> + env->vfp.vreg[src1].u64[j]) {
> + vector_mask_result(env, rd, width, lmul, i, 1);
> + } else {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + if (width <= 64) {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + } else {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +void VECTOR_HELPER(vmsleu_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src2, vlmax;
> +
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs2, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src2].u8[j] <= (uint8_t)env->gpr[rs1]) {
> + vector_mask_result(env, rd, width, lmul, i, 1);
> + } else {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src2].u16[j] <= (uint16_t)env->gpr[rs1]) {
> + vector_mask_result(env, rd, width, lmul, i, 1);
> + } else {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src2].u32[j] <= (uint32_t)env->gpr[rs1]) {
> + vector_mask_result(env, rd, width, lmul, i, 1);
> + } else {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src2].u64[j] <=
> + (uint64_t)extend_gpr(env->gpr[rs1])) {
> + vector_mask_result(env, rd, width, lmul, i, 1);
> + } else {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + if (width <= 64) {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + } else {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +void VECTOR_HELPER(vmsleu_vi)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src2, vlmax;
> +
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs2, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src2].u8[j] <= (uint8_t)rs1) {
> + vector_mask_result(env, rd, width, lmul, i, 1);
> + } else {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src2].u16[j] <= (uint16_t)rs1) {
> + vector_mask_result(env, rd, width, lmul, i, 1);
> + } else {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src2].u32[j] <= (uint32_t)rs1) {
> + vector_mask_result(env, rd, width, lmul, i, 1);
> + } else {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src2].u64[j] <=
> + (uint64_t)rs1) {
> + vector_mask_result(env, rd, width, lmul, i, 1);
> + } else {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + if (width <= 64) {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + } else {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
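
Note how the 5-bit immediate field is handled in the .vi flavours: vmsleu
uses rs1 zero-extended (the raw field value), while the signed vmsle below
uses sign_extend(rs1, 5). In case the idiom is unfamiliar, a well-defined
way to write that sign extension (a sketch, not the patch's definition):

    #include <stdint.h>

    static int64_t sign_extend_sketch(uint64_t x, int width)
    {
        uint64_t sign = 1ull << (width - 1);
        x &= (sign << 1) - 1;                /* keep the low <width> bits */
        return (int64_t)((x ^ sign) - sign); /* fold the sign bit down */
    }
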
> +/* vmornot.mm vd, vs2, vs1 # vd = vs2 | ~vs1 */
> +void VECTOR_HELPER(vmornot_mm)(CPURISCVState *env, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, i, vlmax;
> + uint32_t tmp;
> +
> + if (vector_vtype_ill(env)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + vl = env->vfp.vl;
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + for (i = 0; i < vlmax; i++) {
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + tmp = ~vector_mask_reg(env, rs1, width, lmul, i) |
> + vector_mask_reg(env, rs2, width, lmul, i);
> + vector_mask_result(env, rd, width, lmul, i, tmp & 0x1);
> + } else {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> +
> + env->vfp.vstart = 0;
> +}
> +
> +/* vmfne.vv vd, vs2, vs1, vm # Vector-vector */
> +void VECTOR_HELPER(vmfne_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, src1, src2, result;
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + src1 = rs1 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + result = float16_eq_quiet(env->vfp.vreg[src1].f16[j],
> + env->vfp.vreg[src2].f16[j],
> + &env->fp_status);
> + vector_mask_result(env, rd, width, lmul, i, !result);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + result = float32_eq_quiet(env->vfp.vreg[src1].f32[j],
> + env->vfp.vreg[src2].f32[j],
> + &env->fp_status);
> + vector_mask_result(env, rd, width, lmul, i, !result);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + result = float64_eq_quiet(env->vfp.vreg[src1].f64[j],
> + env->vfp.vreg[src2].f64[j],
> + &env->fp_status);
> + vector_mask_result(env, rd, width, lmul, i, !result);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + switch (width) {
> + case 16:
> + case 32:
> + case 64:
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vmfne.vf vd, vs2, rs1, vm # vector-scalar */
> +void VECTOR_HELPER(vmfne_vf)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, src2, result;
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs2, false);
> +
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + result = float16_eq_quiet(env->fpr[rs1],
> + env->vfp.vreg[src2].f16[j],
> + &env->fp_status);
> + vector_mask_result(env, rd, width, lmul, i, !result);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + result = float32_eq_quiet(env->fpr[rs1],
> + env->vfp.vreg[src2].f32[j],
> + &env->fp_status);
> + vector_mask_result(env, rd, width, lmul, i, !result);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + result = float64_eq_quiet(env->fpr[rs1],
> + env->vfp.vreg[src2].f64[j],
> + &env->fp_status);
> + vector_mask_result(env, rd, width, lmul, i, !result);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + switch (width) {
> + case 16:
> + case 32:
> + case 64:
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +void VECTOR_HELPER(vmsle_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src1, src2, vlmax;
> +
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src1 = rs1 + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src2].s8[j] <=
> + env->vfp.vreg[src1].s8[j]) {
> + vector_mask_result(env, rd, width, lmul, i, 1);
> + } else {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src2].s16[j] <=
> + env->vfp.vreg[src1].s16[j]) {
> + vector_mask_result(env, rd, width, lmul, i, 1);
> + } else {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src2].s32[j] <=
> + env->vfp.vreg[src1].s32[j]) {
> + vector_mask_result(env, rd, width, lmul, i, 1);
> + } else {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src2].s64[j] <=
> + env->vfp.vreg[src1].s64[j]) {
> + vector_mask_result(env, rd, width, lmul, i, 1);
> + } else {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + if (width <= 64) {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + } else {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +void VECTOR_HELPER(vmsle_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src2, vlmax;
> +
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs2, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src2].s8[j] <= (int8_t)env->gpr[rs1]) {
> + vector_mask_result(env, rd, width, lmul, i, 1);
> + } else {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src2].s16[j] <= (int16_t)env->gpr[rs1]) {
> + vector_mask_result(env, rd, width, lmul, i, 1);
> + } else {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src2].s32[j] <= (int32_t)env->gpr[rs1]) {
> + vector_mask_result(env, rd, width, lmul, i, 1);
> + } else {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src2].s64[j] <=
> + (int64_t)extend_gpr(env->gpr[rs1])) {
> + vector_mask_result(env, rd, width, lmul, i, 1);
> + } else {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + if (width <= 64) {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + } else {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +void VECTOR_HELPER(vmsle_vi)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src2, vlmax;
> +
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs2, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src2].s8[j] <=
> + (int8_t)sign_extend(rs1, 5)) {
> + vector_mask_result(env, rd, width, lmul, i, 1);
> + } else {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src2].s16[j] <=
> + (int16_t)sign_extend(rs1, 5)) {
> + vector_mask_result(env, rd, width, lmul, i, 1);
> + } else {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src2].s32[j] <=
> + (int32_t)sign_extend(rs1, 5)) {
> + vector_mask_result(env, rd, width, lmul, i, 1);
> + } else {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src2].s64[j] <=
> + sign_extend(rs1, 5)) {
> + vector_mask_result(env, rd, width, lmul, i, 1);
> + } else {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + if (width <= 64) {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + } else {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vmnand.mm vd, vs2, vs1 # vd = ~(vs2 & vs1) */
> +void VECTOR_HELPER(vmnand_mm)(CPURISCVState *env, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, i, vlmax;
> + uint32_t tmp;
> +
> + if (vector_vtype_ill(env)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + vl = env->vfp.vl;
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + for (i = 0; i < vlmax; i++) {
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + tmp = vector_mask_reg(env, rs1, width, lmul, i) &
> + vector_mask_reg(env, rs2, width, lmul, i);
> + vector_mask_result(env, rd, width, lmul, i, (~tmp & 0x1));
> + } else {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> +
> + env->vfp.vstart = 0;
> +}
> +
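
All the vmXXX.mm helpers share this loop shape: skip elements below vstart,
apply the bit operation for i < vl, and clear the tail bits up to VLMAX,
which matches 0.7.1's tail-zeroing for mask results. The common skeleton,
abstracted over the bit operation (callback and names are illustrative):

    typedef int (*mask_bit_op)(int a, int b);

    static void mask_loop_sketch(int vstart, int vl, int vlmax,
                                 const int *s1, const int *s2, int *d,
                                 mask_bit_op op)
    {
        for (int i = vstart; i < vlmax; i++) {
            d[i] = (i < vl) ? (op(s1[i], s2[i]) & 1) : 0; /* tail: zero */
        }
    }
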
> +/* vmfgt.vf vd, vs2, rs1, vm # vector-scalar */
> +void VECTOR_HELPER(vmfgt_vf)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, src2, result;
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs2, false);
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + result = float16_le(env->vfp.vreg[src2].f16[j],
> + env->fpr[rs1],
> + &env->fp_status);
> + vector_mask_result(env, rd, width, lmul, i, !result);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + result = float32_le(env->vfp.vreg[src2].f32[j],
> + env->fpr[rs1],
> + &env->fp_status);
> + vector_mask_result(env, rd, width, lmul, i, !result);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + result = float64_le(env->vfp.vreg[src2].f64[j],
> + env->fpr[rs1],
> + &env->fp_status);
> + vector_mask_result(env, rd, width, lmul, i, !result);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + switch (width) {
> + case 16:
> + case 32:
> + case 64:
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +void VECTOR_HELPER(vmsgtu_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src2, vlmax;
> +
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs2, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src2].u8[j] > (uint8_t)env->gpr[rs1]) {
> + vector_mask_result(env, rd, width, lmul, i, 1);
> + } else {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src2].u16[j] > (uint16_t)env->gpr[rs1]) {
> + vector_mask_result(env, rd, width, lmul, i, 1);
> + } else {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src2].u32[j] > (uint32_t)env->gpr[rs1]) {
> + vector_mask_result(env, rd, width, lmul, i, 1);
> + } else {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src2].u64[j] >
> + (uint64_t)extend_gpr(env->gpr[rs1])) {
> + vector_mask_result(env, rd, width, lmul, i, 1);
> + } else {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + if (width <= 64) {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + } else {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + }
> + }
> +
> + env->vfp.vstart = 0;
> +}
> +void VECTOR_HELPER(vmsgtu_vi)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src2, vlmax;
> +
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs2, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src2].u8[j] > (uint8_t)rs1) {
> + vector_mask_result(env, rd, width, lmul, i, 1);
> + } else {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src2].u16[j] > (uint16_t)rs1) {
> + vector_mask_result(env, rd, width, lmul, i, 1);
> + } else {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src2].u32[j] > (uint32_t)rs1) {
> + vector_mask_result(env, rd, width, lmul, i, 1);
> + } else {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src2].u64[j] >
> + (uint64_t)rs1) {
> + vector_mask_result(env, rd, width, lmul, i, 1);
> + } else {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + if (width <= 64) {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + } else {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vmnor.mm vd, vs2, vs1 # vd = ~(vs2 | vs1) */
> +void VECTOR_HELPER(vmnor_mm)(CPURISCVState *env, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, i, vlmax;
> + uint32_t tmp;
> +
> + if (vector_vtype_ill(env)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + vl = env->vfp.vl;
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + for (i = 0; i < vlmax; i++) {
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + tmp = vector_mask_reg(env, rs1, width, lmul, i) |
> + vector_mask_reg(env, rs2, width, lmul, i);
> + vector_mask_result(env, rd, width, lmul, i, ~tmp & 0x1);
> + } else {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> +
> + env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vmsgt_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src2, vlmax;
> +
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs2, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src2].s8[j] > (int8_t)env->gpr[rs1]) {
> + vector_mask_result(env, rd, width, lmul, i, 1);
> + } else {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src2].s16[j] > (int16_t)env->gpr[rs1]) {
> + vector_mask_result(env, rd, width, lmul, i, 1);
> + } else {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src2].s32[j] > (int32_t)env->gpr[rs1]) {
> + vector_mask_result(env, rd, width, lmul, i, 1);
> + } else {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src2].s64[j] >
> + (int64_t)extend_gpr(env->gpr[rs1])) {
> + vector_mask_result(env, rd, width, lmul, i, 1);
> + } else {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + if (width <= 64) {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + } else {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +void VECTOR_HELPER(vmsgt_vi)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src2, vlmax;
> +
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs2, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src2].s8[j] >
> + (int8_t)sign_extend(rs1, 5)) {
> + vector_mask_result(env, rd, width, lmul, i, 1);
> + } else {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src2].s16[j] >
> + (int16_t)sign_extend(rs1, 5)) {
> + vector_mask_result(env, rd, width, lmul, i, 1);
> + } else {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src2].s32[j] >
> + (int32_t)sign_extend(rs1, 5)) {
> + vector_mask_result(env, rd, width, lmul, i, 1);
> + } else {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src2].s64[j] >
> + sign_extend(rs1, 5)) {
> + vector_mask_result(env, rd, width, lmul, i, 1);
> + } else {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + if (width <= 64) {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + } else {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +
> +/* vmxnor.mm vd, vs2, vs1 # vd = ~(vs2 ^ vs1) */
> +void VECTOR_HELPER(vmxnor_mm)(CPURISCVState *env, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, i, vlmax;
> + uint32_t tmp;
> +
> + if (vector_vtype_ill(env)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + vl = env->vfp.vl;
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + for (i = 0; i < vlmax; i++) {
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + tmp = vector_mask_reg(env, rs1, width, lmul, i) ^
> + vector_mask_reg(env, rs2, width, lmul, i);
> + vector_mask_result(env, rd, width, lmul, i, ~tmp & 0x1);
> + } else {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> +
> + env->vfp.vstart = 0;
> +}
> +
> +/* vmfge.vf vd, vs2, rs1, vm # vector-scalar */
> +void VECTOR_HELPER(vmfge_vf)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, src2, result;
> +
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs2, false);
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + result = float16_lt(env->vfp.vreg[src2].f16[j],
> + env->fpr[rs1],
> + &env->fp_status);
> + vector_mask_result(env, rd, width, lmul, i, !result);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + result = float32_lt(env->vfp.vreg[src2].f32[j],
> + env->fpr[rs1],
> + &env->fp_status);
> + vector_mask_result(env, rd, width, lmul, i, !result);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + result = float64_lt(env->vfp.vreg[src2].f64[j],
> + env->fpr[rs1],
> + &env->fp_status);
> + vector_mask_result(env, rd, width, lmul, i, !result);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + switch (width) {
> + case 16:
> + case 32:
> + case 64:
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
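
One subtlety worth flagging in vmfgt.vf and vmfge.vf: both negate le/lt,
and float*_le/float*_lt return 0 when an operand is NaN, so !result writes
a 1 mask bit on NaN lanes, whereas the spec's compares with NaN should
produce 0 (only vmfne yields 1). If that reading is right, a lane keeping
the IEEE behaviour could swap the operands and drop the negation:

    #include "fpu/softfloat.h"

    static int fge_lane(float32 vs2, float32 rs1, float_status *st)
    {
        /* vs2 >= rs1  <=>  rs1 <= vs2; NaN => result 0, invalid flag */
        return float32_le(rs1, vs2, st);
    }
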
> +/* vsaddu.vv vd, vs2, vs1, vm # Vector-vector */
> +void VECTOR_HELPER(vsaddu_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest, src1, src2;
> +
> + lmul = vector_get_lmul(env);
> +
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + vl = env->vfp.vl;
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + src1 = rs1 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u8[j] = sat_add_u8(env,
> + env->vfp.vreg[src1].u8[j], env->vfp.vreg[src2].u8[j]);
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u16[j] = sat_add_u16(env,
> + env->vfp.vreg[src1].u16[j], env->vfp.vreg[src2].u16[j]);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u32[j] = sat_add_u32(env,
> + env->vfp.vreg[src1].u32[j], env->vfp.vreg[src2].u32[j]);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u64[j] = sat_add_u64(env,
> + env->vfp.vreg[src1].u64[j], env->vfp.vreg[src2].u64[j]);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vsaddu.vx vd, vs2, rs1, vm # vector-scalar */
> +void VECTOR_HELPER(vsaddu_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest, src2;
> +
> + lmul = vector_get_lmul(env);
> +
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + vl = env->vfp.vl;
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u8[j] = sat_add_u8(env,
> + env->vfp.vreg[src2].u8[j], env->gpr[rs1]);
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u16[j] = sat_add_u16(env,
> + env->vfp.vreg[src2].u16[j], env->gpr[rs1]);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u32[j] = sat_add_u32(env,
> + env->vfp.vreg[src2].u32[j], env->gpr[rs1]);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u64[j] = sat_add_u64(env,
> + env->vfp.vreg[src2].u64[j], env->gpr[rs1]);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vsaddu.vi vd, vs2, imm, vm # vector-immediate */
> +void VECTOR_HELPER(vsaddu_vi)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest, src2;
> +
> + lmul = vector_get_lmul(env);
> +
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> +
> + vl = env->vfp.vl;
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u8[j] = sat_add_u8(env,
> + env->vfp.vreg[src2].u8[j], rs1);
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u16[j] = sat_add_u16(env,
> + env->vfp.vreg[src2].u16[j], rs1);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u32[j] = sat_add_u32(env,
> + env->vfp.vreg[src2].u32[j], rs1);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u64[j] = sat_add_u64(env,
> + env->vfp.vreg[src2].u64[j], rs1);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
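
The sat_add_u* helpers clamp to all-ones on overflow instead of wrapping,
and, like the other fixed-point helpers in this patch, would be expected to
set the vxsat saturation flag. A minimal sketch of the uint8 case under
that assumption (the flag is passed explicitly here because its location in
CPURISCVState is this patch's business, not ours):

    #include <stdint.h>

    static uint8_t sat_add_u8_sketch(uint8_t a, uint8_t b, int *vxsat)
    {
        uint8_t res = (uint8_t)(a + b);  /* wraps modulo 256 */
        if (res < a) {                   /* wrap implies overflow */
            res = UINT8_MAX;             /* clamp to 2^8 - 1 */
            *vxsat = 1;                  /* record saturation */
        }
        return res;
    }
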
> +void VECTOR_HELPER(vdivu_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src1, src2, dest, vlmax;
> +
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src1 = rs1 + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src1].u8[j] == 0) {
> + env->vfp.vreg[dest].u8[j] = MAX_U8;
> + } else {
> + env->vfp.vreg[dest].u8[j] = env->vfp.vreg[src2].u8[j] /
> + env->vfp.vreg[src1].u8[j];
> + }
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src1].u16[j] == 0) {
> + env->vfp.vreg[dest].u16[j] = MAX_U16;
> + } else {
> + env->vfp.vreg[dest].u16[j] = env->vfp.vreg[src2].u16[j]
> + / env->vfp.vreg[src1].u16[j];
> + }
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src1].u32[j] == 0) {
> + env->vfp.vreg[dest].u32[j] = MAX_U32;
> + } else {
> + env->vfp.vreg[dest].u32[j] = env->vfp.vreg[src2].u32[j]
> + / env->vfp.vreg[src1].u32[j];
> + }
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src1].u64[j] == 0) {
> + env->vfp.vreg[dest].u64[j] = MAX_U64;
> + } else {
> + env->vfp.vreg[dest].u64[j] = env->vfp.vreg[src2].u64[j]
> + / env->vfp.vreg[src1].u64[j];
> + }
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vdivu_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src2, dest, vlmax;
> +
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if ((uint8_t)env->gpr[rs1] == 0) {
> + env->vfp.vreg[dest].u8[j] = MAX_U8;
> + } else {
> + env->vfp.vreg[dest].u8[j] = env->vfp.vreg[src2].u8[j] /
> + (uint8_t)env->gpr[rs1];
> + }
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if ((uint16_t)env->gpr[rs1] == 0) {
> + env->vfp.vreg[dest].u16[j] = MAX_U16;
> + } else {
> + env->vfp.vreg[dest].u16[j] = env->vfp.vreg[src2].u16[j]
> + / (uint16_t)env->gpr[rs1];
> + }
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if ((uint32_t)env->gpr[rs1] == 0) {
> + env->vfp.vreg[dest].u32[j] = MAX_U32;
> + } else {
> + env->vfp.vreg[dest].u32[j] = env->vfp.vreg[src2].u32[j]
> + / (uint32_t)env->gpr[rs1];
> + }
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if ((uint64_t)extend_gpr(env->gpr[rs1]) == 0) {
> + env->vfp.vreg[dest].u64[j] = MAX_U64;
> + } else {
> + env->vfp.vreg[dest].u64[j] = env->vfp.vreg[src2].u64[j]
> + / (uint64_t)extend_gpr(env->gpr[rs1]);
> + }
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
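
vdivu encodes the RISC-V convention in-line: unsigned division by zero
returns all-ones (MAX_U*) rather than trapping, exactly as the scalar DIVU
instruction does. Boiled down to one element:

    #include <stdint.h>

    static uint64_t divu_lane(uint64_t dividend, uint64_t divisor)
    {
        return divisor ? dividend / divisor : UINT64_MAX;
    }
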
> +/* vfdiv.vv vd, vs2, vs1, vm # Vector-vector */
> +void VECTOR_HELPER(vfdiv_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest, src1, src2;
> +
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + src1 = rs1 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f16[j] = float16_div(env->vfp.vreg[src2].f16[j],
> + env->vfp.vreg[src1].f16[j], &env->fp_status);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f32[j] = float32_div(env->vfp.vreg[src2].f32[j],
> + env->vfp.vreg[src1].f32[j], &env->fp_status);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f64[j] = float64_div(env->vfp.vreg[src2].f64[j],
> + env->vfp.vreg[src1].f64[j], &env->fp_status);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_fcommon(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vfdiv.vf vd, vs2, rs1, vm # vector-scalar */
> +void VECTOR_HELPER(vfdiv_vf)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest, src2;
> +
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f16[j] = float16_div(
> + env->vfp.vreg[src2].f16[j],
> + env->fpr[rs1],
> + &env->fp_status);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f32[j] = float32_div(
> + env->vfp.vreg[src2].f32[j],
> + env->fpr[rs1],
> + &env->fp_status);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f64[j] = float64_div(
> + env->vfp.vreg[src2].f64[j],
> + env->fpr[rs1],
> + &env->fp_status);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_fcommon(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vsadd.vv vd, vs2, vs1, vm # Vector-vector */
> +void VECTOR_HELPER(vsadd_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest, src1, src2;
> +
> + lmul = vector_get_lmul(env);
> +
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + vl = env->vfp.vl;
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + src1 = rs1 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s8[j] = sat_add_s8(env,
> + env->vfp.vreg[src1].s8[j], env->vfp.vreg[src2].s8[j]);
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s16[j] = sat_add_s16(env,
> + env->vfp.vreg[src1].s16[j], env->vfp.vreg[src2].s16[j]);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s32[j] = sat_add_s32(env,
> + env->vfp.vreg[src1].s32[j], env->vfp.vreg[src2].s32[j]);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s64[j] = sat_add_s64(env,
> + env->vfp.vreg[src1].s64[j], env->vfp.vreg[src2].s64[j]);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vsadd.vx vd, vs2, rs1, vm # vector-scalar */
> +void VECTOR_HELPER(vsadd_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest, src2;
> +
> + lmul = vector_get_lmul(env);
> +
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + vl = env->vfp.vl;
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s8[j] = sat_add_s8(env,
> + env->vfp.vreg[src2].s8[j], env->gpr[rs1]);
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s16[j] = sat_add_s16(env,
> + env->vfp.vreg[src2].s16[j], env->gpr[rs1]);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s32[j] = sat_add_s32(env,
> + env->vfp.vreg[src2].s32[j], env->gpr[rs1]);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s64[j] = sat_add_s64(env,
> + env->vfp.vreg[src2].s64[j], env->gpr[rs1]);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vsadd.vi vd, vs2, imm, vm # vector-immediate */
> +void VECTOR_HELPER(vsadd_vi)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest, src2;
> +
> + lmul = vector_get_lmul(env);
> +
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + vl = env->vfp.vl;
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s8[j] = sat_add_s8(env,
> + env->vfp.vreg[src2].s8[j], sign_extend(rs1, 5));
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s16[j] = sat_add_s16(env,
> + env->vfp.vreg[src2].s16[j], sign_extend(rs1, 5));
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s32[j] = sat_add_s32(env,
> + env->vfp.vreg[src2].s32[j], sign_extend(rs1, 5));
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s64[j] = sat_add_s64(env,
> + env->vfp.vreg[src2].s64[j], sign_extend(rs1, 5));
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
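> +/*
> + * The sat_add_* helpers are assumed to clamp on signed overflow and to
> + * set the fixed-point saturation flag (vxsat). Worked SEW=8 example:
> + * 0x70 + 0x30 = 0xa0 would wrap to -96 in two's complement, so vsadd
> + * returns MAX_S8 (0x7f) instead.
> + */
> +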
> +void VECTOR_HELPER(vdiv_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src1, src2, dest, vlmax;
> +
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src1 = rs1 + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src1].s8[j] == 0) {
> + env->vfp.vreg[dest].s8[j] = -1;
> + } else if ((env->vfp.vreg[src2].s8[j] == MIN_S8) &&
> + (env->vfp.vreg[src1].s8[j] == (int8_t)(-1))) {
> + env->vfp.vreg[dest].s8[j] = MIN_S8;
> + } else {
> + env->vfp.vreg[dest].s8[j] = env->vfp.vreg[src2].s8[j] /
> + env->vfp.vreg[src1].s8[j];
> + }
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src1].s16[j] == 0) {
> + env->vfp.vreg[dest].s16[j] = -1;
> + } else if ((env->vfp.vreg[src2].s16[j] == MIN_S16) &&
> + (env->vfp.vreg[src1].s16[j] == (int16_t)(-1))) {
> + env->vfp.vreg[dest].s16[j] = MIN_S16;
> + } else {
> + env->vfp.vreg[dest].s16[j] = env->vfp.vreg[src2].s16[j]
> + / env->vfp.vreg[src1].s16[j];
> + }
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src1].s32[j] == 0) {
> + env->vfp.vreg[dest].s32[j] = -1;
> + } else if ((env->vfp.vreg[src2].s32[j] == MIN_S32) &&
> + (env->vfp.vreg[src1].s32[j] == (int32_t)(-1))) {
> + env->vfp.vreg[dest].s32[j] = MIN_S32;
> + } else {
> + env->vfp.vreg[dest].s32[j] = env->vfp.vreg[src2].s32[j]
> + / env->vfp.vreg[src1].s32[j];
> + }
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src1].s64[j] == 0) {
> + env->vfp.vreg[dest].s64[j] = -1;
> + } else if ((env->vfp.vreg[src2].s64[j] == MIN_S64) &&
> + (env->vfp.vreg[src1].s64[j] == (int64_t)(-1))) {
> + env->vfp.vreg[dest].s64[j] = MIN_S64;
> + } else {
> + env->vfp.vreg[dest].s64[j] = env->vfp.vreg[src2].s64[j]
> + / env->vfp.vreg[src1].s64[j];
> + }
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +void VECTOR_HELPER(vdiv_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src2, dest, vlmax;
> +
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if ((int8_t)env->gpr[rs1] == 0) {
> + env->vfp.vreg[dest].s8[j] = -1;
> + } else if ((env->vfp.vreg[src2].s8[j] == MIN_S8) &&
> + ((int8_t)env->gpr[rs1] == (int8_t)(-1))) {
> + env->vfp.vreg[dest].s8[j] = MIN_S8;
> + } else {
> + env->vfp.vreg[dest].s8[j] = env->vfp.vreg[src2].s8[j] /
> + (int8_t)env->gpr[rs1];
> + }
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if ((int16_t)env->gpr[rs1] == 0) {
> + env->vfp.vreg[dest].s16[j] = -1;
> + } else if ((env->vfp.vreg[src2].s16[j] == MIN_S16) &&
> + ((int16_t)env->gpr[rs1] == (int16_t)(-1))) {
> + env->vfp.vreg[dest].s16[j] = MIN_S16;
> + } else {
> + env->vfp.vreg[dest].s16[j] = env->vfp.vreg[src2].s16[j]
> + / (int16_t)env->gpr[rs1];
> + }
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if ((int32_t)env->gpr[rs1] == 0) {
> + env->vfp.vreg[dest].s32[j] = -1;
> + } else if ((env->vfp.vreg[src2].s32[j] == MIN_S32) &&
> + ((int32_t)env->gpr[rs1] == (int32_t)(-1))) {
> + env->vfp.vreg[dest].s32[j] = MIN_S32;
> + } else {
> + env->vfp.vreg[dest].s32[j] = env->vfp.vreg[src2].s32[j]
> + / (int32_t)env->gpr[rs1];
> + }
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if ((int64_t)extend_gpr(env->gpr[rs1]) == 0) {
> + env->vfp.vreg[dest].s64[j] = -1;
> + } else if ((env->vfp.vreg[src2].s64[j] == MIN_S64) &&
> + ((int64_t)extend_gpr(env->gpr[rs1]) == (int64_t)(-1))) {
> + env->vfp.vreg[dest].s64[j] = MIN_S64;
> + } else {
> + env->vfp.vreg[dest].s64[j] = env->vfp.vreg[src2].s64[j]
> + / (int64_t)extend_gpr(env->gpr[rs1]);
> + }
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
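> +/*
> + * The signed vdiv helpers follow the scalar DIV convention: x / 0
> + * yields -1, and the single overflowing case MIN / -1 (e.g. -128 / -1
> + * at SEW=8, whose true quotient +128 is unrepresentable) yields MIN
> + * again; neither case raises an exception. The explicit branches also
> + * keep the C '/' operator away from both undefined cases.
> + */
> +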
> +/* vfrdiv.vf vd, vs2, rs1, vm # scalar-vector, vd[i] = f[rs1]/vs2[i] */
> +void VECTOR_HELPER(vfrdiv_vf)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest, src2;
> +
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f16[j] = float16_div(
> + env->fpr[rs1],
> + env->vfp.vreg[src2].f16[j],
> + &env->fp_status);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f32[j] = float32_div(
> + env->fpr[rs1],
> + env->vfp.vreg[src2].f32[j],
> + &env->fp_status);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f64[j] = float64_div(
> + env->fpr[rs1],
> + env->vfp.vreg[src2].f64[j],
> + &env->fp_status);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_fcommon(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
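> +/*
> + * vfrdiv is the reversed form, vd[i] = f[rs1] / vs2[i]: the scalar is
> + * the dividend here, whereas in vfdiv.vf it is the divisor. Only the
> + * argument order passed to float*_div changes.
> + */
> +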
> +/* vssubu.vv vd, vs2, vs1, vm # Vector-vector */
> +void VECTOR_HELPER(vssubu_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest, src1, src2;
> +
> + lmul = vector_get_lmul(env);
> +
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + vl = env->vfp.vl;
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + src1 = rs1 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u8[j] = sat_sub_u8(env,
> + env->vfp.vreg[src2].u8[j], env->vfp.vreg[src1].u8[j]);
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u16[j] = sat_sub_u16(env,
> + env->vfp.vreg[src2].u16[j], env->vfp.vreg[src1].u16[j]);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u32[j] = sat_sub_u32(env,
> + env->vfp.vreg[src2].u32[j], env->vfp.vreg[src1].u32[j]);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u64[j] = sat_sub_u64(env,
> + env->vfp.vreg[src2].u64[j], env->vfp.vreg[src1].u64[j]);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vssubu.vx vd, vs2, rs1, vm # vector-scalar */
> +void VECTOR_HELPER(vssubu_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest, src2;
> +
> + lmul = vector_get_lmul(env);
> +
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + vl = env->vfp.vl;
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u8[j] = sat_sub_u8(env,
> + env->vfp.vreg[src2].u8[j], env->gpr[rs1]);
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u16[j] = sat_sub_u16(env,
> + env->vfp.vreg[src2].u16[j], env->gpr[rs1]);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u32[j] = sat_sub_u32(env,
> + env->vfp.vreg[src2].u32[j], env->gpr[rs1]);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u64[j] = sat_sub_u64(env,
> + env->vfp.vreg[src2].u64[j], env->gpr[rs1]);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
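> +/*
> + * The sat_sub_u* helpers are assumed to clamp at zero on unsigned
> + * underflow (setting vxsat), e.g. at SEW=8: 5 - 9 saturates to 0
> + * rather than wrapping to 0xfc.
> + */
> +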
> +void VECTOR_HELPER(vremu_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src1, src2, dest, vlmax;
> +
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src1 = rs1 + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src1].u8[j] == 0) {
> + env->vfp.vreg[dest].u8[j] = env->vfp.vreg[src2].u8[j];
> + } else {
> + env->vfp.vreg[dest].u8[j] = env->vfp.vreg[src2].u8[j] %
> + env->vfp.vreg[src1].u8[j];
> + }
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src1].u16[j] == 0) {
> + env->vfp.vreg[dest].u16[j] = env->vfp.vreg[src2].u16[j];
> + } else {
> + env->vfp.vreg[dest].u16[j] = env->vfp.vreg[src2].u16[j]
> + % env->vfp.vreg[src1].u16[j];
> + }
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src1].u32[j] == 0) {
> + env->vfp.vreg[dest].u32[j] = env->vfp.vreg[src2].u32[j];
> + } else {
> + env->vfp.vreg[dest].u32[j] = env->vfp.vreg[src2].u32[j]
> + % env->vfp.vreg[src1].u32[j];
> + }
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src1].u64[j] == 0) {
> + env->vfp.vreg[dest].u64[j] = env->vfp.vreg[src2].u64[j];
> + } else {
> + env->vfp.vreg[dest].u64[j] = env->vfp.vreg[src2].u64[j]
> + % env->vfp.vreg[src1].u64[j];
> + }
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> +
> + env->vfp.vstart = 0;
> +}
> +void VECTOR_HELPER(vremu_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src2, dest, vlmax;
> +
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if ((uint8_t)env->gpr[rs1] == 0) {
> + env->vfp.vreg[dest].u8[j] = env->vfp.vreg[src2].u8[j];
> + } else {
> + env->vfp.vreg[dest].u8[j] = env->vfp.vreg[src2].u8[j] %
> + (uint8_t)env->gpr[rs1];
> + }
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if ((uint16_t)env->gpr[rs1] == 0) {
> + env->vfp.vreg[dest].u16[j] = env->vfp.vreg[src2].u16[j];
> + } else {
> + env->vfp.vreg[dest].u16[j] = env->vfp.vreg[src2].u16[j]
> + % (uint16_t)env->gpr[rs1];
> + }
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if ((uint32_t)env->gpr[rs1] == 0) {
> + env->vfp.vreg[dest].u32[j] = env->vfp.vreg[src2].u32[j];
> + } else {
> + env->vfp.vreg[dest].u32[j] = env->vfp.vreg[src2].u32[j]
> + % (uint32_t)env->gpr[rs1];
> + }
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if ((uint64_t)extend_gpr(env->gpr[rs1]) == 0) {
> + env->vfp.vreg[dest].u64[j] = env->vfp.vreg[src2].u64[j];
> + } else {
> + env->vfp.vreg[dest].u64[j] = env->vfp.vreg[src2].u64[j]
> + % (uint64_t)extend_gpr(env->gpr[rs1]);
> + }
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
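> +/*
> + * The vremu helpers mirror scalar REMU: a remainder by zero returns
> + * the dividend unchanged (13 % 0 -> 13), so together with the all-ones
> + * quotient from vdivu the identity a == (a / b) * b + (a % b) still
> + * holds for b == 0.
> + */
> +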
> +/* vmsbf.m vd, vs2, vm # set-before-first mask bit */
> +void VECTOR_HELPER(vmsbf_m)(CPURISCVState *env, uint32_t vm,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i;
> + bool first_mask_bit = false;
> + if (vector_vtype_ill(env)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> + if (env->vfp.vstart != 0) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + if (i < vl) {
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (first_mask_bit) {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + continue;
> + }
> + if (!vector_mask_reg(env, rs2, width, lmul, i)) {
> + vector_mask_result(env, rd, width, lmul, i, 1);
> + } else {
> + first_mask_bit = true;
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + } else {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vmsif.m vd, vs2, vm # set-including-first mask bit */
> +void VECTOR_HELPER(vmsif_m)(CPURISCVState *env, uint32_t vm,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i;
> + bool first_mask_bit = false;
> + if (vector_vtype_ill(env)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> + if (env->vfp.vstart != 0) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + if (i < vl) {
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (first_mask_bit) {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + continue;
> + }
> + if (!vector_mask_reg(env, rs2, width, lmul, i)) {
> + vector_mask_result(env, rd, width, lmul, i, 1);
> + } else {
> + first_mask_bit = true;
> + vector_mask_result(env, rd, width, lmul, i, 1);
> + }
> + }
> + } else {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vmsof.m vd, vs2, vm # set-only-first mask bit */
> +void VECTOR_HELPER(vmsof_m)(CPURISCVState *env, uint32_t vm,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i;
> + bool first_mask_bit = false;
> + if (vector_vtype_ill(env)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> + if (env->vfp.vstart != 0) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + if (i < vl) {
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (first_mask_bit) {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + continue;
> + }
> + if (!vector_mask_reg(env, rs2, width, lmul, i)) {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + } else {
> + first_mask_bit = true;
> + vector_mask_result(env, rd, width, lmul, i, 1);
> + }
> + }
> + } else {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
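> +/*
> + * The three mask helpers above differ only in how the element carrying
> + * the first set bit of vs2 is written. For active source bits
> + * 0 0 1 0 1:
> + *     vmsbf -> 1 1 0 0 0    (set before first)
> + *     vmsif -> 1 1 1 0 0    (set including first)
> + *     vmsof -> 0 0 1 0 0    (set only first)
> + */
> +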
> +/* viota.m v4, v2, v0.t */
> +void VECTOR_HELPER(viota_m)(CPURISCVState *env, uint32_t vm, uint32_t rs2,
> + uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest;
> + uint32_t sum = 0;
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> + if (vector_vtype_ill(env)
> + || vector_overlap_vm_force(vm, rd)
> + || vector_overlap_dstgp_srcgp(rd, lmul, rs2, 1)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + if (env->vfp.vstart != 0) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u8[j] = sum;
> + if (vector_mask_reg(env, rs2, width, lmul, i)) {
> + sum++;
> + }
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u16[j] = sum;
> + if (vector_mask_reg(env, rs2, width, lmul, i)) {
> + sum++;
> + }
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u32[j] = sum;
> + if (vector_mask_reg(env, rs2, width, lmul, i)) {
> + sum++;
> + }
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u64[j] = sum;
> + if (vector_mask_reg(env, rs2, width, lmul, i)) {
> + sum++;
> + }
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
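> +/*
> + * viota writes the running count of set bits of vs2 seen so far (an
> + * exclusive prefix sum): the current count is stored first, then
> + * incremented when the source bit is set. For vs2 = 1 0 0 1 1 the
> + * active results are 0 1 1 1 2.
> + */
> +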
> +/* vid.v vd, vm # Write element ID to destination. */
> +void VECTOR_HELPER(vid_v)(CPURISCVState *env, uint32_t vm, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest;
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rd, false);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u8[j] = i;
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u16[j] = i;
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u32[j] = i;
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u64[j] = i;
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vssub.vv vd, vs2, vs1, vm # Vector-vector */
> +void VECTOR_HELPER(vssub_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest, src1, src2;
> +
> + lmul = vector_get_lmul(env);
> +
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + vl = env->vfp.vl;
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + src1 = rs1 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s8[j] = sat_sub_s8(env,
> + env->vfp.vreg[src2].s8[j], env->vfp.vreg[src1].s8[j]);
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s16[j] = sat_sub_s16(env,
> + env->vfp.vreg[src2].s16[j], env->vfp.vreg[src1].s16[j]);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s32[j] = sat_sub_s32(env,
> + env->vfp.vreg[src2].s32[j], env->vfp.vreg[src1].s32[j]);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s64[j] = sat_sub_s64(env,
> + env->vfp.vreg[src2].s64[j], env->vfp.vreg[src1].s64[j]);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vssub.vx vd, vs2, rs1, vm # vector-scalar */
> +void VECTOR_HELPER(vssub_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest, src2;
> +
> + lmul = vector_get_lmul(env);
> +
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + vl = env->vfp.vl;
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s8[j] = sat_sub_s8(env,
> + env->vfp.vreg[src2].s8[j], env->gpr[rs1]);
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s16[j] = sat_sub_s16(env,
> + env->vfp.vreg[src2].s16[j], env->gpr[rs1]);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s32[j] = sat_sub_s32(env,
> + env->vfp.vreg[src2].s32[j], env->gpr[rs1]);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s64[j] = sat_sub_s64(env,
> + env->vfp.vreg[src2].s64[j], env->gpr[rs1]);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vrem_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src1, src2, dest, vlmax;
> +
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src1 = rs1 + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src1].s8[j] == 0) {
> + env->vfp.vreg[dest].s8[j] = env->vfp.vreg[src2].s8[j];
> + } else if ((env->vfp.vreg[src2].s8[j] == MIN_S8) &&
> + (env->vfp.vreg[src1].s8[j] == (int8_t)(-1))) {
> + env->vfp.vreg[dest].s8[j] = 0;
> + } else {
> + env->vfp.vreg[dest].s8[j] = env->vfp.vreg[src2].s8[j] %
> + env->vfp.vreg[src1].s8[j];
> + }
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src1].s16[j] == 0) {
> + env->vfp.vreg[dest].s16[j] = env->vfp.vreg[src2].s16[j];
> + } else if ((env->vfp.vreg[src2].s16[j] == MIN_S16) &&
> + (env->vfp.vreg[src1].s16[j] == (int16_t)(-1))) {
> + env->vfp.vreg[dest].s16[j] = 0;
> + } else {
> + env->vfp.vreg[dest].s16[j] = env->vfp.vreg[src2].s16[j]
> + % env->vfp.vreg[src1].s16[j];
> + }
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src1].s32[j] == 0) {
> + env->vfp.vreg[dest].s32[j] = env->vfp.vreg[src2].s32[j];
> + } else if ((env->vfp.vreg[src2].s32[j] == MIN_S32) &&
> + (env->vfp.vreg[src1].s32[j] == (int32_t)(-1))) {
> + env->vfp.vreg[dest].s32[j] = 0;
> + } else {
> + env->vfp.vreg[dest].s32[j] = env->vfp.vreg[src2].s32[j]
> + % env->vfp.vreg[src1].s32[j];
> + }
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src1].s64[j] == 0) {
> + env->vfp.vreg[dest].s64[j] = env->vfp.vreg[src2].s64[j];
> + } else if ((env->vfp.vreg[src2].s64[j] == MIN_S64) &&
> + (env->vfp.vreg[src1].s64[j] == (int64_t)(-1))) {
> + env->vfp.vreg[dest].s64[j] = 0;
> + } else {
> + env->vfp.vreg[dest].s64[j] = env->vfp.vreg[src2].s64[j]
> + % env->vfp.vreg[src1].s64[j];
> + }
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +void VECTOR_HELPER(vrem_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src2, dest, vlmax;
> +
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if ((int8_t)env->gpr[rs1] == 0) {
> + env->vfp.vreg[dest].s8[j] = env->vfp.vreg[src2].s8[j];
> + } else if ((env->vfp.vreg[src2].s8[j] == MIN_S8) &&
> + ((int8_t)env->gpr[rs1] == (int8_t)(-1))) {
> + env->vfp.vreg[dest].s8[j] = 0;
> + } else {
> + env->vfp.vreg[dest].s8[j] = env->vfp.vreg[src2].s8[j] %
> + (int8_t)env->gpr[rs1];
> + }
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if ((int16_t)env->gpr[rs1] == 0) {
> + env->vfp.vreg[dest].s16[j] = env->vfp.vreg[src2].s16[j];
> + } else if ((env->vfp.vreg[src2].s16[j] == MIN_S16) &&
> + ((int16_t)env->gpr[rs1] == (int16_t)(-1))) {
> + env->vfp.vreg[dest].s16[j] = 0;
> + } else {
> + env->vfp.vreg[dest].s16[j] = env->vfp.vreg[src2].s16[j]
> + % (int16_t)env->gpr[rs1];
> + }
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if ((int32_t)env->gpr[rs1] == 0) {
> + env->vfp.vreg[dest].s32[j] = env->vfp.vreg[src2].s32[j];
> + } else if ((env->vfp.vreg[src2].s32[j] == MIN_S32) &&
> + ((int32_t)env->gpr[rs1] == (int32_t)(-1))) {
> + env->vfp.vreg[dest].s32[j] = 0;
> + } else {
> + env->vfp.vreg[dest].s32[j] = env->vfp.vreg[src2].s32[j]
> + % (int32_t)env->gpr[rs1];
> + }
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if ((int64_t)extend_gpr(env->gpr[rs1]) == 0) {
> + env->vfp.vreg[dest].s64[j] = env->vfp.vreg[src2].s64[j];
> + } else if ((env->vfp.vreg[src2].s64[j] == MIN_S64) &&
> + ((int64_t)extend_gpr(env->gpr[rs1]) == (int64_t)(-1))) {
> + env->vfp.vreg[dest].s64[j] = 0;
> + } else {
> + env->vfp.vreg[dest].s64[j] = env->vfp.vreg[src2].s64[j]
> + % (int64_t)extend_gpr(env->gpr[rs1]);
> + }
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> +
> + env->vfp.vstart = 0;
> +}
> +
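> +/*
> + * As with scalar REM, the two special cases of vrem are folded in
> + * explicitly because the C '%' operator is undefined on them: x % 0
> + * returns the dividend x, and the overflowing MIN % -1 returns 0 (the
> + * exact remainder), matching the vdiv quotient rules above.
> + */
> +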
> +/* vaadd.vv vd, vs2, vs1, vm # Vector-vector */
> +void VECTOR_HELPER(vaadd_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest, src1, src2;
> +
> + lmul = vector_get_lmul(env);
> +
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + vl = env->vfp.vl;
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + src1 = rs1 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s8[j] = avg_round_s8(env,
> + env->vfp.vreg[src1].s8[j], env->vfp.vreg[src2].s8[j]);
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s16[j] = avg_round_s16(env,
> + env->vfp.vreg[src1].s16[j], env->vfp.vreg[src2].s16[j]);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s32[j] = avg_round_s32(env,
> + env->vfp.vreg[src1].s32[j], env->vfp.vreg[src2].s32[j]);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s64[j] = avg_round_s64(env,
> + env->vfp.vreg[src1].s64[j], env->vfp.vreg[src2].s64[j]);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vaadd.vx vd, vs2, rs1, vm # vector-scalar */
> +void VECTOR_HELPER(vaadd_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest, src2;
> +
> + lmul = vector_get_lmul(env);
> +
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + vl = env->vfp.vl;
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s8[j] = avg_round_s8(env,
> + env->gpr[rs1], env->vfp.vreg[src2].s8[j]);
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s16[j] = avg_round_s16(env,
> + env->gpr[rs1], env->vfp.vreg[src2].s16[j]);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s32[j] = avg_round_s32(env,
> + env->gpr[rs1], env->vfp.vreg[src2].s32[j]);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s64[j] = avg_round_s64(env,
> + env->gpr[rs1], env->vfp.vreg[src2].s64[j]);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vaadd.vi vd, vs2, imm, vm # vector-immediate */
> +void VECTOR_HELPER(vaadd_vi)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest, src2;
> +
> + lmul = vector_get_lmul(env);
> +
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + vl = env->vfp.vl;
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s8[j] = avg_round_s8(env,
> + sign_extend(rs1, 5), env->vfp.vreg[src2].s8[j]);
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s16[j] = avg_round_s16(env,
> + sign_extend(rs1, 5), env->vfp.vreg[src2].s16[j]);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s32[j] = avg_round_s32(env,
> + sign_extend(rs1, 5), env->vfp.vreg[src2].s32[j]);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s64[j] = avg_round_s64(env,
> + sign_extend(rs1, 5), env->vfp.vreg[src2].s64[j]);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
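> +/*
> + * vaadd is an averaging add: the avg_round_* helpers are assumed to
> + * evaluate roughly (a + b + rounding_bit) >> 1 in a wider type, so the
> + * intermediate sum cannot overflow. E.g. avg_round_s8(5, 6) -> 6 when
> + * the rounding bit rounds the odd sum up.
> + */
> +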
> +void VECTOR_HELPER(vmulhu_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src1, src2, dest, vlmax;
> +
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src1 = rs1 + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u8[j] =
> + ((uint16_t)env->vfp.vreg[src1].u8[j]
> + * (uint16_t)env->vfp.vreg[src2].u8[j]) >> width;
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u16[j] =
> + ((uint32_t)env->vfp.vreg[src1].u16[j]
> + * (uint32_t)env->vfp.vreg[src2].u16[j]) >> width;
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u32[j] =
> + ((uint64_t)env->vfp.vreg[src1].u32[j]
> + * (uint64_t)env->vfp.vreg[src2].u32[j]) >> width;
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u64[j] = u64xu64_lh(
> + env->vfp.vreg[src1].u64[j], env->vfp.vreg[src2].u64[j]);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +void VECTOR_HELPER(vmulhu_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src2, dest, vlmax;
> +
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u8[j] =
> + ((uint16_t)(uint8_t)env->gpr[rs1]
> + * (uint16_t)env->vfp.vreg[src2].u8[j]) >> width;
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u16[j] =
> + ((uint32_t)(uint16_t)env->gpr[rs1]
> + * (uint32_t)env->vfp.vreg[src2].u16[j]) >> width;
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u32[j] =
> + ((uint64_t)(uint32_t)env->gpr[rs1]
> + * (uint64_t)env->vfp.vreg[src2].u32[j]) >> width;
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u64[j] = u64xu64_lh(
> + (uint64_t)extend_gpr(env->gpr[rs1]),
> + env->vfp.vreg[src2].u64[j]);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> +
> + env->vfp.vstart = 0;
> +}
> +
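> +/*
> + * vmulhu keeps the high half of the 2*SEW-bit product: the narrow
> + * cases widen both operands and shift the product right by SEW, while
> + * the 64-bit case has no 128-bit type to widen into and instead uses
> + * u64xu64_lh (assumed to return the upper 64 bits of the full 64x64
> + * product).
> + */
> +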
> +/* vfmul.vv vd, vs2, vs1, vm # Vector-vector */
> +void VECTOR_HELPER(vfmul_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest, src1, src2;
> +
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + src1 = rs1 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f16[j] = float16_mul(
> + env->vfp.vreg[src1].f16[j],
> + env->vfp.vreg[src2].f16[j],
> + &env->fp_status);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f32[j] = float32_mul(
> + env->vfp.vreg[src1].f32[j],
> + env->vfp.vreg[src2].f32[j],
> + &env->fp_status);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f64[j] = float64_mul(
> + env->vfp.vreg[src1].f64[j],
> + env->vfp.vreg[src2].f64[j],
> + &env->fp_status);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_fcommon(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vfmul.vf vd, vs2, rs1, vm # vector-scalar */
> +void VECTOR_HELPER(vfmul_vf)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest, src2;
> +
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f16[j] = float16_mul(
> + env->fpr[rs1],
> + env->vfp.vreg[src2].f16[j],
> + &env->fp_status);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f32[j] = float32_mul(
> + env->fpr[rs1],
> + env->vfp.vreg[src2].f32[j],
> + &env->fp_status);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f64[j] = float64_mul(
> + env->fpr[rs1],
> + env->vfp.vreg[src2].f64[j],
> + &env->fp_status);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_fcommon(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +void VECTOR_HELPER(vsll_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src1, src2, dest, vlmax;
> +
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> +
> + for (i = 0; i < vlmax; i++) {
> + src1 = rs1 + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u8[j] = env->vfp.vreg[src2].u8[j]
> + << (env->vfp.vreg[src1].u8[j] & 0x7);
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u16[j] = env->vfp.vreg[src2].u16[j]
> + << (env->vfp.vreg[src1].u16[j] & 0xf);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u32[j] = env->vfp.vreg[src2].u32[j]
> + << (env->vfp.vreg[src1].u32[j] & 0x1f);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u64[j] = env->vfp.vreg[src2].u64[j]
> + << (env->vfp.vreg[src1].u64[j] & 0x3f);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
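> +/*
> + * For the register-operand shifts only the low log2(SEW) bits of the
> + * shift amount are used, hence the per-width & 0x7 / 0xf / 0x1f / 0x3f
> + * masks; e.g. at SEW=8 a shift count of 9 shifts by 1.
> + */
> +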
> +void VECTOR_HELPER(vsll_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src2, dest, vlmax;
> +
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u8[j] = env->vfp.vreg[src2].u8[j]
> + << (env->gpr[rs1] & 0x7);
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u16[j] = env->vfp.vreg[src2].u16[j]
> + << (env->gpr[rs1] & 0xf);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u32[j] = env->vfp.vreg[src2].u32[j]
> + << (env->gpr[rs1] & 0x1f);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u64[j] = env->vfp.vreg[src2].u64[j]
> + << ((uint64_t)extend_gpr(env->gpr[rs1]) & 0x3f);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vsll_vi)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src2, dest, vlmax;
> +
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u8[j] = env->vfp.vreg[src2].u8[j]
> + << (rs1);
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u16[j] = env->vfp.vreg[src2].u16[j]
> + << (rs1);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u32[j] = env->vfp.vreg[src2].u32[j]
> + << (rs1);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u64[j] =
> env->vfp.vreg[src2].u64[j]
> + << (rs1);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST,
> GETPC());
> + break;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
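The vv/vx shift forms above mask the shift amount down to the low lg2(SEW)
bits, per the 0.7.1 rule that only those bits of the shift operand are used;
the vi forms apply the same masking to the 5-bit immediate. As a reference,
a minimal sketch of the intended per-element semantics (helper names here
are illustrative, not from the patch):

    #include <stdint.h>

    /* Only the low lg2(SEW) bits of the shift amount participate,
     * for register and immediate shift forms alike. */
    static inline uint8_t vsll8_ref(uint8_t a, uint64_t shamt)
    {
        return a << (shamt & 0x7);      /* SEW = 8: 3 shift bits */
    }

    static inline uint64_t vsll64_ref(uint64_t a, uint64_t shamt)
    {
        return a << (shamt & 0x3f);     /* SEW = 64: 6 shift bits */
    }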
> +
> +void VECTOR_HELPER(vmul_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src1, src2, dest, vlmax;
> +
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src1 = rs1 + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s8[j] = env->vfp.vreg[src1].s8[j]
> + * env->vfp.vreg[src2].s8[j];
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s16[j] = env->vfp.vreg[src1].s16[j]
> + * env->vfp.vreg[src2].s16[j];
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s32[j] = env->vfp.vreg[src1].s32[j]
> + * env->vfp.vreg[src2].s32[j];
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s64[j] = env->vfp.vreg[src1].s64[j]
> + * env->vfp.vreg[src2].s64[j];
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vmul_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src2, dest, vlmax;
> +
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s8[j] = env->gpr[rs1]
> + * env->vfp.vreg[src2].s8[j];
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s16[j] = env->gpr[rs1]
> + * env->vfp.vreg[src2].s16[j];
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s32[j] = env->gpr[rs1]
> + * env->vfp.vreg[src2].s32[j];
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s64[j] =
> + (int64_t)extend_gpr(env->gpr[rs1])
> + * env->vfp.vreg[src2].s64[j];
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vasub.vv vd, vs2, vs1, vm # Vector-vector */
> +void VECTOR_HELPER(vasub_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest, src1, src2;
> +
> + lmul = vector_get_lmul(env);
> +
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + vl = env->vfp.vl;
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + src1 = rs1 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s8[j] = avg_round_s8(
> + env,
> + ~env->vfp.vreg[src1].s8[j] + 1,
> + env->vfp.vreg[src2].s8[j]);
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s16[j] = avg_round_s16(
> + env,
> + ~env->vfp.vreg[src1].s16[j] + 1,
> + env->vfp.vreg[src2].s16[j]);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s32[j] = avg_round_s32(
> + env,
> + ~env->vfp.vreg[src1].s32[j] + 1,
> + env->vfp.vreg[src2].s32[j]);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s64[j] = avg_round_s64(
> + env,
> + ~env->vfp.vreg[src1].s64[j] + 1,
> + env->vfp.vreg[src2].s64[j]);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vasub.vx vd, vs2, rs1, vm # vector-scalar */
> +void VECTOR_HELPER(vasub_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest, src2;
> +
> + lmul = vector_get_lmul(env);
> +
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + vl = env->vfp.vl;
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s8[j] = avg_round_s8(
> + env, ~env->gpr[rs1] + 1, env->vfp.vreg[src2].s8[j]);
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s16[j] = avg_round_s16(
> + env, ~env->gpr[rs1] + 1, env->vfp.vreg[src2].s16[j]);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s32[j] = avg_round_s32(
> + env, ~env->gpr[rs1] + 1, env->vfp.vreg[src2].s32[j]);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s64[j] = avg_round_s64(
> + env, ~env->gpr[rs1] + 1, env->vfp.vreg[src2].s64[j]);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
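vasub negates the subtrahend (~x + 1) before handing it to the avg_round_*
helpers, so those only need to implement the rounding average
(a + b + round) >> 1. They are not defined in this hunk; a minimal sketch of
what avg_round_s8() is assumed to compute for the default rounding mode
(vxrm == rnu, round-to-nearest-up) would be:

    #include <stdint.h>

    /* Assumed shape of avg_round_s8() for rnu only; the real helper
     * presumably switches on all four vxrm rounding modes. */
    static inline int8_t avg_round_s8_rnu(int16_t a, int16_t b)
    {
        int16_t sum = a + b;                      /* widened: no overflow */
        return (int8_t)((sum >> 1) + (sum & 1));  /* (v >> 1) + v[0] */
    }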
> +
> +void VECTOR_HELPER(vmulhsu_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src1, src2, dest, vlmax;
> +
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src1 = rs1 + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s8[j] =
> + ((uint16_t)env->vfp.vreg[src1].u8[j]
> + * (int16_t)env->vfp.vreg[src2].s8[j]) >> width;
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s16[j] =
> + ((uint32_t)env->vfp.vreg[src1].u16[j]
> + * (int32_t)env->vfp.vreg[src2].s16[j]) >> width;
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s32[j] =
> + ((uint64_t)env->vfp.vreg[src1].u32[j]
> + * (int64_t)env->vfp.vreg[src2].s32[j]) >> width;
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s64[j] = s64xu64_lh(
> + env->vfp.vreg[src2].s64[j], env->vfp.vreg[src1].u64[j]);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vmulhsu_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src2, dest, vlmax;
> +
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s8[j] =
> + ((uint16_t)(uint8_t)env->gpr[rs1]
> + * (int16_t)env->vfp.vreg[src2].s8[j]) >> width;
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s16[j] =
> + ((uint32_t)(uint16_t)env->gpr[rs1]
> + * (int32_t)env->vfp.vreg[src2].s16[j]) >> width;
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s32[j] =
> + ((uint64_t)(uint32_t)env->gpr[rs1]
> + * (int64_t)env->vfp.vreg[src2].s32[j]) >> width;
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s64[j] = s64xu64_lh(
> + env->vfp.vreg[src2].s64[j],
> + (uint64_t)extend_gpr(env->gpr[rs1]));
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
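s64xu64_lh() (the SEW = 64 case of vmulhsu) has to return the high 64 bits
of a signed x unsigned product, which cannot be had by widening in plain
64-bit C. The helper is not in this hunk; where the compiler provides
__int128, a sketch could be:

    #include <stdint.h>

    /* Assumed semantics of s64xu64_lh(): high 64 bits of the exact
     * 128-bit product of a signed and an unsigned 64-bit value. */
    static inline int64_t s64xu64_lh_sketch(int64_t a, uint64_t b)
    {
        /* __int128 holds every uint64_t, so the product is exact; the
         * arithmetic shift (as GCC/Clang define it on signed values)
         * then yields floor(a * b / 2^64). */
        return (int64_t)(((__int128)a * b) >> 64);
    }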
> +
> +/* vsmul.vv vd, vs2, vs1, vm # vd[i] = clip((vs2[i]*vs1[i]+round)>>(SEW-1)) */
> +void VECTOR_HELPER(vsmul_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest, src1, src2;
> + if (vector_vtype_ill(env)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> + if ((!(vm)) && rd == 0) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + src1 = rs1 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s8[j] = vsmul_8(env,
> + env->vfp.vreg[src1].s8[j], env->vfp.vreg[src2].s8[j]);
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s16[j] = vsmul_16(env,
> + env->vfp.vreg[src1].s16[j], env->vfp.vreg[src2].s16[j]);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s32[j] = vsmul_32(env,
> + env->vfp.vreg[src1].s32[j], env->vfp.vreg[src2].s32[j]);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s64[j] = vsmul_64(env,
> + env->vfp.vreg[src1].s64[j], env->vfp.vreg[src2].s64[j]);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vsmul.vx vd, vs2, rs1, vm # vd[i] = clip((vs2[i]*x[rs1]+round)>>(SEW-1)) */
> +void VECTOR_HELPER(vsmul_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest, src2;
> + if (vector_vtype_ill(env)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> + if ((!(vm)) && rd == 0) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s8[j] = vsmul_8(env,
> + env->vfp.vreg[src2].s8[j], env->gpr[rs1]);
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s16[j] = vsmul_16(env,
> + env->vfp.vreg[src2].s16[j], env->gpr[rs1]);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s32[j] = vsmul_32(env,
> + env->vfp.vreg[src2].s32[j], env->gpr[rs1]);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s64[j] = vsmul_64(env,
> + env->vfp.vreg[src2].s64[j], env->gpr[rs1]);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
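vsmul_8() and friends are likewise only referenced here. Per the comment
above, the operation is clip((vs2 * vs1 + round) >> (SEW - 1)) with signed
saturation, and the only product that can clip is INT8_MIN * INT8_MIN. A
sketch for the rnu rounding mode (rounding increment 1 << (SEW - 2)), with
*sat standing in for the vxsat write in the real helper:

    #include <stdbool.h>
    #include <stdint.h>

    static inline int8_t vsmul_8_sketch(int8_t a, int8_t b, bool *sat)
    {
        if (a == INT8_MIN && b == INT8_MIN) {
            *sat = true;                     /* 2^14 >> 7 == 128 overflows */
            return INT8_MAX;
        }
        int16_t prod = (int16_t)a * b;       /* exact in 16 bits */
        return (int8_t)((prod + 64) >> 7);   /* + (1 << 6), then >> (SEW-1) */
    }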
> +
> +void VECTOR_HELPER(vmulh_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src1, src2, dest, vlmax;
> +
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src1 = rs1 + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s8[j] =
> + ((int16_t)env->vfp.vreg[src1].s8[j]
> + * (int16_t)env->vfp.vreg[src2].s8[j]) >> width;
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s16[j] =
> + ((int32_t)env->vfp.vreg[src1].s16[j]
> + * (int32_t)env->vfp.vreg[src2].s16[j]) >> width;
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s32[j] =
> + ((int64_t)env->vfp.vreg[src1].s32[j]
> + * (int64_t)env->vfp.vreg[src2].s32[j]) >> width;
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s64[j] = s64xs64_lh(
> + env->vfp.vreg[src1].s64[j], env->vfp.vreg[src2].s64[j]);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vmulh_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src2, dest, vlmax;
> +
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s8[j] =
> + ((int16_t)(int8_t)env->gpr[rs1]
> + * (int16_t)env->vfp.vreg[src2].s8[j]) >> width;
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s16[j] =
> + ((int32_t)(int16_t)env->gpr[rs1]
> + * (int32_t)env->vfp.vreg[src2].s16[j]) >> width;
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s32[j] =
> + ((int64_t)(int32_t)env->gpr[rs1]
> + * (int64_t)env->vfp.vreg[src2].s32[j]) >> width;
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s64[j] = s64xs64_lh(
> + (int64_t)extend_gpr(env->gpr[rs1]),
> + env->vfp.vreg[src2].s64[j]);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
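The same remark applies to s64xs64_lh() used by vmulh at SEW = 64: the
narrower cases widen and shift right by SEW, while the 64-bit case needs a
128-bit product. With __int128 a sketch is one line:

    #include <stdint.h>

    /* Assumed semantics of s64xs64_lh(): high 64 bits of the exact
     * signed 64x64 product, i.e. what vmulh needs at SEW = 64. */
    static inline int64_t s64xs64_lh_sketch(int64_t a, int64_t b)
    {
        return (int64_t)(((__int128)a * b) >> 64);
    }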
> +
> +/* vfrsub.vf vd, vs2, rs1, vm # Scalar-vector vd[i] = f[rs1] - vs2[i] */
> +void VECTOR_HELPER(vfrsub_vf)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest, src2;
> +
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f16[j] = float16_sub(
> + env->fpr[rs1],
> + env->vfp.vreg[src2].f16[j],
> + &env->fp_status);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f32[j] = float32_sub(
> + env->fpr[rs1],
> + env->vfp.vreg[src2].f32[j],
> + &env->fp_status);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f64[j] = float64_sub(
> + env->fpr[rs1],
> + env->vfp.vreg[src2].f64[j],
> + &env->fp_status);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_fcommon(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vsrl_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src1, src2, dest, vlmax;
> +
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> +
> + for (i = 0; i < vlmax; i++) {
> + src1 = rs1 + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u8[j] = env->vfp.vreg[src2].u8[j]
> + >> (env->vfp.vreg[src1].u8[j] & 0x7);
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u16[j] = env->vfp.vreg[src2].u16[j]
> + >> (env->vfp.vreg[src1].u16[j] & 0xf);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u32[j] = env->vfp.vreg[src2].u32[j]
> + >> (env->vfp.vreg[src1].u32[j] & 0x1f);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u64[j] = env->vfp.vreg[src2].u64[j]
> + >> (env->vfp.vreg[src1].u64[j] & 0x3f);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vsrl_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src2, dest, vlmax;
> +
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u8[j] = env->vfp.vreg[src2].u8[j]
> + >> (env->gpr[rs1] & 0x7);
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u16[j] = env->vfp.vreg[src2].u16[j]
> + >> (env->gpr[rs1] & 0xf);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u32[j] = env->vfp.vreg[src2].u32[j]
> + >> (env->gpr[rs1] & 0x1f);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u64[j] = env->vfp.vreg[src2].u64[j]
> + >> ((uint64_t)extend_gpr(env->gpr[rs1]) & 0x3f);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vsrl_vi)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src2, dest, vlmax;
> +
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u8[j] = env->vfp.vreg[src2].u8[j]
> + >> (rs1 & 0x7);
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u16[j] = env->vfp.vreg[src2].u16[j]
> + >> (rs1 & 0xf);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u32[j] = env->vfp.vreg[src2].u32[j]
> + >> (rs1 & 0x1f);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u64[j] = env->vfp.vreg[src2].u64[j]
> + >> (rs1 & 0x3f);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vfmadd.vv vd, vs1, vs2, vm # vd[i] = +(vs1[i] * vd[i]) + vs2[i] */
> +void VECTOR_HELPER(vfmadd_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest, src1, src2;
> +
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + src1 = rs1 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f16[j] = float16_muladd(
> + env->vfp.vreg[src1].f16[j],
> + env->vfp.vreg[dest].f16[j],
> + env->vfp.vreg[src2].f16[j],
> + 0,
> + &env->fp_status);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f32[j] = float32_muladd(
> + env->vfp.vreg[src1].f32[j],
> + env->vfp.vreg[dest].f32[j],
> + env->vfp.vreg[src2].f32[j],
> + 0,
> + &env->fp_status);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f64[j] = float64_muladd(
> + env->vfp.vreg[src1].f64[j],
> + env->vfp.vreg[dest].f64[j],
> + env->vfp.vreg[src2].f64[j],
> + 0,
> + &env->fp_status);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_fcommon(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vfmadd.vf vd, rs1, vs2, vm # vd[i] = +(f[rs1] * vd[i]) + vs2[i] */
> +void VECTOR_HELPER(vfmadd_vf)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest, src2;
> +
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f16[j] = float16_muladd(
> + env->fpr[rs1],
> + env->vfp.vreg[dest].f16[j],
> + env->vfp.vreg[src2].f16[j],
> + 0,
> + &env->fp_status);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f32[j] = float32_muladd(
> + env->fpr[rs1],
> + env->vfp.vreg[dest].f32[j],
> + env->vfp.vreg[src2].f32[j],
> + 0,
> + &env->fp_status);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f64[j] = float64_muladd(
> + env->fpr[rs1],
> + env->vfp.vreg[dest].f64[j],
> + env->vfp.vreg[src2].f64[j],
> + 0,
> + &env->fp_status);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_fcommon(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vsra_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src1, src2, dest, vlmax;
> +
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> +
> + for (i = 0; i < vlmax; i++) {
> + src1 = rs1 + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s8[j] = env->vfp.vreg[src2].s8[j]
> + >> (env->vfp.vreg[src1].s8[j] & 0x7);
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s16[j] = env->vfp.vreg[src2].s16[j]
> + >> (env->vfp.vreg[src1].s16[j] & 0xf);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s32[j] = env->vfp.vreg[src2].s32[j]
> + >> (env->vfp.vreg[src1].s32[j] & 0x1f);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s64[j] = env->vfp.vreg[src2].s64[j]
> + >> (env->vfp.vreg[src1].s64[j] & 0x3f);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vsra_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src2, dest, vlmax;
> +
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s8[j] = env->vfp.vreg[src2].s8[j]
> + >> (env->gpr[rs1] & 0x7);
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s16[j] = env->vfp.vreg[src2].s16[j]
> + >> (env->gpr[rs1] & 0xf);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s32[j] = env->vfp.vreg[src2].s32[j]
> + >> (env->gpr[rs1] & 0x1f);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s64[j] = env->vfp.vreg[src2].s64[j]
> + >> ((uint64_t)extend_gpr(env->gpr[rs1]) & 0x3f);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vsra_vi)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src2, dest, vlmax;
> +
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s8[j] = env->vfp.vreg[src2].s8[j]
> + >> (rs1 & 0x7);
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s16[j] = env->vfp.vreg[src2].s16[j]
> + >> (rs1 & 0xf);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s32[j] = env->vfp.vreg[src2].s32[j]
> + >> (rs1 & 0x1f);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s64[j] = env->vfp.vreg[src2].s64[j]
> + >> (rs1 & 0x3f);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vmadd_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src1, src2, dest, vlmax;
> +
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src1 = rs1 + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s8[j] = env->vfp.vreg[src1].s8[j]
> + * env->vfp.vreg[dest].s8[j]
> + + env->vfp.vreg[src2].s8[j];
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s16[j] = env->vfp.vreg[src1].s16[j]
> + * env->vfp.vreg[dest].s16[j]
> + + env->vfp.vreg[src2].s16[j];
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s32[j] = env->vfp.vreg[src1].s32[j]
> + * env->vfp.vreg[dest].s32[j]
> + + env->vfp.vreg[src2].s32[j];
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s64[j] = env->vfp.vreg[src1].s64[j]
> + * env->vfp.vreg[dest].s64[j]
> + + env->vfp.vreg[src2].s64[j];
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vmadd_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src2, dest, vlmax;
> +
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s8[j] = env->gpr[rs1]
> + * env->vfp.vreg[dest].s8[j]
> + + env->vfp.vreg[src2].s8[j];
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s16[j] = env->gpr[rs1]
> + * env->vfp.vreg[dest].s16[j]
> + + env->vfp.vreg[src2].s16[j];
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s32[j] = env->gpr[rs1]
> + * env->vfp.vreg[dest].s32[j]
> + + env->vfp.vreg[src2].s32[j];
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s64[j] =
> + (int64_t)extend_gpr(env->gpr[rs1])
> + * env->vfp.vreg[dest].s64[j]
> + + env->vfp.vreg[src2].s64[j];
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> +
> + env->vfp.vstart = 0;
> +}
> +
> +/* vfnmadd.vv vd, vs1, vs2, vm # vd[i] = -(vs1[i] * vd[i]) - vs2[i] */
> +void VECTOR_HELPER(vfnmadd_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest, src1, src2;
> +
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + src1 = rs1 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f16[j] = float16_muladd(
> + env->vfp.vreg[src1].f16[j],
> + env->vfp.vreg[dest].f16[j],
> + env->vfp.vreg[src2].f16[j],
> + float_muladd_negate_c |
> + float_muladd_negate_product,
> + &env->fp_status);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f32[j] = float32_muladd(
> + env->vfp.vreg[src1].f32[j],
> + env->vfp.vreg[dest].f32[j],
> + env->vfp.vreg[src2].f32[j],
> + float_muladd_negate_c |
> + float_muladd_negate_product,
> + &env->fp_status);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f64[j] = float64_muladd(
> + env->vfp.vreg[src1].f64[j],
> + env->vfp.vreg[dest].f64[j],
> + env->vfp.vreg[src2].f64[j],
> + float_muladd_negate_c |
> + float_muladd_negate_product,
> + &env->fp_status);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_fcommon(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vfnmadd.vf vd, rs1, vs2, vm # vd[i] = -(f[rs1] * vd[i]) - vs2[i] */
> +void VECTOR_HELPER(vfnmadd_vf)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest, src2;
> +
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f16[j] = float16_muladd(
> + env->fpr[rs1],
> + env->vfp.vreg[dest].f16[j],
> + env->vfp.vreg[src2].f16[j],
> + float_muladd_negate_c |
> + float_muladd_negate_product,
> + &env->fp_status);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f32[j] = float32_muladd(
> + env->fpr[rs1],
> + env->vfp.vreg[dest].f32[j],
> + env->vfp.vreg[src2].f32[j],
> + float_muladd_negate_c |
> + float_muladd_negate_product,
> + &env->fp_status);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f64[j] = float64_muladd(
> + env->fpr[rs1],
> + env->vfp.vreg[dest].f64[j],
> + env->vfp.vreg[src2].f64[j],
> + float_muladd_negate_c |
> + float_muladd_negate_product,
> + &env->fp_status);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_fcommon(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
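The whole vfmadd/vfnmadd/vfmsub/vfnmsub family reduces to one softfloat
muladd call with different negation flags. Tabulating the mapping used in
the helpers above (vfnmsub is not in this hunk; its flag choice is inferred
from the spec, and the helper below is illustrative only):

    #include <stdbool.h>
    #include "fpu/softfloat.h"

    /* Flag selection for the fused forms, computing (a * b) + c:
     *   vfmadd:   (vs1 * vd) + vs2  ->  0
     *   vfmsub:   (vs1 * vd) - vs2  ->  float_muladd_negate_c
     *   vfnmadd: -(vs1 * vd) - vs2  ->  negate_product | negate_c
     *   vfnmsub: -(vs1 * vd) + vs2  ->  float_muladd_negate_product
     */
    static inline int vfmuladd_flags(bool neg_product, bool neg_addend)
    {
        return (neg_product ? float_muladd_negate_product : 0) |
               (neg_addend ? float_muladd_negate_c : 0);
    }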
> +
> +/* vssrl.vv vd, vs2, vs1, vm # vd[i] = ((vs2[i] + round)>>vs1[i]) */
> +void VECTOR_HELPER(vssrl_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest, src1, src2;
> +
> + lmul = vector_get_lmul(env);
> +
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + vl = env->vfp.vl;
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + src1 = rs1 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u8[j] = vssrl_8(env,
> + env->vfp.vreg[src2].u8[j], env->vfp.vreg[src1].u8[j]);
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u16[j] = vssrl_16(env,
> + env->vfp.vreg[src2].u16[j], env->vfp.vreg[src1].u16[j]);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u32[j] = vssrl_32(env,
> + env->vfp.vreg[src2].u32[j], env->vfp.vreg[src1].u32[j]);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u64[j] = vssrl_64(env,
> + env->vfp.vreg[src2].u64[j], env->vfp.vreg[src1].u64[j]);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vssrl.vx vd, vs2, rs1, vm # vd[i] = ((vs2[i] + round)>>x[rs1]) */
> +void VECTOR_HELPER(vssrl_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest, src2;
> +
> + lmul = vector_get_lmul(env);
> +
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + vl = env->vfp.vl;
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u8[j] = vssrl_8(env,
> + env->vfp.vreg[src2].u8[j], env->gpr[rs1]);
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u16[j] = vssrl_16(env,
> + env->vfp.vreg[src2].u16[j], env->gpr[rs1]);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u32[j] = vssrl_32(env,
> + env->vfp.vreg[src2].u32[j], env->gpr[rs1]);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u64[j] = vssrl_64(env,
> + env->vfp.vreg[src2].u64[j], env->gpr[rs1]);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vssrl.vi vd, vs2, imm, vm # vd[i] = ((vs2[i] + round)>>imm) */
> +void VECTOR_HELPER(vssrl_vi)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest, src2;
> +
> + lmul = vector_get_lmul(env);
> +
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + vl = env->vfp.vl;
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u8[j] = vssrli_8(env,
> + env->vfp.vreg[src2].u8[j], rs1);
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u16[j] = vssrli_16(env,
> + env->vfp.vreg[src2].u16[j], rs1);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u32[j] = vssrli_32(env,
> + env->vfp.vreg[src2].u32[j], rs1);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u64[j] = vssrli_64(env,
> + env->vfp.vreg[src2].u64[j], rs1);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
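The vssrl_* and vssrli_* helpers are referenced but not defined in this
hunk. For rnu the rounding increment is bit shamt-1 of the source, so a
sketch of the SEW = 8 case (name illustrative) would be:

    #include <stdint.h>

    /* Assumed shape of vssrl_8() for vxrm == rnu: (v >> d) + v[d-1],
     * with the shift amount masked to the low lg2(SEW) bits. */
    static inline uint8_t vssrl_8_rnu(uint8_t v, uint8_t shamt)
    {
        shamt &= 0x7;
        if (shamt == 0) {
            return v;                    /* no shift, no rounding bit */
        }
        uint8_t round = (v >> (shamt - 1)) & 1;
        return (v >> shamt) + round;
    }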
> +
> +/* vfmsub.vv vd, vs1, vs2, vm # vd[i] = +(vs1[i] * vd[i]) - vs2[i] */
> +void VECTOR_HELPER(vfmsub_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest, src1, src2;
> +
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + src1 = rs1 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f16[j] = float16_muladd(
> + env->vfp.vreg[src1].f16[j],
> + env->vfp.vreg[dest].f16[j],
> + env->vfp.vreg[src2].f16[j],
> + float_muladd_negate_c,
> + &env->fp_status);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f32[j] = float32_muladd(
> + env->vfp.vreg[src1].f32[j],
> + env->vfp.vreg[dest].f32[j],
> + env->vfp.vreg[src2].f32[j],
> + float_muladd_negate_c,
> + &env->fp_status);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f64[j] = float64_muladd(
> + env->vfp.vreg[src1].f64[j],
> + env->vfp.vreg[dest].f64[j],
> + env->vfp.vreg[src2].f64[j],
> + float_muladd_negate_c,
> + &env->fp_status);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_fcommon(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vfmsub.vf vd, rs1, vs2, vm # vd[i] = +(f[rs1] * vd[i]) - vs2[i] */
> +void VECTOR_HELPER(vfmsub_vf)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest, src2;
> +
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f16[j] = float16_muladd(
> + env->fpr[rs1],
> + env->vfp.vreg[dest].f16[j],
> + env->vfp.vreg[src2].f16[j],
> + float_muladd_negate_c,
> + &env->fp_status);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f32[j] = float32_muladd(
> + env->fpr[rs1],
> + env->vfp.vreg[dest].f32[j],
> + env->vfp.vreg[src2].f32[j],
> + float_muladd_negate_c,
> + &env->fp_status);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f64[j] = float64_muladd(
> + env->fpr[rs1],
> + env->vfp.vreg[dest].f64[j],
> + env->vfp.vreg[src2].f64[j],
> + float_muladd_negate_c,
> + &env->fp_status);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_fcommon(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vssra.vv vd, vs2, vs1, vm # vd[i] = ((vs2[i] + round)>>vs1[i]) */
> +void VECTOR_HELPER(vssra_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest, src1, src2;
> +
> + lmul = vector_get_lmul(env);
> +
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + vl = env->vfp.vl;
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + src1 = rs1 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s8[j] = vssra_8(env,
> + env->vfp.vreg[src2].s8[j], env->vfp.vreg[src1].u8[j]);
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s16[j] = vssra_16(env,
> + env->vfp.vreg[src2].s16[j], env->vfp.vreg[src1].u16[j]);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s32[j] = vssra_32(env,
> + env->vfp.vreg[src2].s32[j], env->vfp.vreg[src1].u32[j]);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s64[j] = vssra_64(env,
> + env->vfp.vreg[src2].s64[j], env->vfp.vreg[src1].u64[j]);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vssra.vx vd, vs2, rs1, vm # vd[i] = ((vs2[i] + round)>>x[rs1]) */
> +void VECTOR_HELPER(vssra_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest, src2;
> +
> + lmul = vector_get_lmul(env);
> +
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + vl = env->vfp.vl;
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s8[j] = vssra_8(env,
> + env->vfp.vreg[src2].s8[j], env->gpr[rs1]);
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s16[j] = vssra_16(env,
> + env->vfp.vreg[src2].s16[j], env->gpr[rs1]);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s32[j] = vssra_32(env,
> + env->vfp.vreg[src2].s32[j], env->gpr[rs1]);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s64[j] = vssra_64(env,
> + env->vfp.vreg[src2].s64[j], env->gpr[rs1]);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST,
> GETPC());
> + return;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vssra.vi vd, vs2, imm, vm # vd[i] = ((vs2[i] + round)>>imm) */
> +void VECTOR_HELPER(vssra_vi)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest, src2;
> +
> + lmul = vector_get_lmul(env);
> +
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + vl = env->vfp.vl;
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s8[j] = vssrai_8(env,
> + env->vfp.vreg[src2].s8[j], rs1);
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s16[j] = vssrai_16(env,
> + env->vfp.vreg[src2].s16[j], rs1);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s32[j] = vssrai_32(env,
> + env->vfp.vreg[src2].s32[j], rs1);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s64[j] = vssrai_64(env,
> + env->vfp.vreg[src2].s64[j], rs1);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
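
The vssra_* helpers above all funnel into vssra_8/16/32/64, which are not in
this hunk.  For readers following along, here is a minimal scalar sketch of
what the SEW=8 lane computes, assuming round-to-nearest-up (RNU) rounding;
the real helpers also consult vxrm, and model_vssra_8 is a hypothetical name,
not a function from the patch:

#include <stdint.h>
#include <stdio.h>

/* Scalar model of vssra at SEW=8: arithmetic shift right with rounding.
 * RNU is hard-coded; the in-tree vssra_8() also reads the vxrm CSR,
 * which this sketch omits. */
static int8_t model_vssra_8(int8_t a, uint8_t shamt)
{
    shamt &= 0x7;                /* only log2(SEW) shift bits are used */
    if (shamt == 0) {
        return a;
    }
    int8_t round = (int8_t)((a >> (shamt - 1)) & 0x1);  /* bit below the cut */
    return (int8_t)((a >> shamt) + round);
}

int main(void)
{
    printf("%d\n", model_vssra_8(-7, 1));   /* (-7 + 1) >> 1 = -3 */
    return 0;
}
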
> +void VECTOR_HELPER(vnmsub_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src1, src2, dest, vlmax;
> +
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src1 = rs1 + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s8[j] = env->vfp.vreg[src2].s8[j]
> + - env->vfp.vreg[src1].s8[j]
> + * env->vfp.vreg[dest].s8[j];
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s16[j] = env->vfp.vreg[src2].s16[j]
> + - env->vfp.vreg[src1].s16[j]
> + * env->vfp.vreg[dest].s16[j];
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s32[j] = env->vfp.vreg[src2].s32[j]
> + - env->vfp.vreg[src1].s32[j]
> + * env->vfp.vreg[dest].s32[j];
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s64[j] = env->vfp.vreg[src2].s64[j]
> + - env->vfp.vreg[src1].s64[j]
> + * env->vfp.vreg[dest].s64[j];
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +void VECTOR_HELPER(vnmsub_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src2, dest, vlmax;
> +
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s8[j] = env->vfp.vreg[src2].s8[j]
> + - env->gpr[rs1]
> + * env->vfp.vreg[dest].s8[j];
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s16[j] = env->vfp.vreg[src2].s16[j]
> + - env->gpr[rs1]
> + * env->vfp.vreg[dest].s16[j];
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s32[j] = env->vfp.vreg[src2].s32[j]
> + - env->gpr[rs1]
> + * env->vfp.vreg[dest].s32[j];
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s64[j] = env->vfp.vreg[src2].s64[j]
> + - (int64_t)extend_gpr(env->gpr[rs1])
> + * env->vfp.vreg[dest].s64[j];
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
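
Worth calling out for vnmsub: unlike vmacc further down, the destination
register is the multiplicand, i.e. vd = vs2 - (vs1 * vd).  A scalar sketch
of the update rule (model_vnmsub_32 is illustrative only; the unsigned
arithmetic keeps the wrap-around defined in C):

#include <stdint.h>
#include <stdio.h>

/* vnmsub update rule: vd is overwritten by vs2 - vs1 * vd. */
static int32_t model_vnmsub_32(int32_t vs1, int32_t vs2, int32_t vd)
{
    return (int32_t)((uint32_t)vs2 - (uint32_t)vs1 * (uint32_t)vd);
}

int main(void)
{
    printf("%d\n", model_vnmsub_32(3, 10, 4));   /* 10 - 3*4 = -2 */
    return 0;
}
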
> +/* vfnmsub.vv vd, vs1, vs2, vm # vd[i] = -(vs1[i] * vd[i]) + vs2[i] */
> +void VECTOR_HELPER(vfnmsub_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest, src1, src2;
> +
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + src1 = rs1 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f16[j] = float16_muladd(
> + env->vfp.vreg[src1].f16[j],
> + env->vfp.vreg[dest].f16[j],
> + env->vfp.vreg[src2].f16[j],
> + float_muladd_negate_product,
> + &env->fp_status);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f32[j] = float32_muladd(
> + env->vfp.vreg[src1].f32[j],
> + env->vfp.vreg[dest].f32[j],
> + env->vfp.vreg[src2].f32[j],
> + float_muladd_negate_product,
> + &env->fp_status);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f64[j] = float64_muladd(
> + env->vfp.vreg[src1].f64[j],
> + env->vfp.vreg[dest].f64[j],
> + env->vfp.vreg[src2].f64[j],
> + float_muladd_negate_product,
> + &env->fp_status);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_fcommon(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vfnmsub.vf vd, rs1, vs2, vm # vd[i] = -(f[rs1] * vd[i]) + vs2[i] */
> +void VECTOR_HELPER(vfnmsub_vf)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest, src2;
> +
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f16[j] = float16_muladd(
> + env->fpr[rs1],
> + env->vfp.vreg[dest].f16[j],
> + env->vfp.vreg[src2].f16[j],
> + float_muladd_negate_product,
> + &env->fp_status);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f32[j] = float32_muladd(
> + env->fpr[rs1],
> + env->vfp.vreg[dest].f32[j],
> + env->vfp.vreg[src2].f32[j],
> + float_muladd_negate_product,
> + &env->fp_status);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f64[j] = float64_muladd(
> + env->fpr[rs1],
> + env->vfp.vreg[dest].f64[j],
> + env->vfp.vreg[src2].f64[j],
> + float_muladd_negate_product,
> + &env->fp_status);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_fcommon(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
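
For reviewers less used to softfloat's fused ops: float_muladd_negate_product
asks float16/32/64_muladd() for -(a * b) + c in a single rounding step, and
float_muladd_negate_c additionally negates the addend.  A plain-double sketch
of the flag combinations this file uses; the model rounds twice, so its
rounding/NaN behaviour differs from softfloat, it only shows which sign goes
where:

#include <stdio.h>

/* Unfused model of the softfloat muladd flags used by the
 * vf[n]m{acc,sac} helpers; the real call rounds once, this twice. */
static double model_muladd(double a, double b, double c,
                           int neg_product, int neg_c)
{
    double p = a * b;
    if (neg_product) {
        p = -p;
    }
    return p + (neg_c ? -c : c);
}

int main(void)
{
    printf("%f\n", model_muladd(2.0, 3.0, 10.0, 1, 0));  /* vfnmsub: 4.0 */
    printf("%f\n", model_muladd(2.0, 3.0, 10.0, 1, 1));  /* vfnmacc: -16.0 */
    return 0;
}
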
> +void VECTOR_HELPER(vnsrl_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, k, vl;
> + uint32_t lmul, width, src1, src2, dest, vlmax;
> +
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) ||
> + vector_overlap_vm_common(lmul, vm, rd) ||
> + vector_overlap_dstgp_srcgp(rd, lmul, rs2, 2 * lmul)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, true);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> +
> + for (i = 0; i < vlmax; i++) {
> + src1 = rs1 + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / (2 * width)));
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + k = i % (VLEN / (2 * width));
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u8[j] = env->vfp.vreg[src2].u16[k]
> + >> (env->vfp.vreg[src1].u8[j] & 0xf);
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u16[j] = env->vfp.vreg[src2].u32[k]
> + >> (env->vfp.vreg[src1].u16[j] & 0x1f);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u32[j] = env->vfp.vreg[src2].u64[k]
> + >> (env->vfp.vreg[src1].u32[j] & 0x3f);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_narrow(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +void VECTOR_HELPER(vnsrl_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, k, vl;
> + uint32_t lmul, width, src2, dest, vlmax;
> +
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) ||
> + vector_overlap_vm_common(lmul, vm, rd) ||
> + vector_overlap_dstgp_srcgp(rd, lmul, rs2, 2 * lmul)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, true);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / (2 * width)));
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + k = i % (VLEN / (2 * width));
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u8[j] = env->vfp.vreg[src2].u16[k]
> + >> (env->gpr[rs1] & 0xf);
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u16[j] = env->vfp.vreg[src2].u32[k]
> + >> (env->gpr[rs1] & 0x1f);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u32[j] = env->vfp.vreg[src2].u64[k]
> + >> (env->gpr[rs1] & 0x3f);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_narrow(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +void VECTOR_HELPER(vnsrl_vi)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, k, vl;
> + uint32_t lmul, width, src2, dest, vlmax;
> +
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) ||
> + vector_overlap_vm_common(lmul, vm, rd) ||
> + vector_overlap_dstgp_srcgp(rd, lmul, rs2, 2 * lmul)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, true);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / (2 * width)));
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + k = i % (VLEN / (2 * width));
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u8[j] = env->vfp.vreg[src2].u16[k]
> + >> (rs1);
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u16[j] = env->vfp.vreg[src2].u32[k]
> + >> (rs1);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u32[j] = env->vfp.vreg[src2].u64[k]
> + >> (rs1);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_narrow(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
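
The vnsrl group reads 2*SEW-wide source elements (hence the separate k index
over VLEN/(2*width)) and writes SEW-wide results.  A scalar sketch of the
SEW=8 lane, assuming the shift-amount masking used by the register/vector
forms above (the model name is hypothetical):

#include <stdint.h>
#include <stdio.h>

/* Narrowing logical shift right at SEW=8: shift the 16-bit source,
 * then let the cast to uint8_t truncate, as the u16 -> u8 assignment
 * in the helper does implicitly. */
static uint8_t model_vnsrl_8(uint16_t wide, unsigned shamt)
{
    return (uint8_t)(wide >> (shamt & 0xf));
}

int main(void)
{
    printf("0x%02x\n", model_vnsrl_8(0x0180, 1));   /* 0xc0 */
    return 0;
}
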
> +/* vfmacc.vv vd, vs1, vs2, vm # vd[i] = +(vs1[i] * vs2[i]) + vd[i] */
> +void VECTOR_HELPER(vfmacc_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest, src1, src2;
> +
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + src1 = rs1 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f16[j] = float16_muladd(
> + env->vfp.vreg[src1].f16[j],
> + env->vfp.vreg[src2].f16[j],
> + env->vfp.vreg[dest].f16[j],
> + 0,
> + &env->fp_status);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f32[j] = float32_muladd(
> + env->vfp.vreg[src1].f32[j],
> + env->vfp.vreg[src2].f32[j],
> + env->vfp.vreg[dest].f32[j],
> + 0,
> + &env->fp_status);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f64[j] = float64_muladd(
> + env->vfp.vreg[src1].f64[j],
> + env->vfp.vreg[src2].f64[j],
> + env->vfp.vreg[dest].f64[j],
> + 0,
> + &env->fp_status);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_fcommon(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vfmacc.vf vd, rs1, vs2, vm # vd[i] = +(f[rs1] * vs2[i]) + vd[i] */
> +void VECTOR_HELPER(vfmacc_vf)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest, src2;
> +
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f16[j] = float16_muladd(
> + env->fpr[rs1],
> + env->vfp.vreg[src2].f16[j],
> + env->vfp.vreg[dest].f16[j],
> + 0,
> + &env->fp_status);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f32[j] = float32_muladd(
> + env->fpr[rs1],
> + env->vfp.vreg[src2].f32[j],
> + env->vfp.vreg[dest].f32[j],
> + 0,
> + &env->fp_status);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f64[j] = float64_muladd(
> + env->fpr[rs1],
> + env->vfp.vreg[src2].f64[j],
> + env->vfp.vreg[dest].f64[j],
> + 0,
> + &env->fp_status);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_fcommon(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +void VECTOR_HELPER(vnsra_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, k, vl;
> + uint32_t lmul, width, src1, src2, dest, vlmax;
> +
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) ||
> + vector_overlap_vm_common(lmul, vm, rd) ||
> + vector_overlap_dstgp_srcgp(rd, lmul, rs2, 2 * lmul)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, true);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> +
> + for (i = 0; i < vlmax; i++) {
> + src1 = rs1 + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / (2 * width)));
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + k = i % (VLEN / (2 * width));
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s8[j] = env->vfp.vreg[src2].s16[k]
> + >> (env->vfp.vreg[src1].s8[j] & 0xf);
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s16[j] = env->vfp.vreg[src2].s32[k]
> + >> (env->vfp.vreg[src1].s16[j] & 0x1f);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s32[j] = env->vfp.vreg[src2].s64[k]
> + >> (env->vfp.vreg[src1].s32[j] & 0x3f);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_narrow(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +void VECTOR_HELPER(vnsra_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, k, vl;
> + uint32_t lmul, width, src2, dest, vlmax;
> +
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) ||
> + vector_overlap_vm_common(lmul, vm, rd) ||
> + vector_overlap_dstgp_srcgp(rd, lmul, rs2, 2 * lmul)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, true);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / (2 * width)));
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + k = i % (VLEN / (2 * width));
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s8[j] = env->vfp.vreg[src2].s16[k]
> + >> (env->gpr[rs1] & 0xf);
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s16[j] = env->vfp.vreg[src2].s32[k]
> + >> (env->gpr[rs1] & 0x1f);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s32[j] = env->vfp.vreg[src2].s64[k]
> + >> (env->gpr[rs1] & 0x3f);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_narrow(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +void VECTOR_HELPER(vnsra_vi)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, k, vl;
> + uint32_t lmul, width, src2, dest, vlmax;
> +
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) ||
> + vector_overlap_vm_common(lmul, vm, rd) ||
> + vector_overlap_dstgp_srcgp(rd, lmul, rs2, 2 * lmul)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, true);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / (2 * width)));
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + k = i % (VLEN / (2 * width));
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s8[j] = env->vfp.vreg[src2].s16[k]
> + >> (rs1);
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s16[j] = env->vfp.vreg[src2].s32[k]
> + >> (rs1);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s32[j] = env->vfp.vreg[src2].s64[k]
> + >> (rs1);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_narrow(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +void VECTOR_HELPER(vmacc_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src1, src2, dest, vlmax;
> +
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src1 = rs1 + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s8[j] += env->vfp.vreg[src1].s8[j]
> + * env->vfp.vreg[src2].s8[j];
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s16[j] += env->vfp.vreg[src1].s16[j]
> + * env->vfp.vreg[src2].s16[j];
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s32[j] += env->vfp.vreg[src1].s32[j]
> + * env->vfp.vreg[src2].s32[j];
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s64[j] += env->vfp.vreg[src1].s64[j]
> + * env->vfp.vreg[src2].s64[j];
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +void VECTOR_HELPER(vmacc_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src2, dest, vlmax;
> +
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s8[j] += env->gpr[rs1]
> + * env->vfp.vreg[src2].s8[j];
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s16[j] += env->gpr[rs1]
> + * env->vfp.vreg[src2].s16[j];
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s32[j] += env->gpr[rs1]
> + * env->vfp.vreg[src2].s32[j];
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s64[j] +=
> + (int64_t)extend_gpr(env->gpr[rs1])
> + * env->vfp.vreg[src2].s64[j];
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
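
vmacc is the mirror image of vnmsub earlier: here the destination is the
addend, vd += vs1 * vs2, with the .vx form sign-extending the scalar through
extend_gpr() for SEW=64.  Scalar sketch (illustrative name; the unsigned
arithmetic keeps the wrap-around defined):

#include <stdint.h>
#include <stdio.h>

/* vmacc update rule: vd = vd + vs1 * vs2. */
static int32_t model_vmacc_32(int32_t vd, int32_t vs1, int32_t vs2)
{
    return (int32_t)((uint32_t)vd + (uint32_t)vs1 * (uint32_t)vs2);
}

int main(void)
{
    printf("%d\n", model_vmacc_32(1, 2, 3));   /* 1 + 2*3 = 7 */
    return 0;
}
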
> +/* vfnmacc.vv vd, vs1, vs2, vm # vd[i] = -(vs1[i] * vs2[i]) - vd[i] */
> +void VECTOR_HELPER(vfnmacc_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest, src1, src2;
> +
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + src1 = rs1 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f16[j] = float16_muladd(
> + env->vfp.vreg[src1].f16[j],
> + env->vfp.vreg[src2].f16[j],
> + env->vfp.vreg[dest].f16[j],
> + float_muladd_negate_c | float_muladd_negate_product,
> + &env->fp_status);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f32[j] = float32_muladd(
> + env->vfp.vreg[src1].f32[j],
> + env->vfp.vreg[src2].f32[j],
> + env->vfp.vreg[dest].f32[j],
> + float_muladd_negate_c | float_muladd_negate_product,
> + &env->fp_status);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f64[j] = float64_muladd(
> + env->vfp.vreg[src1].f64[j],
> + env->vfp.vreg[src2].f64[j],
> + env->vfp.vreg[dest].f64[j],
> + float_muladd_negate_c | float_muladd_negate_product,
> + &env->fp_status);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_fcommon(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vfnmacc.vf vd, rs1, vs2, vm # vd[i] = -(f[rs1] * vs2[i]) - vd[i] */
> +void VECTOR_HELPER(vfnmacc_vf)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest, src2;
> +
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f16[j] = float16_muladd(
> + env->fpr[rs1],
> + env->vfp.vreg[src2].f16[j],
> + env->vfp.vreg[dest].f16[j],
> + float_muladd_negate_c | float_muladd_negate_product,
> + &env->fp_status);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f32[j] = float32_muladd(
> + env->fpr[rs1],
> + env->vfp.vreg[src2].f32[j],
> + env->vfp.vreg[dest].f32[j],
> + float_muladd_negate_c | float_muladd_negate_product,
> + &env->fp_status);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f64[j] = float64_muladd(
> + env->fpr[rs1],
> + env->vfp.vreg[src2].f64[j],
> + env->vfp.vreg[dest].f64[j],
> + float_muladd_negate_c | float_muladd_negate_product,
> + &env->fp_status);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_fcommon(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vnclipu.vv vd, vs2, vs1, vm # vector-vector */
> +void VECTOR_HELPER(vnclipu_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest, k, src1, src2;
> +
> + lmul = vector_get_lmul(env);
> +
> +
> + if (vector_vtype_ill(env)
> + || vector_overlap_dstgp_srcgp(rd, lmul, rs2, 2 * lmul)
> + || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, true);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> +
> + vl = env->vfp.vl;
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / (2 * width)));
> + src1 = rs1 + (i / (VLEN / width));
> + j = i % (VLEN / (2 * width));
> + k = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u8[k] = vnclipu_16(env,
> + env->vfp.vreg[src2].u16[j], env->vfp.vreg[src1].u8[k]);
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u16[k] = vnclipu_32(env,
> + env->vfp.vreg[src2].u32[j], env->vfp.vreg[src1].u16[k]);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u32[k] = vnclipu_64(env,
> + env->vfp.vreg[src2].u64[j], env->vfp.vreg[src1].u32[k]);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_narrow(env, dest, k, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vnclipu.vx vd, vs2, rs1, vm # vector-scalar */
> +void VECTOR_HELPER(vnclipu_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, k, dest, src2;
> +
> + lmul = vector_get_lmul(env);
> +
> +
> + if (vector_vtype_ill(env)
> + || vector_overlap_dstgp_srcgp(rd, lmul, rs2, 2 * lmul)
> + || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs2, true);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + vl = env->vfp.vl;
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / (2 * width)));
> + j = i % (VLEN / (2 * width));
> + k = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u8[k] = vnclipu_16(env,
> + env->vfp.vreg[src2].u16[j], env->gpr[rs1]);
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u16[k] = vnclipu_32(env,
> + env->vfp.vreg[src2].u32[j], env->gpr[rs1]);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u32[k] = vnclipu_64(env,
> + env->vfp.vreg[src2].u64[j], env->gpr[rs1]);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_narrow(env, dest, k, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +
> +/* vnclipu.vi vd, vs2, imm, vm # vector-immediate */
> +void VECTOR_HELPER(vnclipu_vi)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, k, dest, src2;
> +
> + lmul = vector_get_lmul(env);
> +
> +
> + if (vector_vtype_ill(env)
> + || vector_overlap_dstgp_srcgp(rd, lmul, rs2, 2 * lmul)
> + || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs2, true);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + vl = env->vfp.vl;
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / (2 * width)));
> + j = i % (VLEN / (2 * width));
> + k = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u8[k] = vnclipui_16(env,
> + env->vfp.vreg[src2].u16[j], rs1);
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u16[k] = vnclipui_32(env,
> + env->vfp.vreg[src2].u32[j], rs1);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u32[k] = vnclipui_64(env,
> + env->vfp.vreg[src2].u64[j], rs1);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_narrow(env, dest, k, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
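
vnclipu adds rounding and unsigned saturation on top of the narrowing shift;
the vnclipu_16/32/64 helpers (not in this hunk) are also expected to raise
vxsat when the shifted value does not fit.  A minimal sketch of the SEW=8
case, again hard-coding RNU rounding and dropping the saturation flag:

#include <stdint.h>
#include <stdio.h>

/* Unsigned narrowing clip at SEW=8: round-and-shift the 16-bit source,
 * then saturate to [0, 255].  vxrm/vxsat handling is omitted. */
static uint8_t model_vnclipu_16(uint16_t wide, unsigned shamt)
{
    shamt &= 0xf;
    uint16_t round = shamt ? (uint16_t)((wide >> (shamt - 1)) & 0x1) : 0;
    uint32_t shifted = (uint32_t)(wide >> shamt) + round;
    return shifted > UINT8_MAX ? UINT8_MAX : (uint8_t)shifted;
}

int main(void)
{
    printf("%u\n", model_vnclipu_16(0x1234, 4));   /* saturates to 255 */
    printf("%u\n", model_vnclipu_16(0x01ff, 4));   /* rounds up to 32 */
    return 0;
}
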
> +/* vfmsac.vv vd, vs1, vs2, vm # vd[i] = +(vs1[i] * vs2[i]) - vd[i] */
> +void VECTOR_HELPER(vfmsac_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest, src1, src2;
> +
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + src1 = rs1 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f16[j] = float16_muladd(
> + env->vfp.vreg[src1].f16[j],
> + env->vfp.vreg[src2].f16[j],
> + env->vfp.vreg[dest].f16[j],
> + float_muladd_negate_c,
> + &env->fp_status);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f32[j] = float32_muladd(
> + env->vfp.vreg[src1].f32[j],
> + env->vfp.vreg[src2].f32[j],
> + env->vfp.vreg[dest].f32[j],
> + float_muladd_negate_c,
> + &env->fp_status);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f64[j] = float64_muladd(
> + env->vfp.vreg[src1].f64[j],
> + env->vfp.vreg[src2].f64[j],
> + env->vfp.vreg[dest].f64[j],
> + float_muladd_negate_c,
> + &env->fp_status);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_fcommon(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vfmsac.vf vd, rs1, vs2, vm # vd[i] = +(f[rs1] * vs2[i]) - vd[i] */
> +void VECTOR_HELPER(vfmsac_vf)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest, src2;
> +
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f16[j] = float16_muladd(
> + env->fpr[rs1],
> + env->vfp.vreg[src2].f16[j],
> + env->vfp.vreg[dest].f16[j],
> + float_muladd_negate_c,
> + &env->fp_status);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f32[j] = float32_muladd(
> + env->fpr[rs1],
> + env->vfp.vreg[src2].f32[j],
> + env->vfp.vreg[dest].f32[j],
> + float_muladd_negate_c,
> + &env->fp_status);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f64[j] = float64_muladd(
> + env->fpr[rs1],
> + env->vfp.vreg[src2].f64[j],
> + env->vfp.vreg[dest].f64[j],
> + float_muladd_negate_c,
> + &env->fp_status);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_fcommon(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vnclip.vv vd, vs2, vs1, vm # vector-vector */
> +void VECTOR_HELPER(vnclip_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest, k, src1, src2;
> +
> + lmul = vector_get_lmul(env);
> +
> +
> + if (vector_vtype_ill(env)
> + || vector_overlap_dstgp_srcgp(rd, lmul, rs2, 2 * lmul)
> + || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, true);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + vl = env->vfp.vl;
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / (2 * width)));
> + src1 = rs1 + (i / (VLEN / width));
> + j = i % (VLEN / (2 * width));
> + k = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s8[k] = vnclip_16(env,
> + env->vfp.vreg[src2].s16[j], env->vfp.vreg[src1].u8[k]);
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s16[k] = vnclip_32(env,
> + env->vfp.vreg[src2].s32[j], env->vfp.vreg[src1].u16[k]);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s32[k] = vnclip_64(env,
> + env->vfp.vreg[src2].s64[j], env->vfp.vreg[src1].u32[k]);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_narrow(env, dest, k, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vnclip.vx vd, vs2, rs1, vm # vector-scalar */
> +void VECTOR_HELPER(vnclip_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest, k, src2;
> +
> + lmul = vector_get_lmul(env);
> +
> +
> + if (vector_vtype_ill(env)
> + || vector_overlap_dstgp_srcgp(rd, lmul, rs2, 2 * lmul)
> + || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs2, true);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + vl = env->vfp.vl;
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / (2 * width)));
> + j = i % (VLEN / (2 * width));
> + k = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s8[k] = vnclip_16(env,
> + env->vfp.vreg[src2].s16[j], env->gpr[rs1]);
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s16[k] = vnclip_32(env,
> + env->vfp.vreg[src2].s32[j], env->gpr[rs1]);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s32[k] = vnclip_64(env,
> + env->vfp.vreg[src2].s64[j], env->gpr[rs1]);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_narrow(env, dest, k, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vnclip.vi vd, vs2, imm, vm # vector-immediate */
> +void VECTOR_HELPER(vnclip_vi)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest, k, src2;
> +
> + lmul = vector_get_lmul(env);
> +
> +
> + if (vector_vtype_ill(env)
> + || vector_overlap_dstgp_srcgp(rd, lmul, rs2, 2 * lmul)
> + || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs2, true);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + vl = env->vfp.vl;
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / (2 * width)));
> + j = i % (VLEN / (2 * width));
> + k = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s8[k] = vnclipi_16(env,
> + env->vfp.vreg[src2].s16[j], rs1);
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s16[k] = vnclipi_32(env,
> + env->vfp.vreg[src2].s32[j], rs1);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s32[k] = vnclipi_64(env,
> + env->vfp.vreg[src2].s64[j], rs1);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_narrow(env, dest, k, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vnmsac_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src1, src2, dest, vlmax;
> +
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src1 = rs1 + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s8[j] -= env->vfp.vreg[src1].s8[j]
> + * env->vfp.vreg[src2].s8[j];
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s16[j] -= env->vfp.vreg[src1].s16[j]
> + * env->vfp.vreg[src2].s16[j];
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s32[j] -= env->vfp.vreg[src1].s32[j]
> + * env->vfp.vreg[src2].s32[j];
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s64[j] -= env->vfp.vreg[src1].s64[j]
> + * env->vfp.vreg[src2].s64[j];
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +void VECTOR_HELPER(vnmsac_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src2, dest, vlmax;
> +
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s8[j] -= env->gpr[rs1]
> + * env->vfp.vreg[src2].s8[j];
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s16[j] -= env->gpr[rs1]
> + * env->vfp.vreg[src2].s16[j];
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s32[j] -= env->gpr[rs1]
> + * env->vfp.vreg[src2].s32[j];
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s64[j] -=
> + (int64_t)extend_gpr(env->gpr[rs1])
> + * env->vfp.vreg[src2].s64[j];
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vfnmsac.vv vd, vs1, vs2, vm # vd[i] = -(vs1[i] * vs2[i]) + vd[i] */
> +void VECTOR_HELPER(vfnmsac_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest, src1, src2;
> +
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + src1 = rs1 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f16[j] = float16_muladd(
> + env->vfp.vreg[src1].f16[j],
> + env->vfp.vreg[src2].f16[j],
> + env->vfp.vreg[dest].f16[j],
> + float_muladd_negate_product,
> + &env->fp_status);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f32[j] = float32_muladd(
> + env->vfp.vreg[src1].f32[j],
> + env->vfp.vreg[src2].f32[j],
> + env->vfp.vreg[dest].f32[j],
> + float_muladd_negate_product,
> + &env->fp_status);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f64[j] = float64_muladd(
> + env->vfp.vreg[src1].f64[j],
> + env->vfp.vreg[src2].f64[j],
> + env->vfp.vreg[dest].f64[j],
> + float_muladd_negate_product,
> + &env->fp_status);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_fcommon(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vfnmsac.vf vd, rs1, vs2, vm # vd[i] = -(f[rs1] * vs2[i]) + vd[i] */
> +void VECTOR_HELPER(vfnmsac_vf)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest, src2;
> +
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f16[j] = float16_muladd(
> + env->fpr[rs1],
> + env->vfp.vreg[src2].f16[j],
> + env->vfp.vreg[dest].f16[j],
> + float_muladd_negate_product,
> + &env->fp_status);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f32[j] = float32_muladd(
> + env->fpr[rs1],
> + env->vfp.vreg[src2].f32[j],
> + env->vfp.vreg[dest].f32[j],
> + float_muladd_negate_product,
> + &env->fp_status);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f64[j] = float64_muladd(
> + env->fpr[rs1],
> + env->vfp.vreg[src2].f64[j],
> + env->vfp.vreg[dest].f64[j],
> + float_muladd_negate_product,
> + &env->fp_status);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_fcommon(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vwredsumu.vs vd, vs2, vs1, vm # 2*SEW = 2*SEW + sum(zero-extend(SEW)) */
> +void VECTOR_HELPER(vwredsumu_vs)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, src2;
> + uint64_t sum = 0;
> +
> + lmul = vector_get_lmul(env);
> +
> + if (vector_vtype_ill(env)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + if (env->vfp.vstart != 0) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vl = env->vfp.vl;
> + if (vl == 0) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < VLEN / 64; i++) {
> + env->vfp.vreg[rd].u64[i] = 0;
> + }
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> +
> + if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + sum += env->vfp.vreg[src2].u8[j];
> + }
> + if (i == 0) {
> + sum += env->vfp.vreg[rs1].u16[0];
> + }
> + if (i == vl - 1) {
> + env->vfp.vreg[rd].u16[0] = sum;
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + sum += env->vfp.vreg[src2].u16[j];
> + }
> + if (i == 0) {
> + sum += env->vfp.vreg[rs1].u32[0];
> + }
> + if (i == vl - 1) {
> + env->vfp.vreg[rd].u32[0] = sum;
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + sum += env->vfp.vreg[src2].u32[j];
> + }
> + if (i == 0) {
> + sum += env->vfp.vreg[rs1].u64[0];
> + }
> + if (i == vl - 1) {
> + env->vfp.vreg[rd].u64[0] = sum;
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
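
The reduction has a different shape from the element-wise loops: the
accumulator starts from vs1[0] (already 2*SEW wide), adds the zero-extended
active elements of vs2, and only element 0 of vd is written.  Equivalent
scalar sketch for SEW=8 with all elements active (masking omitted; the
model name is illustrative):

#include <stdint.h>
#include <stdio.h>

/* vwredsumu at SEW=8: vd[0] (16-bit) = vs1[0] + sum of vs2 bytes,
 * each zero-extended to 16 bits before the add. */
static uint16_t model_vwredsumu_8(const uint8_t *vs2, int vl, uint16_t vs1_0)
{
    uint16_t sum = vs1_0;
    for (int i = 0; i < vl; i++) {
        sum = (uint16_t)(sum + vs2[i]);
    }
    return sum;
}

int main(void)
{
    uint8_t v[4] = { 250, 250, 250, 250 };
    printf("%u\n", model_vwredsumu_8(v, 4, 0));   /* 1000, no 8-bit wrap */
    return 0;
}
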
> +void VECTOR_HELPER(vwaddu_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, k, vl;
> + uint32_t lmul, width, src1, src2, dest, vlmax;
> +
> + vl = env->vfp.vl;
> +
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env)
> + || vector_overlap_vm_force(vm, rd)
> + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs1, lmul)
> + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)
> + ) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, true);
> +
> + for (i = 0; i < vlmax; i++) {
> + src1 = rs1 + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / (2 * width)));
> + j = i % (VLEN / width);
> + k = i % (VLEN / (2 * width));
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u16[k] =
> + (uint16_t)env->vfp.vreg[src1].u8[j] +
> + (uint16_t)env->vfp.vreg[src2].u8[j];
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u32[k] =
> + (uint32_t)env->vfp.vreg[src1].u16[j] +
> + (uint32_t)env->vfp.vreg[src2].u16[j];
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u64[k] =
> + (uint64_t)env->vfp.vreg[src1].u32[j] +
> + (uint64_t)env->vfp.vreg[src2].u32[j];
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_widen(env, dest, k, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
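> +/* vwaddu.vx vd, vs2, rs1, vm # Widening unsigned add, 2*SEW = SEW + SEW */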
> +void VECTOR_HELPER(vwaddu_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, k, vl;
> + uint32_t lmul, width, src2, dest, vlmax;
> +
> + vl = env->vfp.vl;
> +
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env)
> + || vector_overlap_vm_force(vm, rd)
> + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)
> + ) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, true);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / (2 * width)));
> + j = i % (VLEN / width);
> + k = i % (VLEN / (2 * width));
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u16[k] =
> + (uint16_t)env->vfp.vreg[src2].u8[j] +
> + (uint16_t)((uint8_t)env->gpr[rs1]);
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u32[k] =
> + (uint32_t)env->vfp.vreg[src2].u16[j] +
> + (uint32_t)((uint16_t)env->gpr[rs1]);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u64[k] =
> + (uint64_t)env->vfp.vreg[src2].u32[j] +
> + (uint64_t)((uint32_t)env->gpr[rs1]);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_widen(env, dest, k, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vfwadd.vv vd, vs2, vs1, vm # vector-vector */
> +void VECTOR_HELPER(vfwadd_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, k, dest, src1, src2;
> +
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> + if (vector_vtype_ill(env)
> + || vector_overlap_vm_force(vm, rd)
> + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs1, lmul)
> + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, true);
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / (2 * width)));
> + src2 = rs2 + (i / (VLEN / width));
> + src1 = rs1 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + k = i % (VLEN / (2 * width));
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f32[k] = float32_add(
> + float16_to_float32(env->vfp.vreg[src2].f16[j], true,
> + &env->fp_status),
> + float16_to_float32(env->vfp.vreg[src1].f16[j], true,
> + &env->fp_status),
> + &env->fp_status);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f64[k] = float64_add(
> + float32_to_float64(env->vfp.vreg[src2].f32[j],
> + &env->fp_status),
> + float32_to_float64(env->vfp.vreg[src1].f32[j],
> + &env->fp_status),
> + &env->fp_status);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_fwiden(env, dest, k, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vfwadd.vf vd, vs2, rs1, vm # vector-scalar */
> +void VECTOR_HELPER(vfwadd_vf)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, k, dest, src2;
> +
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> + if (vector_vtype_ill(env)
> + || vector_overlap_vm_force(vm, rd)
> + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, true);
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / (2 * width)));
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + k = i % (VLEN / (2 * width));
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f32[k] = float32_add(
> + float16_to_float32(env->vfp.vreg[src2].f16[j], true,
> + &env->fp_status),
> + float16_to_float32(env->fpr[rs1], true,
> + &env->fp_status),
> + &env->fp_status);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f64[k] = float64_add(
> + float32_to_float64(env->vfp.vreg[src2].f32[j],
> + &env->fp_status),
> + float32_to_float64(env->fpr[rs1], &env->fp_status),
> + &env->fp_status);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_fwiden(env, dest, k, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vwredsum.vs vd, vs2, vs1, vm # 2*SEW = 2*SEW + sum(sign-extend(SEW)) */
> +void VECTOR_HELPER(vwredsum_vs)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, src2;
> + int64_t sum = 0;
> +
> + lmul = vector_get_lmul(env);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> +
> + if (vector_vtype_ill(env)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + if (env->vfp.vstart != 0) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vl = env->vfp.vl;
> + if (vl == 0) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < VLEN / 64; i++) {
> + env->vfp.vreg[rd].u64[i] = 0;
> + }
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> +
> + if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + sum += (int16_t)env->vfp.vreg[src2].s8[j] << 8 >> 8;
> + }
> + if (i == 0) {
> + sum += env->vfp.vreg[rs1].s16[0];
> + }
> + if (i == vl - 1) {
> + env->vfp.vreg[rd].s16[0] = sum;
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + sum += (int32_t)env->vfp.vreg[src2].s16[j] << 16 >> 16;
> + }
> + if (i == 0) {
> + sum += env->vfp.vreg[rs1].s32[0];
> + }
> + if (i == vl - 1) {
> + env->vfp.vreg[rd].s32[0] = sum;
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + sum += (int64_t)env->vfp.vreg[src2].s32[j] << 32 >> 32;
> + }
> + if (i == 0) {
> + sum += env->vfp.vreg[rs1].s64[0];
> + }
> + if (i == vl - 1) {
> + env->vfp.vreg[rd].s64[0] = sum;
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
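> +/* vwadd.vv vd, vs2, vs1, vm # Widening signed add, 2*SEW = SEW + SEW */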
> +void VECTOR_HELPER(vwadd_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, k, vl;
> + uint32_t lmul, width, src1, src2, dest, vlmax;
> +
> + vl = env->vfp.vl;
> +
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env)
> + || vector_overlap_vm_force(vm, rd)
> + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs1, lmul)
> + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, true);
> +
> + for (i = 0; i < vlmax; i++) {
> + src1 = rs1 + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / (2 * width)));
> + j = i % (VLEN / width);
> + k = i % (VLEN / (2 * width));
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s16[k] =
> + (int16_t)env->vfp.vreg[src1].s8[j] +
> + (int16_t)env->vfp.vreg[src2].s8[j];
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s32[k] =
> + (int32_t)env->vfp.vreg[src1].s16[j] +
> + (int32_t)env->vfp.vreg[src2].s16[j];
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s64[k] =
> + (int64_t)env->vfp.vreg[src1].s32[j] +
> + (int64_t)env->vfp.vreg[src2].s32[j];
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_widen(env, dest, k, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
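> +/* vwadd.vx vd, vs2, rs1, vm # Widening signed add, 2*SEW = SEW + SEW */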
> +void VECTOR_HELPER(vwadd_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, k, vl;
> + uint32_t lmul, width, src2, dest, vlmax;
> +
> + vl = env->vfp.vl;
> +
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env)
> + || vector_overlap_vm_force(vm, rd)
> + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, true);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / (2 * width)));
> + j = i % (VLEN / width);
> + k = i % (VLEN / (2 * width));
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s16[k] =
> + (int16_t)((int8_t)env->vfp.vreg[src2].s8[j]) +
> + (int16_t)((int8_t)env->gpr[rs1]);
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s32[k] =
> + (int32_t)((int16_t)env->vfp.vreg[src2].s16[j]) +
> + (int32_t)((int16_t)env->gpr[rs1]);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s64[k] =
> + (int64_t)((int32_t)env->vfp.vreg[src2].s32[j]) +
> + (int64_t)((int32_t)env->gpr[rs1]);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_widen(env, dest, k, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/*
> + * vfwredsum.vs vd, vs2, vs1, vm #
> + * Unordered reduce 2*SEW = 2*SEW + sum(promote(SEW))
> + */
> +void VECTOR_HELPER(vfwredsum_vs)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, src2;
> + float32 sum32 = float32_zero;
> + float64 sum64 = float64_zero;
> +
> + lmul = vector_get_lmul(env);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> +
> + if (vector_vtype_ill(env)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + if (env->vfp.vstart != 0) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vl = env->vfp.vl;
> + if (vl == 0) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < VLEN / 64; i++) {
> + env->vfp.vreg[rd].u64[i] = 0;
> + }
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> +
> + if (i < vl) {
> + switch (width) {
> + case 16:
> + if (i == 0) {
> + sum32 = env->vfp.vreg[rs1].f32[0];
> + }
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + sum32 = float32_add(sum32,
> + float16_to_float32(env->vfp.vreg[src2].f16[j],
> + true, &env->fp_status),
> + &env->fp_status);
> + }
> + if (i == vl - 1) {
> + env->vfp.vreg[rd].f32[0] = sum32;
> + }
> + break;
> + case 32:
> + if (i == 0) {
> + sum64 = env->vfp.vreg[rs1].f64[0];
> + }
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + sum64 = float64_add(sum64,
> + float32_to_float64(env->vfp.vreg[src2].f32[j],
> + &env->fp_status),
> + &env->fp_status);
> + }
> + if (i == vl - 1) {
> + env->vfp.vreg[rd].f64[0] = sum64;
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
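> +/* vwsubu.vv vd, vs2, vs1, vm # Widening unsigned subtract, 2*SEW = SEW - SEW */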
> +void VECTOR_HELPER(vwsubu_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, k, vl;
> + uint32_t lmul, width, src1, src2, dest, vlmax;
> +
> + vl = env->vfp.vl;
> +
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env)
> + || vector_overlap_vm_force(vm, rd)
> + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs1, lmul)
> + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)
> + ) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, true);
> +
> + for (i = 0; i < vlmax; i++) {
> + src1 = rs1 + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / (2 * width)));
> + j = i % (VLEN / width);
> + k = i % (VLEN / (2 * width));
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u16[k] =
> + (uint16_t)env->vfp.vreg[src2].u8[j] -
> + (uint16_t)env->vfp.vreg[src1].u8[j];
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u32[k] =
> + (uint32_t)env->vfp.vreg[src2].u16[j] -
> + (uint32_t)env->vfp.vreg[src1].u16[j];
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u64[k] =
> + (uint64_t)env->vfp.vreg[src2].u32[j] -
> + (uint64_t)env->vfp.vreg[src1].u32[j];
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_widen(env, dest, k, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
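> +/* vwsubu.vx vd, vs2, rs1, vm # Widening unsigned subtract, 2*SEW = SEW - SEW */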
> +void VECTOR_HELPER(vwsubu_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, k, vl;
> + uint32_t lmul, width, src2, dest, vlmax;
> +
> + vl = env->vfp.vl;
> +
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env)
> + || vector_overlap_vm_force(vm, rd)
> + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)
> + ) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, true);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / (2 * width)));
> + j = i % (VLEN / width);
> + k = i % (VLEN / (2 * width));
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u16[k] =
> + (uint16_t)env->vfp.vreg[src2].u8[j] -
> + (uint16_t)((uint8_t)env->gpr[rs1]);
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u32[k] =
> + (uint32_t)env->vfp.vreg[src2].u16[j] -
> + (uint32_t)((uint16_t)env->gpr[rs1]);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u64[k] =
> + (uint64_t)env->vfp.vreg[src2].u32[j] -
> + (uint64_t)((uint32_t)env->gpr[rs1]);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_widen(env, dest, k, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vfwsub.vv vd, vs2, vs1, vm # vector-vector */
> +void VECTOR_HELPER(vfwsub_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, k, dest, src1, src2;
> +
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> + if (vector_vtype_ill(env)
> + || vector_overlap_vm_force(vm, rd)
> + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs1, lmul)
> + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, true);
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / (2 * width)));
> + src2 = rs2 + (i / (VLEN / width));
> + src1 = rs1 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + k = i % (VLEN / (2 * width));
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f32[k] = float32_sub(
> + float16_to_float32(env->vfp.vreg[src2].f16[j], true,
> + &env->fp_status),
> + float16_to_float32(env->vfp.vreg[src1].f16[j], true,
> + &env->fp_status),
> + &env->fp_status);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f64[k] = float64_sub(
> + float32_to_float64(env->vfp.vreg[src2].f32[j],
> + &env->fp_status),
> + float32_to_float64(env->vfp.vreg[src1].f32[j],
> + &env->fp_status),
> + &env->fp_status);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_fwiden(env, dest, k, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vfwsub.vf vd, vs2, rs1, vm # vector-scalar */
> +void VECTOR_HELPER(vfwsub_vf)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, k, dest, src2;
> +
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> + if (vector_vtype_ill(env)
> + || vector_overlap_vm_force(vm, rd)
> + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, true);
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / (2 * width)));
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + k = i % (VLEN / (2 * width));
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f32[k] = float32_sub(
> + float16_to_float32(env->vfp.vreg[src2].f16[j], true,
> + &env->fp_status),
> + float16_to_float32(env->fpr[rs1], true,
> + &env->fp_status),
> + &env->fp_status);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f64[k] = float64_sub(
> + float32_to_float64(env->vfp.vreg[src2].f32[j],
> + &env->fp_status),
> + float32_to_float64(env->fpr[rs1], &env->fp_status),
> + &env->fp_status);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_fwiden(env, dest, k, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
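> +
> +/* vwsub.vv vd, vs2, vs1, vm # Widening signed subtract, 2*SEW = SEW - SEW */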
> +void VECTOR_HELPER(vwsub_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, k, vl;
> + uint32_t lmul, width, src1, src2, dest, vlmax;
> +
> + vl = env->vfp.vl;
> +
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env)
> + || vector_overlap_vm_force(vm, rd)
> + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs1, lmul)
> + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)
> + ) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, true);
> +
> + for (i = 0; i < vlmax; i++) {
> + src1 = rs1 + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / (2 * width)));
> + j = i % (VLEN / width);
> + k = i % (VLEN / (2 * width));
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s16[k] =
> + (int16_t)env->vfp.vreg[src2].s8[j] -
> + (int16_t)env->vfp.vreg[src1].s8[j];
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s32[k] =
> + (int32_t)env->vfp.vreg[src2].s16[j] -
> + (int32_t)env->vfp.vreg[src1].s16[j];
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s64[k] =
> + (int64_t)env->vfp.vreg[src2].s32[j] -
> + (int64_t)env->vfp.vreg[src1].s32[j];
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_widen(env, dest, k, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
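> +
> +/* vwsub.vx vd, vs2, rs1, vm # Widening signed subtract, 2*SEW = SEW - SEW */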
> +void VECTOR_HELPER(vwsub_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, k, vl;
> + uint32_t lmul, width, src2, dest, vlmax;
> +
> + vl = env->vfp.vl;
> +
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env)
> + || vector_overlap_vm_force(vm, rd)
> + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)
> + ) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, true);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / (2 * width)));
> + j = i % (VLEN / width);
> + k = i % (VLEN / (2 * width));
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s16[k] =
> + (int16_t)((int8_t)env->vfp.vreg[src2].s8[j]) -
> + (int16_t)((int8_t)env->gpr[rs1]);
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s32[k] =
> + (int32_t)((int16_t)env->vfp.vreg[src2].s16[j]) -
> + (int32_t)((int16_t)env->gpr[rs1]);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s64[k] =
> + (int64_t)((int32_t)env->vfp.vreg[src2].s32[j]) -
> + (int64_t)((int32_t)env->gpr[rs1]);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_widen(env, dest, k, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/*
> + * vfwredosum.vs vd, vs2, vs1, vm #
> + * Ordered reduce 2*SEW = 2*SEW + sum(promote(SEW))
> + */
> +void VECTOR_HELPER(vfwredosum_vs)(CPURISCVState *env, uint32_t vm,
> + uint32_t rs1, uint32_t rs2, uint32_t rd)
> +{
> + helper_vector_vfwredsum_vs(env, vm, rs1, rs2, rd);
> +}
> +
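> +/* vwaddu.wv vd, vs2, vs1, vm # Widening unsigned add, 2*SEW = 2*SEW + SEW */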
> +void VECTOR_HELPER(vwaddu_wv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, k, vl;
> + uint32_t lmul, width, src1, src2, dest, vlmax;
> +
> + vl = env->vfp.vl;
> +
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env)
> + || vector_overlap_vm_force(vm, rd)
> + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs1, lmul)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, true);
> + vector_lmul_check_reg(env, lmul, rd, true);
> +
> + for (i = 0; i < vlmax; i++) {
> + src1 = rs1 + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / (2 * width)));
> + dest = rd + (i / (VLEN / (2 * width)));
> + j = i % (VLEN / width);
> + k = i % (VLEN / (2 * width));
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u16[k] =
> + (uint16_t)env->vfp.vreg[src1].u8[j] +
> + (uint16_t)env->vfp.vreg[src2].u16[k];
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u32[k] =
> + (uint32_t)env->vfp.vreg[src1].u16[j] +
> + (uint32_t)env->vfp.vreg[src2].u32[k];
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u64[k] =
> + (uint64_t)env->vfp.vreg[src1].u32[j] +
> + (uint64_t)env->vfp.vreg[src2].u64[k];
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_widen(env, dest, k, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
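> +/* vwaddu.wx vd, vs2, rs1, vm # Widening unsigned add, 2*SEW = 2*SEW + SEW */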
> +void VECTOR_HELPER(vwaddu_wx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, k, vl;
> + uint32_t lmul, width, src2, dest, vlmax;
> +
> + vl = env->vfp.vl;
> +
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env)
> + || vector_overlap_vm_force(vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, true);
> + vector_lmul_check_reg(env, lmul, rd, true);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / (2 * width)));
> + dest = rd + (i / (VLEN / (2 * width)));
> + k = i % (VLEN / (2 * width));
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u16[k] =
> + (uint16_t)env->vfp.vreg[src2].u16[k] +
> + (uint16_t)((uint8_t)env->gpr[rs1]);
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u32[k] =
> + (uint32_t)env->vfp.vreg[src2].u32[k] +
> + (uint32_t)((uint16_t)env->gpr[rs1]);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u64[k] =
> + (uint64_t)env->vfp.vreg[src2].u64[k] +
> + (uint64_t)((uint32_t)env->gpr[rs1]);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_widen(env, dest, k, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vfwadd.wv vd, vs2, vs1, vm # vector-vector */
> +void VECTOR_HELPER(vfwadd_wv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, k, dest, src1, src2;
> +
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> + if (vector_vtype_ill(env)
> + || vector_overlap_vm_force(vm, rd)
> + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs1, lmul)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, true);
> + vector_lmul_check_reg(env, lmul, rd, true);
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / (2 * width)));
> + src2 = rs2 + (i / (VLEN / (2 * width)));
> + src1 = rs1 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + k = i % (VLEN / (2 * width));
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f32[k] = float32_add(
> + env->vfp.vreg[src2].f32[k],
> + float16_to_float32(env->vfp.vreg[src1].f16[j], true,
> + &env->fp_status),
> + &env->fp_status);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f64[k] = float64_add(
> + env->vfp.vreg[src2].f64[k],
> + float32_to_float64(env->vfp.vreg[src1].f32[j],
> + &env->fp_status),
> + &env->fp_status);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_widen(env, dest, k, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vfwadd.wf vd, vs2, rs1, vm # vector-scalar */
> +void VECTOR_HELPER(vfwadd_wf)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, k, dest, src2;
> +
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> + if (vector_vtype_ill(env)
> + || vector_overlap_vm_force(vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs2, true);
> + vector_lmul_check_reg(env, lmul, rd, true);
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / (2 * width)));
> + src2 = rs2 + (i / (VLEN / (2 * width)));
> + k = i % (VLEN / (2 * width));
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f32[k] = float32_add(
> + env->vfp.vreg[src2].f32[k],
> + float16_to_float32(env->fpr[rs1], true,
> + &env->fp_status),
> + &env->fp_status);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f64[k] = float64_add(
> + env->vfp.vreg[src2].f64[k],
> + float32_to_float64(env->fpr[rs1], &env->fp_status),
> + &env->fp_status);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_widen(env, dest, k, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
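> +/* vwadd.wv vd, vs2, vs1, vm # Widening signed add, 2*SEW = 2*SEW + SEW */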
> +void VECTOR_HELPER(vwadd_wv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, k, vl;
> + uint32_t lmul, width, src1, src2, dest, vlmax;
> +
> + vl = env->vfp.vl;
> +
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env)
> + || vector_overlap_vm_force(vm, rd)
> + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs1, lmul)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, true);
> + vector_lmul_check_reg(env, lmul, rd, true);
> +
> + for (i = 0; i < vlmax; i++) {
> + src1 = rs1 + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / (2 * width)));
> + dest = rd + (i / (VLEN / (2 * width)));
> + j = i % (VLEN / width);
> + k = i % (VLEN / (2 * width));
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s16[k] =
> + (int16_t)((int8_t)env->vfp.vreg[src1].s8[j]) +
> + (int16_t)env->vfp.vreg[src2].s16[k];
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s32[k] =
> + (int32_t)((int16_t)env->vfp.vreg[src1].s16[j]) +
> + (int32_t)env->vfp.vreg[src2].s32[k];
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s64[k] =
> + (int64_t)((int32_t)env->vfp.vreg[src1].s32[j]) +
> + (int64_t)env->vfp.vreg[src2].s64[k];
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_widen(env, dest, k, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
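> +/* vwadd.wx vd, vs2, rs1, vm # Widening signed add, 2*SEW = 2*SEW + SEW */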
> +void VECTOR_HELPER(vwadd_wx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, k, vl;
> + uint32_t lmul, width, src2, dest, vlmax;
> +
> + vl = env->vfp.vl;
> +
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env)
> + || vector_overlap_vm_force(vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, true);
> + vector_lmul_check_reg(env, lmul, rd, true);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / (2 * width)));
> + dest = rd + (i / (VLEN / (2 * width)));
> + k = i % (VLEN / (2 * width));
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s16[k] =
> + (int16_t)env->vfp.vreg[src2].s16[k] +
> + (int16_t)((int8_t)env->gpr[rs1]);
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s32[k] =
> + (int32_t)env->vfp.vreg[src2].s32[k] +
> + (int32_t)((int16_t)env->gpr[rs1]);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s64[k] =
> + (int64_t)env->vfp.vreg[src2].s64[k] +
> + (int64_t)((int32_t)env->gpr[rs1]);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_widen(env, dest, k, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
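> +/* vwsubu.wv vd, vs2, vs1, vm # Widening unsigned subtract, 2*SEW = 2*SEW - SEW */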
> +void VECTOR_HELPER(vwsubu_wv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, k, vl;
> + uint32_t lmul, width, src1, src2, dest, vlmax;
> +
> + vl = env->vfp.vl;
> +
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env)
> + || vector_overlap_vm_force(vm, rd)
> + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs1, lmul)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, true);
> + vector_lmul_check_reg(env, lmul, rd, true);
> +
> + for (i = 0; i < vlmax; i++) {
> + src1 = rs1 + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / (2 * width)));
> + dest = rd + (i / (VLEN / (2 * width)));
> + j = i % (VLEN / width);
> + k = i % (VLEN / (2 * width));
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u16[k] =
> + (uint16_t)env->vfp.vreg[src2].u16[k] -
> + (uint16_t)env->vfp.vreg[src1].u8[j];
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u32[k] =
> + (uint32_t)env->vfp.vreg[src2].u32[k] -
> + (uint32_t)env->vfp.vreg[src1].u16[j];
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u64[k] =
> + (uint64_t)env->vfp.vreg[src2].u64[k] -
> + (uint64_t)env->vfp.vreg[src1].u32[j];
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_widen(env, dest, k, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
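> +/* vwsubu.wx vd, vs2, rs1, vm # Widening unsigned subtract, 2*SEW = 2*SEW - SEW */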
> +void VECTOR_HELPER(vwsubu_wx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, k, vl;
> + uint32_t lmul, width, src2, dest, vlmax;
> +
> + vl = env->vfp.vl;
> +
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env)
> + || vector_overlap_vm_force(vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, true);
> + vector_lmul_check_reg(env, lmul, rd, true);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / (2 * width)));
> + dest = rd + (i / (VLEN / (2 * width)));
> + k = i % (VLEN / (2 * width));
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u16[k] =
> + (uint16_t)env->vfp.vreg[src2].u16[k] -
> + (uint16_t)((uint8_t)env->gpr[rs1]);
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u32[k] =
> + (uint32_t)env->vfp.vreg[src2].u32[k] -
> + (uint32_t)((uint16_t)env->gpr[rs1]);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u64[k] =
> + (uint64_t)env->vfp.vreg[src2].u64[k] -
> + (uint64_t)((uint32_t)env->gpr[rs1]);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_widen(env, dest, k, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vfwsub.wv vd, vs2, vs1, vm # vector-vector */
> +void VECTOR_HELPER(vfwsub_wv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, k, dest, src1, src2;
> +
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> + if (vector_vtype_ill(env)
> + || vector_overlap_vm_force(vm, rd)
> + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs1, lmul)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, true);
> + vector_lmul_check_reg(env, lmul, rd, true);
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / (2 * width)));
> + src2 = rs2 + (i / (VLEN / (2 * width)));
> + src1 = rs1 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + k = i % (VLEN / (2 * width));
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f32[k] = float32_sub(
> + env->vfp.vreg[src2].f32[k],
> + float16_to_float32(env->vfp.vreg[src1].f16[j], true,
> + &env->fp_status),
> + &env->fp_status);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f64[k] = float64_sub(
> + env->vfp.vreg[src2].f64[k],
> + float32_to_float64(env->vfp.vreg[src1].f32[j],
> + &env->fp_status),
> + &env->fp_status);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_fwiden(env, dest, k, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vfwsub.wf vd, vs2, rs1, vm # vector-scalar */
> +void VECTOR_HELPER(vfwsub_wf)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, k, dest, src2;
> +
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> + if (vector_vtype_ill(env)
> + || vector_overlap_vm_force(vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs2, true);
> + vector_lmul_check_reg(env, lmul, rd, true);
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / (2 * width)));
> + src2 = rs2 + (i / (VLEN / (2 * width)));
> + k = i % (VLEN / (2 * width));
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f32[k] = float32_sub(
> + env->vfp.vreg[src2].f32[k],
> + float16_to_float32(env->fpr[rs1], true,
> + &env->fp_status),
> + &env->fp_status);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f64[k] = float64_sub(
> + env->vfp.vreg[src2].f64[k],
> + float32_to_float64(env->fpr[rs1], &env->fp_status),
> + &env->fp_status);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_fwiden(env, dest, k, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
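> +/* vwsub.wv vd, vs2, vs1, vm # Widening signed subtract, 2*SEW = 2*SEW - SEW */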
> +void VECTOR_HELPER(vwsub_wv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, k, vl;
> + uint32_t lmul, width, src1, src2, dest, vlmax;
> +
> + vl = env->vfp.vl;
> +
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env)
> + || vector_overlap_vm_force(vm, rd)
> + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs1, lmul)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, true);
> + vector_lmul_check_reg(env, lmul, rd, true);
> +
> + for (i = 0; i < vlmax; i++) {
> + src1 = rs1 + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / (2 * width)));
> + dest = rd + (i / (VLEN / (2 * width)));
> + j = i % (VLEN / width);
> + k = i % (VLEN / (2 * width));
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s16[k] =
> + (int16_t)env->vfp.vreg[src2].s16[k] -
> + (int16_t)((int8_t)env->vfp.vreg[src1].s8[j]);
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s32[k] =
> + (int32_t)env->vfp.vreg[src2].s32[k] -
> + (int32_t)((int16_t)env->vfp.vreg[src1].s16[j]);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s64[k] =
> + (int64_t)env->vfp.vreg[src2].s64[k] -
> + (int64_t)((int32_t)env->vfp.vreg[src1].s32[j]);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_widen(env, dest, k, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
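> +/* vwsub.wx vd, vs2, rs1, vm # Widening signed subtract, 2*SEW = 2*SEW - SEW */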
> +void VECTOR_HELPER(vwsub_wx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, k, vl;
> + uint32_t lmul, width, src2, dest, vlmax;
> +
> + vl = env->vfp.vl;
> +
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env)
> + || vector_overlap_vm_force(vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, true);
> + vector_lmul_check_reg(env, lmul, rd, true);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / (2 * width)));
> + dest = rd + (i / (VLEN / (2 * width)));
> + k = i % (VLEN / (2 * width));
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s16[k] =
> + (int16_t)env->vfp.vreg[src2].s16[k] -
> + (int16_t)((int8_t)env->gpr[rs1]);
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s32[k] =
> + (int32_t)env->vfp.vreg[src2].s32[k] -
> + (int32_t)((int16_t)env->gpr[rs1]);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s64[k] =
> + (int64_t)env->vfp.vreg[src2].s64[k] -
> + (int64_t)((int32_t)env->gpr[rs1]);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_widen(env, dest, k, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
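> +/* vwmulu.vv vd, vs2, vs1, vm # Widening unsigned multiply, 2*SEW = SEW * SEW */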
> +void VECTOR_HELPER(vwmulu_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, k, vl;
> + uint32_t lmul, width, src1, src2, dest, vlmax;
> +
> + vl = env->vfp.vl;
> +
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env)
> + || vector_overlap_vm_force(vm, rd)
> + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs1, lmul)
> + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, true);
> +
> + for (i = 0; i < vlmax; i++) {
> + src1 = rs1 + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / (2 * width)));
> + j = i % (VLEN / width);
> + k = i % (VLEN / (2 * width));
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u16[k] =
> + (uint16_t)env->vfp.vreg[src1].u8[j] *
> + (uint16_t)env->vfp.vreg[src2].u8[j];
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u32[k] =
> + (uint32_t)env->vfp.vreg[src1].u16[j] *
> + (uint32_t)env->vfp.vreg[src2].u16[j];
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u64[k] =
> + (uint64_t)env->vfp.vreg[src1].u32[j] *
> + (uint64_t)env->vfp.vreg[src2].u32[j];
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_widen(env, dest, k, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
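> +
> +/* vwmulu.vx vd, vs2, rs1, vm # Widening unsigned multiply, 2*SEW = SEW * SEW */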
> +void VECTOR_HELPER(vwmulu_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, k, vl;
> + uint32_t lmul, width, src2, dest, vlmax;
> +
> + vl = env->vfp.vl;
> +
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env)
> + || vector_overlap_vm_force(vm, rd)
> + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, true);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / (2 * width)));
> + j = i % (VLEN / width);
> + k = i % (VLEN / (2 * width));
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u16[k] =
> + (uint16_t)env->vfp.vreg[src2].u8[j] *
> + (uint16_t)((uint8_t)env->gpr[rs1]);
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u32[k] =
> + (uint32_t)env->vfp.vreg[src2].u16[j] *
> + (uint32_t)((uint16_t)env->gpr[rs1]);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u64[k] =
> + (uint64_t)env->vfp.vreg[src2].u32[j] *
> + (uint64_t)((uint32_t)env->gpr[rs1]);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_widen(env, dest, k, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vfwmul.vv vd, vs2, vs1, vm # vector-vector */
> +void VECTOR_HELPER(vfwmul_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, k, dest, src1, src2;
> +
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> + if (vector_vtype_ill(env)
> + || vector_overlap_vm_force(vm, rd)
> + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs1, lmul)
> + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, true);
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / (2 * width)));
> + src2 = rs2 + (i / (VLEN / width));
> + src1 = rs1 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + k = i % (VLEN / (2 * width));
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f32[k] = float32_mul(
> + float16_to_float32(env->vfp.vreg[src2].f16[j], true,
> + &env->fp_status),
> + float16_to_float32(env->vfp.vreg[src1].f16[j], true,
> + &env->fp_status),
> + &env->fp_status);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f64[k] = float64_mul(
> + float32_to_float64(env->vfp.vreg[src2].f32[j],
> + &env->fp_status),
> + float32_to_float64(env->vfp.vreg[src1].f32[j],
> + &env->fp_status),
> + &env->fp_status);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_fwiden(env, dest, k, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vfwmul.vf vd, vs2, rs1, vm # vector-scalar */
> +void VECTOR_HELPER(vfwmul_vf)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, k, dest, src2;
> +
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> + if (vector_vtype_ill(env)
> + || vector_overlap_vm_force(vm, rd)
> + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, true);
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / (2 * width)));
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + k = i % (VLEN / (2 * width));
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f32[k] = float32_mul(
> + float16_to_float32(env->vfp.vreg[src2].f16[j], true,
> + &env->fp_status),
> + float16_to_float32(env->fpr[rs1], true,
> + &env->fp_status),
> + &env->fp_status);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f64[k] = float64_mul(
> + float32_to_float64(env->vfp.vreg[src2].f32[j],
> + &env->fp_status),
> + float32_to_float64(env->fpr[rs1], &env->fp_status),
> + &env->fp_status);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_fwiden(env, dest, k, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
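> +/* vwmulsu.vv vd, vs2, vs1, vm # Widening signed(vs2)-unsigned(vs1) multiply */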
> +void VECTOR_HELPER(vwmulsu_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, k, vl;
> + uint32_t lmul, width, src1, src2, dest, vlmax;
> +
> + vl = env->vfp.vl;
> +
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env)
> + || vector_overlap_vm_force(vm, rd)
> + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs1, lmul)
> + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, true);
> +
> + for (i = 0; i < vlmax; i++) {
> + src1 = rs1 + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / (2 * width)));
> + j = i % (VLEN / width);
> + k = i % (VLEN / (2 * width));
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s16[k] =
> + (int16_t)env->vfp.vreg[src2].s8[j] *
> + (uint16_t)env->vfp.vreg[src1].u8[j];
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s32[k] =
> + (int32_t)env->vfp.vreg[src2].s16[j] *
> + (uint32_t)env->vfp.vreg[src1].u16[j];
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s64[k] =
> + (int64_t)env->vfp.vreg[src2].s32[j] *
> + (uint64_t)env->vfp.vreg[src1].u32[j];
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_widen(env, dest, k, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
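> +
> +/* vwmulsu.vx vd, vs2, rs1, vm # Widening signed(vs2)-unsigned(rs1) multiply */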
> +void VECTOR_HELPER(vwmulsu_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, k, vl;
> + uint32_t lmul, width, src2, dest, vlmax;
> +
> + vl = env->vfp.vl;
> +
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env)
> + || vector_overlap_vm_force(vm, rd)
> + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, true);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / (2 * width)));
> + j = i % (VLEN / width);
> + k = i % (VLEN / (2 * width));
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s16[k] =
> + (int16_t)((int8_t)env->vfp.vreg[src2].s8[j]) *
> + (uint16_t)((uint8_t)env->gpr[rs1]);
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s32[k] =
> + (int32_t)((int16_t)env->vfp.vreg[src2].s16[j]) *
> + (uint32_t)((uint16_t)env->gpr[rs1]);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s64[k] =
> + (int64_t)((int32_t)env->vfp.vreg[src2].s32[j]) *
> + (uint64_t)((uint32_t)env->gpr[rs1]);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_widen(env, dest, k, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +void VECTOR_HELPER(vwmul_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, k, vl;
> + uint32_t lmul, width, src1, src2, dest, vlmax;
> +
> + vl = env->vfp.vl;
> +
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env)
> + || vector_overlap_vm_force(vm, rd)
> + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs1, lmul)
> + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, true);
> +
> + for (i = 0; i < vlmax; i++) {
> + src1 = rs1 + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / (2 * width)));
> + j = i % (VLEN / width);
> + k = i % (VLEN / (2 * width));
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s16[k] =
> + (int16_t)env->vfp.vreg[src1].s8[j] *
> + (int16_t)env->vfp.vreg[src2].s8[j];
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s32[k] =
> + (int32_t)env->vfp.vreg[src1].s16[j] *
> + (int32_t)env->vfp.vreg[src2].s16[j];
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s64[k] =
> + (int64_t)env->vfp.vreg[src1].s32[j] *
> + (int64_t)env->vfp.vreg[src2].s32[j];
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_widen(env, dest, k, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +void VECTOR_HELPER(vwmul_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, k, vl;
> + uint32_t lmul, width, src2, dest, vlmax;
> +
> + vl = env->vfp.vl;
> +
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env)
> + || vector_overlap_vm_force(vm, rd)
> + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, true);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / (2 * width)));
> + j = i % (VLEN / width);
> + k = i % (VLEN / (2 * width));
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s16[k] =
> + (int16_t)((int8_t)env->vfp.vreg[src2].s8[j]) *
> + (int16_t)((int8_t)env->gpr[rs1]);
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s32[k] =
> + (int32_t)((int16_t)env->vfp.vreg[src2].s16[j]) *
> + (int32_t)((int16_t)env->gpr[rs1]);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s64[k] =
> + (int64_t)((int32_t)env->vfp.vreg[src2].s32[j]) *
> + (int64_t)((int32_t)env->gpr[rs1]);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_widen(env, dest, k, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
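
A note on the double casts in the .vx forms above: the inner cast truncates
the XLEN-wide x[rs1] value down to the source element size and fixes its
signedness, and the outer cast then widens it to the product type, so
exactly one sign- or zero-extension happens, from SEW bits up. A minimal
illustration of why the inner cast matters (values only for demonstration):

    uint64_t gpr = 0x0180;                        /* only low SEW bits count */
    int16_t with_inner = (int16_t)((int8_t)gpr);  /* 0x80 -> -128            */
    int16_t without    = (int16_t)gpr;            /* 0x0180 -> 384, wrong    */
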
> +/*
> + * vwsmaccu.vv vd, vs1, vs2, vm #
> + * vd[i] = clipu((+(vs1[i]*vs2[i]+round)>>SEW/2)+vd[i])
> + */
> +void VECTOR_HELPER(vwsmaccu_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, k, dest, src1, src2;
> +
> + lmul = vector_get_lmul(env);
> +
> + if (vector_vtype_ill(env)
> + || vector_overlap_vm_force(vm, rd)
> + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs1, lmul)
> + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, true);
> +
> + vl = env->vfp.vl;
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / (2 * width)));
> + src2 = rs2 + (i / (VLEN / width));
> + src1 = rs1 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + k = i % (VLEN / (2 * width));
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u16[k] = vwsmaccu_8(env,
> + env->vfp.vreg[src2].u8[j],
> + env->vfp.vreg[src1].u8[j],
> + env->vfp.vreg[dest].u16[k]);
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u32[k] = vwsmaccu_16(env,
> + env->vfp.vreg[src2].u16[j],
> + env->vfp.vreg[src1].u16[j],
> + env->vfp.vreg[dest].u32[k]);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u64[k] = vwsmaccu_32(env,
> + env->vfp.vreg[src2].u32[j],
> + env->vfp.vreg[src1].u32[j],
> + env->vfp.vreg[dest].u64[k]);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_widen(env, dest, k, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
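
The fixed-point helpers vwsmaccu_8/16/32 are defined earlier in this file
and are not visible in this hunk. As a rough model of the comment above
(multiply, add the rounding increment, shift right by SEW/2, then unsigned
saturating accumulate), something like the sketch below, assuming
round-to-nearest-up and ignoring the vxrm/vxsat handling:

    /* Sketch only; the real helper also honours vxrm and sets vxsat. */
    static uint16_t vwsmaccu_8_sketch(uint8_t vs2, uint8_t vs1, uint16_t vd)
    {
        uint32_t prod = (uint32_t)vs1 * vs2;        /* 8x8 -> 16-bit product */
        uint32_t shifted = (prod + (1u << 3)) >> 4; /* +round, >> SEW/2 == 4 */
        uint32_t sum = shifted + vd;
        return sum > UINT16_MAX ? UINT16_MAX : sum; /* clipu to 16 bits */
    }
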
> +
> +/*
> + * vwsmaccu.vx vd, rs1, vs2, vm #
> + * vd[i] = clipu((+(x[rs1]*vs2[i]+round)>>SEW/2)+vd[i])
> + */
> +void VECTOR_HELPER(vwsmaccu_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, k, dest, src2;
> +
> + lmul = vector_get_lmul(env);
> +
> + if (vector_vtype_ill(env)
> + || vector_overlap_vm_force(vm, rd)
> + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, true);
> +
> + vl = env->vfp.vl;
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / (2 * width)));
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + k = i % (VLEN / (2 * width));
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u16[k] = vwsmaccu_8(env,
> + env->vfp.vreg[src2].u8[j],
> + env->gpr[rs1],
> + env->vfp.vreg[dest].u16[k]);
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u32[k] = vwsmaccu_16(env,
> + env->vfp.vreg[src2].u16[j],
> + env->gpr[rs1],
> + env->vfp.vreg[dest].u32[k]);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u64[k] = vwsmaccu_32(env,
> + env->vfp.vreg[src2].u32[j],
> + env->gpr[rs1],
> + env->vfp.vreg[dest].u64[k]);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_widen(env, dest, k, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vwmaccu_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, k, vl;
> + uint32_t lmul, width, src1, src2, dest, vlmax;
> +
> + vl = env->vfp.vl;
> +
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env)
> + || vector_overlap_vm_force(vm, rd)
> + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs1, lmul)
> + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, true);
> +
> + for (i = 0; i < vlmax; i++) {
> + src1 = rs1 + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / (2 * width)));
> + j = i % (VLEN / width);
> + k = i % (VLEN / (2 * width));
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u16[k] +=
> + (uint16_t)env->vfp.vreg[src1].u8[j] *
> + (uint16_t)env->vfp.vreg[src2].u8[j];
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u32[k] +=
> + (uint32_t)env->vfp.vreg[src1].u16[j] *
> + (uint32_t)env->vfp.vreg[src2].u16[j];
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u64[k] +=
> + (uint64_t)env->vfp.vreg[src1].u32[j] *
> + (uint64_t)env->vfp.vreg[src2].u32[j];
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_widen(env, dest, k, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +void VECTOR_HELPER(vwmaccu_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, k, vl;
> + uint32_t lmul, width, src2, dest, vlmax;
> +
> + vl = env->vfp.vl;
> +
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env)
> + || vector_overlap_vm_force(vm, rd)
> + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, true);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / (2 * width)));
> + j = i % (VLEN / width);
> + k = i % (VLEN / (2 * width));
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u16[k] +=
> + (uint16_t)env->vfp.vreg[src2].u8[j] *
> + (uint16_t)((uint8_t)env->gpr[rs1]);
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u32[k] +=
> + (uint32_t)env->vfp.vreg[src2].u16[j] *
> + (uint32_t)((uint16_t)env->gpr[rs1]);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u64[k] +=
> + (uint64_t)env->vfp.vreg[src2].u32[j] *
> + (uint64_t)((uint32_t)env->gpr[rs1]);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_widen(env, dest, k, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
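
In contrast to the saturating vwsmaccu/vwsmacc family, vwmaccu and vwmacc
are the plain widening multiply-accumulates: the double-width product is
added to the accumulator with ordinary modular wrap-around, and there is
no rounding, shifting or clipping. One lane at SEW=8 behaves like this
sketch (the name is illustrative, not from the patch):

    static uint16_t vwmaccu_8_lane(uint8_t vs2, uint8_t vs1, uint16_t vd)
    {
        /* Modular 16-bit accumulate: may wrap, never saturates. */
        return vd + (uint16_t)vs1 * (uint16_t)vs2;
    }
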
> +
> +/* vfwmacc.vv vd, vs1, vs2, vm # vd[i] = +(vs1[i] * vs2[i]) + vd[i] */
> +void VECTOR_HELPER(vfwmacc_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, k, dest, src1, src2;
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> + if (vector_vtype_ill(env)
> + || vector_overlap_vm_force(vm, rd)
> + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs1, lmul)
> + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, true);
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / (2 * width)));
> + src2 = rs2 + (i / (VLEN / width));
> + src1 = rs1 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + k = i % (VLEN / (2 * width));
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f32[k] = float32_muladd(
> + float16_to_float32(env->vfp.vreg[src1].f16[j], true,
> + &env->fp_status),
> + float16_to_float32(env->vfp.vreg[src2].f16[j], true,
> + &env->fp_status),
> + env->vfp.vreg[dest].f32[k],
> + 0,
> + &env->fp_status);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f64[k] = float64_muladd(
> + float32_to_float64(env->vfp.vreg[src1].f32[j],
> + &env->fp_status),
> + float32_to_float64(env->vfp.vreg[src2].f32[j],
> + &env->fp_status),
> + env->vfp.vreg[dest].f64[k],
> + 0,
> + &env->fp_status);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + switch (width) {
> + case 16:
> + env->vfp.vreg[dest].f32[k] = 0;
> + break;
> + case 32:
> + env->vfp.vreg[dest].f64[k] = 0;
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + }
> + }
> + env->vfp.vstart = 0;
> +}
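
The widening floating-point multiply-add family in this file differs only
in the flags argument handed to softfloat's fused muladd; the mapping,
read off this patch, is:

    /*
     * vfwmacc:  vd[i] =  (vs1 * vs2) + vd[i]  ->  0
     * vfwnmacc: vd[i] = -(vs1 * vs2) - vd[i]  ->  float_muladd_negate_product |
     *                                             float_muladd_negate_c
     * vfwmsac:  vd[i] =  (vs1 * vs2) - vd[i]  ->  float_muladd_negate_c
     * vfwnmsac: vd[i] = -(vs1 * vs2) + vd[i]  ->  float_muladd_negate_product
     */
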
> +
> +/* vfwmacc.vf vd, rs1, vs2, vm # vd[i] = +(f[rs1] * vs2[i]) + vd[i] */
> +void VECTOR_HELPER(vfwmacc_vf)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, k, dest, src2;
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> + if (vector_vtype_ill(env)
> + || vector_overlap_vm_force(vm, rd)
> + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, true);
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / (2 * width)));
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + k = i % (VLEN / (2 * width));
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f32[k] = float32_muladd(
> + env->fpr[rs1],
> + float16_to_float32(env->vfp.vreg[src2].f16[j], true,
> + &env->fp_status),
> + env->vfp.vreg[dest].f32[k],
> + 0,
> + &env->fp_status);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f64[k] = float64_muladd(
> + env->fpr[rs1],
> + float32_to_float64(env->vfp.vreg[src2].f32[j],
> + &env->fp_status),
> + env->vfp.vreg[dest].f64[k],
> + 0,
> + &env->fp_status);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + switch (width) {
> + case 16:
> + env->vfp.vreg[dest].f32[k] = 0;
> + break;
> + case 32:
> + env->vfp.vreg[dest].f64[k] = 0;
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/*
> + * vwsmacc.vv vd, vs1, vs2, vm #
> + * vd[i] = clip((+(vs1[i]*vs2[i]+round)>>SEW/2)+vd[i])
> + */
> +void VECTOR_HELPER(vwsmacc_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, k, dest, src1, src2;
> +
> + lmul = vector_get_lmul(env);
> +
> + if (vector_vtype_ill(env)
> + || vector_overlap_vm_force(vm, rd)
> + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs1, lmul)
> + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, true);
> +
> + vl = env->vfp.vl;
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / (2 * width)));
> + src2 = rs2 + (i / (VLEN / width));
> + src1 = rs1 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + k = i % (VLEN / (2 * width));
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s16[k] = vwsmacc_8(env,
> + env->vfp.vreg[src2].s8[j],
> + env->vfp.vreg[src1].s8[j],
> + env->vfp.vreg[dest].s16[k]);
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s32[k] = vwsmacc_16(env,
> + env->vfp.vreg[src2].s16[j],
> + env->vfp.vreg[src1].s16[j],
> + env->vfp.vreg[dest].s32[k]);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s64[k] = vwsmacc_32(env,
> + env->vfp.vreg[src2].s32[j],
> + env->vfp.vreg[src1].s32[j],
> + env->vfp.vreg[dest].s64[k]);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_widen(env, dest, k, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/*
> + * vwsmacc.vx vd, rs1, vs2, vm #
> + * vd[i] = clip((+(x[rs1]*vs2[i]+round)>>SEW/2)+vd[i])
> + */
> +void VECTOR_HELPER(vwsmacc_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, k, dest, src2;
> +
> + lmul = vector_get_lmul(env);
> +
> + if (vector_vtype_ill(env)
> + || vector_overlap_vm_force(vm, rd)
> + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, true);
> +
> + vl = env->vfp.vl;
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / (2 * width)));
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + k = i % (VLEN / (2 * width));
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s16[k] = vwsmacc_8(env,
> + env->vfp.vreg[src2].s8[j],
> + env->gpr[rs1],
> + env->vfp.vreg[dest].s16[k]);
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s32[k] = vwsmacc_16(env,
> + env->vfp.vreg[src2].s16[j],
> + env->gpr[rs1],
> + env->vfp.vreg[dest].s32[k]);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s64[k] = vwsmacc_32(env,
> + env->vfp.vreg[src2].s32[j],
> + env->gpr[rs1],
> + env->vfp.vreg[dest].s64[k]);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_widen(env, dest, k, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
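
The rounding increment hidden inside the vwsmacc_*/vwsmaccu_* helpers is
selected by the vxrm CSR (in v0.7.1: 0 round-to-nearest-up, 1
round-to-nearest-even, 2 round-down/truncate, 3 round-to-odd). A
self-contained sketch of such a round-and-shift step, under the assumption
that the patch's helpers implement the same table (the name is
illustrative):

    /* Compute v >> shift rounded according to vxrm; shift must be >= 1. */
    static uint64_t roundoff_u64_sketch(int vxrm, uint64_t v, unsigned shift)
    {
        uint64_t r = v >> shift;
        uint64_t guard = (v >> (shift - 1)) & 1;
        uint64_t rest = v & ((1ULL << (shift - 1)) - 1);
        switch (vxrm) {
        case 0:  /* rnu: round to nearest, ties up */
            return r + guard;
        case 1:  /* rne: round to nearest, ties to even */
            return r + (guard && (rest || (r & 1)));
        case 2:  /* rdn: truncate */
            return r;
        default: /* rod: round to odd (OR in the sticky bits) */
            return r | ((v & ((1ULL << shift) - 1)) != 0);
        }
    }
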
> +
> +/*
> + * vwsmaccsu.vv vd, vs1, vs2, vm
> + * # vd[i] = clip((+(signed(vs1[i])*unsigned(vs2[i])+round)>>SEW/2)+vd[i])
> + */
> +void VECTOR_HELPER(vwsmaccsu_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, k, dest, src1, src2;
> +
> + lmul = vector_get_lmul(env);
> +
> + if (vector_vtype_ill(env)
> + || vector_overlap_vm_force(vm, rd)
> + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs1, lmul)
> + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, true);
> +
> + vl = env->vfp.vl;
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / (2 * width)));
> + src2 = rs2 + (i / (VLEN / width));
> + src1 = rs1 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + k = i % (VLEN / (2 * width));
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s16[k] = vwsmaccsu_8(env,
> + env->vfp.vreg[src2].u8[j],
> + env->vfp.vreg[src1].s8[j],
> + env->vfp.vreg[dest].s16[k]);
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s32[k] = vwsmaccsu_16(env,
> + env->vfp.vreg[src2].u16[j],
> + env->vfp.vreg[src1].s16[j],
> + env->vfp.vreg[dest].s32[k]);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s64[k] = vwsmaccsu_32(env,
> + env->vfp.vreg[src2].u32[j],
> + env->vfp.vreg[src1].s32[j],
> + env->vfp.vreg[dest].s64[k]);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_widen(env, dest, k, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/*
> + * vwsmaccsu.vx vd, rs1, vs2, vm
> + * # vd[i] = clip((+(signed(x[rs1])*unsigned(vs2[i])+round)>>SEW/2)+vd[i])
> + */
> +void VECTOR_HELPER(vwsmaccsu_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, k, dest, src2;
> +
> + lmul = vector_get_lmul(env);
> +
> + if (vector_vtype_ill(env)
> + || vector_overlap_vm_force(vm, rd)
> + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, true);
> +
> + vl = env->vfp.vl;
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / (2 * width)));
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + k = i % (VLEN / (2 * width));
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s16[k] = vwsmaccsu_8(env,
> + env->vfp.vreg[src2].u8[j],
> + env->gpr[rs1],
> + env->vfp.vreg[dest].s16[k]);
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s32[k] = vwsmaccsu_16(env,
> + env->vfp.vreg[src2].u16[j],
> + env->gpr[rs1],
> + env->vfp.vreg[dest].s32[k]);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s64[k] = vwsmaccsu_32(env,
> + env->vfp.vreg[src2].u32[j],
> + env->gpr[rs1],
> + env->vfp.vreg[dest].s64[k]);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_widen(env, dest, k, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/*
> + * vwsmaccus.vx vd, rs1, vs2, vm
> + * # vd[i] = clip((+(unsigned(x[rs1])*signed(vs2[i])+round)>>SEW/2)+vd[i])
> + */
> +void VECTOR_HELPER(vwsmaccus_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, k, dest, src2;
> +
> + lmul = vector_get_lmul(env);
> +
> + if (vector_vtype_ill(env)
> + || vector_overlap_vm_force(vm, rd)
> + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, true);
> +
> + vl = env->vfp.vl;
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / (2 * width)));
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + k = i % (VLEN / (2 * width));
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s16[k] = vwsmaccus_8(env,
> + env->vfp.vreg[src2].s8[j],
> + env->gpr[rs1],
> + env->vfp.vreg[dest].s16[k]);
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s32[k] = vwsmaccus_16(env,
> + env->vfp.vreg[src2].s16[j],
> + env->gpr[rs1],
> + env->vfp.vreg[dest].s32[k]);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s64[k] = vwsmaccus_32(env,
> + env->vfp.vreg[src2].s32[j],
> + env->gpr[rs1],
> + env->vfp.vreg[dest].s64[k]);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_widen(env, dest, k, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vwmacc_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, k, vl;
> + uint32_t lmul, width, src1, src2, dest, vlmax;
> +
> + vl = env->vfp.vl;
> +
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env)
> + || vector_overlap_vm_force(vm, rd)
> + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs1, lmul)
> + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, true);
> +
> + for (i = 0; i < vlmax; i++) {
> + src1 = rs1 + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / (2 * width)));
> + j = i % (VLEN / width);
> + k = i % (VLEN / (2 * width));
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s16[k] +=
> + (int16_t)env->vfp.vreg[src1].s8[j]
> + * (int16_t)env->vfp.vreg[src2].s8[j];
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s32[k] +=
> + (int32_t)env->vfp.vreg[src1].s16[j] *
> + (int32_t)env->vfp.vreg[src2].s16[j];
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s64[k] +=
> + (int64_t)env->vfp.vreg[src1].s32[j] *
> + (int64_t)env->vfp.vreg[src2].s32[j];
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_widen(env, dest, k, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +void VECTOR_HELPER(vwmacc_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, k, vl;
> + uint32_t lmul, width, src2, dest, vlmax;
> +
> + vl = env->vfp.vl;
> +
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env)
> + || vector_overlap_vm_force(vm, rd)
> + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, true);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / (2 * width)));
> + j = i % (VLEN / width);
> + k = i % (VLEN / (2 * width));
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s16[k] +=
> + (int16_t)((int8_t)env->vfp.vreg[src2].s8[j]) *
> + (int16_t)((int8_t)env->gpr[rs1]);
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s32[k] +=
> + (int32_t)((int16_t)env->vfp.vreg[src2].s16[j]) *
> + (int32_t)((int16_t)env->gpr[rs1]);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s64[k] +=
> + (int64_t)((int32_t)env->vfp.vreg[src2].s32[j]) *
> + (int64_t)((int32_t)env->gpr[rs1]);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_widen(env, dest, k, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vfwnmacc.vv vd, vs1, vs2, vm # vd[i] = -(vs1[i] * vs2[i]) - vd[i] */
> +void VECTOR_HELPER(vfwnmacc_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, k, dest, src1, src2;
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> + if (vector_vtype_ill(env)
> + || vector_overlap_vm_force(vm, rd)
> + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs1, lmul)
> + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, true);
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / (2 * width)));
> + src2 = rs2 + (i / (VLEN / width));
> + src1 = rs1 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + k = i % (VLEN / (2 * width));
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f32[k] = float32_muladd(
> + float16_to_float32(env->vfp.vreg[src1].f16[j], true,
> + &env->fp_status),
> + float16_to_float32(env->vfp.vreg[src2].f16[j], true,
> + &env->fp_status),
> + env->vfp.vreg[dest].f32[k],
> + float_muladd_negate_c |
> + float_muladd_negate_product,
> + &env->fp_status);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f64[k] = float64_muladd(
> + float32_to_float64(env->vfp.vreg[src1].f32[j],
> + &env->fp_status),
> + float32_to_float64(env->vfp.vreg[src2].f32[j],
> + &env->fp_status),
> + env->vfp.vreg[dest].f64[k],
> + float_muladd_negate_c |
> + float_muladd_negate_product,
> + &env->fp_status);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + switch (width) {
> + case 16:
> + env->vfp.vreg[dest].f32[k] = 0;
> + break;
> + case 32:
> + env->vfp.vreg[dest].f64[k] = 0;
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vfwnmacc.vf vd, rs1, vs2, vm # vd[i] = -(f[rs1] * vs2[i]) - vd[i] */
> +void VECTOR_HELPER(vfwnmacc_vf)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, k, dest, src2;
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> + if (vector_vtype_ill(env)
> + || vector_overlap_vm_force(vm, rd)
> + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, true);
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / (2 * width)));
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + k = i % (VLEN / (2 * width));
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f32[k] = float32_muladd(
> + env->fpr[rs1],
> + float16_to_float32(env->vfp.vreg[src2].f16[j], true,
> + &env->fp_status),
> + env->vfp.vreg[dest].f32[k],
> + float_muladd_negate_c |
> + float_muladd_negate_product,
> + &env->fp_status);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f64[k] = float64_muladd(
> + env->fpr[rs1],
> + float32_to_float64(env->vfp.vreg[src2].f32[j],
> + &env->fp_status),
> + env->vfp.vreg[dest].f64[k],
> + float_muladd_negate_c |
> + float_muladd_negate_product,
> + &env->fp_status);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + switch (width) {
> + case 16:
> + env->vfp.vreg[dest].f32[k] = 0;
> + break;
> + case 32:
> + env->vfp.vreg[dest].f64[k] = 0;
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vwmaccsu_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, k, vl;
> + uint32_t lmul, width, src1, src2, dest, vlmax;
> +
> + vl = env->vfp.vl;
> +
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env)
> + || vector_overlap_vm_force(vm, rd)
> + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs1, lmul)
> + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, true);
> +
> + for (i = 0; i < vlmax; i++) {
> + src1 = rs1 + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / (2 * width)));
> + j = i % (VLEN / width);
> + k = i % (VLEN / (2 * width));
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s16[k] +=
> + (int16_t)env->vfp.vreg[src1].s8[j]
> + * (uint16_t)env->vfp.vreg[src2].u8[j];
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s32[k] +=
> + (int32_t)env->vfp.vreg[src1].s16[j] *
> + (uint32_t)env->vfp.vreg[src2].u16[j];
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s64[k] +=
> + (int64_t)env->vfp.vreg[src1].s32[j] *
> + (uint64_t)env->vfp.vreg[src2].u32[j];
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_widen(env, dest, k, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +void VECTOR_HELPER(vwmaccsu_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, k, vl;
> + uint32_t lmul, width, src2, dest, vlmax;
> +
> + vl = env->vfp.vl;
> +
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env)
> + || vector_overlap_vm_force(vm, rd)
> + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, true);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / (2 * width)));
> + j = i % (VLEN / width);
> + k = i % (VLEN / (2 * width));
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s16[k] +=
> + (uint16_t)((uint8_t)env->vfp.vreg[src2].u8[j]) *
> + (int16_t)((int8_t)env->gpr[rs1]);
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s32[k] +=
> + (uint32_t)((uint16_t)env->vfp.vreg[src2].u16[j]) *
> + (int32_t)((int16_t)env->gpr[rs1]);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s64[k] +=
> + (uint64_t)((uint32_t)env->vfp.vreg[src2].u32[j]) *
> + (int64_t)((int32_t)env->gpr[rs1]);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_widen(env, dest, k, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vfwmsac.vv vd, vs1, vs2, vm # vd[i] = +(vs1[i] * vs2[i]) - vd[i] */
> +void VECTOR_HELPER(vfwmsac_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, k, dest, src1, src2;
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> + if (vector_vtype_ill(env)
> + || vector_overlap_vm_force(vm, rd)
> + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs1, lmul)
> + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, true);
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / (2 * width)));
> + src2 = rs2 + (i / (VLEN / width));
> + src1 = rs1 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + k = i % (VLEN / (2 * width));
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f32[k] = float32_muladd(
> + float16_to_float32(env->vfp.vreg[src1].f16[j], true,
> + &env->fp_status),
> + float16_to_float32(env->vfp.vreg[src2].f16[j], true,
> + &env->fp_status),
> + env->vfp.vreg[dest].f32[k],
> + float_muladd_negate_c,
> + &env->fp_status);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f64[k] = float64_muladd(
> + float32_to_float64(env->vfp.vreg[src1].f32[j],
> + &env->fp_status),
> + float32_to_float64(env->vfp.vreg[src2].f32[j],
> + &env->fp_status),
> + env->vfp.vreg[dest].f64[k],
> + float_muladd_negate_c,
> + &env->fp_status);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + switch (width) {
> + case 16:
> + env->vfp.vreg[dest].f32[k] = 0;
> + break;
> + case 32:
> + env->vfp.vreg[dest].f64[k] = 0;
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vfwmsac.vf vd, rs1, vs2, vm # vd[i] = +(f[rs1] * vs2[i]) - vd[i] */
> +void VECTOR_HELPER(vfwmsac_vf)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, k, dest, src2;
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> + if (vector_vtype_ill(env)
> + || vector_overlap_vm_force(vm, rd)
> + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, true);
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / (2 * width)));
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + k = i % (VLEN / (2 * width));
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f32[k] = float32_muladd(
> + env->fpr[rs1],
> + float16_to_float32(env->vfp.vreg[src2].f16[j], true,
> + &env->fp_status),
> + env->vfp.vreg[dest].f32[k],
> + float_muladd_negate_c,
> + &env->fp_status);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f64[k] = float64_muladd(
> + env->fpr[rs1],
> + float32_to_float64(env->vfp.vreg[src2].f32[j],
> + &env->fp_status),
> + env->vfp.vreg[dest].f64[k],
> + float_muladd_negate_c,
> + &env->fp_status);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + switch (width) {
> + case 16:
> + env->vfp.vreg[dest].f32[k] = 0;
> + break;
> + case 32:
> + env->vfp.vreg[dest].f64[k] = 0;
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vwmaccus_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, k, vl;
> + uint32_t lmul, width, src2, dest, vlmax;
> +
> + vl = env->vfp.vl;
> +
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env)
> + || vector_overlap_vm_force(vm, rd)
> + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, true);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / (2 * width)));
> + j = i % (VLEN / width);
> + k = i % (VLEN / (2 * width));
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s16[k] +=
> + (int16_t)((int8_t)env->vfp.vreg[src2].s8[j]) *
> + (uint16_t)((uint8_t)env->gpr[rs1]);
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s32[k] +=
> + (int32_t)((int16_t)env->vfp.vreg[src2].s16[j]) *
> + (uint32_t)((uint16_t)env->gpr[rs1]);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s64[k] +=
> + (int64_t)((int32_t)env->vfp.vreg[src2].s32[j]) *
> + (uint64_t)((uint32_t)env->gpr[rs1]);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_widen(env, dest, k, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vfwnmsac.vv vd, vs1, vs2, vm # vd[i] = -(vs1[i] * vs2[i]) + vd[i] */
> +void VECTOR_HELPER(vfwnmsac_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, k, dest, src1, src2;
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> + if (vector_vtype_ill(env)
> + || vector_overlap_vm_force(vm, rd)
> + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs1, lmul)
> + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, true);
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / (2 * width)));
> + src2 = rs2 + (i / (VLEN / width));
> + src1 = rs1 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + k = i % (VLEN / (2 * width));
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f32[k] = float32_muladd(
> + float16_to_float32(env->vfp.vreg[src1].f16[j], true,
> + &env->fp_status),
> + float16_to_float32(env->vfp.vreg[src2].f16[j], true,
> + &env->fp_status),
> + env->vfp.vreg[dest].f32[k],
> + float_muladd_negate_product,
> + &env->fp_status);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f64[k] = float64_muladd(
> + float32_to_float64(env->vfp.vreg[src1].f32[j],
> + &env->fp_status),
> + float32_to_float64(env->vfp.vreg[src2].f32[j],
> + &env->fp_status),
> + env->vfp.vreg[dest].f64[k],
> + float_muladd_negate_product,
> + &env->fp_status);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + switch (width) {
> + case 16:
> + env->vfp.vreg[dest].f32[k] = 0;
> + break;
> + case 32:
> + env->vfp.vreg[dest].f64[k] = 0;
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vfwnmsac.vf vd, rs1, vs2, vm # vd[i] = -(f[rs1] * vs2[i]) + vd[i] */
> +void VECTOR_HELPER(vfwnmsac_vf)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, k, dest, src2;
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> + if (vector_vtype_ill(env)
> + || vector_overlap_vm_force(vm, rd)
> + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, true);
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / (2 * width)));
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + k = i % (VLEN / (2 * width));
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f32[k] = float32_muladd(
> + env->fpr[rs1],
> + float16_to_float32(env->vfp.vreg[src2].f16[j], true,
> + &env->fp_status),
> + env->vfp.vreg[dest].f32[k],
> + float_muladd_negate_product,
> + &env->fp_status);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f64[k] = float64_muladd(
> + env->fpr[rs1],
> + float32_to_float64(env->vfp.vreg[src2].f32[j],
> + &env->fp_status),
> + env->vfp.vreg[dest].f64[k],
> + float_muladd_negate_product,
> + &env->fp_status);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + switch (width) {
> + case 16:
> + env->vfp.vreg[dest].f32[k] = 0;
> + break;
> + case 32:
> + env->vfp.vreg[dest].f64[k] = 0;
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +
> +/* vfsqrt.v vd, vs2, vm # Vector-vector square root */
> +void VECTOR_HELPER(vfsqrt_v)(CPURISCVState *env, uint32_t vm, uint32_t rs2,
> + uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest, src2;
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f16[j] = float16_sqrt(
> + env->vfp.vreg[src2].f16[j],
> + &env->fp_status);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f32[j] = float32_sqrt(
> + env->vfp.vreg[src2].f32[j],
> + &env->fp_status);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f64[j] = float64_sqrt(
> + env->vfp.vreg[src2].f64[j],
> + &env->fp_status);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + switch (width) {
> + case 16:
> + env->vfp.vreg[dest].f16[j] = 0;
> + break;
> + case 32:
> + env->vfp.vreg[dest].f32[j] = 0;
> + break;
> + case 64:
> + env->vfp.vreg[dest].f64[j] = 0;
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vfclass.v vd, vs2, vm # Vector-vector */
> +void VECTOR_HELPER(vfclass_v)(CPURISCVState *env, uint32_t vm, uint32_t rs2,
> + uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest, src2;
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u16[j] = helper_fclass_h(
> + env->vfp.vreg[src2].f16[j]);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u32[j] = helper_fclass_s(
> + env->vfp.vreg[src2].f32[j]);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u64[j] = helper_fclass_d(
> + env->vfp.vreg[src2].f64[j]);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_fcommon(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
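
helper_fclass_h/s/d return the standard RISC-V fclass bit mask, which
vfclass.v simply writes out per element. For reference, the ten class bits
defined by the base FP extensions are:

    /*
     * bit 0: negative infinity      bit 5: positive subnormal
     * bit 1: negative normal        bit 6: positive normal
     * bit 2: negative subnormal     bit 7: positive infinity
     * bit 3: negative zero          bit 8: signaling NaN
     * bit 4: positive zero          bit 9: quiet NaN
     */
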
> +
> +/* vfcvt.xu.f.v vd, vs2, vm # Convert float to unsigned integer. */
> +void VECTOR_HELPER(vfcvt_xu_f_v)(CPURISCVState *env, uint32_t vm,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest, src2;
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u16[j] = float16_to_uint16(
> + env->vfp.vreg[src2].f16[j],
> + &env->fp_status);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u32[j] = float32_to_uint32(
> + env->vfp.vreg[src2].f32[j],
> + &env->fp_status);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u64[j] = float64_to_uint64(
> +
> env->vfp.vreg[src2].f64[j],
> + &env->fp_status);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_fcommon(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
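
All of the vfcvt variants lean on softfloat's conversion routines, which do
not trap: a NaN or out-of-range input is clamped to the target type's
limits and float_flag_invalid is accumulated in fp_status. A minimal
standalone illustration of the pattern, using the real softfloat API (the
input value is only for demonstration):

    float_status st = { 0 };
    set_float_rounding_mode(float_round_nearest_even, &st);
    /* -1.0f is out of range for an unsigned conversion. */
    uint32_t u = float32_to_uint32(float32_set_sign(float32_one, 1), &st);
    if (get_float_exception_flags(&st) & float_flag_invalid) {
        /* input was NaN or out of range; u was clamped (here to 0) */
    }
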
> +
> +/* vfcvt.x.f.v vd, vs2, vm # Convert float to signed integer. */
> +void VECTOR_HELPER(vfcvt_x_f_v)(CPURISCVState *env, uint32_t vm,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest, src2;
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s16[j] = float16_to_int16(
> + env->vfp.vreg[src2].f16[j],
> + &env->fp_status);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s32[j] = float32_to_int32(
> + env->vfp.vreg[src2].f32[j],
> + &env->fp_status);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s64[j] = float64_to_int64(
> + env->vfp.vreg[src2].f64[j],
> + &env->fp_status);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_fcommon(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vfcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to float. */
> +void VECTOR_HELPER(vfcvt_f_xu_v)(CPURISCVState *env, uint32_t vm,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest, src2;
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f16[j] = uint16_to_float16(
> + env->vfp.vreg[src2].u16[j],
> + &env->fp_status);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f32[j] = uint32_to_float32(
> + env->vfp.vreg[src2].u32[j],
> + &env->fp_status);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f64[j] = uint64_to_float64(
> +
> env->vfp.vreg[src2].u64[j],
> + &env->fp_status);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST,
> GETPC());
> + return;
> + }
> + } else {
> + vector_tail_fcommon(env, dest, j, width);
> + }
> + }
> + return;
> + env->vfp.vstart = 0;
> +}
> +
> +/* vfcvt.f.x.v vd, vs2, vm # Convert integer to float. */
> +void VECTOR_HELPER(vfcvt_f_x_v)(CPURISCVState *env, uint32_t vm,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest, src2;
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f16[j] = int16_to_float16(
> + env->vfp.vreg[src2].s16[j], &env->fp_status);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f32[j] = int32_to_float32(
> + env->vfp.vreg[src2].s32[j], &env->fp_status);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f64[j] = int64_to_float64(
> + env->vfp.vreg[src2].s64[j], &env->fp_status);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_fcommon(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vfwcvt.xu.f.v vd, vs2, vm # Convert float to double-width unsigned integer. */
> +void VECTOR_HELPER(vfwcvt_xu_f_v)(CPURISCVState *env, uint32_t vm,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, k, dest, src2;
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> + if (vector_vtype_ill(env)
> + || vector_overlap_vm_force(vm, rd)
> + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, true);
> +
> + if (lmul > 4) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / (2 * width)));
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + k = i % (VLEN / (2 * width));
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u32[k] = float16_to_uint32(
> + env->vfp.vreg[src2].f16[j], &env->fp_status);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u64[k] = float32_to_uint64(
> + env->vfp.vreg[src2].f32[j], &env->fp_status);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_fwiden(env, dest, k, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
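(Note that the tail call now passes k, the destination element index, matching the other widening helpers, and the default case returns like its siblings.) The widening helpers address source and destination with different element sizes, so the same i splits differently on each side. A sketch of the two mappings, under the same illustrative assumptions as the earlier snippet:

    #include <assert.h>

    #define VLEN 128

    static void widen_map(int rd, int rs2, int i, int width,
                          int *dreg, int *k, int *sreg, int *j)
    {
        *dreg = rd + i / (VLEN / (2 * width));  /* dest holds 2*SEW elements */
        *k    = i % (VLEN / (2 * width));
        *sreg = rs2 + i / (VLEN / width);       /* source holds SEW elements */
        *j    = i % (VLEN / width);
    }

    int main(void)
    {
        int dreg, k, sreg, j;
        widen_map(8, 2, 5, 16, &dreg, &k, &sreg, &j);
        /* 16-bit source: 8 per reg -> v2[5]; 32-bit dest: 4 per reg -> v9[1] */
        assert(sreg == 2 && j == 5 && dreg == 9 && k == 1);
        return 0;
    }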
> +
> +/* vfwcvt.x.f.v vd, vs2, vm # Convert float to double-width signed integer. */
> +void VECTOR_HELPER(vfwcvt_x_f_v)(CPURISCVState *env, uint32_t vm,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, k, dest, src2;
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> + if (vector_vtype_ill(env)
> + || vector_overlap_vm_force(vm, rd)
> + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, true);
> +
> + if (lmul > 4) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / (2 * width)));
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + k = i % (VLEN / (2 * width));
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s32[k] = float16_to_int32(
> + env->vfp.vreg[src2].f16[j], &env->fp_status);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s64[k] = float32_to_int64(
> + env->vfp.vreg[src2].f32[j], &env->fp_status);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_fwiden(env, dest, k, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
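The lmul > 4 guard in these widening helpers follows from the register budget: the destination group is 2 * lmul registers and a group may not exceed LMUL=8, so only lmul of 1, 2 and 4 are legal here. A throwaway check of that reasoning (illustrative code, not from the patch):

    #include <stdio.h>

    static int widen_group_fits(int lmul)
    {
        return 2 * lmul <= 8;   /* widened EMUL = 2*LMUL must stay <= 8 */
    }

    int main(void)
    {
        for (int lmul = 1; lmul <= 8; lmul *= 2) {
            printf("lmul=%d -> %s\n", lmul,
                   widen_group_fits(lmul) ? "ok" : "illegal");
        }
        return 0;
    }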
> +
> +/* vfwcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to double-width float. */
> +void VECTOR_HELPER(vfwcvt_f_xu_v)(CPURISCVState *env, uint32_t vm,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, k, dest, src2;
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> + if (vector_vtype_ill(env)
> + || vector_overlap_vm_force(vm, rd)
> + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, true);
> +
> + if (lmul > 4) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / (2 * width)));
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + k = i % (VLEN / (2 * width));
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f32[k] = uint16_to_float32(
> + env->vfp.vreg[src2].u16[j], &env->fp_status);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f64[k] = uint32_to_float64(
> + env->vfp.vreg[src2].u32[j], &env->fp_status);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_fwiden(env, dest, k, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vfwcvt.f.x.v vd, vs2, vm # Convert integer to double-width float. */
> +void VECTOR_HELPER(vfwcvt_f_x_v)(CPURISCVState *env, uint32_t vm,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, k, dest, src2;
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> + if (vector_vtype_ill(env)
> + || vector_overlap_vm_force(vm, rd)
> + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, true);
> +
> + if (lmul > 4) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / (2 * width)));
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + k = i % (VLEN / (2 * width));
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f32[k] = int16_to_float32(
> + env->vfp.vreg[src2].s16[j], &env->fp_status);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f64[k] = int32_to_float64(
> + env->vfp.vreg[src2].s32[j], &env->fp_status);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_fwiden(env, dest, k, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/*
> + * vfwcvt.f.f.v vd, vs2, vm #
> + * Convert single-width float to double-width float.
> + */
> +void VECTOR_HELPER(vfwcvt_f_f_v)(CPURISCVState *env, uint32_t vm,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, k, dest, src2;
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> + if (vector_vtype_ill(env)
> + || vector_overlap_vm_force(vm, rd)
> + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, true);
> +
> + if (lmul > 4) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / (2 * width)));
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + k = i % (VLEN / (2 * width));
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f32[k] = float16_to_float32(
> + env->vfp.vreg[src2].f16[j], true, &env->fp_status);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f64[k] = float32_to_float64(
> + env->vfp.vreg[src2].f32[j], &env->fp_status);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_fwiden(env, dest, k, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
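The extra boolean handed to float16_to_float32() here (and to float32_to_float16() in the narrowing helper further down) is softfloat's ieee flag: true selects IEEE-format half precision, with infinities and NaNs, rather than the alternative half format. Decoding 1.0 by hand shows the bit layout that flag implies (throwaway demo, not patch code; normal numbers only):

    #include <stdio.h>
    #include <stdint.h>
    #include <math.h>

    int main(void)
    {
        uint16_t h = 0x3c00;                /* 1.0 in IEEE binary16 */
        int sign = h >> 15;
        int exp  = (h >> 10) & 0x1f;        /* exponent, biased by 15 */
        int frac = h & 0x3ff;               /* 10 fraction bits */
        double v = ldexp(1.0 + frac / 1024.0, exp - 15) * (sign ? -1.0 : 1.0);
        printf("%g\n", v);                  /* prints 1 */
        return 0;
    }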
> +
> +/* vfncvt.xu.f.v vd, vs2, vm # Convert double-width float to unsigned integer. */
> +void VECTOR_HELPER(vfncvt_xu_f_v)(CPURISCVState *env, uint32_t vm,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, k, dest, src2;
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> + if (vector_vtype_ill(env) ||
> + vector_overlap_vm_common(lmul, vm, rd) ||
> + vector_overlap_dstgp_srcgp(rd, lmul, rs2, 2 * lmul)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs2, true);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + if (lmul > 4) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / (2 * width)));
> + k = i % (VLEN / width);
> + j = i % (VLEN / (2 * width));
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u16[k] = float32_to_uint16(
> + env->vfp.vreg[src2].f32[j], &env->fp_status);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u32[k] = float64_to_uint32(
> + env->vfp.vreg[src2].f64[j], &env->fp_status);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_fnarrow(env, dest, k, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vfncvt.x.f.v vd, vs2, vm # Convert double-width float to signed integer. */
> +void VECTOR_HELPER(vfncvt_x_f_v)(CPURISCVState *env, uint32_t vm,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, k, dest, src2;
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> + if (vector_vtype_ill(env) ||
> + vector_overlap_vm_common(lmul, vm, rd) ||
> + vector_overlap_dstgp_srcgp(rd, lmul, rs2, 2 * lmul)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs2, true);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + if (lmul > 4) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / (2 * width)));
> + k = i % (VLEN / width);
> + j = i % (VLEN / (2 * width));
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s16[k] = float32_to_int16(
> + env->vfp.vreg[src2].f32[j], &env->fp_status);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s32[k] = float64_to_int32(
> + env->vfp.vreg[src2].f64[j], &env->fp_status);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_fnarrow(env, dest, k, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vfncvt.f.xu.v vd, vs2, vm # Convert double-width unsigned integer to float. */
> +void VECTOR_HELPER(vfncvt_f_xu_v)(CPURISCVState *env, uint32_t vm,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, k, dest, src2;
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> + if (vector_vtype_ill(env) ||
> + vector_overlap_vm_common(lmul, vm, rd) ||
> + vector_overlap_dstgp_srcgp(rd, lmul, rs2, 2 * lmul)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, true);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + if (lmul > 4) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / (2 * width)));
> + k = i % (VLEN / width);
> + j = i % (VLEN / (2 * width));
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f16[k] = uint32_to_float16(
> + env->vfp.vreg[src2].u32[j], &env->fp_status);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f32[k] = uint64_to_float32(
> + env->vfp.vreg[src2].u64[j], &env->fp_status);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_fnarrow(env, dest, k, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vfncvt.f.x.v vd, vs2, vm # Convert double-width integer to float. */
> +void VECTOR_HELPER(vfncvt_f_x_v)(CPURISCVState *env, uint32_t vm,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, k, dest, src2;
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> + if (vector_vtype_ill(env) ||
> + vector_overlap_vm_common(lmul, vm, rd) ||
> + vector_overlap_dstgp_srcgp(rd, lmul, rs2, 2 * lmul)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs2, true);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + if (lmul > 4) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / (2 * width)));
> + k = i % (VLEN / width);
> + j = i % (VLEN / (2 * width));
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f16[k] = int32_to_float16(
> + env->vfp.vreg[src2].s32[j], &env->fp_status);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f32[k] = int64_to_float32(
> + env->vfp.vreg[src2].s64[j], &env->fp_status);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_fnarrow(env, dest, k, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vfncvt.f.f.v vd, vs2, vm # Convert double-width float to single-width float. */
> +void VECTOR_HELPER(vfncvt_f_f_v)(CPURISCVState *env, uint32_t vm,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, k, dest, src2;
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> + if (vector_vtype_ill(env) ||
> + vector_overlap_vm_common(lmul, vm, rd) ||
> + vector_overlap_dstgp_srcgp(rd, lmul, rs2, 2 * lmul)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs2, true);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + if (lmul > 4) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / (2 * width)));
> + k = i % (VLEN / width);
> + j = i % (VLEN / (2 * width));
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f16[k] = float32_to_float16(
> + env->vfp.vreg[src2].f32[j], true, &env->fp_status);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f32[k] = float64_to_float32(
> + env->vfp.vreg[src2].f64[j], &env->fp_status);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_fnarrow(env, dest, k, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vlbu_v)(CPURISCVState *env, uint32_t nf, uint32_t vm,
> + uint32_t rs1, uint32_t rd)
> +{
> + int i, j, k, vl, vlmax, lmul, width, dest, read;
> +
> + vl = env->vfp.vl;
> +
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + if (lmul * (nf + 1) > 32) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + k = nf;
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + read = i * (nf + 1) + k;
> + env->vfp.vreg[dest + k * lmul].u8[j] =
> + cpu_ldub_data(env, env->gpr[rs1] + read);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + read = i * (nf + 1) + k;
> + env->vfp.vreg[dest + k * lmul].u16[j] =
> + cpu_ldub_data(env, env->gpr[rs1] + read);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + read = i * (nf + 1) + k;
> + env->vfp.vreg[dest + k * lmul].u32[j] =
> + cpu_ldub_data(env, env->gpr[rs1] + read);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + read = i * (nf + 1) + k;
> + env->vfp.vreg[dest + k * lmul].u64[j] =
> + cpu_ldub_data(env, env->gpr[rs1] + read);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_segment(env, dest, j, width, k, lmul);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
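For the unit-stride segment loads, the byte offset of field k of element i is (i * (nf + 1) + k) * esize, which is what the read computation above implements for esize = 1 (the halfword and word variants further down carry the explicit * 2 and * 4). As a standalone model (names are mine, not the patch's):

    #include <stdio.h>

    static long seg_offset(int i, int nf, int k, int esize)
    {
        return ((long)i * (nf + 1) + k) * (long)esize;
    }

    int main(void)
    {
        /* two-field byte segments (nf = 1): element 3, field 1 */
        printf("%ld\n", seg_offset(3, 1, 1, 1));   /* 7 */
        /* same segment shape with halfword fields */
        printf("%ld\n", seg_offset(3, 1, 1, 2));   /* 14 */
        return 0;
    }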
> +
> +void VECTOR_HELPER(vlb_v)(CPURISCVState *env, uint32_t nf, uint32_t vm,
> + uint32_t rs1, uint32_t rd)
> +{
> + int i, j, k, vl, vlmax, lmul, width, dest, read;
> +
> + vl = env->vfp.vl;
> +
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + if (lmul * (nf + 1) > 32) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + k = nf;
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + read = i * (nf + 1) + k;
> + env->vfp.vreg[dest + k * lmul].s8[j] =
> + cpu_ldsb_data(env, env->gpr[rs1] + read);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + read = i * (nf + 1) + k;
> + env->vfp.vreg[dest + k * lmul].s16[j] = sign_extend(
> + cpu_ldsb_data(env, env->gpr[rs1] + read), 8);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + read = i * (nf + 1) + k;
> + env->vfp.vreg[dest + k * lmul].s32[j] = sign_extend(
> + cpu_ldsb_data(env, env->gpr[rs1] + read), 8);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + read = i * (nf + 1) + k;
> + env->vfp.vreg[dest + k * lmul].s64[j] = sign_extend(
> + cpu_ldsb_data(env, env->gpr[rs1] + read), 8);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_segment(env, dest, j, width, k, lmul);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
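Two observations on vlb_v: cpu_ldsb_data() already returns a sign-extended byte, so the extra sign_extend(..., 8) looks redundant, though harmless; and sign_extend() itself is presumably the usual widen-from-bit-size helper. A branch-free sketch of what I assume it does (my code, not the patch's definition):

    #include <stdint.h>
    #include <stdio.h>

    static int64_t sign_extend64(int64_t x, int size)
    {
        uint64_t m = 1ull << (size - 1);         /* the sign-bit position */
        return (int64_t)((((uint64_t)x & ((m << 1) - 1)) ^ m) - m);
    }

    int main(void)
    {
        printf("%lld\n", (long long)sign_extend64(0xff, 8));   /* -1 */
        printf("%lld\n", (long long)sign_extend64(0x7f, 8));   /* 127 */
        return 0;
    }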
> +
> +void VECTOR_HELPER(vlsbu_v)(CPURISCVState *env, uint32_t nf, uint32_t vm,
> + uint32_t rs1, uint32_t rs2, uint32_t rd)
> +{
> + int i, j, k, vl, vlmax, lmul, width, dest, read;
> +
> + vl = env->vfp.vl;
> +
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + if (lmul * (nf + 1) > 32) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + k = nf;
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + read = i * env->gpr[rs2] + k;
> + env->vfp.vreg[dest + k * lmul].u8[j] =
> + cpu_ldub_data(env, env->gpr[rs1] + read);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + read = i * env->gpr[rs2] + k;
> + env->vfp.vreg[dest + k * lmul].u16[j] =
> + cpu_ldub_data(env, env->gpr[rs1] + read);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + read = i * env->gpr[rs2] + k;
> + env->vfp.vreg[dest + k * lmul].u32[j] =
> + cpu_ldub_data(env, env->gpr[rs1] + read);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + read = i * env->gpr[rs2] + k;
> + env->vfp.vreg[dest + k * lmul].u64[j] =
> + cpu_ldub_data(env, env->gpr[rs1] + read);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_segment(env, dest, j, width, k, lmul);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
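The strided variants replace the packed segment offset with i * stride + k * esize, the stride coming from rs2 in bytes. A model under the same naming caveats as before:

    #include <stdio.h>

    static long strided_offset(long stride, int i, int k, int esize)
    {
        return (long)i * stride + (long)k * esize;
    }

    int main(void)
    {
        /* byte fields, 16-byte stride: element 3, field 1 */
        printf("%ld\n", strided_offset(16, 3, 1, 1));   /* 49 */
        return 0;
    }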
> +
> +void VECTOR_HELPER(vlsb_v)(CPURISCVState *env, uint32_t nf, uint32_t vm,
> + uint32_t rs1, uint32_t rs2, uint32_t rd)
> +{
> + int i, j, k, vl, vlmax, lmul, width, dest, read;
> +
> + vl = env->vfp.vl;
> +
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + if (lmul * (nf + 1) > 32) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + k = nf;
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + read = i * env->gpr[rs2] + k;
> + env->vfp.vreg[dest + k * lmul].s8[j] =
> + cpu_ldsb_data(env, env->gpr[rs1] + read);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + read = i * env->gpr[rs2] + k;
> + env->vfp.vreg[dest + k * lmul].s16[j] = sign_extend(
> + cpu_ldsb_data(env, env->gpr[rs1] + read), 8);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + read = i * env->gpr[rs2] + k;
> + env->vfp.vreg[dest + k * lmul].s32[j] = sign_extend(
> + cpu_ldsb_data(env, env->gpr[rs1] + read), 8);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + read = i * env->gpr[rs2] + k;
> + env->vfp.vreg[dest + k * lmul].s64[j] = sign_extend(
> + cpu_ldsb_data(env, env->gpr[rs1] + read), 8);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_segment(env, dest, j, width, k, lmul);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
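Fields of a segment are striped k * lmul registers apart (the dest + k * lmul above), so a segment with nf + 1 fields occupies lmul * (nf + 1) registers; that is exactly what the lmul * (nf + 1) > 32 guard bounds against the 32-register file. A quick standalone check:

    #include <stdio.h>

    static int field_reg(int rd, int k, int lmul) { return rd + k * lmul; }
    static int seg_fits(int lmul, int nf) { return lmul * (nf + 1) <= 32; }

    int main(void)
    {
        /* nf = 3 (four fields), lmul = 2, base register v4 */
        for (int k = 0; k <= 3; k++) {
            printf("field %d -> v%d\n", k, field_reg(4, k, 2));
        }
        printf("fits: %d\n", seg_fits(2, 3));   /* 1 */
        return 0;
    }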
> +
> +void VECTOR_HELPER(vlxbu_v)(CPURISCVState *env, uint32_t nf, uint32_t vm,
> + uint32_t rs1, uint32_t rs2, uint32_t rd)
> +{
> + int i, j, k, vl, vlmax, lmul, width, dest, src2;
> + target_ulong addr;
> +
> + vl = env->vfp.vl;
> +
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + if (lmul * (nf + 1) > 32) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + k = nf;
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + addr = vector_get_index(env, rs1, src2, j, 1, width, k);
> + env->vfp.vreg[dest + k * lmul].u8[j] =
> + cpu_ldub_data(env, addr);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + addr = vector_get_index(env, rs1, src2, j, 1, width, k);
> + env->vfp.vreg[dest + k * lmul].u16[j] =
> + cpu_ldub_data(env, addr);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + addr = vector_get_index(env, rs1, src2, j, 1, width, k);
> + env->vfp.vreg[dest + k * lmul].u32[j] =
> + cpu_ldub_data(env, addr);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + addr = vector_get_index(env, rs1, src2, j, 1, width, k);
> + env->vfp.vreg[dest + k * lmul].u64[j] =
> + cpu_ldub_data(env, addr);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_segment(env, dest, j, width, k, lmul);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
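For the indexed loads the offset comes from an element of the index vector rather than a scalar stride. vector_get_index() isn't quoted in this hunk, but from its call sites (base rs1, index group src2, element j, a byte-scale argument, width, field k) it plausibly computes something like the following; this is purely my reconstruction, not the patch's definition:

    #include <stdio.h>

    /* hypothetical shape of the indexed address computation */
    static long indexed_addr(long base, long index_elem, int k, int esize)
    {
        return base + index_elem + (long)k * esize;
    }

    int main(void)
    {
        printf("%ld\n", indexed_addr(0x1000, 24, 1, 1));   /* 4121 */
        return 0;
    }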
> +
> +void VECTOR_HELPER(vlxb_v)(CPURISCVState *env, uint32_t nf, uint32_t vm,
> + uint32_t rs1, uint32_t rs2, uint32_t rd)
> +{
> + int i, j, k, vl, vlmax, lmul, width, dest, src2;
> + target_ulong addr;
> +
> + vl = env->vfp.vl;
> +
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + if (lmul * (nf + 1) > 32) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + k = nf;
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + addr = vector_get_index(env, rs1, src2, j, 1, width, k);
> + env->vfp.vreg[dest + k * lmul].s8[j] =
> + cpu_ldsb_data(env, addr);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + addr = vector_get_index(env, rs1, src2, j, 1, width, k);
> + env->vfp.vreg[dest + k * lmul].s16[j] = sign_extend(
> + cpu_ldsb_data(env, addr), 8);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + addr = vector_get_index(env, rs1, src2, j, 1, width, k);
> + env->vfp.vreg[dest + k * lmul].s32[j] = sign_extend(
> + cpu_ldsb_data(env, addr), 8);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + addr = vector_get_index(env, rs1, src2, j, 1, width, k);
> + env->vfp.vreg[dest + k * lmul].s64[j] = sign_extend(
> + cpu_ldsb_data(env, addr), 8);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_segment(env, dest, j, width, k, lmul);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vlbuff_v)(CPURISCVState *env, uint32_t nf, uint32_t vm,
> + uint32_t rs1, uint32_t rd)
> +{
> + int i, j, k, vl, vlmax, lmul, width, dest, read;
> +
> + vl = env->vfp.vl;
> +
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + if (lmul * (nf + 1) > 32) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + env->foflag = true;
> + env->vfp.vl = 0;
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + k = nf;
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + read = i * (nf + 1) + k;
> + env->vfp.vreg[dest + k * lmul].u8[j] =
> + cpu_ldub_data(env, env->gpr[rs1] + read);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + env->vfp.vl++;
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + read = i * (nf + 1) + k;
> + env->vfp.vreg[dest + k * lmul].u16[j] =
> + cpu_ldub_data(env, env->gpr[rs1] + read);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + env->vfp.vl++;
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + read = i * (nf + 1) + k;
> + env->vfp.vreg[dest + k * lmul].u32[j] =
> + cpu_ldub_data(env, env->gpr[rs1] + read);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + env->vfp.vl++;
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + read = i * (nf + 1) + k;
> + env->vfp.vreg[dest + k * lmul].u64[j] =
> + cpu_ldub_data(env, env->gpr[rs1] + read);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + env->vfp.vl++;
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_segment(env, dest, j, width, k, lmul);
> + }
> + }
> + env->foflag = false;
> + env->vfp.vl = vl;
> + env->vfp.vstart = 0;
> +}
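The ...ff variants implement fault-only-first: vl is zeroed, recounted per completed element with foflag set, and restored afterwards, so a fault past element 0 can shrink vl instead of trapping. A rough behavioural model (hypothetical load_ok() stands in for the guest memory access):

    #include <stdbool.h>
    #include <stdio.h>

    static bool ok_until_5(int i) { return i < 5; }

    /* returns the new vl, or -1 when element 0 itself faults */
    static int fof_load(int vl, bool (*load_ok)(int))
    {
        int done = 0;
        for (int i = 0; i < vl; i++) {
            if (!load_ok(i)) {
                return i == 0 ? -1 : done;   /* element 0 must still trap */
            }
            done++;
        }
        return done;
    }

    int main(void)
    {
        printf("%d\n", fof_load(8, ok_until_5));   /* 5: vl trimmed */
        return 0;
    }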
> +
> +void VECTOR_HELPER(vlbff_v)(CPURISCVState *env, uint32_t nf, uint32_t vm,
> + uint32_t rs1, uint32_t rd)
> +{
> + int i, j, k, vl, vlmax, lmul, width, dest, read;
> +
> + vl = env->vfp.vl;
> +
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + if (lmul * (nf + 1) > 32) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rd, false);
> + env->foflag = true;
> + env->vfp.vl = 0;
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + k = nf;
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + read = i * (nf + 1) + k;
> + env->vfp.vreg[dest + k * lmul].s8[j] =
> + cpu_ldsb_data(env, env->gpr[rs1] + read);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + env->vfp.vl++;
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + read = i * (nf + 1) + k;
> + env->vfp.vreg[dest + k * lmul].s16[j] = sign_extend(
> + cpu_ldsb_data(env, env->gpr[rs1] + read), 8);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + env->vfp.vl++;
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + read = i * (nf + 1) + k;
> + env->vfp.vreg[dest + k * lmul].s32[j] = sign_extend(
> + cpu_ldsb_data(env, env->gpr[rs1] + read), 8);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + env->vfp.vl++;
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + read = i * (nf + 1) + k;
> + env->vfp.vreg[dest + k * lmul].s64[j] = sign_extend(
> + cpu_ldsb_data(env, env->gpr[rs1] + read), 8);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + env->vfp.vl++;
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_segment(env, dest, j, width, k, lmul);
> + }
> + }
> + env->foflag = false;
> + env->vfp.vl = vl;
> + env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vlhu_v)(CPURISCVState *env, uint32_t nf, uint32_t vm,
> + uint32_t rs1, uint32_t rd)
> +{
> + int i, j, k, vl, vlmax, lmul, width, dest, read;
> +
> + vl = env->vfp.vl;
> +
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + if (lmul * (nf + 1) > 32) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + k = nf;
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + read = (i * (nf + 1) + k) * 2;
> + env->vfp.vreg[dest + k * lmul].u16[j] =
> + cpu_lduw_data(env, env->gpr[rs1] + read);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + read = (i * (nf + 1) + k) * 2;
> + env->vfp.vreg[dest + k * lmul].u32[j] =
> + cpu_lduw_data(env, env->gpr[rs1] + read);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + read = (i * (nf + 1) + k) * 2;
> + env->vfp.vreg[dest + k * lmul].u64[j] =
> + cpu_lduw_data(env, env->gpr[rs1] + read);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_segment(env, dest, j, width, k, lmul);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vlh_v)(CPURISCVState *env, uint32_t nf, uint32_t vm,
> + uint32_t rs1, uint32_t rd)
> +{
> + int i, j, k, vl, vlmax, lmul, width, dest, read;
> +
> + vl = env->vfp.vl;
> +
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + if (lmul * (nf + 1) > 32) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + k = nf;
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + read = (i * (nf + 1) + k) * 2;
> + env->vfp.vreg[dest + k * lmul].s16[j] =
> + cpu_ldsw_data(env, env->gpr[rs1] + read);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + read = (i * (nf + 1) + k) * 2;
> + env->vfp.vreg[dest + k * lmul].s32[j] = sign_extend(
> + cpu_ldsw_data(env, env->gpr[rs1] + read), 16);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + read = (i * (nf + 1) + k) * 2;
> + env->vfp.vreg[dest + k * lmul].s64[j] = sign_extend(
> + cpu_ldsw_data(env, env->gpr[rs1] + read), 16);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_segment(env, dest, j, width, k, lmul);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vlshu_v)(CPURISCVState *env, uint32_t nf, uint32_t vm,
> + uint32_t rs1, uint32_t rs2, uint32_t rd)
> +{
> + int i, j, k, vl, vlmax, lmul, width, dest, read;
> +
> + vl = env->vfp.vl;
> +
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + if (lmul * (nf + 1) > 32) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + k = nf;
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + read = i * env->gpr[rs2] + k * 2;
> + env->vfp.vreg[dest + k * lmul].u16[j] =
> + cpu_lduw_data(env, env->gpr[rs1] + read);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + read = i * env->gpr[rs2] + k * 2;
> + env->vfp.vreg[dest + k * lmul].u32[j] =
> + cpu_lduw_data(env, env->gpr[rs1] + read);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + read = i * env->gpr[rs2] + k * 2;
> + env->vfp.vreg[dest + k * lmul].u64[j] =
> + cpu_lduw_data(env, env->gpr[rs1] + read);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_segment(env, dest, j, width, k, lmul);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vlsh_v)(CPURISCVState *env, uint32_t nf, uint32_t vm,
> + uint32_t rs1, uint32_t rs2, uint32_t rd)
> +{
> + int i, j, k, vl, vlmax, lmul, width, dest, read;
> +
> + vl = env->vfp.vl;
> +
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + if (lmul * (nf + 1) > 32) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + k = nf;
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + read = i * env->gpr[rs2] + k * 2;
> + env->vfp.vreg[dest + k * lmul].s16[j] =
> + cpu_ldsw_data(env, env->gpr[rs1] + read);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + read = i * env->gpr[rs2] + k * 2;
> + env->vfp.vreg[dest + k * lmul].s32[j] = sign_extend(
> + cpu_ldsw_data(env, env->gpr[rs1] + read), 16);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + read = i * env->gpr[rs2] + k * 2;
> + env->vfp.vreg[dest + k * lmul].s64[j] = sign_extend(
> + cpu_ldsw_data(env, env->gpr[rs1] + read), 16);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_segment(env, dest, j, width, k, lmul);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vlxhu_v)(CPURISCVState *env, uint32_t nf, uint32_t vm,
> + uint32_t rs1, uint32_t rs2, uint32_t rd)
> +{
> + int i, j, k, vl, vlmax, lmul, width, dest, src2;
> + target_ulong addr;
> +
> + vl = env->vfp.vl;
> +
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + if (lmul * (nf + 1) > 32) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + k = nf;
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + addr = vector_get_index(env, rs1, src2, j, 2, width, k);
> + env->vfp.vreg[dest + k * lmul].u16[j] =
> + cpu_lduw_data(env, addr);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + addr = vector_get_index(env, rs1, src2, j, 2, width, k);
> + env->vfp.vreg[dest + k * lmul].u32[j] =
> + cpu_lduw_data(env, addr);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + addr = vector_get_index(env, rs1, src2, j, 2, width, k);
> + env->vfp.vreg[dest + k * lmul].u64[j] =
> + cpu_lduw_data(env, addr);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_segment(env, dest, j, width, k, lmul);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vlxh_v)(CPURISCVState *env, uint32_t nf, uint32_t vm,
> + uint32_t rs1, uint32_t rs2, uint32_t rd)
> +{
> + int i, j, k, vl, vlmax, lmul, width, dest, src2;
> + target_ulong addr;
> +
> + vl = env->vfp.vl;
> +
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + if (lmul * (nf + 1) > 32) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + k = nf;
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + addr = vector_get_index(env, rs1, src2, j, 2, width, k);
> + env->vfp.vreg[dest + k * lmul].s16[j] =
> + cpu_ldsw_data(env, addr);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + addr = vector_get_index(env, rs1, src2, j, 2, width, k);
> + env->vfp.vreg[dest + k * lmul].s32[j] = sign_extend(
> + cpu_ldsw_data(env, addr), 16);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + addr = vector_get_index(env, rs1, src2, j, 2, width, k);
> + env->vfp.vreg[dest + k * lmul].s64[j] = sign_extend(
> + cpu_ldsw_data(env, addr), 16);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_segment(env, dest, j, width, k, lmul);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vlhuff_v)(CPURISCVState *env, uint32_t nf, uint32_t vm,
> + uint32_t rs1, uint32_t rd)
> +{
> + int i, j, k, vl, vlmax, lmul, width, dest, read;
> +
> + vl = env->vfp.vl;
> +
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + if (lmul * (nf + 1) > 32) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rd, false);
> + env->foflag = true;
> + env->vfp.vl = 0;
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + k = nf;
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + read = (i * (nf + 1) + k) * 2;
> + env->vfp.vreg[dest + k * lmul].u16[j] =
> + cpu_lduw_data(env, env->gpr[rs1] + read);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + env->vfp.vl++;
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + read = (i * (nf + 1) + k) * 2;
> + env->vfp.vreg[dest + k * lmul].u32[j] =
> + cpu_lduw_data(env, env->gpr[rs1] + read);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + env->vfp.vl++;
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + read = (i * (nf + 1) + k) * 2;
> + env->vfp.vreg[dest + k * lmul].u64[j] =
> + cpu_lduw_data(env, env->gpr[rs1] + read);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + env->vfp.vl++;
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_segment(env, dest, j, width, k, lmul);
> + }
> + }
> + env->foflag = false;
> + env->vfp.vl = vl;
> + env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vlhff_v)(CPURISCVState *env, uint32_t nf, uint32_t vm,
> + uint32_t rs1, uint32_t rd)
> +{
> + int i, j, k, vl, vlmax, lmul, width, dest, read;
> +
> + vl = env->vfp.vl;
> +
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + if (lmul * (nf + 1) > 32) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rd, false);
> + env->foflag = true;
> + env->vfp.vl = 0;
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + k = nf;
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + read = (i * (nf + 1) + k) * 2;
> + env->vfp.vreg[dest + k * lmul].s16[j] =
> + cpu_ldsw_data(env, env->gpr[rs1] + read);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + env->vfp.vl++;
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + read = (i * (nf + 1) + k) * 2;
> + env->vfp.vreg[dest + k * lmul].s32[j] = sign_extend(
> + cpu_ldsw_data(env, env->gpr[rs1] + read), 16);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + env->vfp.vl++;
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + read = (i * (nf + 1) + k) * 2;
> + env->vfp.vreg[dest + k * lmul].s64[j] = sign_extend(
> + cpu_ldsw_data(env, env->gpr[rs1] + read), 16);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + env->vfp.vl++;
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_segment(env, dest, j, width, k, lmul);
> + }
> + }
> + env->foflag = false;
> + env->vfp.vl = vl;
> + env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vlw_v)(CPURISCVState *env, uint32_t nf, uint32_t vm,
> + uint32_t rs1, uint32_t rd)
> +{
> + int i, j, k, vl, vlmax, lmul, width, dest, read;
> +
> + vl = env->vfp.vl;
> +
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + if (lmul * (nf + 1) > 32) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + k = nf;
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + read = (i * (nf + 1) + k) * 4;
> + env->vfp.vreg[dest + k * lmul].s32[j] =
> + cpu_ldl_data(env, env->gpr[rs1] + read);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + read = (i * (nf + 1) + k) * 4;
> + env->vfp.vreg[dest + k * lmul].s64[j] = sign_extend(
> + cpu_ldl_data(env, env->gpr[rs1] + read), 32);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_segment(env, dest, j, width, k, lmul);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vlwu_v)(CPURISCVState *env, uint32_t nf, uint32_t vm,
> + uint32_t rs1, uint32_t rd)
> +{
> + int i, j, k, vl, vlmax, lmul, width, dest, read;
> +
> + vl = env->vfp.vl;
> +
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + if (lmul * (nf + 1) > 32) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + k = nf;
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + read = (i * (nf + 1) + k) * 4;
> + env->vfp.vreg[dest + k * lmul].u32[j] =
> + cpu_ldl_data(env, env->gpr[rs1] + read);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + read = (i * (nf + 1) + k) * 4;
> + env->vfp.vreg[dest + k * lmul].u64[j] =
> + cpu_ldl_data(env, env->gpr[rs1] + read);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_segment(env, dest, j, width, k, lmul);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vlswu_v)(CPURISCVState *env, uint32_t nf, uint32_t vm,
> + uint32_t rs1, uint32_t rs2, uint32_t rd)
> +{
> + int i, j, k, vl, vlmax, lmul, width, dest, read;
> +
> + vl = env->vfp.vl;
> +
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + if (lmul * (nf + 1) > 32) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + k = nf;
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + read = i * env->gpr[rs2] + k * 4;
> + env->vfp.vreg[dest + k * lmul].u32[j] =
> + cpu_ldl_data(env, env->gpr[rs1] + read);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + read = i * env->gpr[rs2] + k * 4;
> + env->vfp.vreg[dest + k * lmul].u64[j] =
> + cpu_ldl_data(env, env->gpr[rs1] + read);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_segment(env, dest, j, width, k, lmul);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
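For the strided variants the element base advances by the byte stride
held in rs2 while the fields of one segment stay esz bytes apart, so
the offset becomes i * stride + k * esz. A minimal check of that
formula (plain C, hypothetical names):

    #include <assert.h>
    #include <stdint.h>

    /* Byte offset of field k of element i for a strided segment
     * access; stride is the byte distance between element bases. */
    static uint64_t seg_stride_offset(uint64_t i, uint64_t stride,
                                      uint32_t k, uint32_t esz)
    {
        return i * stride + k * esz;
    }

    int main(void)
    {
        /* Stride 16: element bases at 0, 16, 32, ... */
        assert(seg_stride_offset(2, 16, 0, 4) == 32);
        assert(seg_stride_offset(2, 16, 1, 4) == 36);
        return 0;
    }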
> +
> +void VECTOR_HELPER(vlsw_v)(CPURISCVState *env, uint32_t nf, uint32_t vm,
> + uint32_t rs1, uint32_t rs2, uint32_t rd)
> +{
> + int i, j, k, vl, vlmax, lmul, width, dest, read;
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + if (lmul * (nf + 1) > 32) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + k = nf;
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + read = i * env->gpr[rs2] + k * 4;
> + env->vfp.vreg[dest + k * lmul].s32[j] =
> + cpu_ldl_data(env, env->gpr[rs1] + read);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + read = i * env->gpr[rs2] + k * 4;
> + env->vfp.vreg[dest + k * lmul].s64[j] = sign_extend(
> + cpu_ldl_data(env, env->gpr[rs1] + read), 32);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_segment(env, dest, j, width, k, lmul);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vlxwu_v)(CPURISCVState *env, uint32_t nf, uint32_t vm,
> + uint32_t rs1, uint32_t rs2, uint32_t rd)
> +{
> + int i, j, k, vl, vlmax, lmul, width, dest, src2;
> + target_ulong addr;
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + if (lmul * (nf + 1) > 32) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + k = nf;
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + addr = vector_get_index(env, rs1, src2, j, 4, width, k);
> + env->vfp.vreg[dest + k * lmul].u32[j] =
> + cpu_ldl_data(env, addr);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + addr = vector_get_index(env, rs1, src2, j, 4, width, k);
> + env->vfp.vreg[dest + k * lmul].u64[j] =
> + cpu_ldl_data(env, addr);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_segment(env, dest, j, width, k, lmul);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vlxw_v)(CPURISCVState *env, uint32_t nf, uint32_t vm,
> + uint32_t rs1, uint32_t rs2, uint32_t rd)
> +{
> + int i, j, k, vl, vlmax, lmul, width, dest, src2;
> + target_ulong addr;
> +
> + vl = env->vfp.vl;
> +
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + if (lmul * (nf + 1) > 32) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + k = nf;
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + addr = vector_get_index(env, rs1, src2, j, 4, width, k);
> + env->vfp.vreg[dest + k * lmul].s32[j] =
> + cpu_ldl_data(env, addr);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + addr = vector_get_index(env, rs1, src2, j, 4, width, k);
> + env->vfp.vreg[dest + k * lmul].s64[j] = sign_extend(
> + cpu_ldl_data(env, addr), 32);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_segment(env, dest, j, width, k, lmul);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
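vector_get_index() itself is not in this hunk; from the call sites it
appears to form gpr[rs1] plus the per-element index taken from vs2,
plus k * esz for the k-th field. Assuming that reading, a sketch of
the gather address (plain C; names and contract are guesses, not a
copy of the helper):

    #include <assert.h>
    #include <stdint.h>

    /* Assumed shape of the indexed (gather/scatter) address: base
     * register plus the element's index value plus the field offset. */
    static uint64_t gather_addr(uint64_t base, int64_t idx, uint32_t k,
                                uint32_t esz)
    {
        return base + idx + k * esz;
    }

    int main(void)
    {
        assert(gather_addr(0x1000, 8, 0, 4) == 0x1008);
        assert(gather_addr(0x1000, 8, 1, 4) == 0x100c);
        return 0;
    }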
> +
> +void VECTOR_HELPER(vlwuff_v)(CPURISCVState *env, uint32_t nf, uint32_t vm,
> + uint32_t rs1, uint32_t rd)
> +{
> + int i, j, k, vl, vlmax, lmul, width, dest, read;
> +
> + vl = env->vfp.vl;
> +
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + if (lmul * (nf + 1) > 32) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rd, false);
> + env->foflag = true;
> + env->vfp.vl = 0;
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + k = nf;
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + read = (i * (nf + 1) + k) * 4;
> + env->vfp.vreg[dest + k * lmul].u32[j] =
> + cpu_ldl_data(env, env->gpr[rs1] + read);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + env->vfp.vl++;
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + read = (i * (nf + 1) + k) * 4;
> + env->vfp.vreg[dest + k * lmul].u64[j] =
> + cpu_ldl_data(env, env->gpr[rs1] + read);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + env->vfp.vl++;
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_segment(env, dest, j, width, k, lmul);
> + }
> + }
> + env->foflag = false;
> + env->vfp.vl = vl;
> + env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vlwff_v)(CPURISCVState *env, uint32_t nf, uint32_t vm,
> + uint32_t rs1, uint32_t rd)
> +{
> + int i, j, k, vl, vlmax, lmul, width, dest, read;
> +
> + vl = env->vfp.vl;
> +
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + if (lmul * (nf + 1) > 32) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rd, false);
> + env->foflag = true;
> + env->vfp.vl = 0;
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + k = nf;
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + read = (i * (nf + 1) + k) * 4;
> + env->vfp.vreg[dest + k * lmul].s32[j] =
> + cpu_ldl_data(env, env->gpr[rs1] + read);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + env->vfp.vl++;
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + read = (i * (nf + 1) + k) * 4;
> + env->vfp.vreg[dest + k * lmul].s64[j] = sign_extend(
> + cpu_ldl_data(env, env->gpr[rs1] + read), 32);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + env->vfp.vl++;
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_segment(env, dest, j, width, k, lmul);
> + }
> + }
> + env->foflag = false;
> + env->vfp.vl = vl;
> + env->vfp.vstart = 0;
> +}
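The fault-only-first variants clear env->vfp.vl, bump it per completed
element, and restore it on the normal exit; the point is that a fault
on element i > 0 can report a trimmed vl instead of trapping, while a
fault on element 0 still traps. The control shape, reduced to a sketch
with a simulated fault in place of a real MMU check:

    #include <assert.h>
    #include <stdbool.h>
    #include <stddef.h>
    #include <stdint.h>

    /* Load up to vl bytes; 'faulty' is the first index that would
     * trap. Returns false for a trap on element 0, otherwise trims
     * *new_vl to the number of elements completed. */
    static bool ff_load(const uint8_t *mem, size_t faulty, uint32_t vl,
                        uint8_t *dst, uint32_t *new_vl)
    {
        uint32_t done = 0;
        for (uint32_t i = 0; i < vl; i++) {
            if (i == faulty) {
                if (i == 0) {
                    return false;   /* element 0 still traps */
                }
                break;              /* later fault: trim vl, no trap */
            }
            dst[i] = mem[i];
            done++;
        }
        *new_vl = done;
        return true;
    }

    int main(void)
    {
        uint8_t mem[8] = {1, 2, 3, 4, 5, 6, 7, 8}, dst[8] = {0};
        uint32_t new_vl;
        assert(ff_load(mem, 5, 8, dst, &new_vl) && new_vl == 5);
        assert(!ff_load(mem, 0, 8, dst, &new_vl));
        return 0;
    }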
> +
> +void VECTOR_HELPER(vle_v)(CPURISCVState *env, uint32_t nf, uint32_t vm,
> + uint32_t rs1, uint32_t rd)
> +{
> + int i, j, k, vl, vlmax, lmul, width, dest, read;
> +
> + vl = env->vfp.vl;
> +
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + if (lmul * (nf + 1) > 32) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + k = nf;
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + read = i * (nf + 1) + k;
> + env->vfp.vreg[dest + k * lmul].u8[j] =
> + cpu_ldub_data(env, env->gpr[rs1] + read);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + read = (i * (nf + 1) + k) * 2;
> + env->vfp.vreg[dest + k * lmul].u16[j] =
> + cpu_lduw_data(env, env->gpr[rs1] + read);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + read = (i * (nf + 1) + k) * 4;
> + env->vfp.vreg[dest + k * lmul].u32[j] =
> + cpu_ldl_data(env, env->gpr[rs1] + read);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + read = (i * (nf + 1) + k) * 8;
> + env->vfp.vreg[dest + k * lmul].u64[j] =
> + cpu_ldq_data(env, env->gpr[rs1] + read);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_segment(env, dest, j, width, k, lmul);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
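Every helper maps an element index i to a (register, slot) pair as
dest = rd + i / (VLEN / width) and j = i % (VLEN / width): consecutive
elements fill one vector register before spilling into the next
register of the LMUL group. A standalone check of that split (VLEN
fixed to 128 here purely as an example value):

    #include <assert.h>
    #include <stdint.h>

    #define VLEN 128   /* example value; the patch gets it elsewhere */

    /* Which register of the group holds element i, and at which slot. */
    static void elem_pos(uint32_t rd, uint32_t i, uint32_t width,
                         uint32_t *reg, uint32_t *slot)
    {
        uint32_t per_reg = VLEN / width;
        *reg = rd + i / per_reg;
        *slot = i % per_reg;
    }

    int main(void)
    {
        uint32_t reg, slot;
        /* 32-bit elements: four per 128-bit register. */
        elem_pos(8, 0, 32, &reg, &slot);
        assert(reg == 8 && slot == 0);
        elem_pos(8, 5, 32, &reg, &slot);   /* element 5 -> v9 slot 1 */
        assert(reg == 9 && slot == 1);
        return 0;
    }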
> +
> +void VECTOR_HELPER(vlse_v)(CPURISCVState *env, uint32_t nf, uint32_t vm,
> + uint32_t rs1, uint32_t rs2, uint32_t rd)
> +{
> + int i, j, k, vl, vlmax, lmul, width, dest, read;
> +
> + vl = env->vfp.vl;
> +
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + if (lmul * (nf + 1) > 32) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + k = nf;
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + read = i * env->gpr[rs2] + k;
> + env->vfp.vreg[dest + k * lmul].u8[j] =
> + cpu_ldub_data(env, env->gpr[rs1] + read);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + read = i * env->gpr[rs2] + k * 2;
> + env->vfp.vreg[dest + k * lmul].u16[j] =
> + cpu_lduw_data(env, env->gpr[rs1] + read);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + read = i * env->gpr[rs2] + k * 4;
> + env->vfp.vreg[dest + k * lmul].u32[j] =
> + cpu_ldl_data(env, env->gpr[rs1] + read);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + read = i * env->gpr[rs2] + k * 8;
> + env->vfp.vreg[dest + k * lmul].u64[j] =
> + cpu_ldq_data(env, env->gpr[rs1] + read);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_segment(env, dest, j, width, k, lmul);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vlxe_v)(CPURISCVState *env, uint32_t nf, uint32_t vm,
> + uint32_t rs1, uint32_t rs2, uint32_t rd)
> +{
> + int i, j, k, vl, vlmax, lmul, width, dest, src2;
> + target_ulong addr;
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + if (lmul * (nf + 1) > 32) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + k = nf;
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + addr = vector_get_index(env, rs1, src2, j, 1, width, k);
> + env->vfp.vreg[dest + k * lmul].u8[j] =
> + cpu_ldub_data(env, addr);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + addr = vector_get_index(env, rs1, src2, j, 2, width, k);
> + env->vfp.vreg[dest + k * lmul].u16[j] =
> + cpu_lduw_data(env, addr);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + addr = vector_get_index(env, rs1, src2, j, 4, width, k);
> + env->vfp.vreg[dest + k * lmul].u32[j] =
> + cpu_ldl_data(env, addr);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + addr = vector_get_index(env, rs1, src2, j, 8, width, k);
> + env->vfp.vreg[dest + k * lmul].u64[j] =
> + cpu_ldq_data(env, addr);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_segment(env, dest, j, width, k, lmul);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vleff_v)(CPURISCVState *env, uint32_t nf, uint32_t vm,
> + uint32_t rs1, uint32_t rd)
> +{
> + int i, j, k, vl, vlmax, lmul, width, dest, read;
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + if (lmul * (nf + 1) > 32) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rd, false);
> + env->vfp.vl = 0;
> + env->foflag = true;
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + k = nf;
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + read = i * (nf + 1) + k;
> + env->vfp.vreg[dest + k * lmul].u8[j] =
> + cpu_ldub_data(env, env->gpr[rs1] + read);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + env->vfp.vl++;
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + read = (i * (nf + 1) + k) * 2;
> + env->vfp.vreg[dest + k * lmul].u16[j] =
> + cpu_lduw_data(env, env->gpr[rs1] + read);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + env->vfp.vl++;
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + read = (i * (nf + 1) + k) * 4;
> + env->vfp.vreg[dest + k * lmul].u32[j] =
> + cpu_ldl_data(env, env->gpr[rs1] + read);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + env->vfp.vl++;
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + read = (i * (nf + 1) + k) * 8;
> + env->vfp.vreg[dest + k * lmul].u64[j] =
> + cpu_ldq_data(env, env->gpr[rs1] + read);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + env->vfp.vl++;
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_segment(env, dest, j, width, k, lmul);
> + }
> + }
> + env->foflag = false;
> + env->vfp.vl = vl;
> + env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vsb_v)(CPURISCVState *env, uint32_t nf, uint32_t vm,
> + uint32_t rs1, uint32_t rd)
> +{
> + int i, j, k, vl, vlmax, lmul, width, dest, wrote;
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + if (lmul * (nf + 1) > 32) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + k = nf;
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + wrote = i * (nf + 1) + k;
> + cpu_stb_data(env, env->gpr[rs1] + wrote,
> + env->vfp.vreg[dest + k * lmul].s8[j]);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + wrote = i * (nf + 1) + k;
> + cpu_stb_data(env, env->gpr[rs1] + wrote,
> + env->vfp.vreg[dest + k * lmul].s16[j]);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + wrote = i * (nf + 1) + k;
> + cpu_stb_data(env, env->gpr[rs1] + wrote,
> + env->vfp.vreg[dest + k * lmul].s32[j]);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + wrote = i * (nf + 1) + k;
> + cpu_stb_data(env, env->gpr[rs1] + wrote,
> + env->vfp.vreg[dest + k * lmul].s64[j]);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + }
> + }
> + env->vfp.vstart = 0;
> +}
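The same three-way split recurs in every helper: elements below vstart
were already done before a trap and are skipped, elements below vl
execute when the mask allows, and the tail is left alone for stores
(loads call vector_tail_segment() instead). Just the classification,
as a sketch:

    #include <assert.h>
    #include <stdbool.h>
    #include <stdint.h>

    enum elem_kind { PRESTART, ACTIVE, INACTIVE, TAIL };

    /* Mirror of the if/else ladder in the helpers above. */
    static enum elem_kind classify(uint32_t i, uint32_t vstart,
                                   uint32_t vl, bool mask_on)
    {
        if (i < vstart) {
            return PRESTART;
        } else if (i < vl) {
            return mask_on ? ACTIVE : INACTIVE;
        }
        return TAIL;
    }

    int main(void)
    {
        assert(classify(0, 2, 4, true) == PRESTART);
        assert(classify(2, 2, 4, true) == ACTIVE);
        assert(classify(3, 2, 4, false) == INACTIVE);
        assert(classify(4, 2, 4, true) == TAIL);
        return 0;
    }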
> +
> +void VECTOR_HELPER(vssb_v)(CPURISCVState *env, uint32_t nf, uint32_t vm,
> + uint32_t rs1, uint32_t rs2, uint32_t rd)
> +{
> + int i, j, k, vl, vlmax, lmul, width, dest, wrote;
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + if (lmul * (nf + 1) > 32) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + k = nf;
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + wrote = i * env->gpr[rs2] + k;
> + cpu_stb_data(env, env->gpr[rs1] + wrote,
> + env->vfp.vreg[dest + k * lmul].s8[j]);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + wrote = i * env->gpr[rs2] + k;
> + cpu_stb_data(env, env->gpr[rs1] + wrote,
> + env->vfp.vreg[dest + k * lmul].s16[j]);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + wrote = i * env->gpr[rs2] + k;
> + cpu_stb_data(env, env->gpr[rs1] + wrote,
> + env->vfp.vreg[dest + k * lmul].s32[j]);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + wrote = i * env->gpr[rs2] + k;
> + cpu_stb_data(env, env->gpr[rs1] + wrote,
> + env->vfp.vreg[dest + k * lmul].s64[j]);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vsxb_v)(CPURISCVState *env, uint32_t nf, uint32_t vm,
> + uint32_t rs1, uint32_t rs2, uint32_t rd)
> +{
> + int i, j, k, vl, vlmax, lmul, width, dest, src2;
> + target_ulong addr;
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + if (lmul * (nf + 1) > 32) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + k = nf;
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + addr = vector_get_index(env, rs1, src2, j, 1, width, k);
> + cpu_stb_data(env, addr,
> + env->vfp.vreg[dest + k * lmul].s8[j]);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + addr = vector_get_index(env, rs1, src2, j, 1, width, k);
> + cpu_stb_data(env, addr,
> + env->vfp.vreg[dest + k * lmul].s16[j]);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + addr = vector_get_index(env, rs1, src2, j, 1, width, k);
> + cpu_stb_data(env, addr,
> + env->vfp.vreg[dest + k * lmul].s32[j]);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + addr = vector_get_index(env, rs1, src2, j, 1, width, k);
> + cpu_stb_data(env, addr,
> + env->vfp.vreg[dest + k * lmul].s64[j]);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vsuxb_v)(CPURISCVState *env, uint32_t nf, uint32_t vm,
> + uint32_t rs1, uint32_t rs2, uint32_t rd)
> +{
> + VECTOR_HELPER(vsxb_v)(env, nf, vm, rs1, rs2, rd);
> +}
> +
> +void VECTOR_HELPER(vsh_v)(CPURISCVState *env, uint32_t nf, uint32_t vm,
> + uint32_t rs1, uint32_t rd)
> +{
> + int i, j, k, vl, vlmax, lmul, width, dest, wrote;
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + if (lmul * (nf + 1) > 32) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + k = nf;
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + wrote = (i * (nf + 1) + k) * 2;
> + cpu_stw_data(env, env->gpr[rs1] + wrote,
> + env->vfp.vreg[dest + k * lmul].s16[j]);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + wrote = (i * (nf + 1) + k) * 2;
> + cpu_stw_data(env, env->gpr[rs1] + wrote,
> + env->vfp.vreg[dest + k * lmul].s32[j]);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + wrote = (i * (nf + 1) + k) * 2;
> + cpu_stw_data(env, env->gpr[rs1] + wrote,
> + env->vfp.vreg[dest + k * lmul].s64[j]);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vssh_v)(CPURISCVState *env, uint32_t nf, uint32_t vm,
> + uint32_t rs1, uint32_t rs2, uint32_t rd)
> +{
> + int i, j, k, vl, vlmax, lmul, width, dest, wrote;
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + if (lmul * (nf + 1) > 32) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + k = nf;
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + wrote = i * env->gpr[rs2] + k * 2;
> + cpu_stw_data(env, env->gpr[rs1] + wrote,
> + env->vfp.vreg[dest + k * lmul].s16[j]);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + wrote = i * env->gpr[rs2] + k * 2;
> + cpu_stw_data(env, env->gpr[rs1] + wrote,
> + env->vfp.vreg[dest + k * lmul].s32[j]);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + wrote = i * env->gpr[rs2] + k * 2;
> + cpu_stw_data(env, env->gpr[rs1] + wrote,
> + env->vfp.vreg[dest + k * lmul].s64[j]);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vsxh_v)(CPURISCVState *env, uint32_t nf, uint32_t vm,
> + uint32_t rs1, uint32_t rs2, uint32_t rd)
> +{
> + int i, j, k, vl, vlmax, lmul, width, dest, src2;
> + target_ulong addr;
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + if (lmul * (nf + 1) > 32) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + k = nf;
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + addr = vector_get_index(env, rs1, src2, j, 2, width, k);
> + cpu_stw_data(env, addr,
> + env->vfp.vreg[dest + k * lmul].s16[j]);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + addr = vector_get_index(env, rs1, src2, j, 2, width, k);
> + cpu_stw_data(env, addr,
> + env->vfp.vreg[dest + k * lmul].s32[j]);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + addr = vector_get_index(env, rs1, src2, j, 2, width, k);
> + cpu_stw_data(env, addr,
> + env->vfp.vreg[dest + k * lmul].s64[j]);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vsuxh_v)(CPURISCVState *env, uint32_t nf, uint32_t vm,
> + uint32_t rs1, uint32_t rs2, uint32_t rd)
> +{
> + VECTOR_HELPER(vsxh_v)(env, nf, vm, rs1, rs2, rd);
> +}
> +
> +void VECTOR_HELPER(vsw_v)(CPURISCVState *env, uint32_t nf, uint32_t vm,
> + uint32_t rs1, uint32_t rd)
> +{
> + int i, j, k, vl, vlmax, lmul, width, dest, wrote;
> +
> + vl = env->vfp.vl;
> +
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + if (lmul * (nf + 1) > 32) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + k = nf;
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + wrote = (i * (nf + 1) + k) * 4;
> + cpu_stl_data(env, env->gpr[rs1] + wrote,
> + env->vfp.vreg[dest + k * lmul].s32[j]);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + wrote = (i * (nf + 1) + k) * 4;
> + cpu_stl_data(env, env->gpr[rs1] + wrote,
> + env->vfp.vreg[dest + k * lmul].s64[j]);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vssw_v)(CPURISCVState *env, uint32_t nf, uint32_t vm,
> + uint32_t rs1, uint32_t rs2, uint32_t rd)
> +{
> + int i, j, k, vl, vlmax, lmul, width, dest, wrote;
> +
> + vl = env->vfp.vl;
> +
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + if (lmul * (nf + 1) > 32) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + k = nf;
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + wrote = i * env->gpr[rs2] + k * 4;
> + cpu_stl_data(env, env->gpr[rs1] + wrote,
> + env->vfp.vreg[dest + k * lmul].s32[j]);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + wrote = i * env->gpr[rs2] + k * 4;
> + cpu_stl_data(env, env->gpr[rs1] + wrote,
> + env->vfp.vreg[dest + k * lmul].s64[j]);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vsxw_v)(CPURISCVState *env, uint32_t nf, uint32_t vm,
> + uint32_t rs1, uint32_t rs2, uint32_t rd)
> +{
> + int i, j, k, vl, vlmax, lmul, width, dest, src2;
> + target_ulong addr;
> +
> + vl = env->vfp.vl;
> +
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + if (lmul * (nf + 1) > 32) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + k = nf;
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + addr = vector_get_index(env, rs1, src2, j, 4, width, k);
> + cpu_stl_data(env, addr,
> + env->vfp.vreg[dest + k * lmul].s32[j]);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + addr = vector_get_index(env, rs1, src2, j, 4, width, k);
> + cpu_stl_data(env, addr,
> + env->vfp.vreg[dest + k * lmul].s64[j]);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vsuxw_v)(CPURISCVState *env, uint32_t nf, uint32_t vm,
> + uint32_t rs1, uint32_t rs2, uint32_t rd)
> +{
> + VECTOR_HELPER(vsxw_v)(env, nf, vm, rs1, rs2, rd);
> +}
> +
> +void VECTOR_HELPER(vse_v)(CPURISCVState *env, uint32_t nf, uint32_t vm,
> + uint32_t rs1, uint32_t rd)
> +{
> + int i, j, k, vl, vlmax, lmul, width, dest, wrote;
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + if (lmul * (nf + 1) > 32) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + k = nf;
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + wrote = i * (nf + 1) + k;
> + cpu_stb_data(env, env->gpr[rs1] + wrote,
> + env->vfp.vreg[dest + k * lmul].s8[j]);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + wrote = (i * (nf + 1) + k) * 2;
> + cpu_stw_data(env, env->gpr[rs1] + wrote,
> + env->vfp.vreg[dest + k * lmul].s16[j]);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + wrote = (i * (nf + 1) + k) * 4;
> + cpu_stl_data(env, env->gpr[rs1] + wrote,
> + env->vfp.vreg[dest + k * lmul].s32[j]);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + wrote = (i * (nf + 1) + k) * 8;
> + cpu_stq_data(env, env->gpr[rs1] + wrote,
> + env->vfp.vreg[dest + k * lmul].s64[j]);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vsse_v)(CPURISCVState *env, uint32_t nf, uint32_t vm,
> + uint32_t rs1, uint32_t rs2, uint32_t rd)
> +{
> + int i, j, k, vl, vlmax, lmul, width, dest, wrote;
> +
> + vl = env->vfp.vl;
> +
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + if (lmul * (nf + 1) > 32) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + k = nf;
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + wrote = i * env->gpr[rs2] + k;
> + cpu_stb_data(env, env->gpr[rs1] + wrote,
> + env->vfp.vreg[dest + k * lmul].s8[j]);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + wrote = i * env->gpr[rs2] + k * 2;
> + cpu_stw_data(env, env->gpr[rs1] + wrote,
> + env->vfp.vreg[dest + k * lmul].s16[j]);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + wrote = i * env->gpr[rs2] + k * 4;
> + cpu_stl_data(env, env->gpr[rs1] + wrote,
> + env->vfp.vreg[dest + k * lmul].s32[j]);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + wrote = i * env->gpr[rs2] + k * 8;
> + cpu_stq_data(env, env->gpr[rs1] + wrote,
> + env->vfp.vreg[dest + k * lmul].s64[j]);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vsxe_v)(CPURISCVState *env, uint32_t nf, uint32_t vm,
> + uint32_t rs1, uint32_t rs2, uint32_t rd)
> +{
> + int i, j, k, vl, vlmax, lmul, width, dest, src2;
> + target_ulong addr;
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + if (lmul * (nf + 1) > 32) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + k = nf;
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + addr = vector_get_index(env, rs1, src2, j, 1, width, k);
> + cpu_stb_data(env, addr,
> + env->vfp.vreg[dest + k * lmul].s8[j]);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + addr = vector_get_index(env, rs1, src2, j, 2, width, k);
> + cpu_stw_data(env, addr,
> + env->vfp.vreg[dest + k * lmul].s16[j]);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + addr = vector_get_index(env, rs1, src2, j, 4, width, k);
> + cpu_stl_data(env, addr,
> + env->vfp.vreg[dest + k * lmul].s32[j]);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + addr = vector_get_index(env, rs1, src2, j, 8, width, k);
> + cpu_stq_data(env, addr,
> + env->vfp.vreg[dest + k * lmul].s64[j]);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vsuxe_v)(CPURISCVState *env, uint32_t nf, uint32_t vm,
> + uint32_t rs1, uint32_t rs2, uint32_t rd)
> +{
> + VECTOR_HELPER(vsxe_v)(env, nf, vm, rs1, rs2, rd);
> +}
> +
> +void VECTOR_HELPER(vamoswapw_v)(CPURISCVState *env, uint32_t wd, uint32_t vm,
> + uint32_t rs1, uint32_t vs2, uint32_t vs3)
> +{
> + int i, j, vl;
> + target_long idx;
> + uint32_t lmul, width, src2, src3, vlmax;
> + target_ulong addr;
> +#ifdef CONFIG_SOFTMMU
> + int mem_idx = cpu_mmu_index(env, false);
> + TCGMemOp memop = MO_ALIGN | MO_TESL;
> +#endif
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> + /* MEM <= SEW <= XLEN */
> + if (width < 32 || (width > sizeof(target_ulong) * 8)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + /* if wd is set, vs3 is written with the old value */
> + if (vector_vtype_ill(env) ||
> + (vector_overlap_vm_common(lmul, vm, vs3) && wd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, vs2, false);
> + vector_lmul_check_reg(env, lmul, vs3, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = vs2 + (i / (VLEN / width));
> + src3 = vs3 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + int32_t tmp;
> + idx = (target_long)env->vfp.vreg[src2].s32[j];
> + addr = idx + env->gpr[rs1];
> +#ifdef CONFIG_SOFTMMU
> + tmp = helper_atomic_xchgl_le(env, addr,
> + env->vfp.vreg[src3].s32[j],
> + make_memop_idx(memop & ~MO_SIGN, mem_idx));
> +#else
> + tmp = helper_atomic_xchgl_le(env, addr,
> + env->vfp.vreg[src3].s32[j]);
> +#endif
> + if (wd) {
> + env->vfp.vreg[src3].s32[j] = tmp;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + int64_t tmp;
> + idx = (target_long)env->vfp.vreg[src2].s64[j];
> + addr = idx + env->gpr[rs1];
> +
> +#ifdef CONFIG_SOFTMMU
> + tmp = (int64_t)(int32_t)helper_atomic_xchgl_le(env, addr,
> + env->vfp.vreg[src3].s64[j],
> + make_memop_idx(memop & ~MO_SIGN, mem_idx));
> +#else
> + tmp = (int64_t)(int32_t)helper_atomic_xchgl_le(env, addr,
> + env->vfp.vreg[src3].s64[j]);
> +#endif
> + if (wd) {
> + env->vfp.vreg[src3].s64[j] = tmp;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_amo(env, src3, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
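When SEW is 64 but the AMO accesses a 32-bit word, the old value comes
back through an (int64_t)(int32_t) cast, i.e. it is sign-extended into
the wider element, matching the sign_extend(..., 32) used by the word
loads. The widening step in isolation (plain C, hypothetical name):

    #include <assert.h>
    #include <stdint.h>

    /* Widen the 32-bit old value of a word-sized AMO into a 64-bit
     * element, as the (int64_t)(int32_t) casts above do. */
    static int64_t amo_w_old(uint32_t old)
    {
        return (int64_t)(int32_t)old;
    }

    int main(void)
    {
        assert(amo_w_old(0x7fffffffu) == 0x7fffffffLL);
        /* 0xffffffff is -1 as a word: widens to -1, not 2^32 - 1. */
        assert(amo_w_old(0xffffffffu) == -1);
        return 0;
    }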
> +
> +void VECTOR_HELPER(vamoswapd_v)(CPURISCVState *env, uint32_t wd, uint32_t vm,
> + uint32_t rs1, uint32_t vs2, uint32_t vs3)
> +{
> + int i, j, vl;
> + target_long idx;
> + uint32_t lmul, width, src2, src3, vlmax;
> + target_ulong addr;
> +#ifdef CONFIG_SOFTMMU
> + int mem_idx = cpu_mmu_index(env, false);
> + TCGMemOp memop = MO_ALIGN | MO_TEQ;
> +#endif
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> + /* MEM <= SEW <= XLEN */
> + if (width < 64 || (width > sizeof(target_ulong) * 8)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + /* if wd is set, vs3 is written with the old value */
> + if (vector_vtype_ill(env) ||
> + (vector_overlap_vm_common(lmul, vm, vs3) && wd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, vs2, false);
> + vector_lmul_check_reg(env, lmul, vs3, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = vs2 + (i / (VLEN / width));
> + src3 = vs3 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + int64_t tmp;
> + idx = (target_long)env->vfp.vreg[src2].s64[j];
> + addr = idx + env->gpr[rs1];
> +
> +#ifdef CONFIG_SOFTMMU
> + tmp = helper_atomic_xchgq_le(env, addr,
> + env->vfp.vreg[src3].s64[j],
> + make_memop_idx(memop & ~MO_SIGN, mem_idx));
> +#else
> + tmp = helper_atomic_xchgq_le(env, addr,
> + env->vfp.vreg[src3].s64[j]);
> +#endif
> +
> + if (wd) {
> + env->vfp.vreg[src3].s64[j] = tmp;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_amo(env, src3, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vamoaddw_v)(CPURISCVState *env, uint32_t wd, uint32_t vm,
> + uint32_t rs1, uint32_t vs2, uint32_t vs3)
> +{
> + int i, j, vl;
> + target_long idx;
> + uint32_t lmul, width, src2, src3, vlmax;
> + target_ulong addr;
> +#ifdef CONFIG_SOFTMMU
> + int mem_idx = cpu_mmu_index(env, false);
> + TCGMemOp memop = MO_ALIGN | MO_TESL;
> +#endif
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> + /* MEM <= SEW <= XLEN */
> + if (width < 32 || (width > sizeof(target_ulong) * 8)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + /* if wd is set, vs3 is written with the old value */
> + if (vector_vtype_ill(env) ||
> + (vector_overlap_vm_common(lmul, vm, vs3) && wd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, vs2, false);
> + vector_lmul_check_reg(env, lmul, vs3, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = vs2 + (i / (VLEN / width));
> + src3 = vs3 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + int32_t tmp;
> + idx = (target_long)env->vfp.vreg[src2].s32[j];
> + addr = idx + env->gpr[rs1];
> +#ifdef CONFIG_SOFTMMU
> + tmp = helper_atomic_fetch_addl_le(env, addr,
> + env->vfp.vreg[src3].s32[j],
> + make_memop_idx(memop & ~MO_SIGN, mem_idx));
> +#else
> + tmp = helper_atomic_fetch_addl_le(env, addr,
> + env->vfp.vreg[src3].s32[j]);
> +#endif
> + if (wd) {
> + env->vfp.vreg[src3].s32[j] = tmp;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + int64_t tmp;
> + idx = (target_long)env->vfp.vreg[src2].s64[j];
> + addr = idx + env->gpr[rs1];
> +
> +#ifdef CONFIG_SOFTMMU
> + tmp = (int64_t)(int32_t)helper_atomic_fetch_addl_le(env,
> + addr, env->vfp.vreg[src3].s64[j],
> + make_memop_idx(memop & ~MO_SIGN, mem_idx));
> +#else
> + tmp = (int64_t)(int32_t)helper_atomic_fetch_addl_le(env,
> + addr, env->vfp.vreg[src3].s64[j]);
> +#endif
> + if (wd) {
> + env->vfp.vreg[src3].s64[j] = tmp;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_amo(env, src3, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vamoaddd_v)(CPURISCVState *env, uint32_t wd, uint32_t vm,
> + uint32_t rs1, uint32_t vs2, uint32_t vs3)
> +{
> + int i, j, vl;
> + target_long idx;
> + uint32_t lmul, width, src2, src3, vlmax;
> + target_ulong addr;
> +#ifdef CONFIG_SOFTMMU
> + int mem_idx = cpu_mmu_index(env, false);
> + TCGMemOp memop = MO_ALIGN | MO_TEQ;
> +#endif
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> + /* MEM <= SEW <= XLEN */
> + if (width < 64 || (width > sizeof(target_ulong) * 8)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + /* if wd is set, vs3 is written with the old value */
> + if (vector_vtype_ill(env) ||
> + (vector_overlap_vm_common(lmul, vm, vs3) && wd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, vs2, false);
> + vector_lmul_check_reg(env, lmul, vs3, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = vs2 + (i / (VLEN / width));
> + src3 = vs3 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + int64_t tmp;
> + idx = (target_long)env->vfp.vreg[src2].s64[j];
> + addr = idx + env->gpr[rs1];
> +
> +#ifdef CONFIG_SOFTMMU
> + tmp = helper_atomic_fetch_addq_le(env, addr,
> + env->vfp.vreg[src3].s64[j],
> + make_memop_idx(memop & ~MO_SIGN, mem_idx));
> +#else
> + tmp = helper_atomic_fetch_addq_le(env, addr,
> + env->vfp.vreg[src3].s64[j]);
> +#endif
> + if (wd) {
> + env->vfp.vreg[src3].s64[j] = tmp;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_amo(env, src3, j, width);
> + }
> + }
> +
> + env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vamoxorw_v)(CPURISCVState *env, uint32_t wd, uint32_t vm,
> + uint32_t rs1, uint32_t vs2, uint32_t vs3)
> +{
> + int i, j, vl;
> + target_long idx;
> + uint32_t lmul, width, src2, src3, vlmax;
> + target_ulong addr;
> +#ifdef CONFIG_SOFTMMU
> + int mem_idx = cpu_mmu_index(env, false);
> + TCGMemOp memop = MO_ALIGN | MO_TESL;
> +#endif
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> + /* MEM <= SEW <= XLEN */
> + if (width < 32 || (width > sizeof(target_ulong) * 8)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + /* if wd is set, vs3 is written with the old value */
> + if (vector_vtype_ill(env) ||
> + (vector_overlap_vm_common(lmul, vm, vs3) && wd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, vs2, false);
> + vector_lmul_check_reg(env, lmul, vs3, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = vs2 + (i / (VLEN / width));
> + src3 = vs3 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + int32_t tmp;
> + idx = (target_long)env->vfp.vreg[src2].s32[j];
> + addr = idx + env->gpr[rs1];
> +#ifdef CONFIG_SOFTMMU
> + tmp = helper_atomic_fetch_xorl_le(env, addr,
> + env->vfp.vreg[src3].s32[j],
> + make_memop_idx(memop & ~MO_SIGN, mem_idx));
> +#else
> + tmp = helper_atomic_fetch_xorl_le(env, addr,
> + env->vfp.vreg[src3].s32[j]);
> +#endif
> + if (wd) {
> + env->vfp.vreg[src3].s32[j] = tmp;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + int64_t tmp;
> + idx = (target_long)env->vfp.vreg[src2].s64[j];
> + addr = idx + env->gpr[rs1];
> +
> +#ifdef CONFIG_SOFTMMU
> + tmp = (int64_t)(int32_t)helper_atomic_fetch_xorl_le(env,
> + addr, env->vfp.vreg[src3].s64[j],
> + make_memop_idx(memop & ~MO_SIGN, mem_idx));
> +#else
> + tmp = (int64_t)(int32_t)helper_atomic_fetch_xorl_le(env,
> + addr, env->vfp.vreg[src3].s64[j]);
> +#endif
> + if (wd) {
> + env->vfp.vreg[src3].s64[j] = tmp;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_amo(env, src3, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vamoxord_v)(CPURISCVState *env, uint32_t wd, uint32_t vm,
> + uint32_t rs1, uint32_t vs2, uint32_t vs3)
> +{
> + int i, j, vl;
> + target_long idx;
> + uint32_t lmul, width, src2, src3, vlmax;
> + target_ulong addr;
> +#ifdef CONFIG_SOFTMMU
> + int mem_idx = cpu_mmu_index(env, false);
> + TCGMemOp memop = MO_ALIGN | MO_TEQ;
> +#endif
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> + /* MEM <= SEW <= XLEN */
> + if (width < 64 || (width > sizeof(target_ulong) * 8)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + /* if wd is set, vs3 is written with the old value */
> + if (vector_vtype_ill(env) ||
> + (vector_overlap_vm_common(lmul, vm, vs3) && wd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, vs2, false);
> + vector_lmul_check_reg(env, lmul, vs3, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = vs2 + (i / (VLEN / width));
> + src3 = vs3 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + int64_t tmp;
> + idx = (target_long)env->vfp.vreg[src2].s64[j];
> + addr = idx + env->gpr[rs1];
> +
> +#ifdef CONFIG_SOFTMMU
> + tmp = helper_atomic_fetch_xorq_le(env, addr,
> + env->vfp.vreg[src3].s64[j],
> + make_memop_idx(memop & ~MO_SIGN, mem_idx));
> +#else
> + tmp = helper_atomic_fetch_xorq_le(env, addr,
> + env->vfp.vreg[src3].s64[j]);
> +#endif
> + if (wd) {
> + env->vfp.vreg[src3].s64[j] = tmp;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_amo(env, src3, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vamoandw_v)(CPURISCVState *env, uint32_t wd, uint32_t vm,
> + uint32_t rs1, uint32_t vs2, uint32_t vs3)
> +{
> + int i, j, vl;
> + target_long idx;
> + uint32_t lmul, width, src2, src3, vlmax;
> + target_ulong addr;
> +#ifdef CONFIG_SOFTMMU
> + int mem_idx = cpu_mmu_index(env, false);
> + TCGMemOp memop = MO_ALIGN | MO_TESL;
> +#endif
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> + /* MEM <= SEW <= XLEN */
> + if (width < 32 || (width > sizeof(target_ulong) * 8)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + /* if wd is set, vs3 is written with the old value */
> + if (vector_vtype_ill(env) ||
> + (vector_overlap_vm_common(lmul, vm, vs3) && wd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, vs2, false);
> + vector_lmul_check_reg(env, lmul, vs3, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = vs2 + (i / (VLEN / width));
> + src3 = vs3 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + int32_t tmp;
> + idx = (target_long)env->vfp.vreg[src2].s32[j];
> + addr = idx + env->gpr[rs1];
> +#ifdef CONFIG_SOFTMMU
> + tmp = helper_atomic_fetch_andl_le(env, addr,
> + env->vfp.vreg[src3].s32[j],
> + make_memop_idx(memop & ~MO_SIGN, mem_idx));
> +#else
> + tmp = helper_atomic_fetch_andl_le(env, addr,
> + env->vfp.vreg[src3].s32[j]);
> +#endif
> + if (wd) {
> + env->vfp.vreg[src3].s32[j] = tmp;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + int64_t tmp;
> + idx = (target_long)env->vfp.vreg[src2].s64[j];
> + addr = idx + env->gpr[rs1];
> +
> +#ifdef CONFIG_SOFTMMU
> + tmp = (int64_t)(int32_t)helper_atomic_fetch_andl_le(env,
> + addr, env->vfp.vreg[src3].s64[j],
> + make_memop_idx(memop & ~MO_SIGN, mem_idx));
> +#else
> + tmp = (int64_t)(int32_t)helper_atomic_fetch_andl_le(env,
> + addr, env->vfp.vreg[src3].s64[j]);
> +#endif
> + if (wd) {
> + env->vfp.vreg[src3].s64[j] = tmp;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_amo(env, src3, j, width);
> + }
> + }
> +
> + env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vamoandd_v)(CPURISCVState *env, uint32_t wd, uint32_t vm,
> + uint32_t rs1, uint32_t vs2, uint32_t vs3)
> +{
> + int i, j, vl;
> + target_long idx;
> + uint32_t lmul, width, src2, src3, vlmax;
> + target_ulong addr;
> +#ifdef CONFIG_SOFTMMU
> + int mem_idx = cpu_mmu_index(env, false);
> + TCGMemOp memop = MO_ALIGN | MO_TEQ;
> +#endif
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> + /* MEM <= SEW <= XLEN */
> + if (width < 64 || (width > sizeof(target_ulong) * 8)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + /* if wd is set, rd is written with the old value */
> + if (vector_vtype_ill(env) ||
> + (vector_overlap_vm_common(lmul, vm, vs3) && wd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, vs2, false);
> + vector_lmul_check_reg(env, lmul, vs3, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = vs2 + (i / (VLEN / width));
> + src3 = vs3 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + int64_t tmp;
> + idx = (target_long)env->vfp.vreg[src2].s64[j];
> + addr = idx + env->gpr[rs1];
> +
> +#ifdef CONFIG_SOFTMMU
> + tmp = helper_atomic_fetch_andq_le(env, addr,
> + env->vfp.vreg[src3].s64[j],
> + make_memop_idx(memop & ~MO_SIGN, mem_idx));
> +#else
> + tmp = helper_atomic_fetch_andq_le(env, addr,
> + env->vfp.vreg[src3].s64[j]);
> +#endif
> + if (wd) {
> + env->vfp.vreg[src3].s64[j] = tmp;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_amo(env, src3, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vamoorw_v)(CPURISCVState *env, uint32_t wd, uint32_t vm,
> + uint32_t rs1, uint32_t vs2, uint32_t vs3)
> +{
> + int i, j, vl;
> + target_long idx;
> + uint32_t lmul, width, src2, src3, vlmax;
> + target_ulong addr;
> +#ifdef CONFIG_SOFTMMU
> + int mem_idx = cpu_mmu_index(env, false);
> + TCGMemOp memop = MO_ALIGN | MO_TESL;
> +#endif
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> + /* MEM <= SEW <= XLEN */
> + if (width < 32 || (width > sizeof(target_ulong) * 8)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + /* if wd is set, rd is written with the old value */
> + if (vector_vtype_ill(env) ||
> + (vector_overlap_vm_common(lmul, vm, vs3) && wd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, vs2, false);
> + vector_lmul_check_reg(env, lmul, vs3, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = vs2 + (i / (VLEN / width));
> + src3 = vs3 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + int32_t tmp;
> + idx = (target_long)env->vfp.vreg[src2].s32[j];
> + addr = idx + env->gpr[rs1];
> +#ifdef CONFIG_SOFTMMU
> + tmp = helper_atomic_fetch_orl_le(env, addr,
> + env->vfp.vreg[src3].s32[j],
> + make_memop_idx(memop & ~MO_SIGN, mem_idx));
> +#else
> + tmp = helper_atomic_fetch_orl_le(env, addr,
> + env->vfp.vreg[src3].s32[j]);
> +#endif
> + if (wd) {
> + env->vfp.vreg[src3].s32[j] = tmp;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + int64_t tmp;
> + idx = (target_long)env->vfp.vreg[src2].s64[j];
> + addr = idx + env->gpr[rs1];
> +
> +#ifdef CONFIG_SOFTMMU
> + tmp = (int64_t)(int32_t)helper_atomic_fetch_orl_le(env,
> + addr, env->vfp.vreg[src3].s64[j],
> + make_memop_idx(memop & ~MO_SIGN, mem_idx));
> +#else
> + tmp = (int64_t)(int32_t)helper_atomic_fetch_orl_le(env,
> + addr, env->vfp.vreg[src3].s64[j]);
> +#endif
> + if (wd) {
> + env->vfp.vreg[src3].s64[j] = tmp;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_amo(env, src3, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vamoord_v)(CPURISCVState *env, uint32_t wd, uint32_t vm,
> + uint32_t rs1, uint32_t vs2, uint32_t vs3)
> +{
> + int i, j, vl;
> + target_long idx;
> + uint32_t lmul, width, src2, src3, vlmax;
> + target_ulong addr;
> +#ifdef CONFIG_SOFTMMU
> + int mem_idx = cpu_mmu_index(env, false);
> + TCGMemOp memop = MO_ALIGN | MO_TEQ;
> +#endif
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> + /* MEM <= SEW <= XLEN */
> + if (width < 64 || (width > sizeof(target_ulong) * 8)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + /* if wd is set, rd is written with the old value */
> + if (vector_vtype_ill(env) ||
> + (vector_overlap_vm_common(lmul, vm, vs3) && wd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, vs2, false);
> + vector_lmul_check_reg(env, lmul, vs3, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = vs2 + (i / (VLEN / width));
> + src3 = vs3 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + int64_t tmp;
> + idx = (target_long)env->vfp.vreg[src2].s64[j];
> + addr = idx + env->gpr[rs1];
> +
> +#ifdef CONFIG_SOFTMMU
> + tmp = helper_atomic_fetch_orq_le(env, addr,
> + env->vfp.vreg[src3].s64[j],
> + make_memop_idx(memop & ~MO_SIGN, mem_idx));
> +#else
> + tmp = helper_atomic_fetch_orq_le(env, addr,
> + env->vfp.vreg[src3].s64[j]);
> +#endif
> + if (wd) {
> + env->vfp.vreg[src3].s64[j] = tmp;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_amo(env, src3, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vamominw_v)(CPURISCVState *env, uint32_t wd, uint32_t vm,
> + uint32_t rs1, uint32_t vs2, uint32_t vs3)
> +{
> + int i, j, vl;
> + target_long idx;
> + uint32_t lmul, width, src2, src3, vlmax;
> + target_ulong addr;
> +#ifdef CONFIG_SOFTMMU
> + int mem_idx = cpu_mmu_index(env, false);
> + TCGMemOp memop = MO_ALIGN | MO_TESL;
> +#endif
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> + /* MEM <= SEW <= XLEN */
> + if (width < 32 || (width > sizeof(target_ulong) * 8)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + /* if wd is set, rd is written with the old value */
> + if (vector_vtype_ill(env) ||
> + (vector_overlap_vm_common(lmul, vm, vs3) && wd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, vs2, false);
> + vector_lmul_check_reg(env, lmul, vs3, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = vs2 + (i / (VLEN / width));
> + src3 = vs3 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + int32_t tmp;
> + idx = (target_long)env->vfp.vreg[src2].s32[j];
> + addr = idx + env->gpr[rs1];
> +#ifdef CONFIG_SOFTMMU
> + tmp = helper_atomic_fetch_sminl_le(env, addr,
> + env->vfp.vreg[src3].s32[j],
> + make_memop_idx(memop & ~MO_SIGN, mem_idx));
> +#else
> + tmp = helper_atomic_fetch_sminl_le(env, addr,
> + env->vfp.vreg[src3].s32[j]);
> +#endif
> + if (wd) {
> + env->vfp.vreg[src3].s32[j] = tmp;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + int64_t tmp;
> + idx = (target_long)env->vfp.vreg[src2].s64[j];
> + addr = idx + env->gpr[rs1];
> +
> +#ifdef CONFIG_SOFTMMU
> + tmp = (int64_t)(int32_t)helper_atomic_fetch_sminl_le(env,
> + addr, env->vfp.vreg[src3].s64[j],
> + make_memop_idx(memop & ~MO_SIGN, mem_idx));
> +#else
> + tmp = (int64_t)(int32_t)helper_atomic_fetch_sminl_le(env,
> + addr, env->vfp.vreg[src3].s64[j]);
> +#endif
> + if (wd) {
> + env->vfp.vreg[src3].s64[j] = tmp;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_amo(env, src3, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +
> +void VECTOR_HELPER(vamomind_v)(CPURISCVState *env, uint32_t wd, uint32_t vm,
> + uint32_t rs1, uint32_t vs2, uint32_t vs3)
> +{
> + int i, j, vl;
> + target_long idx;
> + uint32_t lmul, width, src2, src3, vlmax;
> + target_ulong addr;
> +#ifdef CONFIG_SOFTMMU
> + int mem_idx = cpu_mmu_index(env, false);
> + TCGMemOp memop = MO_ALIGN | MO_TEQ;
> +#endif
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> + /* MEM <= SEW <= XLEN */
> + if (width < 64 || (width > sizeof(target_ulong) * 8)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + /* if wd is set, rd is written with the old value */
> + if (vector_vtype_ill(env) ||
> + (vector_overlap_vm_common(lmul, vm, vs3) && wd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, vs2, false);
> + vector_lmul_check_reg(env, lmul, vs3, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = vs2 + (i / (VLEN / width));
> + src3 = vs3 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + int64_t tmp;
> + idx = (target_long)env->vfp.vreg[src2].s64[j];
> + addr = idx + env->gpr[rs1];
> +
> +#ifdef CONFIG_SOFTMMU
> + tmp = helper_atomic_fetch_sminq_le(env, addr,
> + env->vfp.vreg[src3].s64[j],
> + make_memop_idx(memop & ~MO_SIGN, mem_idx));
> +#else
> + tmp = helper_atomic_fetch_sminq_le(env, addr,
> + env->vfp.vreg[src3].s64[j]);
> +#endif
> + if (wd) {
> + env->vfp.vreg[src3].s64[j] = tmp;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_amo(env, src3, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vamomaxw_v)(CPURISCVState *env, uint32_t wd, uint32_t vm,
> + uint32_t rs1, uint32_t vs2, uint32_t vs3)
> +{
> + int i, j, vl;
> + target_long idx;
> + uint32_t lmul, width, src2, src3, vlmax;
> + target_ulong addr;
> +#ifdef CONFIG_SOFTMMU
> + int mem_idx = cpu_mmu_index(env, false);
> + TCGMemOp memop = MO_ALIGN | MO_TESL;
> +#endif
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> + /* MEM <= SEW <= XLEN */
> + if (width < 32 || (width > sizeof(target_ulong) * 8)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + /* if wd is set, rd is written with the old value */
> + if (vector_vtype_ill(env) ||
> + (vector_overlap_vm_common(lmul, vm, vs3) && wd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, vs2, false);
> + vector_lmul_check_reg(env, lmul, vs3, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = vs2 + (i / (VLEN / width));
> + src3 = vs3 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + int32_t tmp;
> + idx = (target_long)env->vfp.vreg[src2].s32[j];
> + addr = idx + env->gpr[rs1];
> +#ifdef CONFIG_SOFTMMU
> + tmp = helper_atomic_fetch_smaxl_le(env, addr,
> + env->vfp.vreg[src3].s32[j],
> + make_memop_idx(memop & ~MO_SIGN, mem_idx));
> +#else
> + tmp = helper_atomic_fetch_smaxl_le(env, addr,
> + env->vfp.vreg[src3].s32[j]);
> +#endif
> + if (wd) {
> + env->vfp.vreg[src3].s32[j] = tmp;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + int64_t tmp;
> + idx = (target_long)env->vfp.vreg[src2].s64[j];
> + addr = idx + env->gpr[rs1];
> +
> +#ifdef CONFIG_SOFTMMU
> + tmp = (int64_t)(int32_t)helper_atomic_fetch_smaxl_le(env,
> + addr, env->vfp.vreg[src3].s64[j],
> + make_memop_idx(memop & ~MO_SIGN, mem_idx));
> +#else
> + tmp = (int64_t)(int32_t)helper_atomic_fetch_smaxl_le(env,
> + addr, env->vfp.vreg[src3].s64[j]);
> +#endif
> + if (wd) {
> + env->vfp.vreg[src3].s64[j] = tmp;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_amo(env, src3, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +
> +void VECTOR_HELPER(vamomaxd_v)(CPURISCVState *env, uint32_t wd, uint32_t vm,
> + uint32_t rs1, uint32_t vs2, uint32_t vs3)
> +{
> + int i, j, vl;
> + target_long idx;
> + uint32_t lmul, width, src2, src3, vlmax;
> + target_ulong addr;
> +#ifdef CONFIG_SOFTMMU
> + int mem_idx = cpu_mmu_index(env, false);
> + TCGMemOp memop = MO_ALIGN | MO_TEQ;
> +#endif
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> + /* MEM <= SEW <= XLEN */
> + if (width < 64 || (width > sizeof(target_ulong) * 8)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + /* if wd is set, rd is written with the old value */
> + if (vector_vtype_ill(env) ||
> + (vector_overlap_vm_common(lmul, vm, vs3) && wd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, vs2, false);
> + vector_lmul_check_reg(env, lmul, vs3, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = vs2 + (i / (VLEN / width));
> + src3 = vs3 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + int64_t tmp;
> + idx = (target_long)env->vfp.vreg[src2].s64[j];
> + addr = idx + env->gpr[rs1];
> +
> +#ifdef CONFIG_SOFTMMU
> + tmp = helper_atomic_fetch_smaxq_le(env, addr,
> + env->vfp.vreg[src3].s64[j],
> + make_memop_idx(memop & ~MO_SIGN, mem_idx));
> +#else
> + tmp = helper_atomic_fetch_smaxq_le(env, addr,
> + env->vfp.vreg[src3].s64[j]);
> +#endif
> + if (wd) {
> + env->vfp.vreg[src3].s64[j] = tmp;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_amo(env, src3, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vamominuw_v)(CPURISCVState *env, uint32_t wd, uint32_t vm,
> + uint32_t rs1, uint32_t vs2, uint32_t vs3)
> +{
> + int i, j, vl;
> + target_long idx;
> + uint32_t lmul, width, src2, src3, vlmax;
> + target_ulong addr;
> +#ifdef CONFIG_SOFTMMU
> + int mem_idx = cpu_mmu_index(env, false);
> + TCGMemOp memop = MO_ALIGN | MO_TESL;
> +#endif
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> + /* MEM <= SEW <= XLEN */
> + if (width < 32 || (width > sizeof(target_ulong) * 8)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + /* if wd is set, rd is written with the old value */
> + if (vector_vtype_ill(env) ||
> + (vector_overlap_vm_common(lmul, vm, vs3) && wd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, vs2, false);
> + vector_lmul_check_reg(env, lmul, vs3, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = vs2 + (i / (VLEN / width));
> + src3 = vs3 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + uint32_t tmp;
> + idx = (target_long)env->vfp.vreg[src2].s32[j];
> + addr = idx + env->gpr[rs1];
> +#ifdef CONFIG_SOFTMMU
> + tmp = helper_atomic_fetch_uminl_le(env, addr,
> + env->vfp.vreg[src3].s32[j],
> + make_memop_idx(memop & ~MO_SIGN, mem_idx));
> +#else
> + tmp = helper_atomic_fetch_uminl_le(env, addr,
> + env->vfp.vreg[src3].s32[j]);
> +#endif
> + if (wd) {
> + env->vfp.vreg[src3].s32[j] = tmp;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + uint64_t tmp;
> + idx = (target_long)env->vfp.vreg[src2].s64[j];
> + addr = idx + env->gpr[rs1];
> +
> +#ifdef CONFIG_SOFTMMU
> + tmp = (int64_t)(int32_t)helper_atomic_fetch_uminl_le(
> + env, addr, env->vfp.vreg[src3].s64[j],
> + make_memop_idx(memop & ~MO_SIGN, mem_idx));
> +#else
> + tmp = (int64_t)(int32_t)helper_atomic_fetch_uminl_le(
> + env, addr, env->vfp.vreg[src3].s64[j]);
> +#endif
> + if (wd) {
> + env->vfp.vreg[src3].s64[j] = tmp;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_amo(env, src3, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +
> +void VECTOR_HELPER(vamominud_v)(CPURISCVState *env, uint32_t wd, uint32_t vm,
> + uint32_t rs1, uint32_t vs2, uint32_t vs3)
> +{
> + int i, j, vl;
> + target_long idx;
> + uint32_t lmul, width, src2, src3, vlmax;
> + target_ulong addr;
> +#ifdef CONFIG_SOFTMMU
> + int mem_idx = cpu_mmu_index(env, false);
> + TCGMemOp memop = MO_ALIGN | MO_TESL;
> +#endif
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> + /* MEM <= SEW <= XLEN */
> + if (width < 64 || (width > sizeof(target_ulong) * 8)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + /* if wd is set, rd is written with the old value */
> + if (vector_vtype_ill(env) ||
> + (vector_overlap_vm_common(lmul, vm, vs3) && wd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, vs2, false);
> + vector_lmul_check_reg(env, lmul, vs3, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = vs2 + (i / (VLEN / width));
> + src3 = vs3 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + uint32_t tmp;
> + idx = (target_long)env->vfp.vreg[src2].s32[j];
> + addr = idx + env->gpr[rs1];
> +#ifdef CONFIG_SOFTMMU
> + tmp = helper_atomic_fetch_uminl_le(env, addr,
> + env->vfp.vreg[src3].s32[j],
> + make_memop_idx(memop & ~MO_SIGN, mem_idx));
> +#else
> + tmp = helper_atomic_fetch_uminl_le(env, addr,
> + env->vfp.vreg[src3].s32[j]);
> +#endif
> + if (wd) {
> + env->vfp.vreg[src3].s32[j] = tmp;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + uint64_t tmp;
> + idx = (target_long)env->vfp.vreg[src2].s64[j];
> + addr = idx + env->gpr[rs1];
> +
> +#ifdef CONFIG_SOFTMMU
> + tmp = helper_atomic_fetch_uminq_le(
> + env, addr, env->vfp.vreg[src3].s64[j],
> + make_memop_idx(memop & ~MO_SIGN, mem_idx));
> +#else
> + tmp = helper_atomic_fetch_uminq_le(env, addr,
> + env->vfp.vreg[src3].s64[j]);
> +#endif
> + if (wd) {
> + env->vfp.vreg[src3].s64[j] = tmp;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_amo(env, src3, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vamomaxuw_v)(CPURISCVState *env, uint32_t wd, uint32_t vm,
> + uint32_t rs1, uint32_t vs2, uint32_t vs3)
> +{
> + int i, j, vl;
> + target_long idx;
> + uint32_t lmul, width, src2, src3, vlmax;
> + target_ulong addr;
> +#ifdef CONFIG_SOFTMMU
> + int mem_idx = cpu_mmu_index(env, false);
> + TCGMemOp memop = MO_ALIGN | MO_TESL;
> +#endif
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> + /* MEM <= SEW <= XLEN */
> + if (width < 32 || (width > sizeof(target_ulong) * 8)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + /* if wd is set, rd is written with the old value */
> + if (vector_vtype_ill(env) ||
> + (vector_overlap_vm_common(lmul, vm, vs3) && wd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, vs2, false);
> + vector_lmul_check_reg(env, lmul, vs3, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = vs2 + (i / (VLEN / width));
> + src3 = vs3 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + uint32_t tmp;
> + idx = (target_long)env->vfp.vreg[src2].s32[j];
> + addr = idx + env->gpr[rs1];
> +#ifdef CONFIG_SOFTMMU
> + tmp = helper_atomic_fetch_umaxl_le(env, addr,
> + env->vfp.vreg[src3].s32[j],
> + make_memop_idx(memop & ~MO_SIGN, mem_idx));
> +#else
> + tmp = helper_atomic_fetch_umaxl_le(env, addr,
> + env->vfp.vreg[src3].s32[j]);
> +#endif
> + if (wd) {
> + env->vfp.vreg[src3].s32[j] = tmp;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + uint64_t tmp;
> + idx = (target_long)env->vfp.vreg[src2].s64[j];
> + addr = idx + env->gpr[rs1];
> +
> +#ifdef CONFIG_SOFTMMU
> + tmp = (int64_t)(int32_t)helper_atomic_fetch_umaxl_le(
> + env, addr, env->vfp.vreg[src3].s64[j],
> + make_memop_idx(memop & ~MO_SIGN, mem_idx));
> +#else
> + tmp = (int64_t)(int32_t)helper_atomic_fetch_umaxl_le(
> + env, addr, env->vfp.vreg[src3].s64[j]);
> +#endif
> + if (wd) {
> + env->vfp.vreg[src3].s64[j] = tmp;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_amo(env, src3, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vamomaxud_v)(CPURISCVState *env, uint32_t wd, uint32_t vm,
> + uint32_t rs1, uint32_t vs2, uint32_t vs3)
> +{
> + int i, j, vl;
> + target_long idx;
> + uint32_t lmul, width, src2, src3, vlmax;
> + target_ulong addr;
> +#ifdef CONFIG_SOFTMMU
> + int mem_idx = cpu_mmu_index(env, false);
> + TCGMemOp memop = MO_ALIGN | MO_TEQ;
> +#endif
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> + /* MEM <= SEW <= XLEN */
> + if (width < 64 || (width > sizeof(target_ulong) * 8)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + /* if wd is set, rd is written with the old value */
> + if (vector_vtype_ill(env) ||
> + (vector_overlap_vm_common(lmul, vm, vs3) && wd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, vs2, false);
> + vector_lmul_check_reg(env, lmul, vs3, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = vs2 + (i / (VLEN / width));
> + src3 = vs3 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + uint64_t tmp;
> + idx = (target_long)env->vfp.vreg[src2].s64[j];
> + addr = idx + env->gpr[rs1];
> +
> +#ifdef CONFIG_SOFTMMU
> + tmp = helper_atomic_fetch_umaxq_le(
> + env, addr, env->vfp.vreg[src3].s64[j],
> + make_memop_idx(memop & ~MO_SIGN, mem_idx));
> +#else
> + tmp = helper_atomic_fetch_umaxq_le(env, addr,
> + env->vfp.vreg[src3].s64[j]);
> +#endif
> + if (wd) {
> + env->vfp.vreg[src3].s64[j] = tmp;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_amo(env, src3, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> --
> 2.7.4
>
>
>
* Re: [Qemu-devel] [PATCH] RISCV: support riscv vector extension 0.7.1
2019-08-28 18:54 ` [Qemu-riscv] " Richard Henderson
@ 2019-08-28 20:43 ` Richard Henderson
-1 siblings, 0 replies; 52+ messages in thread
From: Richard Henderson @ 2019-08-28 20:43 UTC (permalink / raw)
To: liuzhiwei, qemu-devel, qemu-riscv
Cc: peter.maydell, palmer, sagark, kbastian, riku.voipio, laurent,
Alistair.Francis, alex.bennee, aurelien
On 8/28/19 11:54 AM, Richard Henderson wrote:
> But it might be reasonable to include (VSTART == 0 && VL == VLMAX) as a
> single bit.
BTW, it is reasonable to check VSTART == 0 always. Quoting the spec:
# Implementations are permitted to raise illegal instruction exceptions
# when attempting to execute a vector instruction with a value of vstart
# that the implementation can never produce when executing that same
# instruction with the same vtype setting.
Since QEMU will never interrupt a single instruction, each vector instruction
will always run to completion, which clears VSTART. Since QEMU will never
produce a non-zero value of VSTART, it is allowed to trap on any non-zero
setting of VSTART.
I.e. it can be handled at translation time alongside VILL.
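As a rough sketch (mine, not code from the patch; the DisasContext field,
its tb_flags plumbing, and the function/argument names are assumptions),
the translation-time check could look like:

/* Hypothetical: one TB flag records "vtype.vill is set or vstart != 0",
 * so every vector translator can reject the instruction up front. */
static bool vext_check_ok(DisasContext *ctx)
{
    if (ctx->vill_or_vstart) {         /* assumed field, filled from tb_flags */
        gen_exception_illegal(ctx);    /* existing RISC-V translator helper */
        return false;
    }
    return true;
}

static bool trans_vadd_vv(DisasContext *ctx, arg_vadd_vv *a)
{
    if (!vext_check_ok(ctx)) {
        return true;    /* illegal-instruction exception already generated */
    }
    /* ... normal translation ... */
    return true;
}

That keeps the per-instruction cost at a single predictable branch; CSR
writes that touch vtype or vstart just have to end the TB so the flag
gets recomputed.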
r~
* Re: [Qemu-devel] [PATCH] RISCV: support riscv vector extension 0.7.1
2019-08-28 2:36 [Qemu-riscv] [PATCH] RISCV: support riscv vector extension 0.7.1 liuzhiwei
@ 2019-08-28 21:34 ` Alistair Francis
2019-08-28 18:54 ` [Qemu-riscv] " Richard Henderson
` (3 subsequent siblings)
4 siblings, 0 replies; 52+ messages in thread
From: Alistair Francis @ 2019-08-28 21:34 UTC (permalink / raw)
To: liuzhiwei
Cc: Peter Maydell, Riku Voipio, open list:RISC-V, Sagar Karandikar,
Bastian Koppelmann, Palmer Dabbelt,
qemu-devel@nongnu.org Developers, Laurent Vivier,
Alistair Francis, Alex Bennée, Aurelien Jarno
On Wed, Aug 28, 2019 at 12:04 AM liuzhiwei <zhiwei_liu@c-sky.com> wrote:
>
> Change-Id: I3cf891bc400713b95f47ecca82b1bf773f3dcb25
> Signed-off-by: liuzhiwei <zhiwei_liu@c-sky.com>
> ---
> fpu/softfloat.c | 119 +
> include/fpu/softfloat.h | 4 +
> linux-user/riscv/cpu_loop.c | 8 +-
> target/riscv/Makefile.objs | 2 +-
> target/riscv/cpu.h | 30 +
> target/riscv/cpu_bits.h | 15 +
> target/riscv/cpu_helper.c | 7 +
> target/riscv/csr.c | 65 +-
> target/riscv/helper.h | 354 +
> target/riscv/insn32.decode | 374 +-
> target/riscv/insn_trans/trans_rvv.inc.c | 484 +
> target/riscv/translate.c | 1 +
> target/riscv/vector_helper.c | 26563 ++++++++++++++++++++++++++++++
> 13 files changed, 28017 insertions(+), 9 deletions(-)
> create mode 100644 target/riscv/insn_trans/trans_rvv.inc.c
> create mode 100644 target/riscv/vector_helper.c
>
Hello,
Thanks for the patch!
As others have pointed out you will need to split the patch up into
multiple smaller patches, otherwise it is too hard to review almost
30,000 lines of code.
Can you also include a cover letter with your patch series describing
how you are testing this? AFAIK vector extension support isn't in any
compiler so I'm assuming you are handwriting the assembly or have
toolchain patches. Either way it will help if you can share that so
others can test your implementation.
Alex and Richard have kindly started the review. Once you have
addressed their comments and split this patch up into smaller patches
you can send a v2 and we can go from there.
Once again thanks for doing this implementation for QEMU!
Alistair
* Re: [Qemu-devel] [PATCH] RISCV: support riscv vector extension 0.7.1
2019-08-28 21:34 ` [Qemu-riscv] " Alistair Francis
@ 2019-08-29 12:00 ` liuzhiwei
-1 siblings, 0 replies; 52+ messages in thread
From: liuzhiwei @ 2019-08-29 12:00 UTC (permalink / raw)
To: Alistair Francis
Cc: Peter Maydell, Riku Voipio, open list:RISC-V, Sagar Karandikar,
Bastian Koppelmann, Palmer Dabbelt,
qemu-devel@nongnu.org Developers, Laurent Vivier,
Alistair Francis, Alex Bennée, Aurelien Jarno
On 2019/8/29 5:34 AM, Alistair Francis wrote:
> On Wed, Aug 28, 2019 at 12:04 AM liuzhiwei <zhiwei_liu@c-sky.com> wrote:
>> Change-Id: I3cf891bc400713b95f47ecca82b1bf773f3dcb25
>> Signed-off-by: liuzhiwei <zhiwei_liu@c-sky.com>
>> ---
>> fpu/softfloat.c | 119 +
>> include/fpu/softfloat.h | 4 +
>> linux-user/riscv/cpu_loop.c | 8 +-
>> target/riscv/Makefile.objs | 2 +-
>> target/riscv/cpu.h | 30 +
>> target/riscv/cpu_bits.h | 15 +
>> target/riscv/cpu_helper.c | 7 +
>> target/riscv/csr.c | 65 +-
>> target/riscv/helper.h | 354 +
>> target/riscv/insn32.decode | 374 +-
>> target/riscv/insn_trans/trans_rvv.inc.c | 484 +
>> target/riscv/translate.c | 1 +
>> target/riscv/vector_helper.c | 26563 ++++++++++++++++++++++++++++++
>> 13 files changed, 28017 insertions(+), 9 deletions(-)
>> create mode 100644 target/riscv/insn_trans/trans_rvv.inc.c
>> create mode 100644 target/riscv/vector_helper.c
>>
> Hello,
>
> Thanks for the patch!
>
> As others have pointed out you will need to split the patch up into
> multiple smaller patches, otherwise it is too hard to review almost
> 30,000 lines of code.
Hi, Alistair
I'm so sorry for the inconvenience. It will be a patch set with a cover
letter in V2.
> Can you also include a cover letter with your patch series describing
> how you are testing this? AFAIK vector extension support isn't in any
> compiler so I'm assuming you are handwriting the assembly or have
> toolchain patches. Either way it will help if you can share that so
> others can test your implementation.
Yes, it's handwritten assembly. The assembler in Binutils already supports
the vector extension. First define a function test_vadd_vv_8 in assembly,
and then it can be called from a C program.
The function is something like:
/* vadd.vv */
TEST_FUNC(test_vadd_vv_8)
vsetvli t1, x0, e8, m2      # vl = VLMAX, 8-bit elements, LMUL = 2
vlb.v v6, (a4)              # prefill the whole destination from a4
vsb.v v6, (a3)
vsetvli t1, a0, e8, m2      # vl = min(a0, VLMAX)
vlb.v v0, (a1)              # load the two source vectors
vlb.v v2, (a2)
vadd.vv v4, v0, v2          # element-wise 8-bit add
vsb.v v4, (a3)              # store the first vl results over the prefill
ret
.size test_vadd_vv_8, .-test_vadd_vv_8
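The C side of such a test might look like this (only a sketch; the
operand layout in a0-a4 is my reading of the fragment above, so treat
it as an assumption):

#include <stdint.h>
#include <stdio.h>

/* Assembly routine from above. Assumed layout: a0 = element count,
 * a1/a2 = source arrays, a3 = destination, a4 = fill data that the
 * routine stores to the destination first. */
extern void test_vadd_vv_8(long n, const int8_t *a, const int8_t *b,
                           int8_t *out, const int8_t *fill);

int main(void)
{
    /* N is sized generously so the VLMAX-wide prefill store fits. */
    enum { N = 64, VL = 16 };
    int8_t a[N], b[N], out[N], fill[N];
    int i, fail = 0;

    for (i = 0; i < N; i++) {
        a[i] = (int8_t)i;
        b[i] = (int8_t)(2 * i);
        fill[i] = -1;
    }

    test_vadd_vv_8(VL, a, b, out, fill);

    for (i = 0; i < VL; i++) {
        if (out[i] != (int8_t)(a[i] + b[i])) {
            printf("element %d: got %d, expected %d\n",
                   i, out[i], a[i] + b[i]);
            fail = 1;
        }
    }
    printf(fail ? "vadd.vv.8: FAIL\n" : "vadd.vv.8: PASS\n");
    return fail;
}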
It takes more time to test than to implement the instructions. Maybe
there is a better test method, or some existing test cases in QEMU that
could be reused. Could you give me some advice on testing?
Best Regards,
Zhiwei
> Alex and Richard have kindly started the review. Once you have
> addressed their comments and split this patch up into smaller patches
> you can send a v2 and we can go from there.
>
> Once again thanks for doing this implementation for QEMU!
>
> Alistair
>
* Re: [Qemu-devel] [PATCH] RISCV: support riscv vector extension 0.7.1
2019-08-28 20:43 ` [Qemu-riscv] " Richard Henderson
@ 2019-08-29 12:45 ` liuzhiwei
-1 siblings, 0 replies; 52+ messages in thread
From: liuzhiwei @ 2019-08-29 12:45 UTC (permalink / raw)
To: Richard Henderson, qemu-devel, qemu-riscv
Cc: peter.maydell, palmer, sagark, kbastian, riku.voipio, laurent,
Alistair.Francis, alex.bennee, aurelien
On 2019/8/29 4:43 AM, Richard Henderson wrote:
> On 8/28/19 11:54 AM, Richard Henderson wrote:
>> But it might be reasonable to include (VSTART == 0 && VL == VLMAX) as a
>> single bit.
> BTW, it is reasonable to check VSTART == 0 always. Quoting the spec:
>
> # Implementations are permitted to raise illegal instruction exceptions
> # when attempting to execute a vector instruction with a value of vstart
> # that the implementation can never produce when executing that same
> # instruction with the same vtype setting.
>
> Since qemu will never interrupt a single instruction, each vector instruction
> will always run to completion, which clears VSTART. Since QEMU will never
> produce a non-zero value of VSTART, it is allowed to trap on any non-zero
> setting of VSTART.
>
> I.e. it can be handled at translation time alongside VILL.
Hi, Richard
I am so sorry for the inconvenience. It is very kind of you to review
the horribly long code and give so many comments.
Even in QEMU, there may be situations where VSTART != 0. For example, a
load instruction may take a page fault exception at a middle element. If
execution resumed with VSTART == 0, the elements that had already been
loaded before the exception would be loaded once again.
In particular, it may be a mistake if the instruction resumes execution
with VSTART == 0. When lmul == 1, consider
"vlb.v v0, (a0), v0.t"
As v0 is also the mask register, once part of it has been overwritten,
that part of the mask can't be used again.
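To make that concrete, here is a minimal sketch (mine, not the patch's
code; vreg_set_s8() is a hypothetical element writer) of a unit-stride
byte load that keeps VSTART live across a fault:

static void vlb_sketch(CPURISCVState *env, uint32_t vd, target_ulong base)
{
    target_ulong i;

    for (i = env->vfp.vstart; i < env->vfp.vl; i++) {
        /* cpu_ldsb_data_ra() longjmps out on a page fault; because
         * vstart has been advanced for every completed element, the
         * re-executed instruction resumes at the faulting element
         * instead of reloading (and re-clobbering) elements below it. */
        vreg_set_s8(env, vd, i, cpu_ldsb_data_ra(env, base + i, GETPC()));
        env->vfp.vstart = i + 1;
    }
    env->vfp.vstart = 0;    /* cleared only after full completion */
}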
It will take some time to address the other comments. After that I will
split the patch into a patch set with a cover letter in V2.
Thank you again for your review!
Best Regards,
Zhiwei
>
>
> r~
>
* Re: [Qemu-riscv] [Qemu-devel] [PATCH] RISCV: support riscv vector extension 0.7.1
2019-08-28 19:20 ` [Qemu-riscv] " Aleksandar Markovic
@ 2019-08-29 12:56 ` liuzhiwei
2019-08-29 18:32 ` [Qemu-riscv] " Aleksandar Markovic
0 siblings, 1 reply; 52+ messages in thread
From: liuzhiwei @ 2019-08-29 12:56 UTC (permalink / raw)
To: Aleksandar Markovic
Cc: QEMU Developers, open list:RISC-V, Peter Maydell, Palmer Dabbelt,
Sagar Karandikar, Bastian Koppelmann, Riku Voipio,
Laurent Vivier, Alistair Francis, Alex Bennée,
Aurelien Jarno
On 2019/8/29 3:20 AM, Aleksandar Markovic wrote:
>
> On Wed, Aug 28, 2019 at 9:04 AM liuzhiwei <zhiwei_liu@c-sky.com> wrote:
>
> Change-Id: I3cf891bc400713b95f47ecca82b1bf773f3dcb25
> Signed-off-by: liuzhiwei <zhiwei_liu@c-sky.com>
> ---
>
>
> Such a large patch, and "Change-Id:
> I3cf891bc400713b95f47ecca82b1bf773f3dcb25" is its entire commit
> message?? Horrible.
Hi, Aleksandar
I am so sorry. A patch set with a cover letter will be sent later.
Best Regards,
Zhiwei
> Aleksandar
>
> fpu/softfloat.c | 119 +
> include/fpu/softfloat.h | 4 +
> linux-user/riscv/cpu_loop.c | 8 +-
> target/riscv/Makefile.objs | 2 +-
> target/riscv/cpu.h | 30 +
> target/riscv/cpu_bits.h | 15 +
> target/riscv/cpu_helper.c | 7 +
> target/riscv/csr.c | 65 +-
> target/riscv/helper.h | 354 +
> target/riscv/insn32.decode | 374 +-
> target/riscv/insn_trans/trans_rvv.inc.c | 484 +
> target/riscv/translate.c | 1 +
> target/riscv/vector_helper.c | 26563
> ++++++++++++++++++++++++++++++
> 13 files changed, 28017 insertions(+), 9 deletions(-)
> create mode 100644 target/riscv/insn_trans/trans_rvv.inc.c
> create mode 100644 target/riscv/vector_helper.c
>
> diff --git a/fpu/softfloat.c b/fpu/softfloat.c
> index 2ba36ec..da155ea 100644
> --- a/fpu/softfloat.c
> +++ b/fpu/softfloat.c
> @@ -433,6 +433,16 @@ static inline int extractFloat16Exp(float16 a)
> }
>
> /*----------------------------------------------------------------------------
> +| Returns the sign bit of the half-precision floating-point value
> `a'.
> +*----------------------------------------------------------------------------*/
> +
> +static inline flag extractFloat16Sign(float16 a)
> +{
> + return float16_val(a) >> 0xf;
> +}
> +
> +
> +/*----------------------------------------------------------------------------
> | Returns the fraction bits of the single-precision
> floating-point value `a'.
> *----------------------------------------------------------------------------*/
>
> @@ -4790,6 +4800,35 @@ int float32_eq(float32 a, float32 b,
> float_status *status)
> }
>
> /*----------------------------------------------------------------------------
> +| Returns 1 if the half-precision floating-point value `a' is
> less than
> +| or equal to the corresponding value `b', and 0 otherwise. The
> invalid
> +| exception is raised if either operand is a NaN. The comparison
> is performed
> +| according to the IEC/IEEE Standard for Binary Floating-Point
> Arithmetic.
> +*----------------------------------------------------------------------------*/
> +
> +int float16_le(float16 a, float16 b, float_status *status)
> +{
> + flag aSign, bSign;
> + uint16_t av, bv;
> + a = float16_squash_input_denormal(a, status);
> + b = float16_squash_input_denormal(b, status);
> +
> + if ( ( ( extractFloat16Exp( a ) == 0x1F ) &&
> extractFloat16Frac( a ) )
> + || ( ( extractFloat16Exp( b ) == 0x1F ) &&
> extractFloat16Frac( b ) )
> + ) {
> + float_raise(float_flag_invalid, status);
> + return 0;
> + }
> + aSign = extractFloat16Sign( a );
> + bSign = extractFloat16Sign( b );
> + av = float16_val(a);
> + bv = float16_val(b);
> + if ( aSign != bSign ) return aSign || ( (uint16_t) ( ( av |
> bv )<<1 ) == 0 );
> + return ( av == bv ) || ( aSign ^ ( av < bv ) );
> +
> +}
> +
> +/*----------------------------------------------------------------------------
> | Returns 1 if the single-precision floating-point value `a' is
> less than
> | or equal to the corresponding value `b', and 0 otherwise. The
> invalid
> | exception is raised if either operand is a NaN. The comparison
> is performed
> @@ -4825,6 +4864,35 @@ int float32_le(float32 a, float32 b,
> float_status *status)
> | to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
> *----------------------------------------------------------------------------*/
>
> +int float16_lt(float16 a, float16 b, float_status *status)
> +{
> + flag aSign, bSign;
> + uint16_t av, bv;
> + a = float16_squash_input_denormal(a, status);
> + b = float16_squash_input_denormal(b, status);
> +
> + if ( ( ( extractFloat16Exp( a ) == 0x1F ) &&
> extractFloat16Frac( a ) )
> + || ( ( extractFloat16Exp( b ) == 0x1F ) &&
> extractFloat16Frac( b ) )
> + ) {
> + float_raise(float_flag_invalid, status);
> + return 0;
> + }
> + aSign = extractFloat16Sign( a );
> + bSign = extractFloat16Sign( b );
> + av = float16_val(a);
> + bv = float16_val(b);
> + if ( aSign != bSign ) return aSign && ( (uint16_t) ( ( av |
> bv )<<1 ) != 0 );
> + return ( av != bv ) && ( aSign ^ ( av < bv ) );
> +
> +}
> +
> +/*----------------------------------------------------------------------------
> +| Returns 1 if the single-precision floating-point value `a' is
> less than
> +| the corresponding value `b', and 0 otherwise. The invalid
> exception is
> +| raised if either operand is a NaN. The comparison is performed
> according
> +| to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
> +*----------------------------------------------------------------------------*/
> +
> int float32_lt(float32 a, float32 b, float_status *status)
> {
> flag aSign, bSign;
> @@ -4869,6 +4937,32 @@ int float32_unordered(float32 a, float32 b,
> float_status *status)
> }
>
> /*----------------------------------------------------------------------------
> +| Returns 1 if the half-precision floating-point value `a' is
> equal to
> +| the corresponding value `b', and 0 otherwise. Quiet NaNs do
> not cause an
> +| exception. The comparison is performed according to the
> IEC/IEEE Standard
> +| for Binary Floating-Point Arithmetic.
> +*----------------------------------------------------------------------------*/
> +
> +int float16_eq_quiet(float16 a, float16 b, float_status *status)
> +{
> + a = float16_squash_input_denormal(a, status);
> + b = float16_squash_input_denormal(b, status);
> +
> + if ( ( ( extractFloat16Exp( a ) == 0x1F ) &&
> extractFloat16Frac( a ) )
> + || ( ( extractFloat16Exp( b ) == 0x1F ) &&
> extractFloat16Frac( b ) )
> + ) {
> + if (float16_is_signaling_nan(a, status)
> + || float16_is_signaling_nan(b, status)) {
> + float_raise(float_flag_invalid, status);
> + }
> + return 0;
> + }
> + return ( float16_val(a) == float16_val(b) ) ||
> + ( (uint16_t) ( ( float16_val(a) | float16_val(b) )<<1
> ) == 0 );
> +}
> +
> +
> +/*----------------------------------------------------------------------------
> | Returns 1 if the single-precision floating-point value `a' is
> equal to
> | the corresponding value `b', and 0 otherwise. Quiet NaNs do
> not cause an
> | exception. The comparison is performed according to the
> IEC/IEEE Standard
> @@ -4958,6 +5052,31 @@ int float32_lt_quiet(float32 a, float32 b,
> float_status *status)
> }
>
> /*----------------------------------------------------------------------------
> +| Returns 1 if the half-precision floating-point values `a' and
> `b' cannot
> +| be compared, and 0 otherwise. Quiet NaNs do not cause an
> exception. The
> +| comparison is performed according to the IEC/IEEE Standard for
> Binary
> +| Floating-Point Arithmetic.
> +*----------------------------------------------------------------------------*/
> +
> +int float16_unordered_quiet(float16 a, float16 b, float_status
> *status)
> +{
> + a = float16_squash_input_denormal(a, status);
> + b = float16_squash_input_denormal(b, status);
> +
> + if ( ( ( extractFloat16Exp( a ) == 0x1F ) &&
> extractFloat16Frac( a ) )
> + || ( ( extractFloat16Exp( b ) == 0x1F ) &&
> extractFloat16Frac( b ) )
> + ) {
> + if (float16_is_signaling_nan(a, status)
> + || float16_is_signaling_nan(b, status)) {
> + float_raise(float_flag_invalid, status);
> + }
> + return 1;
> + }
> + return 0;
> +}
> +
> +
> +/*----------------------------------------------------------------------------
> | Returns 1 if the single-precision floating-point values `a' and
> `b' cannot
> | be compared, and 0 otherwise. Quiet NaNs do not cause an
> exception. The
> | comparison is performed according to the IEC/IEEE Standard for
> Binary
> diff --git a/include/fpu/softfloat.h b/include/fpu/softfloat.h
> index 3ff3fa5..3b0754c 100644
> --- a/include/fpu/softfloat.h
> +++ b/include/fpu/softfloat.h
> @@ -293,6 +293,10 @@ float16 float16_maxnummag(float16, float16,
> float_status *status);
> float16 float16_sqrt(float16, float_status *status);
> int float16_compare(float16, float16, float_status *status);
> int float16_compare_quiet(float16, float16, float_status *status);
> +int float16_unordered_quiet(float16, float16, float_status *status);
> +int float16_le(float16, float16, float_status *status);
> +int float16_lt(float16, float16, float_status *status);
> +int float16_eq_quiet(float16, float16, float_status *status);
>
> int float16_is_quiet_nan(float16, float_status *status);
> int float16_is_signaling_nan(float16, float_status *status);
> diff --git a/linux-user/riscv/cpu_loop.c b/linux-user/riscv/cpu_loop.c
> index 12aa3c0..b01548a 100644
> --- a/linux-user/riscv/cpu_loop.c
> +++ b/linux-user/riscv/cpu_loop.c
> @@ -40,7 +40,13 @@ void cpu_loop(CPURISCVState *env)
> signum = 0;
> sigcode = 0;
> sigaddr = 0;
> -
> + if (env->foflag) {
> + if (env->vfp.vl != 0) {
> + env->foflag = false;
> + env->pc += 4;
> + continue;
> + }
> + }
> switch (trapnr) {
> case EXCP_INTERRUPT:
> /* just indicate that signals should be handled asap */
> diff --git a/target/riscv/Makefile.objs b/target/riscv/Makefile.objs
> index b1c79bc..d577cef 100644
> --- a/target/riscv/Makefile.objs
> +++ b/target/riscv/Makefile.objs
> @@ -1,4 +1,4 @@
> -obj-y += translate.o op_helper.o cpu_helper.o cpu.o csr.o
> fpu_helper.o gdbstub.o pmp.o
> +obj-y += translate.o op_helper.o cpu_helper.o cpu.o csr.o
> fpu_helper.o vector_helper.o gdbstub.o pmp.o
>
> DECODETREE = $(SRC_PATH)/scripts/decodetree.py
>
> diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h
> index 0adb307..5a93aa2 100644
> --- a/target/riscv/cpu.h
> +++ b/target/riscv/cpu.h
> @@ -67,6 +67,7 @@
> #define RVC RV('C')
> #define RVS RV('S')
> #define RVU RV('U')
> +#define RVV RV('V')
>
> /* S extension denotes that Supervisor mode exists, however it is
> possible
> to have a core that support S mode but does not have an MMU
> and there
> @@ -93,9 +94,38 @@ typedef struct CPURISCVState CPURISCVState;
>
> #include "pmp.h"
>
> +#define VLEN 128
> +#define VUNIT(x) (VLEN / x)
> +
> struct CPURISCVState {
> target_ulong gpr[32];
> uint64_t fpr[32]; /* assume both F and D extensions */
> +
> + /* vector coprocessor state. */
> + struct {
> + union VECTOR {
> + float64 f64[VUNIT(64)];
> + float32 f32[VUNIT(32)];
> + float16 f16[VUNIT(16)];
> + target_ulong ul[VUNIT(sizeof(target_ulong))];
> + uint64_t u64[VUNIT(64)];
> + int64_t s64[VUNIT(64)];
> + uint32_t u32[VUNIT(32)];
> + int32_t s32[VUNIT(32)];
> + uint16_t u16[VUNIT(16)];
> + int16_t s16[VUNIT(16)];
> + uint8_t u8[VUNIT(8)];
> + int8_t s8[VUNIT(8)];
> + } vreg[32];
> + target_ulong vxrm;
> + target_ulong vxsat;
> + target_ulong vl;
> + target_ulong vstart;
> + target_ulong vtype;
> + float_status fp_status;
> + } vfp;
> +
> + bool foflag;
> target_ulong pc;
> target_ulong load_res;
> target_ulong load_val;
> diff --git a/target/riscv/cpu_bits.h b/target/riscv/cpu_bits.h
> index 11f971a..9eb43ec 100644
> --- a/target/riscv/cpu_bits.h
> +++ b/target/riscv/cpu_bits.h
> @@ -29,6 +29,14 @@
> #define FSR_NXA (FPEXC_NX << FSR_AEXC_SHIFT)
> #define FSR_AEXC (FSR_NVA | FSR_OFA | FSR_UFA |
> FSR_DZA | FSR_NXA)
>
> +/* Vector Fixed-Point round model */
> +#define FSR_VXRM_SHIFT 9
> +#define FSR_VXRM (0x3 << FSR_VXRM_SHIFT)
> +
> +/* Vector Fixed-Point saturation flag */
> +#define FSR_VXSAT_SHIFT 8
> +#define FSR_VXSAT (0x1 << FSR_VXSAT_SHIFT)
> +
> /* Control and Status Registers */
>
> /* User Trap Setup */
> @@ -48,6 +56,13 @@
> #define CSR_FRM 0x002
> #define CSR_FCSR 0x003
>
> +/* User Vector CSRs */
> +#define CSR_VSTART 0x008
> +#define CSR_VXSAT 0x009
> +#define CSR_VXRM 0x00a
> +#define CSR_VL 0xc20
> +#define CSR_VTYPE 0xc21
> +
> /* User Timers and Counters */
> #define CSR_CYCLE 0xc00
> #define CSR_TIME 0xc01
> diff --git a/target/riscv/cpu_helper.c b/target/riscv/cpu_helper.c
> index e32b612..405caf6 100644
> --- a/target/riscv/cpu_helper.c
> +++ b/target/riscv/cpu_helper.c
> @@ -521,6 +521,13 @@ void riscv_cpu_do_interrupt(CPUState *cs)
> [PRV_H] = RISCV_EXCP_H_ECALL,
> [PRV_M] = RISCV_EXCP_M_ECALL
> };
> + if (env->foflag) {
> + if (env->vfp.vl != 0) {
> + env->foflag = false;
> + env->pc += 4;
> + return;
> + }
> + }
>
> if (!async) {
> /* set tval to badaddr for traps with address information */
> diff --git a/target/riscv/csr.c b/target/riscv/csr.c
> index e0d4586..a6131ff 100644
> --- a/target/riscv/csr.c
> +++ b/target/riscv/csr.c
> @@ -87,12 +87,12 @@ static int ctr(CPURISCVState *env, int csrno)
> return 0;
> }
>
> -#if !defined(CONFIG_USER_ONLY)
> static int any(CPURISCVState *env, int csrno)
> {
> return 0;
> }
>
> +#if !defined(CONFIG_USER_ONLY)
> static int smode(CPURISCVState *env, int csrno)
> {
> return -!riscv_has_ext(env, RVS);
> @@ -158,8 +158,10 @@ static int read_fcsr(CPURISCVState *env, int
> csrno, target_ulong *val)
> return -1;
> }
> #endif
> - *val = (riscv_cpu_get_fflags(env) << FSR_AEXC_SHIFT)
> - | (env->frm << FSR_RD_SHIFT);
> + *val = (env->vfp.vxrm << FSR_VXRM_SHIFT)
> + | (env->vfp.vxsat << FSR_VXSAT_SHIFT)
> + | (riscv_cpu_get_fflags(env) << FSR_AEXC_SHIFT)
> + | (env->frm << FSR_RD_SHIFT);
> return 0;
> }
>
> @@ -172,10 +174,60 @@ static int write_fcsr(CPURISCVState *env,
> int csrno, target_ulong val)
> env->mstatus |= MSTATUS_FS;
> #endif
> env->frm = (val & FSR_RD) >> FSR_RD_SHIFT;
> + env->vfp.vxrm = (val & FSR_VXRM) >> FSR_VXRM_SHIFT;
> + env->vfp.vxsat = (val & FSR_VXSAT) >> FSR_VXSAT_SHIFT;
> riscv_cpu_set_fflags(env, (val & FSR_AEXC) >> FSR_AEXC_SHIFT);
> return 0;
> }
>
> +static int read_vtype(CPURISCVState *env, int csrno, target_ulong
> *val)
> +{
> + *val = env->vfp.vtype;
> + return 0;
> +}
> +
> +static int read_vl(CPURISCVState *env, int csrno, target_ulong *val)
> +{
> + *val = env->vfp.vl;
> + return 0;
> +}
> +
> +static int read_vxrm(CPURISCVState *env, int csrno, target_ulong
> *val)
> +{
> + *val = env->vfp.vxrm;
> + return 0;
> +}
> +
> +static int read_vxsat(CPURISCVState *env, int csrno, target_ulong
> *val)
> +{
> + *val = env->vfp.vxsat;
> + return 0;
> +}
> +
> +static int read_vstart(CPURISCVState *env, int csrno,
> target_ulong *val)
> +{
> + *val = env->vfp.vstart;
> + return 0;
> +}
> +
> +static int write_vxrm(CPURISCVState *env, int csrno, target_ulong
> val)
> +{
> + env->vfp.vxrm = val;
> + return 0;
> +}
> +
> +static int write_vxsat(CPURISCVState *env, int csrno, target_ulong val)
> +{
> + env->vfp.vxsat = val;
> + return 0;
> +}
> +
> +static int write_vstart(CPURISCVState *env, int csrno, target_ulong val)
> +{
> + env->vfp.vstart = val;
> + return 0;
> +}
> +
> /* User Timers and Counters */
> static int read_instret(CPURISCVState *env, int csrno, target_ulong *val)
> {
> @@ -873,7 +925,12 @@ static riscv_csr_operations csr_ops[CSR_TABLE_SIZE] = {
> [CSR_FFLAGS] = { fs, read_fflags, write_fflags },
> [CSR_FRM] = { fs, read_frm, write_frm },
> [CSR_FCSR] = { fs, read_fcsr, write_fcsr },
> -
> + /* Vector CSRs */
> + [CSR_VSTART] = { any, read_vstart, write_vstart },
> + [CSR_VXSAT] = { any, read_vxsat, write_vxsat },
> + [CSR_VXRM] = { any, read_vxrm, write_vxrm },
> + [CSR_VL] = { any, read_vl },
> + [CSR_VTYPE] = { any, read_vtype },
> /* User Timers and Counters */
> [CSR_CYCLE] = { ctr, read_instret },
> [CSR_INSTRET] = { ctr, read_instret },
> diff --git a/target/riscv/helper.h b/target/riscv/helper.h
> index debb22a..fee02c0 100644
> --- a/target/riscv/helper.h
> +++ b/target/riscv/helper.h
> @@ -76,3 +76,357 @@ DEF_HELPER_2(mret, tl, env, tl)
> DEF_HELPER_1(wfi, void, env)
> DEF_HELPER_1(tlb_flush, void, env)
> #endif
> +/* Vector functions */
> +DEF_HELPER_5(vector_vlb_v, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vlh_v, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vlw_v, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vle_v, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vlbu_v, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vlhu_v, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vlwu_v, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vlbff_v, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vlhff_v, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vlwff_v, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vleff_v, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vlbuff_v, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vlhuff_v, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vlwuff_v, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vsb_v, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vsh_v, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vsw_v, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vse_v, void, env, i32, i32, i32, i32)
> +DEF_HELPER_6(vector_vlsb_v, void, env, i32, i32, i32, i32, i32)
> +DEF_HELPER_6(vector_vlsh_v, void, env, i32, i32, i32, i32, i32)
> +DEF_HELPER_6(vector_vlsw_v, void, env, i32, i32, i32, i32, i32)
> +DEF_HELPER_6(vector_vlse_v, void, env, i32, i32, i32, i32, i32)
> +DEF_HELPER_6(vector_vlsbu_v, void, env, i32, i32, i32, i32, i32)
> +DEF_HELPER_6(vector_vlshu_v, void, env, i32, i32, i32, i32, i32)
> +DEF_HELPER_6(vector_vlswu_v, void, env, i32, i32, i32, i32, i32)
> +DEF_HELPER_6(vector_vssb_v, void, env, i32, i32, i32, i32, i32)
> +DEF_HELPER_6(vector_vssh_v, void, env, i32, i32, i32, i32, i32)
> +DEF_HELPER_6(vector_vssw_v, void, env, i32, i32, i32, i32, i32)
> +DEF_HELPER_6(vector_vsse_v, void, env, i32, i32, i32, i32, i32)
> +DEF_HELPER_6(vector_vlxb_v, void, env, i32, i32, i32, i32, i32)
> +DEF_HELPER_6(vector_vlxh_v, void, env, i32, i32, i32, i32, i32)
> +DEF_HELPER_6(vector_vlxw_v, void, env, i32, i32, i32, i32, i32)
> +DEF_HELPER_6(vector_vlxe_v, void, env, i32, i32, i32, i32, i32)
> +DEF_HELPER_6(vector_vlxbu_v, void, env, i32, i32, i32, i32, i32)
> +DEF_HELPER_6(vector_vlxhu_v, void, env, i32, i32, i32, i32, i32)
> +DEF_HELPER_6(vector_vlxwu_v, void, env, i32, i32, i32, i32, i32)
> +DEF_HELPER_6(vector_vsxb_v, void, env, i32, i32, i32, i32, i32)
> +DEF_HELPER_6(vector_vsxh_v, void, env, i32, i32, i32, i32, i32)
> +DEF_HELPER_6(vector_vsxw_v, void, env, i32, i32, i32, i32, i32)
> +DEF_HELPER_6(vector_vsxe_v, void, env, i32, i32, i32, i32, i32)
> +DEF_HELPER_6(vector_vsuxb_v, void, env, i32, i32, i32, i32, i32)
> +DEF_HELPER_6(vector_vsuxh_v, void, env, i32, i32, i32, i32, i32)
> +DEF_HELPER_6(vector_vsuxw_v, void, env, i32, i32, i32, i32, i32)
> +DEF_HELPER_6(vector_vsuxe_v, void, env, i32, i32, i32, i32, i32)
> +DEF_HELPER_6(vector_vamoswapw_v, void, env, i32, i32, i32, i32, i32)
> +DEF_HELPER_6(vector_vamoswapd_v, void, env, i32, i32, i32, i32, i32)
> +DEF_HELPER_6(vector_vamoaddw_v, void, env, i32, i32, i32, i32, i32)
> +DEF_HELPER_6(vector_vamoaddd_v, void, env, i32, i32, i32, i32, i32)
> +DEF_HELPER_6(vector_vamoxorw_v, void, env, i32, i32, i32, i32, i32)
> +DEF_HELPER_6(vector_vamoxord_v, void, env, i32, i32, i32, i32, i32)
> +DEF_HELPER_6(vector_vamoandw_v, void, env, i32, i32, i32, i32, i32)
> +DEF_HELPER_6(vector_vamoandd_v, void, env, i32, i32, i32, i32, i32)
> +DEF_HELPER_6(vector_vamoorw_v, void, env, i32, i32, i32, i32, i32)
> +DEF_HELPER_6(vector_vamoord_v, void, env, i32, i32, i32, i32, i32)
> +DEF_HELPER_6(vector_vamominw_v, void, env, i32, i32, i32, i32, i32)
> +DEF_HELPER_6(vector_vamomind_v, void, env, i32, i32, i32, i32, i32)
> +DEF_HELPER_6(vector_vamomaxw_v, void, env, i32, i32, i32, i32, i32)
> +DEF_HELPER_6(vector_vamomaxd_v, void, env, i32, i32, i32, i32, i32)
> +DEF_HELPER_6(vector_vamominuw_v, void, env, i32, i32, i32, i32, i32)
> +DEF_HELPER_6(vector_vamominud_v, void, env, i32, i32, i32, i32, i32)
> +DEF_HELPER_6(vector_vamomaxuw_v, void, env, i32, i32, i32, i32, i32)
> +DEF_HELPER_6(vector_vamomaxud_v, void, env, i32, i32, i32, i32, i32)
> +DEF_HELPER_4(vector_vext_x_v, void, env, i32, i32, i32)
> +DEF_HELPER_4(vector_vfmv_f_s, void, env, i32, i32, i32)
> +DEF_HELPER_4(vector_vmv_s_x, void, env, i32, i32, i32)
> +DEF_HELPER_4(vector_vfmv_s_f, void, env, i32, i32, i32)
> +DEF_HELPER_4(vector_vadc_vvm, void, env, i32, i32, i32)
> +DEF_HELPER_4(vector_vadc_vxm, void, env, i32, i32, i32)
> +DEF_HELPER_4(vector_vadc_vim, void, env, i32, i32, i32)
> +DEF_HELPER_4(vector_vmadc_vvm, void, env, i32, i32, i32)
> +DEF_HELPER_4(vector_vmadc_vxm, void, env, i32, i32, i32)
> +DEF_HELPER_4(vector_vmadc_vim, void, env, i32, i32, i32)
> +DEF_HELPER_4(vector_vsbc_vvm, void, env, i32, i32, i32)
> +DEF_HELPER_4(vector_vsbc_vxm, void, env, i32, i32, i32)
> +DEF_HELPER_4(vector_vmsbc_vvm, void, env, i32, i32, i32)
> +DEF_HELPER_4(vector_vmsbc_vxm, void, env, i32, i32, i32)
> +DEF_HELPER_4(vector_vmpopc_m, void, env, i32, i32, i32)
> +DEF_HELPER_4(vector_vmfirst_m, void, env, i32, i32, i32)
> +DEF_HELPER_4(vector_vcompress_vm, void, env, i32, i32, i32)
> +DEF_HELPER_4(vector_vmandnot_mm, void, env, i32, i32, i32)
> +DEF_HELPER_4(vector_vmand_mm, void, env, i32, i32, i32)
> +DEF_HELPER_4(vector_vmor_mm, void, env, i32, i32, i32)
> +DEF_HELPER_4(vector_vmxor_mm, void, env, i32, i32, i32)
> +DEF_HELPER_4(vector_vmornot_mm, void, env, i32, i32, i32)
> +DEF_HELPER_4(vector_vmnand_mm, void, env, i32, i32, i32)
> +DEF_HELPER_4(vector_vmnor_mm, void, env, i32, i32, i32)
> +DEF_HELPER_4(vector_vmxnor_mm, void, env, i32, i32, i32)
> +DEF_HELPER_4(vector_vmsbf_m, void, env, i32, i32, i32)
> +DEF_HELPER_4(vector_vmsof_m, void, env, i32, i32, i32)
> +DEF_HELPER_4(vector_vmsif_m, void, env, i32, i32, i32)
> +DEF_HELPER_4(vector_viota_m, void, env, i32, i32, i32)
> +DEF_HELPER_3(vector_vid_v, void, env, i32, i32)
> +DEF_HELPER_4(vector_vfcvt_xu_f_v, void, env, i32, i32, i32)
> +DEF_HELPER_4(vector_vfcvt_x_f_v, void, env, i32, i32, i32)
> +DEF_HELPER_4(vector_vfcvt_f_xu_v, void, env, i32, i32, i32)
> +DEF_HELPER_4(vector_vfcvt_f_x_v, void, env, i32, i32, i32)
> +DEF_HELPER_4(vector_vfwcvt_xu_f_v, void, env, i32, i32, i32)
> +DEF_HELPER_4(vector_vfwcvt_x_f_v, void, env, i32, i32, i32)
> +DEF_HELPER_4(vector_vfwcvt_f_xu_v, void, env, i32, i32, i32)
> +DEF_HELPER_4(vector_vfwcvt_f_x_v, void, env, i32, i32, i32)
> +DEF_HELPER_4(vector_vfwcvt_f_f_v, void, env, i32, i32, i32)
> +DEF_HELPER_4(vector_vfncvt_xu_f_v, void, env, i32, i32, i32)
> +DEF_HELPER_4(vector_vfncvt_x_f_v, void, env, i32, i32, i32)
> +DEF_HELPER_4(vector_vfncvt_f_xu_v, void, env, i32, i32, i32)
> +DEF_HELPER_4(vector_vfncvt_f_x_v, void, env, i32, i32, i32)
> +DEF_HELPER_4(vector_vfncvt_f_f_v, void, env, i32, i32, i32)
> +DEF_HELPER_4(vector_vfsqrt_v, void, env, i32, i32, i32)
> +DEF_HELPER_4(vector_vfclass_v, void, env, i32, i32, i32)
> +DEF_HELPER_5(vector_vadd_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vadd_vx, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vadd_vi, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vredsum_vs, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vfadd_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vfadd_vf, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vredand_vs, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vfredsum_vs, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vsub_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vsub_vx, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vredor_vs, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vfsub_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vfsub_vf, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vrsub_vx, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vrsub_vi, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vredxor_vs, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vfredosum_vs, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vminu_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vminu_vx, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vredminu_vs, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vfmin_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vfmin_vf, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vmin_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vmin_vx, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vredmin_vs, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vfredmin_vs, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vmaxu_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vmaxu_vx, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vredmaxu_vs, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vfmax_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vfmax_vf, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vmax_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vmax_vx, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vredmax_vs, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vfredmax_vs, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vfsgnj_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vfsgnj_vf, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vand_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vand_vx, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vand_vi, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vfsgnjn_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vfsgnjn_vf, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vor_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vor_vx, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vor_vi, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vfsgnjx_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vfsgnjx_vf, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vxor_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vxor_vx, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vxor_vi, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vrgather_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vrgather_vx, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vrgather_vi, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vslideup_vx, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vslideup_vi, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vslide1up_vx, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vslidedown_vx, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vslidedown_vi, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vslide1down_vx, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vmerge_vvm, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vmerge_vxm, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vmerge_vim, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vfmerge_vfm, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vmseq_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vmseq_vx, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vmseq_vi, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vmfeq_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vmfeq_vf, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vmsne_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vmsne_vx, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vmsne_vi, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vmfle_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vmfle_vf, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vmsltu_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vmsltu_vx, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vmford_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vmford_vf, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vmslt_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vmslt_vx, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vmflt_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vmflt_vf, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vmsleu_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vmsleu_vx, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vmsleu_vi, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vmfne_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vmfne_vf, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vmsle_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vmsle_vx, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vmsle_vi, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vmfgt_vf, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vmsgtu_vx, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vmsgtu_vi, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vmsgt_vx, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vmsgt_vi, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vmfge_vf, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vsaddu_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vsaddu_vx, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vsaddu_vi, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vdivu_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vdivu_vx, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vfdiv_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vfdiv_vf, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vsadd_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vsadd_vx, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vsadd_vi, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vdiv_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vdiv_vx, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vfrdiv_vf, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vssubu_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vssubu_vx, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vremu_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vremu_vx, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vssub_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vssub_vx, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vrem_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vrem_vx, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vaadd_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vaadd_vx, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vaadd_vi, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vmulhu_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vmulhu_vx, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vfmul_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vfmul_vf, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vsll_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vsll_vx, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vsll_vi, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vmul_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vmul_vx, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vasub_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vasub_vx, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vmulhsu_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vmulhsu_vx, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vsmul_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vsmul_vx, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vmulh_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vmulh_vx, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vfrsub_vf, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vsrl_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vsrl_vx, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vsrl_vi, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vfmadd_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vfmadd_vf, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vsra_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vsra_vx, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vsra_vi, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vmadd_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vmadd_vx, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vfnmadd_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vfnmadd_vf, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vssrl_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vssrl_vx, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vssrl_vi, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vfmsub_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vfmsub_vf, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vssra_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vssra_vx, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vssra_vi, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vnmsub_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vnmsub_vx, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vfnmsub_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vfnmsub_vf, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vnsrl_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vnsrl_vx, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vnsrl_vi, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vfmacc_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vfmacc_vf, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vnsra_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vnsra_vx, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vnsra_vi, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vmacc_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vmacc_vx, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vfnmacc_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vfnmacc_vf, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vnclipu_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vnclipu_vx, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vnclipu_vi, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vfmsac_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vfmsac_vf, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vnclip_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vnclip_vx, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vnclip_vi, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vnmsac_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vnmsac_vx, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vfnmsac_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vfnmsac_vf, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vwredsumu_vs, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vwaddu_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vwaddu_vx, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vfwadd_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vfwadd_vf, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vwredsum_vs, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vwadd_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vwadd_vx, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vfwredsum_vs, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vwsubu_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vwsubu_vx, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vfwsub_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vfwsub_vf, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vwsub_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vwsub_vx, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vfwredosum_vs, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vwaddu_wv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vwaddu_wx, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vfwadd_wv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vfwadd_wf, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vwadd_wv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vwadd_wx, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vwsubu_wv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vwsubu_wx, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vfwsub_wv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vfwsub_wf, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vwsub_wv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vwsub_wx, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vwmulu_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vwmulu_vx, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vfwmul_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vfwmul_vf, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vwmulsu_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vwmulsu_vx, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vwmul_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vwmul_vx, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vwsmaccu_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vwsmaccu_vx, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vwmaccu_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vwmaccu_vx, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vfwmacc_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vfwmacc_vf, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vwsmacc_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vwsmacc_vx, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vwmacc_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vwmacc_vx, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vfwnmacc_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vfwnmacc_vf, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vwsmaccsu_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vwsmaccsu_vx, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vwmaccsu_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vwmaccsu_vx, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vfwmsac_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vfwmsac_vf, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vwsmaccus_vx, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vwmaccus_vx, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vfwnmsac_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vfwnmsac_vf, void, env, i32, i32, i32, i32)
> +DEF_HELPER_4(vector_vsetvli, void, env, i32, i32, i32)
> +DEF_HELPER_4(vector_vsetvl, void, env, i32, i32, i32)
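
For anyone cross-checking signatures: each DEF_HELPER_n line expands to
a prototype in which i32 maps to uint32_t, so vector_vadd_vv above
becomes roughly the following (the parameter names are mine, inferred
from the call sites in trans_rvv.inc.c below):

    void helper_vector_vadd_vv(CPURISCVState *env, uint32_t vm,
                               uint32_t rs1, uint32_t rs2, uint32_t rd);
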
> diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode
> index 77f794e..d125ff9 100644
> --- a/target/riscv/insn32.decode
> +++ b/target/riscv/insn32.decode
> @@ -25,7 +25,7 @@
> %sh10 20:10
> %csr 20:12
> %rm 12:3
> -
> +%nf 29:3
> # immediates:
> %imm_i 20:s12
> %imm_s 25:s7 7:5
> @@ -43,7 +43,6 @@
> &u imm rd
> &shift shamt rs1 rd
> &atomic aq rl rs2 rs1 rd
> -
> # Formats 32:
> @r ....... ..... ..... ... ..... ....... &r %rs2 %rs1 %rd
> @i ............ ..... ... ..... ....... &i imm=%imm_i %rs1 %rd
> @@ -62,11 +61,17 @@
> @r_rm ....... ..... ..... ... ..... ....... %rs2 %rs1 %rm %rd
> @r2_rm ....... ..... ..... ... ..... ....... %rs1 %rm %rd
> @r2 ....... ..... ..... ... ..... ....... %rs1 %rd
> +@r_vm ...... vm:1 ..... ..... ... ..... ....... %rs2 %rs1 %rd
> +@r_wdvm ..... wd:1 vm:1 ..... ..... ... ..... ....... %rs2 %rs1 %rd
> +@r_nfvm nf:3 ... vm:1 ..... ..... ... ..... ....... %rs2 %rs1 %rd
> +@r2_nfvm nf:3 ... vm:1 ..... ..... ... ..... ....... %rs1 %rd
> +@r2_vm ...... vm:1 ..... ..... ... ..... ....... %rs2 %rd
> +@r1_vm ...... vm:1 ..... ..... ... ..... ....... %rd
> +@r2_zimm . zimm:11 ..... ... ..... ....... %rs1 %rd
>
> @sfence_vma ....... ..... ..... ... ..... ....... %rs2 %rs1
> @sfence_vm ....... ..... ..... ... ..... ....... %rs1
>
> -
> # *** Privileged Instructions ***
> ecall 000000000000 00000 000 00000 1110011
> ebreak 000000000001 00000 000 00000 1110011
> @@ -203,3 +208,366 @@ fcvt_w_d 1100001 00000 ..... ... ..... 1010011 @r2_rm
> fcvt_wu_d 1100001 00001 ..... ... ..... 1010011 @r2_rm
> fcvt_d_w 1101001 00000 ..... ... ..... 1010011 @r2_rm
> fcvt_d_wu 1101001 00001 ..... ... ..... 1010011 @r2_rm
> +
> +# *** RV32V Standard Extension ***
> +
> +# *** Vector loads and stores are encoded within LOAD-FP/STORE-FP ***
> +vlb_v ... 100 . 00000 ..... 000 ..... 0000111 @r2_nfvm
> +vlh_v ... 100 . 00000 ..... 101 ..... 0000111 @r2_nfvm
> +vlw_v ... 100 . 00000 ..... 110 ..... 0000111 @r2_nfvm
> +vle_v ... 000 . 00000 ..... 111 ..... 0000111 @r2_nfvm
> +vlbu_v ... 000 . 00000 ..... 000 ..... 0000111 @r2_nfvm
> +vlhu_v ... 000 . 00000 ..... 101 ..... 0000111 @r2_nfvm
> +vlwu_v ... 000 . 00000 ..... 110 ..... 0000111 @r2_nfvm
> +vlbff_v ... 100 . 10000 ..... 000 ..... 0000111 @r2_nfvm
> +vlhff_v ... 100 . 10000 ..... 101 ..... 0000111 @r2_nfvm
> +vlwff_v ... 100 . 10000 ..... 110 ..... 0000111 @r2_nfvm
> +vleff_v ... 000 . 10000 ..... 111 ..... 0000111 @r2_nfvm
> +vlbuff_v ... 000 . 10000 ..... 000 ..... 0000111 @r2_nfvm
> +vlhuff_v ... 000 . 10000 ..... 101 ..... 0000111 @r2_nfvm
> +vlwuff_v ... 000 . 10000 ..... 110 ..... 0000111 @r2_nfvm
> +vsb_v ... 000 . 00000 ..... 000 ..... 0100111 @r2_nfvm
> +vsh_v ... 000 . 00000 ..... 101 ..... 0100111 @r2_nfvm
> +vsw_v ... 000 . 00000 ..... 110 ..... 0100111 @r2_nfvm
> +vse_v ... 000 . 00000 ..... 111 ..... 0100111 @r2_nfvm
> +
> +vlsb_v ... 110 . ..... ..... 000 ..... 0000111 @r_nfvm
> +vlsh_v ... 110 . ..... ..... 101 ..... 0000111 @r_nfvm
> +vlsw_v ... 110 . ..... ..... 110 ..... 0000111 @r_nfvm
> +vlse_v ... 010 . ..... ..... 111 ..... 0000111 @r_nfvm
> +vlsbu_v ... 010 . ..... ..... 000 ..... 0000111 @r_nfvm
> +vlshu_v ... 010 . ..... ..... 101 ..... 0000111 @r_nfvm
> +vlswu_v ... 010 . ..... ..... 110 ..... 0000111 @r_nfvm
> +vssb_v ... 010 . ..... ..... 000 ..... 0100111 @r_nfvm
> +vssh_v ... 010 . ..... ..... 101 ..... 0100111 @r_nfvm
> +vssw_v ... 010 . ..... ..... 110 ..... 0100111 @r_nfvm
> +vsse_v ... 010 . ..... ..... 111 ..... 0100111 @r_nfvm
> +
> +vlxb_v ... 111 . ..... ..... 000 ..... 0000111 @r_nfvm
> +vlxh_v ... 111 . ..... ..... 101 ..... 0000111 @r_nfvm
> +vlxw_v ... 111 . ..... ..... 110 ..... 0000111 @r_nfvm
> +vlxe_v ... 011 . ..... ..... 111 ..... 0000111 @r_nfvm
> +vlxbu_v ... 011 . ..... ..... 000 ..... 0000111 @r_nfvm
> +vlxhu_v ... 011 . ..... ..... 101 ..... 0000111 @r_nfvm
> +vlxwu_v ... 011 . ..... ..... 110 ..... 0000111 @r_nfvm
> +vsxb_v ... 011 . ..... ..... 000 ..... 0100111 @r_nfvm
> +vsxh_v ... 011 . ..... ..... 101 ..... 0100111 @r_nfvm
> +vsxw_v ... 011 . ..... ..... 110 ..... 0100111 @r_nfvm
> +vsxe_v ... 011 . ..... ..... 111 ..... 0100111 @r_nfvm
> +vsuxb_v ... 111 . ..... ..... 000 ..... 0100111 @r_nfvm
> +vsuxh_v ... 111 . ..... ..... 101 ..... 0100111 @r_nfvm
> +vsuxw_v ... 111 . ..... ..... 110 ..... 0100111 @r_nfvm
> +vsuxe_v ... 111 . ..... ..... 111 ..... 0100111 @r_nfvm
> +
> +#*** Vector AMO operations are encoded under the standard AMO major opcode.***
> +vamoswapw_v 00001 . . ..... ..... 110 ..... 0101111 @r_wdvm
> +vamoswapd_v 00001 . . ..... ..... 111 ..... 0101111 @r_wdvm
> +vamoaddw_v 00000 . . ..... ..... 110 ..... 0101111 @r_wdvm
> +vamoaddd_v 00000 . . ..... ..... 111 ..... 0101111 @r_wdvm
> +vamoxorw_v 00100 . . ..... ..... 110 ..... 0101111 @r_wdvm
> +vamoxord_v 00100 . . ..... ..... 111 ..... 0101111 @r_wdvm
> +vamoandw_v 01100 . . ..... ..... 110 ..... 0101111 @r_wdvm
> +vamoandd_v 01100 . . ..... ..... 111 ..... 0101111 @r_wdvm
> +vamoorw_v 01000 . . ..... ..... 110 ..... 0101111 @r_wdvm
> +vamoord_v 01000 . . ..... ..... 111 ..... 0101111 @r_wdvm
> +vamominw_v 10000 . . ..... ..... 110 ..... 0101111 @r_wdvm
> +vamomind_v 10000 . . ..... ..... 111 ..... 0101111 @r_wdvm
> +vamomaxw_v 10100 . . ..... ..... 110 ..... 0101111 @r_wdvm
> +vamomaxd_v 10100 . . ..... ..... 111 ..... 0101111 @r_wdvm
> +vamominuw_v 11000 . . ..... ..... 110 ..... 0101111 @r_wdvm
> +vamominud_v 11000 . . ..... ..... 111 ..... 0101111 @r_wdvm
> +vamomaxuw_v 11100 . . ..... ..... 110 ..... 0101111 @r_wdvm
> +vamomaxud_v 11100 . . ..... ..... 111 ..... 0101111 @r_wdvm
> +
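
On the wd bit in @r_wdvm: my reading of 0.7.1 is that wd=1 asks for the
original memory value to be written back into vd, while wd=0 leaves vd
untouched and only updates memory. Per element, roughly (illustrative
pseudo-C, not code from this patch):

    old = load(addr[i]);               /* addr from rs1 + vs2[i] */
    store(addr[i], op(old, vs3[i]));   /* vs3 is vd read as a source */
    if (wd) {
        vd[i] = old;                   /* wd=1: return the old value */
    }
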
> +#*** new major opcode OP-V ***
> +vadd_vv 000000 . ..... ..... 000 ..... 1010111 @r_vm
> +vadd_vx 000000 . ..... ..... 100 ..... 1010111 @r_vm
> +vadd_vi 000000 . ..... ..... 011 ..... 1010111 @r_vm
> +vredsum_vs 000000 . ..... ..... 010 ..... 1010111 @r_vm
> +vfadd_vv 000000 . ..... ..... 001 ..... 1010111 @r_vm
> +vfadd_vf 000000 . ..... ..... 101 ..... 1010111 @r_vm
> +vredand_vs 000001 . ..... ..... 010 ..... 1010111 @r_vm
> +vfredsum_vs 000001 . ..... ..... 001 ..... 1010111 @r_vm
> +vsub_vv 000010 . ..... ..... 000 ..... 1010111 @r_vm
> +vsub_vx 000010 . ..... ..... 100 ..... 1010111 @r_vm
> +vredor_vs 000010 . ..... ..... 010 ..... 1010111 @r_vm
> +vfsub_vv 000010 . ..... ..... 001 ..... 1010111 @r_vm
> +vfsub_vf 000010 . ..... ..... 101 ..... 1010111 @r_vm
> +vrsub_vx 000011 . ..... ..... 100 ..... 1010111 @r_vm
> +vrsub_vi 000011 . ..... ..... 011 ..... 1010111 @r_vm
> +vredxor_vs 000011 . ..... ..... 010 ..... 1010111 @r_vm
> +vfredosum_vs 000011 . ..... ..... 001 ..... 1010111 @r_vm
> +vminu_vv 000100 . ..... ..... 000 ..... 1010111 @r_vm
> +vminu_vx 000100 . ..... ..... 100 ..... 1010111 @r_vm
> +vredminu_vs 000100 . ..... ..... 010 ..... 1010111 @r_vm
> +vfmin_vv 000100 . ..... ..... 001 ..... 1010111 @r_vm
> +vfmin_vf 000100 . ..... ..... 101 ..... 1010111 @r_vm
> +vmin_vv 000101 . ..... ..... 000 ..... 1010111 @r_vm
> +vmin_vx 000101 . ..... ..... 100 ..... 1010111 @r_vm
> +vredmin_vs 000101 . ..... ..... 010 ..... 1010111 @r_vm
> +vfredmin_vs 000101 . ..... ..... 001 ..... 1010111 @r_vm
> +vmaxu_vv 000110 . ..... ..... 000 ..... 1010111 @r_vm
> +vmaxu_vx 000110 . ..... ..... 100 ..... 1010111 @r_vm
> +vredmaxu_vs 000110 . ..... ..... 010 ..... 1010111 @r_vm
> +vfmax_vv 000110 . ..... ..... 001 ..... 1010111 @r_vm
> +vfmax_vf 000110 . ..... ..... 101 ..... 1010111 @r_vm
> +vmax_vv 000111 . ..... ..... 000 ..... 1010111 @r_vm
> +vmax_vx 000111 . ..... ..... 100 ..... 1010111 @r_vm
> +vredmax_vs 000111 . ..... ..... 010 ..... 1010111 @r_vm
> +vfredmax_vs 000111 . ..... ..... 001 ..... 1010111 @r_vm
> +vfsgnj_vv 001000 . ..... ..... 001 ..... 1010111 @r_vm
> +vfsgnj_vf 001000 . ..... ..... 101 ..... 1010111 @r_vm
> +vand_vv 001001 . ..... ..... 000 ..... 1010111 @r_vm
> +vand_vx 001001 . ..... ..... 100 ..... 1010111 @r_vm
> +vand_vi 001001 . ..... ..... 011 ..... 1010111 @r_vm
> +vfsgnjn_vv 001001 . ..... ..... 001 ..... 1010111 @r_vm
> +vfsgnjn_vf 001001 . ..... ..... 101 ..... 1010111 @r_vm
> +vor_vv 001010 . ..... ..... 000 ..... 1010111 @r_vm
> +vor_vx 001010 . ..... ..... 100 ..... 1010111 @r_vm
> +vor_vi 001010 . ..... ..... 011 ..... 1010111 @r_vm
> +vfsgnjx_vv 001010 . ..... ..... 001 ..... 1010111 @r_vm
> +vfsgnjx_vf 001010 . ..... ..... 101 ..... 1010111 @r_vm
> +vxor_vv 001011 . ..... ..... 000 ..... 1010111 @r_vm
> +vxor_vx 001011 . ..... ..... 100 ..... 1010111 @r_vm
> +vxor_vi 001011 . ..... ..... 011 ..... 1010111 @r_vm
> +vrgather_vv 001100 . ..... ..... 000 ..... 1010111 @r_vm
> +vrgather_vx 001100 . ..... ..... 100 ..... 1010111 @r_vm
> +vrgather_vi 001100 . ..... ..... 011 ..... 1010111 @r_vm
> +vext_x_v 001100 1 ..... ..... 010 ..... 1010111 @r
> +vfmv_f_s 001100 1 ..... ..... 001 ..... 1010111 @r
> +vmv_s_x 001101 1 ..... ..... 110 ..... 1010111 @r
> +vfmv_s_f 001101 1 ..... ..... 101 ..... 1010111 @r
> +vslideup_vx 001110 . ..... ..... 100 ..... 1010111 @r_vm
> +vslideup_vi 001110 . ..... ..... 011 ..... 1010111 @r_vm
> +vslide1up_vx 001110 . ..... ..... 110 ..... 1010111 @r_vm
> +vslidedown_vx 001111 . ..... ..... 100 ..... 1010111 @r_vm
> +vslidedown_vi 001111 . ..... ..... 011 ..... 1010111 @r_vm
> +vslide1down_vx 001111 . ..... ..... 110 ..... 1010111 @r_vm
> +vadc_vvm 010000 1 ..... ..... 000 ..... 1010111 @r
> +vadc_vxm 010000 1 ..... ..... 100 ..... 1010111 @r
> +vadc_vim 010000 1 ..... ..... 011 ..... 1010111 @r
> +vmadc_vvm 010001 1 ..... ..... 000 ..... 1010111 @r
> +vmadc_vxm 010001 1 ..... ..... 100 ..... 1010111 @r
> +vmadc_vim 010001 1 ..... ..... 011 ..... 1010111 @r
> +vsbc_vvm 010010 1 ..... ..... 000 ..... 1010111 @r
> +vsbc_vxm 010010 1 ..... ..... 100 ..... 1010111 @r
> +vmsbc_vvm 010011 1 ..... ..... 000 ..... 1010111 @r
> +vmsbc_vxm 010011 1 ..... ..... 100 ..... 1010111 @r
> +vmpopc_m 010100 . ..... ----- 010 ..... 1010111 @r2_vm
> +vmfirst_m 010101 . ..... ----- 010 ..... 1010111 @r2_vm
> +vmsbf_m 010110 . ..... 00001 010 ..... 1010111 @r2_vm
> +vmsof_m 010110 . ..... 00010 010 ..... 1010111 @r2_vm
> +vmsif_m 010110 . ..... 00011 010 ..... 1010111 @r2_vm
> +viota_m 010110 . ..... 10000 010 ..... 1010111 @r2_vm
> +vid_v 010110 . 00000 10001 010 ..... 1010111 @r1_vm
> +vmerge_vvm 010111 . ..... ..... 000 ..... 1010111 @r_vm
> +vmerge_vxm 010111 . ..... ..... 100 ..... 1010111 @r_vm
> +vmerge_vim 010111 . ..... ..... 011 ..... 1010111 @r_vm
> +vcompress_vm 010111 - ..... ..... 010 ..... 1010111 @r
> +vfmerge_vfm 010111 . ..... ..... 101 ..... 1010111 @r_vm
> +vmseq_vv 011000 . ..... ..... 000 ..... 1010111 @r_vm
> +vmseq_vx 011000 . ..... ..... 100 ..... 1010111 @r_vm
> +vmseq_vi 011000 . ..... ..... 011 ..... 1010111 @r_vm
> +vmandnot_mm 011000 - ..... ..... 010 ..... 1010111 @r
> +vmfeq_vv 011000 . ..... ..... 001 ..... 1010111 @r_vm
> +vmfeq_vf 011000 . ..... ..... 101 ..... 1010111 @r_vm
> +vmsne_vv 011001 . ..... ..... 000 ..... 1010111 @r_vm
> +vmsne_vx 011001 . ..... ..... 100 ..... 1010111 @r_vm
> +vmsne_vi 011001 . ..... ..... 011 ..... 1010111 @r_vm
> +vmand_mm 011001 - ..... ..... 010 ..... 1010111 @r
> +vmfle_vv 011001 . ..... ..... 001 ..... 1010111 @r_vm
> +vmfle_vf 011001 . ..... ..... 101 ..... 1010111 @r_vm
> +vmsltu_vv 011010 . ..... ..... 000 ..... 1010111 @r_vm
> +vmsltu_vx 011010 . ..... ..... 100 ..... 1010111 @r_vm
> +vmor_mm 011010 - ..... ..... 010 ..... 1010111 @r
> +vmford_vv 011010 . ..... ..... 001 ..... 1010111 @r_vm
> +vmford_vf 011010 . ..... ..... 101 ..... 1010111 @r_vm
> +vmslt_vv 011011 . ..... ..... 000 ..... 1010111 @r_vm
> +vmslt_vx 011011 . ..... ..... 100 ..... 1010111 @r_vm
> +vmxor_mm 011011 - ..... ..... 010 ..... 1010111 @r
> +vmflt_vv 011011 . ..... ..... 001 ..... 1010111 @r_vm
> +vmflt_vf 011011 . ..... ..... 101 ..... 1010111 @r_vm
> +vmsleu_vv 011100 . ..... ..... 000 ..... 1010111 @r_vm
> +vmsleu_vx 011100 . ..... ..... 100 ..... 1010111 @r_vm
> +vmsleu_vi 011100 . ..... ..... 011 ..... 1010111 @r_vm
> +vmornot_mm 011100 - ..... ..... 010 ..... 1010111 @r
> +vmfne_vv 011100 . ..... ..... 001 ..... 1010111 @r_vm
> +vmfne_vf 011100 . ..... ..... 101 ..... 1010111 @r_vm
> +vmsle_vv 011101 . ..... ..... 000 ..... 1010111 @r_vm
> +vmsle_vx 011101 . ..... ..... 100 ..... 1010111 @r_vm
> +vmsle_vi 011101 . ..... ..... 011 ..... 1010111 @r_vm
> +vmnand_mm 011101 - ..... ..... 010 ..... 1010111 @r
> +vmfgt_vf 011101 . ..... ..... 101 ..... 1010111 @r_vm
> +vmsgtu_vx 011110 . ..... ..... 100 ..... 1010111 @r_vm
> +vmsgtu_vi 011110 . ..... ..... 011 ..... 1010111 @r_vm
> +vmnor_mm 011110 - ..... ..... 010 ..... 1010111 @r
> +vmsgt_vx 011111 . ..... ..... 100 ..... 1010111 @r_vm
> +vmsgt_vi 011111 . ..... ..... 011 ..... 1010111 @r_vm
> +vmxnor_mm 011111 - ..... ..... 010 ..... 1010111 @r
> +vmfge_vf 011111 . ..... ..... 101 ..... 1010111 @r_vm
> +vsaddu_vv 100000 . ..... ..... 000 ..... 1010111 @r_vm
> +vsaddu_vx 100000 . ..... ..... 100 ..... 1010111 @r_vm
> +vsaddu_vi 100000 . ..... ..... 011 ..... 1010111 @r_vm
> +vdivu_vv 100000 . ..... ..... 010 ..... 1010111 @r_vm
> +vdivu_vx 100000 . ..... ..... 110 ..... 1010111 @r_vm
> +vfdiv_vv 100000 . ..... ..... 001 ..... 1010111 @r_vm
> +vfdiv_vf 100000 . ..... ..... 101 ..... 1010111 @r_vm
> +vsadd_vv 100001 . ..... ..... 000 ..... 1010111 @r_vm
> +vsadd_vx 100001 . ..... ..... 100 ..... 1010111 @r_vm
> +vsadd_vi 100001 . ..... ..... 011 ..... 1010111 @r_vm
> +vdiv_vv 100001 . ..... ..... 010 ..... 1010111 @r_vm
> +vdiv_vx 100001 . ..... ..... 110 ..... 1010111 @r_vm
> +vfrdiv_vf 100001 . ..... ..... 101 ..... 1010111 @r_vm
> +vssubu_vv 100010 . ..... ..... 000 ..... 1010111 @r_vm
> +vssubu_vx 100010 . ..... ..... 100 ..... 1010111 @r_vm
> +vremu_vv 100010 . ..... ..... 010 ..... 1010111 @r_vm
> +vremu_vx 100010 . ..... ..... 110 ..... 1010111 @r_vm
> +vfcvt_xu_f_v 100010 . ..... 00000 001 ..... 1010111 @r2_vm
> +vfcvt_x_f_v 100010 . ..... 00001 001 ..... 1010111 @r2_vm
> +vfcvt_f_xu_v 100010 . ..... 00010 001 ..... 1010111 @r2_vm
> +vfcvt_f_x_v 100010 . ..... 00011 001 ..... 1010111 @r2_vm
> +vfwcvt_xu_f_v 100010 . ..... 01000 001 ..... 1010111 @r2_vm
> +vfwcvt_x_f_v 100010 . ..... 01001 001 ..... 1010111 @r2_vm
> +vfwcvt_f_xu_v 100010 . ..... 01010 001 ..... 1010111 @r2_vm
> +vfwcvt_f_x_v 100010 . ..... 01011 001 ..... 1010111 @r2_vm
> +vfwcvt_f_f_v 100010 . ..... 01100 001 ..... 1010111 @r2_vm
> +vfncvt_xu_f_v 100010 . ..... 10000 001 ..... 1010111 @r2_vm
> +vfncvt_x_f_v 100010 . ..... 10001 001 ..... 1010111 @r2_vm
> +vfncvt_f_xu_v 100010 . ..... 10010 001 ..... 1010111 @r2_vm
> +vfncvt_f_x_v 100010 . ..... 10011 001 ..... 1010111 @r2_vm
> +vfncvt_f_f_v 100010 . ..... 10100 001 ..... 1010111 @r2_vm
> +vssub_vv 100011 . ..... ..... 000 ..... 1010111 @r_vm
> +vssub_vx 100011 . ..... ..... 100 ..... 1010111 @r_vm
> +vrem_vv 100011 . ..... ..... 010 ..... 1010111 @r_vm
> +vrem_vx 100011 . ..... ..... 110 ..... 1010111 @r_vm
> +vfsqrt_v 100011 . ..... 00000 001 ..... 1010111 @r2_vm
> +vfclass_v 100011 . ..... 10000 001 ..... 1010111 @r2_vm
> +vaadd_vv 100100 . ..... ..... 000 ..... 1010111 @r_vm
> +vaadd_vx 100100 . ..... ..... 100 ..... 1010111 @r_vm
> +vaadd_vi 100100 . ..... ..... 011 ..... 1010111 @r_vm
> +vmulhu_vv 100100 . ..... ..... 010 ..... 1010111 @r_vm
> +vmulhu_vx 100100 . ..... ..... 110 ..... 1010111 @r_vm
> +vfmul_vv 100100 . ..... ..... 001 ..... 1010111 @r_vm
> +vfmul_vf 100100 . ..... ..... 101 ..... 1010111 @r_vm
> +vsll_vv 100101 . ..... ..... 000 ..... 1010111 @r_vm
> +vsll_vx 100101 . ..... ..... 100 ..... 1010111 @r_vm
> +vsll_vi 100101 . ..... ..... 011 ..... 1010111 @r_vm
> +vmul_vv 100101 . ..... ..... 010 ..... 1010111 @r_vm
> +vmul_vx 100101 . ..... ..... 110 ..... 1010111 @r_vm
> +vasub_vv 100110 . ..... ..... 000 ..... 1010111 @r_vm
> +vasub_vx 100110 . ..... ..... 100 ..... 1010111 @r_vm
> +vmulhsu_vv 100110 . ..... ..... 010 ..... 1010111 @r_vm
> +vmulhsu_vx 100110 . ..... ..... 110 ..... 1010111 @r_vm
> +vsmul_vv 100111 . ..... ..... 000 ..... 1010111 @r_vm
> +vsmul_vx 100111 . ..... ..... 100 ..... 1010111 @r_vm
> +vmulh_vv 100111 . ..... ..... 010 ..... 1010111 @r_vm
> +vmulh_vx 100111 . ..... ..... 110 ..... 1010111 @r_vm
> +vfrsub_vf 100111 . ..... ..... 101 ..... 1010111 @r_vm
> +vsrl_vv 101000 . ..... ..... 000 ..... 1010111 @r_vm
> +vsrl_vx 101000 . ..... ..... 100 ..... 1010111 @r_vm
> +vsrl_vi 101000 . ..... ..... 011 ..... 1010111 @r_vm
> +vfmadd_vv 101000 . ..... ..... 001 ..... 1010111 @r_vm
> +vfmadd_vf 101000 . ..... ..... 101 ..... 1010111 @r_vm
> +vsra_vv 101001 . ..... ..... 000 ..... 1010111 @r_vm
> +vsra_vx 101001 . ..... ..... 100 ..... 1010111 @r_vm
> +vsra_vi 101001 . ..... ..... 011 ..... 1010111 @r_vm
> +vmadd_vv 101001 . ..... ..... 010 ..... 1010111 @r_vm
> +vmadd_vx 101001 . ..... ..... 110 ..... 1010111 @r_vm
> +vfnmadd_vv 101001 . ..... ..... 001 ..... 1010111 @r_vm
> +vfnmadd_vf 101001 . ..... ..... 101 ..... 1010111 @r_vm
> +vssrl_vv 101010 . ..... ..... 000 ..... 1010111 @r_vm
> +vssrl_vx 101010 . ..... ..... 100 ..... 1010111 @r_vm
> +vssrl_vi 101010 . ..... ..... 011 ..... 1010111 @r_vm
> +vfmsub_vv 101010 . ..... ..... 001 ..... 1010111 @r_vm
> +vfmsub_vf 101010 . ..... ..... 101 ..... 1010111 @r_vm
> +vssra_vv 101011 . ..... ..... 000 ..... 1010111 @r_vm
> +vssra_vx 101011 . ..... ..... 100 ..... 1010111 @r_vm
> +vssra_vi 101011 . ..... ..... 011 ..... 1010111 @r_vm
> +vnmsub_vv 101011 . ..... ..... 010 ..... 1010111 @r_vm
> +vnmsub_vx 101011 . ..... ..... 110 ..... 1010111 @r_vm
> +vfnmsub_vv 101011 . ..... ..... 001 ..... 1010111 @r_vm
> +vfnmsub_vf 101011 . ..... ..... 101 ..... 1010111 @r_vm
> +vnsrl_vv 101100 . ..... ..... 000 ..... 1010111 @r_vm
> +vnsrl_vx 101100 . ..... ..... 100 ..... 1010111 @r_vm
> +vnsrl_vi 101100 . ..... ..... 011 ..... 1010111 @r_vm
> +vfmacc_vv 101100 . ..... ..... 001 ..... 1010111 @r_vm
> +vfmacc_vf 101100 . ..... ..... 101 ..... 1010111 @r_vm
> +vnsra_vv 101101 . ..... ..... 000 ..... 1010111 @r_vm
> +vnsra_vx 101101 . ..... ..... 100 ..... 1010111 @r_vm
> +vnsra_vi 101101 . ..... ..... 011 ..... 1010111 @r_vm
> +vmacc_vv 101101 . ..... ..... 010 ..... 1010111 @r_vm
> +vmacc_vx 101101 . ..... ..... 110 ..... 1010111 @r_vm
> +vfnmacc_vv 101101 . ..... ..... 001 ..... 1010111 @r_vm
> +vfnmacc_vf 101101 . ..... ..... 101 ..... 1010111 @r_vm
> +vnclipu_vv 101110 . ..... ..... 000 ..... 1010111 @r_vm
> +vnclipu_vx 101110 . ..... ..... 100 ..... 1010111 @r_vm
> +vnclipu_vi 101110 . ..... ..... 011 ..... 1010111 @r_vm
> +vfmsac_vv 101110 . ..... ..... 001 ..... 1010111 @r_vm
> +vfmsac_vf 101110 . ..... ..... 101 ..... 1010111 @r_vm
> +vnclip_vv 101111 . ..... ..... 000 ..... 1010111 @r_vm
> +vnclip_vx 101111 . ..... ..... 100 ..... 1010111 @r_vm
> +vnclip_vi 101111 . ..... ..... 011 ..... 1010111 @r_vm
> +vnmsac_vv 101111 . ..... ..... 010 ..... 1010111 @r_vm
> +vnmsac_vx 101111 . ..... ..... 110 ..... 1010111 @r_vm
> +vfnmsac_vv 101111 . ..... ..... 001 ..... 1010111 @r_vm
> +vfnmsac_vf 101111 . ..... ..... 101 ..... 1010111 @r_vm
> +vwredsumu_vs 110000 . ..... ..... 000 ..... 1010111 @r_vm
> +vwaddu_vv 110000 . ..... ..... 010 ..... 1010111 @r_vm
> +vwaddu_vx 110000 . ..... ..... 110 ..... 1010111 @r_vm
> +vfwadd_vv 110000 . ..... ..... 001 ..... 1010111 @r_vm
> +vfwadd_vf 110000 . ..... ..... 101 ..... 1010111 @r_vm
> +vwredsum_vs 110001 . ..... ..... 000 ..... 1010111 @r_vm
> +vwadd_vv 110001 . ..... ..... 010 ..... 1010111 @r_vm
> +vwadd_vx 110001 . ..... ..... 110 ..... 1010111 @r_vm
> +vfwredsum_vs 110001 . ..... ..... 001 ..... 1010111 @r_vm
> +vwsubu_vv 110010 . ..... ..... 010 ..... 1010111 @r_vm
> +vwsubu_vx 110010 . ..... ..... 110 ..... 1010111 @r_vm
> +vfwsub_vv 110010 . ..... ..... 001 ..... 1010111 @r_vm
> +vfwsub_vf 110010 . ..... ..... 101 ..... 1010111 @r_vm
> +vwsub_vv 110011 . ..... ..... 010 ..... 1010111 @r_vm
> +vwsub_vx 110011 . ..... ..... 110 ..... 1010111 @r_vm
> +vfwredosum_vs 110011 . ..... ..... 001 ..... 1010111 @r_vm
> +vwaddu_wv 110100 . ..... ..... 010 ..... 1010111 @r_vm
> +vwaddu_wx 110100 . ..... ..... 110 ..... 1010111 @r_vm
> +vfwadd_wv 110100 . ..... ..... 001 ..... 1010111 @r_vm
> +vfwadd_wf 110100 . ..... ..... 101 ..... 1010111 @r_vm
> +vwadd_wv 110101 . ..... ..... 010 ..... 1010111 @r_vm
> +vwadd_wx 110101 . ..... ..... 110 ..... 1010111 @r_vm
> +vwsubu_wv 110110 . ..... ..... 010 ..... 1010111 @r_vm
> +vwsubu_wx 110110 . ..... ..... 110 ..... 1010111 @r_vm
> +vfwsub_wv 110110 . ..... ..... 001 ..... 1010111 @r_vm
> +vfwsub_wf 110110 . ..... ..... 101 ..... 1010111 @r_vm
> +vwsub_wv 110111 . ..... ..... 010 ..... 1010111 @r_vm
> +vwsub_wx 110111 . ..... ..... 110 ..... 1010111 @r_vm
> +vwmulu_vv 111000 . ..... ..... 010 ..... 1010111 @r_vm
> +vwmulu_vx 111000 . ..... ..... 110 ..... 1010111 @r_vm
> +vfwmul_vv 111000 . ..... ..... 001 ..... 1010111 @r_vm
> +vfwmul_vf 111000 . ..... ..... 101 ..... 1010111 @r_vm
> +vwmulsu_vv 111010 . ..... ..... 010 ..... 1010111 @r_vm
> +vwmulsu_vx 111010 . ..... ..... 110 ..... 1010111 @r_vm
> +vwmul_vv 111011 . ..... ..... 010 ..... 1010111 @r_vm
> +vwmul_vx 111011 . ..... ..... 110 ..... 1010111 @r_vm
> +vwsmaccu_vv 111100 . ..... ..... 000 ..... 1010111 @r_vm
> +vwsmaccu_vx 111100 . ..... ..... 100 ..... 1010111 @r_vm
> +vwmaccu_vv 111100 . ..... ..... 010 ..... 1010111 @r_vm
> +vwmaccu_vx 111100 . ..... ..... 110 ..... 1010111 @r_vm
> +vfwmacc_vv 111100 . ..... ..... 001 ..... 1010111 @r_vm
> +vfwmacc_vf 111100 . ..... ..... 101 ..... 1010111 @r_vm
> +vwsmacc_vv 111101 . ..... ..... 000 ..... 1010111 @r_vm
> +vwsmacc_vx 111101 . ..... ..... 100 ..... 1010111 @r_vm
> +vwmacc_vv 111101 . ..... ..... 010 ..... 1010111 @r_vm
> +vwmacc_vx 111101 . ..... ..... 110 ..... 1010111 @r_vm
> +vfwnmacc_vv 111101 . ..... ..... 001 ..... 1010111 @r_vm
> +vfwnmacc_vf 111101 . ..... ..... 101 ..... 1010111 @r_vm
> +vwsmaccsu_vv 111110 . ..... ..... 000 ..... 1010111 @r_vm
> +vwsmaccsu_vx 111110 . ..... ..... 100 ..... 1010111 @r_vm
> +vwmaccsu_vv 111110 . ..... ..... 010 ..... 1010111 @r_vm
> +vwmaccsu_vx 111110 . ..... ..... 110 ..... 1010111 @r_vm
> +vfwmsac_vv 111110 . ..... ..... 001 ..... 1010111 @r_vm
> +vfwmsac_vf 111110 . ..... ..... 101 ..... 1010111 @r_vm
> +vwsmaccus_vx 111111 . ..... ..... 100 ..... 1010111 @r_vm
> +vwmaccus_vx 111111 . ..... ..... 110 ..... 1010111 @r_vm
> +vfwnmsac_vv 111111 . ..... ..... 001 ..... 1010111 @r_vm
> +vfwnmsac_vf 111111 . ..... ..... 101 ..... 1010111 @r_vm
> +vsetvli 0 ........... ..... 111 ..... 1010111 @r2_zimm
> +vsetvl 1000000 ..... ..... 111 ..... 1010111 @r
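
To close out the decode additions: vsetvli/vsetvl take an application
vector length in rs1 and a vtype setting (zimm or rs2) and grant a new
vl in rd. Under 0.7.1 the grant is essentially min(AVL, VLMAX) with
VLMAX = (VLEN / SEW) * LMUL; a sketch, reusing the hypothetical vtype
decoders from the earlier note and assuming VLEN is given in bits:

    /* Illustrative vsetvl semantics, not the patch's implementation. */
    static target_ulong do_vsetvl(CPURISCVState *env, target_ulong avl,
                                  target_ulong vtype, int vlen)
    {
        int vlmax = vlen / vtype_sew(vtype) * vtype_lmul(vtype);
        target_ulong vl = avl < (target_ulong)vlmax ? avl : vlmax;
        env->vfp.vl = vl;
        env->vfp.vtype = vtype;
        env->vfp.vstart = 0;
        return vl;   /* value written to rd */
    }
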
> diff --git a/target/riscv/insn_trans/trans_rvv.inc.c b/target/riscv/insn_trans/trans_rvv.inc.c
> new file mode 100644
> index 0000000..dc8e6ce
> --- /dev/null
> +++ b/target/riscv/insn_trans/trans_rvv.inc.c
> @@ -0,0 +1,484 @@
> +/*
> + * RISC-V translation routines for the RVV Standard Extension.
> + *
> + * Copyright (c) 2011-2019 C-SKY Limited. All rights reserved.
> + *
> + * This program is free software; you can redistribute it and/or modify it
> + * under the terms and conditions of the GNU General Public License,
> + * version 2 or later, as published by the Free Software Foundation.
> + *
> + * This program is distributed in the hope it will be useful, but WITHOUT
> + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
> + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
> + * more details.
> + *
> + * You should have received a copy of the GNU General Public License along with
> + * this program. If not, see <http://www.gnu.org/licenses/>.
> + */
> +
> +#define GEN_VECTOR_R2_NFVM(INSN) \
> +static bool trans_##INSN(DisasContext *ctx, arg_##INSN * a) \
> +{ \
> + TCGv_i32 s1 = tcg_const_i32(a->rs1); \
> + TCGv_i32 d = tcg_const_i32(a->rd); \
> + TCGv_i32 nf = tcg_const_i32(a->nf); \
> + TCGv_i32 vm = tcg_const_i32(a->vm); \
> + gen_helper_vector_##INSN(cpu_env, nf, vm, s1, d); \
> + tcg_temp_free_i32(s1); \
> + tcg_temp_free_i32(d); \
> + tcg_temp_free_i32(nf); \
> + tcg_temp_free_i32(vm); \
> + return true; \
> +}
> +#define GEN_VECTOR_R_NFVM(INSN) \
> +static bool trans_##INSN(DisasContext *ctx, arg_##INSN * a) \
> +{ \
> + TCGv_i32 s1 = tcg_const_i32(a->rs1); \
> + TCGv_i32 s2 = tcg_const_i32(a->rs2); \
> + TCGv_i32 d = tcg_const_i32(a->rd); \
> + TCGv_i32 nf = tcg_const_i32(a->nf); \
> + TCGv_i32 vm = tcg_const_i32(a->vm); \
> + gen_helper_vector_##INSN(cpu_env, nf, vm, s1, s2, d);\
> + tcg_temp_free_i32(s1); \
> + tcg_temp_free_i32(s2); \
> + tcg_temp_free_i32(d); \
> + tcg_temp_free_i32(nf); \
> + tcg_temp_free_i32(vm); \
> + return true; \
> +}
> +
> +#define GEN_VECTOR_R_WDVM(INSN) \
> +static bool trans_##INSN(DisasContext *ctx, arg_##INSN * a) \
> +{ \
> + TCGv_i32 s1 = tcg_const_i32(a->rs1); \
> + TCGv_i32 s2 = tcg_const_i32(a->rs2); \
> + TCGv_i32 d = tcg_const_i32(a->rd); \
> + TCGv_i32 wd = tcg_const_i32(a->wd); \
> + TCGv_i32 vm = tcg_const_i32(a->vm); \
> + gen_helper_vector_##INSN(cpu_env, wd, vm, s1, s2, d);\
> + tcg_temp_free_i32(s1); \
> + tcg_temp_free_i32(s2); \
> + tcg_temp_free_i32(d); \
> + tcg_temp_free_i32(wd); \
> + tcg_temp_free_i32(vm); \
> + return true; \
> +}
> +#define GEN_VECTOR_R(INSN) \
> +static bool trans_##INSN(DisasContext *ctx, arg_##INSN * a) \
> +{ \
> + TCGv_i32 s1 = tcg_const_i32(a->rs1); \
> + TCGv_i32 s2 = tcg_const_i32(a->rs2); \
> + TCGv_i32 d = tcg_const_i32(a->rd); \
> + gen_helper_vector_##INSN(cpu_env, s1, s2, d); \
> + tcg_temp_free_i32(s1); \
> + tcg_temp_free_i32(s2); \
> + tcg_temp_free_i32(d); \
> + return true; \
> +}
> +#define GEN_VECTOR_R2_VM(INSN) \
> +static bool trans_##INSN(DisasContext *ctx, arg_##INSN * a) \
> +{ \
> + TCGv_i32 s2 = tcg_const_i32(a->rs2); \
> + TCGv_i32 d = tcg_const_i32(a->rd); \
> + TCGv_i32 vm = tcg_const_i32(a->vm); \
> + gen_helper_vector_##INSN(cpu_env, vm, s2, d); \
> + tcg_temp_free_i32(s2); \
> + tcg_temp_free_i32(d); \
> + tcg_temp_free_i32(vm); \
> + return true; \
> +}
> +
> +#define GEN_VECTOR_R1_VM(INSN) \
> +static bool trans_##INSN(DisasContext *ctx, arg_##INSN * a) \
> +{ \
> + TCGv_i32 d = tcg_const_i32(a->rd); \
> + TCGv_i32 vm = tcg_const_i32(a->vm); \
> + gen_helper_vector_##INSN(cpu_env, vm, d); \
> + tcg_temp_free_i32(d); \
> + tcg_temp_free_i32(vm); \
> + return true; \
> +}
> +#define GEN_VECTOR_R_VM(INSN) \
> +static bool trans_##INSN(DisasContext *ctx, arg_##INSN * a) \
> +{ \
> + TCGv_i32 s1 = tcg_const_i32(a->rs1); \
> + TCGv_i32 s2 = tcg_const_i32(a->rs2); \
> + TCGv_i32 d = tcg_const_i32(a->rd); \
> + TCGv_i32 vm = tcg_const_i32(a->vm); \
> + gen_helper_vector_##INSN(cpu_env, vm, s1, s2, d); \
> + tcg_temp_free_i32(s1); \
> + tcg_temp_free_i32(s2); \
> + tcg_temp_free_i32(d); \
> + tcg_temp_free_i32(vm); \
> + return true; \
> +}
> +#define GEN_VECTOR_R2_ZIMM(INSN) \
> +static bool trans_##INSN(DisasContext *ctx, arg_##INSN * a) \
> +{ \
> + TCGv_i32 s1 = tcg_const_i32(a->rs1); \
> + TCGv_i32 zimm = tcg_const_i32(a->zimm); \
> + TCGv_i32 d = tcg_const_i32(a->rd); \
> + gen_helper_vector_##INSN(cpu_env, s1, zimm, d); \
> + tcg_temp_free_i32(s1); \
> + tcg_temp_free_i32(zimm); \
> + tcg_temp_free_i32(d); \
> + return true; \
> +}
> +
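
As a concrete reference for the macros above, GEN_VECTOR_R_VM(vadd_vv)
expands to:

    static bool trans_vadd_vv(DisasContext *ctx, arg_vadd_vv *a)
    {
        TCGv_i32 s1 = tcg_const_i32(a->rs1);
        TCGv_i32 s2 = tcg_const_i32(a->rs2);
        TCGv_i32 d = tcg_const_i32(a->rd);
        TCGv_i32 vm = tcg_const_i32(a->vm);
        gen_helper_vector_vadd_vv(cpu_env, vm, s1, s2, d);
        tcg_temp_free_i32(s1);
        tcg_temp_free_i32(s2);
        tcg_temp_free_i32(d);
        tcg_temp_free_i32(vm);
        return true;
    }
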
> +GEN_VECTOR_R2_NFVM(vlb_v)
> +GEN_VECTOR_R2_NFVM(vlh_v)
> +GEN_VECTOR_R2_NFVM(vlw_v)
> +GEN_VECTOR_R2_NFVM(vle_v)
> +GEN_VECTOR_R2_NFVM(vlbu_v)
> +GEN_VECTOR_R2_NFVM(vlhu_v)
> +GEN_VECTOR_R2_NFVM(vlwu_v)
> +GEN_VECTOR_R2_NFVM(vlbff_v)
> +GEN_VECTOR_R2_NFVM(vlhff_v)
> +GEN_VECTOR_R2_NFVM(vlwff_v)
> +GEN_VECTOR_R2_NFVM(vleff_v)
> +GEN_VECTOR_R2_NFVM(vlbuff_v)
> +GEN_VECTOR_R2_NFVM(vlhuff_v)
> +GEN_VECTOR_R2_NFVM(vlwuff_v)
> +GEN_VECTOR_R2_NFVM(vsb_v)
> +GEN_VECTOR_R2_NFVM(vsh_v)
> +GEN_VECTOR_R2_NFVM(vsw_v)
> +GEN_VECTOR_R2_NFVM(vse_v)
> +
> +GEN_VECTOR_R_NFVM(vlsb_v)
> +GEN_VECTOR_R_NFVM(vlsh_v)
> +GEN_VECTOR_R_NFVM(vlsw_v)
> +GEN_VECTOR_R_NFVM(vlse_v)
> +GEN_VECTOR_R_NFVM(vlsbu_v)
> +GEN_VECTOR_R_NFVM(vlshu_v)
> +GEN_VECTOR_R_NFVM(vlswu_v)
> +GEN_VECTOR_R_NFVM(vssb_v)
> +GEN_VECTOR_R_NFVM(vssh_v)
> +GEN_VECTOR_R_NFVM(vssw_v)
> +GEN_VECTOR_R_NFVM(vsse_v)
> +GEN_VECTOR_R_NFVM(vlxb_v)
> +GEN_VECTOR_R_NFVM(vlxh_v)
> +GEN_VECTOR_R_NFVM(vlxw_v)
> +GEN_VECTOR_R_NFVM(vlxe_v)
> +GEN_VECTOR_R_NFVM(vlxbu_v)
> +GEN_VECTOR_R_NFVM(vlxhu_v)
> +GEN_VECTOR_R_NFVM(vlxwu_v)
> +GEN_VECTOR_R_NFVM(vsxb_v)
> +GEN_VECTOR_R_NFVM(vsxh_v)
> +GEN_VECTOR_R_NFVM(vsxw_v)
> +GEN_VECTOR_R_NFVM(vsxe_v)
> +GEN_VECTOR_R_NFVM(vsuxb_v)
> +GEN_VECTOR_R_NFVM(vsuxh_v)
> +GEN_VECTOR_R_NFVM(vsuxw_v)
> +GEN_VECTOR_R_NFVM(vsuxe_v)
> +
> +GEN_VECTOR_R_WDVM(vamoswapw_v)
> +GEN_VECTOR_R_WDVM(vamoswapd_v)
> +GEN_VECTOR_R_WDVM(vamoaddw_v)
> +GEN_VECTOR_R_WDVM(vamoaddd_v)
> +GEN_VECTOR_R_WDVM(vamoxorw_v)
> +GEN_VECTOR_R_WDVM(vamoxord_v)
> +GEN_VECTOR_R_WDVM(vamoandw_v)
> +GEN_VECTOR_R_WDVM(vamoandd_v)
> +GEN_VECTOR_R_WDVM(vamoorw_v)
> +GEN_VECTOR_R_WDVM(vamoord_v)
> +GEN_VECTOR_R_WDVM(vamominw_v)
> +GEN_VECTOR_R_WDVM(vamomind_v)
> +GEN_VECTOR_R_WDVM(vamomaxw_v)
> +GEN_VECTOR_R_WDVM(vamomaxd_v)
> +GEN_VECTOR_R_WDVM(vamominuw_v)
> +GEN_VECTOR_R_WDVM(vamominud_v)
> +GEN_VECTOR_R_WDVM(vamomaxuw_v)
> +GEN_VECTOR_R_WDVM(vamomaxud_v)
> +
> +GEN_VECTOR_R(vext_x_v)
> +GEN_VECTOR_R(vfmv_f_s)
> +GEN_VECTOR_R(vmv_s_x)
> +GEN_VECTOR_R(vfmv_s_f)
> +GEN_VECTOR_R(vadc_vvm)
> +GEN_VECTOR_R(vadc_vxm)
> +GEN_VECTOR_R(vadc_vim)
> +GEN_VECTOR_R(vmadc_vvm)
> +GEN_VECTOR_R(vmadc_vxm)
> +GEN_VECTOR_R(vmadc_vim)
> +GEN_VECTOR_R(vsbc_vvm)
> +GEN_VECTOR_R(vsbc_vxm)
> +GEN_VECTOR_R(vmsbc_vvm)
> +GEN_VECTOR_R(vmsbc_vxm)
> +GEN_VECTOR_R2_VM(vmpopc_m)
> +GEN_VECTOR_R2_VM(vmfirst_m)
> +GEN_VECTOR_R(vcompress_vm)
> +GEN_VECTOR_R(vmandnot_mm)
> +GEN_VECTOR_R(vmand_mm)
> +GEN_VECTOR_R(vmor_mm)
> +GEN_VECTOR_R(vmxor_mm)
> +GEN_VECTOR_R(vmornot_mm)
> +GEN_VECTOR_R(vmnand_mm)
> +GEN_VECTOR_R(vmnor_mm)
> +GEN_VECTOR_R(vmxnor_mm)
> +GEN_VECTOR_R2_VM(vmsbf_m)
> +GEN_VECTOR_R2_VM(vmsof_m)
> +GEN_VECTOR_R2_VM(vmsif_m)
> +GEN_VECTOR_R2_VM(viota_m)
> +GEN_VECTOR_R1_VM(vid_v)
> +GEN_VECTOR_R2_VM(vfcvt_xu_f_v)
> +GEN_VECTOR_R2_VM(vfcvt_x_f_v)
> +GEN_VECTOR_R2_VM(vfcvt_f_xu_v)
> +GEN_VECTOR_R2_VM(vfcvt_f_x_v)
> +GEN_VECTOR_R2_VM(vfwcvt_xu_f_v)
> +GEN_VECTOR_R2_VM(vfwcvt_x_f_v)
> +GEN_VECTOR_R2_VM(vfwcvt_f_xu_v)
> +GEN_VECTOR_R2_VM(vfwcvt_f_x_v)
> +GEN_VECTOR_R2_VM(vfwcvt_f_f_v)
> +GEN_VECTOR_R2_VM(vfncvt_xu_f_v)
> +GEN_VECTOR_R2_VM(vfncvt_x_f_v)
> +GEN_VECTOR_R2_VM(vfncvt_f_xu_v)
> +GEN_VECTOR_R2_VM(vfncvt_f_x_v)
> +GEN_VECTOR_R2_VM(vfncvt_f_f_v)
> +GEN_VECTOR_R2_VM(vfsqrt_v)
> +GEN_VECTOR_R2_VM(vfclass_v)
> +
> +GEN_VECTOR_R_VM(vadd_vv)
> +GEN_VECTOR_R_VM(vadd_vx)
> +GEN_VECTOR_R_VM(vadd_vi)
> +GEN_VECTOR_R_VM(vredsum_vs)
> +GEN_VECTOR_R_VM(vfadd_vv)
> +GEN_VECTOR_R_VM(vfadd_vf)
> +GEN_VECTOR_R_VM(vredand_vs)
> +GEN_VECTOR_R_VM(vfredsum_vs)
> +GEN_VECTOR_R_VM(vsub_vv)
> +GEN_VECTOR_R_VM(vsub_vx)
> +GEN_VECTOR_R_VM(vredor_vs)
> +GEN_VECTOR_R_VM(vfsub_vv)
> +GEN_VECTOR_R_VM(vfsub_vf)
> +GEN_VECTOR_R_VM(vrsub_vx)
> +GEN_VECTOR_R_VM(vrsub_vi)
> +GEN_VECTOR_R_VM(vredxor_vs)
> +GEN_VECTOR_R_VM(vfredosum_vs)
> +GEN_VECTOR_R_VM(vminu_vv)
> +GEN_VECTOR_R_VM(vminu_vx)
> +GEN_VECTOR_R_VM(vredminu_vs)
> +GEN_VECTOR_R_VM(vfmin_vv)
> +GEN_VECTOR_R_VM(vfmin_vf)
> +GEN_VECTOR_R_VM(vmin_vv)
> +GEN_VECTOR_R_VM(vmin_vx)
> +GEN_VECTOR_R_VM(vredmin_vs)
> +GEN_VECTOR_R_VM(vfredmin_vs)
> +GEN_VECTOR_R_VM(vmaxu_vv)
> +GEN_VECTOR_R_VM(vmaxu_vx)
> +GEN_VECTOR_R_VM(vredmaxu_vs)
> +GEN_VECTOR_R_VM(vfmax_vv)
> +GEN_VECTOR_R_VM(vfmax_vf)
> +GEN_VECTOR_R_VM(vmax_vv)
> +GEN_VECTOR_R_VM(vmax_vx)
> +GEN_VECTOR_R_VM(vredmax_vs)
> +GEN_VECTOR_R_VM(vfredmax_vs)
> +GEN_VECTOR_R_VM(vfsgnj_vv)
> +GEN_VECTOR_R_VM(vfsgnj_vf)
> +GEN_VECTOR_R_VM(vand_vv)
> +GEN_VECTOR_R_VM(vand_vx)
> +GEN_VECTOR_R_VM(vand_vi)
> +GEN_VECTOR_R_VM(vfsgnjn_vv)
> +GEN_VECTOR_R_VM(vfsgnjn_vf)
> +GEN_VECTOR_R_VM(vor_vv)
> +GEN_VECTOR_R_VM(vor_vx)
> +GEN_VECTOR_R_VM(vor_vi)
> +GEN_VECTOR_R_VM(vfsgnjx_vv)
> +GEN_VECTOR_R_VM(vfsgnjx_vf)
> +GEN_VECTOR_R_VM(vxor_vv)
> +GEN_VECTOR_R_VM(vxor_vx)
> +GEN_VECTOR_R_VM(vxor_vi)
> +GEN_VECTOR_R_VM(vrgather_vv)
> +GEN_VECTOR_R_VM(vrgather_vx)
> +GEN_VECTOR_R_VM(vrgather_vi)
> +GEN_VECTOR_R_VM(vslideup_vx)
> +GEN_VECTOR_R_VM(vslideup_vi)
> +GEN_VECTOR_R_VM(vslide1up_vx)
> +GEN_VECTOR_R_VM(vslidedown_vx)
> +GEN_VECTOR_R_VM(vslidedown_vi)
> +GEN_VECTOR_R_VM(vslide1down_vx)
> +GEN_VECTOR_R_VM(vmerge_vvm)
> +GEN_VECTOR_R_VM(vmerge_vxm)
> +GEN_VECTOR_R_VM(vmerge_vim)
> +GEN_VECTOR_R_VM(vfmerge_vfm)
> +GEN_VECTOR_R_VM(vmseq_vv)
> +GEN_VECTOR_R_VM(vmseq_vx)
> +GEN_VECTOR_R_VM(vmseq_vi)
> +GEN_VECTOR_R_VM(vmfeq_vv)
> +GEN_VECTOR_R_VM(vmfeq_vf)
> +GEN_VECTOR_R_VM(vmsne_vv)
> +GEN_VECTOR_R_VM(vmsne_vx)
> +GEN_VECTOR_R_VM(vmsne_vi)
> +GEN_VECTOR_R_VM(vmfle_vv)
> +GEN_VECTOR_R_VM(vmfle_vf)
> +GEN_VECTOR_R_VM(vmsltu_vv)
> +GEN_VECTOR_R_VM(vmsltu_vx)
> +GEN_VECTOR_R_VM(vmford_vv)
> +GEN_VECTOR_R_VM(vmford_vf)
> +GEN_VECTOR_R_VM(vmslt_vv)
> +GEN_VECTOR_R_VM(vmslt_vx)
> +GEN_VECTOR_R_VM(vmflt_vv)
> +GEN_VECTOR_R_VM(vmflt_vf)
> +GEN_VECTOR_R_VM(vmsleu_vv)
> +GEN_VECTOR_R_VM(vmsleu_vx)
> +GEN_VECTOR_R_VM(vmsleu_vi)
> +GEN_VECTOR_R_VM(vmfne_vv)
> +GEN_VECTOR_R_VM(vmfne_vf)
> +GEN_VECTOR_R_VM(vmsle_vv)
> +GEN_VECTOR_R_VM(vmsle_vx)
> +GEN_VECTOR_R_VM(vmsle_vi)
> +GEN_VECTOR_R_VM(vmfgt_vf)
> +GEN_VECTOR_R_VM(vmsgtu_vx)
> +GEN_VECTOR_R_VM(vmsgtu_vi)
> +GEN_VECTOR_R_VM(vmsgt_vx)
> +GEN_VECTOR_R_VM(vmsgt_vi)
> +GEN_VECTOR_R_VM(vmfge_vf)
> +GEN_VECTOR_R_VM(vsaddu_vv)
> +GEN_VECTOR_R_VM(vsaddu_vx)
> +GEN_VECTOR_R_VM(vsaddu_vi)
> +GEN_VECTOR_R_VM(vdivu_vv)
> +GEN_VECTOR_R_VM(vdivu_vx)
> +GEN_VECTOR_R_VM(vfdiv_vv)
> +GEN_VECTOR_R_VM(vfdiv_vf)
> +GEN_VECTOR_R_VM(vsadd_vv)
> +GEN_VECTOR_R_VM(vsadd_vx)
> +GEN_VECTOR_R_VM(vsadd_vi)
> +GEN_VECTOR_R_VM(vdiv_vv)
> +GEN_VECTOR_R_VM(vdiv_vx)
> +GEN_VECTOR_R_VM(vfrdiv_vf)
> +GEN_VECTOR_R_VM(vssubu_vv)
> +GEN_VECTOR_R_VM(vssubu_vx)
> +GEN_VECTOR_R_VM(vremu_vv)
> +GEN_VECTOR_R_VM(vremu_vx)
> +GEN_VECTOR_R_VM(vssub_vv)
> +GEN_VECTOR_R_VM(vssub_vx)
> +GEN_VECTOR_R_VM(vrem_vv)
> +GEN_VECTOR_R_VM(vrem_vx)
> +GEN_VECTOR_R_VM(vaadd_vv)
> +GEN_VECTOR_R_VM(vaadd_vx)
> +GEN_VECTOR_R_VM(vaadd_vi)
> +GEN_VECTOR_R_VM(vmulhu_vv)
> +GEN_VECTOR_R_VM(vmulhu_vx)
> +GEN_VECTOR_R_VM(vfmul_vv)
> +GEN_VECTOR_R_VM(vfmul_vf)
> +GEN_VECTOR_R_VM(vsll_vv)
> +GEN_VECTOR_R_VM(vsll_vx)
> +GEN_VECTOR_R_VM(vsll_vi)
> +GEN_VECTOR_R_VM(vmul_vv)
> +GEN_VECTOR_R_VM(vmul_vx)
> +GEN_VECTOR_R_VM(vasub_vv)
> +GEN_VECTOR_R_VM(vasub_vx)
> +GEN_VECTOR_R_VM(vmulhsu_vv)
> +GEN_VECTOR_R_VM(vmulhsu_vx)
> +GEN_VECTOR_R_VM(vsmul_vv)
> +GEN_VECTOR_R_VM(vsmul_vx)
> +GEN_VECTOR_R_VM(vmulh_vv)
> +GEN_VECTOR_R_VM(vmulh_vx)
> +GEN_VECTOR_R_VM(vfrsub_vf)
> +GEN_VECTOR_R_VM(vsrl_vv)
> +GEN_VECTOR_R_VM(vsrl_vx)
> +GEN_VECTOR_R_VM(vsrl_vi)
> +GEN_VECTOR_R_VM(vfmadd_vv)
> +GEN_VECTOR_R_VM(vfmadd_vf)
> +GEN_VECTOR_R_VM(vsra_vv)
> +GEN_VECTOR_R_VM(vsra_vx)
> +GEN_VECTOR_R_VM(vsra_vi)
> +GEN_VECTOR_R_VM(vmadd_vv)
> +GEN_VECTOR_R_VM(vmadd_vx)
> +GEN_VECTOR_R_VM(vfnmadd_vv)
> +GEN_VECTOR_R_VM(vfnmadd_vf)
> +GEN_VECTOR_R_VM(vssrl_vv)
> +GEN_VECTOR_R_VM(vssrl_vx)
> +GEN_VECTOR_R_VM(vssrl_vi)
> +GEN_VECTOR_R_VM(vfmsub_vv)
> +GEN_VECTOR_R_VM(vfmsub_vf)
> +GEN_VECTOR_R_VM(vssra_vv)
> +GEN_VECTOR_R_VM(vssra_vx)
> +GEN_VECTOR_R_VM(vssra_vi)
> +GEN_VECTOR_R_VM(vnmsub_vv)
> +GEN_VECTOR_R_VM(vnmsub_vx)
> +GEN_VECTOR_R_VM(vfnmsub_vv)
> +GEN_VECTOR_R_VM(vfnmsub_vf)
> +GEN_VECTOR_R_VM(vnsrl_vv)
> +GEN_VECTOR_R_VM(vnsrl_vx)
> +GEN_VECTOR_R_VM(vnsrl_vi)
> +GEN_VECTOR_R_VM(vfmacc_vv)
> +GEN_VECTOR_R_VM(vfmacc_vf)
> +GEN_VECTOR_R_VM(vnsra_vv)
> +GEN_VECTOR_R_VM(vnsra_vx)
> +GEN_VECTOR_R_VM(vnsra_vi)
> +GEN_VECTOR_R_VM(vmacc_vv)
> +GEN_VECTOR_R_VM(vmacc_vx)
> +GEN_VECTOR_R_VM(vfnmacc_vv)
> +GEN_VECTOR_R_VM(vfnmacc_vf)
> +GEN_VECTOR_R_VM(vnclipu_vv)
> +GEN_VECTOR_R_VM(vnclipu_vx)
> +GEN_VECTOR_R_VM(vnclipu_vi)
> +GEN_VECTOR_R_VM(vfmsac_vv)
> +GEN_VECTOR_R_VM(vfmsac_vf)
> +GEN_VECTOR_R_VM(vnclip_vv)
> +GEN_VECTOR_R_VM(vnclip_vx)
> +GEN_VECTOR_R_VM(vnclip_vi)
> +GEN_VECTOR_R_VM(vnmsac_vv)
> +GEN_VECTOR_R_VM(vnmsac_vx)
> +GEN_VECTOR_R_VM(vfnmsac_vv)
> +GEN_VECTOR_R_VM(vfnmsac_vf)
> +GEN_VECTOR_R_VM(vwredsumu_vs)
> +GEN_VECTOR_R_VM(vwaddu_vv)
> +GEN_VECTOR_R_VM(vwaddu_vx)
> +GEN_VECTOR_R_VM(vfwadd_vv)
> +GEN_VECTOR_R_VM(vfwadd_vf)
> +GEN_VECTOR_R_VM(vwredsum_vs)
> +GEN_VECTOR_R_VM(vwadd_vv)
> +GEN_VECTOR_R_VM(vwadd_vx)
> +GEN_VECTOR_R_VM(vfwredsum_vs)
> +GEN_VECTOR_R_VM(vwsubu_vv)
> +GEN_VECTOR_R_VM(vwsubu_vx)
> +GEN_VECTOR_R_VM(vfwsub_vv)
> +GEN_VECTOR_R_VM(vfwsub_vf)
> +GEN_VECTOR_R_VM(vwsub_vv)
> +GEN_VECTOR_R_VM(vwsub_vx)
> +GEN_VECTOR_R_VM(vfwredosum_vs)
> +GEN_VECTOR_R_VM(vwaddu_wv)
> +GEN_VECTOR_R_VM(vwaddu_wx)
> +GEN_VECTOR_R_VM(vfwadd_wv)
> +GEN_VECTOR_R_VM(vfwadd_wf)
> +GEN_VECTOR_R_VM(vwadd_wv)
> +GEN_VECTOR_R_VM(vwadd_wx)
> +GEN_VECTOR_R_VM(vwsubu_wv)
> +GEN_VECTOR_R_VM(vwsubu_wx)
> +GEN_VECTOR_R_VM(vfwsub_wv)
> +GEN_VECTOR_R_VM(vfwsub_wf)
> +GEN_VECTOR_R_VM(vwsub_wv)
> +GEN_VECTOR_R_VM(vwsub_wx)
> +GEN_VECTOR_R_VM(vwmulu_vv)
> +GEN_VECTOR_R_VM(vwmulu_vx)
> +GEN_VECTOR_R_VM(vfwmul_vv)
> +GEN_VECTOR_R_VM(vfwmul_vf)
> +GEN_VECTOR_R_VM(vwmulsu_vv)
> +GEN_VECTOR_R_VM(vwmulsu_vx)
> +GEN_VECTOR_R_VM(vwmul_vv)
> +GEN_VECTOR_R_VM(vwmul_vx)
> +GEN_VECTOR_R_VM(vwsmaccu_vv)
> +GEN_VECTOR_R_VM(vwsmaccu_vx)
> +GEN_VECTOR_R_VM(vwmaccu_vv)
> +GEN_VECTOR_R_VM(vwmaccu_vx)
> +GEN_VECTOR_R_VM(vfwmacc_vv)
> +GEN_VECTOR_R_VM(vfwmacc_vf)
> +GEN_VECTOR_R_VM(vwsmacc_vv)
> +GEN_VECTOR_R_VM(vwsmacc_vx)
> +GEN_VECTOR_R_VM(vwmacc_vv)
> +GEN_VECTOR_R_VM(vwmacc_vx)
> +GEN_VECTOR_R_VM(vfwnmacc_vv)
> +GEN_VECTOR_R_VM(vfwnmacc_vf)
> +GEN_VECTOR_R_VM(vwsmaccsu_vv)
> +GEN_VECTOR_R_VM(vwsmaccsu_vx)
> +GEN_VECTOR_R_VM(vwmaccsu_vv)
> +GEN_VECTOR_R_VM(vwmaccsu_vx)
> +GEN_VECTOR_R_VM(vfwmsac_vv)
> +GEN_VECTOR_R_VM(vfwmsac_vf)
> +GEN_VECTOR_R_VM(vwsmaccus_vx)
> +GEN_VECTOR_R_VM(vwmaccus_vx)
> +GEN_VECTOR_R_VM(vfwnmsac_vv)
> +GEN_VECTOR_R_VM(vfwnmsac_vf)
> +GEN_VECTOR_R2_ZIMM(vsetvli)
> +GEN_VECTOR_R(vsetvl)
> diff --git a/target/riscv/translate.c b/target/riscv/translate.c
> index 8d6ab73..587c23e 100644
> --- a/target/riscv/translate.c
> +++ b/target/riscv/translate.c
> @@ -706,6 +706,7 @@ static bool gen_shift(DisasContext *ctx, arg_r *a,
> #include "insn_trans/trans_rva.inc.c"
> #include "insn_trans/trans_rvf.inc.c"
> #include "insn_trans/trans_rvd.inc.c"
> +#include "insn_trans/trans_rvv.inc.c"
> #include "insn_trans/trans_privileged.inc.c"
>
> /*
> diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c
> new file mode 100644
> index 0000000..1f8f1ec
> --- /dev/null
> +++ b/target/riscv/vector_helper.c
> @@ -0,0 +1,26563 @@
> +/*
> + * RISC-V Vector Extension Helpers for QEMU.
> + *
> + * Copyright (c) 2011-2019 C-SKY Limited. All rights reserved.
> + *
> + * This program is free software; you can redistribute it and/or modify it
> + * under the terms and conditions of the GNU General Public License,
> + * version 2 or later, as published by the Free Software Foundation.
> + *
> + * This program is distributed in the hope it will be useful, but WITHOUT
> + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
> + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
> + * more details.
> + *
> + * You should have received a copy of the GNU General Public License along with
> + * this program. If not, see <http://www.gnu.org/licenses/>.
> + */
> +
> +#include "qemu/osdep.h"
> +#include "qemu/log.h"
> +#include "cpu.h"
> +#include "qemu/main-loop.h"
> +#include "exec/exec-all.h"
> +#include "exec/helper-proto.h"
> +#include "exec/translator.h"
> +#include "exec/cpu_ldst.h"
> +#include <math.h>
> +#include "instmap.h"
> +
> +#define VECTOR_HELPER(name) HELPER(glue(vector_, name))
> +#define SIGNBIT8 (1 << 7)
> +#define MAX_U8 ((uint8_t)0xff)
> +#define MIN_U8 ((uint8_t)0x0)
> +#define MAX_S8 ((int8_t)0x7f)
> +#define MIN_S8 ((int8_t)0x80)
> +#define SIGNBIT16 (1 << 15)
> +#define MAX_U16 ((uint16_t)0xffff)
> +#define MIN_U16 ((uint16_t)0x0)
> +#define MAX_S16 ((int16_t)0x7fff)
> +#define MIN_S16 ((int16_t)0x8000)
> +#define SIGNBIT32 (1 << 31)
> +#define MAX_U32 ((uint32_t)0xffffffff)
> +#define MIN_U32 ((uint32_t)0x0)
> +#define MAX_S32 ((int32_t)0x7fffffff)
> +#define MIN_S32 ((int32_t)0x80000000)
> +#define SIGNBIT64 ((uint64_t)1 << 63)
> +#define MAX_U64 ((uint64_t)0xffffffffffffffff)
> +#define MIN_U64 ((uint64_t)0x0)
> +#define MAX_S64 ((int64_t)0x7fffffffffffffff)
> +#define MIN_S64 ((int64_t)0x8000000000000000)
> +
> +static int64_t sign_extend(int64_t a, int8_t width)
> +{
> + return a << (64 - width) >> (64 - width);
> +}
> +
> +static int64_t extend_gpr(target_ulong reg)
> +{
> + return sign_extend(reg, sizeof(target_ulong) * 8);
> +}
> +
> +static target_ulong vector_get_index(CPURISCVState *env, int rs1, int rs2,
> + int index, int mem, int width, int nf)
> +{
> + target_ulong abs_off, base = env->gpr[rs1];
> + target_long offset;
> + switch (width) {
> + case 8:
> + offset = sign_extend(env->vfp.vreg[rs2].s8[index], 8) + nf * mem;
> + break;
> + case 16:
> + offset = sign_extend(env->vfp.vreg[rs2].s16[index], 16) + nf * mem;
> + break;
> + case 32:
> + offset = sign_extend(env->vfp.vreg[rs2].s32[index], 32) + nf * mem;
> + break;
> + case 64:
> + offset = env->vfp.vreg[rs2].s64[index] + nf * mem;
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return 0;
> + }
> + if (offset < 0) {
> + abs_off = ~offset + 1;
> + if (base >= abs_off) {
> + return base - abs_off;
> + }
> + } else {
> + if ((target_ulong)((target_ulong)offset + base) >= base) {
> + return (target_ulong)offset + base;
> + }
> + }
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return 0;
> +}
> +
> +
> +
> +/* ADD/SUB/COMPARE instructions. */
> +static inline uint8_t sat_add_u8(CPURISCVState *env, uint8_t a, uint8_t b)
> +{
> + uint8_t res = a + b;
> + if (res < a) {
> + res = MAX_U8;
> + env->vfp.vxsat = 0x1;
> +
> + }
> + return res;
> +}
> +
> +static inline uint16_t sat_add_u16(CPURISCVState *env, uint16_t a, uint16_t b)
> +{
> + uint16_t res = a + b;
> + if (res < a) {
> + res = MAX_U16;
> + env->vfp.vxsat = 0x1;
> +
> + }
> + return res;
> +}
> +
> +static inline uint32_t sat_add_u32(CPURISCVState *env, uint32_t a, uint32_t b)
> +{
> + uint32_t res = a + b;
> + if (res < a) {
> + res = MAX_U32;
> + env->vfp.vxsat = 0x1;
> +
> + }
> + return res;
> +}
> +
> +static inline uint64_t sat_add_u64(CPURISCVState *env, uint64_t a, uint64_t b)
> +{
> + uint64_t res = a + b;
> + if (res < a) {
> + res = MAX_U64;
> + env->vfp.vxsat = 0x1;
> +
> + }
> + return res;
> +}
> +
> +static inline uint8_t sat_add_s8(CPURISCVState *env, uint8_t a, uint8_t b)
> +{
> + uint8_t res = a + b;
> + if (((res ^ a) & SIGNBIT8) && !((a ^ b) & SIGNBIT8)) {
> + res = ~(((int8_t)a >> 7) ^ SIGNBIT8);
> + env->vfp.vxsat = 0x1;
> +
> + }
> + return res;
> +}
> +
> +static inline uint16_t sat_add_s16(CPURISCVState *env, uint16_t a, uint16_t b)
> +{
> + uint16_t res = a + b;
> + if (((res ^ a) & SIGNBIT16) && !((a ^ b) & SIGNBIT16)) {
> + res = ~(((int16_t)a >> 15) ^ SIGNBIT16);
> + env->vfp.vxsat = 0x1;
> +
> + }
> + return res;
> +}
> +
> +static inline uint32_t sat_add_s32(CPURISCVState *env, uint32_t a, uint32_t b)
> +{
> + uint32_t res = a + b;
> + if (((res ^ a) & SIGNBIT32) && !((a ^ b) & SIGNBIT32)) {
> + res = ~(((int32_t)a >> 31) ^ SIGNBIT32);
> + env->vfp.vxsat = 0x1;
> +
> + }
> + return res;
> +}
> +
> +static inline uint64_t sat_add_s64(CPURISCVState *env, uint64_t a, uint64_t b)
> +{
> + uint64_t res = a + b;
> + if (((res ^ a) & SIGNBIT64) && !((a ^ b) & SIGNBIT64)) {
> + res = ~(((int64_t)a >> 63) ^ SIGNBIT64);
> + env->vfp.vxsat = 0x1;
> +
> + }
> + return res;
> +}
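
The signed saturation test above relies on the fact that two's-complement
overflow occurs exactly when both operands share a sign and the wrapped sum
does not. A minimal standalone sketch of the 8-bit case (the function name
is mine, not from the patch):

    #include <stdint.h>
    #include <stdio.h>

    /* Same overflow test as sat_add_s8 above: overflow iff a and b
     * have equal sign bits but the wrapped sum's sign differs. */
    static int8_t demo_sat_add_s8(int8_t a, int8_t b)
    {
        uint8_t ua = (uint8_t)a, ub = (uint8_t)b;
        uint8_t res = ua + ub;
        if (((res ^ ua) & 0x80) && !((ua ^ ub) & 0x80)) {
            /* saturate toward a's sign: 0x7f if positive, 0x80 if negative */
            res = (ua & 0x80) ? 0x80 : 0x7f;
        }
        return (int8_t)res;
    }

    int main(void)
    {
        printf("%d\n", demo_sat_add_s8(100, 100));   /* 127: saturated high */
        printf("%d\n", demo_sat_add_s8(-100, -100)); /* -128: saturated low */
        printf("%d\n", demo_sat_add_s8(50, -20));    /* 30: exact */
        return 0;
    }
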
> +
> +static inline uint8_t sat_sub_u8(CPURISCVState *env, uint8_t a, uint8_t b)
> +{
> + uint8_t res = a - b;
> + if (res > a) {
> + res = 0;
> + env->vfp.vxsat = 0x1;
> +
> + }
> + return res;
> +}
> +
> +static inline uint16_t sat_sub_u16(CPURISCVState *env, uint16_t a, uint16_t b)
> +{
> + uint16_t res = a - b;
> + if (res > a) {
> + res = 0;
> + env->vfp.vxsat = 0x1;
> +
> + }
> + return res;
> +}
> +
> +static inline uint32_t sat_sub_u32(CPURISCVState *env, uint32_t a, uint32_t b)
> +{
> + uint32_t res = a - b;
> + if (res > a) {
> + res = 0;
> + env->vfp.vxsat = 0x1;
> +
> + }
> + return res;
> +}
> +
> +static inline uint64_t sat_sub_u64(CPURISCVState *env, uint64_t a, uint64_t b)
> +{
> + uint64_t res = a - b;
> + if (res > a) {
> + res = 0;
> + env->vfp.vxsat = 0x1;
> +
> + }
> + return res;
> +}
> +
> +static inline uint8_t sat_sub_s8(CPURISCVState *env, uint8_t a, uint8_t b)
> +{
> + uint8_t res = a - b;
> + if (((res ^ a) & SIGNBIT8) && ((a ^ b) & SIGNBIT8)) {
> + res = ~(((int8_t)a >> 7) ^ SIGNBIT8);
> + env->vfp.vxsat = 0x1;
> +
> + }
> + return res;
> +}
> +
> +static inline uint16_t sat_sub_s16(CPURISCVState *env, uint16_t a, uint16_t b)
> +{
> + uint16_t res = a - b;
> + if (((res ^ a) & SIGNBIT16) && ((a ^ b) & SIGNBIT16)) {
> + res = ~(((int16_t)a >> 15) ^ SIGNBIT16);
> + env->vfp.vxsat = 0x1;
> +
> + }
> + return res;
> +}
> +
> +static inline uint32_t sat_sub_s32(CPURISCVState *env, uint32_t a, uint32_t b)
> +{
> + uint32_t res = a - b;
> + if (((res ^ a) & SIGNBIT32) && ((a ^ b) & SIGNBIT32)) {
> + res = ~(((int32_t)a >> 31) ^ SIGNBIT32);
> + env->vfp.vxsat = 0x1;
> +
> + }
> + return res;
> +}
> +
> +static inline uint64_t sat_sub_s64(CPURISCVState *env, uint64_t a, uint64_t b)
> +{
> + uint64_t res = a - b;
> + if (((res ^ a) & SIGNBIT64) && ((a ^ b) & SIGNBIT64)) {
> + res = ~(((int64_t)a >> 63) ^ SIGNBIT64);
> + env->vfp.vxsat = 0x1;
> +
> + }
> + return res;
> +}
> +
> +static uint64_t fix_data_round(CPURISCVState *env, uint64_t result,
> + uint8_t shift)
> +{
> + uint64_t lsb_1 = (uint64_t)1 << shift;
> + int mod = env->vfp.vxrm;
> + uint64_t mask = ((uint64_t)1 << shift) - 1;
> +
> + if (mod == 0x0) { /* rnu */
> + return lsb_1 >> 1;
> + } else if (mod == 0x1) { /* rne */
> + if ((result & mask) > (lsb_1 >> 1) ||
> + (((result & mask) == (lsb_1 >> 1)) &&
> + (((result >> shift) & 0x1)) == 1)) {
> + return lsb_1 >> 1;
> + }
> + } else if (mod == 0x3) { /* rod */
> + if (((result & mask) >= 0x1) && (((result >> shift) & 0x1) == 0)) {
> + return lsb_1;
> + }
> + }
> + return 0;
> +}
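
fix_data_round only returns the rounding increment; every caller adds it to
the raw value and then shifts. Folding both steps together, the four vxrm
modes behave like this standalone model (rdn, mode 2, simply truncates; the
function name is hypothetical):

    #include <stdint.h>
    #include <stdio.h>

    enum { RNU = 0, RNE = 1, RDN = 2, ROD = 3 };

    /* Round v >> shift under the given RVV 0.7.1 rounding mode. */
    static uint64_t demo_round_shift(uint64_t v, unsigned shift, int vxrm)
    {
        uint64_t lsb = 1ULL << shift;
        uint64_t mask = lsb - 1;
        uint64_t inc = 0;

        if (vxrm == RNU) {                 /* round-to-nearest-up */
            inc = lsb >> 1;
        } else if (vxrm == RNE) {          /* round-to-nearest-even */
            if ((v & mask) > (lsb >> 1) ||
                (((v & mask) == (lsb >> 1)) && ((v >> shift) & 1))) {
                inc = lsb >> 1;
            }
        } else if (vxrm == ROD) {          /* round-to-odd (jam) */
            if ((v & mask) && !((v >> shift) & 1)) {
                inc = lsb;
            }
        }                                  /* RDN: inc stays 0 */
        return (v + inc) >> shift;
    }

    int main(void)
    {
        printf("%d\n", (int)demo_round_shift(5, 1, RNU)); /* 2.5 -> 3 */
        printf("%d\n", (int)demo_round_shift(5, 1, RNE)); /* 2.5 -> 2, even */
        printf("%d\n", (int)demo_round_shift(5, 1, ROD)); /* 2.5 -> 3, odd */
        return 0;
    }
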
> +
> +static int8_t saturate_s8(CPURISCVState *env, int16_t res)
> +{
> + if (res > MAX_S8) {
> + env->vfp.vxsat = 0x1;
> + return MAX_S8;
> + } else if (res < MIN_S8) {
> + env->vfp.vxsat = 0x1;
> + return MIN_S8;
> + } else {
> + return res;
> + }
> +}
> +
> +static uint8_t saturate_u8(CPURISCVState *env, uint16_t res)
> +{
> + if (res > MAX_U8) {
> + env->vfp.vxsat = 0x1;
> + return MAX_U8;
> + } else {
> + return res;
> + }
> +}
> +
> +static uint16_t saturate_u16(CPURISCVState *env, uint32_t res)
> +{
> + if (res > MAX_U16) {
> + env->vfp.vxsat = 0x1;
> + return MAX_U16;
> + } else {
> + return res;
> + }
> +}
> +
> +static uint32_t saturate_u32(CPURISCVState *env, uint64_t res)
> +{
> + if (res > MAX_U32) {
> + env->vfp.vxsat = 0x1;
> + return MAX_U32;
> + } else {
> + return res;
> + }
> +}
> +
> +static int16_t saturate_s16(CPURISCVState *env, int32_t res)
> +{
> + if (res > MAX_S16) {
> + env->vfp.vxsat = 0x1;
> + return MAX_S16;
> + } else if (res < MIN_S16) {
> + env->vfp.vxsat = 0x1;
> + return MIN_S16;
> + } else {
> + return res;
> + }
> +}
> +
> +static int32_t saturate_s32(CPURISCVState *env, int64_t res)
> +{
> + if (res > MAX_S32) {
> + env->vfp.vxsat = 0x1;
> + return MAX_S32;
> + } else if (res < MIN_S32) {
> + env->vfp.vxsat = 0x1;
> + return MIN_S32;
> + } else {
> + return res;
> + }
> +}
> +
> +static uint16_t vwsmaccu_8(CPURISCVState *env, uint8_t a, uint8_t b,
> + uint16_t c)
> +{
> + uint16_t round, res;
> + uint16_t product = (uint16_t)a * (uint16_t)b;
> +
> + round = (uint16_t)fix_data_round(env, (uint64_t)product, 4);
> + res = (round + product) >> 4;
> + return sat_add_u16(env, c, res);
> +}
> +
> +static uint32_t vwsmaccu_16(CPURISCVState *env, uint16_t a, uint16_t b,
> + uint32_t c)
> +{
> + uint32_t round, res;
> + uint32_t product = (uint32_t)a * (uint32_t)b;
> +
> + round = (uint32_t)fix_data_round(env, (uint64_t)product, 8);
> + res = (round + product) >> 8;
> + return sat_add_u32(env, c, res);
> +}
> +
> +static uint64_t vwsmaccu_32(CPURISCVState *env, uint32_t a, uint32_t b,
> + uint64_t c)
> +{
> + uint64_t round, res;
> + uint64_t product = (uint64_t)a * (uint64_t)b;
> +
> + round = (uint64_t)fix_data_round(env, (uint64_t)product, 16);
> + res = (round + product) >> 16;
> + return sat_add_u64(env, c, res);
> +}
> +
> +static int16_t vwsmacc_8(CPURISCVState *env, int8_t a, int8_t b,
> + int16_t c)
> +{
> + int16_t round, res;
> + int16_t product = (int16_t)a * (int16_t)b;
> +
> + round = (int16_t)fix_data_round(env, (uint64_t)product, 4);
> + res = (int16_t)(round + product) >> 4;
> + return sat_add_s16(env, c, res);
> +}
> +
> +static int32_t vwsmacc_16(CPURISCVState *env, int16_t a, int16_t b,
> + int32_t c)
> +{
> + int32_t round, res;
> + int32_t product = (int32_t)a * (int32_t)b;
> +
> + round = (int32_t)fix_data_round(env, (uint64_t)product, 8);
> + res = (int32_t)(round + product) >> 8;
> + return sat_add_s32(env, c, res);
> +}
> +
> +static int64_t vwsmacc_32(CPURISCVState *env, int32_t a, int32_t b,
> + int64_t c)
> +{
> + int64_t round, res;
> + int64_t product = (int64_t)a * (int64_t)b;
> +
> + round = (int64_t)fix_data_round(env, (uint64_t)product, 16);
> + res = (int64_t)(round + product) >> 16;
> + return sat_add_s64(env, c, res);
> +}
> +
> +static int16_t vwsmaccsu_8(CPURISCVState *env, uint8_t a, int8_t b,
> + int16_t c)
> +{
> + int16_t round, res;
> + int16_t product = (uint16_t)a * (int16_t)b;
> +
> + round = (int16_t)fix_data_round(env, (uint64_t)product, 4);
> + res = (round + product) >> 4;
> + return sat_sub_s16(env, c, res);
> +}
> +
> +static int32_t vwsmaccsu_16(CPURISCVState *env, uint16_t a, int16_t b,
> + int32_t c)
> +{
> + int32_t round, res;
> + int32_t product = (uint32_t)a * (int32_t)b;
> +
> + round = (int32_t)fix_data_round(env, (uint64_t)product, 8);
> + res = (round + product) >> 8;
> + return sat_sub_s32(env, c, res);
> +}
> +
> +static int64_t vwsmaccsu_32(CPURISCVState *env, uint32_t a, int32_t b,
> + int64_t c)
> +{
> + int64_t round, res;
> + int64_t product = (uint64_t)a * (int64_t)b;
> +
> + round = (int64_t)fix_data_round(env, (uint64_t)product, 16);
> + res = (round + product) >> 16;
> + return sat_sub_s64(env, c, res);
> +}
> +
> +static int16_t vwsmaccus_8(CPURISCVState *env, int8_t a, uint8_t b,
> + int16_t c)
> +{
> + int16_t round, res;
> + int16_t product = (int16_t)a * (uint16_t)b;
> +
> + round = (int16_t)fix_data_round(env, (uint64_t)product, 4);
> + res = (round + product) >> 4;
> + return sat_sub_s16(env, c, res);
> +}
> +
> +static int32_t vwsmaccus_16(CPURISCVState *env, int16_t a, uint16_t b,
> + int32_t c)
> +{
> + int32_t round, res;
> + int32_t product = (int32_t)a * (uint32_t)b;
> +
> + round = (int32_t)fix_data_round(env, (uint64_t)product, 8);
> + res = (round + product) >> 8;
> + return sat_sub_s32(env, c, res);
> +}
> +
> +static int64_t vwsmaccus_32(CPURISCVState *env, int32_t a, uint32_t b,
> + int64_t c)
> +{
> + int64_t round, res;
> + int64_t product = (int64_t)a * (uint64_t)b;
> +
> + round = (int64_t)fix_data_round(env, (uint64_t)product, 16);
> + res = (round + product) >> 16;
> + return sat_sub_s64(env, c, res);
> +}
> +
> +static int8_t vssra_8(CPURISCVState *env, int8_t a, uint8_t b)
> +{
> + int16_t round, res;
> + uint8_t shift = b & 0x7;
> +
> + round = (int16_t)fix_data_round(env, (uint64_t)a, shift);
> + res = (a + round) >> shift;
> +
> + return res;
> +}
> +
> +static int16_t vssra_16(CPURISCVState *env, int16_t a, uint16_t b)
> +{
> + int32_t round, res;
> + uint8_t shift = b & 0xf;
> +
> + round = (int32_t)fix_data_round(env, (uint64_t)a, shift);
> + res = (a + round) >> shift;
> + return res;
> +}
> +
> +static int32_t vssra_32(CPURISCVState *env, int32_t a, uint32_t b)
> +{
> + int64_t round, res;
> + uint8_t shift = b & 0x1f;
> +
> + round = (int64_t)fix_data_round(env, (uint64_t)a, shift);
> + res = (a + round) >> shift;
> + return res;
> +}
> +
> +static int64_t vssra_64(CPURISCVState *env, int64_t a, uint64_t b)
> +{
> + int64_t round, res;
> + uint8_t shift = b & 0x3f;
> +
> + round = (int64_t)fix_data_round(env, (uint64_t)a, shift);
> + res = (a >> (shift - 1)) + (round >> (shift - 1));
> + return res >> 1;
> +}
> +
> +static int8_t vssrai_8(CPURISCVState *env, int8_t a, uint8_t b)
> +{
> + int16_t round, res;
> +
> + round = (int16_t)fix_data_round(env, (uint64_t)a, b);
> + res = (a + round) >> b;
> + return res;
> +}
> +
> +static int16_t vssrai_16(CPURISCVState *env, int16_t a, uint8_t b)
> +{
> + int32_t round, res;
> +
> + round = (int32_t)fix_data_round(env, (uint64_t)a, b);
> + res = (a + round) >> b;
> + return res;
> +}
> +
> +static int32_t vssrai_32(CPURISCVState *env, int32_t a, uint8_t b)
> +{
> + int64_t round, res;
> +
> + round = (int64_t)fix_data_round(env, (uint64_t)a, b);
> + res = (a + round) >> b;
> + return res;
> +}
> +
> +static int64_t vssrai_64(CPURISCVState *env, int64_t a, uint8_t b)
> +{
> + int64_t round, res;
> +
> + round = (int64_t)fix_data_round(env, (uint64_t)a, b);
> + res = (a >> (b - 1)) + (round >> (b - 1));
> + return res >> 1;
> +}
> +
> +static int8_t vnclip_16(CPURISCVState *env, int16_t a, uint8_t b)
> +{
> + int16_t round, res;
> + uint8_t shift = b & 0xf;
> +
> + round = (int16_t)fix_data_round(env, (uint64_t)a, shift);
> + res = (a + round) >> shift;
> +
> + return saturate_s8(env, res);
> +}
> +
> +static int16_t vnclip_32(CPURISCVState *env, int32_t a, uint16_t b)
> +{
> + int32_t round, res;
> + uint8_t shift = b & 0x1f;
> +
> + round = (int32_t)fix_data_round(env, (uint64_t)a, shift);
> + res = (a + round) >> shift;
> + return saturate_s16(env, res);
> +}
> +
> +static int32_t vnclip_64(CPURISCVState *env, int64_t a, uint32_t b)
> +{
> + int64_t round, res;
> + uint8_t shift = b & 0x3f;
> +
> + round = (int64_t)fix_data_round(env, (uint64_t)a, shift);
> + res = (a + round) >> shift;
> +
> + return saturate_s32(env, res);
> +}
> +
> +static int8_t vnclipi_16(CPURISCVState *env, int16_t a, uint8_t b)
> +{
> + int16_t round, res;
> +
> + round = (int16_t)fix_data_round(env, (uint64_t)a, b);
> + res = (a + round) >> b;
> +
> + return saturate_s8(env, res);
> +}
> +
> +static int16_t vnclipi_32(CPURISCVState *env, int32_t a, uint8_t b)
> +{
> + int32_t round, res;
> +
> + round = (int32_t)fix_data_round(env, (uint64_t)a, b);
> + res = (a + round) >> b;
> +
> + return saturate_s16(env, res);
> +}
> +
> +static int32_t vnclipi_64(CPURISCVState *env, int64_t a, uint8_t b)
> +{
> + int64_t round, res;
> +
> + round = (int64_t)fix_data_round(env, (uint64_t)a, b);
> + res = (a + round) >> b;
> +
> + return saturate_s32(env, res);
> +}
> +
> +static uint8_t vnclipu_16(CPURISCVState *env, uint16_t a, uint8_t b)
> +{
> + uint16_t round, res;
> + uint8_t shift = b & 0xf;
> +
> + round = (uint16_t)fix_data_round(env, (uint64_t)a, shift);
> + res = (a + round) >> shift;
> +
> + return saturate_u8(env, res);
> +}
> +
> +static uint16_t vnclipu_32(CPURISCVState *env, uint32_t a, uint16_t b)
> +{
> + uint32_t round, res;
> + uint8_t shift = b & 0x1f;
> +
> + round = (uint32_t)fix_data_round(env, (uint64_t)a, shift);
> + res = (a + round) >> shift;
> +
> + return saturate_u16(env, res);
> +}
> +
> +static uint32_t vnclipu_64(CPURISCVState *env, uint64_t a, uint32_t b)
> +{
> + uint64_t round, res;
> + uint8_t shift = b & 0x3f;
> +
> + round = (uint64_t)fix_data_round(env, (uint64_t)a, shift);
> + res = (a + round) >> shift;
> +
> + return saturate_u32(env, res);
> +}
> +
> +static uint8_t vnclipui_16(CPURISCVState *env, uint16_t a, uint8_t b)
> +{
> + uint16_t round, res;
> +
> + round = (uint16_t)fix_data_round(env, (uint64_t)a, b);
> + res = (a + round) >> b;
> +
> + return saturate_u8(env, res);
> +}
> +
> +static uint16_t vnclipui_32(CPURISCVState *env, uint32_t a, uint8_t b)
> +{
> + uint32_t round, res;
> +
> + round = (uint32_t)fix_data_round(env, (uint64_t)a, b);
> + res = (a + round) >> b;
> +
> + return saturate_u16(env, res);
> +}
> +
> +static uint32_t vnclipui_64(CPURISCVState *env, uint64_t a, uint8_t b)
> +{
> + uint64_t round, res;
> +
> + round = (uint64_t)fix_data_round(env, (uint64_t)a, b);
> + res = (a + round) >> b;
> +
> + return saturate_u32(env, res);
> +}
> +
> +static uint8_t vssrl_8(CPURISCVState *env, uint8_t a, uint8_t b)
> +{
> + uint16_t round, res;
> + uint8_t shift = b & 0x7;
> +
> + round = (uint16_t)fix_data_round(env, (uint64_t)a, shift);
> + res = (a + round) >> shift;
> + return res;
> +}
> +
> +static uint16_t vssrl_16(CPURISCVState *env, uint16_t a, uint16_t b)
> +{
> + uint32_t round, res;
> + uint8_t shift = b & 0xf;
> +
> + round = (uint32_t)fix_data_round(env, (uint64_t)a, shift);
> + res = (a + round) >> shift;
> + return res;
> +}
> +
> +static uint32_t vssrl_32(CPURISCVState *env, uint32_t a, uint32_t b)
> +{
> + uint64_t round, res;
> + uint8_t shift = b & 0x1f;
> +
> + round = (uint64_t)fix_data_round(env, (uint64_t)a, shift);
> + res = (a + round) >> shift;
> + return res;
> +}
> +
> +static uint64_t vssrl_64(CPURISCVState *env, uint64_t a, uint64_t b)
> +{
> + uint64_t round, res;
> + uint8_t shift = b & 0x3f;
> +
> + round = (uint64_t)fix_data_round(env, (uint64_t)a, shift);
> + res = (a >> (shift - 1)) + (round >> (shift - 1));
> + return res >> 1;
> +}
> +
> +static uint8_t vssrli_8(CPURISCVState *env, uint8_t a, uint8_t b)
> +{
> + uint16_t round, res;
> +
> + round = (uint16_t)fix_data_round(env, (uint64_t)a, b);
> + res = (a + round) >> b;
> + return res;
> +}
> +
> +static uint16_t vssrli_16(CPURISCVState *env, uint16_t a, uint8_t b)
> +{
> + uint32_t round, res;
> +
> + round = (uint32_t)fix_data_round(env, (uint64_t)a, b);
> + res = (a + round) >> b;
> + return res;
> +}
> +
> +static uint32_t vssrli_32(CPURISCVState *env, uint32_t a, uint8_t b)
> +{
> + uint64_t round, res;
> +
> + round = (uint64_t)fix_data_round(env, (uint64_t)a, b);
> + res = (a + round) >> b;
> + return res;
> +}
> +
> +static uint64_t vssrli_64(CPURISCVState *env, uint64_t a, uint8_t b)
> +{
> + uint64_t round, res;
> +
> + round = (uint64_t)fix_data_round(env, (uint64_t)a, b);
> + res = (a >> (b - 1)) + (round >> (b - 1));
> + return res >> 1;
> +}
> +
> +static int8_t vsmul_8(CPURISCVState *env, int8_t a, int8_t b)
> +{
> + int16_t round;
> + int8_t res;
> + int16_t product = (int16_t)a * (int16_t)b;
> +
> + if (a == MIN_S8 && b == MIN_S8) {
> + env->vfp.vxsat = 1;
> +
> + return MAX_S8;
> + }
> +
> + round = (int16_t)fix_data_round(env, (uint64_t)product, 7);
> + res = sat_add_s16(env, product, round) >> 7;
> + return res;
> +}
> +
> +
> +static int16_t vsmul_16(CPURISCVState *env, int16_t a, int16_t b)
> +{
> + int32_t round;
> + int16_t res;
> + int32_t product = (int32_t)a * (int32_t)b;
> +
> + if (a == MIN_S16 && b == MIN_S16) {
> + env->vfp.vxsat = 1;
> +
> + return MAX_S16;
> + }
> +
> + round = (int32_t)fix_data_round(env, (uint64_t)product, 15);
> + res = sat_add_s32(env, product, round) >> 15;
> + return res;
> +}
> +
> +static int32_t vsmul_32(CPURISCVState *env, int32_t a, int32_t b)
> +{
> + int64_t round;
> + int32_t res;
> + int64_t product = (int64_t)a * (int64_t)b;
> +
> + if (a == MIN_S32 && b == MIN_S32) {
> + env->vfp.vxsat = 1;
> +
> + return MAX_S32;
> + }
> +
> + round = (int64_t)fix_data_round(env, (uint64_t)product, 31);
> + res = sat_add_s64(env, product, round) >> 31;
> + return res;
> +}
> +
> +
> +static int64_t vsmul_64(CPURISCVState *env, int64_t a, int64_t b)
> +{
> + int64_t res;
> + uint64_t abs_a = a, abs_b = b;
> + uint64_t lo_64, hi_64, carry, round;
> +
> + if (a == MIN_S64 && b == MIN_S64) {
> + env->vfp.vxsat = 1;
> +
> + return MAX_S64;
> + }
> +
> + if (a < 0) {
> + abs_a = ~a + 1;
> + }
> + if (b < 0) {
> + abs_b = ~b + 1;
> + }
> +
> + /* first get the whole product in {hi_64, lo_64} */
> + uint64_t a_hi = abs_a >> 32;
> + uint64_t a_lo = (uint32_t)abs_a;
> + uint64_t b_hi = abs_b >> 32;
> + uint64_t b_lo = (uint32_t)abs_b;
> +
> + /*
> + * abs_a * abs_b = (a_hi << 32 + a_lo) * (b_hi << 32 + b_lo)
> + * = (a_hi * b_hi) << 64 + (a_hi * b_lo) << 32 +
> + * (a_lo * b_hi) << 32 + a_lo * b_lo
> + * = {hi_64, lo_64}
> + * hi_64 = ((a_hi * b_lo) << 32 + (a_lo * b_hi) << 32 + (a_lo * b_lo)) >> 64
> + * = (a_hi * b_lo) >> 32 + (a_lo * b_hi) >> 32 + carry
> + * carry = ((uint64_t)(uint32_t)(a_hi * b_lo) +
> + * (uint64_t)(uint32_t)(a_lo * b_hi) + (a_lo * b_lo) >> 32) >> 32
> + */
> +
> + lo_64 = abs_a * abs_b;
> + carry = ((uint64_t)(uint32_t)(a_hi * b_lo) +
> + (uint64_t)(uint32_t)(a_lo * b_hi) +
> + ((a_lo * b_lo) >> 32)) >> 32;
> +
> + hi_64 = a_hi * b_hi +
> + ((a_hi * b_lo) >> 32) + ((a_lo * b_hi) >> 32) +
> + carry;
> +
> + if ((a ^ b) & SIGNBIT64) {
> + lo_64 = ~lo_64;
> + hi_64 = ~hi_64;
> + if (lo_64 == MAX_U64) {
> + lo_64 = 0;
> + hi_64 += 1;
> + } else {
> + lo_64 += 1;
> + }
> + }
> +
> + /* set rem and res */
> + round = fix_data_round(env, lo_64, 63);
> + if ((lo_64 + round) < lo_64) {
> + hi_64 += 1;
> + res = (hi_64 << 1);
> + } else {
> + res = (hi_64 << 1) | ((lo_64 + round) >> 63);
> + }
> +
> + return res;
> +}
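
On hosts where the compiler provides __int128 (GCC and Clang on 64-bit
targets), the partial-product computation above can be cross-checked against
a direct 128-bit multiply; a throwaway test sketch, not part of the patch:

    #include <stdint.h>
    #include <assert.h>

    /* Reference: high 64 bits of a signed 64x64 multiply. */
    static int64_t mulh64_ref(int64_t a, int64_t b)
    {
        return (int64_t)(((__int128)a * b) >> 64);
    }

    int main(void)
    {
        assert(mulh64_ref(INT64_MIN, 2) == -1);
        assert(mulh64_ref(INT64_MAX, INT64_MAX) == INT64_MAX >> 1);
        return 0;
    }
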
> +
> +static inline int8_t avg_round_s8(CPURISCVState *env, int8_t a, int8_t b)
> +{
> + int16_t round;
> + int8_t res;
> + int16_t sum = a + b;
> +
> + round = (int16_t)fix_data_round(env, (uint64_t)sum, 1);
> + res = (sum + round) >> 1;
> +
> + return res;
> +}
> +
> +static inline int16_t avg_round_s16(CPURISCVState *env, int16_t a, int16_t b)
> +{
> + int32_t round;
> + int16_t res;
> + int32_t sum = a + b;
> +
> + round = (int32_t)fix_data_round(env, (uint64_t)sum, 1);
> + res = (sum + round) >> 1;
> +
> + return res;
> +}
> +
> +static inline int32_t avg_round_s32(CPURISCVState *env, int32_t a, int32_t b)
> +{
> + int64_t round;
> + int32_t res;
> + int64_t sum = a + b;
> +
> + round = (int64_t)fix_data_round(env, (uint64_t)sum, 1);
> + res = (sum + round) >> 1;
> +
> + return res;
> +}
> +
> +static inline int64_t avg_round_s64(CPURISCVState *env, int64_t a, int64_t b)
> +{
> + int64_t rem = (a & 0x1) + (b & 0x1);
> + int64_t res = (a >> 1) + (b >> 1) + (rem >> 1);
> + int mod = env->vfp.vxrm;
> +
> + if (mod == 0x0) { /* rnu */
> + if (rem == 0x1) {
> + return res + 1;
> + }
> + } else if (mod == 0x1) { /* rne */
> + if ((rem & 0x1) == 1 && ((res & 0x1) == 1)) {
> + return res + 1;
> + }
> + } else if (mod == 0x3) { /* rod */
> + if (((rem & 0x1) >= 0x1) && (res & 0x1) == 0) {
> + return res + 1;
> + }
> + }
> + return res;
> +}
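
Unlike the narrower variants, the 64-bit average cannot widen its
intermediate sum, so it splits the dropped low bits off first and rounds
them separately. The same decomposition in isolation, rnu mode only (the
name is mine):

    #include <stdint.h>
    #include <assert.h>

    /* Overflow-free signed 64-bit average with round-to-nearest-up:
     * sum the discarded low bits separately so (a + b) never widens. */
    static int64_t demo_avg_rnu(int64_t a, int64_t b)
    {
        int64_t rem = (a & 1) + (b & 1);
        int64_t res = (a >> 1) + (b >> 1) + (rem >> 1);
        return (rem & 1) ? res + 1 : res;  /* rem == 1: fraction is .5 */
    }

    int main(void)
    {
        assert(demo_avg_rnu(3, 4) == 4);   /* 3.5 rounds up */
        assert(demo_avg_rnu(INT64_MAX, INT64_MAX) == INT64_MAX);
        return 0;
    }
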
> +
> +static target_ulong helper_fclass_h(uint64_t frs1)
> +{
> + float16 f = frs1;
> + bool sign = float16_is_neg(f);
> +
> + if (float16_is_infinity(f)) {
> + return sign ? 1 << 0 : 1 << 7;
> + } else if (float16_is_zero(f)) {
> + return sign ? 1 << 3 : 1 << 4;
> + } else if (float16_is_zero_or_denormal(f)) {
> + return sign ? 1 << 2 : 1 << 5;
> + } else if (float16_is_any_nan(f)) {
> + float_status s = { }; /* for snan_bit_is_one */
> + return float16_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8;
> + } else {
> + return sign ? 1 << 1 : 1 << 6;
> + }
> +}
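
The return value is one-hot in the standard FCLASS encoding, so callers can
decode it positionally. For reference, a small decoder of the ten class bits
(the names follow the ISA spec; the decoder itself is only illustrative):

    #include <stdio.h>

    /* FCLASS result bits: bit N set means the input is in class N. */
    static const char *const fclass_names[10] = {
        "-inf", "-normal", "-subnormal", "-zero", "+zero",
        "+subnormal", "+normal", "+inf", "signaling NaN", "quiet NaN",
    };

    static void print_fclass(unsigned long cls)
    {
        int i;
        for (i = 0; i < 10; i++) {
            if (cls & (1ul << i)) {
                printf("%s\n", fclass_names[i]);
            }
        }
    }

    int main(void)
    {
        print_fclass(1ul << 7);  /* +inf */
        print_fclass(1ul << 3);  /* -zero */
        return 0;
    }
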
> +
> +static inline bool vector_vtype_ill(CPURISCVState *env)
> +{
> + if ((env->vfp.vtype >> (sizeof(target_ulong) * 8 - 1)) & 0x1) {
> + return true;
> + }
> + return false;
> +}
> +
> +static inline void vector_vtype_set_ill(CPURISCVState *env)
> +{
> + env->vfp.vtype = ((target_ulong)1) << (sizeof(target_ulong) * 8 - 1);
> + return;
> +}
> +
> +static inline int vector_vtype_get_sew(CPURISCVState *env)
> +{
> + return (env->vfp.vtype >> 2) & 0x7;
> +}
> +
> +static inline int vector_get_width(CPURISCVState *env)
> +{
> + return 8 * (1 << vector_vtype_get_sew(env));
> +}
> +
> +static inline int vector_get_lmul(CPURISCVState *env)
> +{
> + return 1 << (env->vfp.vtype & 0x3);
> +}
> +
> +static inline int vector_get_vlmax(CPURISCVState *env)
> +{
> + return vector_get_lmul(env) * VLEN / vector_get_width(env);
> +}
> +
> +static inline int vector_elem_mask(CPURISCVState *env, uint32_t vm, int width,
> + int lmul, int index)
> +{
> + int mlen = width / lmul;
> + int idx = (index * mlen) / 8;
> + int pos = (index * mlen) % 8;
> +
> + return vm || ((env->vfp.vreg[0].u8[idx] >> pos) & 0x1);
> +}
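
In the 0.7.1 register layout a mask element occupies MLEN = SEW/LMUL bits of
v0, so element i's flag sits at bit i*MLEN (and vm != 0 means the operation
is unmasked, hence the short-circuit). The index arithmetic in isolation:

    #include <stdio.h>

    /* Byte index and bit position of mask element i when each mask
     * element is mlen = width/lmul bits wide, as in vector_elem_mask. */
    static void mask_layout(int width, int lmul, int index)
    {
        int mlen = width / lmul;
        printf("elem %d: byte %d, bit %d\n",
               index, (index * mlen) / 8, (index * mlen) % 8);
    }

    int main(void)
    {
        mask_layout(32, 1, 1);  /* elem 1: byte 4, bit 0 */
        mask_layout(8, 2, 3);   /* elem 3: byte 1, bit 4 */
        return 0;
    }
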
> +
> +static inline bool vector_overlap_vm_common(int lmul, int vm, int rd)
> +{
> + if (lmul > 1 && vm == 0 && rd == 0) {
> + return true;
> + }
> + return false;
> +}
> +
> +static inline bool vector_overlap_vm_force(int vm, int rd)
> +{
> + if (vm == 0 && rd == 0) {
> + return true;
> + }
> + return false;
> +}
> +
> +static inline bool vector_overlap_carry(int lmul, int rd)
> +{
> + if (lmul > 1 && rd == 0) {
> + return true;
> + }
> + return false;
> +}
> +
> +static inline bool vector_overlap_dstgp_srcgp(int rd, int dlen, int rs,
> + int slen)
> +{
> + if ((rd >= rs && rd < rs + slen) || (rs >= rd && rs < rd + dlen)) {
> + return true;
> + }
> + return false;
> +}
> +
> +static inline uint64_t vector_get_mask(int start, int end)
> +{
> + return ((uint64_t)(~((uint64_t)0))) << (63 - end + start) >> (63 - end);
> +}
> +
> +/* fetch unsigned element by width */
> +static inline uint64_t vector_get_iu_elem(CPURISCVState *env, uint32_t width,
> + uint32_t rs2, uint32_t index)
> +{
> + uint64_t elem;
> + if (width == 8) {
> + elem = env->vfp.vreg[rs2].u8[index];
> + } else if (width == 16) {
> + elem = env->vfp.vreg[rs2].u16[index];
> + } else if (width == 32) {
> + elem = env->vfp.vreg[rs2].u32[index];
> + } else if (width == 64) {
> + elem = env->vfp.vreg[rs2].u64[index];
> + } else { /* the max of (XLEN, FLEN) is no bigger than 64 */
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return 0;
> + }
> + return elem;
> +}
> +
> +static inline int vector_mask_reg(CPURISCVState *env, uint32_t reg, int width,
> + int lmul, int index)
> +{
> + int mlen = width / lmul;
> + int idx = (index * mlen) / 8;
> + int pos = (index * mlen) % 8;
> + return (env->vfp.vreg[reg].u8[idx] >> pos) & 0x1;
> +}
> +
> +static inline void vector_mask_result(CPURISCVState *env, uint32_t reg,
> + int width, int lmul, int index, uint32_t result)
> +{
> + int mlen = width / lmul;
> + int idx = (index * mlen) / width;
> + int pos = (index * mlen) % width;
> + uint64_t mask = ~((((uint64_t)1 << mlen) - 1) << pos);
> +
> + switch (width) {
> + case 8:
> + env->vfp.vreg[reg].u8[idx] = (env->vfp.vreg[reg].u8[idx] & mask)
> + | (result << pos);
> + break;
> + case 16:
> + env->vfp.vreg[reg].u16[idx] = (env->vfp.vreg[reg].u16[idx] & mask)
> + | (result << pos);
> + break;
> + case 32:
> + env->vfp.vreg[reg].u32[idx] = (env->vfp.vreg[reg].u32[idx] & mask)
> + | (result << pos);
> + break;
> + case 64:
> + env->vfp.vreg[reg].u64[idx] = (env->vfp.vreg[reg].u64[idx] & mask)
> + | ((uint64_t)result << pos);
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> +
> + return;
> +}
> +
> +/**
> + * deposit16:
> + * @value: initial value to insert bit field into
> + * @start: the lowest bit in the bit field (numbered from 0)
> + * @length: the length of the bit field
> + * @fieldval: the value to insert into the bit field
> + *
> + * Deposit @fieldval into the 16 bit @value at the bit field specified
> + * by the @start and @length parameters, and return the modified
> + * @value. Bits of @value outside the bit field are not modified.
> + * Bits of @fieldval above the least significant @length bits are
> + * ignored. The bit field must lie entirely within the 16 bit word.
> + * It is valid to request that all 16 bits are modified (ie @length
> + * 16 and @start 0).
> + *
> + * Returns: the modified @value.
> + */
> +static inline uint16_t deposit16(uint16_t value, int start, int length,
> + uint16_t fieldval)
> +{
> + uint16_t mask;
> + assert(start >= 0 && length > 0 && length <= 16 - start);
> + mask = (~0U >> (16 - length)) << start;
> + return (value & ~mask) | ((fieldval << start) & mask);
> +}
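
A quick usage sketch, assuming deposit16 above is in scope (values are
illustrative):

    /* Insert the 4-bit value 0xa at bits [4..7]: the field mask is
     * 0x00f0, bits outside it are preserved, so 0x00ff becomes 0x00af. */
    uint16_t v = deposit16(0x00ff, 4, 4, 0xa);   /* v == 0x00af */
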
> +
> +static void vector_tail_amo(CPURISCVState *env, int vreg, int index, int width)
> +{
> + switch (width) {
> + case 32:
> + env->vfp.vreg[vreg].u32[index] = 0;
> + break;
> + case 64:
> + env->vfp.vreg[vreg].u64[index] = 0;
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +}
> +
> +static void vector_tail_common(CPURISCVState *env, int vreg, int index,
> + int width)
> +{
> + switch (width) {
> + case 8:
> + env->vfp.vreg[vreg].u8[index] = 0;
> + break;
> + case 16:
> + env->vfp.vreg[vreg].u16[index] = 0;
> + break;
> + case 32:
> + env->vfp.vreg[vreg].u32[index] = 0;
> + break;
> + case 64:
> + env->vfp.vreg[vreg].u64[index] = 0;
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +}
> +
> +static void vector_tail_segment(CPURISCVState *env, int vreg, int index,
> + int width, int nf, int lmul)
> +{
> + switch (width) {
> + case 8:
> + while (nf >= 0) {
> + env->vfp.vreg[vreg + nf * lmul].u8[index] = 0;
> + nf--;
> + }
> + break;
> + case 16:
> + while (nf >= 0) {
> + env->vfp.vreg[vreg + nf * lmul].u16[index] = 0;
> + nf--;
> + }
> + break;
> + case 32:
> + while (nf >= 0) {
> + env->vfp.vreg[vreg + nf * lmul].u32[index] = 0;
> + nf--;
> + }
> + break;
> + case 64:
> + while (nf >= 0) {
> + env->vfp.vreg[vreg + nf * lmul].u64[index] = 0;
> + nf--;
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +}
> +
> +static void vector_tail_widen(CPURISCVState *env, int vreg, int index,
> + int width)
> +{
> + switch (width) {
> + case 8:
> + env->vfp.vreg[vreg].u16[index] = 0;
> + break;
> + case 16:
> + env->vfp.vreg[vreg].u32[index] = 0;
> + break;
> + case 32:
> + env->vfp.vreg[vreg].u64[index] = 0;
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +}
> +
> +static void vector_tail_narrow(CPURISCVState *env, int vreg, int index,
> + int width)
> +{
> + switch (width) {
> + case 8:
> + env->vfp.vreg[vreg].u8[index] = 0;
> + break;
> + case 16:
> + env->vfp.vreg[vreg].u16[index] = 0;
> + break;
> + case 32:
> + env->vfp.vreg[vreg].u32[index] = 0;
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +}
> +
> +static void vector_tail_fcommon(CPURISCVState *env, int vreg, int index,
> + int width)
> +{
> + switch (width) {
> + case 16:
> + env->vfp.vreg[vreg].u16[index] = 0;
> + break;
> + case 32:
> + env->vfp.vreg[vreg].u32[index] = 0;
> + break;
> + case 64:
> + env->vfp.vreg[vreg].u64[index] = 0;
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +}
> +
> +static void vector_tail_fwiden(CPURISCVState *env, int vreg, int index,
> + int width)
> +{
> + switch (width) {
> + case 16:
> + env->vfp.vreg[vreg].u32[index] = 0;
> + break;
> + case 32:
> + env->vfp.vreg[vreg].u64[index] = 0;
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +}
> +
> +static void vector_tail_fnarrow(CPURISCVState *env, int vreg, int index,
> + int width)
> +{
> + switch (width) {
> + case 16:
> + env->vfp.vreg[vreg].u16[index] = 0;
> + break;
> + case 32:
> + env->vfp.vreg[vreg].u32[index] = 0;
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +}
> +
> +static inline int vector_get_carry(CPURISCVState *env, int width, int lmul,
> + int index)
> +{
> + int mlen = width / lmul;
> + int idx = (index * mlen) / 8;
> + int pos = (index * mlen) % 8;
> +
> + return (env->vfp.vreg[0].u8[idx] >> pos) & 0x1;
> +}
> +
> +static inline void vector_get_layout(CPURISCVState *env, int width, int lmul,
> + int index, int *idx, int *pos)
> +{
> + int mlen = width / lmul;
> + *idx = (index * mlen) / 8;
> + *pos = (index * mlen) % 8;
> +}
> +
> +static bool vector_lmul_check_reg(CPURISCVState *env, uint32_t lmul,
> + uint32_t reg, bool widen)
> +{
> + int legal = widen ? (lmul * 2) : lmul;
> +
> + if ((lmul != 1 && lmul != 2 && lmul != 4 && lmul != 8) ||
> + (lmul == 8 && widen)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return false;
> + }
> +
> + if (reg % legal != 0) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return false;
> + }
> + return true;
> +}
> +
> +static inline uint64_t u64xu64_lh(uint64_t a, uint64_t b)
> +{
> + uint64_t hi_64, carry;
> +
> +
> + /* first get the whole product in {hi_64, lo_64} */
> + uint64_t a_hi = a >> 32;
> + uint64_t a_lo = (uint32_t)a;
> + uint64_t b_hi = b >> 32;
> + uint64_t b_lo = (uint32_t)b;
> +
> + /*
> + * a * b = (a_hi << 32 + a_lo) * (b_hi << 32 + b_lo)
> + * = (a_hi * b_hi) << 64 + (a_hi * b_lo) << 32 +
> + * (a_lo * b_hi) << 32 + a_lo * b_lo
> + * = {hi_64, lo_64}
> + * hi_64 = ((a_hi * b_lo) << 32 + (a_lo * b_hi) << 32 + (a_lo * b_lo)) >> 64
> + * = (a_hi * b_lo) >> 32 + (a_lo * b_hi) >> 32 + carry
> + * carry = ((uint64_t)(uint32_t)(a_hi * b_lo) +
> + * (uint64_t)(uint32_t)(a_lo * b_hi) + (a_lo * b_lo) >> 32) >> 32
> + */
> +
> + carry = ((uint64_t)(uint32_t)(a_hi * b_lo) +
> + (uint64_t)(uint32_t)(a_lo * b_hi) +
> + ((a_lo * b_lo) >> 32)) >> 32;
> +
> + hi_64 = a_hi * b_hi +
> + ((a_hi * b_lo) >> 32) + ((a_lo * b_hi) >> 32) +
> + carry;
> +
> + return hi_64;
> +}
> +
> +
> +static inline int64_t s64xu64_lh(int64_t a, uint64_t b)
> +{
> + uint64_t abs_a = a;
> + uint64_t lo_64, hi_64, carry;
> +
> + if (a < 0) {
> + abs_a = ~a + 1;
> + }
> +
> + /* first get the whole product in {hi_64, lo_64} */
> + uint64_t a_hi = abs_a >> 32;
> + uint64_t a_lo = (uint32_t)abs_a;
> + uint64_t b_hi = b >> 32;
> + uint64_t b_lo = (uint32_t)b;
> +
> + /*
> + * abs_a * b = (a_hi << 32 + a_lo) * (b_hi << 32 + b_lo)
> + * = (a_hi * b_hi) << 64 + (a_hi * b_lo) << 32 +
> + * (a_lo * b_hi) << 32 + a_lo * b_lo
> + * = {hi_64, lo_64}
> + * hi_64 = ((a_hi * b_lo) << 32 + (a_lo * b_hi) << 32 + (a_lo * b_lo)) >> 64
> + * = (a_hi * b_lo) >> 32 + (a_lo * b_hi) >> 32 + carry
> + * carry = ((uint64_t)(uint32_t)(a_hi * b_lo) +
> + * (uint64_t)(uint32_t)(a_lo * b_hi) + (a_lo * b_lo) >> 32) >> 32
> + */
> +
> + lo_64 = abs_a * b;
> + carry = ((uint64_t)(uint32_t)(a_hi * b_lo) +
> + (uint64_t)(uint32_t)(a_lo * b_hi) +
> + ((a_lo * b_lo) >> 32)) >> 32;
> +
> + hi_64 = a_hi * b_hi +
> + ((a_hi * b_lo) >> 32) + ((a_lo * b_hi) >> 32) +
> + carry;
> + if (a < 0) { /* signed x unsigned: the product is negative iff a is */
> + lo_64 = ~lo_64;
> + hi_64 = ~hi_64;
> + if (lo_64 == MAX_U64) {
> + lo_64 = 0;
> + hi_64 += 1;
> + } else {
> + lo_64 += 1;
> + }
> + }
> + return hi_64;
> +}
> +
> +
> +static inline int64_t s64xs64_lh(int64_t a, int64_t b)
> +{
> + uint64_t abs_a = a, abs_b = b;
> + uint64_t lo_64, hi_64, carry;
> +
> + if (a < 0) {
> + abs_a = ~a + 1;
> + }
> + if (b < 0) {
> + abs_b = ~b + 1;
> + }
> +
> + /* first get the whole product in {hi_64, lo_64} */
> + uint64_t a_hi = abs_a >> 32;
> + uint64_t a_lo = (uint32_t)abs_a;
> + uint64_t b_hi = abs_b >> 32;
> + uint64_t b_lo = (uint32_t)abs_b;
> +
> + /*
> + * abs_a * abs_b = (a_hi << 32 + a_lo) * (b_hi << 32 + b_lo)
> + * = (a_hi * b_hi) << 64 + (a_hi * b_lo) << 32 +
> + * (a_lo * b_hi) << 32 + a_lo * b_lo
> + * = {hi_64, lo_64}
> + * hi_64 = ((a_hi * b_lo) << 32 + (a_lo * b_hi) << 32 + (a_lo * b_lo)) >> 64
> + * = (a_hi * b_lo) >> 32 + (a_lo * b_hi) >> 32 + carry
> + * carry = ((uint64_t)(uint32_t)(a_hi * b_lo) +
> + * (uint64_t)(uint32_t)(a_lo * b_hi) + (a_lo * b_lo) >> 32) >> 32
> + */
> +
> + lo_64 = abs_a * abs_b;
> + carry = ((uint64_t)(uint32_t)(a_hi * b_lo) +
> + (uint64_t)(uint32_t)(a_lo * b_hi) +
> + ((a_lo * b_lo) >> 32)) >> 32;
> +
> + hi_64 = a_hi * b_hi +
> + ((a_hi * b_lo) >> 32) + ((a_lo * b_hi) >> 32) +
> + carry;
> +
> + if ((a ^ b) & SIGNBIT64) {
> + lo_64 = ~lo_64;
> + hi_64 = ~hi_64;
> + if (lo_64 == MAX_U64) {
> + lo_64 = 0;
> + hi_64 += 1;
> + } else {
> + lo_64 += 1;
> + }
> + }
> + return hi_64;
> +}
> +
> +void VECTOR_HELPER(vsetvl)(CPURISCVState *env, uint32_t rs1, uint32_t rs2,
> + uint32_t rd)
> +{
> + int sew, max_sew, vlmax, vl;
> +
> + if (rs2 == 0) {
> + vector_vtype_set_ill(env);
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + env->vfp.vtype = env->gpr[rs2];
> + sew = vector_get_width(env) / 8;
> + max_sew = sizeof(target_ulong);
> +
> +
> + if (env->misa & RVD) {
> + max_sew = max_sew > 8 ? max_sew : 8;
> + } else if (env->misa & RVF) {
> + max_sew = max_sew > 4 ? max_sew : 4;
> + }
> + if (sew > max_sew) {
> + vector_vtype_set_ill(env);
> + return;
> + }
> +
> + vlmax = vector_get_vlmax(env);
> + if (rs1 == 0) {
> + vl = vlmax;
> + } else if (env->gpr[rs1] <= vlmax) {
> + vl = env->gpr[rs1];
> + } else if (env->gpr[rs1] < 2 * vlmax) {
> + vl = (env->gpr[rs1] + 1) / 2; /* integer ceil(AVL / 2) */
> + } else {
> + vl = vlmax;
> + }
> + env->vfp.vl = vl;
> + env->gpr[rd] = vl;
> + env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vsetvli)(CPURISCVState *env, uint32_t rs1, uint32_t zimm,
> + uint32_t rd)
> +{
> + int sew, max_sew, vlmax, vl;
> +
> + env->vfp.vtype = zimm;
> + sew = vector_get_width(env) / 8;
> + max_sew = sizeof(target_ulong);
> +
> + if (env->misa & RVD) {
> + max_sew = max_sew > 8 ? max_sew : 8;
> + } else if (env->misa & RVF) {
> + max_sew = max_sew > 4 ? max_sew : 4;
> + }
> + if (sew > max_sew) {
> + vector_vtype_set_ill(env);
> + return;
> + }
> +
> + vlmax = vector_get_vlmax(env);
> + if (rs1 == 0) {
> + vl = vlmax;
> + } else if (env->gpr[rs1] <= vlmax) {
> + vl = env->gpr[rs1];
> + } else if (env->gpr[rs1] < 2 * vlmax) {
> + vl = (env->gpr[rs1] + 1) / 2; /* integer ceil(AVL / 2) */
> + } else {
> + vl = vlmax;
> + }
> + env->vfp.vl = vl;
> + env->gpr[rd] = vl;
> + env->vfp.vstart = 0;
> +}
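
Both vsetvl and vsetvli implement the same vl-selection rule from the 0.7.1
draft: vl = VLMAX when rs1 is x0 or AVL >= 2*VLMAX, vl = AVL when it fits,
and an even split in between (the spec permits any value in
[ceil(AVL/2), VLMAX] there; the patch picks the ceiling). Isolated as a
standalone sketch with hypothetical names:

    #include <stdio.h>

    static unsigned long compute_vl(unsigned long avl, unsigned long vlmax,
                                    int rs1_is_x0)
    {
        if (rs1_is_x0 || avl >= 2 * vlmax) {
            return vlmax;
        } else if (avl > vlmax) {
            return (avl + 1) / 2;          /* ceil(avl / 2), integer form */
        }
        return avl;                        /* avl <= vlmax: take it all */
    }

    int main(void)
    {
        printf("%lu\n", compute_vl(10, 8, 0)); /* 5: even split */
        printf("%lu\n", compute_vl(20, 8, 0)); /* 8: capped at VLMAX */
        printf("%lu\n", compute_vl(6, 8, 0));  /* 6: fits */
        return 0;
    }
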
> +
> +/*
> + * vrgather.vv vd, vs2, vs1, vm #
> + * vd[i] = (vs1[i] >= VLMAX) ? 0 : vs2[vs1[i]];
> + */
> +void VECTOR_HELPER(vrgather_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest, src, src1;
> + uint32_t index;
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> + if (vector_vtype_ill(env)
> + || vector_overlap_vm_force(vm, rd)
> + || vector_overlap_dstgp_srcgp(rd, lmul, rs1, lmul)
> + || vector_overlap_dstgp_srcgp(rd, lmul, rs2, lmul)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src1 = rs1 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + index = env->vfp.vreg[src1].u8[j];
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (index >= vlmax) {
> + env->vfp.vreg[dest].u8[j] = 0;
> + } else {
> + src = rs2 + (index / (VLEN / width));
> + index = index % (VLEN / width);
> + env->vfp.vreg[dest].u8[j] =
> + env->vfp.vreg[src].u8[index];
> + }
> + }
> + break;
> + case 16:
> + index = env->vfp.vreg[src1].u16[j];
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (index >= vlmax) {
> + env->vfp.vreg[dest].u16[j] = 0;
> + } else {
> + src = rs2 + (index / (VLEN / width));
> + index = index % (VLEN / width);
> + env->vfp.vreg[dest].u16[j] =
> + env->vfp.vreg[src].u16[index];
> + }
> + }
> + break;
> + case 32:
> + index = env->vfp.vreg[src1].u32[j];
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (index >= vlmax) {
> + env->vfp.vreg[dest].u32[j] = 0;
> + } else {
> + src = rs2 + (index / (VLEN / width));
> + index = index % (VLEN / width);
> + env->vfp.vreg[dest].u32[j] =
> + env->vfp.vreg[src].u32[index];
> + }
> + }
> + break;
> + case 64:
> + index = env->vfp.vreg[src1].u64[j];
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (index >= vlmax) {
> + env->vfp.vreg[dest].u64[j] = 0;
> + } else {
> + src = rs2 + (index / (VLEN / width));
> + index = index % (VLEN / width);
> + env->vfp.vreg[dest].u64[j] =
> + env->vfp.vreg[src].u64[index];
> + }
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
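
All four width arms above implement the same gather; a scalar model of the
semantics for the 8-bit case (the helper name is mine):

    #include <stdint.h>
    #include <stdio.h>

    /* vd[i] = (vs1[i] >= vlmax) ? 0 : vs2[vs1[i]] */
    static void demo_vrgather(const uint8_t *vs2, const uint8_t *vs1,
                              uint8_t *vd, int vl, int vlmax)
    {
        int i;
        for (i = 0; i < vl; i++) {
            vd[i] = (vs1[i] >= vlmax) ? 0 : vs2[vs1[i]];
        }
    }

    int main(void)
    {
        uint8_t vs2[4] = {10, 20, 30, 40};
        uint8_t vs1[4] = {3, 0, 7, 1};     /* 7 is out of range */
        uint8_t vd[4];
        demo_vrgather(vs2, vs1, vd, 4, 4);
        printf("%d %d %d %d\n", vd[0], vd[1], vd[2], vd[3]); /* 40 10 0 20 */
        return 0;
    }
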
> +
> +/* vrgather.vx vd, vs2, rs1, vm # vd[i] = (x[rs1] >= VLMAX) ? 0 : vs2[rs1] */
> +void VECTOR_HELPER(vrgather_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest, src;
> + uint32_t index;
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> + if (vector_vtype_ill(env)
> + || vector_overlap_vm_force(vm, rd)
> + || vector_overlap_dstgp_srcgp(rd, lmul, rs2, lmul)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + index = env->gpr[rs1];
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (index >= vlmax) {
> + env->vfp.vreg[dest].u8[j] = 0;
> + } else {
> + src = rs2 + (index / (VLEN / width));
> + index = index % (VLEN / width);
> + env->vfp.vreg[dest].u8[j] =
> + env->vfp.vreg[src].u8[index];
> + }
> + }
> + break;
> + case 16:
> + index = env->gpr[rs1];
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (index >= vlmax) {
> + env->vfp.vreg[dest].u16[j] = 0;
> + } else {
> + src = rs2 + (index / (VLEN / width));
> + index = index % (VLEN / width);
> + env->vfp.vreg[dest].u16[j] =
> + env->vfp.vreg[src].u16[index];
> + }
> + }
> + break;
> + case 32:
> + index = env->gpr[rs1];
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (index >= vlmax) {
> + env->vfp.vreg[dest].u32[j] = 0;
> + } else {
> + src = rs2 + (index / (VLEN / width));
> + index = index % (VLEN / width);
> + env->vfp.vreg[dest].u32[j] =
> + env->vfp.vreg[src].u32[index];
> + }
> + }
> + break;
> + case 64:
> + index = env->gpr[rs1];
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (index >= vlmax) {
> + env->vfp.vreg[dest].u64[j] = 0;
> + } else {
> + src = rs2 + (index / (VLEN / width));
> + index = index % (VLEN / width);
> + env->vfp.vreg[dest].u64[j] =
> + env->vfp.vreg[src].u64[index];
> + }
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vrgather.vi vd, vs2, imm, vm # vd[i] = (imm >= VLMAX) ? 0 : vs2[imm] */
> +void VECTOR_HELPER(vrgather_vi)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest, src;
> + uint32_t index;
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> + if (vector_vtype_ill(env)
> + || vector_overlap_vm_force(vm, rd)
> + || vector_overlap_dstgp_srcgp(rd, lmul, rs2, lmul)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + index = rs1;
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (index >= vlmax) {
> + env->vfp.vreg[dest].u8[j] = 0;
> + } else {
> + src = rs2 + (index / (VLEN / width));
> + index = index % (VLEN / width);
> + env->vfp.vreg[dest].u8[j] =
> + env->vfp.vreg[src].u8[index];
> + }
> + }
> + break;
> + case 16:
> + index = rs1;
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (index >= vlmax) {
> + env->vfp.vreg[dest].u16[j] = 0;
> + } else {
> + src = rs2 + (index / (VLEN / width));
> + index = index % (VLEN / width);
> + env->vfp.vreg[dest].u16[j] =
> + env->vfp.vreg[src].u16[index];
> + }
> + }
> + break;
> + case 32:
> + index = rs1;
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (index >= vlmax) {
> + env->vfp.vreg[dest].u32[j] = 0;
> + } else {
> + src = rs2 + (index / (VLEN / width));
> + index = index % (VLEN / width);
> + env->vfp.vreg[dest].u32[j] =
> + env->vfp.vreg[src].u32[index];
> + }
> + }
> + break;
> + case 64:
> + index = rs1;
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (index >= vlmax) {
> + env->vfp.vreg[dest].u64[j] = 0;
> + } else {
> + src = rs2 + (index / (VLEN / width));
> + index = index % (VLEN / width);
> + env->vfp.vreg[dest].u64[j] =
> + env->vfp.vreg[src].u64[index];
> + }
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vext_x_v)(CPURISCVState *env, uint32_t rs1, uint32_t rs2,
> + uint32_t rd)
> +{
> + int width;
> + uint64_t elem;
> + target_ulong index = env->gpr[rs1];
> +
> + if (vector_vtype_ill(env)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + width = vector_get_width(env);
> +
> + if (index >= VLEN / width) { /* index is too big */
> + env->gpr[rd] = 0;
> + } else { /* read the element only once the index is known in range */
> + elem = vector_get_iu_elem(env, width, rs2, index);
> + env->gpr[rd] = elem;
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vfmv.f.s rd, vs2 # rd = vs2[0] (rs1=0) */
> +void VECTOR_HELPER(vfmv_f_s)(CPURISCVState *env, uint32_t rs1, uint32_t rs2,
> + uint32_t rd)
> +{
> + int width, flen;
> + uint64_t mask;
> +
> + if (vector_vtype_ill(env)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + if (env->misa & RVD) {
> + flen = 8;
> + } else if (env->misa & RVF) {
> + flen = 4;
> + } else {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + width = vector_get_width(env);
> + mask = (~((uint64_t)0)) << width;
> +
> + if (width == 8) {
> + env->fpr[rd] = (uint64_t)env->vfp.vreg[rs2].s8[0] | mask;
> + } else if (width == 16) {
> + env->fpr[rd] = (uint64_t)env->vfp.vreg[rs2].s16[0] | mask;
> + } else if (width == 32) {
> + env->fpr[rd] = (uint64_t)env->vfp.vreg[rs2].s32[0] | mask;
> + } else if (width == 64) {
> + if (flen == 4) {
> + env->fpr[rd] = env->vfp.vreg[rs2].s64[0] & 0xffffffff;
> + } else {
> + env->fpr[rd] = env->vfp.vreg[rs2].s64[0];
> + }
> + } else {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vmv.s.x vd, rs1 # vd[0] = rs1 */
> +void VECTOR_HELPER(vmv_s_x)(CPURISCVState *env, uint32_t rs1, uint32_t rs2,
> + uint32_t rd)
> +{
> + int width;
> + if (vector_vtype_ill(env)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + if (env->vfp.vstart >= env->vfp.vl) {
> + return;
> + }
> +
> + memset(&env->vfp.vreg[rd].u8[0], 0, VLEN / 8);
> + width = vector_get_width(env);
> +
> + if (width == 8) {
> + env->vfp.vreg[rd].u8[0] = env->gpr[rs1];
> + } else if (width == 16) {
> + env->vfp.vreg[rd].u16[0] = env->gpr[rs1];
> + } else if (width == 32) {
> + env->vfp.vreg[rd].u32[0] = env->gpr[rs1];
> + } else if (width == 64) {
> + env->vfp.vreg[rd].u64[0] = env->gpr[rs1];
> + } else {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vfmv.s.f vd, rs1 # vd[0] = rs1 (vs2 = 0) */
> +void VECTOR_HELPER(vfmv_s_f)(CPURISCVState *env, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, flen;
> +
> + if (vector_vtype_ill(env)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + if (env->vfp.vstart >= env->vfp.vl) {
> + return;
> + }
> + if (env->misa & RVD) {
> + flen = 8;
> + } else if (env->misa & RVF) {
> + flen = 4;
> + } else {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + width = vector_get_width(env);
> +
> + if (width == 8) {
> + env->vfp.vreg[rd].u8[0] = env->fpr[rs1];
> + } else if (width == 16) {
> + env->vfp.vreg[rd].u16[0] = env->fpr[rs1];
> + } else if (width == 32) {
> + env->vfp.vreg[rd].u32[0] = env->fpr[rs1];
> + } else if (width == 64) {
> + if (flen == 4) { /* 1-extended to FLEN bits */
> + env->vfp.vreg[rd].u64[0] = (uint64_t)env->fpr[rs1]
> + | 0xffffffff00000000;
> + } else {
> + env->vfp.vreg[rd].u64[0] = env->fpr[rs1];
> + }
> + } else {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vslideup.vx vd, vs2, rs1, vm # vd[i+rs1] = vs2[i] */
> +void VECTOR_HELPER(vslideup_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax, offset;
> + int i, j, dest, src, k;
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> + if (vector_vtype_ill(env)
> + || vector_overlap_vm_force(vm, rd)
> + || vector_overlap_dstgp_srcgp(rd, lmul, rs2, lmul)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> + offset = env->gpr[rs1];
> +
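> + /* destination elements below vstart must not be written, so start the slide there */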
> + if (offset < env->vfp.vstart) {
> + offset = env->vfp.vstart;
> + }
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src = rs2 + ((i - offset) / (VLEN / width));
> + j = i % (VLEN / width);
> + k = (i - offset) % (VLEN / width);
> + if (i < offset) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u8[j] =
> + env->vfp.vreg[src].u8[k];
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u16[j] =
> + env->vfp.vreg[src].u16[k];
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u32[j] =
> + env->vfp.vreg[src].u32[k];
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u64[j] =
> + env->vfp.vreg[src].u64[k];
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vslideup.vi vd, vs2, rs1, vm # vd[i+rs1] = vs2[i] */
> +void VECTOR_HELPER(vslideup_vi)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax, offset;
> + int i, j, dest, src, k;
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> + if (vector_vtype_ill(env)
> + || vector_overlap_vm_force(vm, rd)
> + || vector_overlap_dstgp_srcgp(rd, lmul, rs2, lmul)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> + offset = rs1;
> +
> + if (offset < env->vfp.vstart) {
> + offset = env->vfp.vstart;
> + }
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src = rs2 + ((i - offset) / (VLEN / width));
> + j = i % (VLEN / width);
> + k = (i - offset) % (VLEN / width);
> + if (i < offset) {
> + continue;
> + } else if (i < vl) {
> + if (width == 8) {
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u8[j] =
> + env->vfp.vreg[src].u8[k];
> + }
> + } else if (width == 16) {
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u16[j] =
> + env->vfp.vreg[src].u16[k];
> + }
> + } else if (width == 32) {
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u32[j] =
> + env->vfp.vreg[src].u32[k];
> + }
> + } else if (width == 64) {
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u64[j] =
> + env->vfp.vreg[src].u64[k];
> + }
> + } else {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vslide1up.vx vd, vs2, rs1, vm # vd[0]=x[rs1], vd[i+1] = vs2[i] */
> +void VECTOR_HELPER(vslide1up_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest, src, k;
> + uint64_t s1;
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> + if (vector_vtype_ill(env)
> + || vector_overlap_vm_force(vm, rd)
> + || vector_overlap_dstgp_srcgp(rd, lmul, rs2, lmul)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> + s1 = env->gpr[rs1];
> +
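> + /* element 0 takes x[rs1]; every other active element comes from vs2[i - 1] */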
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src = rs2 + ((i - 1) / (VLEN / width));
> + j = i % (VLEN / width);
> + k = (i - 1) % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i == 0 && env->vfp.vstart == 0) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u8[j] = s1;
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u16[j] = s1;
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u32[j] = s1;
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u64[j] = s1;
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u8[j] = env->vfp.vreg[src].u8[k];
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u16[j] =
> + env->vfp.vreg[src].u16[k];
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u32[j] =
> + env->vfp.vreg[src].u32[k];
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u64[j] =
> + env->vfp.vreg[src].u64[k];
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vslidedown.vx vd, vs2, rs1, vm # vd[i] = vs2[i + rs1] */
> +void VECTOR_HELPER(vslidedown_vx)(CPURISCVState *env, uint32_t vm,
> + uint32_t rs1, uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax, offset;
> + int i, j, dest, src, k;
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_force(vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> + offset = env->gpr[rs1];
> +
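> + /* source elements at or beyond vlmax read as zero */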
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src = rs2 + ((i + offset) / (VLEN / width));
> + j = i % (VLEN / width);
> + k = (i + offset) % (VLEN / width);
> + if (i < offset) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (i + offset < vlmax) {
> + env->vfp.vreg[dest].u8[j] =
> + env->vfp.vreg[src].u8[k];
> + } else {
> + env->vfp.vreg[dest].u8[j] = 0;
> + }
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (i + offset < vlmax) {
> + env->vfp.vreg[dest].u16[j] =
> + env->vfp.vreg[src].u16[k];
> + } else {
> + env->vfp.vreg[dest].u16[j] = 0;
> + }
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (i + offset < vlmax) {
> + env->vfp.vreg[dest].u32[j] =
> + env->vfp.vreg[src].u32[k];
> + } else {
> + env->vfp.vreg[dest].u32[j] = 0;
> + }
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (i + offset < vlmax) {
> + env->vfp.vreg[dest].u64[j] =
> + env->vfp.vreg[src].u64[k];
> + } else {
> + env->vfp.vreg[dest].u64[j] = 0;
> + }
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vslidedown_vi)(CPURISCVState *env, uint32_t vm,
> + uint32_t rs1, uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax, offset;
> + int i, j, dest, src, k;
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_force(vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> + offset = rs1;
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src = rs2 + ((i + offset) / (VLEN / width));
> + j = i % (VLEN / width);
> + k = (i + offset) % (VLEN / width);
> + if (i < offset) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (i + offset < vlmax) {
> + env->vfp.vreg[dest].u8[j] =
> + env->vfp.vreg[src].u8[k];
> + } else {
> + env->vfp.vreg[dest].u8[j] = 0;
> + }
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (i + offset < vlmax) {
> + env->vfp.vreg[dest].u16[j] =
> + env->vfp.vreg[src].u16[k];
> + } else {
> + env->vfp.vreg[dest].u16[j] = 0;
> + }
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (i + offset < vlmax) {
> + env->vfp.vreg[dest].u32[j] =
> + env->vfp.vreg[src].u32[k];
> + } else {
> + env->vfp.vreg[dest].u32[j] = 0;
> + }
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (i + offset < vlmax) {
> + env->vfp.vreg[dest].u64[j] =
> + env->vfp.vreg[src].u64[k];
> + } else {
> + env->vfp.vreg[dest].u64[j] = 0;
> + }
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vslide1down.vx vd, vs2, rs1, vm # vd[vl - 1]=x[rs1], vd[i] = vs2[i + 1] */
> +void VECTOR_HELPER(vslide1down_vx)(CPURISCVState *env, uint32_t vm,
> + uint32_t rs1, uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest, src, k;
> + uint64_t s1;
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_force(vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> + s1 = env->gpr[rs1];
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src = rs2 + ((i + 1) / (VLEN / width));
> + j = i % (VLEN / width);
> + k = (i + 1) % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i == vl - 1 && i >= env->vfp.vstart) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u8[j] = s1;
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u16[j] = s1;
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u32[j] = s1;
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u64[j] = s1;
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else if (i < vl - 1) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u8[j] = env->vfp.vreg[src].u8[k];
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u16[j] =
> + env->vfp.vreg[src].u16[k];
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u32[j] =
> + env->vfp.vreg[src].u32[k];
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u64[j] =
> + env->vfp.vreg[src].u64[k];
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/*
> + * vcompress.vm vd, vs2, vs1
> + * Compress into vd elements of vs2 where vs1 is enabled
> + */
> +void VECTOR_HELPER(vcompress_vm)(CPURISCVState *env, uint32_t rs1, uint32_t rs2,
> + uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest, src;
> + uint32_t vd_idx, num = 0;
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> + if (vector_vtype_ill(env)
> + || vector_overlap_dstgp_srcgp(rd, lmul, rs1, 1)
> + || vector_overlap_dstgp_srcgp(rd, lmul, rs2, lmul)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + if (env->vfp.vstart != 0) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + /* zero all destination elements first */
> + for (i = 0; i < lmul; i++) {
> + memset(&env->vfp.vreg[rd + i].u64[0], 0, VLEN / 8);
> + }
> +
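> + /* num counts the enabled source elements, packing them from vd[0] upwards */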
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (num / (VLEN / width));
> + src = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + vd_idx = num % (VLEN / width);
> + if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_mask_reg(env, rs1, width, lmul, i)) {
> + env->vfp.vreg[dest].u8[vd_idx] =
> + env->vfp.vreg[src].u8[j];
> + num++;
> + }
> + break;
> + case 16:
> + if (vector_mask_reg(env, rs1, width, lmul, i)) {
> + env->vfp.vreg[dest].u16[vd_idx] =
> + env->vfp.vreg[src].u16[j];
> + num++;
> + }
> + break;
> + case 32:
> + if (vector_mask_reg(env, rs1, width, lmul, i)) {
> + env->vfp.vreg[dest].u32[vd_idx] =
> + env->vfp.vreg[src].u32[j];
> + num++;
> + }
> + break;
> + case 64:
> + if (vector_mask_reg(env, rs1, width, lmul, i)) {
> + env->vfp.vreg[dest].u64[vd_idx] =
> + env->vfp.vreg[src].u64[j];
> + num++;
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vadd_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src1, src2, dest, vlmax;
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src1 = rs1 + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u8[j] = env->vfp.vreg[src1].u8[j]
> + + env->vfp.vreg[src2].u8[j];
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u16[j] = env->vfp.vreg[src1].u16[j]
> + + env->vfp.vreg[src2].u16[j];
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u32[j] = env->vfp.vreg[src1].u32[j]
> + + env->vfp.vreg[src2].u32[j];
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u64[j] = env->vfp.vreg[src1].u64[j]
> + + env->vfp.vreg[src2].u64[j];
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vadd_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src2, dest, vlmax;
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
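> + /* for the SEW=64 case the scalar operand is widened to 64 bits via extend_gpr */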
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u8[j] = env->gpr[rs1]
> + + env->vfp.vreg[src2].u8[j];
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u16[j] = env->gpr[rs1]
> + + env->vfp.vreg[src2].u16[j];
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u32[j] = env->gpr[rs1]
> + + env->vfp.vreg[src2].u32[j];
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u64[j] =
> + (uint64_t)extend_gpr(env->gpr[rs1])
> + + env->vfp.vreg[src2].u64[j];
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vadd_vi)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src2, dest, vlmax;
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
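> + /* rs1 carries the 5-bit immediate here, sign-extended to the element width */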
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s8[j] = sign_extend(rs1, 5)
> + + env->vfp.vreg[src2].s8[j];
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s16[j] = sign_extend(rs1, 5)
> + + env->vfp.vreg[src2].s16[j];
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s32[j] = sign_extend(rs1, 5)
> + + env->vfp.vreg[src2].s32[j];
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s64[j] = sign_extend(rs1, 5)
> + + env->vfp.vreg[src2].s64[j];
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vredsum.vs vd, vs2, vs1, vm # vd[0] = sum(vs1[0] , vs2[*]) */
> +void VECTOR_HELPER(vredsum_vs)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> +
> + int width, lmul, vl, vlmax;
> + int i, j, src2;
> + uint64_t sum = 0;
> +
> + lmul = vector_get_lmul(env);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + if (env->vfp.vstart != 0) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vl = env->vfp.vl;
> + if (vl == 0) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < VLEN / 64; i++) {
> + env->vfp.vreg[rd].u64[i] = 0;
> + }
> +
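> + /* accumulate in 64 bits; the final store to vd[0] truncates to SEW */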
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> +
> + if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + sum += env->vfp.vreg[src2].u8[j];
> + }
> + if (i == 0) {
> + sum += env->vfp.vreg[rs1].u8[0];
> + }
> + if (i == vl - 1) {
> + env->vfp.vreg[rd].u8[0] = sum;
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + sum += env->vfp.vreg[src2].u16[j];
> + }
> + if (i == 0) {
> + sum += env->vfp.vreg[rs1].u16[0];
> + }
> + if (i == vl - 1) {
> + env->vfp.vreg[rd].u16[0] = sum;
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + sum += env->vfp.vreg[src2].u32[j];
> + }
> + if (i == 0) {
> + sum += env->vfp.vreg[rs1].u32[0];
> + }
> + if (i == vl - 1) {
> + env->vfp.vreg[rd].u32[0] = sum;
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + sum += env->vfp.vreg[src2].u64[j];
> + }
> + if (i == 0) {
> + sum += env->vfp.vreg[rs1].u64[0];
> + }
> + if (i == vl - 1) {
> + env->vfp.vreg[rd].u64[0] = sum;
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vfadd.vv vd, vs2, vs1, vm # Vector-vector */
> +void VECTOR_HELPER(vfadd_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest, src1, src2;
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + src1 = rs1 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f16[j] = float16_add(
> + env->vfp.vreg[src1].f16[j],
> + env->vfp.vreg[src2].f16[j],
> + &env->fp_status);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f32[j] = float32_add(
> + env->vfp.vreg[src1].f32[j],
> + env->vfp.vreg[src2].f32[j],
> + &env->fp_status);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f64[j] = float64_add(
> + env->vfp.vreg[src1].f64[j],
> + env->vfp.vreg[src2].f64[j],
> + &env->fp_status);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_fcommon(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vfadd.vf vd, vs2, rs1, vm # vector-scalar */
> +void VECTOR_HELPER(vfadd_vf)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest, src2;
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f16[j] = float16_add(
> + env->fpr[rs1],
> + env->vfp.vreg[src2].f16[j],
> + &env->fp_status);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f32[j] = float32_add(
> + env->fpr[rs1],
> + env->vfp.vreg[src2].f32[j],
> + &env->fp_status);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f64[j] = float64_add(
> + env->fpr[rs1],
> + env->vfp.vreg[src2].f64[j],
> + &env->fp_status);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_fcommon(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vredand.vs vd, vs2, vs1, vm # vd[0] = and( vs1[0] , vs2[*] ) */
> +void VECTOR_HELPER(vredand_vs)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, src2;
> + uint64_t res = 0;
> +
> + lmul = vector_get_lmul(env);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + if (env->vfp.vstart != 0) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vl = env->vfp.vl;
> + if (vl == 0) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < VLEN / 64; i++) {
> + env->vfp.vreg[rd].u64[i] = 0;
> + }
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> +
> + if (i < vl) {
> + switch (width) {
> + case 8:
> + if (i == 0) {
> + res = env->vfp.vreg[rs1].u8[0];
> + }
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + res &= env->vfp.vreg[src2].u8[j];
> + }
> + if (i == vl - 1) {
> + env->vfp.vreg[rd].u8[0] = res;
> + }
> + break;
> + case 16:
> + if (i == 0) {
> + res = env->vfp.vreg[rs1].u16[0];
> + }
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + res &= env->vfp.vreg[src2].u16[j];
> + }
> + if (i == vl - 1) {
> + env->vfp.vreg[rd].u16[0] = res;
> + }
> + break;
> + case 32:
> + if (i == 0) {
> + res = env->vfp.vreg[rs1].u32[0];
> + }
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + res &= env->vfp.vreg[src2].u32[j];
> + }
> + if (i == vl - 1) {
> + env->vfp.vreg[rd].u32[0] = res;
> + }
> + break;
> + case 64:
> + if (i == 0) {
> + res = env->vfp.vreg[rs1].u64[0];
> + }
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + res &= env->vfp.vreg[src2].u64[j];
> + }
> + if (i == vl - 1) {
> + env->vfp.vreg[rd].u64[0] = res;
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vfredsum.vs vd, vs2, vs1, vm # Unordered sum */
> +void VECTOR_HELPER(vfredsum_vs)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, src2;
> + float16 sum16 = float16_zero;
> + float32 sum32 = float32_zero;
> + float64 sum64 = float64_zero;
> +
> + lmul = vector_get_lmul(env);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> +
> + if (vector_vtype_ill(env)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + if (env->vfp.vstart != 0) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vl = env->vfp.vl;
> + if (vl == 0) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < VLEN / 64; i++) {
> + env->vfp.vreg[rd].u64[i] = 0;
> + }
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> +
> + if (i < vl) {
> + switch (width) {
> + case 16:
> + if (i == 0) {
> + sum16 = env->vfp.vreg[rs1].f16[0];
> + }
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + sum16 = float16_add(sum16, env->vfp.vreg[src2].f16[j],
> + &env->fp_status);
> + }
> + if (i == vl - 1) {
> + env->vfp.vreg[rd].f16[0] = sum16;
> + }
> + break;
> + case 32:
> + if (i == 0) {
> + sum32 = env->vfp.vreg[rs1].f32[0];
> + }
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + sum32 = float32_add(sum32, env->vfp.vreg[src2].f32[j],
> + &env->fp_status);
> + }
> + if (i == vl - 1) {
> + env->vfp.vreg[rd].f32[0] = sum32;
> + }
> + break;
> + case 64:
> + if (i == 0) {
> + sum64 = env->vfp.vreg[rs1].f64[0];
> + }
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + sum64 = float64_add(sum64, env->vfp.vreg[src2].f64[j],
> + &env->fp_status);
> + }
> + if (i == vl - 1) {
> + env->vfp.vreg[rd].f64[0] = sum64;
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vsub_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src1, src2, dest, vlmax;
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src1 = rs1 + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u8[j] = env->vfp.vreg[src2].u8[j]
> + - env->vfp.vreg[src1].u8[j];
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u16[j] = env->vfp.vreg[src2].u16[j]
> + - env->vfp.vreg[src1].u16[j];
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u32[j] = env->vfp.vreg[src2].u32[j]
> + - env->vfp.vreg[src1].u32[j];
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u64[j] = env->vfp.vreg[src2].u64[j]
> + - env->vfp.vreg[src1].u64[j];
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> +
> + env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vsub_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src2, dest, vlmax;
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u8[j] = env->vfp.vreg[src2].u8[j]
> + - env->gpr[rs1];
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u16[j] = env->vfp.vreg[src2].u16[j]
> + - env->gpr[rs1];
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u32[j] = env->vfp.vreg[src2].u32[j]
> + - env->gpr[rs1];
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u64[j] = env->vfp.vreg[src2].u64[j]
> + - (uint64_t)extend_gpr(env->gpr[rs1]);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vredor.vs vd, vs2, vs1, vm # vd[0] = or( vs1[0] , vs2[*] ) */
> +void VECTOR_HELPER(vredor_vs)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, src2;
> + uint64_t res = 0;
> +
> + lmul = vector_get_lmul(env);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + if (env->vfp.vstart != 0) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vl = env->vfp.vl;
> + if (vl == 0) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < VLEN / 64; i++) {
> + env->vfp.vreg[rd].u64[i] = 0;
> + }
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> +
> + if (i < vl) {
> + switch (width) {
> + case 8:
> + if (i == 0) {
> + res = env->vfp.vreg[rs1].u8[0];
> + }
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + res |= env->vfp.vreg[src2].u8[j];
> + }
> + if (i == vl - 1) {
> + env->vfp.vreg[rd].u8[0] = res;
> + }
> + break;
> + case 16:
> + if (i == 0) {
> + res = env->vfp.vreg[rs1].u16[0];
> + }
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + res |= env->vfp.vreg[src2].u16[j];
> + }
> + if (i == vl - 1) {
> + env->vfp.vreg[rd].u16[0] = res;
> + }
> + break;
> + case 32:
> + if (i == 0) {
> + res = env->vfp.vreg[rs1].u32[0];
> + }
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + res |= env->vfp.vreg[src2].u32[j];
> + }
> + if (i == vl - 1) {
> + env->vfp.vreg[rd].u32[0] = res;
> + }
> + break;
> + case 64:
> + if (i == 0) {
> + res = env->vfp.vreg[rs1].u64[0];
> + }
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + res |= env->vfp.vreg[src2].u64[j];
> + }
> + if (i == vl - 1) {
> + env->vfp.vreg[rd].u64[0] = res;
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vfsub.vv vd, vs2, vs1, vm # Vector-vector */
> +void VECTOR_HELPER(vfsub_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest, src1, src2;
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + src1 = rs1 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f16[j] = float16_sub(
> + env->vfp.vreg[src2].f16[j],
> + env->vfp.vreg[src1].f16[j],
> + &env->fp_status);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f32[j] = float32_sub(
> + env->vfp.vreg[src2].f32[j],
> + env->vfp.vreg[src1].f32[j],
> + &env->fp_status);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f64[j] = float64_sub(
> + env->vfp.vreg[src2].f64[j],
> + env->vfp.vreg[src1].f64[j],
> + &env->fp_status);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_fcommon(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vfsub.vf vd, vs2, rs1, vm # Vector-scalar vd[i] = vs2[i] - f[rs1] */
> +void VECTOR_HELPER(vfsub_vf)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest, src2;
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f16[j] = float16_sub(
> + env->vfp.vreg[src2].f16[j],
> + env->fpr[rs1],
> + &env->fp_status);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f32[j] = float32_sub(
> + env->vfp.vreg[src2].f32[j],
> + env->fpr[rs1],
> + &env->fp_status);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f64[j] = float64_sub(
> + env->vfp.vreg[src2].f64[j],
> + env->fpr[rs1],
> + &env->fp_status);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_fcommon(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vrsub_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src2, dest, vlmax;
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u8[j] = env->gpr[rs1]
> + - env->vfp.vreg[src2].u8[j];
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u16[j] = env->gpr[rs1]
> + - env->vfp.vreg[src2].u16[j];
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u32[j] = env->gpr[rs1]
> + - env->vfp.vreg[src2].u32[j];
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u64[j] =
> + (uint64_t)extend_gpr(env->gpr[rs1])
> + - env->vfp.vreg[src2].u64[j];
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vrsub_vi)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src2, dest, vlmax;
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s8[j] = sign_extend(rs1, 5)
> + - env->vfp.vreg[src2].s8[j];
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s16[j] = sign_extend(rs1, 5)
> + - env->vfp.vreg[src2].s16[j];
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s32[j] = sign_extend(rs1, 5)
> + - env->vfp.vreg[src2].s32[j];
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s64[j] = sign_extend(rs1, 5)
> + - env->vfp.vreg[src2].s64[j];
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vredxor.vs vd, vs2, vs1, vm # vd[0] = xor( vs1[0] , vs2[*] ) */
> +void VECTOR_HELPER(vredxor_vs)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, src2;
> + uint64_t res = 0;
> +
> + lmul = vector_get_lmul(env);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + if (env->vfp.vstart != 0) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vl = env->vfp.vl;
> + if (vl == 0) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < VLEN / 64; i++) {
> + env->vfp.vreg[rd].u64[i] = 0;
> + }
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> +
> + if (i < vl) {
> + switch (width) {
> + case 8:
> + if (i == 0) {
> + res = env->vfp.vreg[rs1].u8[0];
> + }
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + res ^= env->vfp.vreg[src2].u8[j];
> + }
> + if (i == vl - 1) {
> + env->vfp.vreg[rd].u8[0] = res;
> + }
> + break;
> + case 16:
> + if (i == 0) {
> + res = env->vfp.vreg[rs1].u16[0];
> + }
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + res ^= env->vfp.vreg[src2].u16[j];
> + }
> + if (i == vl - 1) {
> + env->vfp.vreg[rd].u16[0] = res;
> + }
> + break;
> + case 32:
> + if (i == 0) {
> + res = env->vfp.vreg[rs1].u32[0];
> + }
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + res ^= env->vfp.vreg[src2].u32[j];
> + }
> + if (i == vl - 1) {
> + env->vfp.vreg[rd].u32[0] = res;
> + }
> + break;
> + case 64:
> + if (i == 0) {
> + res = env->vfp.vreg[rs1].u64[0];
> + }
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + res ^= env->vfp.vreg[src2].u64[j];
> + }
> + if (i == vl - 1) {
> + env->vfp.vreg[rd].u64[0] = res;
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vfredosum.vs vd, vs2, vs1, vm # Ordered sum */
> +void VECTOR_HELPER(vfredosum_vs)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
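> + /* vfredsum_vs iterates in element order, so it also satisfies the ordered-sum semantics */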
> + helper_vector_vfredsum_vs(env, vm, rs1, rs2, rd);
> + env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vminu_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src1, src2, dest, vlmax;
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src1 = rs1 + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src1].u8[j] <=
> + env->vfp.vreg[src2].u8[j]) {
> + env->vfp.vreg[dest].u8[j] =
> + env->vfp.vreg[src1].u8[j];
> + } else {
> + env->vfp.vreg[dest].u8[j] =
> + env->vfp.vreg[src2].u8[j];
> + }
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src1].u16[j] <=
> + env->vfp.vreg[src2].u16[j]) {
> + env->vfp.vreg[dest].u16[j] =
> + env->vfp.vreg[src1].u16[j];
> + } else {
> + env->vfp.vreg[dest].u16[j] =
> + env->vfp.vreg[src2].u16[j];
> + }
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src1].u32[j] <=
> + env->vfp.vreg[src2].u32[j]) {
> + env->vfp.vreg[dest].u32[j] =
> + env->vfp.vreg[src1].u32[j];
> + } else {
> + env->vfp.vreg[dest].u32[j] =
> + env->vfp.vreg[src2].u32[j];
> + }
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src1].u64[j] <=
> + env->vfp.vreg[src2].u64[j]) {
> + env->vfp.vreg[dest].u64[j] =
> + env->vfp.vreg[src1].u64[j];
> + } else {
> + env->vfp.vreg[dest].u64[j] =
> + env->vfp.vreg[src2].u64[j];
> + }
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vminu_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src2, dest, vlmax;
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if ((uint8_t)env->gpr[rs1] <=
> + env->vfp.vreg[src2].u8[j]) {
> + env->vfp.vreg[dest].u8[j] =
> + env->gpr[rs1];
> + } else {
> + env->vfp.vreg[dest].u8[j] =
> + env->vfp.vreg[src2].u8[j];
> + }
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if ((uint16_t)env->gpr[rs1] <=
> + env->vfp.vreg[src2].u16[j]) {
> + env->vfp.vreg[dest].u16[j] =
> + env->gpr[rs1];
> + } else {
> + env->vfp.vreg[dest].u16[j] =
> + env->vfp.vreg[src2].u16[j];
> + }
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if ((uint32_t)env->gpr[rs1] <=
> + env->vfp.vreg[src2].u32[j]) {
> + env->vfp.vreg[dest].u32[j] =
> + env->gpr[rs1];
> + } else {
> + env->vfp.vreg[dest].u32[j] =
> + env->vfp.vreg[src2].u32[j];
> + }
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if ((uint64_t)extend_gpr(env->gpr[rs1]) <=
> + env->vfp.vreg[src2].u64[j]) {
> + env->vfp.vreg[dest].u64[j] =
> + (uint64_t)extend_gpr(env->gpr[rs1]);
> + } else {
> + env->vfp.vreg[dest].u64[j] =
> + env->vfp.vreg[src2].u64[j];
> + }
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vredminu.vs vd, vs2, vs1, vm # vd[0] = minu( vs1[0] , vs2[*] ) */
> +void VECTOR_HELPER(vredminu_vs)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, src2;
> + uint64_t minu = 0;
> +
> + lmul = vector_get_lmul(env);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> +
> + if (vector_vtype_ill(env)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + if (env->vfp.vstart != 0) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vl = env->vfp.vl;
> + if (vl == 0) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < VLEN / 64; i++) {
> + env->vfp.vreg[rd].u64[i] = 0;
> + }
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> +
> + if (i < vl) {
> + switch (width) {
> + case 8:
> + if (i == 0) {
> + minu = env->vfp.vreg[rs1].u8[0];
> + }
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (minu > env->vfp.vreg[src2].u8[j]) {
> + minu = env->vfp.vreg[src2].u8[j];
> + }
> + }
> + if (i == vl - 1) {
> + env->vfp.vreg[rd].u8[0] = minu;
> + }
> + break;
> + case 16:
> + if (i == 0) {
> + minu = env->vfp.vreg[rs1].u16[0];
> + }
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (minu > env->vfp.vreg[src2].u16[j]) {
> + minu = env->vfp.vreg[src2].u16[j];
> + }
> + }
> + if (i == vl - 1) {
> + env->vfp.vreg[rd].u16[0] = minu;
> + }
> + break;
> + case 32:
> + if (i == 0) {
> + minu = env->vfp.vreg[rs1].u32[0];
> + }
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (minu > env->vfp.vreg[src2].u32[j]) {
> + minu = env->vfp.vreg[src2].u32[j];
> + }
> + }
> + if (i == vl - 1) {
> + env->vfp.vreg[rd].u32[0] = minu;
> + }
> + break;
> + case 64:
> + if (i == 0) {
> + minu = env->vfp.vreg[rs1].u64[0];
> + }
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (minu > env->vfp.vreg[src2].u64[j]) {
> + minu = env->vfp.vreg[src2].u64[j];
> + }
> + }
> + if (i == vl - 1) {
> + env->vfp.vreg[rd].u64[0] = minu;
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vfmin.vv vd, vs2, vs1, vm # Vector-vector */
> +void VECTOR_HELPER(vfmin_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest, src1, src2;
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + src1 = rs1 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f16[j] = float16_minnum(
> + env->vfp.vreg[src1].f16[j],
> + env->vfp.vreg[src2].f16[j],
> + &env->fp_status);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f32[j] = float32_minnum(
> + env->vfp.vreg[src1].f32[j],
> + env->vfp.vreg[src2].f32[j],
> + &env->fp_status);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f64[j] = float64_minnum(
> + env->vfp.vreg[src1].f64[j],
> + env->vfp.vreg[src2].f64[j],
> + &env->fp_status);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + switch (width) {
> + case 16:
> + env->vfp.vreg[dest].f16[j] = 0;
> + break;
> + case 32:
> + env->vfp.vreg[dest].f32[j] = 0;
> + break;
> + case 64:
> + env->vfp.vreg[dest].f64[j] = 0;
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vfmin.vf vd, vs2, rs1, vm # vector-scalar */
> +void VECTOR_HELPER(vfmin_vf)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest, src2;
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f16[j] = float16_minnum(
> + env->fpr[rs1],
> + env->vfp.vreg[src2].f16[j],
> + &env->fp_status);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f32[j] = float32_minnum(
> + env->fpr[rs1],
> + env->vfp.vreg[src2].f32[j],
> + &env->fp_status);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f64[j] = float64_minnum(
> + env->fpr[rs1],
> + env->vfp.vreg[src2].f64[j],
> + &env->fp_status);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + switch (width) {
> + case 16:
> + env->vfp.vreg[dest].f16[j] = 0;
> + break;
> + case 32:
> + env->vfp.vreg[dest].f32[j] = 0;
> + break;
> + case 64:
> + env->vfp.vreg[dest].f64[j] = 0;
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vmin_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src1, src2, dest, vlmax;
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src1 = rs1 + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src1].s8[j] <=
> + env->vfp.vreg[src2].s8[j]) {
> + env->vfp.vreg[dest].s8[j] =
> + env->vfp.vreg[src1].s8[j];
> + } else {
> + env->vfp.vreg[dest].s8[j] =
> + env->vfp.vreg[src2].s8[j];
> + }
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src1].s16[j] <=
> + env->vfp.vreg[src2].s16[j]) {
> + env->vfp.vreg[dest].s16[j] =
> + env->vfp.vreg[src1].s16[j];
> + } else {
> + env->vfp.vreg[dest].s16[j] =
> + env->vfp.vreg[src2].s16[j];
> + }
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src1].s32[j] <=
> + env->vfp.vreg[src2].s32[j]) {
> + env->vfp.vreg[dest].s32[j] =
> + env->vfp.vreg[src1].s32[j];
> + } else {
> + env->vfp.vreg[dest].s32[j] =
> + env->vfp.vreg[src2].s32[j];
> + }
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src1].s64[j] <=
> + env->vfp.vreg[src2].s64[j]) {
> + env->vfp.vreg[dest].s64[j] =
> + env->vfp.vreg[src1].s64[j];
> + } else {
> + env->vfp.vreg[dest].s64[j] =
> + env->vfp.vreg[src2].s64[j];
> + }
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
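> +
> +/* vmin.vx vd, vs2, rs1, vm # vector-scalar */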
> +void VECTOR_HELPER(vmin_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src2, dest, vlmax;
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if ((int8_t)env->gpr[rs1] <=
> + env->vfp.vreg[src2].s8[j]) {
> + env->vfp.vreg[dest].s8[j] =
> + env->gpr[rs1];
> + } else {
> + env->vfp.vreg[dest].s8[j] =
> + env->vfp.vreg[src2].s8[j];
> + }
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if ((int16_t)env->gpr[rs1] <=
> + env->vfp.vreg[src2].s16[j]) {
> + env->vfp.vreg[dest].s16[j] =
> + env->gpr[rs1];
> + } else {
> + env->vfp.vreg[dest].s16[j] =
> + env->vfp.vreg[src2].s16[j];
> + }
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if ((int32_t)env->gpr[rs1] <=
> + env->vfp.vreg[src2].s32[j]) {
> + env->vfp.vreg[dest].s32[j] =
> + env->gpr[rs1];
> + } else {
> + env->vfp.vreg[dest].s32[j] =
> + env->vfp.vreg[src2].s32[j];
> + }
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if ((int64_t)extend_gpr(env->gpr[rs1]) <=
> + env->vfp.vreg[src2].s64[j]) {
> + env->vfp.vreg[dest].s64[j] =
> + (int64_t)extend_gpr(env->gpr[rs1]);
> + } else {
> + env->vfp.vreg[dest].s64[j] =
> + env->vfp.vreg[src2].s64[j];
> + }
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vredmin.vs vd, vs2, vs1, vm # vd[0] = min( vs1[0] , vs2[*] ) */
> +void VECTOR_HELPER(vredmin_vs)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, src2;
> + int64_t min = 0;
> +
> + lmul = vector_get_lmul(env);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> +
> + if (vector_vtype_ill(env)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + if (env->vfp.vstart != 0) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vl = env->vfp.vl;
> + if (vl == 0) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < VLEN / 64; i++) {
> + env->vfp.vreg[rd].u64[i] = 0;
> + }
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> +
> + if (i < vl) {
> + switch (width) {
> + case 8:
> + if (i == 0) {
> + min = env->vfp.vreg[rs1].s8[0];
> + }
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (min > env->vfp.vreg[src2].s8[j]) {
> + min = env->vfp.vreg[src2].s8[j];
> + }
> + }
> + if (i == vl - 1) {
> + env->vfp.vreg[rd].s8[0] = min;
> + }
> + break;
> + case 16:
> + if (i == 0) {
> + min = env->vfp.vreg[rs1].s16[0];
> + }
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (min > env->vfp.vreg[src2].s16[j]) {
> + min = env->vfp.vreg[src2].s16[j];
> + }
> + }
> + if (i == vl - 1) {
> + env->vfp.vreg[rd].s16[0] = min;
> + }
> + break;
> + case 32:
> + if (i == 0) {
> + min = env->vfp.vreg[rs1].s32[0];
> + }
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (min > env->vfp.vreg[src2].s32[j]) {
> + min = env->vfp.vreg[src2].s32[j];
> + }
> + }
> + if (i == vl - 1) {
> + env->vfp.vreg[rd].s32[0] = min;
> + }
> + break;
> + case 64:
> + if (i == 0) {
> + min = env->vfp.vreg[rs1].s64[0];
> + }
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (min > env->vfp.vreg[src2].s64[j]) {
> + min = env->vfp.vreg[src2].s64[j];
> + }
> + }
> + if (i == vl - 1) {
> + env->vfp.vreg[rd].s64[0] = min;
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vfredmin.vs vd, vs2, vs1, vm # Minimum value */
> +void VECTOR_HELPER(vfredmin_vs)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, src2;
> + float16 min16 = 0.0f;
> + float32 min32 = 0.0f;
> + float64 min64 = 0.0f;
> +
> + lmul = vector_get_lmul(env);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> +
> + if (vector_vtype_ill(env)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + if (env->vfp.vstart != 0) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vl = env->vfp.vl;
> + if (vl == 0) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < VLEN / 64; i++) {
> + env->vfp.vreg[rd].u64[i] = 0;
> + }
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> +
> + if (i < vl) {
> + switch (width) {
> + case 16:
> + if (i == 0) {
> + min16 = env->vfp.vreg[rs1].f16[0];
> + }
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + min16 = float16_minnum(min16, env->vfp.vreg[src2].f16[j],
> + &env->fp_status);
> + }
> + if (i == vl - 1) {
> + env->vfp.vreg[rd].f16[0] = min16;
> + }
> + break;
> + case 32:
> + if (i == 0) {
> + min32 = env->vfp.vreg[rs1].f32[0];
> + }
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + min32 = float32_minnum(min32, env->vfp.vreg[src2].f32[j],
> + &env->fp_status);
> + }
> + if (i == vl - 1) {
> + env->vfp.vreg[rd].f32[0] = min32;
> + }
> + break;
> + case 64:
> + if (i == 0) {
> + min64 = env->vfp.vreg[rs1].f64[0];
> + }
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + min64 = float64_minnum(min64, env->vfp.vreg[src2].f64[j],
> + &env->fp_status);
> + }
> + if (i == vl - 1) {
> + env->vfp.vreg[rd].f64[0] = min64;
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + }
> + }
> + env->vfp.vstart = 0;
> +}
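> +
> +/* vmaxu.vv vd, vs2, vs1, vm # Vector-vector */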
> +void VECTOR_HELPER(vmaxu_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src1, src2, dest, vlmax;
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src1 = rs1 + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src1].u8[j] >=
> + env->vfp.vreg[src2].u8[j]) {
> + env->vfp.vreg[dest].u8[j] =
> + env->vfp.vreg[src1].u8[j];
> + } else {
> + env->vfp.vreg[dest].u8[j] =
> + env->vfp.vreg[src2].u8[j];
> + }
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src1].u16[j] >=
> + env->vfp.vreg[src2].u16[j]) {
> + env->vfp.vreg[dest].u16[j] =
> + env->vfp.vreg[src1].u16[j];
> + } else {
> + env->vfp.vreg[dest].u16[j] =
> + env->vfp.vreg[src2].u16[j];
> + }
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src1].u32[j] >=
> + env->vfp.vreg[src2].u32[j]) {
> + env->vfp.vreg[dest].u32[j] =
> + env->vfp.vreg[src1].u32[j];
> + } else {
> + env->vfp.vreg[dest].u32[j] =
> + env->vfp.vreg[src2].u32[j];
> + }
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src1].u64[j] >=
> + env->vfp.vreg[src2].u64[j]) {
> + env->vfp.vreg[dest].u64[j] =
> + env->vfp.vreg[src1].u64[j];
> + } else {
> + env->vfp.vreg[dest].u64[j] =
> + env->vfp.vreg[src2].u64[j];
> + }
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> +
> + env->vfp.vstart = 0;
> +}
> +
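> +/* vmaxu.vx vd, vs2, rs1, vm # vector-scalar */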
> +void VECTOR_HELPER(vmaxu_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src2, dest, vlmax;
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if ((uint8_t)env->gpr[rs1] >=
> + env->vfp.vreg[src2].u8[j]) {
> + env->vfp.vreg[dest].u8[j] =
> + env->gpr[rs1];
> + } else {
> + env->vfp.vreg[dest].u8[j] =
> + env->vfp.vreg[src2].u8[j];
> + }
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if ((uint16_t)env->gpr[rs1] >=
> + env->vfp.vreg[src2].u16[j]) {
> + env->vfp.vreg[dest].u16[j] =
> + env->gpr[rs1];
> + } else {
> + env->vfp.vreg[dest].u16[j] =
> + env->vfp.vreg[src2].u16[j];
> + }
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if ((uint32_t)env->gpr[rs1] >=
> + env->vfp.vreg[src2].u32[j]) {
> + env->vfp.vreg[dest].u32[j] =
> + env->gpr[rs1];
> + } else {
> + env->vfp.vreg[dest].u32[j] =
> + env->vfp.vreg[src2].u32[j];
> + }
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if ((uint64_t)extend_gpr(env->gpr[rs1]) >=
> + env->vfp.vreg[src2].u64[j]) {
> + env->vfp.vreg[dest].u64[j] =
> + (uint64_t)extend_gpr(env->gpr[rs1]);
> + } else {
> + env->vfp.vreg[dest].u64[j] =
> + env->vfp.vreg[src2].u64[j];
> + }
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vredmaxu.vs vd, vs2, vs1, vm # vd[0] = maxu( vs1[0] , vs2[*] ) */
> +void VECTOR_HELPER(vredmaxu_vs)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, src2;
> + uint64_t maxu = 0;
> +
> + lmul = vector_get_lmul(env);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> +
> + if (vector_vtype_ill(env)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + if (env->vfp.vstart != 0) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vl = env->vfp.vl;
> + if (vl == 0) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < VLEN / 64; i++) {
> + env->vfp.vreg[rd].u64[i] = 0;
> + }
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> +
> + if (i < vl) {
> + switch (width) {
> + case 8:
> + if (i == 0) {
> + maxu = env->vfp.vreg[rs1].u8[0];
> + }
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (maxu < env->vfp.vreg[src2].u8[j]) {
> + maxu = env->vfp.vreg[src2].u8[j];
> + }
> + }
> + if (i == vl - 1) {
> + env->vfp.vreg[rd].u8[0] = maxu;
> + }
> + break;
> + case 16:
> + if (i == 0) {
> + maxu = env->vfp.vreg[rs1].u16[0];
> + }
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (maxu < env->vfp.vreg[src2].u16[j]) {
> + maxu = env->vfp.vreg[src2].u16[j];
> + }
> + }
> + if (i == vl - 1) {
> + env->vfp.vreg[rd].u16[0] = maxu;
> + }
> + break;
> + case 32:
> + if (i == 0) {
> + maxu = env->vfp.vreg[rs1].u32[0];
> + }
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (maxu < env->vfp.vreg[src2].u32[j]) {
> + maxu = env->vfp.vreg[src2].u32[j];
> + }
> + }
> + if (i == vl - 1) {
> + env->vfp.vreg[rd].u32[0] = maxu;
> + }
> + break;
> + case 64:
> + if (i == 0) {
> + maxu = env->vfp.vreg[rs1].u64[0];
> + }
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (maxu < env->vfp.vreg[src2].u64[j]) {
> + maxu = env->vfp.vreg[src2].u64[j];
> + }
> + }
> + if (i == vl - 1) {
> + env->vfp.vreg[rd].u64[0] = maxu;
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/*vfmax.vv vd, vs2, vs1, vm # Vector-vector */
> +void VECTOR_HELPER(vfmax_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest, src1, src2;
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + src1 = rs1 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f16[j] = float16_maxnum(
> + env->vfp.vreg[src1].f16[j],
> + env->vfp.vreg[src2].f16[j],
> + &env->fp_status);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f32[j] = float32_maxnum(
> + env->vfp.vreg[src1].f32[j],
> + env->vfp.vreg[src2].f32[j],
> + &env->fp_status);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f64[j] = float64_maxnum(
> + env->vfp.vreg[src1].f64[j],
> + env->vfp.vreg[src2].f64[j],
> + &env->fp_status);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + switch (width) {
> + case 16:
> + env->vfp.vreg[dest].f16[j] = 0;
> + break;
> + case 32:
> + env->vfp.vreg[dest].f32[j] = 0;
> + break;
> + case 64:
> + env->vfp.vreg[dest].f64[j] = 0;
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vfmax.vf vd, vs2, rs1, vm # vector-scalar */
> +void VECTOR_HELPER(vfmax_vf)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest, src2;
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f16[j] = float16_maxnum(
> + env->fpr[rs1],
> + env->vfp.vreg[src2].f16[j],
> + &env->fp_status);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f32[j] = float32_maxnum(
> + env->fpr[rs1],
> + env->vfp.vreg[src2].f32[j],
> + &env->fp_status);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f64[j] = float64_maxnum(
> + env->fpr[rs1],
> + env->vfp.vreg[src2].f64[j],
> + &env->fp_status);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + switch (width) {
> + case 16:
> + env->vfp.vreg[dest].f16[j] = 0;
> + break;
> + case 32:
> + env->vfp.vreg[dest].f32[j] = 0;
> + break;
> + case 64:
> + env->vfp.vreg[dest].f64[j] = 0;
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + }
> + }
> + env->vfp.vstart = 0;
> +}
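> +
> +/* vmax.vv vd, vs2, vs1, vm # Vector-vector */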
> +void VECTOR_HELPER(vmax_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src1, src2, dest, vlmax;
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src1 = rs1 + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src1].s8[j] >=
> + env->vfp.vreg[src2].s8[j]) {
> + env->vfp.vreg[dest].s8[j] =
> + env->vfp.vreg[src1].s8[j];
> + } else {
> + env->vfp.vreg[dest].s8[j] =
> + env->vfp.vreg[src2].s8[j];
> + }
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src1].s16[j] >=
> + env->vfp.vreg[src2].s16[j]) {
> + env->vfp.vreg[dest].s16[j] =
> + env->vfp.vreg[src1].s16[j];
> + } else {
> + env->vfp.vreg[dest].s16[j] =
> + env->vfp.vreg[src2].s16[j];
> + }
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src1].s32[j] >=
> + env->vfp.vreg[src2].s32[j]) {
> + env->vfp.vreg[dest].s32[j] =
> + env->vfp.vreg[src1].s32[j];
> + } else {
> + env->vfp.vreg[dest].s32[j] =
> + env->vfp.vreg[src2].s32[j];
> + }
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src1].s64[j] >=
> + env->vfp.vreg[src2].s64[j]) {
> + env->vfp.vreg[dest].s64[j] =
> + env->vfp.vreg[src1].s64[j];
> + } else {
> + env->vfp.vreg[dest].s64[j] =
> + env->vfp.vreg[src2].s64[j];
> + }
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
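> +
> +/* vmax.vx vd, vs2, rs1, vm # vector-scalar */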
> +void VECTOR_HELPER(vmax_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src2, dest, vlmax;
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if ((int8_t)env->gpr[rs1] >=
> + env->vfp.vreg[src2].s8[j]) {
> + env->vfp.vreg[dest].s8[j] =
> + env->gpr[rs1];
> + } else {
> + env->vfp.vreg[dest].s8[j] =
> + env->vfp.vreg[src2].s8[j];
> + }
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if ((int16_t)env->gpr[rs1] >=
> + env->vfp.vreg[src2].s16[j]) {
> + env->vfp.vreg[dest].s16[j] =
> + env->gpr[rs1];
> + } else {
> + env->vfp.vreg[dest].s16[j] =
> + env->vfp.vreg[src2].s16[j];
> + }
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if ((int32_t)env->gpr[rs1] >=
> + env->vfp.vreg[src2].s32[j]) {
> + env->vfp.vreg[dest].s32[j] =
> + env->gpr[rs1];
> + } else {
> + env->vfp.vreg[dest].s32[j] =
> + env->vfp.vreg[src2].s32[j];
> + }
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if ((int64_t)extend_gpr(env->gpr[rs1]) >=
> + env->vfp.vreg[src2].s64[j]) {
> + env->vfp.vreg[dest].s64[j] =
> + (int64_t)extend_gpr(env->gpr[rs1]);
> + } else {
> + env->vfp.vreg[dest].s64[j] =
> + env->vfp.vreg[src2].s64[j];
> + }
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vredmax.vs vd, vs2, vs1, vm # vd[0] = max( vs1[0] , vs2[*] ) */
> +void VECTOR_HELPER(vredmax_vs)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, src2;
> + int64_t max = 0;
> +
> + lmul = vector_get_lmul(env);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> +
> + if (vector_vtype_ill(env)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + if (env->vfp.vstart != 0) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vl = env->vfp.vl;
> + if (vl == 0) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < VLEN / 64; i++) {
> + env->vfp.vreg[rd].u64[i] = 0;
> + }
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> +
> + if (i < vl) {
> + switch (width) {
> + case 8:
> + if (i == 0) {
> + max = env->vfp.vreg[rs1].s8[0];
> + }
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (max < env->vfp.vreg[src2].s8[j]) {
> + max = env->vfp.vreg[src2].s8[j];
> + }
> + }
> + if (i == vl - 1) {
> + env->vfp.vreg[rd].s8[0] = max;
> + }
> + break;
> + case 16:
> + if (i == 0) {
> + max = env->vfp.vreg[rs1].s16[0];
> + }
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (max < env->vfp.vreg[src2].s16[j]) {
> + max = env->vfp.vreg[src2].s16[j];
> + }
> + }
> + if (i == vl - 1) {
> + env->vfp.vreg[rd].s16[0] = max;
> + }
> + break;
> + case 32:
> + if (i == 0) {
> + max = env->vfp.vreg[rs1].s32[0];
> + }
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (max < env->vfp.vreg[src2].s32[j]) {
> + max = env->vfp.vreg[src2].s32[j];
> + }
> + }
> + if (i == vl - 1) {
> + env->vfp.vreg[rd].s32[0] = max;
> + }
> + break;
> + case 64:
> + if (i == 0) {
> + max = env->vfp.vreg[rs1].s64[0];
> + }
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (max < env->vfp.vreg[src2].s64[j]) {
> + max = env->vfp.vreg[src2].s64[j];
> + }
> + }
> + if (i == vl - 1) {
> + env->vfp.vreg[rd].s64[0] = max;
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vfredmax.vs vd, vs2, vs1, vm # Maximum value */
> +void VECTOR_HELPER(vfredmax_vs)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, src2;
> + float16 max16 = 0.0f;
> + float32 max32 = 0.0f;
> + float64 max64 = 0.0f;
> +
> + lmul = vector_get_lmul(env);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> +
> + if (vector_vtype_ill(env)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + if (env->vfp.vstart != 0) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vl = env->vfp.vl;
> + if (vl == 0) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < VLEN / 64; i++) {
> + env->vfp.vreg[rd].u64[i] = 0;
> + }
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> +
> + if (i < vl) {
> + switch (width) {
> + case 16:
> + if (i == 0) {
> + max16 = env->vfp.vreg[rs1].f16[0];
> + }
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + max16 = float16_maxnum(max16, env->vfp.vreg[src2].f16[j],
> + &env->fp_status);
> + }
> + if (i == vl - 1) {
> + env->vfp.vreg[rd].f16[0] = max16;
> + }
> + break;
> + case 32:
> + if (i == 0) {
> + max32 = env->vfp.vreg[rs1].f32[0];
> + }
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + max32 = float32_maxnum(max32, env->vfp.vreg[src2].f32[j],
> + &env->fp_status);
> + }
> + if (i == vl - 1) {
> + env->vfp.vreg[rd].f32[0] = max32;
> + }
> + break;
> + case 64:
> + if (i == 0) {
> + max64 = env->vfp.vreg[rs1].f64[0];
> + }
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + max64 = float64_maxnum(max64, env->vfp.vreg[src2].f64[j],
> + &env->fp_status);
> + }
> + if (i == vl - 1) {
> + env->vfp.vreg[rd].f64[0] = max64;
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vfsgnj.vv vd, vs2, vs1, vm # Vector-vector */
> +void VECTOR_HELPER(vfsgnj_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest, src1, src2;
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + src1 = rs1 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f16[j] = deposit16(
> + env->vfp.vreg[src1].f16[j],
> + 0,
> + 15,
> + env->vfp.vreg[src2].f16[j]);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f32[j] = deposit32(
> + env->vfp.vreg[src1].f32[j],
> + 0,
> + 31,
> + env->vfp.vreg[src2].f32[j]);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f64[j] = deposit64(
> + env->vfp.vreg[src1].f64[j],
> + 0,
> + 63,
> + env->vfp.vreg[src2].f64[j]);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + switch (width) {
> + case 16:
> + env->vfp.vreg[dest].f16[j] = 0;
> + break;
> + case 32:
> + env->vfp.vreg[dest].f32[j] = 0;
> + break;
> + case 64:
> + env->vfp.vreg[dest].f64[j] = 0;
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vfsgnj.vf vd, vs2, rs1, vm # vector-scalar */
> +void VECTOR_HELPER(vfsgnj_vf)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest, src2;
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f16[j] = deposit16(
> + env->fpr[rs1],
> + 0,
> + 15,
> + env->vfp.vreg[src2].f16[j]);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f32[j] = deposit32(
> + env->fpr[rs1],
> + 0,
> + 31,
> + env->vfp.vreg[src2].f32[j]);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f64[j] = deposit64(
> + env->fpr[rs1],
> + 0,
> + 63,
> + env->vfp.vreg[src2].f64[j]);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + switch (width) {
> + case 16:
> + env->vfp.vreg[dest].f16[j] = 0;
> + break;
> + case 32:
> + env->vfp.vreg[dest].f32[j] = 0;
> + break;
> + case 64:
> + env->vfp.vreg[dest].f64[j] = 0;
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + }
> + }
> + env->vfp.vstart = 0;
> +}
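> +
> +/* vand.vv vd, vs2, vs1, vm # Vector-vector */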
> +void VECTOR_HELPER(vand_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src1, src2, dest, vlmax;
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src1 = rs1 + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u8[j] = env->vfp.vreg[src1].u8[j]
> + & env->vfp.vreg[src2].u8[j];
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u16[j] = env->vfp.vreg[src1].u16[j]
> + & env->vfp.vreg[src2].u16[j];
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u32[j] = env->vfp.vreg[src1].u32[j]
> + & env->vfp.vreg[src2].u32[j];
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u64[j] = env->vfp.vreg[src1].u64[j]
> + & env->vfp.vreg[src2].u64[j];
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> +
> + env->vfp.vstart = 0;
> +}
> +
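> +/* vand.vx vd, vs2, rs1, vm # vector-scalar */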
> +void VECTOR_HELPER(vand_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src2, dest, vlmax;
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u8[j] = env->gpr[rs1]
> + & env->vfp.vreg[src2].u8[j];
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u16[j] = env->gpr[rs1]
> + & env->vfp.vreg[src2].u16[j];
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u32[j] = env->gpr[rs1]
> + & env->vfp.vreg[src2].u32[j];
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u64[j] =
> + (uint64_t)extend_gpr(env->gpr[rs1])
> + & env->vfp.vreg[src2].u64[j];
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
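> +/* vand.vi vd, vs2, imm, vm # vector-immediate */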
> +void VECTOR_HELPER(vand_vi)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src2, dest, vlmax;
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s8[j] = sign_extend(rs1, 5)
> + & env->vfp.vreg[src2].s8[j];
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s16[j] = sign_extend(rs1, 5)
> + & env->vfp.vreg[src2].s16[j];
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s32[j] = sign_extend(rs1, 5)
> + & env->vfp.vreg[src2].s32[j];
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s64[j] = sign_extend(rs1, 5)
> + & env->vfp.vreg[src2].s64[j];
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vfsgnjn.vv vd, vs2, vs1, vm # Vector-vector */
> +void VECTOR_HELPER(vfsgnjn_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest, src1, src2;
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + src1 = rs1 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f16[j] = deposit16(
> + ~env->vfp.vreg[src1].f16[j],
> + 0,
> + 15,
> + env->vfp.vreg[src2].f16[j]);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f32[j] = deposit32(
> + ~env->vfp.vreg[src1].f32[j],
> + 0,
> + 31,
> + env->vfp.vreg[src2].f32[j]);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f64[j] = deposit64(
> + ~env->vfp.vreg[src1].f64[j],
> + 0,
> + 63,
> + env->vfp.vreg[src2].f64[j]);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + switch (width) {
> + case 16:
> + env->vfp.vreg[dest].f16[j] = 0;
> + break;
> + case 32:
> + env->vfp.vreg[dest].f32[j] = 0;
> + break;
> + case 64:
> + env->vfp.vreg[dest].f64[j] = 0;
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +/* vfsgnjn.vf vd, vs2, rs1, vm # vector-scalar */
> +void VECTOR_HELPER(vfsgnjn_vf)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest, src2;
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f16[j] = deposit16(
> + ~env->fpr[rs1],
> + 0,
> + 15,
> + env->vfp.vreg[src2].f16[j]);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f32[j] = deposit32(
> + ~env->fpr[rs1],
> + 0,
> + 31,
> + env->vfp.vreg[src2].f32[j]);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f64[j] = deposit64(
> + ~env->fpr[rs1],
> + 0,
> + 63,
> + env->vfp.vreg[src2].f64[j]);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + switch (width) {
> + case 16:
> + env->vfp.vreg[dest].f16[j] = 0;
> + break;
> + case 32:
> + env->vfp.vreg[dest].f32[j] = 0;
> + break;
> + case 64:
> + env->vfp.vreg[dest].f64[j] = 0;
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + }
> + }
> + env->vfp.vstart = 0;
> +}
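> +
> +/* vor.vv vd, vs2, vs1, vm # Vector-vector */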
> +void VECTOR_HELPER(vor_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src1, src2, dest, vlmax;
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src1 = rs1 + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u8[j] = env->vfp.vreg[src1].u8[j]
> + | env->vfp.vreg[src2].u8[j];
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u16[j] = env->vfp.vreg[src1].u16[j]
> + | env->vfp.vreg[src2].u16[j];
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u32[j] = env->vfp.vreg[src1].u32[j]
> + | env->vfp.vreg[src2].u32[j];
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u64[j] = env->vfp.vreg[src1].u64[j]
> + | env->vfp.vreg[src2].u64[j];
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
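> +
> +/* vor.vx vd, vs2, rs1, vm # vector-scalar */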
> +void VECTOR_HELPER(vor_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src2, dest, vlmax;
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u8[j] = env->gpr[rs1]
> + | env->vfp.vreg[src2].u8[j];
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u16[j] = env->gpr[rs1]
> + | env->vfp.vreg[src2].u16[j];
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u32[j] = env->gpr[rs1]
> + | env->vfp.vreg[src2].u32[j];
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u64[j] =
> + (uint64_t)extend_gpr(env->gpr[rs1])
> + | env->vfp.vreg[src2].u64[j];
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
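> +
> +/* vor.vi vd, vs2, imm, vm # vector-immediate */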
> +void VECTOR_HELPER(vor_vi)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src2, dest, vlmax;
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s8[j] = sign_extend(rs1, 5)
> + | env->vfp.vreg[src2].s8[j];
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s16[j] = sign_extend(rs1, 5)
> + | env->vfp.vreg[src2].s16[j];
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s32[j] = sign_extend(rs1, 5)
> + | env->vfp.vreg[src2].s32[j];
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s64[j] = sign_extend(rs1, 5)
> + | env->vfp.vreg[src2].s64[j];
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vfsgnjx.vv vd, vs2, vs1, vm # Vector-vector */
> +void VECTOR_HELPER(vfsgnjx_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest, src1, src2;
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + src1 = rs1 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f16[j] = deposit16(
> + env->vfp.vreg[src1].f16[j] ^
> + env->vfp.vreg[src2].f16[j],
> + 0,
> + 15,
> + env->vfp.vreg[src2].f16[j]);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f32[j] = deposit32(
> + env->vfp.vreg[src1].f32[j] ^
> + env->vfp.vreg[src2].f32[j],
> + 0,
> + 31,
> + env->vfp.vreg[src2].f32[j]);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f64[j] = deposit64(
> + env->vfp.vreg[src1].f64[j] ^
> + env->vfp.vreg[src2].f64[j],
> + 0,
> + 63,
> + env->vfp.vreg[src2].f64[j]);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + switch (width) {
> + case 16:
> + env->vfp.vreg[dest].f16[j] = 0;
> + break;
> + case 32:
> + env->vfp.vreg[dest].f32[j] = 0;
> + break;
> + case 64:
> + env->vfp.vreg[dest].f64[j] = 0;
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vfsgnjx.vf vd, vs2, rs1, vm # vector-scalar */
> +void VECTOR_HELPER(vfsgnjx_vf)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest, src2;
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f16[j] = deposit16(
> + env->fpr[rs1] ^
> + env->vfp.vreg[src2].f16[j],
> + 0,
> + 15,
> + env->vfp.vreg[src2].f16[j]);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f32[j] = deposit32(
> + env->fpr[rs1] ^
> + env->vfp.vreg[src2].f32[j],
> + 0,
> + 31,
> + env->vfp.vreg[src2].f32[j]);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f64[j] = deposit64(
> + env->fpr[rs1] ^
> + env->vfp.vreg[src2].f64[j],
> + 0,
> + 63,
> + env->vfp.vreg[src2].f64[j]);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + switch (width) {
> + case 16:
> + env->vfp.vreg[dest].f16[j] = 0;
> + break;
> + case 32:
> + env->vfp.vreg[dest].f32[j] = 0;
> + break;
> + case 64:
> + env->vfp.vreg[dest].f64[j] = 0;
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + }
> + }
> + env->vfp.vstart = 0;
> +}
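> +
> +/* vxor.vv vd, vs2, vs1, vm # Vector-vector */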
> +void VECTOR_HELPER(vxor_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src1, src2, dest, vlmax;
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src1 = rs1 + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u8[j] = env->vfp.vreg[src1].u8[j]
> + ^ env->vfp.vreg[src2].u8[j];
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u16[j] = env->vfp.vreg[src1].u16[j]
> + ^ env->vfp.vreg[src2].u16[j];
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u32[j] = env->vfp.vreg[src1].u32[j]
> + ^ env->vfp.vreg[src2].u32[j];
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u64[j] = env->vfp.vreg[src1].u64[j]
> + ^ env->vfp.vreg[src2].u64[j];
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
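> +
> +/* vxor.vx vd, vs2, rs1, vm # vector-scalar */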
> +void VECTOR_HELPER(vxor_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src2, dest, vlmax;
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u8[j] = env->gpr[rs1]
> + ^ env->vfp.vreg[src2].u8[j];
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u16[j] = env->gpr[rs1]
> + ^ env->vfp.vreg[src2].u16[j];
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u32[j] = env->gpr[rs1]
> + ^ env->vfp.vreg[src2].u32[j];
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u64[j] =
> + (uint64_t)extend_gpr(env->gpr[rs1])
> + ^ env->vfp.vreg[src2].u64[j];
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
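> +/* vxor.vi vd, vs2, imm, vm # vector-immediate */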
> +void VECTOR_HELPER(vxor_vi)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src2, dest, vlmax;
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s8[j] = sign_extend(rs1, 5)
> + ^ env->vfp.vreg[src2].s8[j];
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s16[j] = sign_extend(rs1, 5)
> + ^ env->vfp.vreg[src2].s16[j];
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s32[j] = sign_extend(rs1, 5)
> + ^ env->vfp.vreg[src2].s32[j];
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s64[j] = sign_extend(rs1, 5)
> + ^ env->vfp.vreg[src2].s64[j];
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
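> +/* vadc.vvm vd, vs2, vs1, v0 # Vector-vector, carry-in from v0 */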
> +void VECTOR_HELPER(vadc_vvm)(CPURISCVState *env, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src1, src2, dest, vlmax, carry;
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_carry(lmul, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src1 = rs1 + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + carry = vector_get_carry(env, width, lmul, i);
> + env->vfp.vreg[dest].u8[j] = env->vfp.vreg[src1].u8[j]
> + + env->vfp.vreg[src2].u8[j] + carry;
> + break;
> + case 16:
> + carry = vector_get_carry(env, width, lmul, i);
> + env->vfp.vreg[dest].u16[j] = env->vfp.vreg[src1].u16[j]
> + + env->vfp.vreg[src2].u16[j] + carry;
> + break;
> + case 32:
> + carry = vector_get_carry(env, width, lmul, i);
> + env->vfp.vreg[dest].u32[j] = env->vfp.vreg[src1].u32[j]
> + + env->vfp.vreg[src2].u32[j] + carry;
> + break;
> + case 64:
> + carry = vector_get_carry(env, width, lmul, i);
> + env->vfp.vreg[dest].u64[j] = env->vfp.vreg[src1].u64[j]
> + + env->vfp.vreg[src2].u64[j] + carry;
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
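> +
> +/* vadc.vxm vd, vs2, rs1, v0 # vector-scalar, carry-in from v0 */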
> +void VECTOR_HELPER(vadc_vxm)(CPURISCVState *env, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src2, dest, vlmax, carry;
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_carry(lmul, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + carry = vector_get_carry(env, width, lmul, i);
> + env->vfp.vreg[dest].u8[j] = env->gpr[rs1]
> + + env->vfp.vreg[src2].u8[j] + carry;
> + break;
> + case 16:
> + carry = vector_get_carry(env, width, lmul, i);
> + env->vfp.vreg[dest].u16[j] = env->gpr[rs1]
> + + env->vfp.vreg[src2].u16[j] + carry;
> + break;
> + case 32:
> + carry = vector_get_carry(env, width, lmul, i);
> + env->vfp.vreg[dest].u32[j] = env->gpr[rs1]
> + + env->vfp.vreg[src2].u32[j] + carry;
> + break;
> + case 64:
> + carry = vector_get_carry(env, width, lmul, i);
> + env->vfp.vreg[dest].u64[j] = (uint64_t)extend_gpr(env->gpr[rs1])
> + + env->vfp.vreg[src2].u64[j] + carry;
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vadc_vim)(CPURISCVState *env, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src2, dest, vlmax, carry;
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_carry(lmul, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + carry = vector_get_carry(env, width, lmul, i);
> + env->vfp.vreg[dest].u8[j] = sign_extend(rs1, 5)
> + + env->vfp.vreg[src2].u8[j] + carry;
> + break;
> + case 16:
> + carry = vector_get_carry(env, width, lmul, i);
> + env->vfp.vreg[dest].u16[j] = sign_extend(rs1, 5)
> + + env->vfp.vreg[src2].u16[j] + carry;
> + break;
> + case 32:
> + carry = vector_get_carry(env, width, lmul, i);
> + env->vfp.vreg[dest].u32[j] = sign_extend(rs1, 5)
> + + env->vfp.vreg[src2].u32[j] + carry;
> + break;
> + case 64:
> + carry = vector_get_carry(env, width, lmul, i);
> + env->vfp.vreg[dest].u64[j] = sign_extend(rs1, 5)
> + + env->vfp.vreg[src2].u64[j] + carry;
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vmadc_vvm)(CPURISCVState *env, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src1, src2, vlmax, carry;
> + uint64_t tmp;
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> +
> + if (vector_vtype_ill(env)
> + || vector_overlap_dstgp_srcgp(rd, 1, rs1, lmul)
> + || vector_overlap_dstgp_srcgp(rd, 1, rs2, lmul)
> + || (rd == 0)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src1 = rs1 + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + carry = vector_get_carry(env, width, lmul, i);
> + tmp = env->vfp.vreg[src1].u8[j]
> + + env->vfp.vreg[src2].u8[j] + carry;
> + tmp = tmp >> width;
> +
> + vector_mask_result(env, rd, width, lmul, i, tmp);
> + break;
> + case 16:
> + carry = vector_get_carry(env, width, lmul, i);
> + tmp = env->vfp.vreg[src1].u16[j]
> + + env->vfp.vreg[src2].u16[j] + carry;
> + tmp = tmp >> width;
> + vector_mask_result(env, rd, width, lmul, i, tmp);
> + break;
> + case 32:
> + carry = vector_get_carry(env, width, lmul, i);
> + tmp = (uint64_t)env->vfp.vreg[src1].u32[j]
> + + (uint64_t)env->vfp.vreg[src2].u32[j] + carry;
> + tmp = tmp >> width;
> + vector_mask_result(env, rd, width, lmul, i, tmp);
> + break;
> + case 64:
> + carry = vector_get_carry(env, width, lmul, i);
> + tmp = env->vfp.vreg[src1].u64[j]
> + + env->vfp.vreg[src2].u64[j] + carry;
> +
> + if ((tmp < env->vfp.vreg[src1].u64[j] ||
> + tmp < env->vfp.vreg[src2].u64[j])
> + || (env->vfp.vreg[src1].u64[j] == MAX_U64 &&
> + env->vfp.vreg[src2].u64[j] == MAX_U64)) {
> + tmp = 1;
> + } else {
> + tmp = 0;
> + }
> + vector_mask_result(env, rd, width, lmul, i, tmp);
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + if (width <= 64) {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + } else {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + }
> + }
> + env->vfp.vstart = 0;
> +}
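
The 64-bit arm above detects carry-out with a three-way comparison while
the narrower arms shift the widened sum. A branch-light alternative that
works at any width without a wider type, offered as a sketch only, not as
a requested change:

    #include <stdint.h>
    #include <stdbool.h>

    /* Carry-out of a + b + cin (cin in {0, 1}), no widening needed. */
    static bool carry_out_u64(uint64_t a, uint64_t b, unsigned cin)
    {
        uint64_t t = a + b;               /* wraps iff t < a */
        return (t < a) || (t + cin < t);  /* at most one wrap can fire */
    }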
> +void VECTOR_HELPER(vmadc_vxm)(CPURISCVState *env, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src2, vlmax, carry;
> + uint64_t tmp, extend_rs1;
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env)
> + || vector_overlap_dstgp_srcgp(rd, 1, rs2, lmul)
> + || (rd == 0)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + carry = vector_get_carry(env, width, lmul, i);
> + tmp = (uint8_t)env->gpr[rs1]
> + + env->vfp.vreg[src2].u8[j] + carry;
> + tmp = tmp >> width;
> +
> + vector_mask_result(env, rd, width, lmul, i, tmp);
> + break;
> + case 16:
> + carry = vector_get_carry(env, width, lmul, i);
> + tmp = (uint16_t)env->gpr[rs1]
> + + env->vfp.vreg[src2].u16[j] + carry;
> + tmp = tmp >> width;
> + vector_mask_result(env, rd, width, lmul, i, tmp);
> + break;
> + case 32:
> + carry = vector_get_carry(env, width, lmul, i);
> + tmp = (uint64_t)((uint32_t)env->gpr[rs1])
> + + (uint64_t)env->vfp.vreg[src2].u32[j] + carry;
> + tmp = tmp >> width;
> + vector_mask_result(env, rd, width, lmul, i, tmp);
> + break;
> + case 64:
> + carry = vector_get_carry(env, width, lmul, i);
> +
> + extend_rs1 = (uint64_t)extend_gpr(env->gpr[rs1]);
> + tmp = extend_rs1 + env->vfp.vreg[src2].u64[j] + carry;
> + if ((tmp < extend_rs1) ||
> + (carry && (env->vfp.vreg[src2].u64[j] == MAX_U64))) {
> + tmp = 1;
> + } else {
> + tmp = 0;
> + }
> + vector_mask_result(env, rd, width, lmul, i, tmp);
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + if (width <= 64) {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + } else {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vmadc_vim)(CPURISCVState *env, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src2, vlmax, carry;
> + uint64_t tmp;
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> +
> + if (vector_vtype_ill(env)
> + || vector_overlap_dstgp_srcgp(rd, 1, rs2, lmul)
> + || (rd == 0)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + carry = vector_get_carry(env, width, lmul, i);
> + tmp = (uint8_t)sign_extend(rs1, 5)
> + + env->vfp.vreg[src2].u8[j] + carry;
> + tmp = tmp >> width;
> +
> + vector_mask_result(env, rd, width, lmul, i, tmp);
> + break;
> + case 16:
> + carry = vector_get_carry(env, width, lmul, i);
> + tmp = (uint16_t)sign_extend(rs1, 5)
> + + env->vfp.vreg[src2].u16[j] + carry;
> + tmp = tmp >> width;
> + vector_mask_result(env, rd, width, lmul, i, tmp);
> + break;
> + case 32:
> + carry = vector_get_carry(env, width, lmul, i);
> + tmp = (uint64_t)((uint32_t)sign_extend(rs1, 5))
> + + (uint64_t)env->vfp.vreg[src2].u32[j] + carry;
> + tmp = tmp >> width;
> + vector_mask_result(env, rd, width, lmul, i, tmp);
> + break;
> + case 64:
> + carry = vector_get_carry(env, width, lmul, i);
> + tmp = (uint64_t)sign_extend(rs1, 5)
> + + env->vfp.vreg[src2].u64[j] + carry;
> +
> + if ((tmp < (uint64_t)sign_extend(rs1, 5) ||
> + tmp < env->vfp.vreg[src2].u64[j])
> + || ((uint64_t)sign_extend(rs1, 5) == MAX_U64 &&
> + env->vfp.vreg[src2].u64[j] == MAX_U64)) {
> + tmp = 1;
> + } else {
> + tmp = 0;
> + }
> + vector_mask_result(env, rd, width, lmul, i, tmp);
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + if (width <= 64) {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + } else {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vsbc_vvm)(CPURISCVState *env, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src1, src2, dest, vlmax, carry;
> +
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_carry(lmul, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src1 = rs1 + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + carry = vector_get_carry(env, width, lmul, i);
> + env->vfp.vreg[dest].u8[j] = env->vfp.vreg[src2].u8[j]
> + - env->vfp.vreg[src1].u8[j] - carry;
> + break;
> + case 16:
> + carry = vector_get_carry(env, width, lmul, i);
> + env->vfp.vreg[dest].u16[j] = env->vfp.vreg[src2].u16[j]
> + - env->vfp.vreg[src1].u16[j] - carry;
> + break;
> + case 32:
> + carry = vector_get_carry(env, width, lmul, i);
> + env->vfp.vreg[dest].u32[j] = env->vfp.vreg[src2].u32[j]
> + - env->vfp.vreg[src1].u32[j] - carry;
> + break;
> + case 64:
> + carry = vector_get_carry(env, width, lmul, i);
> + env->vfp.vreg[dest].u64[j] = env->vfp.vreg[src2].u64[j]
> + - env->vfp.vreg[src1].u64[j] - carry;
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +void VECTOR_HELPER(vsbc_vxm)(CPURISCVState *env, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src2, dest, vlmax, carry;
> +
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_carry(lmul, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + carry = vector_get_carry(env, width, lmul, i);
> + env->vfp.vreg[dest].u8[j] = env->vfp.vreg[src2].u8[j]
> + - env->gpr[rs1] - carry;
> + break;
> + case 16:
> + carry = vector_get_carry(env, width, lmul, i);
> + env->vfp.vreg[dest].u16[j] = env->vfp.vreg[src2].u16[j]
> + - env->gpr[rs1] - carry;
> + break;
> + case 32:
> + carry = vector_get_carry(env, width, lmul, i);
> + env->vfp.vreg[dest].u32[j] = env->vfp.vreg[src2].u32[j]
> + - env->gpr[rs1] - carry;
> + break;
> + case 64:
> + carry = vector_get_carry(env, width, lmul, i);
> + env->vfp.vreg[dest].u64[j] = env->vfp.vreg[src2].u64[j]
> + - (uint64_t)extend_gpr(env->gpr[rs1]) - carry;
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +void VECTOR_HELPER(vmsbc_vvm)(CPURISCVState *env, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src1, src2, vlmax, carry;
> + uint64_t tmp;
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> +
> + if (vector_vtype_ill(env)
> + || vector_overlap_dstgp_srcgp(rd, 1, rs1, lmul)
> + || vector_overlap_dstgp_srcgp(rd, 1, rs2, lmul)
> + || (rd == 0)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src1 = rs1 + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + carry = vector_get_carry(env, width, lmul, i);
> + tmp = env->vfp.vreg[src2].u8[j]
> + - env->vfp.vreg[src1].u8[j] - carry;
> + tmp = (tmp >> width) & 0x1;
> +
> + vector_mask_result(env, rd, width, lmul, i, tmp);
> + break;
> + case 16:
> + carry = vector_get_carry(env, width, lmul, i);
> + tmp = env->vfp.vreg[src2].u16[j]
> + - env->vfp.vreg[src1].u16[j] - carry;
> + tmp = (tmp >> width) & 0x1;
> + vector_mask_result(env, rd, width, lmul, i, tmp);
> + break;
> + case 32:
> + carry = vector_get_carry(env, width, lmul, i);
> + tmp = (uint64_t)env->vfp.vreg[src2].u32[j]
> + - (uint64_t)env->vfp.vreg[src1].u32[j] - carry;
> + tmp = (tmp >> width) & 0x1;
> + vector_mask_result(env, rd, width, lmul, i, tmp);
> + break;
> + case 64:
> + carry = vector_get_carry(env, width, lmul, i);
> + tmp = env->vfp.vreg[src2].u64[j]
> + - env->vfp.vreg[src1].u64[j] - carry;
> +
> + if (((env->vfp.vreg[src1].u64[j] == MAX_U64) && carry) ||
> + env->vfp.vreg[src2].u64[j] <
> + (env->vfp.vreg[src1].u64[j] + carry)) {
> + tmp = 1;
> + } else {
> + tmp = 0;
> + }
> + vector_mask_result(env, rd, width, lmul, i, tmp);
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + if (width <= 64) {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + } else {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + }
> + }
> + env->vfp.vstart = 0;
> +}
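
Same remark for the subtract-with-borrow masks: the 64-bit arm's check is
equivalent to asking whether a < b + bin in unwrapped arithmetic. A compact
restatement of that invariant (sketch only):

    #include <stdint.h>
    #include <stdbool.h>

    /* Borrow-out of a - b - bin (bin in {0, 1}). */
    static bool borrow_out_u64(uint64_t a, uint64_t b, unsigned bin)
    {
        return (a < b) || ((a == b) && bin != 0);
    }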
> +void VECTOR_HELPER(vmsbc_vxm)(CPURISCVState *env, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src2, vlmax, carry;
> + uint64_t tmp, extend_rs1;
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> +
> + if (vector_vtype_ill(env)
> + || vector_overlap_dstgp_srcgp(rd, 1, rs2, lmul)
> + || (rd == 0)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + carry = vector_get_carry(env, width, lmul, i);
> + tmp = env->vfp.vreg[src2].u8[j]
> + - (uint8_t)env->gpr[rs1] - carry;
> + tmp = (tmp >> width) & 0x1;
> + vector_mask_result(env, rd, width, lmul, i, tmp);
> + break;
> + case 16:
> + carry = vector_get_carry(env, width, lmul, i);
> + tmp = env->vfp.vreg[src2].u16[j]
> + - (uint16_t)env->gpr[rs1] - carry;
> + tmp = (tmp >> width) & 0x1;
> + vector_mask_result(env, rd, width, lmul, i, tmp);
> + break;
> + case 32:
> + carry = vector_get_carry(env, width, lmul, i);
> + tmp = (uint64_t)env->vfp.vreg[src2].u32[j]
> + - (uint64_t)((uint32_t)env->gpr[rs1]) - carry;
> + tmp = (tmp >> width) & 0x1;
> + vector_mask_result(env, rd, width, lmul, i, tmp);
> + break;
> + case 64:
> + carry = vector_get_carry(env, width, lmul, i);
> +
> + extend_rs1 = (uint64_t)extend_gpr(env->gpr[rs1]);
> + tmp = env->vfp.vreg[src2].u64[j] - extend_rs1 - carry;
> +
> + if ((tmp > env->vfp.vreg[src2].u64[j]) ||
> + ((extend_rs1 == MAX_U64) && carry)) {
> + tmp = 1;
> + } else {
> + tmp = 0;
> + }
> + vector_mask_result(env, rd, width, lmul, i, tmp);
> + break;
> +
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + if (width <= 64) {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + } else {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vmpopc.m rd, vs2, v0.t # x[rd] = sum_i ( vs2[i].LSB && v0[i].LSB ) */
> +void VECTOR_HELPER(vmpopc_m)(CPURISCVState *env, uint32_t vm,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i;
> + if (vector_vtype_ill(env)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> + if (env->vfp.vstart != 0) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> + env->gpr[rd] = 0;
> +
> + for (i = 0; i < vlmax; i++) {
> + if (i < vl) {
> + if (vector_mask_reg(env, rs2, width, lmul, i) &&
> + vector_elem_mask(env, vm, width, lmul, i)) {
> + env->gpr[rd]++;
> + }
> + }
> + }
> + env->vfp.vstart = 0;
> +}
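
If the mask register were viewed as packed words instead of going through
vector_mask_reg() per element, the population count would collapse to a
builtin; a sketch under that (assumed) layout, using the GCC/Clang
popcount builtin:

    #include <stdint.h>

    /* Count set bits among the first vl bits of one packed mask word. */
    static unsigned mask_popcount(uint64_t mask, unsigned vl)
    {
        uint64_t live = (vl >= 64) ? mask
                                   : (mask & ((UINT64_C(1) << vl) - 1));
        return (unsigned)__builtin_popcountll(live);
    }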
> +
> +/* vmfirst.m rd, vs2, vm */
> +void VECTOR_HELPER(vmfirst_m)(CPURISCVState *env, uint32_t vm,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i;
> + if (vector_vtype_ill(env)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> + if (env->vfp.vstart != 0) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + /* default when no mask bit is set; also covers vl == vlmax */
> + env->gpr[rd] = -1;
> + for (i = 0; i < vlmax; i++) {
> + if (i < vl) {
> + if (vector_mask_reg(env, rs2, width, lmul, i) &&
> + vector_elem_mask(env, vm, width, lmul, i)) {
> + env->gpr[rd] = i;
> + break;
> + }
> + } else {
> + env->gpr[rd] = -1;
> + }
> + }
> +
> + env->vfp.vstart = 0;
> +}
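
Likewise vmfirst.m on a packed word is a count-trailing-zeros; a sketch
under the same assumed layout (hypothetical helper, GCC/Clang builtin):

    #include <stdint.h>

    /* Index of the first set bit among the first vl bits, or -1. */
    static int mask_first(uint64_t mask, unsigned vl)
    {
        uint64_t live = (vl >= 64) ? mask
                                   : (mask & ((UINT64_C(1) << vl) - 1));
        return live ? __builtin_ctzll(live) : -1;
    }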
> +
> +void VECTOR_HELPER(vmerge_vvm)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl, idx, pos;
> + uint32_t lmul, width, src1, src2, dest, vlmax;
> +
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src1 = rs1 + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vm == 0) {
> + vector_get_layout(env, width, lmul, i, &idx, &pos);
> + if (((env->vfp.vreg[0].u8[idx] >> pos) & 0x1) == 0) {
> + env->vfp.vreg[dest].u8[j] =
> + env->vfp.vreg[src2].u8[j];
> + } else {
> + env->vfp.vreg[dest].u8[j] =
> + env->vfp.vreg[src1].u8[j];
> + }
> + } else {
> + if (rs2 != 0) {
> + riscv_raise_exception(env,
> + RISCV_EXCP_ILLEGAL_INST, GETPC());
> + }
> + env->vfp.vreg[dest].u8[j] = env->vfp.vreg[src1].u8[j];
> + }
> + break;
> + case 16:
> + if (vm == 0) {
> + vector_get_layout(env, width, lmul, i, &idx, &pos);
> + if (((env->vfp.vreg[0].u8[idx] >> pos) & 0x1) == 0) {
> + env->vfp.vreg[dest].u16[j] =
> + env->vfp.vreg[src2].u16[j];
> + } else {
> + env->vfp.vreg[dest].u16[j] =
> + env->vfp.vreg[src1].u16[j];
> + }
> + } else {
> + if (rs2 != 0) {
> + riscv_raise_exception(env,
> + RISCV_EXCP_ILLEGAL_INST, GETPC());
> + }
> + env->vfp.vreg[dest].u16[j] = env->vfp.vreg[src1].u16[j];
> + }
> + break;
> + case 32:
> + if (vm == 0) {
> + vector_get_layout(env, width, lmul, i, &idx, &pos);
> + if (((env->vfp.vreg[0].u8[idx] >> pos) & 0x1) == 0) {
> + env->vfp.vreg[dest].u32[j] =
> + env->vfp.vreg[src2].u32[j];
> + } else {
> + env->vfp.vreg[dest].u32[j] =
> + env->vfp.vreg[src1].u32[j];
> + }
> + } else {
> + if (rs2 != 0) {
> + riscv_raise_exception(env,
> + RISCV_EXCP_ILLEGAL_INST, GETPC());
> + }
> + env->vfp.vreg[dest].u32[j] = env->vfp.vreg[src1].u32[j];
> + }
> + break;
> + case 64:
> + if (vm == 0) {
> + vector_get_layout(env, width, lmul, i, &idx, &pos);
> + if (((env->vfp.vreg[0].u8[idx] >> pos) & 0x1) == 0) {
> + env->vfp.vreg[dest].u64[j] =
> + env->vfp.vreg[src2].u64[j];
> + } else {
> + env->vfp.vreg[dest].u64[j] =
> + env->vfp.vreg[src1].u64[j];
> + }
> + } else {
> + if (rs2 != 0) {
> + riscv_raise_exception(env,
> + RISCV_EXCP_ILLEGAL_INST, GETPC());
> + }
> + env->vfp.vreg[dest].u64[j] = env->vfp.vreg[src1].u64[j];
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
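
The four case arms above all implement the same select; factored to one
element it is just the following (merge_elem is a hypothetical name):

    #include <stdint.h>

    /* One vmerge element: the mask LSB picks the vs1 operand over vs2. */
    static uint64_t merge_elem(uint64_t vs2, uint64_t vs1, unsigned mask_lsb)
    {
        return (mask_lsb & 1) ? vs1 : vs2;
    }

which also makes the unmasked (vm=1) path read as a plain move of vs1.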
> +void VECTOR_HELPER(vmerge_vxm)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl, idx, pos;
> + uint32_t lmul, width, src2, dest, vlmax;
> +
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vm == 0) {
> + vector_get_layout(env, width, lmul, i, &idx, &pos);
> + if (((env->vfp.vreg[0].u8[idx] >> pos) & 0x1) == 0) {
> + env->vfp.vreg[dest].u8[j] =
> + env->vfp.vreg[src2].u8[j];
> + } else {
> + env->vfp.vreg[dest].u8[j] = env->gpr[rs1];
> + }
> + } else {
> + if (rs2 != 0) {
> + riscv_raise_exception(env,
> + RISCV_EXCP_ILLEGAL_INST, GETPC());
> + }
> + env->vfp.vreg[dest].u8[j] = env->gpr[rs1];
> + }
> + break;
> + case 16:
> + if (vm == 0) {
> + vector_get_layout(env, width, lmul, i, &idx, &pos);
> + if (((env->vfp.vreg[0].u8[idx] >> pos) & 0x1) == 0) {
> + env->vfp.vreg[dest].u16[j] =
> + env->vfp.vreg[src2].u16[j];
> + } else {
> + env->vfp.vreg[dest].u16[j] = env->gpr[rs1];
> + }
> + } else {
> + if (rs2 != 0) {
> + riscv_raise_exception(env,
> + RISCV_EXCP_ILLEGAL_INST, GETPC());
> + }
> + env->vfp.vreg[dest].u16[j] = env->gpr[rs1];
> + }
> + break;
> + case 32:
> + if (vm == 0) {
> + vector_get_layout(env, width, lmul, i, &idx, &pos);
> + if (((env->vfp.vreg[0].u8[idx] >> pos) & 0x1) == 0) {
> + env->vfp.vreg[dest].u32[j] =
> + env->vfp.vreg[src2].u32[j];
> + } else {
> + env->vfp.vreg[dest].u32[j] = env->gpr[rs1];
> + }
> + } else {
> + if (rs2 != 0) {
> + riscv_raise_exception(env,
> + RISCV_EXCP_ILLEGAL_INST, GETPC());
> + }
> + env->vfp.vreg[dest].u32[j] = env->gpr[rs1];
> + }
> + break;
> + case 64:
> + if (vm == 0) {
> + vector_get_layout(env, width, lmul, i, &idx, &pos);
> + if (((env->vfp.vreg[0].u8[idx] >> pos) & 0x1) == 0) {
> + env->vfp.vreg[dest].u64[j] =
> + env->vfp.vreg[src2].u64[j];
> + } else {
> + env->vfp.vreg[dest].u64[j] =
> + (uint64_t)extend_gpr(env->gpr[rs1]);
> + }
> + } else {
> + if (rs2 != 0) {
> + riscv_raise_exception(env,
> + RISCV_EXCP_ILLEGAL_INST, GETPC());
> + }
> + env->vfp.vreg[dest].u64[j] =
> + (uint64_t)extend_gpr(env->gpr[rs1]);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +void VECTOR_HELPER(vmerge_vim)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl, idx, pos;
> + uint32_t lmul, width, src2, dest, vlmax;
> +
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vm == 0) {
> + vector_get_layout(env, width, lmul, i, &idx, &pos);
> + if (((env->vfp.vreg[0].u8[idx] >> pos) & 0x1) == 0) {
> + env->vfp.vreg[dest].u8[j] =
> + env->vfp.vreg[src2].u8[j];
> + } else {
> + env->vfp.vreg[dest].u8[j] =
> + (uint8_t)sign_extend(rs1, 5);
> + }
> + } else {
> + if (rs2 != 0) {
> + riscv_raise_exception(env,
> + RISCV_EXCP_ILLEGAL_INST, GETPC());
> + }
> + env->vfp.vreg[dest].u8[j] = (uint8_t)sign_extend(rs1, 5);
> + }
> + break;
> + case 16:
> + if (vm == 0) {
> + vector_get_layout(env, width, lmul, i, &idx, &pos);
> + if (((env->vfp.vreg[0].u8[idx] >> pos) & 0x1) == 0) {
> + env->vfp.vreg[dest].u16[j] =
> + env->vfp.vreg[src2].u16[j];
> + } else {
> + env->vfp.vreg[dest].u16[j] =
> + (uint16_t)sign_extend(rs1, 5);
> + }
> + } else {
> + if (rs2 != 0) {
> + riscv_raise_exception(env,
> + RISCV_EXCP_ILLEGAL_INST, GETPC());
> + }
> + env->vfp.vreg[dest].u16[j] = (uint16_t)sign_extend(rs1, 5);
> + }
> + break;
> + case 32:
> + if (vm == 0) {
> + vector_get_layout(env, width, lmul, i, &idx, &pos);
> + if (((env->vfp.vreg[0].u8[idx] >> pos) & 0x1) == 0) {
> + env->vfp.vreg[dest].u32[j] =
> + env->vfp.vreg[src2].u32[j];
> + } else {
> + env->vfp.vreg[dest].u32[j] =
> + (uint32_t)sign_extend(rs1, 5);
> + }
> + } else {
> + if (rs2 != 0) {
> + riscv_raise_exception(env,
> + RISCV_EXCP_ILLEGAL_INST, GETPC());
> + }
> + env->vfp.vreg[dest].u32[j] = (uint32_t)sign_extend(rs1, 5);
> + }
> + break;
> + case 64:
> + if (vm == 0) {
> + vector_get_layout(env, width, lmul, i, &idx, &pos);
> + if (((env->vfp.vreg[0].u8[idx] >> pos) & 0x1) == 0) {
> + env->vfp.vreg[dest].u64[j] =
> + env->vfp.vreg[src2].u64[j];
> + } else {
> + env->vfp.vreg[dest].u64[j] =
> + (uint64_t)sign_extend(rs1, 5);
> + }
> + } else {
> + if (rs2 != 0) {
> + riscv_raise_exception(env,
> + RISCV_EXCP_ILLEGAL_INST, GETPC());
> + }
> + env->vfp.vreg[dest].u64[j] = (uint64_t)sign_extend(rs1, 5);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vfmerge.vfm vd, vs2, rs1, v0 # vd[i] = v0[i].LSB ? f[rs1] : vs2[i] */
> +void VECTOR_HELPER(vfmerge_vfm)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest, src2;
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + /* vfmv.v.f vd, rs1 # vd[i] = f[rs1]; */
> + if (vm && (rs2 != 0)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f16[j] = env->fpr[rs1];
> + } else {
> + env->vfp.vreg[dest].f16[j] = env->vfp.vreg[src2].f16[j];
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f32[j] = env->fpr[rs1];
> + } else {
> + env->vfp.vreg[dest].f32[j] = env->vfp.vreg[src2].f32[j];
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f64[j] = env->fpr[rs1];
> + } else {
> + env->vfp.vreg[dest].f64[j] = env->vfp.vreg[src2].f64[j];
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_fcommon(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vmseq_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src1, src2, vlmax;
> +
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src1 = rs1 + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src1].u8[j] ==
> + env->vfp.vreg[src2].u8[j]) {
> + vector_mask_result(env, rd, width, lmul, i, 1);
> + } else {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src1].u16[j] ==
> + env->vfp.vreg[src2].u16[j]) {
> + vector_mask_result(env, rd, width, lmul, i, 1);
> + } else {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src1].u32[j] ==
> + env->vfp.vreg[src2].u32[j]) {
> + vector_mask_result(env, rd, width, lmul, i, 1);
> + } else {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src1].u64[j] ==
> + env->vfp.vreg[src2].u64[j]) {
> + vector_mask_result(env, rd, width, lmul, i, 1);
> + } else {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + if (width <= 64) {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + } else {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + }
> + }
> + env->vfp.vstart = 0;
> +}
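
Every compare helper in this family funnels through vector_mask_result();
for readers, its per-element effect is a single bit store, roughly as
below (a sketch assuming a byte-addressed packed mask and ignoring the
lmul/width layout handled by the real helper):

    #include <stdint.h>

    /* Write one mask bit: set or clear bit i of a packed byte array. */
    static void set_mask_bit(uint8_t *mask, unsigned i, unsigned value)
    {
        if (value & 1) {
            mask[i / 8] |= (uint8_t)(1u << (i % 8));
        } else {
            mask[i / 8] &= (uint8_t)~(1u << (i % 8));
        }
    }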
> +void VECTOR_HELPER(vmseq_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src2, vlmax;
> +
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs2, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if ((uint8_t)env->gpr[rs1] == env->vfp.vreg[src2].u8[j]) {
> + vector_mask_result(env, rd, width, lmul, i, 1);
> + } else {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if ((uint16_t)env->gpr[rs1] == env->vfp.vreg[src2].u16[j]) {
> + vector_mask_result(env, rd, width, lmul, i, 1);
> + } else {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if ((uint32_t)env->gpr[rs1] == env->vfp.vreg[src2].u32[j]) {
> + vector_mask_result(env, rd, width, lmul, i, 1);
> + } else {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if ((uint64_t)extend_gpr(env->gpr[rs1]) ==
> + env->vfp.vreg[src2].u64[j]) {
> + vector_mask_result(env, rd, width, lmul, i, 1);
> + } else {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + if (width <= 64) {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + } else {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +void VECTOR_HELPER(vmseq_vi)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src2, vlmax;
> +
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs2, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if ((uint8_t)sign_extend(rs1, 5)
> + == env->vfp.vreg[src2].u8[j]) {
> + vector_mask_result(env, rd, width, lmul, i, 1);
> + } else {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if ((uint16_t)sign_extend(rs1, 5)
> + == env->vfp.vreg[src2].u16[j]) {
> + vector_mask_result(env, rd, width, lmul, i, 1);
> + } else {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if ((uint32_t)sign_extend(rs1, 5)
> + == env->vfp.vreg[src2].u32[j]) {
> + vector_mask_result(env, rd, width, lmul, i, 1);
> + } else {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if ((uint64_t)sign_extend(rs1, 5) ==
> + env->vfp.vreg[src2].u64[j]) {
> + vector_mask_result(env, rd, width, lmul, i, 1);
> + } else {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + if (width <= 64) {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + } else {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vmandnot.mm vd, vs2, vs1 # vd = vs2 & ~vs1 */
> +void VECTOR_HELPER(vmandnot_mm)(CPURISCVState *env, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, i, vlmax;
> + uint32_t tmp;
> +
> + if (vector_vtype_ill(env)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + vl = env->vfp.vl;
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + for (i = 0; i < vlmax; i++) {
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + tmp = ~vector_mask_reg(env, rs1, width, lmul, i) &
> + vector_mask_reg(env, rs2, width, lmul, i);
> + vector_mask_result(env, rd, width, lmul, i, tmp);
> + } else {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> +
> + env->vfp.vstart = 0;
> +}
> +
> +/* vmfeq.vv vd, vs2, vs1, vm # Vector-vector */
> +void VECTOR_HELPER(vmfeq_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, src1, src2, result;
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + src1 = rs1 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + result = float16_eq_quiet(env->vfp.vreg[src1].f16[j],
> + env->vfp.vreg[src2].f16[j],
> + &env->fp_status);
> + vector_mask_result(env, rd, width, lmul, i, result);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + result = float32_eq_quiet(env->vfp.vreg[src1].f32[j],
> + env->vfp.vreg[src2].f32[j],
> + &env->fp_status);
> + vector_mask_result(env, rd, width, lmul, i, result);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + result = float64_eq_quiet(env->vfp.vreg[src1].f64[j],
> + env->vfp.vreg[src2].f64[j],
> + &env->fp_status);
> + vector_mask_result(env, rd, width, lmul, i, result);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + switch (width) {
> + case 16:
> + case 32:
> + case 64:
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vmfeq.vf vd, vs2, rs1, vm # vector-scalar */
> +void VECTOR_HELPER(vmfeq_vf)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, src2, result;
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + result = float16_eq_quiet(env->fpr[rs1],
> + env->vfp.vreg[src2].f16[j],
> + &env->fp_status);
> + vector_mask_result(env, rd, width, lmul, i, result);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + result = float32_eq_quiet(env->fpr[rs1],
> + env->vfp.vreg[src2].f32[j],
> + &env->fp_status);
> + vector_mask_result(env, rd, width, lmul, i, result);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + result = float64_eq_quiet(env->fpr[rs1],
> + env->vfp.vreg[src2].f64[j],
> + &env->fp_status);
> + vector_mask_result(env, rd, width, lmul, i, result);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + switch (width) {
> + case 16:
> + case 32:
> + case 64:
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vmsne_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src1, src2, vlmax;
> +
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src1 = rs1 + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src1].u8[j] !=
> + env->vfp.vreg[src2].u8[j]) {
> + vector_mask_result(env, rd, width, lmul, i, 1);
> + } else {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src1].u16[j] !=
> + env->vfp.vreg[src2].u16[j]) {
> + vector_mask_result(env, rd, width, lmul, i, 1);
> + } else {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src1].u32[j] !=
> + env->vfp.vreg[src2].u32[j]) {
> + vector_mask_result(env, rd, width, lmul, i, 1);
> + } else {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src1].u64[j] !=
> + env->vfp.vreg[src2].u64[j]) {
> + vector_mask_result(env, rd, width, lmul, i, 1);
> + } else {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + if (width <= 64) {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + } else {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +void VECTOR_HELPER(vmsne_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src2, vlmax;
> +
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs2, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if ((uint8_t)env->gpr[rs1] != env->vfp.vreg[src2].u8[j]) {
> + vector_mask_result(env, rd, width, lmul, i, 1);
> + } else {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if ((uint16_t)env->gpr[rs1] != env->vfp.vreg[src2].u16[j]) {
> + vector_mask_result(env, rd, width, lmul, i, 1);
> + } else {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if ((uint32_t)env->gpr[rs1] != env->vfp.vreg[src2].u32[j]) {
> + vector_mask_result(env, rd, width, lmul, i, 1);
> + } else {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if ((uint64_t)extend_gpr(env->gpr[rs1]) !=
> + env->vfp.vreg[src2].u64[j]) {
> + vector_mask_result(env, rd, width, lmul, i, 1);
> + } else {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + if (width <= 64) {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + } else {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +void VECTOR_HELPER(vmsne_vi)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src2, vlmax;
> +
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs2, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if ((uint8_t)sign_extend(rs1, 5)
> + != env->vfp.vreg[src2].u8[j]) {
> + vector_mask_result(env, rd, width, lmul, i, 1);
> + } else {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if ((uint16_t)sign_extend(rs1, 5)
> + != env->vfp.vreg[src2].u16[j]) {
> + vector_mask_result(env, rd, width, lmul, i, 1);
> + } else {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if ((uint32_t)sign_extend(rs1, 5)
> + != env->vfp.vreg[src2].u32[j]) {
> + vector_mask_result(env, rd, width, lmul, i, 1);
> + } else {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if ((uint64_t)sign_extend(rs1, 5) !=
> + env->vfp.vreg[src2].u64[j]) {
> + vector_mask_result(env, rd, width, lmul, i, 1);
> + } else {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + if (width <= 64) {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + } else {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vmand.mm vd, vs2, vs1 # vd = vs2 & vs1 */
> +void VECTOR_HELPER(vmand_mm)(CPURISCVState *env, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, i, vlmax;
> + uint32_t tmp;
> +
> + if (vector_vtype_ill(env)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + vl = env->vfp.vl;
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + for (i = 0; i < vlmax; i++) {
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + tmp = vector_mask_reg(env, rs1, width, lmul, i) &
> + vector_mask_reg(env, rs2, width, lmul, i);
> + vector_mask_result(env, rd, width, lmul, i, tmp);
> + } else {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> +
> + env->vfp.vstart = 0;
> +}
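
The mask-logical helpers in this hunk differ only in one operator; on a
packed-word view of the mask (again an assumed layout, not the patch's
per-bit accessors) they would be one-liners:

    #include <stdint.h>

    /* Packed-word equivalents of the vmand/vmandnot/vmor loops. */
    static uint64_t mand(uint64_t s2, uint64_t s1)    { return s2 & s1;  }
    static uint64_t mandnot(uint64_t s2, uint64_t s1) { return s2 & ~s1; }
    static uint64_t mor(uint64_t s2, uint64_t s1)     { return s2 | s1;  }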
> +
> +/* vmfle.vv vd, vs2, vs1, vm # Vector-vector */
> +void VECTOR_HELPER(vmfle_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, src1, src2, result;
> +
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + src1 = rs1 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + result = float16_le(env->vfp.vreg[src2].f16[j],
> + env->vfp.vreg[src1].f16[j],
> + &env->fp_status);
> + vector_mask_result(env, rd, width, lmul, i, result);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + result = float32_le(env->vfp.vreg[src2].f32[j],
> + env->vfp.vreg[src1].f32[j],
> + &env->fp_status);
> + vector_mask_result(env, rd, width, lmul, i, result);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + result = float64_le(env->vfp.vreg[src2].f64[j],
> + env->vfp.vreg[src1].f64[j],
> + &env->fp_status);
> + vector_mask_result(env, rd, width, lmul, i, result);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + switch (width) {
> + case 16:
> + case 32:
> + case 64:
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + }
> + }
> + env->vfp.vstart = 0;
> +}
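
Worth noting for reviewers: vmfle uses the signaling float*_le comparisons
while vmfeq uses the *_eq_quiet ones, matching IEEE 754's rule that ordered
relations signal invalid on any NaN but equality does not. A host-side
demonstration of that split in plain C (not the softfloat API; strictly
this also wants #pragma STDC FENV_ACCESS ON, compiler permitting):

    #include <fenv.h>
    #include <math.h>
    #include <stdio.h>

    int main(void)
    {
        volatile double qnan = nan(""), one = 1.0;
        feclearexcept(FE_ALL_EXCEPT);
        (void)(qnan == one);            /* quiet compare: no invalid */
        printf("==: invalid=%d\n", fetestexcept(FE_INVALID) != 0);
        feclearexcept(FE_ALL_EXCEPT);
        (void)(qnan < one);             /* ordered compare: raises invalid */
        printf("< : invalid=%d\n", fetestexcept(FE_INVALID) != 0);
        return 0;
    }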
> +
> +/* vmfle.vf vd, vs2, rs1, vm # vector-scalar */
> +void VECTOR_HELPER(vmfle_vf)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, src2, result;
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs2, false);
> +
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + result = float16_le(env->vfp.vreg[src2].f16[j],
> + env->fpr[rs1],
> + &env->fp_status);
> + vector_mask_result(env, rd, width, lmul, i, result);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + result = float32_le(env->vfp.vreg[src2].f32[j],
> + env->fpr[rs1],
> + &env->fp_status);
> + vector_mask_result(env, rd, width, lmul, i, result);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + result = float64_le(env->vfp.vreg[src2].f64[j],
> + env->fpr[rs1],
> + &env->fp_status);
> + vector_mask_result(env, rd, width, lmul, i, result);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + switch (width) {
> + case 16:
> + case 32:
> + case 64:
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +void VECTOR_HELPER(vmsltu_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src1, src2, vlmax;
> +
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src1 = rs1 + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src2].u8[j] <
> + env->vfp.vreg[src1].u8[j]) {
> + vector_mask_result(env, rd, width, lmul, i, 1);
> + } else {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src2].u16[j] <
> + env->vfp.vreg[src1].u16[j]) {
> + vector_mask_result(env, rd, width, lmul, i, 1);
> + } else {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src2].u32[j] <
> + env->vfp.vreg[src1].u32[j]) {
> + vector_mask_result(env, rd, width, lmul, i, 1);
> + } else {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src2].u64[j] <
> + env->vfp.vreg[src1].u64[j]) {
> + vector_mask_result(env, rd, width, lmul, i, 1);
> + } else {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + if (width <= 64) {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + } else {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +void VECTOR_HELPER(vmsltu_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src2, vlmax;
> +
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs2, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src2].u8[j] < (uint8_t)env->gpr[rs1]) {
> + vector_mask_result(env, rd, width, lmul, i, 1);
> + } else {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src2].u16[j] < (uint16_t)env->gpr[rs1]) {
> + vector_mask_result(env, rd, width, lmul, i, 1);
> + } else {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    if (env->vfp.vreg[src2].u32[j] < (uint32_t)env->gpr[rs1]) {
> +                        vector_mask_result(env, rd, width, lmul, i, 1);
> +                    } else {
> +                        vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src2].u64[j] <
> + (uint64_t)extend_gpr(env->gpr[rs1])) {
> +                        vector_mask_result(env, rd, width, lmul, i, 1);
> +                    } else {
> +                        vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + if (width <= 64) {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + } else {
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
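
The integer compare helpers above all hand-expand the same per-element
pattern. As a rough sketch (the names here are invented for illustration,
not part of the patch), the masked bit update could be generated once per
element type and reused:

#define GEN_CMP_ELEM(NAME, TYPE, OP)                                \
    static inline void NAME(CPURISCVState *env, uint32_t vm,        \
                            uint32_t width, uint32_t lmul,          \
                            uint32_t rd, int i, TYPE a, TYPE b)     \
    {                                                               \
        if (vector_elem_mask(env, vm, width, lmul, i)) {            \
            vector_mask_result(env, rd, width, lmul, i, (a OP b));  \
        }                                                           \
    }

GEN_CMP_ELEM(cmp_ltu_u8, uint8_t, <)    /* one expansion per width */
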
> +/* vmor.mm vd, vs2, vs1 # vd = vs2 | vs1 */
> +void VECTOR_HELPER(vmor_mm)(CPURISCVState *env, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, i, vlmax;
> + uint32_t tmp;
> +
> + if (vector_vtype_ill(env)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + vl = env->vfp.vl;
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + for (i = 0; i < vlmax; i++) {
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + tmp = vector_mask_reg(env, rs1, width, lmul, i) |
> + vector_mask_reg(env, rs2, width, lmul, i);
> + vector_mask_result(env, rd, width, lmul, i, tmp & 0x1);
> + } else {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> +
> +    env->vfp.vstart = 0;
> +}
> +
> +/* vmford.vv vd, vs2, vs1, vm # Vector-vector */
> +void VECTOR_HELPER(vmford_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, src1, src2, result;
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> +    if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + src1 = rs1 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    result = float16_unordered_quiet(env->vfp.vreg[src1].f16[j],
> +                        env->vfp.vreg[src2].f16[j],
> +                        &env->fp_status);
> +                    vector_mask_result(env, rd, width, lmul, i, !result);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    result = float32_unordered_quiet(env->vfp.vreg[src1].f32[j],
> +                        env->vfp.vreg[src2].f32[j],
> +                        &env->fp_status);
> +                    vector_mask_result(env, rd, width, lmul, i, !result);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    result = float64_unordered_quiet(env->vfp.vreg[src1].f64[j],
> +                        env->vfp.vreg[src2].f64[j],
> +                        &env->fp_status);
> +                    vector_mask_result(env, rd, width, lmul, i, !result);
> + }
> + break;
> + default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + switch (width) {
> + case 16:
> + case 32:
> + case 64:
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + break;
> + default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + }
> + }
> +    env->vfp.vstart = 0;
> +}
> +
> +/* vmford.vf vd, vs2, rs1, vm # Vector-scalar */
> +void VECTOR_HELPER(vmford_vf)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, src2, result;
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> +    if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs2, false);
> +
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    result = float16_unordered_quiet(env->vfp.vreg[src2].f16[j],
> +                        env->fpr[rs1],
> +                        &env->fp_status);
> +                    vector_mask_result(env, rd, width, lmul, i, !result);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    result = float32_unordered_quiet(env->vfp.vreg[src2].f32[j],
> +                        env->fpr[rs1],
> +                        &env->fp_status);
> +                    vector_mask_result(env, rd, width, lmul, i, !result);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    result = float64_unordered_quiet(env->vfp.vreg[src2].f64[j],
> +                        env->fpr[rs1],
> +                        &env->fp_status);
> +                    vector_mask_result(env, rd, width, lmul, i, !result);
> + }
> + break;
> + default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + switch (width) {
> + case 16:
> + case 32:
> + case 64:
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + break;
> + default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + }
> + }
> +    env->vfp.vstart = 0;
> +}
> +void VECTOR_HELPER(vmslt_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src1, src2, vlmax;
> +
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> +    if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src1 = rs1 + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src2].s8[j] <
> + env->vfp.vreg[src1].s8[j]) {
> +                        vector_mask_result(env, rd, width, lmul, i, 1);
> +                    } else {
> +                        vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src2].s16[j] <
> + env->vfp.vreg[src1].s16[j]) {
> +                        vector_mask_result(env, rd, width, lmul, i, 1);
> +                    } else {
> +                        vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src2].s32[j] <
> + env->vfp.vreg[src1].s32[j]) {
> +                        vector_mask_result(env, rd, width, lmul, i, 1);
> +                    } else {
> +                        vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src2].s64[j] <
> + env->vfp.vreg[src1].s64[j]) {
> +                        vector_mask_result(env, rd, width, lmul, i, 1);
> +                    } else {
> +                        vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + if (width <= 64) {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + } else {
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +void VECTOR_HELPER(vmslt_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src2, vlmax;
> +
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> +    if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs2, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    if (env->vfp.vreg[src2].s8[j] < (int8_t)env->gpr[rs1]) {
> +                        vector_mask_result(env, rd, width, lmul, i, 1);
> +                    } else {
> +                        vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    if (env->vfp.vreg[src2].s16[j] < (int16_t)env->gpr[rs1]) {
> +                        vector_mask_result(env, rd, width, lmul, i, 1);
> +                    } else {
> +                        vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    if (env->vfp.vreg[src2].s32[j] < (int32_t)env->gpr[rs1]) {
> +                        vector_mask_result(env, rd, width, lmul, i, 1);
> +                    } else {
> +                        vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src2].s64[j] <
> + (int64_t)extend_gpr(env->gpr[rs1])) {
> +                        vector_mask_result(env, rd, width, lmul, i, 1);
> +                    } else {
> +                        vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + if (width <= 64) {
> +                vector_mask_result(env, rd, width, lmul, i, 0);
> + } else {
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vmxor.mm vd, vs2, vs1 # vd = vs2 ^ vs1 */
> +void VECTOR_HELPER(vmxor_mm)(CPURISCVState *env, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, i, vlmax;
> + uint32_t tmp;
> +
> + if (vector_vtype_ill(env)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + vl = env->vfp.vl;
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + for (i = 0; i < vlmax; i++) {
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + tmp = vector_mask_reg(env, rs1, width, lmul, i) ^
> + vector_mask_reg(env, rs2, width, lmul, i);
> + vector_mask_result(env, rd, width, lmul, i, tmp & 0x1);
> + } else {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> +
> +    env->vfp.vstart = 0;
> +}
> +
> +/* vmflt.vv vd, vs2, vs1, vm # Vector-vector */
> +void VECTOR_HELPER(vmflt_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, src1, src2, result;
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> +    if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + src1 = rs1 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + result = float16_lt(env->vfp.vreg[src2].f16[j],
> + env->vfp.vreg[src1].f16[j],
> + &env->fp_status);
> +                    vector_mask_result(env, rd, width, lmul, i, result);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + result = float32_lt(env->vfp.vreg[src2].f32[j],
> + env->vfp.vreg[src1].f32[j],
> + &env->fp_status);
> +                    vector_mask_result(env, rd, width, lmul, i, result);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + result = float64_lt(env->vfp.vreg[src2].f64[j],
> + env->vfp.vreg[src1].f64[j],
> + &env->fp_status);
> +                    vector_mask_result(env, rd, width, lmul, i, result);
> + }
> + break;
> + default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + switch (width) {
> + case 16:
> + case 32:
> + case 64:
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + break;
> + default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + }
> + }
> +    env->vfp.vstart = 0;
> +}
> +
> +/* vmflt.vf vd, vs2, rs1, vm # vector-scalar */
> +void VECTOR_HELPER(vmflt_vf)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, src2, result;
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> +    if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs2, false);
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + result = float16_lt(env->vfp.vreg[src2].f16[j],
> + env->fpr[rs1],
> + &env->fp_status);
> +                    vector_mask_result(env, rd, width, lmul, i, result);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + result = float32_lt(env->vfp.vreg[src2].f32[j],
> + env->fpr[rs1],
> + &env->fp_status);
> +                    vector_mask_result(env, rd, width, lmul, i, result);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + result = float64_lt(env->vfp.vreg[src2].f64[j],
> + env->fpr[rs1],
> + &env->fp_status);
> +                    vector_mask_result(env, rd, width, lmul, i, result);
> + }
> + break;
> + default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + switch (width) {
> + case 16:
> + case 32:
> + case 64:
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + break;
> + default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + }
> + }
> +    env->vfp.vstart = 0;
> +}
> +void VECTOR_HELPER(vmsleu_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src1, src2, vlmax;
> +
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> +    if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src1 = rs1 + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src2].u8[j] <=
> + env->vfp.vreg[src1].u8[j]) {
> +                        vector_mask_result(env, rd, width, lmul, i, 1);
> +                    } else {
> +                        vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src2].u16[j] <=
> + env->vfp.vreg[src1].u16[j]) {
> +                        vector_mask_result(env, rd, width, lmul, i, 1);
> +                    } else {
> +                        vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src2].u32[j] <=
> + env->vfp.vreg[src1].u32[j]) {
> +                        vector_mask_result(env, rd, width, lmul, i, 1);
> +                    } else {
> +                        vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src2].u64[j] <=
> + env->vfp.vreg[src1].u64[j]) {
> +                        vector_mask_result(env, rd, width, lmul, i, 1);
> +                    } else {
> +                        vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + if (width <= 64) {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + } else {
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +void VECTOR_HELPER(vmsleu_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src2, vlmax;
> +
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> +    if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs2, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    if (env->vfp.vreg[src2].u8[j] <= (uint8_t)env->gpr[rs1]) {
> +                        vector_mask_result(env, rd, width, lmul, i, 1);
> +                    } else {
> +                        vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    if (env->vfp.vreg[src2].u16[j] <= (uint16_t)env->gpr[rs1]) {
> +                        vector_mask_result(env, rd, width, lmul, i, 1);
> +                    } else {
> +                        vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    if (env->vfp.vreg[src2].u32[j] <= (uint32_t)env->gpr[rs1]) {
> +                        vector_mask_result(env, rd, width, lmul, i, 1);
> +                    } else {
> +                        vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src2].u64[j] <=
> + (uint64_t)extend_gpr(env->gpr[rs1])) {
> +                        vector_mask_result(env, rd, width, lmul, i, 1);
> +                    } else {
> +                        vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + if (width <= 64) {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + } else {
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +void VECTOR_HELPER(vmsleu_vi)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src2, vlmax;
> +
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> +    if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs2, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src2].u8[j] <= (uint8_t)rs1) {
> +                        vector_mask_result(env, rd, width, lmul, i, 1);
> +                    } else {
> +                        vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    if (env->vfp.vreg[src2].u16[j] <= (uint16_t)rs1) {
> +                        vector_mask_result(env, rd, width, lmul, i, 1);
> +                    } else {
> +                        vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    if (env->vfp.vreg[src2].u32[j] <= (uint32_t)rs1) {
> +                        vector_mask_result(env, rd, width, lmul, i, 1);
> +                    } else {
> +                        vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src2].u64[j] <=
> + (uint64_t)rs1) {
> +                        vector_mask_result(env, rd, width, lmul, i, 1);
> +                    } else {
> +                        vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + if (width <= 64) {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + } else {
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vmornot.mm vd, vs2, vs1 # vd = vs2 | ~vs1 */
> +void VECTOR_HELPER(vmornot_mm)(CPURISCVState *env, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, i, vlmax;
> + uint32_t tmp;
> +
> + if (vector_vtype_ill(env)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + vl = env->vfp.vl;
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + for (i = 0; i < vlmax; i++) {
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + tmp = ~vector_mask_reg(env, rs1, width, lmul, i) |
> + vector_mask_reg(env, rs2, width, lmul, i);
> + vector_mask_result(env, rd, width, lmul, i, tmp & 0x1);
> + } else {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> +
> +    env->vfp.vstart = 0;
> +}
> +
> +/* vmfne.vv vd, vs2, vs1, vm # Vector-vector */
> +void VECTOR_HELPER(vmfne_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, src1, src2, result;
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> +    if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + src1 = rs1 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    result = float16_eq_quiet(env->vfp.vreg[src1].f16[j],
> +                        env->vfp.vreg[src2].f16[j],
> +                        &env->fp_status);
> +                    vector_mask_result(env, rd, width, lmul, i, !result);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    result = float32_eq_quiet(env->vfp.vreg[src1].f32[j],
> +                        env->vfp.vreg[src2].f32[j],
> +                        &env->fp_status);
> +                    vector_mask_result(env, rd, width, lmul, i, !result);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    result = float64_eq_quiet(env->vfp.vreg[src1].f64[j],
> +                        env->vfp.vreg[src2].f64[j],
> +                        &env->fp_status);
> +                    vector_mask_result(env, rd, width, lmul, i, !result);
> + }
> + break;
> + default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + switch (width) {
> + case 16:
> + case 32:
> + case 64:
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + break;
> + default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + }
> + }
> +    env->vfp.vstart = 0;
> +}
> +
> +/* vmfne.vf vd, vs2, rs1, vm # vector-scalar */
> +void VECTOR_HELPER(vmfne_vf)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, src2, result;
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +    if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs2, false);
> +
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + result = float16_eq_quiet(env->fpr[rs1],
> + env->vfp.vreg[src2].f16[j],
> + &env->fp_status);
> +                    vector_mask_result(env, rd, width, lmul, i, !result);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + result = float32_eq_quiet(env->fpr[rs1],
> + env->vfp.vreg[src2].f32[j],
> + &env->fp_status);
> +                    vector_mask_result(env, rd, width, lmul, i, !result);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + result = float64_eq_quiet(env->fpr[rs1],
> + env->vfp.vreg[src2].f64[j],
> + &env->fp_status);
> +                    vector_mask_result(env, rd, width, lmul, i, !result);
> + }
> + break;
> + default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + switch (width) {
> + case 16:
> + case 32:
> + case 64:
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + break;
> + default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + }
> + }
> +    env->vfp.vstart = 0;
> +}
> +void VECTOR_HELPER(vmsle_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src1, src2, vlmax;
> +
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> +    if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src1 = rs1 + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src2].s8[j] <=
> + env->vfp.vreg[src1].s8[j]) {
> +                        vector_mask_result(env, rd, width, lmul, i, 1);
> +                    } else {
> +                        vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src2].s16[j] <=
> + env->vfp.vreg[src1].s16[j]) {
> +                        vector_mask_result(env, rd, width, lmul, i, 1);
> +                    } else {
> +                        vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src2].s32[j] <=
> + env->vfp.vreg[src1].s32[j]) {
> +                        vector_mask_result(env, rd, width, lmul, i, 1);
> +                    } else {
> +                        vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src2].s64[j] <=
> + env->vfp.vreg[src1].s64[j]) {
> +                        vector_mask_result(env, rd, width, lmul, i, 1);
> +                    } else {
> +                        vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + if (width <= 64) {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + } else {
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +void VECTOR_HELPER(vmsle_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src2, vlmax;
> +
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> +    if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs2, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    if (env->vfp.vreg[src2].s8[j] <= (int8_t)env->gpr[rs1]) {
> +                        vector_mask_result(env, rd, width, lmul, i, 1);
> +                    } else {
> +                        vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    if (env->vfp.vreg[src2].s16[j] <= (int16_t)env->gpr[rs1]) {
> +                        vector_mask_result(env, rd, width, lmul, i, 1);
> +                    } else {
> +                        vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    if (env->vfp.vreg[src2].s32[j] <= (int32_t)env->gpr[rs1]) {
> +                        vector_mask_result(env, rd, width, lmul, i, 1);
> +                    } else {
> +                        vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src2].s64[j] <=
> + (int64_t)extend_gpr(env->gpr[rs1])) {
> +                        vector_mask_result(env, rd, width, lmul, i, 1);
> +                    } else {
> +                        vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + if (width <= 64) {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + } else {
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +void VECTOR_HELPER(vmsle_vi)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src2, vlmax;
> +
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> +    if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs2, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src2].s8[j] <=
> + (int8_t)sign_extend(rs1, 5)) {
> +                        vector_mask_result(env, rd, width, lmul, i, 1);
> +                    } else {
> +                        vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src2].s16[j] <=
> + (int16_t)sign_extend(rs1, 5)) {
> +                        vector_mask_result(env, rd, width, lmul, i, 1);
> +                    } else {
> +                        vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src2].s32[j] <=
> + (int32_t)sign_extend(rs1, 5)) {
> +                        vector_mask_result(env, rd, width, lmul, i, 1);
> +                    } else {
> +                        vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src2].s64[j] <=
> + sign_extend(rs1, 5)) {
> +                        vector_mask_result(env, rd, width, lmul, i, 1);
> +                    } else {
> +                        vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + if (width <= 64) {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + } else {
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
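
For the .vi forms, the rs1 field carries the 5-bit immediate, and
sign_extend(rs1, 5) is assumed to behave like this small model
(hypothetical helper, for illustration only):

static inline int64_t simm5(uint32_t field)
{
    int64_t v = field & 0x1f;
    return (v ^ 0x10) - 0x10;    /* propagate bit 4 as the sign bit */
}
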
> +/* vmnand.mm vd, vs2, vs1 # vd = ~(vs2 & vs1) */
> +void VECTOR_HELPER(vmnand_mm)(CPURISCVState *env, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, i, vlmax;
> + uint32_t tmp;
> +
> + if (vector_vtype_ill(env)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + vl = env->vfp.vl;
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + for (i = 0; i < vlmax; i++) {
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + tmp = vector_mask_reg(env, rs1, width, lmul, i) &
> + vector_mask_reg(env, rs2, width, lmul, i);
> +            vector_mask_result(env, rd, width, lmul, i, (~tmp & 0x1));
> + } else {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> +
> +    env->vfp.vstart = 0;
> +}
> +
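
The mask-register logicals (vmor/vmxor/vmornot/vmnand/vmnor/vmxnor)
differ only in the boolean op; the rest is the same one-bit-per-element
loop. A standalone model of that loop, assuming the bit layout implied
by vector_mask_reg()/vector_mask_result(), might look like:

#include <stdint.h>

static void mask_logical(uint8_t *vd, const uint8_t *vs2,
                         const uint8_t *vs1, int vl, int vlmax,
                         uint8_t (*op)(uint8_t, uint8_t))
{
    for (int i = 0; i < vlmax; i++) {
        uint8_t bit = 0;
        if (i < vl) {
            uint8_t a = (vs2[i / 8] >> (i % 8)) & 1;
            uint8_t b = (vs1[i / 8] >> (i % 8)) & 1;
            bit = op(a, b) & 1;             /* active element */
        }                                   /* tail bits are cleared */
        vd[i / 8] = (vd[i / 8] & ~(1u << (i % 8))) | (uint8_t)(bit << (i % 8));
    }
}
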
> +/* vmfgt.vf vd, vs2, rs1, vm # vector-scalar */
> +void VECTOR_HELPER(vmfgt_vf)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, src2, result;
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> +    if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs2, false);
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    result = float16_lt(env->fpr[rs1],
> +                        env->vfp.vreg[src2].f16[j],
> +                        &env->fp_status);
> +                    vector_mask_result(env, rd, width, lmul, i, result);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    result = float32_lt(env->fpr[rs1],
> +                        env->vfp.vreg[src2].f32[j],
> +                        &env->fp_status);
> +                    vector_mask_result(env, rd, width, lmul, i, result);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    result = float64_lt(env->fpr[rs1],
> +                        env->vfp.vreg[src2].f64[j],
> +                        &env->fp_status);
> +                    vector_mask_result(env, rd, width, lmul, i, result);
> + }
> + break;
> + default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + switch (width) {
> + case 16:
> + case 32:
> + case 64:
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + break;
> + default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + }
> + }
> +    env->vfp.vstart = 0;
> +}
> +void VECTOR_HELPER(vmsgtu_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src2, vlmax;
> +
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> +    if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs2, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    if (env->vfp.vreg[src2].u8[j] > (uint8_t)env->gpr[rs1]) {
> +                        vector_mask_result(env, rd, width, lmul, i, 1);
> +                    } else {
> +                        vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    if (env->vfp.vreg[src2].u16[j] > (uint16_t)env->gpr[rs1]) {
> +                        vector_mask_result(env, rd, width, lmul, i, 1);
> +                    } else {
> +                        vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    if (env->vfp.vreg[src2].u32[j] > (uint32_t)env->gpr[rs1]) {
> +                        vector_mask_result(env, rd, width, lmul, i, 1);
> +                    } else {
> +                        vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src2].u64[j] >
> + (uint64_t)extend_gpr(env->gpr[rs1])) {
> +                        vector_mask_result(env, rd, width, lmul, i, 1);
> +                    } else {
> +                        vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + if (width <= 64) {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + } else {
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + }
> + }
> +
> + env->vfp.vstart = 0;
> +}
> +void VECTOR_HELPER(vmsgtu_vi)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src2, vlmax;
> +
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> +    if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs2, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src2].u8[j] > (uint8_t)rs1) {
> +                        vector_mask_result(env, rd, width, lmul, i, 1);
> +                    } else {
> +                        vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src2].u16[j] > (uint16_t)rs1) {
> +                        vector_mask_result(env, rd, width, lmul, i, 1);
> +                    } else {
> +                        vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src2].u32[j] > (uint32_t)rs1) {
> +                        vector_mask_result(env, rd, width, lmul, i, 1);
> +                    } else {
> +                        vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src2].u64[j] >
> + (uint64_t)rs1) {
> +                        vector_mask_result(env, rd, width, lmul, i, 1);
> +                    } else {
> +                        vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + if (width <= 64) {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + } else {
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vmnor.mm vd, vs2, vs1 # vd = ~(vs2 | vs1) */
> +void VECTOR_HELPER(vmnor_mm)(CPURISCVState *env, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, i, vlmax;
> + uint32_t tmp;
> +
> + if (vector_vtype_ill(env)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + vl = env->vfp.vl;
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + for (i = 0; i < vlmax; i++) {
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + tmp = vector_mask_reg(env, rs1, width, lmul, i) |
> + vector_mask_reg(env, rs2, width, lmul, i);
> + vector_mask_result(env, rd, width, lmul, i, ~tmp & 0x1);
> + } else {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> +
> +    env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vmsgt_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src2, vlmax;
> +
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> +    if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs2, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    if (env->vfp.vreg[src2].s8[j] > (int8_t)env->gpr[rs1]) {
> +                        vector_mask_result(env, rd, width, lmul, i, 1);
> +                    } else {
> +                        vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    if (env->vfp.vreg[src2].s16[j] > (int16_t)env->gpr[rs1]) {
> +                        vector_mask_result(env, rd, width, lmul, i, 1);
> +                    } else {
> +                        vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    if (env->vfp.vreg[src2].s32[j] > (int32_t)env->gpr[rs1]) {
> +                        vector_mask_result(env, rd, width, lmul, i, 1);
> +                    } else {
> +                        vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src2].s64[j] >
> + (int64_t)extend_gpr(env->gpr[rs1])) {
> +                        vector_mask_result(env, rd, width, lmul, i, 1);
> +                    } else {
> +                        vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + if (width <= 64) {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + } else {
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +void VECTOR_HELPER(vmsgt_vi)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src2, vlmax;
> +
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> +    if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs2, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src2].s8[j] >
> + (int8_t)sign_extend(rs1, 5)) {
> +                        vector_mask_result(env, rd, width, lmul, i, 1);
> +                    } else {
> +                        vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src2].s16[j] >
> + (int16_t)sign_extend(rs1, 5)) {
> +                        vector_mask_result(env, rd, width, lmul, i, 1);
> +                    } else {
> +                        vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src2].s32[j] >
> + (int32_t)sign_extend(rs1, 5)) {
> +                        vector_mask_result(env, rd, width, lmul, i, 1);
> +                    } else {
> +                        vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src2].s64[j] >
> + sign_extend(rs1, 5)) {
> +                        vector_mask_result(env, rd, width, lmul, i, 1);
> +                    } else {
> +                        vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + if (width <= 64) {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + } else {
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +
> +/* vmxnor.mm vd, vs2, vs1 # vd = ~(vs2 ^ vs1) */
> +void VECTOR_HELPER(vmxnor_mm)(CPURISCVState *env, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, i, vlmax;
> + uint32_t tmp;
> +
> + if (vector_vtype_ill(env)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + vl = env->vfp.vl;
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + for (i = 0; i < vlmax; i++) {
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + tmp = vector_mask_reg(env, rs1, width, lmul, i) ^
> + vector_mask_reg(env, rs2, width, lmul, i);
> + vector_mask_result(env, rd, width, lmul, i, ~tmp & 0x1);
> + } else {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> +
> +    env->vfp.vstart = 0;
> +}
> +
> +/* vmfge.vf vd, vs2, rs1, vm # vector-scalar */
> +void VECTOR_HELPER(vmfge_vf)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, src2, result;
> +
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> +    if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs2, false);
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    result = float16_le(env->fpr[rs1],
> +                        env->vfp.vreg[src2].f16[j],
> +                        &env->fp_status);
> +                    vector_mask_result(env, rd, width, lmul, i, result);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    result = float32_le(env->fpr[rs1],
> +                        env->vfp.vreg[src2].f32[j],
> +                        &env->fp_status);
> +                    vector_mask_result(env, rd, width, lmul, i, result);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    result = float64_le(env->fpr[rs1],
> +                        env->vfp.vreg[src2].f64[j],
> +                        &env->fp_status);
> +                    vector_mask_result(env, rd, width, lmul, i, result);
> + }
> + break;
> + default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + switch (width) {
> + case 16:
> + case 32:
> + case 64:
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + break;
> + default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + }
> + }
> +    env->vfp.vstart = 0;
> +}
> +
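
A note on vmfgt.vf/vmfge.vf above: "greater" comes from the signaling
lt/le comparison with the operands swapped, so a NaN input raises the
invalid flag and leaves the mask bit at 0, matching IEEE-style compare
semantics. The same shape in plain C (sketch only):

/* "a > b" as "b < a"; a NaN operand makes the compare false and,
 * for a signaling compare, sets the invalid flag. */
static inline int fgt(double a, double b)
{
    return b < a;
}
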
> +/* vsaddu.vv vd, vs2, vs1, vm # Vector-vector */
> +void VECTOR_HELPER(vsaddu_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest, src1, src2;
> +
> + lmul = vector_get_lmul(env);
> +
> +
> +    if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + vl = env->vfp.vl;
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + src1 = rs1 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u8[j] = sat_add_u8(env,
> +                        env->vfp.vreg[src1].u8[j], env->vfp.vreg[src2].u8[j]);
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u16[j] = sat_add_u16(env,
> +                        env->vfp.vreg[src1].u16[j], env->vfp.vreg[src2].u16[j]);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u32[j] = sat_add_u32(env,
> +                        env->vfp.vreg[src1].u32[j], env->vfp.vreg[src2].u32[j]);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u64[j] = sat_add_u64(env,
> +                        env->vfp.vreg[src1].u64[j], env->vfp.vreg[src2].u64[j]);
> + }
> + break;
> + default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> +    env->vfp.vstart = 0;
> +}
> +
> +/* vsaddu.vx vd, vs2, rs1, vm # vector-scalar */
> +void VECTOR_HELPER(vsaddu_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest, src2;
> +
> + lmul = vector_get_lmul(env);
> +
> +
> +    if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + vl = env->vfp.vl;
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u8[j] = sat_add_u8(env,
> + env->vfp.vreg[src2].u8[j], env->gpr[rs1]);
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u16[j] = sat_add_u16(env,
> + env->vfp.vreg[src2].u16[j], env->gpr[rs1]);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u32[j] = sat_add_u32(env,
> + env->vfp.vreg[src2].u32[j], env->gpr[rs1]);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u64[j] = sat_add_u64(env,
> + env->vfp.vreg[src2].u64[j], env->gpr[rs1]);
> + }
> + break;
> + default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> +    env->vfp.vstart = 0;
> +}
> +
> +/* vsaddu.vi vd, vs2, imm, vm # vector-immediate */
> +void VECTOR_HELPER(vsaddu_vi)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest, src2;
> +
> + lmul = vector_get_lmul(env);
> +
> +
> +    if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> +
> + vl = env->vfp.vl;
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u8[j] = sat_add_u8(env,
> + env->vfp.vreg[src2].u8[j], rs1);
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u16[j] = sat_add_u16(env,
> + env->vfp.vreg[src2].u16[j], rs1);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u32[j] = sat_add_u32(env,
> + env->vfp.vreg[src2].u32[j], rs1);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u64[j] = sat_add_u64(env,
> + env->vfp.vreg[src2].u64[j], rs1);
> + }
> + break;
> + default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> +    env->vfp.vstart = 0;
> +}
> +
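
vsaddu relies on the sat_add_u8/u16/u32/u64 helpers defined elsewhere in
the patch. A minimal model of the unsigned case (simplified; the real
helpers are assumed to record saturation via env, e.g. in vxsat):

#include <stdbool.h>
#include <stdint.h>

static uint8_t sat_addu8(uint8_t a, uint8_t b, bool *sat)
{
    uint8_t res = a + b;
    if (res < a) {        /* wrapped around: clamp and flag */
        res = UINT8_MAX;
        *sat = true;
    }
    return res;
}
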
> +void VECTOR_HELPER(vdivu_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src1, src2, dest, vlmax;
> +
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> +    if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src1 = rs1 + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src1].u8[j] == 0) {
> + env->vfp.vreg[dest].u8[j] = MAX_U8;
> + } else {
> +                        env->vfp.vreg[dest].u8[j] = env->vfp.vreg[src2].u8[j] /
> + env->vfp.vreg[src1].u8[j];
> + }
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src1].u16[j] == 0) {
> + env->vfp.vreg[dest].u16[j] = MAX_U16;
> + } else {
> +                        env->vfp.vreg[dest].u16[j] = env->vfp.vreg[src2].u16[j]
> + / env->vfp.vreg[src1].u16[j];
> + }
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src1].u32[j] == 0) {
> + env->vfp.vreg[dest].u32[j] = MAX_U32;
> + } else {
> +                        env->vfp.vreg[dest].u32[j] = env->vfp.vreg[src2].u32[j]
> + / env->vfp.vreg[src1].u32[j];
> + }
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src1].u64[j] == 0) {
> + env->vfp.vreg[dest].u64[j] = MAX_U64;
> + } else {
> +                        env->vfp.vreg[dest].u64[j] = env->vfp.vreg[src2].u64[j]
> + / env->vfp.vreg[src1].u64[j];
> + }
> + }
> + break;
> + default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
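
vdivu follows the RISC-V convention that unsigned division by zero
returns all ones (the MAX_U* values above) instead of trapping. As a
one-line model:

static inline uint64_t divu(uint64_t a, uint64_t b)
{
    return b == 0 ? UINT64_MAX : a / b;    /* x / 0 == all ones */
}
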
> +void VECTOR_HELPER(vdivu_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src2, dest, vlmax;
> +
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if ((uint8_t)env->gpr[rs1] == 0) {
> + env->vfp.vreg[dest].u8[j] = MAX_U8;
> + } else {
> + env->vfp.vreg[dest].u8[j] = env->vfp.vreg[src2].u8[j] /
> + (uint8_t)env->gpr[rs1];
> + }
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if ((uint16_t)env->gpr[rs1] == 0) {
> + env->vfp.vreg[dest].u16[j] = MAX_U16;
> + } else {
> + env->vfp.vreg[dest].u16[j] = env->vfp.vreg[src2].u16[j]
> + / (uint16_t)env->gpr[rs1];
> + }
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if ((uint32_t)env->gpr[rs1] == 0) {
> + env->vfp.vreg[dest].u32[j] = MAX_U32;
> + } else {
> + env->vfp.vreg[dest].u32[j] = env->vfp.vreg[src2].u32[j]
> + / (uint32_t)env->gpr[rs1];
> + }
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if ((uint64_t)extend_gpr(env->gpr[rs1]) == 0) {
> + env->vfp.vreg[dest].u64[j] = MAX_U64;
> + } else {
> + env->vfp.vreg[dest].u64[j] = env->vfp.vreg[src2].u64[j]
> + / (uint64_t)extend_gpr(env->gpr[rs1]);
> + }
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vfdiv.vv vd, vs2, vs1, vm # Vector-vector */
> +void VECTOR_HELPER(vfdiv_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest, src1, src2;
> +
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + src1 = rs1 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f16[j] = float16_div(
> + env->vfp.vreg[src2].f16[j],
> + env->vfp.vreg[src1].f16[j],
> + &env->fp_status);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f32[j] = float32_div(
> + env->vfp.vreg[src2].f32[j],
> + env->vfp.vreg[src1].f32[j],
> + &env->fp_status);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f64[j] = float64_div(
> + env->vfp.vreg[src2].f64[j],
> + env->vfp.vreg[src1].f64[j],
> + &env->fp_status);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_fcommon(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vfdiv.vf vd, vs2, rs1, vm # vector-scalar */
> +void VECTOR_HELPER(vfdiv_vf)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest, src2;
> +
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f16[j] = float16_div(
> + env->vfp.vreg[src2].f16[j],
> + env->fpr[rs1],
> + &env->fp_status);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f32[j] = float32_div(
> + env->vfp.vreg[src2].f32[j],
> + env->fpr[rs1],
> + &env->fp_status);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f64[j] = float64_div(
> + env->vfp.vreg[src2].f64[j],
> + env->fpr[rs1],
> + &env->fp_status);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_fcommon(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vsadd.vv vd, vs2, vs1, vm # Vector-vector */
> +void VECTOR_HELPER(vsadd_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest, src1, src2;
> +
> + lmul = vector_get_lmul(env);
> +
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + vl = env->vfp.vl;
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + src1 = rs1 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s8[j] = sat_add_s8(env,
> + env->vfp.vreg[src1].s8[j], env->vfp.vreg[src2].s8[j]);
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s16[j] = sat_add_s16(env,
> + env->vfp.vreg[src1].s16[j], env->vfp.vreg[src2].s16[j]);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s32[j] = sat_add_s32(env,
> + env->vfp.vreg[src1].s32[j], env->vfp.vreg[src2].s32[j]);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s64[j] = sat_add_s64(env,
> + env->vfp.vreg[src1].s64[j], env->vfp.vreg[src2].s64[j]);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vsadd.vx vd, vs2, rs1, vm # vector-scalar */
> +void VECTOR_HELPER(vsadd_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest, src2;
> +
> + lmul = vector_get_lmul(env);
> +
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + vl = env->vfp.vl;
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s8[j] = sat_add_s8(env,
> + env->vfp.vreg[src2].s8[j], env->gpr[rs1]);
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s16[j] = sat_add_s16(env,
> + env->vfp.vreg[src2].s16[j], env->gpr[rs1]);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s32[j] = sat_add_s32(env,
> + env->vfp.vreg[src2].s32[j], env->gpr[rs1]);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s64[j] = sat_add_s64(env,
> + env->vfp.vreg[src2].s64[j], env->gpr[rs1]);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vsadd.vi vd, vs2, imm, vm # vector-immediate */
> +void VECTOR_HELPER(vsadd_vi)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest, src2;
> +
> + lmul = vector_get_lmul(env);
> +
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + vl = env->vfp.vl;
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s8[j] = sat_add_s8(env,
> + env->vfp.vreg[src2].s8[j], sign_extend(rs1, 5));
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s16[j] = sat_add_s16(env,
> + env->vfp.vreg[src2].s16[j], sign_extend(rs1, 5));
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s32[j] = sat_add_s32(env,
> + env->vfp.vreg[src2].s32[j], sign_extend(rs1, 5));
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s64[j] = sat_add_s64(env,
> + env->vfp.vreg[src2].s64[j], sign_extend(rs1, 5));
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vdiv_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src1, src2, dest, vlmax;
> +
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src1 = rs1 + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src1].s8[j] == 0) {
> + env->vfp.vreg[dest].s8[j] = -1;
> + } else if ((env->vfp.vreg[src2].s8[j] == MIN_S8) &&
> + (env->vfp.vreg[src1].s8[j] == (int8_t)(-1))) {
> + env->vfp.vreg[dest].s8[j] = MIN_S8;
> + } else {
> + env->vfp.vreg[dest].s8[j] = env->vfp.vreg[src2].s8[j] /
> + env->vfp.vreg[src1].s8[j];
> + }
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src1].s16[j] == 0) {
> + env->vfp.vreg[dest].s16[j] = -1;
> + } else if ((env->vfp.vreg[src2].s16[j] == MIN_S16) &&
> + (env->vfp.vreg[src1].s16[j] == (int16_t)(-1))) {
> + env->vfp.vreg[dest].s16[j] = MIN_S16;
> + } else {
> + env->vfp.vreg[dest].s16[j] = env->vfp.vreg[src2].s16[j]
> + / env->vfp.vreg[src1].s16[j];
> + }
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src1].s32[j] == 0) {
> + env->vfp.vreg[dest].s32[j] = -1;
> + } else if ((env->vfp.vreg[src2].s32[j] == MIN_S32) &&
> + (env->vfp.vreg[src1].s32[j] == (int32_t)(-1))) {
> + env->vfp.vreg[dest].s32[j] = MIN_S32;
> + } else {
> + env->vfp.vreg[dest].s32[j] = env->vfp.vreg[src2].s32[j]
> + / env->vfp.vreg[src1].s32[j];
> + }
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src1].s64[j] == 0) {
> + env->vfp.vreg[dest].s64[j] = -1;
> + } else if ((env->vfp.vreg[src2].s64[j] == MIN_S64) &&
> + (env->vfp.vreg[src1].s64[j] == (int64_t)(-1))) {
> + env->vfp.vreg[dest].s64[j] = MIN_S64;
> + } else {
> + env->vfp.vreg[dest].s64[j] = env->vfp.vreg[src2].s64[j]
> + / env->vfp.vreg[src1].s64[j];
> + }
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
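
The special cases above follow the scalar DIV/DIVU rules: unsigned division
by zero yields all ones (MAX_U*), signed division by zero yields -1, and
MIN_S* divided by -1 yields MIN_S* without trapping. Plain C would invoke
undefined behaviour on that last case, hence the explicit checks. A
stand-alone sketch of the signed rule (my model, not patch code):

    #include <stdint.h>

    /* Signed 8-bit division with the RISC-V DIV corner cases. */
    static int8_t div_s8_model(int8_t a, int8_t b)
    {
        if (b == 0) {
            return -1;                 /* divide by zero: all bits set */
        }
        if (a == INT8_MIN && b == -1) {
            return INT8_MIN;           /* overflow: result is the dividend */
        }
        return a / b;
    }
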
> +void VECTOR_HELPER(vdiv_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src2, dest, vlmax;
> +
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if ((int8_t)env->gpr[rs1] == 0) {
> + env->vfp.vreg[dest].s8[j] = -1;
> + } else if ((env->vfp.vreg[src2].s8[j] == MIN_S8) &&
> + ((int8_t)env->gpr[rs1] == (int8_t)(-1))) {
> + env->vfp.vreg[dest].s8[j] = MIN_S8;
> + } else {
> + env->vfp.vreg[dest].s8[j] = env->vfp.vreg[src2].s8[j] /
> + (int8_t)env->gpr[rs1];
> + }
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if ((int16_t)env->gpr[rs1] == 0) {
> + env->vfp.vreg[dest].s16[j] = -1;
> + } else if ((env->vfp.vreg[src2].s16[j] == MIN_S16) &&
> + ((int16_t)env->gpr[rs1] == (int16_t)(-1))) {
> + env->vfp.vreg[dest].s16[j] = MIN_S16;
> + } else {
> + env->vfp.vreg[dest].s16[j] = env->vfp.vreg[src2].s16[j]
> + / (int16_t)env->gpr[rs1];
> + }
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if ((int32_t)env->gpr[rs1] == 0) {
> + env->vfp.vreg[dest].s32[j] = -1;
> + } else if ((env->vfp.vreg[src2].s32[j] == MIN_S32) &&
> + ((int32_t)env->gpr[rs1] == (int32_t)(-1))) {
> + env->vfp.vreg[dest].s32[j] = MIN_S32;
> + } else {
> + env->vfp.vreg[dest].s32[j] = env->vfp.vreg[src2].s32[j]
> + / (int32_t)env->gpr[rs1];
> + }
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if ((int64_t)extend_gpr(env->gpr[rs1]) == 0) {
> + env->vfp.vreg[dest].s64[j] = -1;
> + } else if ((env->vfp.vreg[src2].s64[j] == MIN_S64) &&
> + ((int64_t)extend_gpr(env->gpr[rs1]) == (int64_t)(-1))) {
> + env->vfp.vreg[dest].s64[j] = MIN_S64;
> + } else {
> + env->vfp.vreg[dest].s64[j] = env->vfp.vreg[src2].s64[j]
> + / (int64_t)extend_gpr(env->gpr[rs1]);
> + }
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vfrdiv.vf vd, vs2, rs1, vm # scalar-vector, vd[i] = f[rs1]/vs2[i] */
> +void VECTOR_HELPER(vfrdiv_vf)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest, src2;
> +
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f16[j] = float16_div(
> + env->fpr[rs1],
> + env->vfp.vreg[src2].f16[j],
> + &env->fp_status);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f32[j] = float32_div(
> + env->fpr[rs1],
> + env->vfp.vreg[src2].f32[j],
> + &env->fp_status);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f64[j] = float64_div(
> + env->fpr[rs1],
> + env->vfp.vreg[src2].f64[j],
> + &env->fp_status);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_fcommon(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vssubu.vv vd, vs2, vs1, vm # Vector-vector */
> +void VECTOR_HELPER(vssubu_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest, src1, src2;
> +
> + lmul = vector_get_lmul(env);
> +
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + vl = env->vfp.vl;
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + src1 = rs1 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u8[j] = sat_sub_u8(env,
> + env->vfp.vreg[src2].u8[j], env->vfp.vreg[src1].u8[j]);
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u16[j] = sat_sub_u16(env,
> + env->vfp.vreg[src2].u16[j], env->vfp.vreg[src1].u16[j]);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u32[j] = sat_sub_u32(env,
> + env->vfp.vreg[src2].u32[j], env->vfp.vreg[src1].u32[j]);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u64[j] = sat_sub_u64(env,
> + env->vfp.vreg[src2].u64[j], env->vfp.vreg[src1].u64[j]);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vssubu.vx vd, vs2, rs1, vm # vector-scalar */
> +void VECTOR_HELPER(vssubu_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest, src2;
> +
> + lmul = vector_get_lmul(env);
> +
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + vl = env->vfp.vl;
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u8[j] = sat_sub_u8(env,
> + env->vfp.vreg[src2].u8[j], env->gpr[rs1]);
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u16[j] = sat_sub_u16(env,
> + env->vfp.vreg[src2].u16[j], env->gpr[rs1]);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u32[j] = sat_sub_u32(env,
> + env->vfp.vreg[src2].u32[j], env->gpr[rs1]);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u64[j] = sat_sub_u64(env,
> + env->vfp.vreg[src2].u64[j], env->gpr[rs1]);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vremu_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src1, src2, dest, vlmax;
> +
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src1 = rs1 + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src1].u8[j] == 0) {
> + env->vfp.vreg[dest].u8[j] = env->vfp.vreg[src2].u8[j];
> + } else {
> + env->vfp.vreg[dest].u8[j] = env->vfp.vreg[src2].u8[j] %
> + env->vfp.vreg[src1].u8[j];
> + }
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src1].u16[j] == 0) {
> + env->vfp.vreg[dest].u16[j] = env->vfp.vreg[src2].u16[j];
> + } else {
> + env->vfp.vreg[dest].u16[j] = env->vfp.vreg[src2].u16[j]
> + % env->vfp.vreg[src1].u16[j];
> + }
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src1].u32[j] == 0) {
> + env->vfp.vreg[dest].u32[j] = env->vfp.vreg[src2].u32[j];
> + } else {
> + env->vfp.vreg[dest].u32[j] = env->vfp.vreg[src2].u32[j]
> + % env->vfp.vreg[src1].u32[j];
> + }
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src1].u64[j] == 0) {
> + env->vfp.vreg[dest].u64[j] = env->vfp.vreg[src2].u64[j];
> + } else {
> + env->vfp.vreg[dest].u64[j] = env->vfp.vreg[src2].u64[j]
> + % env->vfp.vreg[src1].u64[j];
> + }
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> +
> + env->vfp.vstart = 0;
> +}
> +void VECTOR_HELPER(vremu_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src2, dest, vlmax;
> +
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if ((uint8_t)env->gpr[rs1] == 0) {
> + env->vfp.vreg[dest].u8[j] = env->vfp.vreg[src2].u8[j];
> + } else {
> + env->vfp.vreg[dest].u8[j] = env->vfp.vreg[src2].u8[j] %
> + (uint8_t)env->gpr[rs1];
> + }
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if ((uint16_t)env->gpr[rs1] == 0) {
> + env->vfp.vreg[dest].u16[j] = env->vfp.vreg[src2].u16[j];
> + } else {
> + env->vfp.vreg[dest].u16[j] = env->vfp.vreg[src2].u16[j]
> + % (uint16_t)env->gpr[rs1];
> + }
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if ((uint32_t)env->gpr[rs1] == 0) {
> + env->vfp.vreg[dest].u32[j] = env->vfp.vreg[src2].u32[j];
> + } else {
> + env->vfp.vreg[dest].u32[j] = env->vfp.vreg[src2].u32[j]
> + % (uint32_t)env->gpr[rs1];
> + }
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if ((uint64_t)extend_gpr(env->gpr[rs1]) == 0) {
> + env->vfp.vreg[dest].u64[j] = env->vfp.vreg[src2].u64[j];
> + } else {
> + env->vfp.vreg[dest].u64[j] = env->vfp.vreg[src2].u64[j]
> + % (uint64_t)extend_gpr(env->gpr[rs1]);
> + }
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vmsbf.m vd, vs2, vm # set-before-first mask bit */
> +void VECTOR_HELPER(vmsbf_m)(CPURISCVState *env, uint32_t vm,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i;
> + bool first_mask_bit = false;
> + if (vector_vtype_ill(env)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> + if (env->vfp.vstart != 0) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + if (i < vl) {
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (first_mask_bit) {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + continue;
> + }
> + if (!vector_mask_reg(env, rs2, width, lmul, i)) {
> + vector_mask_result(env, rd, width, lmul, i, 1);
> + } else {
> + first_mask_bit = true;
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + } else {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vmsif.m vd, vs2, vm # set-including-first mask bit */
> +void VECTOR_HELPER(vmsif_m)(CPURISCVState *env, uint32_t vm,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i;
> + bool first_mask_bit = false;
> + if (vector_vtype_ill(env)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> + if (env->vfp.vstart != 0) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + if (i < vl) {
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (first_mask_bit) {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + continue;
> + }
> + if (!vector_mask_reg(env, rs2, width, lmul, i)) {
> + vector_mask_result(env, rd, width, lmul, i, 1);
> + } else {
> + first_mask_bit = true;
> + vector_mask_result(env, rd, width, lmul, i, 1);
> + }
> + }
> + } else {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vmsof.m vd, vs2, vm # set-only-first mask bit */
> +void VECTOR_HELPER(vmsof_m)(CPURISCVState *env, uint32_t vm,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i;
> + bool first_mask_bit = false;
> + if (vector_vtype_ill(env)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> + if (env->vfp.vstart != 0) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + if (i < vl) {
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (first_mask_bit) {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + continue;
> + }
> + if (!vector_mask_reg(env, rs2, width, lmul, i)) {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + } else {
> + first_mask_bit = true;
> + vector_mask_result(env, rd, width, lmul, i, 1);
> + }
> + }
> + } else {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
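The three mask helpers above differ only in where they stop relative to the
first set bit of vs2. For example, with vs2 = 0b0101000 (first set bit at
position 3), vmsbf.m produces 0b0000111, vmsif.m produces 0b0001111, and
vmsof.m produces 0b0001000.
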
> +/* viota.m v4, v2, v0.t */
> +void VECTOR_HELPER(viota_m)(CPURISCVState *env, uint32_t vm, uint32_t rs2,
> + uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest;
> + uint32_t sum = 0;
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> + if (vector_vtype_ill(env)
> + || vector_overlap_vm_force(vm, rd)
> + || vector_overlap_dstgp_srcgp(rd, lmul, rs2, 1)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + if (env->vfp.vstart != 0) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u8[j] = sum;
> + if (vector_mask_reg(env, rs2, width, lmul, i)) {
> + sum++;
> + }
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u16[j] = sum;
> + if (vector_mask_reg(env, rs2, width, lmul, i)) {
> + sum++;
> + }
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u32[j] = sum;
> + if (vector_mask_reg(env, rs2, width, lmul, i)) {
> + sum++;
> + }
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u64[j] = sum;
> + if (vector_mask_reg(env, rs2, width, lmul, i)) {
> + sum++;
> + }
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
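viota.m is a prefix sum over the source mask: element i of vd receives the
number of set bits of vs2 strictly below position i. With vs2 mask bits
1,0,0,1,0,1 (element 0 first), vd becomes 0,1,1,1,2,2.
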
> +/* vid.v vd, vm # Write element ID to destination. */
> +void VECTOR_HELPER(vid_v)(CPURISCVState *env, uint32_t vm, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest;
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rd, false);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u8[j] = i;
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u16[j] = i;
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u32[j] = i;
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u64[j] = i;
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
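vid.v takes no source operand: with vl = 4 and all mask bits set it writes
0,1,2,3 into the first four elements, and the tail is handled through
vector_tail_common() as in the other helpers.
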
> +/* vssub.vv vd, vs2, vs1, vm # Vector-vector */
> +void VECTOR_HELPER(vssub_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest, src1, src2;
> +
> + lmul = vector_get_lmul(env);
> +
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + vl = env->vfp.vl;
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + src1 = rs1 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s8[j] = sat_sub_s8(env,
> + env->vfp.vreg[src2].s8[j], env->vfp.vreg[src1].s8[j]);
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s16[j] = sat_sub_s16(env,
> + env->vfp.vreg[src2].s16[j], env->vfp.vreg[src1].s16[j]);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s32[j] = sat_sub_s32(env,
> + env->vfp.vreg[src2].s32[j], env->vfp.vreg[src1].s32[j]);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s64[j] = sat_sub_s64(env,
> + env->vfp.vreg[src2].s64[j], env->vfp.vreg[src1].s64[j]);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vssub.vx vd, vs2, rs1, vm # vector-scalar */
> +void VECTOR_HELPER(vssub_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest, src2;
> +
> + lmul = vector_get_lmul(env);
> +
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + vl = env->vfp.vl;
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s8[j] = sat_sub_s8(env,
> + env->vfp.vreg[src2].s8[j], env->gpr[rs1]);
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s16[j] = sat_sub_s16(env,
> + env->vfp.vreg[src2].s16[j], env->gpr[rs1]);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s32[j] = sat_sub_s32(env,
> + env->vfp.vreg[src2].s32[j], env->gpr[rs1]);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s64[j] = sat_sub_s64(env,
> + env->vfp.vreg[src2].s64[j], env->gpr[rs1]);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vrem_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src1, src2, dest, vlmax;
> +
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src1 = rs1 + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src1].s8[j] == 0) {
> + env->vfp.vreg[dest].s8[j] = env->vfp.vreg[src2].s8[j];
> + } else if ((env->vfp.vreg[src2].s8[j] == MIN_S8) &&
> + (env->vfp.vreg[src1].s8[j] == (int8_t)(-1))) {
> + env->vfp.vreg[dest].s8[j] = 0;
> + } else {
> + env->vfp.vreg[dest].s8[j] = env->vfp.vreg[src2].s8[j] %
> + env->vfp.vreg[src1].s8[j];
> + }
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src1].s16[j] == 0) {
> + env->vfp.vreg[dest].s16[j] = env->vfp.vreg[src2].s16[j];
> + } else if ((env->vfp.vreg[src2].s16[j] == MIN_S16) &&
> + (env->vfp.vreg[src1].s16[j] == (int16_t)(-1))) {
> + env->vfp.vreg[dest].s16[j] = 0;
> + } else {
> + env->vfp.vreg[dest].s16[j] = env->vfp.vreg[src2].s16[j]
> + % env->vfp.vreg[src1].s16[j];
> + }
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src1].s32[j] == 0) {
> + env->vfp.vreg[dest].s32[j] = env->vfp.vreg[src2].s32[j];
> + } else if ((env->vfp.vreg[src2].s32[j] == MIN_S32) &&
> + (env->vfp.vreg[src1].s32[j] == (int32_t)(-1))) {
> + env->vfp.vreg[dest].s32[j] = 0;
> + } else {
> + env->vfp.vreg[dest].s32[j] = env->vfp.vreg[src2].s32[j]
> + % env->vfp.vreg[src1].s32[j];
> + }
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src1].s64[j] == 0) {
> + env->vfp.vreg[dest].s64[j] = env->vfp.vreg[src2].s64[j];
> + } else if ((env->vfp.vreg[src2].s64[j] == MIN_S64) &&
> + (env->vfp.vreg[src1].s64[j] == (int64_t)(-1))) {
> + env->vfp.vreg[dest].s64[j] = 0;
> + } else {
> + env->vfp.vreg[dest].s64[j] = env->vfp.vreg[src2].s64[j]
> + % env->vfp.vreg[src1].s64[j];
> + }
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +void VECTOR_HELPER(vrem_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src2, dest, vlmax;
> +
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if ((int8_t)env->gpr[rs1] == 0) {
> + env->vfp.vreg[dest].s8[j] = env->vfp.vreg[src2].s8[j];
> + } else if ((env->vfp.vreg[src2].s8[j] == MIN_S8) &&
> + ((int8_t)env->gpr[rs1] == (int8_t)(-1))) {
> + env->vfp.vreg[dest].s8[j] = 0;
> + } else {
> + env->vfp.vreg[dest].s8[j] = env->vfp.vreg[src2].s8[j] %
> + (int8_t)env->gpr[rs1];
> + }
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if ((int16_t)env->gpr[rs1] == 0) {
> + env->vfp.vreg[dest].s16[j] = env->vfp.vreg[src2].s16[j];
> + } else if ((env->vfp.vreg[src2].s16[j] == MIN_S16) &&
> + ((int16_t)env->gpr[rs1] == (int16_t)(-1))) {
> + env->vfp.vreg[dest].s16[j] = 0;
> + } else {
> + env->vfp.vreg[dest].s16[j] = env->vfp.vreg[src2].s16[j]
> + % (int16_t)env->gpr[rs1];
> + }
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if ((int32_t)env->gpr[rs1] == 0) {
> + env->vfp.vreg[dest].s32[j] = env->vfp.vreg[src2].s32[j];
> + } else if ((env->vfp.vreg[src2].s32[j] == MIN_S32) &&
> + ((int32_t)env->gpr[rs1] == (int32_t)(-1))) {
> + env->vfp.vreg[dest].s32[j] = 0;
> + } else {
> + env->vfp.vreg[dest].s32[j] = env->vfp.vreg[src2].s32[j]
> + % (int32_t)env->gpr[rs1];
> + }
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if ((int64_t)extend_gpr(env->gpr[rs1]) == 0) {
> + env->vfp.vreg[dest].s64[j] = env->vfp.vreg[src2].s64[j];
> + } else if ((env->vfp.vreg[src2].s64[j] == MIN_S64) &&
> + ((int64_t)extend_gpr(env->gpr[rs1]) == (int64_t)(-1))) {
> + env->vfp.vreg[dest].s64[j] = 0;
> + } else {
> + env->vfp.vreg[dest].s64[j] = env->vfp.vreg[src2].s64[j]
> + % (int64_t)extend_gpr(env->gpr[rs1]);
> + }
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> +
> + env->vfp.vstart = 0;
> +}
> +
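As with vdiv, the vrem corner cases mirror the scalar REM rules: remainder
by zero returns the dividend, and MIN_S* % -1 returns 0. A stand-alone
sketch of the signed 8-bit case (my model, not patch code):

    #include <stdint.h>

    /* Signed 8-bit remainder with the RISC-V REM corner cases. */
    static int8_t rem_s8_model(int8_t a, int8_t b)
    {
        if (b == 0) {
            return a;                  /* remainder by zero: the dividend */
        }
        if (a == INT8_MIN && b == -1) {
            return 0;                  /* overflow case: remainder is zero */
        }
        return a % b;
    }
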
> +/* vaadd.vv vd, vs2, vs1, vm # Vector-vector */
> +void VECTOR_HELPER(vaadd_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest, src1, src2;
> +
> + lmul = vector_get_lmul(env);
> +
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + vl = env->vfp.vl;
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + src1 = rs1 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s8[j] = avg_round_s8(env,
> + env->vfp.vreg[src1].s8[j], env->vfp.vreg[src2].s8[j]);
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s16[j] = avg_round_s16(env,
> + env->vfp.vreg[src1].s16[j], env->vfp.vreg[src2].s16[j]);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s32[j] = avg_round_s32(env,
> + env->vfp.vreg[src1].s32[j], env->vfp.vreg[src2].s32[j]);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s64[j] = avg_round_s64(env,
> + env->vfp.vreg[src1].s64[j], env->vfp.vreg[src2].s64[j]);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vaadd.vx vd, vs2, rs1, vm # vector-scalar */
> +void VECTOR_HELPER(vaadd_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest, src2;
> +
> + lmul = vector_get_lmul(env);
> +
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + vl = env->vfp.vl;
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s8[j] = avg_round_s8(env,
> + env->gpr[rs1], env->vfp.vreg[src2].s8[j]);
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s16[j] = avg_round_s16(env,
> + env->gpr[rs1], env->vfp.vreg[src2].s16[j]);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s32[j] = avg_round_s32(env,
> + env->gpr[rs1], env->vfp.vreg[src2].s32[j]);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s64[j] = avg_round_s64(env,
> + env->gpr[rs1], env->vfp.vreg[src2].s64[j]);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vaadd.vi vd, vs2, imm, vm # vector-immediate */
> +void VECTOR_HELPER(vaadd_vi)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest, src2;
> +
> + lmul = vector_get_lmul(env);
> +
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + vl = env->vfp.vl;
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s8[j] = avg_round_s8(env,
> + rs1, env->vfp.vreg[src2].s8[j]);
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s16[j] = avg_round_s16(env,
> + rs1, env->vfp.vreg[src2].s16[j]);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s32[j] = avg_round_s32(env,
> + rs1, env->vfp.vreg[src2].s32[j]);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s64[j] = avg_round_s64(env,
> + rs1, env->vfp.vreg[src2].s64[j]);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
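avg_round_s8() and friends are also defined outside this hunk; vaadd in
v0.7.1 is an averaging add, (a + b + rounding bit) >> 1, evaluated in wider
arithmetic so the intermediate sum cannot overflow. A stand-alone model
assuming round-half-up (the rounding actually used by the patch is not
visible here):

    #include <stdint.h>

    /* Averaging signed 8-bit add: widen, add, round, halve. */
    static int8_t avg_round_s8_model(int8_t a, int8_t b)
    {
        int16_t sum = (int16_t)a + (int16_t)b;   /* no overflow in 16 bits */
        return (int8_t)((sum + (sum & 1)) >> 1); /* add rounding bit, halve */
    }
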
> +void VECTOR_HELPER(vmulhu_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src1, src2, dest, vlmax;
> +
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src1 = rs1 + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u8[j] =
> + ((uint16_t)env->vfp.vreg[src1].u8[j]
> + * (uint16_t)env->vfp.vreg[src2].u8[j]) >> width;
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u16[j] =
> + ((uint32_t)env->vfp.vreg[src1].u16[j]
> + * (uint32_t)env->vfp.vreg[src2].u16[j]) >> width;
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u32[j] =
> + ((uint64_t)env->vfp.vreg[src1].u32[j]
> + * (uint64_t)env->vfp.vreg[src2].u32[j]) >> width;
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u64[j] = u64xu64_lh(
> + env->vfp.vreg[src1].u64[j], env->vfp.vreg[src2].u64[j]);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +void VECTOR_HELPER(vmulhu_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src2, dest, vlmax;
> +
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u8[j] =
> + ((uint16_t)(uint8_t)env->gpr[rs1]
> + * (uint16_t)env->vfp.vreg[src2].u8[j]) >> width;
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u16[j] =
> + ((uint32_t)(uint16_t)env->gpr[rs1]
> + * (uint32_t)env->vfp.vreg[src2].u16[j]) >> width;
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u32[j] =
> + ((uint64_t)(uint32_t)env->gpr[rs1]
> + * (uint64_t)env->vfp.vreg[src2].u32[j]) >> width;
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u64[j] = u64xu64_lh(
> + (uint64_t)extend_gpr(env->gpr[rs1]), env->vfp.vreg[src2].u64[j]);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> +
> + env->vfp.vstart = 0;
> +}
> +
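For SEW of 8/16/32 the high half of the product fits in the next wider C
type, which is what the shifts by width above compute; only SEW=64 needs
the u64xu64_lh() helper. Its intended result can be stated with a 128-bit
intermediate (the real helper is presumably built from 32-bit partial
products rather than relying on __int128):

    #include <stdint.h>

    /* High 64 bits of an unsigned 64x64-bit product. */
    static uint64_t mulhu64_model(uint64_t a, uint64_t b)
    {
        return (uint64_t)(((unsigned __int128)a * b) >> 64);
    }
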
> +/* vfmul.vv vd, vs2, vs1, vm # Vector-vector */
> +void VECTOR_HELPER(vfmul_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest, src1, src2;
> +
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + src1 = rs1 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f16[j] = float16_mul(
> + env->vfp.vreg[src1].f16[j],
> + env->vfp.vreg[src2].f16[j],
> + &env->fp_status);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f32[j] = float32_mul(
> + env->vfp.vreg[src1].f32[j],
> + env->vfp.vreg[src2].f32[j],
> + &env->fp_status);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f64[j] = float64_mul(
> + env->vfp.vreg[src1].f64[j],
> + env->vfp.vreg[src2].f64[j],
> + &env->fp_status);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_fcommon(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vfmul.vf vd, vs2, rs1, vm # vector-scalar */
> +void VECTOR_HELPER(vfmul_vf)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest, src2;
> +
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f16[j] = float16_mul(
> + env->fpr[rs1],
> + env->vfp.vreg[src2].f16[j],
> + &env->fp_status);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f32[j] = float32_mul(
> + env->fpr[rs1],
> + env->vfp.vreg[src2].f32[j],
> + &env->fp_status);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f64[j] = float64_mul(
> + env->fpr[rs1],
> + env->vfp.vreg[src2].f64[j],
> + &env->fp_status);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_fcommon(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +void VECTOR_HELPER(vsll_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src1, src2, dest, vlmax;
> +
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> +
> + for (i = 0; i < vlmax; i++) {
> + src1 = rs1 + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u8[j] = env->vfp.vreg[src2].u8[j]
> + << (env->vfp.vreg[src1].u8[j] & 0x7);
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u16[j] = env->vfp.vreg[src2].u16[j]
> + << (env->vfp.vreg[src1].u16[j] & 0xf);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u32[j] = env->vfp.vreg[src2].u32[j]
> + << (env->vfp.vreg[src1].u32[j] & 0x1f);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u64[j] = env->vfp.vreg[src2].u64[j]
> + << (env->vfp.vreg[src1].u64[j] & 0x3f);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
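> +/* vsll.vx vd, vs2, rs1, vm # vd[i] = vs2[i] << (x[rs1] & (SEW-1)) */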
> +void VECTOR_HELPER(vsll_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src2, dest, vlmax;
> +
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u8[j] = env->vfp.vreg[src2].u8[j]
> + << (env->gpr[rs1] & 0x7);
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u16[j] = env->vfp.vreg[src2].u16[j]
> + << (env->gpr[rs1] & 0xf);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u32[j] = env->vfp.vreg[src2].u32[j]
> + << (env->gpr[rs1] & 0x1f);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u64[j] = env->vfp.vreg[src2].u64[j]
> + << ((uint64_t)extend_gpr(env->gpr[rs1]) & 0x3f);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
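> +/* vsll.vi vd, vs2, imm, vm # vd[i] = vs2[i] << imm */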
> +void VECTOR_HELPER(vsll_vi)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src2, dest, vlmax;
> +
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u8[j] = env->vfp.vreg[src2].u8[j]
> + << (rs1);
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u16[j] = env->vfp.vreg[src2].u16[j]
> + << (rs1);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u32[j] = env->vfp.vreg[src2].u32[j]
> + << (rs1);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u64[j] = env->vfp.vreg[src2].u64[j]
> + << (rs1);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
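> +/* vmul.vv vd, vs2, vs1, vm # vd[i] = vs2[i] * vs1[i] */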
> +void VECTOR_HELPER(vmul_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src1, src2, dest, vlmax;
> +
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src1 = rs1 + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s8[j] = env->vfp.vreg[src1].s8[j]
> + * env->vfp.vreg[src2].s8[j];
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s16[j] = env->vfp.vreg[src1].s16[j]
> + * env->vfp.vreg[src2].s16[j];
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s32[j] = env->vfp.vreg[src1].s32[j]
> + * env->vfp.vreg[src2].s32[j];
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s64[j] = env->vfp.vreg[src1].s64[j]
> + * env->vfp.vreg[src2].s64[j];
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
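> +
> +/* vmul.vx vd, vs2, rs1, vm # vd[i] = vs2[i] * x[rs1] */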
> +void VECTOR_HELPER(vmul_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src2, dest, vlmax;
> +
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s8[j] = env->gpr[rs1]
> + * env->vfp.vreg[src2].s8[j];
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s16[j] = env->gpr[rs1]
> + * env->vfp.vreg[src2].s16[j];
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s32[j] = env->gpr[rs1]
> + * env->vfp.vreg[src2].s32[j];
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s64[j] =
> + (int64_t)extend_gpr(env->gpr[rs1])
> + * env->vfp.vreg[src2].s64[j];
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vasub.vv vd, vs2, vs1, vm # Vector-vector */
> +void VECTOR_HELPER(vasub_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest, src1, src2;
> +
> + lmul = vector_get_lmul(env);
> +
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + vl = env->vfp.vl;
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + src1 = rs1 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s8[j] = avg_round_s8(
> + env,
> + ~env->vfp.vreg[src1].s8[j] + 1,
> + env->vfp.vreg[src2].s8[j]);
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s16[j] = avg_round_s16(
> + env,
> + ~env->vfp.vreg[src1].s16[j] + 1,
> + env->vfp.vreg[src2].s16[j]);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s32[j] = avg_round_s32(
> + env,
> + ~env->vfp.vreg[src1].s32[j] + 1,
> + env->vfp.vreg[src2].s32[j]);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s64[j] = avg_round_s64(
> + env,
> + ~env->vfp.vreg[src1].s64[j] + 1,
> + env->vfp.vreg[src2].s64[j]);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vasub.vx vd, vs2, rs1, vm # vector-scalar */
> +void VECTOR_HELPER(vasub_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest, src2;
> +
> + lmul = vector_get_lmul(env);
> +
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + vl = env->vfp.vl;
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s8[j] = avg_round_s8(
> + env, ~env->gpr[rs1] + 1, env->vfp.vreg[src2].s8[j]);
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s16[j] = avg_round_s16(
> + env, ~env->gpr[rs1] + 1, env->vfp.vreg[src2].s16[j]);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s32[j] = avg_round_s32(
> + env, ~env->gpr[rs1] + 1, env->vfp.vreg[src2].s32[j]);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s64[j] = avg_round_s64(
> + env, ~env->gpr[rs1] + 1, env->vfp.vreg[src2].s64[j]);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
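> +/* vmulhsu.vv vd, vs2, vs1, vm # vd[i] = (vs2[i] * unsigned(vs1[i])) >> SEW */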
> +void VECTOR_HELPER(vmulhsu_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src1, src2, dest, vlmax;
> +
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src1 = rs1 + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s8[j] =
> + ((uint16_t)env->vfp.vreg[src1].u8[j]
> + * (int16_t)env->vfp.vreg[src2].s8[j]) >> width;
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s16[j] =
> + ((uint32_t)env->vfp.vreg[src1].u16[j]
> + * (int32_t)env->vfp.vreg[src2].s16[j]) >> width;
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s32[j] =
> + ((uint64_t)env->vfp.vreg[src1].u32[j]
> + * (int64_t)env->vfp.vreg[src2].s32[j]) >> width;
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s64[j] = s64xu64_lh(
> + env->vfp.vreg[src2].s64[j], env->vfp.vreg[src1].u64[j]);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
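> +
> +/* vmulhsu.vx vd, vs2, rs1, vm # vd[i] = (vs2[i] * unsigned(x[rs1])) >> SEW */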
> +void VECTOR_HELPER(vmulhsu_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src2, dest, vlmax;
> +
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s8[j] =
> + ((uint16_t)(uint8_t)env->gpr[rs1]
> + * (int16_t)env->vfp.vreg[src2].s8[j]) >> width;
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s16[j] =
> + ((uint32_t)(uint16_t)env->gpr[rs1]
> + * (int32_t)env->vfp.vreg[src2].s16[j]) >> width;
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s32[j] =
> + ((uint64_t)(uint32_t)env->gpr[rs1]
> + * (int64_t)env->vfp.vreg[src2].s32[j]) >> width;
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s64[j] = s64xu64_lh(
> + env->vfp.vreg[src2].s64[j],
> + (uint64_t)extend_gpr(env->gpr[rs1]));
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vsmul.vv vd, vs2, vs1, vm # vd[i] = clip((vs2[i]*vs1[i]+round)>>(SEW-1)) */
> +void VECTOR_HELPER(vsmul_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest, src1, src2;
> + if (vector_vtype_ill(env)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> + if ((!(vm)) && rd == 0) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + src1 = rs1 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s8[j] = vsmul_8(env,
> + env->vfp.vreg[src1].s8[j], env->vfp.vreg[src2].s8[j]);
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s16[j] = vsmul_16(env,
> + env->vfp.vreg[src1].s16[j], env->vfp.vreg[src2].s16[j]);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s32[j] = vsmul_32(env,
> + env->vfp.vreg[src1].s32[j], env->vfp.vreg[src2].s32[j]);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s64[j] = vsmul_64(env,
> + env->vfp.vreg[src1].s64[j], env->vfp.vreg[src2].s64[j]);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vsmul.vx vd, vs2, rs1, vm # vd[i] = clip((vs2[i]*x[rs1]+round)>>(SEW-1)) */
> +void VECTOR_HELPER(vsmul_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest, src2;
> + if (vector_vtype_ill(env)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> + if ((!(vm)) && rd == 0) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s8[j] = vsmul_8(env,
> + env->vfp.vreg[src2].s8[j], env->gpr[rs1]);
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s16[j] = vsmul_16(env,
> + env->vfp.vreg[src2].s16[j], env->gpr[rs1]);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s32[j] = vsmul_32(env,
> + env->vfp.vreg[src2].s32[j], env->gpr[rs1]);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s64[j] = vsmul_64(env,
> + env->vfp.vreg[src2].s64[j], env->gpr[rs1]);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
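> +/* vmulh.vv vd, vs2, vs1, vm # vd[i] = (vs2[i] * vs1[i]) >> SEW, signed high half */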
> +void VECTOR_HELPER(vmulh_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src1, src2, dest, vlmax;
> +
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src1 = rs1 + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s8[j] =
> + ((int16_t)env->vfp.vreg[src1].s8[j]
> + * (int16_t)env->vfp.vreg[src2].s8[j]) >> width;
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s16[j] =
> + ((int32_t)env->vfp.vreg[src1].s16[j]
> + * (int32_t)env->vfp.vreg[src2].s16[j]) >> width;
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s32[j] =
> + ((int64_t)env->vfp.vreg[src1].s32[j]
> + * (int64_t)env->vfp.vreg[src2].s32[j]) >> width;
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s64[j] = s64xs64_lh(
> + env->vfp.vreg[src1].s64[j], env->vfp.vreg[src2].s64[j]);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
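> +
> +/* vmulh.vx vd, vs2, rs1, vm # vd[i] = (vs2[i] * x[rs1]) >> SEW, signed high half */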
> +void VECTOR_HELPER(vmulh_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src2, dest, vlmax;
> +
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s8[j] =
> + ((int16_t)(int8_t)env->gpr[rs1]
> + * (int16_t)env->vfp.vreg[src2].s8[j]) >> width;
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s16[j] =
> + ((int32_t)(int16_t)env->gpr[rs1]
> + * (int32_t)env->vfp.vreg[src2].s16[j]) >> width;
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s32[j] =
> + ((int64_t)(int32_t)env->gpr[rs1]
> + * (int64_t)env->vfp.vreg[src2].s32[j]) >> width;
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s64[j] = s64xs64_lh(
> + (int64_t)extend_gpr(env->gpr[rs1]),
> + env->vfp.vreg[src2].s64[j]);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vfrsub.vf vd, vs2, rs1, vm # Scalar-vector vd[i] = f[rs1] - vs2[i] */
> +void VECTOR_HELPER(vfrsub_vf)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest, src2;
> +
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f16[j] = float16_sub(
> + env->fpr[rs1],
> + env->vfp.vreg[src2].f16[j],
> + &env->fp_status);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f32[j] = float32_sub(
> + env->fpr[rs1],
> + env->vfp.vreg[src2].f32[j],
> + &env->fp_status);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f64[j] = float64_sub(
> + env->fpr[rs1],
> + env->vfp.vreg[src2].f64[j],
> + &env->fp_status);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_fcommon(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
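> +
> +/* vsrl.vv vd, vs2, vs1, vm # vd[i] = vs2[i] >> (vs1[i] & (SEW-1)), logical */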
> +void VECTOR_HELPER(vsrl_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src1, src2, dest, vlmax;
> +
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> +
> + for (i = 0; i < vlmax; i++) {
> + src1 = rs1 + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u8[j] = env->vfp.vreg[src2].u8[j]
> + >> (env->vfp.vreg[src1].u8[j] & 0x7);
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u16[j] = env->vfp.vreg[src2].u16[j]
> + >> (env->vfp.vreg[src1].u16[j] & 0xf);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u32[j] = env->vfp.vreg[src2].u32[j]
> + >> (env->vfp.vreg[src1].u32[j] & 0x1f);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u64[j] = env->vfp.vreg[src2].u64[j]
> + >> (env->vfp.vreg[src1].u64[j] & 0x3f);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
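> +
> +/* vsrl.vx vd, vs2, rs1, vm # vd[i] = vs2[i] >> (x[rs1] & (SEW-1)), logical */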
> +void VECTOR_HELPER(vsrl_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src2, dest, vlmax;
> +
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u8[j] = env->vfp.vreg[src2].u8[j]
> + >> (env->gpr[rs1] & 0x7);
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u16[j] = env->vfp.vreg[src2].u16[j]
> + >> (env->gpr[rs1] & 0xf);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u32[j] = env->vfp.vreg[src2].u32[j]
> + >> (env->gpr[rs1] & 0x1f);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u64[j] = env->vfp.vreg[src2].u64[j]
> + >> ((uint64_t)extend_gpr(env->gpr[rs1]) & 0x3f);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
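> +
> +/* vsrl.vi vd, vs2, imm, vm # vd[i] = vs2[i] >> imm, logical */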
> +void VECTOR_HELPER(vsrl_vi)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src2, dest, vlmax;
> +
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u8[j] = env->vfp.vreg[src2].u8[j]
> + >> (rs1);
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u16[j] = env->vfp.vreg[src2].u16[j]
> + >> (rs1);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u32[j] = env->vfp.vreg[src2].u32[j]
> + >> (rs1);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u64[j] = env->vfp.vreg[src2].u64[j]
> + >> (rs1);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vfmadd.vv vd, vs1, vs2, vm # vd[i] = +(vs1[i] * vd[i]) + vs2[i] */
> +void VECTOR_HELPER(vfmadd_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest, src1, src2;
> +
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + src1 = rs1 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f16[j] = float16_muladd(
> + env->vfp.vreg[src1].f16[j],
> + env->vfp.vreg[dest].f16[j],
> + env->vfp.vreg[src2].f16[j],
> + 0,
> + &env->fp_status);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f32[j] = float32_muladd(
> + env->vfp.vreg[src1].f32[j],
> + env->vfp.vreg[dest].f32[j],
> + env->vfp.vreg[src2].f32[j],
> + 0,
> + &env->fp_status);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f64[j] = float64_muladd(
> + env->vfp.vreg[src1].f64[j],
> + env->vfp.vreg[dest].f64[j],
> + env->vfp.vreg[src2].f64[j],
> + 0,
> + &env->fp_status);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_fcommon(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vfmadd.vf vd, rs1, vs2, vm # vd[i] = +(f[rs1] * vd[i]) + vs2[i] */
> +void VECTOR_HELPER(vfmadd_vf)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest, src2;
> +
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f16[j] = float16_muladd(
> + env->fpr[rs1],
> + env->vfp.vreg[dest].f16[j],
> + env->vfp.vreg[src2].f16[j],
> + 0,
> + &env->fp_status);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f32[j] = float32_muladd(
> + env->fpr[rs1],
> + env->vfp.vreg[dest].f32[j],
> + env->vfp.vreg[src2].f32[j],
> + 0,
> + &env->fp_status);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f64[j] = float64_muladd(
> + env->fpr[rs1],
> + env->vfp.vreg[dest].f64[j],
> + env->vfp.vreg[src2].f64[j],
> + 0,
> + &env->fp_status);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_fcommon(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
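> +
> +/* vsra.vv vd, vs2, vs1, vm # vd[i] = vs2[i] >> (vs1[i] & (SEW-1)), arithmetic */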
> +void VECTOR_HELPER(vsra_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src1, src2, dest, vlmax;
> +
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> +
> + for (i = 0; i < vlmax; i++) {
> + src1 = rs1 + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s8[j] = env->vfp.vreg[src2].s8[j]
> + >> (env->vfp.vreg[src1].s8[j] & 0x7);
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s16[j] = env->vfp.vreg[src2].s16[j]
> + >> (env->vfp.vreg[src1].s16[j] & 0xf);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s32[j] = env->vfp.vreg[src2].s32[j]
> + >> (env->vfp.vreg[src1].s32[j] & 0x1f);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s64[j] = env->vfp.vreg[src2].s64[j]
> + >> (env->vfp.vreg[src1].s64[j] & 0x3f);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
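> +/* vsra.vx vd, vs2, rs1, vm # vd[i] = vs2[i] >> (x[rs1] & (SEW-1)), arithmetic */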
> +void VECTOR_HELPER(vsra_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src2, dest, vlmax;
> +
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s8[j] = env->vfp.vreg[src2].s8[j]
> + >> (env->gpr[rs1] & 0x7);
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s16[j] = env->vfp.vreg[src2].s16[j]
> + >> (env->gpr[rs1] & 0xf);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s32[j] = env->vfp.vreg[src2].s32[j]
> + >> (env->gpr[rs1] & 0x1f);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s64[j] = env->vfp.vreg[src2].s64[j]
> + >> ((uint64_t)extend_gpr(env->gpr[rs1]) & 0x3f);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
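> +/* vsra.vi vd, vs2, imm, vm # vd[i] = vs2[i] >> imm, arithmetic */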
> +void VECTOR_HELPER(vsra_vi)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src2, dest, vlmax;
> +
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s8[j] = env->vfp.vreg[src2].s8[j]
> + >> (rs1);
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s16[j] = env->vfp.vreg[src2].s16[j]
> + >> (rs1);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s32[j] = env->vfp.vreg[src2].s32[j]
> + >> (rs1);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s64[j] = env->vfp.vreg[src2].s64[j]
> + >> (rs1);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
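> +
> +/* vmadd.vv vd, vs1, vs2, vm # vd[i] = (vs1[i] * vd[i]) + vs2[i] */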
> +void VECTOR_HELPER(vmadd_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src1, src2, dest, vlmax;
> +
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src1 = rs1 + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s8[j] = env->vfp.vreg[src1].s8[j]
> + * env->vfp.vreg[dest].s8[j]
> + + env->vfp.vreg[src2].s8[j];
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s16[j] = env->vfp.vreg[src1].s16[j]
> + * env->vfp.vreg[dest].s16[j]
> + + env->vfp.vreg[src2].s16[j];
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s32[j] = env->vfp.vreg[src1].s32[j]
> + * env->vfp.vreg[dest].s32[j]
> + + env->vfp.vreg[src2].s32[j];
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s64[j] = env->vfp.vreg[src1].s64[j]
> + * env->vfp.vreg[dest].s64[j]
> + + env->vfp.vreg[src2].s64[j];
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
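> +
> +/* vmadd.vx vd, rs1, vs2, vm # vd[i] = (x[rs1] * vd[i]) + vs2[i] */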
> +void VECTOR_HELPER(vmadd_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src2, dest, vlmax;
> +
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s8[j] = env->gpr[rs1]
> + * env->vfp.vreg[dest].s8[j]
> + + env->vfp.vreg[src2].s8[j];
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s16[j] = env->gpr[rs1]
> + * env->vfp.vreg[dest].s16[j]
> + + env->vfp.vreg[src2].s16[j];
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s32[j] = env->gpr[rs1]
> + * env->vfp.vreg[dest].s32[j]
> + + env->vfp.vreg[src2].s32[j];
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s64[j] =
> + (int64_t)extend_gpr(env->gpr[rs1])
> + * env->vfp.vreg[dest].s64[j]
> + + env->vfp.vreg[src2].s64[j];
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> +
> + env->vfp.vstart = 0;
> +}
> +
> +/* vfnmadd.vv vd, vs1, vs2, vm # vd[i] = -(vs1[i] * vd[i]) - vs2[i] */
> +void VECTOR_HELPER(vfnmadd_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest, src1, src2;
> +
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + src1 = rs1 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f16[j] = float16_muladd(
> + env->vfp.vreg[src1].f16[j],
> + env->vfp.vreg[dest].f16[j],
> + env->vfp.vreg[src2].f16[j],
> + float_muladd_negate_c |
> + float_muladd_negate_product,
> + &env->fp_status);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f32[j] = float32_muladd(
> + env->vfp.vreg[src1].f32[j],
> + env->vfp.vreg[dest].f32[j],
> + env->vfp.vreg[src2].f32[j],
> + float_muladd_negate_c |
> + float_muladd_negate_product,
> + &env->fp_status);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f64[j] = float64_muladd(
> + env->vfp.vreg[src1].f64[j],
> + env->vfp.vreg[dest].f64[j],
> + env->vfp.vreg[src2].f64[j],
> + float_muladd_negate_c |
> + float_muladd_negate_product,
> + &env->fp_status);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_fcommon(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vfnmadd.vf vd, rs1, vs2, vm # vd[i] = -(f[rs1] * vd[i]) - vs2[i] */
> +void VECTOR_HELPER(vfnmadd_vf)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest, src2;
> +
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f16[j] = float16_muladd(
> + env->fpr[rs1],
> + env->vfp.vreg[dest].f16[j],
> + env->vfp.vreg[src2].f16[j],
> + float_muladd_negate_c |
> + float_muladd_negate_product,
> + &env->fp_status);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f32[j] = float32_muladd(
> + env->fpr[rs1],
> + env->vfp.vreg[dest].f32[j],
> + env->vfp.vreg[src2].f32[j],
> + float_muladd_negate_c |
> + float_muladd_negate_product,
> + &env->fp_status);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f64[j] = float64_muladd(
> + env->fpr[rs1],
> + env->vfp.vreg[dest].f64[j],
> + env->vfp.vreg[src2].f64[j],
> + float_muladd_negate_c |
> + float_muladd_negate_product,
> + &env->fp_status);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_fcommon(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vssrl.vv vd, vs2, vs1, vm # vd[i] = ((vs2[i] + round)>>vs1[i]) */
> +void VECTOR_HELPER(vssrl_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest, src1, src2;
> +
> + lmul = vector_get_lmul(env);
> +
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + vl = env->vfp.vl;
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + src1 = rs1 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u8[j] = vssrl_8(env,
> + env->vfp.vreg[src2].u8[j], env->vfp.vreg[src1].u8[j]);
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u16[j] = vssrl_16(env,
> + env->vfp.vreg[src2].u16[j], env->vfp.vreg[src1].u16[j]);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u32[j] = vssrl_32(env,
> + env->vfp.vreg[src2].u32[j], env->vfp.vreg[src1].u32[j]);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u64[j] = vssrl_64(env,
> + env->vfp.vreg[src2].u64[j], env->vfp.vreg[src1].u64[j]);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vssrl.vx vd, vs2, rs1, vm # vd[i] = ((vs2[i] + round)>>x[rs1]) */
> +void VECTOR_HELPER(vssrl_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest, src2;
> +
> + lmul = vector_get_lmul(env);
> +
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + vl = env->vfp.vl;
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u8[j] = vssrl_8(env,
> + env->vfp.vreg[src2].u8[j], env->gpr[rs1]);
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u16[j] = vssrl_16(env,
> + env->vfp.vreg[src2].u16[j], env->gpr[rs1]);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u32[j] = vssrl_32(env,
> + env->vfp.vreg[src2].u32[j], env->gpr[rs1]);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u64[j] = vssrl_64(env,
> + env->vfp.vreg[src2].u64[j], env->gpr[rs1]);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vssrl.vi vd, vs2, imm, vm # vd[i] = ((vs2[i] + round)>>imm) */
> +void VECTOR_HELPER(vssrl_vi)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest, src2;
> +
> + lmul = vector_get_lmul(env);
> +
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + vl = env->vfp.vl;
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u8[j] = vssrli_8(env,
> + env->vfp.vreg[src2].u8[j], rs1);
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u16[j] = vssrli_16(env,
> + env->vfp.vreg[src2].u16[j], rs1);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u32[j] = vssrli_32(env,
> + env->vfp.vreg[src2].u32[j], rs1);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u64[j] = vssrli_64(env,
> + env->vfp.vreg[src2].u64[j], rs1);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vfmsub.vv vd, vs1, vs2, vm # vd[i] = +(vs1[i] * vd[i]) - vs2[i] */
> +void VECTOR_HELPER(vfmsub_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest, src1, src2;
> +
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + src1 = rs1 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f16[j] = float16_muladd(
> + env->vfp.vreg[src1].f16[j],
> + env->vfp.vreg[dest].f16[j],
> + env->vfp.vreg[src2].f16[j],
> + float_muladd_negate_c,
> + &env->fp_status);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f32[j] = float32_muladd(
> + env->vfp.vreg[src1].f32[j],
> + env->vfp.vreg[dest].f32[j],
> + env->vfp.vreg[src2].f32[j],
> + float_muladd_negate_c,
> + &env->fp_status);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f64[j] = float64_muladd(
> + env->vfp.vreg[src1].f64[j],
> + env->vfp.vreg[dest].f64[j],
> + env->vfp.vreg[src2].f64[j],
> + float_muladd_negate_c,
> + &env->fp_status);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_fcommon(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vfmsub.vf vd, rs1, vs2, vm # vd[i] = +(f[rs1] * vd[i]) - vs2[i] */
> +void VECTOR_HELPER(vfmsub_vf)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest, src2;
> +
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f16[j] = float16_muladd(
> + env->fpr[rs1],
> + env->vfp.vreg[dest].f16[j],
> + env->vfp.vreg[src2].f16[j],
> + float_muladd_negate_c,
> + &env->fp_status);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f32[j] = float32_muladd(
> + env->fpr[rs1],
> + env->vfp.vreg[dest].f32[j],
> + env->vfp.vreg[src2].f32[j],
> + float_muladd_negate_c,
> + &env->fp_status);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f64[j] = float64_muladd(
> + env->fpr[rs1],
> + env->vfp.vreg[dest].f64[j],
> + env->vfp.vreg[src2].f64[j],
> + float_muladd_negate_c,
> + &env->fp_status);
> + }
> + break;
> +            default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> +                return;
> +            }
> +        } else {
> +            vector_tail_fcommon(env, dest, j, width);
> +        }
> +    }
> +    env->vfp.vstart = 0;
> +}
> +
> +/* vssra.vv vd, vs2, vs1, vm # vd[i] = ((vs2[i] + round)>>vs1[i]) */
> +void VECTOR_HELPER(vssra_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> +    uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest, src1, src2;
> +
> + lmul = vector_get_lmul(env);
> +
> +
> +    if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + vl = env->vfp.vl;
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + src1 = rs1 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> +            case 8:
> +                if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    env->vfp.vreg[dest].s8[j] = vssra_8(env,
> +                        env->vfp.vreg[src2].s8[j], env->vfp.vreg[src1].u8[j]);
> +                }
> +                break;
> +            case 16:
> +                if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    env->vfp.vreg[dest].s16[j] = vssra_16(env,
> +                        env->vfp.vreg[src2].s16[j], env->vfp.vreg[src1].u16[j]);
> +                }
> +                break;
> +            case 32:
> +                if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    env->vfp.vreg[dest].s32[j] = vssra_32(env,
> +                        env->vfp.vreg[src2].s32[j], env->vfp.vreg[src1].u32[j]);
> +                }
> +                break;
> +            case 64:
> +                if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    env->vfp.vreg[dest].s64[j] = vssra_64(env,
> +                        env->vfp.vreg[src2].s64[j], env->vfp.vreg[src1].u64[j]);
> +                }
> +                break;
> +            default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> +                return;
> +            }
> +        } else {
> +            vector_tail_common(env, dest, j, width);
> +        }
> +    }
> +    env->vfp.vstart = 0;
> +}
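
vssra_8/16/32/64 are defined earlier in this file. For reference, a
standalone sketch of the per-element operation vd[i] = (vs2[i] + round)
>> shamt at SEW=8; this is hypothetical, hard-coding round-to-nearest-up
and ignoring the vxrm rounding-mode CSR that the real helpers consult:

    #include <stdint.h>

    static int8_t vssra8_sketch(int8_t a, uint8_t shamt)
    {
        shamt &= 0x7;                       /* only log2(SEW) bits are used */
        if (shamt == 0) {
            return a;
        }
        int16_t round = 1 << (shamt - 1);   /* half an output ulp */
        return (int8_t)(((int16_t)a + round) >> shamt);
    }
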
> +
> +/* vssra.vx vd, vs2, rs1, vm # vd[i] = ((vs2[i] + round)>>x[rs1]) */
> +void VECTOR_HELPER(vssra_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> +    uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest, src2;
> +
> + lmul = vector_get_lmul(env);
> +
> +
> +    if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + vl = env->vfp.vl;
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s8[j] = vssra_8(env,
> + env->vfp.vreg[src2].s8[j], env->gpr[rs1]);
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s16[j] = vssra_16(env,
> + env->vfp.vreg[src2].s16[j], env->gpr[rs1]);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s32[j] = vssra_32(env,
> + env->vfp.vreg[src2].s32[j], env->gpr[rs1]);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s64[j] = vssra_64(env,
> + env->vfp.vreg[src2].s64[j], env->gpr[rs1]);
> + }
> + break;
> +            default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> +                return;
> +            }
> +        } else {
> +            vector_tail_common(env, dest, j, width);
> +        }
> +    }
> +    env->vfp.vstart = 0;
> +}
> +
> +/* vssra.vi vd, vs2, imm, vm # vd[i] = ((vs2[i] + round)>>imm) */
> +void VECTOR_HELPER(vssra_vi)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> +    uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest, src2;
> +
> + lmul = vector_get_lmul(env);
> +
> +
> +    if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + vl = env->vfp.vl;
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s8[j] = vssrai_8(env,
> + env->vfp.vreg[src2].s8[j], rs1);
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s16[j] = vssrai_16(env,
> + env->vfp.vreg[src2].s16[j], rs1);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s32[j] = vssrai_32(env,
> + env->vfp.vreg[src2].s32[j], rs1);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s64[j] = vssrai_64(env,
> + env->vfp.vreg[src2].s64[j], rs1);
> + }
> + break;
> +            default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> +                return;
> +            }
> +        } else {
> +            vector_tail_common(env, dest, j, width);
> +        }
> +    }
> +    env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vnmsub_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> +    uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src1, src2, dest, vlmax;
> +
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> +    if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src1 = rs1 + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> +            case 8:
> +                if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    env->vfp.vreg[dest].s8[j] = env->vfp.vreg[src2].s8[j]
> +                        - env->vfp.vreg[src1].s8[j]
> +                        * env->vfp.vreg[dest].s8[j];
> +                }
> +                break;
> +            case 16:
> +                if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    env->vfp.vreg[dest].s16[j] = env->vfp.vreg[src2].s16[j]
> +                        - env->vfp.vreg[src1].s16[j]
> +                        * env->vfp.vreg[dest].s16[j];
> +                }
> +                break;
> +            case 32:
> +                if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    env->vfp.vreg[dest].s32[j] = env->vfp.vreg[src2].s32[j]
> +                        - env->vfp.vreg[src1].s32[j]
> +                        * env->vfp.vreg[dest].s32[j];
> +                }
> +                break;
> +            case 64:
> +                if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    env->vfp.vreg[dest].s64[j] = env->vfp.vreg[src2].s64[j]
> +                        - env->vfp.vreg[src1].s64[j]
> +                        * env->vfp.vreg[dest].s64[j];
> +                }
> +                break;
> +            default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> +                break;
> +            }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +void VECTOR_HELPER(vnmsub_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> +    uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src2, dest, vlmax;
> +
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> +    if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> +            case 8:
> +                if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    env->vfp.vreg[dest].s8[j] = env->vfp.vreg[src2].s8[j]
> +                        - env->gpr[rs1]
> +                        * env->vfp.vreg[dest].s8[j];
> +                }
> +                break;
> +            case 16:
> +                if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    env->vfp.vreg[dest].s16[j] = env->vfp.vreg[src2].s16[j]
> +                        - env->gpr[rs1]
> +                        * env->vfp.vreg[dest].s16[j];
> +                }
> +                break;
> +            case 32:
> +                if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    env->vfp.vreg[dest].s32[j] = env->vfp.vreg[src2].s32[j]
> +                        - env->gpr[rs1]
> +                        * env->vfp.vreg[dest].s32[j];
> +                }
> +                break;
> +            case 64:
> +                if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    env->vfp.vreg[dest].s64[j] = env->vfp.vreg[src2].s64[j]
> +                        - (int64_t)extend_gpr(env->gpr[rs1])
> +                        * env->vfp.vreg[dest].s64[j];
> +                }
> +                break;
> +            default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> +                break;
> +            }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vfnmsub.vv vd, vs1, vs2, vm # vd[i] = -(vs1[i] * vd[i]) + vs2[i] */
> +void VECTOR_HELPER(vfnmsub_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> +    uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest, src1, src2;
> +
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> +    if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + src1 = rs1 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f16[j] = float16_muladd(
> + env->vfp.vreg[src1].f16[j],
> + env->vfp.vreg[dest].f16[j],
> + env->vfp.vreg[src2].f16[j],
> + float_muladd_negate_product,
> + &env->fp_status);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f32[j] = float32_muladd(
> + env->vfp.vreg[src1].f32[j],
> + env->vfp.vreg[dest].f32[j],
> + env->vfp.vreg[src2].f32[j],
> + float_muladd_negate_product,
> + &env->fp_status);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f64[j] = float64_muladd(
> + env->vfp.vreg[src1].f64[j],
> + env->vfp.vreg[dest].f64[j],
> + env->vfp.vreg[src2].f64[j],
> + float_muladd_negate_product,
> + &env->fp_status);
> + }
> + break;
> +            default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> +                return;
> +            }
> +        } else {
> +            vector_tail_fcommon(env, dest, j, width);
> +        }
> +    }
> +    env->vfp.vstart = 0;
> +}
> +
> +/* vfnmsub.vf vd, rs1, vs2, vm # vd[i] = -(f[rs1] * vd[i]) + vs2[i] */
> +void VECTOR_HELPER(vfnmsub_vf)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> +    uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest, src2;
> +
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> +    if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f16[j] = float16_muladd(
> + env->fpr[rs1],
> + env->vfp.vreg[dest].f16[j],
> + env->vfp.vreg[src2].f16[j],
> + float_muladd_negate_product,
> + &env->fp_status);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f32[j] = float32_muladd(
> + env->fpr[rs1],
> + env->vfp.vreg[dest].f32[j],
> + env->vfp.vreg[src2].f32[j],
> + float_muladd_negate_product,
> + &env->fp_status);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f64[j] = float64_muladd(
> + env->fpr[rs1],
> + env->vfp.vreg[dest].f64[j],
> + env->vfp.vreg[src2].f64[j],
> + float_muladd_negate_product,
> + &env->fp_status);
> + }
> + break;
> +            default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> +                return;
> +            }
> +        } else {
> +            vector_tail_fcommon(env, dest, j, width);
> +        }
> +    }
> +    env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vnsrl_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> +    uint32_t rs2, uint32_t rd)
> +{
> + int i, j, k, vl;
> + uint32_t lmul, width, src1, src2, dest, vlmax;
> +
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) ||
> + vector_overlap_vm_common(lmul, vm, rd) ||
> + vector_overlap_dstgp_srcgp(rd, lmul, rs2, 2 * lmul)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, true);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> +
> + for (i = 0; i < vlmax; i++) {
> + src1 = rs1 + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / (2 * width)));
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + k = i % (VLEN / (2 * width));
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> +            case 8:
> +                if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    env->vfp.vreg[dest].u8[j] = env->vfp.vreg[src2].u16[k]
> +                        >> (env->vfp.vreg[src1].u8[j] & 0xf);
> +                }
> +                break;
> +            case 16:
> +                if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    env->vfp.vreg[dest].u16[j] = env->vfp.vreg[src2].u32[k]
> +                        >> (env->vfp.vreg[src1].u16[j] & 0x1f);
> +                }
> +                break;
> +            case 32:
> +                if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    env->vfp.vreg[dest].u32[j] = env->vfp.vreg[src2].u64[k]
> +                        >> (env->vfp.vreg[src1].u32[j] & 0x3f);
> +                }
> +                break;
> +            default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> +                break;
> +            }
> +        } else {
> +            vector_tail_narrow(env, dest, j, width);
> +        }
> +    }
> +    env->vfp.vstart = 0;
> +}
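
The index arithmetic above is what makes the operation narrowing: element
i reads a 2*SEW-wide source element, so the source register group advances
at half the rate of the destination group. A standalone sketch (assuming
VLEN=128 for the arithmetic) that prints the mapping the helper computes:

    #include <stdio.h>

    #define VLEN 128

    int main(void)
    {
        int width = 8;                            /* SEW in bits */
        for (int i = 0; i < 32; i++) {
            int dest = i / (VLEN / width);        /* dest register offset */
            int j    = i % (VLEN / width);        /* 'j' in the helper */
            int src  = i / (VLEN / (2 * width));  /* source register offset */
            int k    = i % (VLEN / (2 * width));  /* 'k' in the helper */
            printf("i=%2d: vd+%d[%2d] <- vs2+%d[%2d]\n", i, dest, j, src, k);
        }
        return 0;
    }
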
> +void VECTOR_HELPER(vnsrl_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> +    uint32_t rs2, uint32_t rd)
> +{
> + int i, j, k, vl;
> + uint32_t lmul, width, src2, dest, vlmax;
> +
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) ||
> + vector_overlap_vm_common(lmul, vm, rd) ||
> + vector_overlap_dstgp_srcgp(rd, lmul, rs2, 2 * lmul)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, true);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / (2 * width)));
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + k = i % (VLEN / (2 * width));
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> +            case 8:
> +                if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    env->vfp.vreg[dest].u8[j] = env->vfp.vreg[src2].u16[k]
> +                        >> (env->gpr[rs1] & 0xf);
> +                }
> +                break;
> +            case 16:
> +                if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    env->vfp.vreg[dest].u16[j] = env->vfp.vreg[src2].u32[k]
> +                        >> (env->gpr[rs1] & 0x1f);
> +                }
> +                break;
> +            case 32:
> +                if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    env->vfp.vreg[dest].u32[j] = env->vfp.vreg[src2].u64[k]
> +                        >> (env->gpr[rs1] & 0x3f);
> +                }
> +                break;
> +            default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> +                break;
> +            }
> +        } else {
> +            vector_tail_narrow(env, dest, j, width);
> +        }
> +    }
> +    env->vfp.vstart = 0;
> +}
> +void VECTOR_HELPER(vnsrl_vi)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> +    uint32_t rs2, uint32_t rd)
> +{
> + int i, j, k, vl;
> + uint32_t lmul, width, src2, dest, vlmax;
> +
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) ||
> + vector_overlap_vm_common(lmul, vm, rd) ||
> + vector_overlap_dstgp_srcgp(rd, lmul, rs2, 2 * lmul)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, true);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / (2 * width)));
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + k = i % (VLEN / (2 * width));
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> +            case 8:
> +                if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    env->vfp.vreg[dest].u8[j] = env->vfp.vreg[src2].u16[k]
> +                        >> rs1;
> +                }
> +                break;
> +            case 16:
> +                if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    env->vfp.vreg[dest].u16[j] = env->vfp.vreg[src2].u32[k]
> +                        >> rs1;
> +                }
> +                break;
> +            case 32:
> +                if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    env->vfp.vreg[dest].u32[j] = env->vfp.vreg[src2].u64[k]
> +                        >> rs1;
> +                }
> +                break;
> +            default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> +                break;
> +            }
> +        } else {
> +            vector_tail_narrow(env, dest, j, width);
> +        }
> +    }
> +    env->vfp.vstart = 0;
> +}
> +
> +/* vfmacc.vv vd, vs1, vs2, vm # vd[i] = +(vs1[i] * vs2[i]) + vd[i] */
> +void VECTOR_HELPER(vfmacc_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> +    uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest, src1, src2;
> +
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> +    if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + src1 = rs1 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f16[j] = float16_muladd(
> + env->vfp.vreg[src1].f16[j],
> + env->vfp.vreg[src2].f16[j],
> + env->vfp.vreg[dest].f16[j],
> + 0,
> + &env->fp_status);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f32[j] = float32_muladd(
> + env->vfp.vreg[src1].f32[j],
> + env->vfp.vreg[src2].f32[j],
> + env->vfp.vreg[dest].f32[j],
> + 0,
> + &env->fp_status);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f64[j] = float64_muladd(
> + env->vfp.vreg[src1].f64[j],
> + env->vfp.vreg[src2].f64[j],
> + env->vfp.vreg[dest].f64[j],
> + 0,
> + &env->fp_status);
> + }
> + break;
> +            default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> +                return;
> +            }
> +        } else {
> +            vector_tail_fcommon(env, dest, j, width);
> +        }
> +    }
> +    env->vfp.vstart = 0;
> +}
> +
> +/* vfmacc.vf vd, rs1, vs2, vm # vd[i] = +(f[rs1] * vs2[i]) + vd[i] */
> +void VECTOR_HELPER(vfmacc_vf)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> +    uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest, src2;
> +
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> +    if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f16[j] = float16_muladd(
> + env->fpr[rs1],
> + env->vfp.vreg[src2].f16[j],
> + env->vfp.vreg[dest].f16[j],
> + 0,
> + &env->fp_status);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f32[j] = float32_muladd(
> + env->fpr[rs1],
> + env->vfp.vreg[src2].f32[j],
> + env->vfp.vreg[dest].f32[j],
> + 0,
> + &env->fp_status);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f64[j] = float64_muladd(
> + env->fpr[rs1],
> + env->vfp.vreg[src2].f64[j],
> + env->vfp.vreg[dest].f64[j],
> + 0,
> + &env->fp_status);
> + }
> + break;
> +            default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> +                return;
> +            }
> +        } else {
> +            vector_tail_fcommon(env, dest, j, width);
> +        }
> +    }
> +    env->vfp.vstart = 0;
> +}
> +void VECTOR_HELPER(vnsra_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> +    uint32_t rs2, uint32_t rd)
> +{
> + int i, j, k, vl;
> + uint32_t lmul, width, src1, src2, dest, vlmax;
> +
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) ||
> + vector_overlap_vm_common(lmul, vm, rd) ||
> + vector_overlap_dstgp_srcgp(rd, lmul, rs2, 2 * lmul)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, true);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> +
> + for (i = 0; i < vlmax; i++) {
> + src1 = rs1 + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / (2 * width)));
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + k = i % (VLEN / (2 * width));
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> +            case 8:
> +                if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    env->vfp.vreg[dest].s8[j] = env->vfp.vreg[src2].s16[k]
> +                        >> (env->vfp.vreg[src1].s8[j] & 0xf);
> +                }
> +                break;
> +            case 16:
> +                if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    env->vfp.vreg[dest].s16[j] = env->vfp.vreg[src2].s32[k]
> +                        >> (env->vfp.vreg[src1].s16[j] & 0x1f);
> +                }
> +                break;
> +            case 32:
> +                if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    env->vfp.vreg[dest].s32[j] = env->vfp.vreg[src2].s64[k]
> +                        >> (env->vfp.vreg[src1].s32[j] & 0x3f);
> +                }
> +                break;
> +            default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> +                break;
> +            }
> +        } else {
> +            vector_tail_narrow(env, dest, j, width);
> +        }
> +    }
> +    env->vfp.vstart = 0;
> +}
> +void VECTOR_HELPER(vnsra_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> +    uint32_t rs2, uint32_t rd)
> +{
> + int i, j, k, vl;
> + uint32_t lmul, width, src2, dest, vlmax;
> +
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) ||
> + vector_overlap_vm_common(lmul, vm, rd) ||
> + vector_overlap_dstgp_srcgp(rd, lmul, rs2, 2 * lmul)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, true);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / (2 * width)));
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + k = i % (VLEN / (2 * width));
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> +            case 8:
> +                if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    env->vfp.vreg[dest].s8[j] = env->vfp.vreg[src2].s16[k]
> +                        >> (env->gpr[rs1] & 0xf);
> +                }
> +                break;
> +            case 16:
> +                if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    env->vfp.vreg[dest].s16[j] = env->vfp.vreg[src2].s32[k]
> +                        >> (env->gpr[rs1] & 0x1f);
> +                }
> +                break;
> +            case 32:
> +                if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    env->vfp.vreg[dest].s32[j] = env->vfp.vreg[src2].s64[k]
> +                        >> (env->gpr[rs1] & 0x3f);
> +                }
> +                break;
> +            default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> +                break;
> +            }
> +        } else {
> +            vector_tail_narrow(env, dest, j, width);
> +        }
> +    }
> +    env->vfp.vstart = 0;
> +}
> +void VECTOR_HELPER(vnsra_vi)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> +    uint32_t rs2, uint32_t rd)
> +{
> + int i, j, k, vl;
> + uint32_t lmul, width, src2, dest, vlmax;
> +
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) ||
> + vector_overlap_vm_common(lmul, vm, rd) ||
> + vector_overlap_dstgp_srcgp(rd, lmul, rs2, 2 * lmul)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, true);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / (2 * width)));
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + k = i % (VLEN / (2 * width));
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> +            case 8:
> +                if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    env->vfp.vreg[dest].s8[j] = env->vfp.vreg[src2].s16[k]
> +                        >> rs1;
> +                }
> +                break;
> +            case 16:
> +                if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    env->vfp.vreg[dest].s16[j] = env->vfp.vreg[src2].s32[k]
> +                        >> rs1;
> +                }
> +                break;
> +            case 32:
> +                if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    env->vfp.vreg[dest].s32[j] = env->vfp.vreg[src2].s64[k]
> +                        >> rs1;
> +                }
> +                break;
> +            default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> +                break;
> +            }
> +        } else {
> +            vector_tail_narrow(env, dest, j, width);
> +        }
> +    }
> +    env->vfp.vstart = 0;
> +}
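
vnsra differs from vnsrl only in operating on the signed view of the
double-width source, so the sign bit is replicated into the vacated bits.
A minimal sketch at SEW=8 (assuming the usual arithmetic-shift behavior
of '>>' on signed values, which this code relies on throughout):

    #include <stdint.h>

    static int8_t vnsra16_sketch(int16_t a, unsigned shamt)
    {
        return (int8_t)(a >> (shamt & 0xf));    /* arithmetic shift */
    }

    static uint8_t vnsrl16_sketch(uint16_t a, unsigned shamt)
    {
        return (uint8_t)(a >> (shamt & 0xf));   /* logical shift */
    }
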
> +void VECTOR_HELPER(vmacc_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> +    uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src1, src2, dest, vlmax;
> +
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> +    if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src1 = rs1 + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> +            case 8:
> +                if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    env->vfp.vreg[dest].s8[j] += env->vfp.vreg[src1].s8[j]
> +                        * env->vfp.vreg[src2].s8[j];
> +                }
> +                break;
> +            case 16:
> +                if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    env->vfp.vreg[dest].s16[j] += env->vfp.vreg[src1].s16[j]
> +                        * env->vfp.vreg[src2].s16[j];
> +                }
> +                break;
> +            case 32:
> +                if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    env->vfp.vreg[dest].s32[j] += env->vfp.vreg[src1].s32[j]
> +                        * env->vfp.vreg[src2].s32[j];
> +                }
> +                break;
> +            case 64:
> +                if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    env->vfp.vreg[dest].s64[j] += env->vfp.vreg[src1].s64[j]
> +                        * env->vfp.vreg[src2].s64[j];
> +                }
> +                break;
> +            default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> +                break;
> +            }
> +        } else {
> +            vector_tail_common(env, dest, j, width);
> +        }
> +    }
> +    env->vfp.vstart = 0;
> +}
> +void VECTOR_HELPER(vmacc_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> +    uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src2, dest, vlmax;
> +
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> +    if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s8[j] += env->gpr[rs1]
> + * env->vfp.vreg[src2].s8[j];
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s16[j] += env->gpr[rs1]
> + * env->vfp.vreg[src2].s16[j];
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s32[j] += env->gpr[rs1]
> + * env->vfp.vreg[src2].s32[j];
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s64[j] +=
> + (int64_t)extend_gpr(env->gpr[rs1])
> + * env->vfp.vreg[src2].s64[j];
> + }
> + break;
> + default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
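
The four single-width integer multiply-add forms differ only in which
operand is overwritten and in the sign of the product. Their per-element
semantics at SEW=8, as a standalone sketch (wrap-around modulo 2^SEW
comes from the implicit cast back to int8_t):

    #include <stdint.h>

    static int8_t vmacc8(int8_t vd, int8_t vs1, int8_t vs2)
    {
        return vd + vs1 * vs2;    /* accumulate into vd */
    }

    static int8_t vnmsac8(int8_t vd, int8_t vs1, int8_t vs2)
    {
        return vd - vs1 * vs2;
    }

    static int8_t vmadd8(int8_t vd, int8_t vs1, int8_t vs2)
    {
        return vs2 + vs1 * vd;    /* vd is a multiplicand */
    }

    static int8_t vnmsub8(int8_t vd, int8_t vs1, int8_t vs2)
    {
        return vs2 - vs1 * vd;
    }
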
> +
> +/* vfnmacc.vv vd, vs1, vs2, vm # vd[i] = -(vs1[i] * vs2[i]) - vd[i] */
> +void VECTOR_HELPER(vfnmacc_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> +    uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest, src1, src2;
> +
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> +    if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + src1 = rs1 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f16[j] = float16_muladd(
> + env->vfp.vreg[src1].f16[j],
> + env->vfp.vreg[src2].f16[j],
> + env->vfp.vreg[dest].f16[j],
> + float_muladd_negate_c |
> + float_muladd_negate_product,
> + &env->fp_status);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f32[j] = float32_muladd(
> + env->vfp.vreg[src1].f32[j],
> + env->vfp.vreg[src2].f32[j],
> + env->vfp.vreg[dest].f32[j],
> + float_muladd_negate_c |
> + float_muladd_negate_product,
> + &env->fp_status);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f64[j] = float64_muladd(
> + env->vfp.vreg[src1].f64[j],
> + env->vfp.vreg[src2].f64[j],
> + env->vfp.vreg[dest].f64[j],
> + float_muladd_negate_c |
> + float_muladd_negate_product,
> + &env->fp_status);
> + }
> + break;
> +            default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> +                return;
> +            }
> +        } else {
> +            vector_tail_fcommon(env, dest, j, width);
> +        }
> +    }
> +    env->vfp.vstart = 0;
> +}
> +
> +/* vfnmacc.vf vd, rs1, vs2, vm # vd[i] = -(f[rs1] * vs2[i]) - vd[i] */
> +void VECTOR_HELPER(vfnmacc_vf)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> +    uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest, src2;
> +
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> +    if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f16[j] = float16_muladd(
> + env->fpr[rs1],
> + env->vfp.vreg[src2].f16[j],
> + env->vfp.vreg[dest].f16[j],
> + float_muladd_negate_c |
> + float_muladd_negate_product,
> + &env->fp_status);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f32[j] = float32_muladd(
> + env->fpr[rs1],
> + env->vfp.vreg[src2].f32[j],
> + env->vfp.vreg[dest].f32[j],
> + float_muladd_negate_c |
> + float_muladd_negate_product,
> + &env->fp_status);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f64[j] = float64_muladd(
> + env->fpr[rs1],
> + env->vfp.vreg[src2].f64[j],
> + env->vfp.vreg[dest].f64[j],
> + float_muladd_negate_c |
> + float_muladd_negate_product,
> + &env->fp_status);
> + }
> + break;
> +            default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> +                return;
> +            }
> +        } else {
> +            vector_tail_fcommon(env, dest, j, width);
> +        }
> +    }
> +    env->vfp.vstart = 0;
> +}
> +
> +/* vnclipu.vv vd, vs2, vs1, vm # vector-vector */
> +void VECTOR_HELPER(vnclipu_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> +    uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest, k, src1, src2;
> +
> + lmul = vector_get_lmul(env);
> +
> +
> + if (vector_vtype_ill(env)
> + || vector_overlap_dstgp_srcgp(rd, lmul, rs2, 2 * lmul)
> + || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, true);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> +
> + vl = env->vfp.vl;
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / (2 * width)));
> + src1 = rs1 + (i / (VLEN / width));
> + j = i % (VLEN / (2 * width));
> + k = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> +            case 8:
> +                if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    env->vfp.vreg[dest].u8[k] = vnclipu_16(env,
> +                        env->vfp.vreg[src2].u16[j], env->vfp.vreg[src1].u8[k]);
> +                }
> +                break;
> +            case 16:
> +                if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    env->vfp.vreg[dest].u16[k] = vnclipu_32(env,
> +                        env->vfp.vreg[src2].u32[j], env->vfp.vreg[src1].u16[k]);
> +                }
> +                break;
> +            case 32:
> +                if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    env->vfp.vreg[dest].u32[k] = vnclipu_64(env,
> +                        env->vfp.vreg[src2].u64[j], env->vfp.vreg[src1].u32[k]);
> +                }
> +                break;
> +            default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> +                return;
> +            }
> +        } else {
> +            vector_tail_narrow(env, dest, k, width);
> +        }
> +    }
> +    env->vfp.vstart = 0;
> +}
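
vnclipu_16/32/64 are defined earlier in the file. A hypothetical
standalone sketch of the SEW=8 case (round-to-nearest-up, ignoring the
vxrm/vxsat CSRs the real helpers use): shift the double-width source
right with rounding, then saturate to the destination width:

    #include <stdint.h>

    static uint8_t vnclipu16_sketch(uint16_t a, uint8_t shamt)
    {
        shamt &= 0xf;
        uint32_t round = shamt ? (1u << (shamt - 1)) : 0;
        uint32_t shifted = ((uint32_t)a + round) >> shamt;
        return shifted > UINT8_MAX ? UINT8_MAX : (uint8_t)shifted;
    }

The signed vnclip variants further below saturate to [INT8_MIN, INT8_MAX]
instead.
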
> +
> +/* vnclipu.vx vd, vs2, rs1, vm # vector-scalar */
> +void VECTOR_HELPER(vnclipu_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> +    uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, k, dest, src2;
> +
> + lmul = vector_get_lmul(env);
> +
> +
> + if (vector_vtype_ill(env)
> + || vector_overlap_dstgp_srcgp(rd, lmul, rs2, 2 * lmul)
> + || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs2, true);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + vl = env->vfp.vl;
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / (2 * width)));
> + j = i % (VLEN / (2 * width));
> + k = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u8[k] = vnclipu_16(env,
> + env->vfp.vreg[src2].u16[j], env->gpr[rs1]);
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u16[k] = vnclipu_32(env,
> + env->vfp.vreg[src2].u32[j], env->gpr[rs1]);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u32[k] = vnclipu_64(env,
> + env->vfp.vreg[src2].u64[j], env->gpr[rs1]);
> + }
> + break;
> +            default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> +                return;
> +            }
> +        } else {
> +            vector_tail_narrow(env, dest, k, width);
> +        }
> +    }
> +    env->vfp.vstart = 0;
> +}
> +
> +
> +/* vnclipu.vi vd, vs2, imm, vm # vector-immediate */
> +void VECTOR_HELPER(vnclipu_vi)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> +    uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, k, dest, src2;
> +
> + lmul = vector_get_lmul(env);
> +
> +
> + if (vector_vtype_ill(env)
> + || vector_overlap_dstgp_srcgp(rd, lmul, rs2, 2 * lmul)
> + || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs2, true);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + vl = env->vfp.vl;
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / (2 * width)));
> + j = i % (VLEN / (2 * width));
> + k = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u8[k] = vnclipui_16(env,
> + env->vfp.vreg[src2].u16[j], rs1);
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u16[k] = vnclipui_32(env,
> + env->vfp.vreg[src2].u32[j], rs1);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u32[k] = vnclipui_64(env,
> + env->vfp.vreg[src2].u64[j], rs1);
> + }
> + break;
> +            default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> +                return;
> +            }
> +        } else {
> +            vector_tail_narrow(env, dest, k, width);
> +        }
> +    }
> +    env->vfp.vstart = 0;
> +}
> +
> +/* vfmsac.vv vd, vs1, vs2, vm # vd[i] = +(vs1[i] * vs2[i]) - vd[i] */
> +void VECTOR_HELPER(vfmsac_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> +    uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest, src1, src2;
> +
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> +    if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + src1 = rs1 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f16[j] = float16_muladd(
> + env->vfp.vreg[src1].f16[j],
> + env->vfp.vreg[src2].f16[j],
> + env->vfp.vreg[dest].f16[j],
> + float_muladd_negate_c,
> + &env->fp_status);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f32[j] = float32_muladd(
> + env->vfp.vreg[src1].f32[j],
> + env->vfp.vreg[src2].f32[j],
> + env->vfp.vreg[dest].f32[j],
> + float_muladd_negate_c,
> + &env->fp_status);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f64[j] = float64_muladd(
> + env->vfp.vreg[src1].f64[j],
> + env->vfp.vreg[src2].f64[j],
> + env->vfp.vreg[dest].f64[j],
> + float_muladd_negate_c,
> + &env->fp_status);
> + }
> + break;
> +            default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> +                return;
> +            }
> +        } else {
> +            vector_tail_fcommon(env, dest, j, width);
> +        }
> +    }
> +    env->vfp.vstart = 0;
> +}
> +
> +/* vfmsac.vf vd, rs1, vs2, vm # vd[i] = +(f[rs1] * vs2[i]) - vd[i] */
> +void VECTOR_HELPER(vfmsac_vf)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> +    uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest, src2;
> +
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> +    if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f16[j] = float16_muladd(
> + env->fpr[rs1],
> + env->vfp.vreg[src2].f16[j],
> + env->vfp.vreg[dest].f16[j],
> + float_muladd_negate_c,
> + &env->fp_status);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f32[j] = float32_muladd(
> + env->fpr[rs1],
> + env->vfp.vreg[src2].f32[j],
> + env->vfp.vreg[dest].f32[j],
> + float_muladd_negate_c,
> + &env->fp_status);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f64[j] = float64_muladd(
> + env->fpr[rs1],
> + env->vfp.vreg[src2].f64[j],
> + env->vfp.vreg[dest].f64[j],
> + float_muladd_negate_c,
> + &env->fp_status);
> + }
> + break;
> +            default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> +                return;
> +            }
> +        } else {
> +            vector_tail_fcommon(env, dest, j, width);
> +        }
> +    }
> +    env->vfp.vstart = 0;
> +}
> +
> +/* vnclip.vv vd, vs2, vs1, vm # vector-vector */
> +void VECTOR_HELPER(vnclip_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> +    uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest, k, src1, src2;
> +
> + lmul = vector_get_lmul(env);
> +
> +
> + if (vector_vtype_ill(env)
> + || vector_overlap_dstgp_srcgp(rd, lmul, rs2, 2 * lmul)
> + || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, true);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + vl = env->vfp.vl;
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / (2 * width)));
> + src1 = rs1 + (i / (VLEN / width));
> + j = i % (VLEN / (2 * width));
> + k = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> +            case 8:
> +                if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    env->vfp.vreg[dest].s8[k] = vnclip_16(env,
> +                        env->vfp.vreg[src2].s16[j], env->vfp.vreg[src1].u8[k]);
> +                }
> +                break;
> +            case 16:
> +                if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    env->vfp.vreg[dest].s16[k] = vnclip_32(env,
> +                        env->vfp.vreg[src2].s32[j], env->vfp.vreg[src1].u16[k]);
> +                }
> +                break;
> +            case 32:
> +                if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    env->vfp.vreg[dest].s32[k] = vnclip_64(env,
> +                        env->vfp.vreg[src2].s64[j], env->vfp.vreg[src1].u32[k]);
> +                }
> +                break;
> +            default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> +                return;
> +            }
> +        } else {
> +            vector_tail_narrow(env, dest, k, width);
> +        }
> +    }
> +    env->vfp.vstart = 0;
> +}
> +
> +/* vnclip.vx vd, vs2, rs1, vm # vector-scalar */
> +void VECTOR_HELPER(vnclip_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> +    uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest, k, src2;
> +
> + lmul = vector_get_lmul(env);
> +
> +
> + if (vector_vtype_ill(env)
> + || vector_overlap_dstgp_srcgp(rd, lmul, rs2, 2 * lmul)
> + || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs2, true);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + vl = env->vfp.vl;
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / (2 * width)));
> + j = i % (VLEN / (2 * width));
> + k = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s8[k] = vnclip_16(env,
> + env->vfp.vreg[src2].s16[j], env->gpr[rs1]);
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s16[k] = vnclip_32(env,
> + env->vfp.vreg[src2].s32[j], env->gpr[rs1]);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s32[k] = vnclip_64(env,
> + env->vfp.vreg[src2].s64[j], env->gpr[rs1]);
> + }
> + break;
> +            default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> +                return;
> +            }
> +        } else {
> +            vector_tail_narrow(env, dest, k, width);
> +        }
> +    }
> +    env->vfp.vstart = 0;
> +}
> +
> +/* vnclip.vi vd, vs2, imm, vm # vector-immediate */
> +void VECTOR_HELPER(vnclip_vi)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> +    uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest, k, src2;
> +
> + lmul = vector_get_lmul(env);
> +
> +
> + if (vector_vtype_ill(env)
> + || vector_overlap_dstgp_srcgp(rd, lmul, rs2, 2 * lmul)
> + || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs2, true);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + vl = env->vfp.vl;
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / (2 * width)));
> + j = i % (VLEN / (2 * width));
> + k = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s8[k] = vnclipi_16(env,
> + env->vfp.vreg[src2].s16[j], rs1);
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s16[k] = vnclipi_32(env,
> + env->vfp.vreg[src2].s32[j], rs1);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s32[k] = vnclipi_64(env,
> + env->vfp.vreg[src2].s64[j], rs1);
> + }
> + break;
> +            default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> +                return;
> +            }
> +        } else {
> +            vector_tail_narrow(env, dest, k, width);
> +        }
> +    }
> +    env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vnmsac_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> +    uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src1, src2, dest, vlmax;
> +
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> +    if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src1 = rs1 + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> +            case 8:
> +                if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    env->vfp.vreg[dest].s8[j] -= env->vfp.vreg[src1].s8[j]
> +                        * env->vfp.vreg[src2].s8[j];
> +                }
> +                break;
> +            case 16:
> +                if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    env->vfp.vreg[dest].s16[j] -= env->vfp.vreg[src1].s16[j]
> +                        * env->vfp.vreg[src2].s16[j];
> +                }
> +                break;
> +            case 32:
> +                if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    env->vfp.vreg[dest].s32[j] -= env->vfp.vreg[src1].s32[j]
> +                        * env->vfp.vreg[src2].s32[j];
> +                }
> +                break;
> +            case 64:
> +                if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    env->vfp.vreg[dest].s64[j] -= env->vfp.vreg[src1].s64[j]
> +                        * env->vfp.vreg[src2].s64[j];
> +                }
> +                break;
> +            default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> +                break;
> +            }
> +        } else {
> +            vector_tail_common(env, dest, j, width);
> +        }
> +    }
> +    env->vfp.vstart = 0;
> +}
> +void VECTOR_HELPER(vnmsac_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> +    uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src2, dest, vlmax;
> +
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> +    if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s8[j] -= env->gpr[rs1]
> + * env->vfp.vreg[src2].s8[j];
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s16[j] -= env->gpr[rs1]
> + * env->vfp.vreg[src2].s16[j];
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s32[j] -= env->gpr[rs1]
> + * env->vfp.vreg[src2].s32[j];
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s64[j] -=
> + (int64_t)extend_gpr(env->gpr[rs1])
> + * env->vfp.vreg[src2].s64[j];
> + }
> + break;
> + default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vfnmsac.vv vd, vs1, vs2, vm # vd[i] = -(vs1[i] * vs2[i]) + vd[i] */
> +void VECTOR_HELPER(vfnmsac_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> +    uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest, src1, src2;
> +
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> +    if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + src1 = rs1 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f16[j] = float16_muladd(
> + env->vfp.vreg[src1].f16[j],
> + env->vfp.vreg[src2].f16[j],
> + env->vfp.vreg[dest].f16[j],
> + float_muladd_negate_product,
> + &env->fp_status);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f32[j] = float32_muladd(
> + env->vfp.vreg[src1].f32[j],
> + env->vfp.vreg[src2].f32[j],
> + env->vfp.vreg[dest].f32[j],
> + float_muladd_negate_product,
> + &env->fp_status);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f64[j] = float64_muladd(
> + env->vfp.vreg[src1].f64[j],
> + env->vfp.vreg[src2].f64[j],
> + env->vfp.vreg[dest].f64[j],
> + float_muladd_negate_product,
> + &env->fp_status);
> + }
> + break;
> +            default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> +                return;
> +            }
> +        } else {
> +            vector_tail_fcommon(env, dest, j, width);
> +        }
> +    }
> +    env->vfp.vstart = 0;
> +}
> +
> +/* vfnmsac.vf vd, rs1, vs2, vm # vd[i] = -(f[rs1] * vs2[i]) + vd[i] */
> +void VECTOR_HELPER(vfnmsac_vf)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> +    uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest, src2;
> +
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f16[j] = float16_muladd(
> + env->fpr[rs1],
> + env->vfp.vreg[src2].f16[j],
> + env->vfp.vreg[dest].f16[j],
> + float_muladd_negate_product,
> + &env->fp_status);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f32[j] = float32_muladd(
> + env->fpr[rs1],
> + env->vfp.vreg[src2].f32[j],
> + env->vfp.vreg[dest].f32[j],
> + float_muladd_negate_product,
> + &env->fp_status);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f64[j] = float64_muladd(
> + env->fpr[rs1],
> + env->vfp.vreg[src2].f64[j],
> + env->vfp.vreg[dest].f64[j],
> + float_muladd_negate_product,
> + &env->fp_status);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_fcommon(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vwredsumu.vs vd, vs2, vs1, vm # 2*SEW = 2*SEW + sum(zero-extend(SEW)) */
> +void VECTOR_HELPER(vwredsumu_vs)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, src2;
> + uint64_t sum = 0;
> +
> + lmul = vector_get_lmul(env);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> +
> + if (vector_vtype_ill(env)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + if (env->vfp.vstart != 0) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vl = env->vfp.vl;
> + if (vl == 0) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < VLEN / 64; i++) {
> + env->vfp.vreg[rd].u64[i] = 0;
> + }
> +
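> + /*
> + * The running sum is seeded with the 2*SEW element vs1[0] on the
> + * first iteration, accumulates each active zero-extended SEW
> + * element of the vs2 group, and is written back to vd[0] once the
> + * last active element has been processed.
> + */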
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> +
> + if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + sum += env->vfp.vreg[src2].u8[j];
> + }
> + if (i == 0) {
> + sum += env->vfp.vreg[rs1].u16[0];
> + }
> + if (i == vl - 1) {
> + env->vfp.vreg[rd].u16[0] = sum;
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + sum += env->vfp.vreg[src2].u16[j];
> + }
> + if (i == 0) {
> + sum += env->vfp.vreg[rs1].u32[0];
> + }
> + if (i == vl - 1) {
> + env->vfp.vreg[rd].u32[0] = sum;
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + sum += env->vfp.vreg[src2].u32[j];
> + }
> + if (i == 0) {
> + sum += env->vfp.vreg[rs1].u64[0];
> + }
> + if (i == vl - 1) {
> + env->vfp.vreg[rd].u64[0] = sum;
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vwaddu_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, k, vl;
> + uint32_t lmul, width, src1, src2, dest, vlmax;
> +
> + vl = env->vfp.vl;
> +
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env)
> + || vector_overlap_vm_force(vm, rd)
> + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs1, lmul)
> + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)
> + ) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, true);
> +
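> + /*
> + * Widening arithmetic: sources are read at SEW (index j) while the
> + * destination group advances at 2*SEW (index k), i.e. half as many
> + * elements fit in one destination register.
> + */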
> + for (i = 0; i < vlmax; i++) {
> + src1 = rs1 + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / (2 * width)));
> + j = i % (VLEN / width);
> + k = i % (VLEN / (2 * width));
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u16[k] =
> + (uint16_t)env->vfp.vreg[src1].u8[j] +
> + (uint16_t)env->vfp.vreg[src2].u8[j];
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u32[k] =
> + (uint32_t)env->vfp.vreg[src1].u16[j] +
> + (uint32_t)env->vfp.vreg[src2].u16[j];
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u64[k] =
> + (uint64_t)env->vfp.vreg[src1].u32[j] +
> + (uint64_t)env->vfp.vreg[src2].u32[j];
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_widen(env, dest, k, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vwaddu_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, k, vl;
> + uint32_t lmul, width, src2, dest, vlmax;
> +
> + vl = env->vfp.vl;
> +
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env)
> + || vector_overlap_vm_force(vm, rd)
> + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)
> + ) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, true);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / (2 * width)));
> + j = i % (VLEN / width);
> + k = i % (VLEN / (2 * width));
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u16[k] =
> + (uint16_t)env->vfp.vreg[src2].u8[j] +
> + (uint16_t)((uint8_t)env->gpr[rs1]);
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u32[k] =
> + (uint32_t)env->vfp.vreg[src2].u16[j] +
> + (uint32_t)((uint16_t)env->gpr[rs1]);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u64[k] =
> + (uint64_t)env->vfp.vreg[src2].u32[j] +
> + (uint64_t)((uint32_t)env->gpr[rs1]);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_widen(env, dest, k, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vfwadd.vv vd, vs2, vs1, vm # vector-vector */
> +void VECTOR_HELPER(vfwadd_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, k, dest, src1, src2;
> +
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> + if (vector_vtype_ill(env)
> + || vector_overlap_vm_force(vm, rd)
> + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs1, lmul)
> + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, true);
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / (2 * width)));
> + src2 = rs2 + (i / (VLEN / width));
> + src1 = rs1 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + k = i % (VLEN / (2 * width));
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f32[k] = float32_add(
> + float16_to_float32(env->vfp.vreg[src2].f16[j], true,
> + &env->fp_status),
> + float16_to_float32(env->vfp.vreg[src1].f16[j], true,
> + &env->fp_status),
> + &env->fp_status);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f64[k] = float64_add(
> + float32_to_float64(env->vfp.vreg[src2].f32[j],
> + &env->fp_status),
> + float32_to_float64(env->vfp.vreg[src1].f32[j],
> + &env->fp_status),
> + &env->fp_status);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_fwiden(env, dest, k, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vfwadd.vf vd, vs2, rs1, vm # vector-scalar */
> +void VECTOR_HELPER(vfwadd_vf)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, k, dest, src2;
> +
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> + if (vector_vtype_ill(env)
> + || vector_overlap_vm_force(vm, rd)
> + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, true);
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / (2 * width)));
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + k = i % (VLEN / (2 * width));
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f32[k] = float32_add(
> + float16_to_float32(env->vfp.vreg[src2].f16[j], true,
> + &env->fp_status),
> + float16_to_float32(env->fpr[rs1], true,
> + &env->fp_status),
> + &env->fp_status);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f64[k] = float64_add(
> + float32_to_float64(env->vfp.vreg[src2].f32[j],
> + &env->fp_status),
> + float32_to_float64(env->fpr[rs1], &env->fp_status),
> + &env->fp_status);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_fwiden(env, dest, k, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vwredsum.vs vd, vs2, vs1, vm # 2*SEW = 2*SEW + sum(sign-extend(SEW)) */
> +void VECTOR_HELPER(vwredsum_vs)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, src2;
> + int64_t sum = 0;
> +
> + lmul = vector_get_lmul(env);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> +
> + if (vector_vtype_ill(env)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + if (env->vfp.vstart != 0) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vl = env->vfp.vl;
> + if (vl == 0) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
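> + /* A reduction writes only element 0 of vd; clear the whole
> + * register first so the remaining bits read back as zero. */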
> + for (i = 0; i < VLEN / 64; i++) {
> + env->vfp.vreg[rd].u64[i] = 0;
> + }
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> +
> + if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + sum += (int16_t)env->vfp.vreg[src2].s8[j] << 8 >> 8;
> + }
> + if (i == 0) {
> + sum += env->vfp.vreg[rs1].s16[0];
> + }
> + if (i == vl - 1) {
> + env->vfp.vreg[rd].s16[0] = sum;
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + sum += (int32_t)env->vfp.vreg[src2].s16[j] << 16 >> 16;
> + }
> + if (i == 0) {
> + sum += env->vfp.vreg[rs1].s32[0];
> + }
> + if (i == vl - 1) {
> + env->vfp.vreg[rd].s32[0] = sum;
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + sum += (int64_t)env->vfp.vreg[src2].s32[j] << 32 >> 32;
> + }
> + if (i == 0) {
> + sum += env->vfp.vreg[rs1].s64[0];
> + }
> + if (i == vl - 1) {
> + env->vfp.vreg[rd].s64[0] = sum;
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vwadd_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, k, vl;
> + uint32_t lmul, width, src1, src2, dest, vlmax;
> +
> + vl = env->vfp.vl;
> +
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env)
> + || vector_overlap_vm_force(vm, rd)
> + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs1, lmul)
> + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, true);
> +
> + for (i = 0; i < vlmax; i++) {
> + src1 = rs1 + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / (2 * width)));
> + j = i % (VLEN / width);
> + k = i % (VLEN / (2 * width));
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s16[k] =
> + (int16_t)env->vfp.vreg[src1].s8[j] +
> + (int16_t)env->vfp.vreg[src2].s8[j];
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s32[k] =
> + (int32_t)env->vfp.vreg[src1].s16[j] +
> + (int32_t)env->vfp.vreg[src2].s16[j];
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s64[k] =
> + (int64_t)env->vfp.vreg[src1].s32[j] +
> + (int64_t)env->vfp.vreg[src2].s32[j];
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_widen(env, dest, k, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vwadd_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, k, vl;
> + uint32_t lmul, width, src2, dest, vlmax;
> +
> + vl = env->vfp.vl;
> +
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env)
> + || vector_overlap_vm_force(vm, rd)
> + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, true);
> +
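> + /* The scalar operand is first truncated to SEW, then sign-extended
> + * to 2*SEW, mirroring the handling of the vector operand. */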
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / (2 * width)));
> + j = i % (VLEN / width);
> + k = i % (VLEN / (2 * width));
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s16[k] =
> + (int16_t)((int8_t)env->vfp.vreg[src2].s8[j]) +
> + (int16_t)((int8_t)env->gpr[rs1]);
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s32[k] =
> + (int32_t)((int16_t)env->vfp.vreg[src2].s16[j]) +
> + (int32_t)((int16_t)env->gpr[rs1]);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s64[k] =
> + (int64_t)((int32_t)env->vfp.vreg[src2].s32[j]) +
> + (int64_t)((int32_t)env->gpr[rs1]);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_widen(env, dest, k, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/*
> + * vfwredsum.vs vd, vs2, vs1, vm #
> + * Unordered reduce 2*SEW = 2*SEW + sum(promote(SEW))
> + */
> +void VECTOR_HELPER(vfwredsum_vs)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, src2;
> + float32 sum32 = 0.0f;
> + float64 sum64 = 0.0f;
> +
> + lmul = vector_get_lmul(env);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> +
> + if (vector_vtype_ill(env)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + if (env->vfp.vstart != 0) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vl = env->vfp.vl;
> + if (vl == 0) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < VLEN / 64; i++) {
> + env->vfp.vreg[rd].u64[i] = 0;
> + }
> +
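> + /*
> + * The reduction is unordered, so accumulating in plain element
> + * order (promoting each SEW operand to 2*SEW before the add) is
> + * a valid ordering choice here.
> + */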
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> +
> + if (i < vl) {
> + switch (width) {
> + case 16:
> + if (i == 0) {
> + sum32 = env->vfp.vreg[rs1].f32[0];
> + }
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + sum32 = float32_add(sum32,
> + float16_to_float32(env->vfp.vreg[src2].f16[j],
> + true, &env->fp_status),
> + &env->fp_status);
> + }
> + if (i == vl - 1) {
> + env->vfp.vreg[rd].f32[0] = sum32;
> + }
> + break;
> + case 32:
> + if (i == 0) {
> + sum64 = env->vfp.vreg[rs1].f64[0];
> + }
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + sum64 = float64_add(sum64,
> + float32_to_float64(env->vfp.vreg[src2].f32[j],
> + &env->fp_status),
> + &env->fp_status);
> + }
> + if (i == vl - 1) {
> + env->vfp.vreg[rd].f64[0] = sum64;
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vwsubu_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, k, vl;
> + uint32_t lmul, width, src1, src2, dest, vlmax;
> +
> + vl = env->vfp.vl;
> +
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env)
> + || vector_overlap_vm_force(vm, rd)
> + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs1, lmul)
> + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)
> + ) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, true);
> +
> + for (i = 0; i < vlmax; i++) {
> + src1 = rs1 + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / (2 * width)));
> + j = i % (VLEN / width);
> + k = i % (VLEN / (2 * width));
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u16[k] =
> + (uint16_t)env->vfp.vreg[src2].u8[j] -
> + (uint16_t)env->vfp.vreg[src1].u8[j];
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u32[k] =
> + (uint32_t)env->vfp.vreg[src2].u16[j] -
> + (uint32_t)env->vfp.vreg[src1].u16[j];
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u64[k] =
> + (uint64_t)env->vfp.vreg[src2].u32[j] -
> + (uint64_t)env->vfp.vreg[src1].u32[j];
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_widen(env, dest, k, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vwsubu_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, k, vl;
> + uint32_t lmul, width, src2, dest, vlmax;
> +
> + vl = env->vfp.vl;
> +
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env)
> + || vector_overlap_vm_force(vm, rd)
> + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)
> + ) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, true);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / (2 * width)));
> + j = i % (VLEN / width);
> + k = i % (VLEN / (2 * width));
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u16[k] =
> + (uint16_t)env->vfp.vreg[src2].u8[j] -
> + (uint16_t)((uint8_t)env->gpr[rs1]);
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u32[k] =
> + (uint32_t)env->vfp.vreg[src2].u16[j] -
> + (uint32_t)((uint16_t)env->gpr[rs1]);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u64[k] =
> + (uint64_t)env->vfp.vreg[src2].u32[j] -
> + (uint64_t)((uint32_t)env->gpr[rs1]);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_widen(env, dest, k, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vfwsub.vv vd, vs2, vs1, vm # vector-vector */
> +void VECTOR_HELPER(vfwsub_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, k, dest, src1, src2;
> +
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> + if (vector_vtype_ill(env)
> + || vector_overlap_vm_force(vm, rd)
> + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs1, lmul)
> + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, true);
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / (2 * width)));
> + src2 = rs2 + (i / (VLEN / width));
> + src1 = rs1 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + k = i % (VLEN / (2 * width));
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f32[k] = float32_sub(
> + float16_to_float32(env->vfp.vreg[src2].f16[j], true,
> + &env->fp_status),
> + float16_to_float32(env->vfp.vreg[src1].f16[j], true,
> + &env->fp_status),
> + &env->fp_status);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f64[k] = float64_sub(
> + float32_to_float64(env->vfp.vreg[src2].f32[j],
> + &env->fp_status),
> + float32_to_float64(env->vfp.vreg[src1].f32[j],
> + &env->fp_status),
> + &env->fp_status);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_fwiden(env, dest, k, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vfwsub.vf vd, vs2, rs1, vm # vector-scalar */
> +void VECTOR_HELPER(vfwsub_vf)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, k, dest, src2;
> +
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> + if (vector_vtype_ill(env)
> + || vector_overlap_vm_force(vm, rd)
> + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, true);
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / (2 * width)));
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + k = i % (VLEN / (2 * width));
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f32[k] = float32_sub(
> + float16_to_float32(env->vfp.vreg[src2].f16[j], true,
> + &env->fp_status),
> + float16_to_float32(env->fpr[rs1], true,
> + &env->fp_status),
> + &env->fp_status);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f64[k] = float64_sub(
> + float32_to_float64(env->vfp.vreg[src2].f32[j],
> + &env->fp_status),
> + float32_to_float64(env->fpr[rs1], &env->fp_status),
> + &env->fp_status);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_fwiden(env, dest, k, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vwsub_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, k, vl;
> + uint32_t lmul, width, src1, src2, dest, vlmax;
> +
> + vl = env->vfp.vl;
> +
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env)
> + || vector_overlap_vm_force(vm, rd)
> + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs1, lmul)
> + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)
> + ) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, true);
> +
> + for (i = 0; i < vlmax; i++) {
> + src1 = rs1 + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / (2 * width)));
> + j = i % (VLEN / width);
> + k = i % (VLEN / (2 * width));
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s16[k] =
> + (int16_t)env->vfp.vreg[src2].s8[j] -
> + (int16_t)env->vfp.vreg[src1].s8[j];
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s32[k] =
> + (int32_t)env->vfp.vreg[src2].s16[j] -
> + (int32_t)env->vfp.vreg[src1].s16[j];
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s64[k] =
> + (int64_t)env->vfp.vreg[src2].s32[j] -
> + (int64_t)env->vfp.vreg[src1].s32[j];
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_widen(env, dest, k, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vwsub_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, k, vl;
> + uint32_t lmul, width, src2, dest, vlmax;
> +
> + vl = env->vfp.vl;
> +
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env)
> + || vector_overlap_vm_force(vm, rd)
> + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)
> + ) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, true);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / (2 * width)));
> + j = i % (VLEN / width);
> + k = i % (VLEN / (2 * width));
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s16[k] =
> + (int16_t)((int8_t)env->vfp.vreg[src2].s8[j]) -
> + (int16_t)((int8_t)env->gpr[rs1]);
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s32[k] =
> + (int32_t)((int16_t)env->vfp.vreg[src2].s16[j]) -
> + (int32_t)((int16_t)env->gpr[rs1]);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s64[k] =
> + (int64_t)((int32_t)env->vfp.vreg[src2].s32[j]) -
> + (int64_t)((int32_t)env->gpr[rs1]);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_widen(env, dest, k, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/*
> + * vfwredosum.vs vd, vs2, vs1, vm #
> + * Ordered reduce 2*SEW = 2*SEW + sum(promote(SEW))
> + */
> +void VECTOR_HELPER(vfwredosum_vs)(CPURISCVState *env, uint32_t vm,
> + uint32_t rs1, uint32_t rs2, uint32_t rd)
> +{
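> + /*
> + * The unordered helper already accumulates in element order,
> + * which satisfies the ordered-reduction semantics, so it is
> + * simply reused here.
> + */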
> + helper_vector_vfwredsum_vs(env, vm, rs1, rs2, rd);
> +}
> +
> +void VECTOR_HELPER(vwaddu_wv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, k, vl;
> + uint32_t lmul, width, src1, src2, dest, vlmax;
> +
> + vl = env->vfp.vl;
> +
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env)
> + || vector_overlap_vm_force(vm, rd)
> + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs1, lmul)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, true);
> + vector_lmul_check_reg(env, lmul, rd, true);
> +
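> + /*
> + * .wv form: vs2 is already 2*SEW wide, so it is indexed with the
> + * double-width stride k, while vs1 still uses the single-width
> + * stride j.
> + */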
> + for (i = 0; i < vlmax; i++) {
> + src1 = rs1 + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / (2 * width)));
> + dest = rd + (i / (VLEN / (2 * width)));
> + j = i % (VLEN / width);
> + k = i % (VLEN / (2 * width));
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u16[k] =
> + (uint16_t)env->vfp.vreg[src1].u8[j] +
> + (uint16_t)env->vfp.vreg[src2].u16[k];
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u32[k] =
> + (uint32_t)env->vfp.vreg[src1].u16[j] +
> + (uint32_t)env->vfp.vreg[src2].u32[k];
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u64[k] =
> + (uint64_t)env->vfp.vreg[src1].u32[j] +
> + (uint64_t)env->vfp.vreg[src2].u64[k];
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_widen(env, dest, k, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vwaddu_wx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, k, vl;
> + uint32_t lmul, width, src2, dest, vlmax;
> +
> + vl = env->vfp.vl;
> +
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env)
> + || vector_overlap_vm_force(vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, true);
> + vector_lmul_check_reg(env, lmul, rd, true);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / (2 * width)));
> + dest = rd + (i / (VLEN / (2 * width)));
> + k = i % (VLEN / (2 * width));
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u16[k] =
> + (uint16_t)env->vfp.vreg[src2].u16[k] +
> + (uint16_t)((uint8_t)env->gpr[rs1]);
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u32[k] =
> + (uint32_t)env->vfp.vreg[src2].u32[k] +
> + (uint32_t)((uint16_t)env->gpr[rs1]);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u64[k] =
> + (uint64_t)env->vfp.vreg[src2].u64[k] +
> + (uint64_t)((uint32_t)env->gpr[rs1]);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_widen(env, dest, k, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vfwadd.wv vd, vs2, vs1, vm # vector-vector */
> +void VECTOR_HELPER(vfwadd_wv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, k, dest, src1, src2;
> +
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> + if (vector_vtype_ill(env)
> + || vector_overlap_vm_force(vm, rd)
> + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs1, lmul)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, true);
> + vector_lmul_check_reg(env, lmul, rd, true);
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
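> + /* vs2 is already double-width, so only the vs1 operand needs to
> + * be promoted before the addition. */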
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / (2 * width)));
> + src2 = rs2 + (i / (VLEN / (2 * width)));
> + src1 = rs1 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + k = i % (VLEN / (2 * width));
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f32[k] = float32_add(
> + env->vfp.vreg[src2].f32[k],
> + float16_to_float32(env->vfp.vreg[src1].f16[j], true,
> + &env->fp_status),
> + &env->fp_status);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f64[k] = float64_add(
> + env->vfp.vreg[src2].f64[k],
> + float32_to_float64(env->vfp.vreg[src1].f32[j],
> + &env->fp_status),
> + &env->fp_status);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_widen(env, dest, k, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vfwadd.wf vd, vs2, rs1, vm # vector-scalar */
> +void VECTOR_HELPER(vfwadd_wf)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, k, dest, src2;
> +
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> + if (vector_vtype_ill(env)
> + || vector_overlap_vm_force(vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs2, true);
> + vector_lmul_check_reg(env, lmul, rd, true);
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / (2 * width)));
> + src2 = rs2 + (i / (VLEN / (2 * width)));
> + k = i % (VLEN / (2 * width));
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f32[k] = float32_add(
> + env->vfp.vreg[src2].f32[k],
> + float16_to_float32(env->fpr[rs1], true,
> + &env->fp_status),
> + &env->fp_status);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f64[k] = float64_add(
> + env->vfp.vreg[src2].f64[k],
> + float32_to_float64(env->fpr[rs1], &env->fp_status),
> + &env->fp_status);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_widen(env, dest, k, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vwadd_wv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, k, vl;
> + uint32_t lmul, width, src1, src2, dest, vlmax;
> +
> + vl = env->vfp.vl;
> +
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env)
> + || vector_overlap_vm_force(vm, rd)
> + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs1, lmul)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, true);
> + vector_lmul_check_reg(env, lmul, rd, true);
> +
> + for (i = 0; i < vlmax; i++) {
> + src1 = rs1 + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / (2 * width)));
> + dest = rd + (i / (VLEN / (2 * width)));
> + j = i % (VLEN / width);
> + k = i % (VLEN / (2 * width));
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s16[k] =
> + (int16_t)((int8_t)env->vfp.vreg[src1].s8[j]) +
> + (int16_t)env->vfp.vreg[src2].s16[k];
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s32[k] =
> + (int32_t)((int16_t)env->vfp.vreg[src1].s16[j]) +
> + (int32_t)env->vfp.vreg[src2].s32[k];
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s64[k] =
> + (int64_t)((int32_t)env->vfp.vreg[src1].s32[j]) +
> + (int64_t)env->vfp.vreg[src2].s64[k];
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_widen(env, dest, k, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vwadd_wx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, k, vl;
> + uint32_t lmul, width, src2, dest, vlmax;
> +
> + vl = env->vfp.vl;
> +
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env)
> + || vector_overlap_vm_force(vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, true);
> + vector_lmul_check_reg(env, lmul, rd, true);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / (2 * width)));
> + dest = rd + (i / (VLEN / (2 * width)));
> + k = i % (VLEN / (2 * width));
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s16[k] =
> + (int16_t)env->vfp.vreg[src2].s16[k] +
> + (int16_t)((int8_t)env->gpr[rs1]);
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s32[k] =
> + (int32_t)env->vfp.vreg[src2].s32[k] +
> + (int32_t)((int16_t)env->gpr[rs1]);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s64[k] =
> + (int64_t)env->vfp.vreg[src2].s64[k] +
> + (int64_t)((int32_t)env->gpr[rs1]);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_widen(env, dest, k, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vwsubu_wv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, k, vl;
> + uint32_t lmul, width, src1, src2, dest, vlmax;
> +
> + vl = env->vfp.vl;
> +
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env)
> + || vector_overlap_vm_force(vm, rd)
> + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs1, lmul)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, true);
> + vector_lmul_check_reg(env, lmul, rd, true);
> +
> + for (i = 0; i < vlmax; i++) {
> + src1 = rs1 + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / (2 * width)));
> + dest = rd + (i / (VLEN / (2 * width)));
> + j = i % (VLEN / width);
> + k = i % (VLEN / (2 * width));
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u16[k] =
> + (uint16_t)env->vfp.vreg[src2].u16[k] -
> + (uint16_t)env->vfp.vreg[src1].u8[j];
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u32[k] =
> + (uint32_t)env->vfp.vreg[src2].u32[k] -
> + (uint32_t)env->vfp.vreg[src1].u16[j];
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u64[k] =
> + (uint64_t)env->vfp.vreg[src2].u64[k] -
> + (uint64_t)env->vfp.vreg[src1].u32[j];
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_widen(env, dest, k, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vwsubu_wx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, k, vl;
> + uint32_t lmul, width, src2, dest, vlmax;
> +
> + vl = env->vfp.vl;
> +
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env)
> + || vector_overlap_vm_force(vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, true);
> + vector_lmul_check_reg(env, lmul, rd, true);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / (2 * width)));
> + dest = rd + (i / (VLEN / (2 * width)));
> + k = i % (VLEN / (2 * width));
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u16[k] =
> + (uint16_t)env->vfp.vreg[src2].u16[k] -
> + (uint16_t)((uint8_t)env->gpr[rs1]);
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u32[k] =
> + (uint32_t)env->vfp.vreg[src2].u32[k] -
> + (uint32_t)((uint16_t)env->gpr[rs1]);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u64[k] =
> + (uint64_t)env->vfp.vreg[src2].u64[k] -
> + (uint64_t)((uint32_t)env->gpr[rs1]);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_widen(env, dest, k, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vfwsub.wv vd, vs2, vs1, vm # vector-vector */
> +void VECTOR_HELPER(vfwsub_wv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, k, dest, src1, src2;
> +
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> + if (vector_vtype_ill(env)
> + || vector_overlap_vm_force(vm, rd)
> + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs1, lmul)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, true);
> + vector_lmul_check_reg(env, lmul, rd, true);
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / (2 * width)));
> + src2 = rs2 + (i / (VLEN / (2 * width)));
> + src1 = rs1 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + k = i % (VLEN / (2 * width));
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f32[k] = float32_sub(
> + env->vfp.vreg[src2].f32[k],
> + float16_to_float32(env->vfp.vreg[src1].f16[j], true,
> + &env->fp_status),
> + &env->fp_status);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f64[k] = float64_sub(
> + env->vfp.vreg[src2].f64[k],
> + float32_to_float64(env->vfp.vreg[src1].f32[j],
> + &env->fp_status),
> + &env->fp_status);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_fwiden(env, dest, k, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vfwsub.wf vd, vs2, rs1, vm # vector-scalar */
> +void VECTOR_HELPER(vfwsub_wf)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, k, dest, src2;
> +
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> + if (vector_vtype_ill(env)
> + || vector_overlap_vm_force(vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs2, true);
> + vector_lmul_check_reg(env, lmul, rd, true);
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / (2 * width)));
> + src2 = rs2 + (i / (VLEN / (2 * width)));
> + k = i % (VLEN / (2 * width));
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f32[k] = float32_sub(
> + env->vfp.vreg[src2].f32[k],
> + float16_to_float32(env->fpr[rs1], true,
> + &env->fp_status),
> + &env->fp_status);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f64[k] = float64_sub(
> + env->vfp.vreg[src2].f64[k],
> + float32_to_float64(env->fpr[rs1], &env->fp_status),
> + &env->fp_status);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_fwiden(env, dest, k, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vwsub_wv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, k, vl;
> + uint32_t lmul, width, src1, src2, dest, vlmax;
> +
> + vl = env->vfp.vl;
> +
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env)
> + || vector_overlap_vm_force(vm, rd)
> + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs1, lmul)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, true);
> + vector_lmul_check_reg(env, lmul, rd, true);
> +
> + for (i = 0; i < vlmax; i++) {
> + src1 = rs1 + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / (2 * width)));
> + dest = rd + (i / (VLEN / (2 * width)));
> + j = i % (VLEN / width);
> + k = i % (VLEN / (2 * width));
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s16[k] =
> + (int16_t)env->vfp.vreg[src2].s16[k] -
> + (int16_t)((int8_t)env->vfp.vreg[src1].s8[j]);
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s32[k] =
> + (int32_t)env->vfp.vreg[src2].s32[k] -
> + (int32_t)((int16_t)env->vfp.vreg[src1].s16[j]);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s64[k] =
> + (int64_t)env->vfp.vreg[src2].s64[k] -
> + (int64_t)((int32_t)env->vfp.vreg[src1].s32[j]);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_widen(env, dest, k, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vwsub_wx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, k, vl;
> + uint32_t lmul, width, src2, dest, vlmax;
> +
> + vl = env->vfp.vl;
> +
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env)
> + || vector_overlap_vm_force(vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, true);
> + vector_lmul_check_reg(env, lmul, rd, true);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / (2 * width)));
> + dest = rd + (i / (VLEN / (2 * width)));
> + k = i % (VLEN / (2 * width));
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s16[k] =
> + (int16_t)env->vfp.vreg[src2].s16[k] -
> + (int16_t)((int8_t)env->gpr[rs1]);
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s32[k] =
> + (int32_t)env->vfp.vreg[src2].s32[k] -
> + (int32_t)((int16_t)env->gpr[rs1]);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s64[k] =
> + (int64_t)env->vfp.vreg[src2].s64[k] -
> + (int64_t)((int32_t)env->gpr[rs1]);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_widen(env, dest, k, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vwmulu_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, k, vl;
> + uint32_t lmul, width, src1, src2, dest, vlmax;
> +
> + vl = env->vfp.vl;
> +
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env)
> + || vector_overlap_vm_force(vm, rd)
> + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs1, lmul)
> + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, true);
> +
> + for (i = 0; i < vlmax; i++) {
> + src1 = rs1 + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / (2 * width)));
> + j = i % (VLEN / width);
> + k = i % (VLEN / (2 * width));
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u16[k] =
> + (uint16_t)env->vfp.vreg[src1].u8[j] *
> + (uint16_t)env->vfp.vreg[src2].u8[j];
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u32[k] =
> + (uint32_t)env->vfp.vreg[src1].u16[j] *
> + (uint32_t)env->vfp.vreg[src2].u16[j];
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u64[k] =
> + (uint64_t)env->vfp.vreg[src1].u32[j] *
> + (uint64_t)env->vfp.vreg[src2].u32[j];
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_widen(env, dest, k, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vwmulu_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, k, vl;
> + uint32_t lmul, width, src2, dest, vlmax;
> +
> + vl = env->vfp.vl;
> +
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env)
> + || vector_overlap_vm_force(vm, rd)
> + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, true);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / (2 * width)));
> + j = i % (VLEN / width);
> + k = i % (VLEN / (2 * width));
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u16[k] =
> + (uint16_t)env->vfp.vreg[src2].u8[j] *
> + (uint16_t)((uint8_t)env->gpr[rs1]);
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u32[k] =
> + (uint32_t)env->vfp.vreg[src2].u16[j] *
> + (uint32_t)((uint16_t)env->gpr[rs1]);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u64[k] =
> + (uint64_t)env->vfp.vreg[src2].u32[j] *
> + (uint64_t)((uint32_t)env->gpr[rs1]);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_widen(env, dest, k, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vfwmul.vv vd, vs2, vs1, vm # vector-vector */
> +void VECTOR_HELPER(vfwmul_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, k, dest, src1, src2;
> +
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> + if (vector_vtype_ill(env)
> + || vector_overlap_vm_force(vm, rd)
> + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs1, lmul)
> + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, true);
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / (2 * width)));
> + src2 = rs2 + (i / (VLEN / width));
> + src1 = rs1 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + k = i % (VLEN / (2 * width));
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f32[k] = float32_mul(
> + float16_to_float32(env->vfp.vreg[src2].f16[j], true,
> + &env->fp_status),
> + float16_to_float32(env->vfp.vreg[src1].f16[j], true,
> + &env->fp_status),
> + &env->fp_status);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f64[k] = float64_mul(
> + float32_to_float64(env->vfp.vreg[src2].f32[j],
> + &env->fp_status),
> + float32_to_float64(env->vfp.vreg[src1].f32[j],
> + &env->fp_status),
> + &env->fp_status);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_fwiden(env, dest, k, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vfwmul.vf vd, vs2, rs1, vm # vector-scalar */
> +void VECTOR_HELPER(vfwmul_vf)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, k, dest, src2;
> +
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> + if (vector_vtype_ill(env)
> + || vector_overlap_vm_force(vm, rd)
> + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, true);
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / (2 * width)));
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + k = i % (VLEN / (2 * width));
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f32[k] = float32_mul(
> + float16_to_float32(env->vfp.vreg[src2].f16[j], true,
> + &env->fp_status),
> + float16_to_float32(env->fpr[rs1], true,
> + &env->fp_status),
> + &env->fp_status);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f64[k] = float64_mul(
> + float32_to_float64(env->vfp.vreg[src2].f32[j],
> + &env->fp_status),
> + float32_to_float64(env->fpr[rs1], &env->fp_status),
> + &env->fp_status);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_fwiden(env, dest, k, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vwmulsu_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, k, vl;
> + uint32_t lmul, width, src1, src2, dest, vlmax;
> +
> + vl = env->vfp.vl;
> +
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env)
> + || vector_overlap_vm_force(vm, rd)
> + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs1, lmul)
> + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, true);
> +
> + for (i = 0; i < vlmax; i++) {
> + src1 = rs1 + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / (2 * width)));
> + j = i % (VLEN / width);
> + k = i % (VLEN / (2 * width));
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s16[k] =
> + (int16_t)env->vfp.vreg[src2].s8[j] *
> + (uint16_t)env->vfp.vreg[src1].u8[j];
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s32[k] =
> + (int32_t)env->vfp.vreg[src2].s16[j] *
> + (uint32_t)env->vfp.vreg[src1].u16[j];
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s64[k] =
> + (int64_t)env->vfp.vreg[src2].s32[j] *
> + (uint64_t)env->vfp.vreg[src1].u32[j];
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_widen(env, dest, k, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
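
The mixed casts in vwmulsu are easy to get backwards, so a worked example may
help: with SEW=8, signed -1 times unsigned 255 must widen to -255, not 65025.
A small self-contained check (values are illustrative, not patch code):

    /* Sketch: signed x unsigned widening, as vwmulsu needs it. */
    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        int8_t a = -1;     /* signed vs2 element, bit pattern 0xff */
        uint8_t b = 255;   /* unsigned vs1 element */
        int16_t wide = (int16_t)a * (uint16_t)b;  /* -255, not 65025 */
        printf("%d\n", wide);
        return 0;
    }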
> +void VECTOR_HELPER(vwmulsu_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, k, vl;
> + uint32_t lmul, width, src2, dest, vlmax;
> +
> + vl = env->vfp.vl;
> +
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env)
> + || vector_overlap_vm_force(vm, rd)
> + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, true);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / (2 * width)));
> + j = i % (VLEN / width);
> + k = i % (VLEN / (2 * width));
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s16[k] =
> + (int16_t)((int8_t)env->vfp.vreg[src2].s8[j]) *
> + (uint16_t)((uint8_t)env->gpr[rs1]);
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s32[k] =
> + (int32_t)((int16_t)env->vfp.vreg[src2].s16[j]) *
> + (uint32_t)((uint16_t)env->gpr[rs1]);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s64[k] =
> + (int64_t)((int32_t)env->vfp.vreg[src2].s32[j]) *
> + (uint64_t)((uint32_t)env->gpr[rs1]);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_widen(env, dest, k, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +void VECTOR_HELPER(vwmul_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, k, vl;
> + uint32_t lmul, width, src1, src2, dest, vlmax;
> +
> + vl = env->vfp.vl;
> +
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env)
> + || vector_overlap_vm_force(vm, rd)
> + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs1, lmul)
> + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, true);
> +
> + for (i = 0; i < vlmax; i++) {
> + src1 = rs1 + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / (2 * width)));
> + j = i % (VLEN / width);
> + k = i % (VLEN / (2 * width));
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s16[k] =
> + (int16_t)env->vfp.vreg[src1].s8[j] *
> + (int16_t)env->vfp.vreg[src2].s8[j];
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s32[k] =
> + (int32_t)env->vfp.vreg[src1].s16[j] *
> + (int32_t)env->vfp.vreg[src2].s16[j];
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s64[k] =
> + (int64_t)env->vfp.vreg[src1].s32[j] *
> + (int64_t)env->vfp.vreg[src2].s32[j];
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_widen(env, dest, k, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +void VECTOR_HELPER(vwmul_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, k, vl;
> + uint32_t lmul, width, src2, dest, vlmax;
> +
> + vl = env->vfp.vl;
> +
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env)
> + || vector_overlap_vm_force(vm, rd)
> + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, true);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / (2 * width)));
> + j = i % (VLEN / width);
> + k = i % (VLEN / (2 * width));
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s16[k] =
> + (int16_t)((int8_t)env->vfp.vreg[src2].s8[j]) *
> + (int16_t)((int8_t)env->gpr[rs1]);
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s32[k] =
> + (int32_t)((int16_t)env->vfp.vreg[src2].s16[j]) *
> + (int32_t)((int16_t)env->gpr[rs1]);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s64[k] =
> + (int64_t)((int32_t)env->vfp.vreg[src2].s32[j]) *
> + (int64_t)((int32_t)env->gpr[rs1]);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_widen(env, dest, k, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
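
The double casts applied to env->gpr[rs1] in the .vx forms above are
load-bearing: the XLEN-wide scalar must first be truncated to SEW and only
then sign- or zero-extended to 2*SEW. A standalone illustration (the value is
arbitrary, not from the patch):

    /* Sketch: truncate-then-extend of the x-register scalar, SEW=8. */
    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        uint64_t gpr = 0x12345678900000f0;        /* arbitrary XLEN value */
        int16_t s = (int16_t)((int8_t)gpr);       /* 0xf0 -> -16, sign-extended */
        uint16_t u = (uint16_t)((uint8_t)gpr);    /* 0xf0 -> 240, zero-extended */
        printf("signed %d, unsigned %u\n", s, u);
        return 0;
    }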
> +
> +/*
> + * vwsmaccu.vv vd, vs1, vs2, vm #
> + * vd[i] = clipu((+(vs1[i]*vs2[i]+round)>>SEW/2)+vd[i])
> + */
> +void VECTOR_HELPER(vwsmaccu_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, k, dest, src1, src2;
> +
> + lmul = vector_get_lmul(env);
> +
> +
> + if (vector_vtype_ill(env)
> + || vector_overlap_vm_force(vm, rd)
> + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs1, lmul)
> + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, true);
> +
> + vl = env->vfp.vl;
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / (2 * width)));
> + src2 = rs2 + (i / (VLEN / width));
> + src1 = rs1 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + k = i % (VLEN / (2 * width));
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u16[k] = vwsmaccu_8(env,
> + env->vfp.vreg[src2].u8[j],
> + env->vfp.vreg[src1].u8[j],
> + env->vfp.vreg[dest].u16[k]);
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u32[k] = vwsmaccu_16(env,
> + env->vfp.vreg[src2].u16[j],
> + env->vfp.vreg[src1].u16[j],
> + env->vfp.vreg[dest].u32[k]);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u64[k] = vwsmaccu_32(env,
> + env->vfp.vreg[src2].u32[j],
> + env->vfp.vreg[src1].u32[j],
> + env->vfp.vreg[dest].u64[k]);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_widen(env, dest, k, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
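
The vwsmaccu_8/16/32 helpers are defined earlier in the file and are not
visible in this hunk; from the formula in the comment, the 8-bit one plausibly
looks like the sketch below (my reconstruction under a round-to-nearest-up
assumption, not the patch's actual code):

    /* Sketch: vd = clipu(((a*b + round) >> SEW/2) + vd), for SEW = 8. */
    #include <stdint.h>

    static uint16_t vwsmaccu_8_sketch(uint8_t a, uint8_t b, uint16_t vd)
    {
        uint32_t prod = (uint32_t)a * b;     /* at most 16 significant bits */
        prod = (prod + (1u << 3)) >> 4;      /* +round, then >> SEW/2 = 4 */
        uint32_t sum = prod + vd;
        return sum > UINT16_MAX ? UINT16_MAX : sum;  /* clipu to 16 bits */
    }

The real helper must also set vxsat when the clip saturates; that bookkeeping
is omitted here.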
> +
> +/*
> + * vwsmaccu.vx vd, rs1, vs2, vm #
> + * vd[i] = clipu((+(x[rs1]*vs2[i]+round)>>SEW/2)+vd[i])
> + */
> +void VECTOR_HELPER(vwsmaccu_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, k, dest, src2;
> +
> + lmul = vector_get_lmul(env);
> +
> +
> + if (vector_vtype_ill(env)
> + || vector_overlap_vm_force(vm, rd)
> + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, true);
> +
> + vl = env->vfp.vl;
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / (2 * width)));
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + k = i % (VLEN / (2 * width));
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u16[k] = vwsmaccu_8(env,
> + env->vfp.vreg[src2].u8[j],
> + env->gpr[rs1],
> + env->vfp.vreg[dest].u16[k]);
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u32[k] = vwsmaccu_16(env,
> + env->vfp.vreg[src2].u16[j],
> + env->gpr[rs1],
> + env->vfp.vreg[dest].u32[k]);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u64[k] = vwsmaccu_32(env,
> + env->vfp.vreg[src2].u32[j],
> + env->gpr[rs1],
> + env->vfp.vreg[dest].u64[k]);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_widen(env, dest, k, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vwmaccu_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, k, vl;
> + uint32_t lmul, width, src1, src2, dest, vlmax;
> +
> + vl = env->vfp.vl;
> +
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env)
> + || vector_overlap_vm_force(vm, rd)
> + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs1, lmul)
> + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, true);
> +
> + for (i = 0; i < vlmax; i++) {
> + src1 = rs1 + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / (2 * width)));
> + j = i % (VLEN / width);
> + k = i % (VLEN / (2 * width));
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u16[k] +=
> + (uint16_t)env->vfp.vreg[src1].u8[j] *
> + (uint16_t)env->vfp.vreg[src2].u8[j];
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u32[k] +=
> + (uint32_t)env->vfp.vreg[src1].u16[j] *
> + (uint32_t)env->vfp.vreg[src2].u16[j];
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u64[k] +=
> + (uint64_t)env->vfp.vreg[src1].u32[j] *
> + (uint64_t)env->vfp.vreg[src2].u32[j];
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_widen(env, dest, k, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +void VECTOR_HELPER(vwmaccu_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, k, vl;
> + uint32_t lmul, width, src2, dest, vlmax;
> +
> + vl = env->vfp.vl;
> +
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env)
> + || vector_overlap_vm_force(vm, rd)
> + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, true);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / (2 * width)));
> + j = i % (VLEN / width);
> + k = i % (VLEN / (2 * width));
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u16[k] +=
> + (uint16_t)env->vfp.vreg[src2].u8[j] *
> + (uint16_t)((uint8_t)env->gpr[rs1]);
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u32[k] +=
> + (uint32_t)env->vfp.vreg[src2].u16[j] *
> + (uint32_t)((uint16_t)env->gpr[rs1]);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u64[k] +=
> + (uint64_t)env->vfp.vreg[src2].u32[j] *
> + (uint64_t)((uint32_t)env->gpr[rs1]);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_widen(env, dest, k, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
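
One property worth noting for the non-saturating widening macs above: the
double-width product itself can never overflow, since with SEW=8 the largest
product is 0xff * 0xff = 0xfe01 < 0x10000; only the accumulate can wrap, and
vwmaccu wraps modulo 2^(2*SEW) as ordinary integer ops do. A one-line check:

    /* Sketch: a 2*SEW accumulator always holds an SEW x SEW product. */
    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
        assert((uint16_t)(255u * 255u) == 0xfe01);  /* no wrap at 16 bits */
        return 0;
    }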
> +
> +/* vfwmacc.vv vd, vs1, vs2, vm # vd[i] = +(vs1[i] * vs2[i]) + vd[i] */
> +void VECTOR_HELPER(vfwmacc_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, k, dest, src1, src2;
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> + if (vector_vtype_ill(env)
> + || vector_overlap_vm_force(vm, rd)
> + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs1, lmul)
> + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, true);
> +
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / (2 * width)));
> + src2 = rs2 + (i / (VLEN / width));
> + src1 = rs1 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + k = i % (VLEN / (2 * width));
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f32[k] = float32_muladd(
> + float16_to_float32(env->vfp.vreg[src1].f16[j], true,
> + &env->fp_status),
> + float16_to_float32(env->vfp.vreg[src2].f16[j], true,
> + &env->fp_status),
> + env->vfp.vreg[dest].f32[k], /* addend vd[i] is already wide */
> + 0,
> + &env->fp_status);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f64[k] = float64_muladd(
> + float32_to_float64(env->vfp.vreg[src1].f32[j],
> + &env->fp_status),
> + float32_to_float64(env->vfp.vreg[src2].f32[j],
> + &env->fp_status),
> + env->vfp.vreg[dest].f64[k], /* addend vd[i] is already wide */
> + 0,
> + &env->fp_status);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + switch (width) {
> + case 16:
> + env->vfp.vreg[dest].f32[k] = 0;
> + break;
> + case 32:
> + env->vfp.vreg[dest].f64[k] = 0;
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + }
> + }
> + }
> + env->vfp.vstart = 0;
> +}
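
The four widening FMA forms in this hunk differ only in the negate flags
handed to float32_muladd/float64_muladd, so a compact map may save the next
reviewer a diff: 0 for vfwmacc, both flags for vfwnmacc, negate_c for
vfwmsac, negate_product for vfwnmsac. As a sketch (the flag names are
softfloat's real ones; the enum and wrapper are mine):

    /* Sketch: flag selection for the widening FMA variants. */
    #include "fpu/softfloat.h"

    enum { VFWMACC, VFWNMACC, VFWMSAC, VFWNMSAC };

    static int fma_flags(int op)
    {
        switch (op) {
        case VFWMACC:  return 0;                           /*  (a*b) + c */
        case VFWNMACC: return float_muladd_negate_product |
                              float_muladd_negate_c;       /* -(a*b) - c */
        case VFWMSAC:  return float_muladd_negate_c;       /*  (a*b) - c */
        case VFWNMSAC: return float_muladd_negate_product; /* -(a*b) + c */
        default:       return 0;
        }
    }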
> +
> +/* vfwmacc.vf vd, rs1, vs2, vm # vd[i] = +(f[rs1] * vs2[i]) + vd[i] */
> +void VECTOR_HELPER(vfwmacc_vf)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, k, dest, src2;
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> + if (vector_vtype_ill(env)
> + || vector_overlap_vm_force(vm, rd)
> + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, true);
> +
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / (2 * width)));
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + k = i % (VLEN / (2 * width));
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f32[k] = float32_muladd(
> + float16_to_float32(env->fpr[rs1], true,
> + &env->fp_status), /* widen f[rs1], as vfwmul.vf does */
> + float16_to_float32(env->vfp.vreg[src2].f16[j], true,
> + &env->fp_status),
> + env->vfp.vreg[dest].f32[k], /* addend vd[i] is already wide */
> + 0,
> + &env->fp_status);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f64[k] = float64_muladd(
> + float32_to_float64(env->fpr[rs1], &env->fp_status),
> + float32_to_float64(env->vfp.vreg[src2].f32[j],
> + &env->fp_status),
> + env->vfp.vreg[dest].f64[k], /* addend vd[i] is already wide */
> + 0,
> + &env->fp_status);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + switch (width) {
> + case 16:
> + env->vfp.vreg[dest].f32[k] = 0;
> + break;
> + case 32:
> + env->vfp.vreg[dest].f64[k] = 0;
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + }
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/*
> + * vwsmacc.vv vd, vs1, vs2, vm #
> + * vd[i] = clip((+(vs1[i]*vs2[i]+round)>>SEW/2)+vd[i])
> + */
> +void VECTOR_HELPER(vwsmacc_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, k, dest, src1, src2;
> +
> + lmul = vector_get_lmul(env);
> +
> +
> + if (vector_vtype_ill(env)
> + || vector_overlap_vm_force(vm, rd)
> + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs1, lmul)
> + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, true);
> +
> + vl = env->vfp.vl;
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / (2 * width)));
> + src2 = rs2 + (i / (VLEN / width));
> + src1 = rs1 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + k = i % (VLEN / (2 * width));
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s16[k] = vwsmacc_8(env,
> + env->vfp.vreg[src2].s8[j],
> + env->vfp.vreg[src1].s8[j],
> + env->vfp.vreg[dest].s16[k]);
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s32[k] = vwsmacc_16(env,
> + env->vfp.vreg[src2].s16[j],
> + env->vfp.vreg[src1].s16[j],
> + env->vfp.vreg[dest].s32[k]);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s64[k] = vwsmacc_32(env,
> + env->vfp.vreg[src2].s32[j],
> + env->vfp.vreg[src1].s32[j],
> + env->vfp.vreg[dest].s64[k]);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_widen(env, dest, k, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/*
> + * vwsmacc.vx vd, rs1, vs2, vm #
> + * vd[i] = clip((+(x[rs1]*vs2[i]+round)>>SEW/2)+vd[i])
> + */
> +void VECTOR_HELPER(vwsmacc_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, k, dest, src2;
> +
> + lmul = vector_get_lmul(env);
> +
> +
> + if (vector_vtype_ill(env)
> + || vector_overlap_vm_force(vm, rd)
> + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, true);
> +
> + vl = env->vfp.vl;
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / (2 * width)));
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + k = i % (VLEN / (2 * width));
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s16[k] = vwsmacc_8(env,
> + env->vfp.vreg[src2].s8[j],
> + env->gpr[rs1],
> + env->vfp.vreg[dest].s16[k]);
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s32[k] = vwsmacc_16(env,
> + env->vfp.vreg[src2].s16[j],
> + env->gpr[rs1],
> + env->vfp.vreg[dest].s32[k]);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s64[k] = vwsmacc_32(env,
> + env->vfp.vreg[src2].s32[j],
> + env->gpr[rs1],
> + env->vfp.vreg[dest].s64[k]);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_widen(env, dest, k, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/*
> + * vwsmaccsu.vv vd, vs1, vs2, vm
> + * # vd[i] = clip(-((signed(vs1[i])*unsigned(vs2[i])+round)>>SEW/2)+vd[i])
> + */
> +void VECTOR_HELPER(vwsmaccsu_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, k, dest, src1, src2;
> +
> + lmul = vector_get_lmul(env);
> +
> +
> + if (vector_vtype_ill(env)
> + || vector_overlap_vm_force(vm, rd)
> + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs1, lmul)
> + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, true);
> +
> + vl = env->vfp.vl;
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / (2 * width)));
> + src2 = rs2 + (i / (VLEN / width));
> + src1 = rs1 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + k = i % (VLEN / (2 * width));
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s16[k] = vwsmaccsu_8(env,
> + env->vfp.vreg[src2].u8[j],
> + env->vfp.vreg[src1].s8[j],
> + env->vfp.vreg[dest].s16[k]);
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s32[k] = vwsmaccsu_16(env,
> + env->vfp.vreg[src2].u16[j],
> + env->vfp.vreg[src1].s16[j],
> + env->vfp.vreg[dest].s32[k]);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s64[k] = vwsmaccsu_32(env,
> + env->vfp.vreg[src2].u32[j],
> + env->vfp.vreg[src1].s32[j],
> + env->vfp.vreg[dest].s64[k]);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_widen(env, dest, k, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/*
> + * vwsmaccsu.vx vd, rs1, vs2, vm
> + * # vd[i] = clip(-((signed(x[rs1])*unsigned(vs2[i])+round)>>SEW/2)+vd[i])
> + */
> +void VECTOR_HELPER(vwsmaccsu_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, k, dest, src2;
> +
> + lmul = vector_get_lmul(env);
> +
> +
> + if (vector_vtype_ill(env)
> + || vector_overlap_vm_force(vm, rd)
> + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, true);
> +
> + vl = env->vfp.vl;
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / (2 * width)));
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + k = i % (VLEN / (2 * width));
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s16[k] = vwsmaccsu_8(env,
> + env->vfp.vreg[src2].u8[j],
> + env->gpr[rs1],
> + env->vfp.vreg[dest].s16[k]);
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s32[k] = vwsmaccsu_16(env,
> + env->vfp.vreg[src2].u16[j],
> + env->gpr[rs1],
> + env->vfp.vreg[dest].s32[k]);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s64[k] = vwsmaccsu_32(env,
> + env->vfp.vreg[src2].u32[j],
> + env->gpr[rs1],
> + env->vfp.vreg[dest].s64[k]);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_widen(env, dest, k, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/*
> + * vwsmaccus.vx vd, rs1, vs2, vm
> + * # vd[i] = clip(-((unsigned(x[rs1])*signed(vs2[i])+round)>>SEW/2)+vd[i])
> + */
> +void VECTOR_HELPER(vwsmaccus_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, k, dest, src2;
> +
> + lmul = vector_get_lmul(env);
> +
> +
> + if (vector_vtype_ill(env)
> + || vector_overlap_vm_force(vm, rd)
> + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, true);
> +
> + vl = env->vfp.vl;
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / (2 * width)));
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + k = i % (VLEN / (2 * width));
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s16[k] = vwsmaccus_8(env,
> + env->vfp.vreg[src2].s8[j],
> + env->gpr[rs1],
> + env->vfp.vreg[dest].s16[k]);
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s32[k] = vwsmaccus_16(env,
> + env->vfp.vreg[src2].s16[j],
> + env->gpr[rs1],
> + env->vfp.vreg[dest].s32[k]);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s64[k] = vwsmaccus_32(env,
> + env->vfp.vreg[src2].s32[j],
> + env->gpr[rs1],
> + env->vfp.vreg[dest].s64[k]);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_widen(env, dest, k, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
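
For the signed saturating forms, the final clip narrows to the signed 2*SEW
range, and it is where vxsat gets set. A sketch of the 16-bit clip the
vwsmacc* helpers imply (again a reconstruction of an out-of-hunk helper, not
the patch's code):

    /* Sketch: signed saturation to 16 bits, with a vxsat-style flag. */
    #include <stdint.h>

    static int16_t clip_s16_sketch(int32_t x, int *sat)
    {
        if (x > INT16_MAX) { *sat = 1; return INT16_MAX; }
        if (x < INT16_MIN) { *sat = 1; return INT16_MIN; }
        return (int16_t)x;
    }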
> +
> +
> +void VECTOR_HELPER(vwmacc_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, k, vl;
> + uint32_t lmul, width, src1, src2, dest, vlmax;
> +
> + vl = env->vfp.vl;
> +
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env)
> + || vector_overlap_vm_force(vm, rd)
> + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs1, lmul)
> + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, true);
> +
> + for (i = 0; i < vlmax; i++) {
> + src1 = rs1 + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / (2 * width)));
> + j = i % (VLEN / width);
> + k = i % (VLEN / (2 * width));
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s16[k] +=
> + (int16_t)env->vfp.vreg[src1].s8[j]
> + * (int16_t)env->vfp.vreg[src2].s8[j];
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s32[k] +=
> + (int32_t)env->vfp.vreg[src1].s16[j] *
> + (int32_t)env->vfp.vreg[src2].s16[j];
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s64[k] +=
> + (int64_t)env->vfp.vreg[src1].s32[j] *
> + (int64_t)env->vfp.vreg[src2].s32[j];
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_widen(env, dest, k, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +void VECTOR_HELPER(vwmacc_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, k, vl;
> + uint32_t lmul, width, src2, dest, vlmax;
> +
> + vl = env->vfp.vl;
> +
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env)
> + || vector_overlap_vm_force(vm, rd)
> + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, true);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / (2 * width)));
> + j = i % (VLEN / width);
> + k = i % (VLEN / (2 * width));
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s16[k] +=
> + (int16_t)((int8_t)env->vfp.vreg[src2].s8[j]) *
> + (int16_t)((int8_t)env->gpr[rs1]);
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s32[k] +=
> + (int32_t)((int16_t)env->vfp.vreg[src2].s16[j]) *
> + (int32_t)((int16_t)env->gpr[rs1]);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s64[k] +=
> + (int64_t)((int32_t)env->vfp.vreg[src2].s32[j]) *
> + (int64_t)((int32_t)env->gpr[rs1]);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_widen(env, dest, k, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vfwnmacc.vv vd, vs1, vs2, vm # vd[i] = -(vs1[i] * vs2[i]) - vd[i] */
> +void VECTOR_HELPER(vfwnmacc_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, k, dest, src1, src2;
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> + if (vector_vtype_ill(env)
> + || vector_overlap_vm_force(vm, rd)
> + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs1, lmul)
> + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, true);
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / (2 * width)));
> + src2 = rs2 + (i / (VLEN / width));
> + src1 = rs1 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + k = i % (VLEN / (2 * width));
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f32[k] = float32_muladd(
> + float16_to_float32(env->vfp.vreg[src1].f16[j], true,
> + &env->fp_status),
> + float16_to_float32(env->vfp.vreg[src2].f16[j], true,
> + &env->fp_status),
> + env->vfp.vreg[dest].f32[k], /* addend vd[i] is already wide */
> + float_muladd_negate_c |
> + float_muladd_negate_product,
> + &env->fp_status);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f64[k] = float64_muladd(
> + float32_to_float64(env->vfp.vreg[src1].f32[j],
> + &env->fp_status),
> + float32_to_float64(env->vfp.vreg[src2].f32[j],
> + &env->fp_status),
> + env->vfp.vreg[dest].f64[k], /* addend vd[i] is already wide */
> + float_muladd_negate_c |
> + float_muladd_negate_product,
> + &env->fp_status);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + switch (width) {
> + case 16:
> + env->vfp.vreg[dest].f32[k] = 0;
> + break;
> + case 32:
> + env->vfp.vreg[dest].f64[k] = 0;
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + }
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vfwnmacc.vf vd, rs1, vs2, vm # vd[i] = -(f[rs1] * vs2[i]) - vd[i] */
> +void VECTOR_HELPER(vfwnmacc_vf)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, k, dest, src2;
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> + if (vector_vtype_ill(env)
> + || vector_overlap_vm_force(vm, rd)
> + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, true);
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / (2 * width)));
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + k = i % (VLEN / (2 * width));
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f32[k] = float32_muladd(
> + float16_to_float32(env->fpr[rs1], true,
> + &env->fp_status), /* widen f[rs1], as vfwmul.vf does */
> + float16_to_float32(env->vfp.vreg[src2].f16[j], true,
> + &env->fp_status),
> + env->vfp.vreg[dest].f32[k], /* addend vd[i] is already wide */
> + float_muladd_negate_c |
> + float_muladd_negate_product,
> + &env->fp_status);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f64[k] = float64_muladd(
> + float32_to_float64(env->fpr[rs1], &env->fp_status),
> + float32_to_float64(env->vfp.vreg[src2].f32[j],
> + &env->fp_status),
> + env->vfp.vreg[dest].f64[k], /* addend vd[i] is already wide */
> + float_muladd_negate_c |
> + float_muladd_negate_product,
> + &env->fp_status);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + switch (width) {
> + case 16:
> + env->vfp.vreg[dest].f32[k] = 0;
> + break;
> + case 32:
> + env->vfp.vreg[dest].f64[k] = 0;
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + }
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vwmaccsu_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, k, vl;
> + uint32_t lmul, width, src1, src2, dest, vlmax;
> +
> + vl = env->vfp.vl;
> +
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env)
> + || vector_overlap_vm_force(vm, rd)
> + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs1, lmul)
> + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, true);
> +
> + for (i = 0; i < vlmax; i++) {
> + src1 = rs1 + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / (2 * width)));
> + j = i % (VLEN / width);
> + k = i % (VLEN / (2 * width));
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s16[k] +=
> + (int16_t)env->vfp.vreg[src1].s8[j]
> + * (uint16_t)env->vfp.vreg[src2].u8[j];
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s32[k] +=
> + (int32_t)env->vfp.vreg[src1].s16[j] *
> + (uint32_t)env->vfp.vreg[src2].u16[j];
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s64[k] +=
> + (int64_t)env->vfp.vreg[src1].s32[j] *
> + (uint64_t)env->vfp.vreg[src2].u32[j];
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_widen(env, dest, k, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +void VECTOR_HELPER(vwmaccsu_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, k, vl;
> + uint32_t lmul, width, src2, dest, vlmax;
> +
> + vl = env->vfp.vl;
> +
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env)
> + || vector_overlap_vm_force(vm, rd)
> + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, true);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / (2 * width)));
> + j = i % (VLEN / width);
> + k = i % (VLEN / (2 * width));
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s16[k] +=
> + (uint16_t)((uint8_t)env->vfp.vreg[src2].u8[j]) *
> + (int16_t)((int8_t)env->gpr[rs1]);
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s32[k] +=
> + (uint32_t)((uint16_t)env->vfp.vreg[src2].u16[j]) *
> + (int32_t)((int16_t)env->gpr[rs1]);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s64[k] +=
> + (uint64_t)((uint32_t)env->vfp.vreg[src2].u32[j]) *
> + (int64_t)((int32_t)env->gpr[rs1]);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_widen(env, dest, k, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vfwmsac.vv vd, vs1, vs2, vm # vd[i] = +(vs1[i] * vs2[i]) - vd[i] */
> +void VECTOR_HELPER(vfwmsac_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, k, dest, src1, src2;
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> + if (vector_vtype_ill(env)
> + || vector_overlap_vm_force(vm, rd)
> + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs1, lmul)
> + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, true);
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / (2 * width)));
> + src2 = rs2 + (i / (VLEN / width));
> + src1 = rs1 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + k = i % (VLEN / (2 * width));
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f32[k] = float32_muladd(
> + float16_to_float32(env->vfp.vreg[src1].f16[j], true,
> + &env->fp_status),
> + float16_to_float32(env->vfp.vreg[src2].f16[j], true,
> + &env->fp_status),
> + env->vfp.vreg[dest].f32[k], /* addend vd[i] is already wide */
> + float_muladd_negate_c,
> + &env->fp_status);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f64[k] = float64_muladd(
> + float32_to_float64(env->vfp.vreg[src1].f32[j],
> + &env->fp_status),
> + float32_to_float64(env->vfp.vreg[src2].f32[j],
> + &env->fp_status),
> + env->vfp.vreg[dest].f64[k], /* addend vd[i] is already wide */
> + float_muladd_negate_c,
> + &env->fp_status);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + switch (width) {
> + case 16:
> + env->vfp.vreg[dest].f32[k] = 0;
> + break;
> + case 32:
> + env->vfp.vreg[dest].f64[k] = 0;
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + }
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vfwmsac.vf vd, rs1, vs2, vm # vd[i] = +(f[rs1] * vs2[i]) - vd[i] */
> +void VECTOR_HELPER(vfwmsac_vf)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, k, dest, src2;
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> + if (vector_vtype_ill(env)
> + || vector_overlap_vm_force(vm, rd)
> + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, true);
> +
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / (2 * width)));
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + k = i % (VLEN / (2 * width));
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f32[k] = float32_muladd(
> + float16_to_float32(env->fpr[rs1], true,
> + &env->fp_status), /* widen f[rs1], as vfwmul.vf does */
> + float16_to_float32(env->vfp.vreg[src2].f16[j], true,
> + &env->fp_status),
> + env->vfp.vreg[dest].f32[k], /* addend vd[i] is already wide */
> + float_muladd_negate_c,
> + &env->fp_status);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f64[k] = float64_muladd(
> + float32_to_float64(env->fpr[rs1], &env->fp_status),
> + float32_to_float64(env->vfp.vreg[src2].f32[j],
> + &env->fp_status),
> + env->vfp.vreg[dest].f64[k], /* addend vd[i] is already wide */
> + float_muladd_negate_c,
> + &env->fp_status);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + switch (width) {
> + case 16:
> + env->vfp.vreg[dest].f32[k] = 0;
> + break;
> + case 32:
> + env->vfp.vreg[dest].f64[k] = 0;
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + }
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vwmaccus_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, k, vl;
> + uint32_t lmul, width, src2, dest, vlmax;
> +
> + vl = env->vfp.vl;
> +
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env)
> + || vector_overlap_vm_force(vm, rd)
> + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, true);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / (2 * width)));
> + j = i % (VLEN / width);
> + k = i % (VLEN / (2 * width));
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s16[k] +=
> + (int16_t)((int8_t)env->vfp.vreg[src2].s8[j]) *
> + (uint16_t)((uint8_t)env->gpr[rs1]);
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s32[k] +=
> + (int32_t)((int16_t)env->vfp.vreg[src2].s16[j]) *
> + (uint32_t)((uint16_t)env->gpr[rs1]);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s64[k] +=
> + (int64_t)((int32_t)env->vfp.vreg[src2].s32[j]) *
> + (uint64_t)((uint32_t)env->gpr[rs1]);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_widen(env, dest, k, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vfwnmsac.vv vd, vs1, vs2, vm # vd[i] = -(vs1[i] * vs2[i]) + vd[i] */
> +void VECTOR_HELPER(vfwnmsac_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, k, dest, src1, src2;
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> + if (vector_vtype_ill(env)
> + || vector_overlap_vm_force(vm, rd)
> + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs1, lmul)
> + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, true);
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / (2 * width)));
> + src2 = rs2 + (i / (VLEN / width));
> + src1 = rs1 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + k = i % (VLEN / (2 * width));
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f32[k] = float32_muladd(
> + float16_to_float32(env->vfp.vreg[src1].f16[j], true,
> + &env->fp_status),
> + float16_to_float32(env->vfp.vreg[src2].f16[j], true,
> + &env->fp_status),
> + env->vfp.vreg[dest].f32[k], /* addend vd[i] is already wide */
> + float_muladd_negate_product,
> + &env->fp_status);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f64[k] = float64_muladd(
> + float32_to_float64(env->vfp.vreg[src1].f32[j],
> + &env->fp_status),
> + float32_to_float64(env->vfp.vreg[src2].f32[j],
> + &env->fp_status),
> + env->vfp.vreg[dest].f64[k], /* addend vd[i] is already wide */
> + float_muladd_negate_product,
> + &env->fp_status);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + switch (width) {
> + case 16:
> + env->vfp.vreg[dest].f32[k] = 0;
> + break;
> + case 32:
> + env->vfp.vreg[dest].f64[k] = 0;
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + }
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vfwnmsac.vf vd, rs1, vs2, vm # vd[i] = -(f[rs1] * vs2[i]) + vd[i] */
> +void VECTOR_HELPER(vfwnmsac_vf)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, k, dest, src2;
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> + if (vector_vtype_ill(env)
> + || vector_overlap_vm_force(vm, rd)
> + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, true);
> +
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / (2 * width)));
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + k = i % (VLEN / (2 * width));
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f32[k] = float32_muladd(
> + float16_to_float32(env->fpr[rs1], true,
> + &env->fp_status), /* widen f[rs1], as vfwmul.vf does */
> + float16_to_float32(env->vfp.vreg[src2].f16[j], true,
> + &env->fp_status),
> + env->vfp.vreg[dest].f32[k], /* addend vd[i] is already wide */
> + float_muladd_negate_product,
> + &env->fp_status);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f64[k] = float64_muladd(
> + float32_to_float64(env->fpr[rs1], &env->fp_status),
> + float32_to_float64(env->vfp.vreg[src2].f32[j],
> + &env->fp_status),
> + env->vfp.vreg[dest].f64[k], /* addend vd[i] is already wide */
> + float_muladd_negate_product,
> + &env->fp_status);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + switch (width) {
> + case 16:
> + env->vfp.vreg[dest].f32[k] = 0;
> + break;
> + case 32:
> + env->vfp.vreg[dest].f64[k] = 0;
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + }
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +
> +/* vfsqrt.v vd, vs2, vm # Vector-vector square root */
> +void VECTOR_HELPER(vfsqrt_v)(CPURISCVState *env, uint32_t vm, uint32_t rs2,
> + uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest, src2;
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f16[j] = float16_sqrt(
> + env->vfp.vreg[src2].f16[j],
> + &env->fp_status);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f32[j] = float32_sqrt(
> + env->vfp.vreg[src2].f32[j],
> + &env->fp_status);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f64[j] = float64_sqrt(
> + env->vfp.vreg[src2].f64[j],
> + &env->fp_status);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + switch (width) {
> + case 16:
> + env->vfp.vreg[dest].f16[j] = 0;
> + break;
> + case 32:
> + env->vfp.vreg[dest].f32[j] = 0;
> + break;
> + case 64:
> + env->vfp.vreg[dest].f64[j] = 0;
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vfclass.v vd, vs2, vm # Vector-vector */
> +void VECTOR_HELPER(vfclass_v)(CPURISCVState *env, uint32_t vm, uint32_t rs2,
> + uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest, src2;
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u16[j] = helper_fclass_h(
> + env->vfp.vreg[src2].f16[j]);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u32[j] = helper_fclass_s(
> + env->vfp.vreg[src2].f32[j]);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u64[j] = helper_fclass_d(
> + env->vfp.vreg[src2].f64[j]);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_fcommon(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
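
helper_fclass_h/s/d return the standard RISC-V 10-bit one-hot classification,
which the loop above stores zero-extended to the element width. For reference
while reviewing, the bit meanings are (the enum names are mine; the encoding
is the architectural F/D fclass layout):

    /* Sketch: the one-hot fclass result bits (RISC-V F/D encoding). */
    enum {
        FCLASS_NEG_INF  = 1 << 0,  FCLASS_NEG_NORM = 1 << 1,
        FCLASS_NEG_SUB  = 1 << 2,  FCLASS_NEG_ZERO = 1 << 3,
        FCLASS_POS_ZERO = 1 << 4,  FCLASS_POS_SUB  = 1 << 5,
        FCLASS_POS_NORM = 1 << 6,  FCLASS_POS_INF  = 1 << 7,
        FCLASS_SNAN     = 1 << 8,  FCLASS_QNAN     = 1 << 9,
    };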
> +
> +/* vfcvt.xu.f.v vd, vs2, vm # Convert float to unsigned integer. */
> +void VECTOR_HELPER(vfcvt_xu_f_v)(CPURISCVState *env, uint32_t vm,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest, src2;
> + if (vector_vtype_ill(env)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> +    if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u16[j] = float16_to_uint16(
> + env->vfp.vreg[src2].f16[j],
> + &env->fp_status);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u32[j] = float32_to_uint32(
> + env->vfp.vreg[src2].f32[j],
> + &env->fp_status);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u64[j] = float64_to_uint64(
> + env->vfp.vreg[src2].f64[j],
> + &env->fp_status);
> + }
> + break;
> + default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_fcommon(env, dest, j, width);
> + }
> + }
> +    env->vfp.vstart = 0;
> +}
> +
> +/* vfcvt.x.f.v vd, vs2, vm # Convert float to signed integer. */
> +void VECTOR_HELPER(vfcvt_x_f_v)(CPURISCVState *env, uint32_t vm,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest, src2;
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> +    if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    env->vfp.vreg[dest].s16[j] = float16_to_int16(
> + env->vfp.vreg[src2].f16[j],
> + &env->fp_status);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s32[j] = float32_to_int32(
> + env->vfp.vreg[src2].f32[j],
> + &env->fp_status);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s64[j] = float64_to_int64(
> + env->vfp.vreg[src2].f64[j],
> + &env->fp_status);
> + }
> + break;
> + default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_fcommon(env, dest, j, width);
> + }
> + }
> +    env->vfp.vstart = 0;
> +}
> +
> +/* vfcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to float. */
> +void VECTOR_HELPER(vfcvt_f_xu_v)(CPURISCVState *env, uint32_t vm,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest, src2;
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> +    if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f16[j] = uint16_to_float16(
> + env->vfp.vreg[src2].u16[j],
> + &env->fp_status);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f32[j] = uint32_to_float32(
> + env->vfp.vreg[src2].u32[j],
> + &env->fp_status);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f64[j] = uint64_to_float64(
> + env->vfp.vreg[src2].u64[j],
> + &env->fp_status);
> + }
> + break;
> + default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_fcommon(env, dest, j, width);
> + }
> + }
> +    env->vfp.vstart = 0;
> +}
> +
> +/* vfcvt.f.x.v vd, vs2, vm # Convert integer to float. */
> +void VECTOR_HELPER(vfcvt_f_x_v)(CPURISCVState *env, uint32_t vm,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest, src2;
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> +    if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f16[j] = int16_to_float16(
> + env->vfp.vreg[src2].s16[j],
> + &env->fp_status);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f32[j] = int32_to_float32(
> + env->vfp.vreg[src2].s32[j],
> + &env->fp_status);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f64[j] = int64_to_float64(
> + env->vfp.vreg[src2].s64[j],
> + &env->fp_status);
> + }
> + break;
> + default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_fcommon(env, dest, j, width);
> + }
> + }
> +    env->vfp.vstart = 0;
> +}
> +
> +/*
> + * vfwcvt.xu.f.v vd, vs2, vm #
> + * Convert float to double-width unsigned integer.
> + */
> +void VECTOR_HELPER(vfwcvt_xu_f_v)(CPURISCVState *env, uint32_t vm,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, k, dest, src2;
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> + if (vector_vtype_ill(env)
> + || vector_overlap_vm_force(vm, rd)
> + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, true);
> +
> + if (lmul > 4) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / (2 * width)));
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + k = i % (VLEN / (2 * width));
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u32[k] = float16_to_uint32(
> + env->vfp.vreg[src2].f16[j],
> + &env->fp_status);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u64[k] = float32_to_uint64(
> + env->vfp.vreg[src2].f32[j],
> + &env->fp_status);
> + }
> + break;
> + default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + }
> + } else {
> + vector_tail_fwiden(env, dest, j, width);
> + }
> + }
> +    env->vfp.vstart = 0;
> +}
> +
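The widening converts index the destination group with 2*width-bit
elements, hence the two divisors above. Side by side (a sketch, assuming
VLEN in bits):

    int src_per_reg = VLEN / width;       /* narrow sources per register */
    int dst_per_reg = VLEN / (2 * width); /* widened results per register */
    int src2 = rs2 + i / src_per_reg, j = i % src_per_reg;
    int dest = rd + i / dst_per_reg, k = i % dst_per_reg;

Each source register fans out across two destination registers, which is
also why the overlap check passes 2 * lmul for rd and why lmul > 4 is
rejected.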
> +/* vfwcvt.x.f.v vd, vs2, vm # Convert float to double-width signed integer. */
> +void VECTOR_HELPER(vfwcvt_x_f_v)(CPURISCVState *env, uint32_t vm,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, k, dest, src2;
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> + if (vector_vtype_ill(env)
> + || vector_overlap_vm_force(vm, rd)
> + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, true);
> +
> + if (lmul > 4) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / (2 * width)));
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + k = i % (VLEN / (2 * width));
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s32[k] = float16_to_int32(
> + env->vfp.vreg[src2].f16[j],
> + &env->fp_status);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s64[k] = float32_to_int64(
> + env->vfp.vreg[src2].f32[j],
> + &env->fp_status);
> + }
> + break;
> + default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_fwiden(env, dest, k, width);
> + }
> + }
> +    env->vfp.vstart = 0;
> +}
> +
> +/*
> + * vfwcvt.f.xu.v vd, vs2, vm #
> + * Convert unsigned integer to double-width float.
> + */
> +void VECTOR_HELPER(vfwcvt_f_xu_v)(CPURISCVState *env, uint32_t vm,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, k, dest, src2;
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> + if (vector_vtype_ill(env)
> + || vector_overlap_vm_force(vm, rd)
> + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, true);
> +
> + if (lmul > 4) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / (2 * width)));
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + k = i % (VLEN / (2 * width));
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f32[k] = uint16_to_float32(
> + env->vfp.vreg[src2].u16[j],
> + &env->fp_status);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f64[k] = uint32_to_float64(
> + env->vfp.vreg[src2].u32[j],
> + &env->fp_status);
> + }
> + break;
> + default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_fwiden(env, dest, k, width);
> + }
> + }
> +    env->vfp.vstart = 0;
> +}
> +
> +/* vfwcvt.f.x.v vd, vs2, vm # Convert integer to double-width float. */
> +void VECTOR_HELPER(vfwcvt_f_x_v)(CPURISCVState *env, uint32_t vm,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, k, dest, src2;
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> + if (vector_vtype_ill(env)
> + || vector_overlap_vm_force(vm, rd)
> + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, true);
> +
> + if (lmul > 4) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / (2 * width)));
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + k = i % (VLEN / (2 * width));
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f32[k] = int16_to_float32(
> + env->vfp.vreg[src2].s16[j],
> + &env->fp_status);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f64[k] = int32_to_float64(
> + env->vfp.vreg[src2].s32[j],
> + &env->fp_status);
> + }
> + break;
> + default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_fwiden(env, dest, k, width);
> + }
> + }
> +    env->vfp.vstart = 0;
> +}
> +
> +/*
> + * vfwcvt.f.f.v vd, vs2, vm #
> + * Convert single-width float to double-width float.
> + */
> +void VECTOR_HELPER(vfwcvt_f_f_v)(CPURISCVState *env, uint32_t vm,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, k, dest, src2;
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> + if (vector_vtype_ill(env)
> + || vector_overlap_vm_force(vm, rd)
> + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, true);
> +
> + if (lmul > 4) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / (2 * width)));
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + k = i % (VLEN / (2 * width));
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f32[k] = float16_to_float32(
> + env->vfp.vreg[src2].f16[j],
> + true,
> + &env->fp_status);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f64[k] = float32_to_float64(
> + env->vfp.vreg[src2].f32[j],
> + &env->fp_status);
> + }
> + break;
> + default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_fwiden(env, dest, k, width);
> + }
> + }
> +    env->vfp.vstart = 0;
> +}
> +
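Note the extra boolean passed to float16_to_float32() here: in QEMU's
softfloat it selects IEEE binary16 semantics rather than the ARM
alternative half-precision format, i.e.

    /* widen an IEEE binary16 element to binary32 */
    float32 f = float16_to_float32(h, true, &env->fp_status);

so passing true matches the IEEE f16 elements the vector spec assumes.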
> +/*
> + * vfncvt.xu.f.v vd, vs2, vm #
> + * Convert double-width float to unsigned integer.
> + */
> +void VECTOR_HELPER(vfncvt_xu_f_v)(CPURISCVState *env, uint32_t vm,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, k, dest, src2;
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> + if (vector_vtype_ill(env) ||
> + vector_overlap_vm_common(lmul, vm, rd) ||
> + vector_overlap_dstgp_srcgp(rd, lmul, rs2, 2 * lmul)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs2, true);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + if (lmul > 4) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / (2 * width)));
> + k = i % (VLEN / width);
> + j = i % (VLEN / (2 * width));
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u16[k] = float32_to_uint16(
> + env->vfp.vreg[src2].f32[j],
> + &env->fp_status);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u32[k] = float64_to_uint32(
> + env->vfp.vreg[src2].f64[j],
> + &env->fp_status);
> + }
> + break;
> + default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_fnarrow(env, dest, k, width);
> + }
> + }
> +    env->vfp.vstart = 0;
> +}
> +
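The narrowing helpers invert the widening layout, and j and k deliberately
swap roles: k walks the narrow destination while j walks the double-width
source. Under the same assumptions as the widening sketch:

    int dst_per_reg = VLEN / width;       /* narrow results per register */
    int src_per_reg = VLEN / (2 * width); /* wide sources per register */
    int dest = rd + i / dst_per_reg, k = i % dst_per_reg;
    int src2 = rs2 + i / src_per_reg, j = i % src_per_reg;

The source group spans 2 * lmul registers, hence the 2 * lmul overlap
argument and the lmul > 4 rejection.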
> +/* vfncvt.x.f.v vd, vs2, vm # Convert double-width float to signed integer. */
> +void VECTOR_HELPER(vfncvt_x_f_v)(CPURISCVState *env, uint32_t vm,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, k, dest, src2;
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> + if (vector_vtype_ill(env) ||
> + vector_overlap_vm_common(lmul, vm, rd) ||
> + vector_overlap_dstgp_srcgp(rd, lmul, rs2, 2 * lmul)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs2, true);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + if (lmul > 4) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / (2 * width)));
> + k = i % (VLEN / width);
> + j = i % (VLEN / (2 * width));
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s16[k] = float32_to_int16(
> + env->vfp.vreg[src2].f32[j],
> + &env->fp_status);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s32[k] = float64_to_int32(
> + env->vfp.vreg[src2].f64[j],
> + &env->fp_status);
> + }
> + break;
> + default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_fnarrow(env, dest, k, width);
> + }
> + }
> +    env->vfp.vstart = 0;
> +}
> +
> +/*
> + * vfncvt.f.xu.v vd, vs2, vm #
> + * Convert double-width unsigned integer to float.
> + */
> +void VECTOR_HELPER(vfncvt_f_xu_v)(CPURISCVState *env, uint32_t vm,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, k, dest, src2;
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> + if (vector_vtype_ill(env) ||
> + vector_overlap_vm_common(lmul, vm, rd) ||
> + vector_overlap_dstgp_srcgp(rd, lmul, rs2, 2 * lmul)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, true);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + if (lmul > 4) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / (2 * width)));
> + k = i % (VLEN / width);
> + j = i % (VLEN / (2 * width));
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f16[k] = uint32_to_float16(
> + env->vfp.vreg[src2].u32[j],
> + &env->fp_status);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f32[k] = uint64_to_float32(
> + env->vfp.vreg[src2].u64[j],
> + &env->fp_status);
> + }
> + break;
> + default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_fnarrow(env, dest, k, width);
> + }
> + }
> +    env->vfp.vstart = 0;
> +}
> +
> +/* vfncvt.f.x.v vd, vs2, vm # Convert double-width integer to float. */
> +void VECTOR_HELPER(vfncvt_f_x_v)(CPURISCVState *env, uint32_t vm,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, k, dest, src2;
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> + if (vector_vtype_ill(env) ||
> + vector_overlap_vm_common(lmul, vm, rd) ||
> + vector_overlap_dstgp_srcgp(rd, lmul, rs2, 2 * lmul)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs2, true);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + if (lmul > 4) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / (2 * width)));
> + k = i % (VLEN / width);
> + j = i % (VLEN / (2 * width));
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f16[k] = int32_to_float16(
> + env->vfp.vreg[src2].s32[j],
> + &env->fp_status);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f32[k] = int64_to_float32(
> +                            env->vfp.vreg[src2].s64[j],
> + &env->fp_status);
> + }
> + break;
> + default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_fnarrow(env, dest, k, width);
> + }
> + }
> +    env->vfp.vstart = 0;
> +}
> +
> +/*
> + * vfncvt.f.f.v vd, vs2, vm #
> + * Convert double-width float to single-width float.
> + */
> +void VECTOR_HELPER(vfncvt_f_f_v)(CPURISCVState *env, uint32_t vm,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, k, dest, src2;
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> + if (vector_vtype_ill(env) ||
> + vector_overlap_vm_common(lmul, vm, rd) ||
> + vector_overlap_dstgp_srcgp(rd, lmul, rs2, 2 * lmul)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs2, true);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + if (lmul > 4) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / (2 * width)));
> + k = i % (VLEN / width);
> + j = i % (VLEN / (2 * width));
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f16[k] = float32_to_float16(
> + env->vfp.vreg[src2].f32[j],
> + true,
> + &env->fp_status);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f32[k] = float64_to_float32(
> + env->vfp.vreg[src2].f64[j],
> + &env->fp_status);
> + }
> + break;
> + default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_fnarrow(env, dest, k, width);
> + }
> + }
> +    env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vlbu_v)(CPURISCVState *env, uint32_t nf, uint32_t vm,
> + uint32_t rs1, uint32_t rd)
> +{
> + int i, j, k, vl, vlmax, lmul, width, dest, read;
> +
> + vl = env->vfp.vl;
> +
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> +    if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + if (lmul * (nf + 1) > 32) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + k = nf;
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + read = i * (nf + 1) + k;
> + env->vfp.vreg[dest + k * lmul].u8[j] =
> + cpu_ldub_data(env, env->gpr[rs1] + read);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + read = i * (nf + 1) + k;
> + env->vfp.vreg[dest + k * lmul].u16[j] =
> + cpu_ldub_data(env, env->gpr[rs1] + read);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + read = i * (nf + 1) + k;
> + env->vfp.vreg[dest + k * lmul].u32[j] =
> + cpu_ldub_data(env, env->gpr[rs1] + read);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + read = i * (nf + 1) + k;
> + env->vfp.vreg[dest + k * lmul].u64[j] =
> + cpu_ldub_data(env, env->gpr[rs1] + read);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_segment(env, dest, j, width, k, lmul);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
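For the unit-stride segment loads the byte offset is
read = i * (nf + 1) + k: the nf+1 fields of one segment sit back to back
and segment i immediately follows segment i-1. A worked example
(illustrative only):

    /*
     * nf = 2, i.e. three fields a/b/c per segment, byte elements:
     *   memory: a0 b0 c0 a1 b1 c1 a2 b2 c2 ...
     * Field k of element i is read from gpr[rs1] + i * 3 + k and written
     * to vector register rd + k * lmul, element slot j.
     */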
> +void VECTOR_HELPER(vlb_v)(CPURISCVState *env, uint32_t nf, uint32_t vm,
> + uint32_t rs1, uint32_t rd)
> +{
> + int i, j, k, vl, vlmax, lmul, width, dest, read;
> +
> + vl = env->vfp.vl;
> +
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> +    if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + if (lmul * (nf + 1) > 32) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + k = nf;
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + read = i * (nf + 1) + k;
> + env->vfp.vreg[dest + k * lmul].s8[j] =
> + cpu_ldsb_data(env, env->gpr[rs1] + read);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + read = i * (nf + 1) + k;
> +                        env->vfp.vreg[dest + k * lmul].s16[j] = sign_extend(
> +                            cpu_ldsb_data(env, env->gpr[rs1] + read), 8);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + read = i * (nf + 1) + k;
> +                        env->vfp.vreg[dest + k * lmul].s32[j] = sign_extend(
> +                            cpu_ldsb_data(env, env->gpr[rs1] + read), 8);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + read = i * (nf + 1) + k;
> +                        env->vfp.vreg[dest + k * lmul].s64[j] = sign_extend(
> +                            cpu_ldsb_data(env, env->gpr[rs1] + read), 8);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_segment(env, dest, j, width, k, lmul);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vlsbu_v)(CPURISCVState *env, uint32_t nf, uint32_t vm,
> + uint32_t rs1, uint32_t rs2, uint32_t rd)
> +{
> + int i, j, k, vl, vlmax, lmul, width, dest, read;
> +
> + vl = env->vfp.vl;
> +
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> +    if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + if (lmul * (nf + 1) > 32) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + k = nf;
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + read = i * env->gpr[rs2] + k;
> + env->vfp.vreg[dest + k * lmul].u8[j] =
> + cpu_ldub_data(env, env->gpr[rs1] + read);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + read = i * env->gpr[rs2] + k;
> + env->vfp.vreg[dest + k * lmul].u16[j] =
> + cpu_ldub_data(env, env->gpr[rs1] + read);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + read = i * env->gpr[rs2] + k;
> + env->vfp.vreg[dest + k * lmul].u32[j] =
> + cpu_ldub_data(env, env->gpr[rs1] + read);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + read = i * env->gpr[rs2] + k;
> + env->vfp.vreg[dest + k * lmul].u64[j] =
> + cpu_ldub_data(env, env->gpr[rs1] + read);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_segment(env, dest, j, width, k, lmul);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
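The strided variants change only the address arithmetic: rs2 names a GPR
holding a byte stride, so field k of element i comes from

    target_ulong addr = env->gpr[rs1] + i * env->gpr[rs2] + k;

(k * 2 in the halfword forms further down). With stride 16 and nf = 0 this
gathers one byte from every 16th address, for example.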
> +void VECTOR_HELPER(vlsb_v)(CPURISCVState *env, uint32_t nf, uint32_t vm,
> + uint32_t rs1, uint32_t rs2, uint32_t rd)
> +{
> + int i, j, k, vl, vlmax, lmul, width, dest, read;
> +
> + vl = env->vfp.vl;
> +
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> +    if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + if (lmul * (nf + 1) > 32) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + k = nf;
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + read = i * env->gpr[rs2] + k;
> + env->vfp.vreg[dest + k * lmul].s8[j] =
> + cpu_ldsb_data(env, env->gpr[rs1] + read);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + read = i * env->gpr[rs2] + k;
> +                        env->vfp.vreg[dest + k * lmul].s16[j] = sign_extend(
> +                            cpu_ldsb_data(env, env->gpr[rs1] + read), 8);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + read = i * env->gpr[rs2] + k;
> +                        env->vfp.vreg[dest + k * lmul].s32[j] = sign_extend(
> +                            cpu_ldsb_data(env, env->gpr[rs1] + read), 8);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + read = i * env->gpr[rs2] + k;
> +                        env->vfp.vreg[dest + k * lmul].s64[j] = sign_extend(
> +                            cpu_ldsb_data(env, env->gpr[rs1] + read), 8);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_segment(env, dest, j, width, k, lmul);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vlxbu_v)(CPURISCVState *env, uint32_t nf, uint32_t vm,
> + uint32_t rs1, uint32_t rs2, uint32_t rd)
> +{
> + int i, j, k, vl, vlmax, lmul, width, dest, src2;
> + target_ulong addr;
> +
> + vl = env->vfp.vl;
> +
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> +    if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + if (lmul * (nf + 1) > 32) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + k = nf;
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> +                        addr = vector_get_index(env, rs1, src2,
> +                            j, 1, width, k);
> + env->vfp.vreg[dest + k * lmul].u8[j] =
> + cpu_ldub_data(env, addr);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> +                        addr = vector_get_index(env, rs1, src2,
> +                            j, 1, width, k);
> + env->vfp.vreg[dest + k * lmul].u16[j] =
> + cpu_ldub_data(env, addr);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> +                        addr = vector_get_index(env, rs1, src2,
> +                            j, 1, width, k);
> + env->vfp.vreg[dest + k * lmul].u32[j] =
> + cpu_ldub_data(env, addr);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> +                        addr = vector_get_index(env, rs1, src2,
> +                            j, 1, width, k);
> + env->vfp.vreg[dest + k * lmul].u64[j] =
> + cpu_ldub_data(env, addr);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_segment(env, dest, j, width, k, lmul);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
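The indexed variants take the per-element offset from the vs2 register
group via vector_get_index(); judging from the byte and halfword call
sites, the extra scalar arguments are the memory element size and the
field number, so the effective address is roughly (offset_from_vs2 is a
purely hypothetical stand-in for the value the helper extracts from vreg
group src2, element j):

    addr = env->gpr[rs1] + offset_from_vs2(env, src2, j) + k * msize;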
> +void VECTOR_HELPER(vlxb_v)(CPURISCVState *env, uint32_t nf, uint32_t vm,
> + uint32_t rs1, uint32_t rs2, uint32_t rd)
> +{
> + int i, j, k, vl, vlmax, lmul, width, dest, src2;
> + target_ulong addr;
> +
> + vl = env->vfp.vl;
> +
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> +    if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + if (lmul * (nf + 1) > 32) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + k = nf;
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> +                        addr = vector_get_index(env, rs1, src2,
> +                            j, 1, width, k);
> + env->vfp.vreg[dest + k * lmul].s8[j] =
> + cpu_ldsb_data(env, addr);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> +                        addr = vector_get_index(env, rs1, src2,
> +                            j, 1, width, k);
> +                        env->vfp.vreg[dest + k * lmul].s16[j] = sign_extend(
> +                            cpu_ldsb_data(env, addr), 8);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> +                        addr = vector_get_index(env, rs1, src2,
> +                            j, 1, width, k);
> +                        env->vfp.vreg[dest + k * lmul].s32[j] = sign_extend(
> +                            cpu_ldsb_data(env, addr), 8);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> +                        addr = vector_get_index(env, rs1, src2,
> +                            j, 1, width, k);
> +                        env->vfp.vreg[dest + k * lmul].s64[j] = sign_extend(
> +                            cpu_ldsb_data(env, addr), 8);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_segment(env, dest, j, width, k, lmul);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vlbuff_v)(CPURISCVState *env, uint32_t nf, uint32_t vm,
> + uint32_t rs1, uint32_t rd)
> +{
> + int i, j, k, vl, vlmax, lmul, width, dest, read;
> +
> + vl = env->vfp.vl;
> +
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> +    if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + if (lmul * (nf + 1) > 32) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + env->foflag = true;
> + env->vfp.vl = 0;
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + k = nf;
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + read = i * (nf + 1) + k;
> + env->vfp.vreg[dest + k * lmul].u8[j] =
> + cpu_ldub_data(env, env->gpr[rs1] + read);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + env->vfp.vl++;
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + read = i * (nf + 1) + k;
> + env->vfp.vreg[dest + k * lmul].u16[j] =
> + cpu_ldub_data(env, env->gpr[rs1] + read);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + env->vfp.vl++;
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + read = i * (nf + 1) + k;
> + env->vfp.vreg[dest + k * lmul].u32[j] =
> + cpu_ldub_data(env, env->gpr[rs1] + read);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + env->vfp.vl++;
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + read = i * (nf + 1) + k;
> + env->vfp.vreg[dest + k * lmul].u64[j] =
> + cpu_ldub_data(env, env->gpr[rs1] + read);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + env->vfp.vl++;
> + break;
> + default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_segment(env, dest, j, width, k, lmul);
> + }
> + }
> + env->foflag = false;
> + env->vfp.vl = vl;
> + env->vfp.vstart = 0;
> +}
> +
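The fault-only-first ("ff") variants implement vleff semantics by counting
successful elements: vl is cleared up front, foflag tells the fault path
that a trap on any element after the first should truncate vl rather than
be raised, and vl is re-incremented per completed element. Stripped to its
control flow, the scheme is:

    env->foflag = true;
    env->vfp.vl = 0;
    for (i = 0; i < vlmax; i++) {
        /* load element i; a fault with i > 0 unwinds here, leaving
         * env->vfp.vl == i as the truncated vector length */
        env->vfp.vl++;
    }
    env->foflag = false;
    env->vfp.vl = vl; /* no fault: restore the original vl */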
> +void VECTOR_HELPER(vlbff_v)(CPURISCVState *env, uint32_t nf, uint32_t vm,
> + uint32_t rs1, uint32_t rd)
> +{
> + int i, j, k, vl, vlmax, lmul, width, dest, read;
> +
> + vl = env->vfp.vl;
> +
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> +    if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + if (lmul * (nf + 1) > 32) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rd, false);
> + env->foflag = true;
> + env->vfp.vl = 0;
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + k = nf;
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + read = i * (nf + 1) + k;
> + env->vfp.vreg[dest + k * lmul].s8[j] =
> + cpu_ldsb_data(env, env->gpr[rs1] + read);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + env->vfp.vl++;
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + read = i * (nf + 1) + k;
> +                        env->vfp.vreg[dest + k * lmul].s16[j] = sign_extend(
> +                            cpu_ldsb_data(env, env->gpr[rs1] + read), 8);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + env->vfp.vl++;
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + read = i * (nf + 1) + k;
> +                        env->vfp.vreg[dest + k * lmul].s32[j] = sign_extend(
> +                            cpu_ldsb_data(env, env->gpr[rs1] + read), 8);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + env->vfp.vl++;
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + read = i * (nf + 1) + k;
> +                        env->vfp.vreg[dest + k * lmul].s64[j] = sign_extend(
> +                            cpu_ldsb_data(env, env->gpr[rs1] + read), 8);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + env->vfp.vl++;
> + break;
> + default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_segment(env, dest, j, width, k, lmul);
> + }
> + }
> + env->foflag = false;
> + env->vfp.vl = vl;
> + env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vlhu_v)(CPURISCVState *env, uint32_t nf, uint32_t vm,
> + uint32_t rs1, uint32_t rd)
> +{
> + int i, j, k, vl, vlmax, lmul, width, dest, read;
> +
> + vl = env->vfp.vl;
> +
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> +    if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + if (lmul * (nf + 1) > 32) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + k = nf;
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + read = (i * (nf + 1) + k) * 2;
> + env->vfp.vreg[dest + k * lmul].u16[j] =
> + cpu_lduw_data(env, env->gpr[rs1] + read);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + read = (i * (nf + 1) + k) * 2;
> + env->vfp.vreg[dest + k * lmul].u32[j] =
> + cpu_lduw_data(env, env->gpr[rs1] + read);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + read = (i * (nf + 1) + k) * 2;
> + env->vfp.vreg[dest + k * lmul].u64[j] =
> + cpu_lduw_data(env, env->gpr[rs1] + read);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_segment(env, dest, j, width, k, lmul);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vlh_v)(CPURISCVState *env, uint32_t nf, uint32_t vm,
> + uint32_t rs1, uint32_t rd)
> +{
> + int i, j, k, vl, vlmax, lmul, width, dest, read;
> +
> + vl = env->vfp.vl;
> +
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> +    if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + if (lmul * (nf + 1) > 32) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + k = nf;
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + read = (i * (nf + 1) + k) * 2;
> + env->vfp.vreg[dest + k * lmul].s16[j] =
> + cpu_ldsw_data(env, env->gpr[rs1] + read);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + read = (i * (nf + 1) + k) * 2;
> +                        env->vfp.vreg[dest + k * lmul].s32[j] = sign_extend(
> +                            cpu_ldsw_data(env, env->gpr[rs1] + read), 16);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + read = (i * (nf + 1) + k) * 2;
> +                        env->vfp.vreg[dest + k * lmul].s64[j] = sign_extend(
> +                            cpu_ldsw_data(env, env->gpr[rs1] + read), 16);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_segment(env, dest, j, width, k, lmul);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vlshu_v)(CPURISCVState *env, uint32_t nf, uint32_t vm,
> + uint32_t rs1, uint32_t rs2, uint32_t rd)
> +{
> + int i, j, k, vl, vlmax, lmul, width, dest, read;
> +
> + vl = env->vfp.vl;
> +
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> +    if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + if (lmul * (nf + 1) > 32) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + k = nf;
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + read = i * env->gpr[rs2] + k * 2;
> + env->vfp.vreg[dest + k * lmul].u16[j] =
> + cpu_lduw_data(env, env->gpr[rs1] + read);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + read = i * env->gpr[rs2] + k * 2;
> + env->vfp.vreg[dest + k * lmul].u32[j] =
> + cpu_lduw_data(env, env->gpr[rs1] + read);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + read = i * env->gpr[rs2] + k * 2;
> + env->vfp.vreg[dest + k * lmul].u64[j] =
> + cpu_lduw_data(env, env->gpr[rs1] + read);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_segment(env, dest, j, width, k, lmul);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vlsh_v)(CPURISCVState *env, uint32_t nf, uint32_t vm,
> + uint32_t rs1, uint32_t rs2, uint32_t rd)
> +{
> + int i, j, k, vl, vlmax, lmul, width, dest, read;
> +
> + vl = env->vfp.vl;
> +
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> +    if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + if (lmul * (nf + 1) > 32) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + k = nf;
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + read = i * env->gpr[rs2] + k * 2;
> + env->vfp.vreg[dest + k * lmul].s16[j] =
> + cpu_ldsw_data(env, env->gpr[rs1] + read);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + read = i * env->gpr[rs2] + k * 2;
> +                        env->vfp.vreg[dest + k * lmul].s32[j] = sign_extend(
> +                            cpu_ldsw_data(env, env->gpr[rs1] + read), 16);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + read = i * env->gpr[rs2] + k * 2;
> +                        env->vfp.vreg[dest + k * lmul].s64[j] = sign_extend(
> +                            cpu_ldsw_data(env, env->gpr[rs1] + read), 16);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_segment(env, dest, j, width, k, lmul);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vlxhu_v)(CPURISCVState *env, uint32_t nf, uint32_t vm,
> + uint32_t rs1, uint32_t rs2, uint32_t rd)
> +{
> + int i, j, k, vl, vlmax, lmul, width, dest, src2;
> + target_ulong addr;
> +
> + vl = env->vfp.vl;
> +
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> +    if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + if (lmul * (nf + 1) > 32) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + k = nf;
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> +                        addr = vector_get_index(env, rs1, src2,
> +                            j, 2, width, k);
> + env->vfp.vreg[dest + k * lmul].u16[j] =
> + cpu_lduw_data(env, addr);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> +                        addr = vector_get_index(env, rs1, src2,
> +                            j, 2, width, k);
> + env->vfp.vreg[dest + k * lmul].u32[j] =
> + cpu_lduw_data(env, addr);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> +                        addr = vector_get_index(env, rs1, src2,
> +                            j, 2, width, k);
> + env->vfp.vreg[dest + k * lmul].u64[j] =
> + cpu_lduw_data(env, addr);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_segment(env, dest, j, width, k, lmul);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vlxh_v)(CPURISCVState *env, uint32_t nf, uint32_t vm,
> + uint32_t rs1, uint32_t rs2, uint32_t rd)
> +{
> + int i, j, k, vl, vlmax, lmul, width, dest, src2;
> + target_ulong addr;
> +
> + vl = env->vfp.vl;
> +
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> +    if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + if (lmul * (nf + 1) > 32) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + k = nf;
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> +                        addr = vector_get_index(env, rs1, src2,
> +                            j, 2, width, k);
> + env->vfp.vreg[dest + k * lmul].s16[j] =
> + cpu_ldsw_data(env, addr);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> +                        addr = vector_get_index(env, rs1, src2,
> +                            j, 2, width, k);
> +                        env->vfp.vreg[dest + k * lmul].s32[j] = sign_extend(
> +                            cpu_ldsw_data(env, addr), 16);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> +                        addr = vector_get_index(env, rs1, src2,
> +                            j, 2, width, k);
> +                        env->vfp.vreg[dest + k * lmul].s64[j] = sign_extend(
> +                            cpu_ldsw_data(env, addr), 16);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_segment(env, dest, j, width, k, lmul);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vlhuff_v)(CPURISCVState *env, uint32_t nf, uint32_t vm,
> + uint32_t rs1, uint32_t rd)
> +{
> + int i, j, k, vl, vlmax, lmul, width, dest, read;
> +
> + vl = env->vfp.vl;
> +
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> +    if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + if (lmul * (nf + 1) > 32) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rd, false);
> + env->foflag = true;
> + env->vfp.vl = 0;
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + k = nf;
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + read = (i * (nf + 1) + k) * 2;
> + env->vfp.vreg[dest + k * lmul].u16[j] =
> + cpu_lduw_data(env, env->gpr[rs1] + read);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + env->vfp.vl++;
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + read = (i * (nf + 1) + k) * 2;
> + env->vfp.vreg[dest + k * lmul].u32[j] =
> + cpu_lduw_data(env, env->gpr[rs1] + read);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + env->vfp.vl++;
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + read = (i * (nf + 1) + k) * 2;
> + env->vfp.vreg[dest + k * lmul].u64[j] =
> + cpu_lduw_data(env, env->gpr[rs1] + read);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + env->vfp.vl++;
> + break;
> + default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_segment(env, dest, j, width, k, lmul);
> + }
> + }
> + env->foflag = false;
> + env->vfp.vl = vl;
> + env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vlhff_v)(CPURISCVState *env, uint32_t nf, uint32_t vm,
> + uint32_t rs1, uint32_t rd)
> +{
> + int i, j, k, vl, vlmax, lmul, width, dest, read;
> +
> + vl = env->vfp.vl;
> +
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> +    if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + if (lmul * (nf + 1) > 32) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rd, false);
> + env->foflag = true;
> + env->vfp.vl = 0;
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + k = nf;
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + read = (i * (nf + 1) + k) * 2;
> + env->vfp.vreg[dest + k * lmul].s16[j] =
> + cpu_ldsw_data(env, env->gpr[rs1] + read);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + env->vfp.vl++;
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + read = (i * (nf + 1) + k) * 2;
> +                        env->vfp.vreg[dest + k * lmul].s32[j] = sign_extend(
> +                            cpu_ldsw_data(env, env->gpr[rs1] + read), 16);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + env->vfp.vl++;
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + read = (i * (nf + 1) + k) * 2;
> +                        env->vfp.vreg[dest + k * lmul].s64[j] = sign_extend(
> +                            cpu_ldsw_data(env, env->gpr[rs1] + read), 16);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + env->vfp.vl++;
> + break;
> + default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_segment(env, dest, j, width, k, lmul);
> + }
> + }
> + env->foflag = false;
> + env->vfp.vl = vl;
> + env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vlw_v)(CPURISCVState *env, uint32_t nf, uint32_t vm,
> + uint32_t rs1, uint32_t rd)
> +{
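> + /* vlw.v: unit-stride word load, sign-extended to SEW */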
> + int i, j, k, vl, vlmax, lmul, width, dest, read;
> +
> + vl = env->vfp.vl;
> +
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + if (lmul * (nf + 1) > 32) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + k = nf;
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + read = (i * (nf + 1) + k) * 4;
> + env->vfp.vreg[dest + k * lmul].s32[j] =
> + cpu_ldl_data(env, env->gpr[rs1] + read);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + read = (i * (nf + 1) + k) * 4;
> + env->vfp.vreg[dest + k * lmul].s64[j] = sign_extend(
> + cpu_ldl_data(env, env->gpr[rs1] + read), 32);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_segment(env, dest, j, width, k, lmul);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vlwu_v)(CPURISCVState *env, uint32_t nf, uint32_t vm,
> + uint32_t rs1, uint32_t rd)
> +{
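> + /* vlwu.v: unit-stride word load, zero-extended to SEW */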
> + int i, j, k, vl, vlmax, lmul, width, dest, read;
> +
> + vl = env->vfp.vl;
> +
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + if (lmul * (nf + 1) > 32) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + k = nf;
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + read = (i * (nf + 1) + k) * 4;
> + env->vfp.vreg[dest + k * lmul].u32[j] =
> + cpu_ldl_data(env, env->gpr[rs1] + read);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + read = (i * (nf + 1) + k) * 4;
> + env->vfp.vreg[dest + k * lmul].u64[j] =
> + cpu_ldl_data(env, env->gpr[rs1] + read);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_segment(env, dest, j, width, k, lmul);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vlswu_v)(CPURISCVState *env, uint32_t nf, uint32_t vm,
> + uint32_t rs1, uint32_t rs2, uint32_t rd)
> +{
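> + /* vlswu.v: strided word load (byte stride in rs2), zero-extended to SEW */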
> + int i, j, k, vl, vlmax, lmul, width, dest, read;
> +
> + vl = env->vfp.vl;
> +
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + if (lmul * (nf + 1) > 32) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + k = nf;
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + read = i * env->gpr[rs2] + k * 4;
> + env->vfp.vreg[dest + k * lmul].u32[j] =
> + cpu_ldl_data(env, env->gpr[rs1] + read);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + read = i * env->gpr[rs2] + k * 4;
> + env->vfp.vreg[dest + k * lmul].u64[j] =
> + cpu_ldl_data(env, env->gpr[rs1] + read);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_segment(env, dest, j, width, k, lmul);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vlsw_v)(CPURISCVState *env, uint32_t nf, uint32_t vm,
> + uint32_t rs1, uint32_t rs2, uint32_t rd)
> +{
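> + /* vlsw.v: strided word load (byte stride in rs2), sign-extended to SEW */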
> + int i, j, k, vl, vlmax, lmul, width, dest, read;
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + if (lmul * (nf + 1) > 32) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + k = nf;
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + read = i * env->gpr[rs2] + k * 4;
> + env->vfp.vreg[dest + k * lmul].s32[j] =
> + cpu_ldl_data(env, env->gpr[rs1] + read);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + read = i * env->gpr[rs2] + k * 4;
> + env->vfp.vreg[dest + k * lmul].s64[j] = sign_extend(
> + cpu_ldl_data(env, env->gpr[rs1] + read), 32);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_segment(env, dest, j, width, k, lmul);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vlxwu_v)(CPURISCVState *env, uint32_t nf, uint32_t vm,
> + uint32_t rs1, uint32_t rs2, uint32_t rd)
> +{
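> + /* vlxwu.v: indexed word load (offsets in vs2), zero-extended to SEW */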
> + int i, j, k, vl, vlmax, lmul, width, dest, src2;
> + target_ulong addr;
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + if (lmul * (nf + 1) > 32) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + k = nf;
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + addr = vector_get_index(env, rs1, src2, j, 4, width, k);
> + env->vfp.vreg[dest + k * lmul].u32[j] =
> + cpu_ldl_data(env, addr);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + addr = vector_get_index(env, rs1, src2, j, 4, width, k);
> + env->vfp.vreg[dest + k * lmul].u64[j] =
> + cpu_ldl_data(env, addr);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_segment(env, dest, j, width, k, lmul);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vlxw_v)(CPURISCVState *env, uint32_t nf, uint32_t vm,
> + uint32_t rs1, uint32_t rs2, uint32_t rd)
> +{
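> + /* vlxw.v: indexed word load (offsets in vs2), sign-extended to SEW */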
> + int i, j, k, vl, vlmax, lmul, width, dest, src2;
> + target_ulong addr;
> +
> + vl = env->vfp.vl;
> +
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + if (lmul * (nf + 1) > 32) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + k = nf;
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + addr = vector_get_index(env, rs1, src2, j, 4, width, k);
> + env->vfp.vreg[dest + k * lmul].s32[j] =
> + cpu_ldl_data(env, addr);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + addr = vector_get_index(env, rs1, src2, j, 4, width, k);
> + env->vfp.vreg[dest + k * lmul].s64[j] = sign_extend(
> + cpu_ldl_data(env, addr), 32);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_segment(env, dest, j, width, k, lmul);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vlwuff_v)(CPURISCVState *env, uint32_t nf, uint32_t vm,
> + uint32_t rs1, uint32_t rd)
> +{
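> + /* vlwuff.v: fault-only-first form of vlwu.v */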
> + int i, j, k, vl, vlmax, lmul, width, dest, read;
> +
> + vl = env->vfp.vl;
> +
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + if (lmul * (nf + 1) > 32) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rd, false);
> + env->foflag = true;
> + env->vfp.vl = 0;
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + k = nf;
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + read = (i * (nf + 1) + k) * 4;
> + env->vfp.vreg[dest + k * lmul].u32[j] =
> + cpu_ldl_data(env, env->gpr[rs1] + read);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + env->vfp.vl++;
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + read = (i * (nf + 1) + k) * 4;
> + env->vfp.vreg[dest + k * lmul].u64[j] =
> + cpu_ldl_data(env, env->gpr[rs1] + read);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + env->vfp.vl++;
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_segment(env, dest, j, width, k, lmul);
> + }
> + }
> + env->foflag = false;
> + env->vfp.vl = vl;
> + env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vlwff_v)(CPURISCVState *env, uint32_t nf, uint32_t vm,
> + uint32_t rs1, uint32_t rd)
> +{
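> + /* vlwff.v: fault-only-first form of vlw.v */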
> + int i, j, k, vl, vlmax, lmul, width, dest, read;
> +
> + vl = env->vfp.vl;
> +
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + if (lmul * (nf + 1) > 32) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rd, false);
> + env->foflag = true;
> + env->vfp.vl = 0;
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + k = nf;
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + read = (i * (nf + 1) + k) * 4;
> + env->vfp.vreg[dest + k * lmul].s32[j] =
> + cpu_ldl_data(env, env->gpr[rs1] + read);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + env->vfp.vl++;
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + read = (i * (nf + 1) + k) * 4;
> + env->vfp.vreg[dest + k * lmul].s64[j] = sign_extend(
> + cpu_ldl_data(env, env->gpr[rs1] + read), 32);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + env->vfp.vl++;
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_segment(env, dest, j, width, k, lmul);
> + }
> + }
> + env->foflag = false;
> + env->vfp.vl = vl;
> + env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vle_v)(CPURISCVState *env, uint32_t nf, uint32_t vm,
> + uint32_t rs1, uint32_t rd)
> +{
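> + /* vle.v: unit-stride load of SEW-wide (8/16/32/64-bit) elements */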
> + int i, j, k, vl, vlmax, lmul, width, dest, read;
> +
> + vl = env->vfp.vl;
> +
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + if (lmul * (nf + 1) > 32) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + k = nf;
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + read = i * (nf + 1) + k;
> + env->vfp.vreg[dest + k * lmul].u8[j] =
> + cpu_ldub_data(env, env->gpr[rs1] + read);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + read = (i * (nf + 1) + k) * 2;
> + env->vfp.vreg[dest + k * lmul].u16[j] =
> + cpu_lduw_data(env, env->gpr[rs1] + read);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + read = (i * (nf + 1) + k) * 4;
> + env->vfp.vreg[dest + k * lmul].u32[j] =
> + cpu_ldl_data(env, env->gpr[rs1] + read);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + read = (i * (nf + 1) + k) * 8;
> + env->vfp.vreg[dest + k * lmul].u64[j] =
> + cpu_ldq_data(env, env->gpr[rs1] + read);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_segment(env, dest, j, width, k, lmul);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vlse_v)(CPURISCVState *env, uint32_t nf, uint32_t vm,
> + uint32_t rs1, uint32_t rs2, uint32_t rd)
> +{
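> + /* vlse.v: strided load of SEW-wide elements, byte stride in rs2 */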
> + int i, j, k, vl, vlmax, lmul, width, dest, read;
> +
> + vl = env->vfp.vl;
> +
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + if (lmul * (nf + 1) > 32) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + k = nf;
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + read = i * env->gpr[rs2] + k;
> + env->vfp.vreg[dest + k * lmul].u8[j] =
> + cpu_ldub_data(env, env->gpr[rs1] + read);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + read = i * env->gpr[rs2] + k * 2;
> + env->vfp.vreg[dest + k * lmul].u16[j] =
> + cpu_lduw_data(env, env->gpr[rs1] + read);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + read = i * env->gpr[rs2] + k * 4;
> + env->vfp.vreg[dest + k * lmul].u32[j] =
> + cpu_ldl_data(env, env->gpr[rs1] + read);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + read = i * env->gpr[rs2] + k * 8;
> + env->vfp.vreg[dest + k * lmul].u64[j] =
> + cpu_ldq_data(env, env->gpr[rs1] + read);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_segment(env, dest, j, width, k, lmul);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vlxe_v)(CPURISCVState *env, uint32_t nf, uint32_t vm,
> + uint32_t rs1, uint32_t rs2, uint32_t rd)
> +{
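> + /* vlxe.v: indexed load of SEW-wide elements, offsets in vs2 */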
> + int i, j, k, vl, vlmax, lmul, width, dest, src2;
> + target_ulong addr;
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + if (lmul * (nf + 1) > 32) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + k = nf;
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + addr = vector_get_index(env, rs1, src2, j, 1, width, k);
> + env->vfp.vreg[dest + k * lmul].u8[j] =
> + cpu_ldub_data(env, addr);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + addr = vector_get_index(env, rs1, src2, j, 2, width, k);
> + env->vfp.vreg[dest + k * lmul].u16[j] =
> + cpu_lduw_data(env, addr);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + addr = vector_get_index(env, rs1, src2, j, 4, width, k);
> + env->vfp.vreg[dest + k * lmul].u32[j] =
> + cpu_ldl_data(env, addr);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + addr = vector_get_index(env, rs1, src2, j, 8, width, k);
> + env->vfp.vreg[dest + k * lmul].u64[j] =
> + cpu_ldq_data(env, addr);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_segment(env, dest, j, width, k, lmul);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vleff_v)(CPURISCVState *env, uint32_t nf, uint32_t vm,
> + uint32_t rs1, uint32_t rd)
> +{
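> + /* vleff.v: fault-only-first form of vle.v */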
> + int i, j, k, vl, vlmax, lmul, width, dest, read;
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + if (lmul * (nf + 1) > 32) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rd, false);
> + env->vfp.vl = 0;
> + env->foflag = true;
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + k = nf;
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + read = i * (nf + 1) + k;
> + env->vfp.vreg[dest + k * lmul].u8[j] =
> + cpu_ldub_data(env, env->gpr[rs1] + read);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + env->vfp.vl++;
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + read = (i * (nf + 1) + k) * 2;
> + env->vfp.vreg[dest + k * lmul].u16[j] =
> + cpu_lduw_data(env, env->gpr[rs1] + read);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + env->vfp.vl++;
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + read = (i * (nf + 1) + k) * 4;
> + env->vfp.vreg[dest + k * lmul].u32[j] =
> + cpu_ldl_data(env, env->gpr[rs1] + read);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + env->vfp.vl++;
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + read = (i * (nf + 1) + k) * 8;
> + env->vfp.vreg[dest + k * lmul].u64[j] =
> + cpu_ldq_data(env, env->gpr[rs1] + read);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + env->vfp.vl++;
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_segment(env, dest, j, width, k, lmul);
> + }
> + }
> + env->foflag = false;
> + env->vfp.vl = vl;
> + env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vsb_v)(CPURISCVState *env, uint32_t nf, uint32_t vm,
> + uint32_t rs1, uint32_t rd)
> +{
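> + /* vsb.v: unit-stride store of the least-significant byte of each element */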
> + int i, j, k, vl, vlmax, lmul, width, dest, wrote;
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + if (lmul * (nf + 1) > 32) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + k = nf;
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + wrote = i * (nf + 1) + k;
> + cpu_stb_data(env, env->gpr[rs1] + wrote,
> + env->vfp.vreg[dest + k * lmul].s8[j]);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + wrote = i * (nf + 1) + k;
> + cpu_stb_data(env, env->gpr[rs1] + wrote,
> + env->vfp.vreg[dest + k * lmul].s16[j]);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + wrote = i * (nf + 1) + k;
> + cpu_stb_data(env, env->gpr[rs1] + wrote,
> + env->vfp.vreg[dest + k * lmul].s32[j]);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + wrote = i * (nf + 1) + k;
> + cpu_stb_data(env, env->gpr[rs1] + wrote,
> + env->vfp.vreg[dest + k * lmul].s64[j]);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vssb_v)(CPURISCVState *env, uint32_t nf, uint32_t vm,
> + uint32_t rs1, uint32_t rs2, uint32_t rd)
> +{
> + int i, j, k, vl, vlmax, lmul, width, dest, wrote;
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + if (lmul * (nf + 1) > 32) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + k = nf;
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + wrote = i * env->gpr[rs2] + k;
> + cpu_stb_data(env, env->gpr[rs1] + wrote,
> + env->vfp.vreg[dest + k * lmul].s8[j]);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + wrote = i * env->gpr[rs2] + k;
> + cpu_stb_data(env, env->gpr[rs1] + wrote,
> + env->vfp.vreg[dest + k * lmul].s16[j]);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + wrote = i * env->gpr[rs2] + k;
> + cpu_stb_data(env, env->gpr[rs1] + wrote,
> + env->vfp.vreg[dest + k * lmul].s32[j]);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + wrote = i * env->gpr[rs2] + k;
> + cpu_stb_data(env, env->gpr[rs1] + wrote,
> + env->vfp.vreg[dest + k * lmul].s64[j]);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vsxb_v)(CPURISCVState *env, uint32_t nf, uint32_t vm,
> + uint32_t rs1, uint32_t rs2, uint32_t rd)
> +{
> + int i, j, k, vl, vlmax, lmul, width, dest, src2;
> + target_ulong addr;
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + if (lmul * (nf + 1) > 32) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + k = nf;
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + addr = vector_get_index(env, rs1, src2, j, 1, width, k);
> + cpu_stb_data(env, addr,
> + env->vfp.vreg[dest + k * lmul].s8[j]);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + addr = vector_get_index(env, rs1, src2, j, 1, width, k);
> + cpu_stb_data(env, addr,
> + env->vfp.vreg[dest + k * lmul].s16[j]);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + addr = vector_get_index(env, rs1, src2, j, 1, width, k);
> + cpu_stb_data(env, addr,
> + env->vfp.vreg[dest + k * lmul].s32[j]);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + addr = vector_get_index(env, rs1, src2, j, 1, width, k);
> + cpu_stb_data(env, addr,
> + env->vfp.vreg[dest + k * lmul].s64[j]);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vsuxb_v)(CPURISCVState *env, uint32_t nf, uint32_t vm,
> + uint32_t rs1, uint32_t rs2, uint32_t rd)
> +{
> + VECTOR_HELPER(vsxb_v)(env, nf, vm, rs1, rs2, rd);
> +}
> +
> +void VECTOR_HELPER(vsh_v)(CPURISCVState *env, uint32_t nf, uint32_t vm,
> + uint32_t rs1, uint32_t rd)
> +{
> + int i, j, k, vl, vlmax, lmul, width, dest, wrote;
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + if (lmul * (nf + 1) > 32) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + k = nf;
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + wrote = (i * (nf + 1) + k) * 2;
> + cpu_stw_data(env, env->gpr[rs1] + wrote,
> + env->vfp.vreg[dest + k * lmul].s16[j]);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + wrote = (i * (nf + 1) + k) * 2;
> + cpu_stw_data(env, env->gpr[rs1] + wrote,
> + env->vfp.vreg[dest + k * lmul].s32[j]);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + wrote = (i * (nf + 1) + k) * 2;
> + cpu_stw_data(env, env->gpr[rs1] + wrote,
> + env->vfp.vreg[dest + k * lmul].s64[j]);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vssh_v)(CPURISCVState *env, uint32_t nf, uint32_t vm,
> + uint32_t rs1, uint32_t rs2, uint32_t rd)
> +{
> + int i, j, k, vl, vlmax, lmul, width, dest, wrote;
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + if (lmul * (nf + 1) > 32) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + k = nf;
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + wrote = i * env->gpr[rs2] + k * 2;
> + cpu_stw_data(env, env->gpr[rs1] + wrote,
> + env->vfp.vreg[dest + k * lmul].s16[j]);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + wrote = i * env->gpr[rs2] + k * 2;
> + cpu_stw_data(env, env->gpr[rs1] + wrote,
> + env->vfp.vreg[dest + k * lmul].s32[j]);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + wrote = i * env->gpr[rs2] + k * 2;
> + cpu_stw_data(env, env->gpr[rs1] + wrote,
> + env->vfp.vreg[dest + k * lmul].s64[j]);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vsxh_v)(CPURISCVState *env, uint32_t nf, uint32_t vm,
> + uint32_t rs1, uint32_t rs2, uint32_t rd)
> +{
> + int i, j, k, vl, vlmax, lmul, width, dest, src2;
> + target_ulong addr;
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + if (lmul * (nf + 1) > 32) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + k = nf;
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + addr = vector_get_index(env, rs1, src2, j, 2, width, k);
> + cpu_stw_data(env, addr,
> + env->vfp.vreg[dest + k * lmul].s16[j]);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + addr = vector_get_index(env, rs1, src2, j, 2, width, k);
> + cpu_stw_data(env, addr,
> + env->vfp.vreg[dest + k * lmul].s32[j]);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + addr = vector_get_index(env, rs1, src2, j, 2, width, k);
> + cpu_stw_data(env, addr,
> + env->vfp.vreg[dest + k * lmul].s64[j]);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vsuxh_v)(CPURISCVState *env, uint32_t nf, uint32_t vm,
> + uint32_t rs1, uint32_t rs2, uint32_t rd)
> +{
> + VECTOR_HELPER(vsxh_v)(env, nf, vm, rs1, rs2, rd);
> +}
> +
> +void VECTOR_HELPER(vsw_v)(CPURISCVState *env, uint32_t nf, uint32_t vm,
> + uint32_t rs1, uint32_t rd)
> +{
> + int i, j, k, vl, vlmax, lmul, width, dest, wrote;
> +
> + vl = env->vfp.vl;
> +
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + if (lmul * (nf + 1) > 32) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + k = nf;
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + wrote = (i * (nf + 1) + k) * 4;
> + cpu_stl_data(env, env->gpr[rs1] + wrote,
> + env->vfp.vreg[dest + k * lmul].s32[j]);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + wrote = (i * (nf + 1) + k) * 4;
> + cpu_stl_data(env, env->gpr[rs1] + wrote,
> + env->vfp.vreg[dest + k * lmul].s64[j]);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vssw_v)(CPURISCVState *env, uint32_t nf, uint32_t vm,
> + uint32_t rs1, uint32_t rs2, uint32_t rd)
> +{
> + int i, j, k, vl, vlmax, lmul, width, dest, wrote;
> +
> + vl = env->vfp.vl;
> +
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + if (lmul * (nf + 1) > 32) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + k = nf;
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + wrote = i * env->gpr[rs2] + k * 4;
> + cpu_stl_data(env, env->gpr[rs1] + wrote,
> + env->vfp.vreg[dest + k * lmul].s32[j]);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + wrote = i * env->gpr[rs2] + k * 4;
> + cpu_stl_data(env, env->gpr[rs1] + wrote,
> + env->vfp.vreg[dest + k * lmul].s64[j]);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vsxw_v)(CPURISCVState *env, uint32_t nf, uint32_t vm,
> + uint32_t rs1, uint32_t rs2, uint32_t rd)
> +{
> + int i, j, k, vl, vlmax, lmul, width, dest, src2;
> + target_ulong addr;
> +
> + vl = env->vfp.vl;
> +
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + if (lmul * (nf + 1) > 32) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + k = nf;
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + addr = vector_get_index(env, rs1, src2, j, 4, width, k);
> + cpu_stl_data(env, addr,
> + env->vfp.vreg[dest + k * lmul].s32[j]);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + addr = vector_get_index(env, rs1, src2, j, 4, width, k);
> + cpu_stl_data(env, addr,
> + env->vfp.vreg[dest + k * lmul].s64[j]);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vsuxw_v)(CPURISCVState *env, uint32_t nf, uint32_t vm,
> + uint32_t rs1, uint32_t rs2, uint32_t rd)
> +{
> + VECTOR_HELPER(vsxw_v)(env, nf, vm, rs1, rs2, rd);
> +}
> +
> +void VECTOR_HELPER(vse_v)(CPURISCVState *env, uint32_t nf, uint32_t vm,
> + uint32_t rs1, uint32_t rd)
> +{
> + int i, j, k, vl, vlmax, lmul, width, dest, wrote;
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + if (lmul * (nf + 1) > 32) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + k = nf;
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + wrote = i * (nf + 1) + k;
> + cpu_stb_data(env, env->gpr[rs1] + wrote,
> + env->vfp.vreg[dest + k * lmul].s8[j]);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + wrote = (i * (nf + 1) + k) * 2;
> + cpu_stw_data(env, env->gpr[rs1] + wrote,
> + env->vfp.vreg[dest + k * lmul].s16[j]);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + wrote = (i * (nf + 1) + k) * 4;
> + cpu_stl_data(env, env->gpr[rs1] + wrote,
> + env->vfp.vreg[dest + k * lmul].s32[j]);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + wrote = (i * (nf + 1) + k) * 8;
> + cpu_stq_data(env, env->gpr[rs1] + wrote,
> + env->vfp.vreg[dest + k * lmul].s64[j]);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vsse_v)(CPURISCVState *env, uint32_t nf, uint32_t vm,
> + uint32_t rs1, uint32_t rs2, uint32_t rd)
> +{
> + int i, j, k, vl, vlmax, lmul, width, dest, wrote;
> +
> + vl = env->vfp.vl;
> +
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + if (lmul * (nf + 1) > 32) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + k = nf;
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + wrote = i * env->gpr[rs2] + k;
> + cpu_stb_data(env, env->gpr[rs1] + wrote,
> + env->vfp.vreg[dest + k * lmul].s8[j]);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + wrote = i * env->gpr[rs2] + k * 2;
> + cpu_stw_data(env, env->gpr[rs1] + wrote,
> + env->vfp.vreg[dest + k * lmul].s16[j]);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + wrote = i * env->gpr[rs2] + k * 4;
> + cpu_stl_data(env, env->gpr[rs1] + wrote,
> + env->vfp.vreg[dest + k * lmul].s32[j]);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + wrote = i * env->gpr[rs2] + k * 8;
> + cpu_stq_data(env, env->gpr[rs1] + wrote,
> + env->vfp.vreg[dest + k * lmul].s64[j]);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vsxe_v)(CPURISCVState *env, uint32_t nf, uint32_t vm,
> + uint32_t rs1, uint32_t rs2, uint32_t rd)
> +{
> + int i, j, k, vl, vlmax, lmul, width, dest, src2;
> + target_ulong addr;
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + if (lmul * (nf + 1) > 32) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + k = nf;
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + addr = vector_get_index(env, rs1, src2, j, 1, width, k);
> + cpu_stb_data(env, addr,
> + env->vfp.vreg[dest + k * lmul].s8[j]);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + addr = vector_get_index(env, rs1, src2, j, 2, width, k);
> + cpu_stw_data(env, addr,
> + env->vfp.vreg[dest + k * lmul].s16[j]);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + addr = vector_get_index(env, rs1, src2, j, 4, width, k);
> + cpu_stl_data(env, addr,
> + env->vfp.vreg[dest + k * lmul].s32[j]);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + addr = vector_get_index(env, rs1, src2, j, 8, width, k);
> + cpu_stq_data(env, addr,
> + env->vfp.vreg[dest + k * lmul].s64[j]);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vsuxe_v)(CPURISCVState *env, uint32_t nf, uint32_t vm,
> + uint32_t rs1, uint32_t rs2, uint32_t rd)
> +{
> + VECTOR_HELPER(vsxe_v)(env, nf, vm, rs1, rs2, rd);
> +}
> +
> +void VECTOR_HELPER(vamoswapw_v)(CPURISCVState *env, uint32_t wd, uint32_t vm,
> + uint32_t rs1, uint32_t vs2, uint32_t vs3)
> +{
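> + /* vamoswapw: indexed atomic 32-bit swap; when wd is set the old
> + * memory value is written back into vd (vs3) */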
> + int i, j, vl;
> + target_long idx;
> + uint32_t lmul, width, src2, src3, vlmax;
> + target_ulong addr;
> +#ifdef CONFIG_SOFTMMU
> + int mem_idx = cpu_mmu_index(env, false);
> + TCGMemOp memop = MO_ALIGN | MO_TESL;
> +#endif
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> + /* MEM <= SEW <= XLEN */
> + if (width < 32 || (width > sizeof(target_ulong) * 8)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + /* if wd is set, rd is written with the old value */
> + if (vector_vtype_ill(env) ||
> + (vector_overlap_vm_common(lmul, vm, vs3) && wd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, vs2, false);
> + vector_lmul_check_reg(env, lmul, vs3, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = vs2 + (i / (VLEN / width));
> + src3 = vs3 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + int32_t tmp;
> + idx = (target_long)env->vfp.vreg[src2].s32[j];
> + addr = idx + env->gpr[rs1];
> +#ifdef CONFIG_SOFTMMU
> + tmp = helper_atomic_xchgl_le(env, addr,
> + env->vfp.vreg[src3].s32[j],
> + make_memop_idx(memop & ~MO_SIGN, mem_idx));
> +#else
> + tmp = helper_atomic_xchgl_le(env, addr,
> + env->vfp.vreg[src3].s32[j]);
> +#endif
> + if (wd) {
> + env->vfp.vreg[src3].s32[j] = tmp;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + int64_t tmp;
> + idx = (target_long)env->vfp.vreg[src2].s64[j];
> + addr = idx + env->gpr[rs1];
> +
> +#ifdef CONFIG_SOFTMMU
> + tmp = (int64_t)(int32_t)helper_atomic_xchgl_le(env, addr,
> + env->vfp.vreg[src3].s64[j],
> + make_memop_idx(memop & ~MO_SIGN, mem_idx));
> +#else
> + tmp = (int64_t)(int32_t)helper_atomic_xchgl_le(env, addr,
> + env->vfp.vreg[src3].s64[j]);
> +#endif
> + if (wd) {
> + env->vfp.vreg[src3].s64[j] = tmp;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_amo(env, src3, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +
> +void VECTOR_HELPER(vamoswapd_v)(CPURISCVState *env, uint32_t wd, uint32_t vm,
> + uint32_t rs1, uint32_t vs2, uint32_t vs3)
> +{
> + int i, j, vl;
> + target_long idx;
> + uint32_t lmul, width, src2, src3, vlmax;
> + target_ulong addr;
> +#ifdef CONFIG_SOFTMMU
> + int mem_idx = cpu_mmu_index(env, false);
> + TCGMemOp memop = MO_ALIGN | MO_TEQ;
> +#endif
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> + /* MEM <= SEW <= XLEN */
> + if (width < 64 || (width > sizeof(target_ulong) * 8)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + /* if wd is set, rd is written with the old value */
> + if (vector_vtype_ill(env) ||
> + (vector_overlap_vm_common(lmul, vm, vs3) && wd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, vs2, false);
> + vector_lmul_check_reg(env, lmul, vs3, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = vs2 + (i / (VLEN / width));
> + src3 = vs3 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + int64_t tmp;
> + idx = (target_long)env->vfp.vreg[src2].s64[j];
> + addr = idx + env->gpr[rs1];
> +
> +#ifdef CONFIG_SOFTMMU
> + tmp = helper_atomic_xchgq_le(env, addr,
> + env->vfp.vreg[src3].s64[j],
> + make_memop_idx(memop & ~MO_SIGN, mem_idx));
> +#else
> + tmp = helper_atomic_xchgq_le(env, addr,
> + env->vfp.vreg[src3].s64[j]);
> +#endif
> +
> + if (wd) {
> + env->vfp.vreg[src3].s64[j] = tmp;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_amo(env, src3, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vamoaddw_v)(CPURISCVState *env, uint32_t wd, uint32_t vm,
> + uint32_t rs1, uint32_t vs2, uint32_t vs3)
> +{
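> + /* vamoaddw: indexed atomic 32-bit fetch-and-add; old value written
> + * back to vd (vs3) when wd is set */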
> + int i, j, vl;
> + target_long idx;
> + uint32_t lmul, width, src2, src3, vlmax;
> + target_ulong addr;
> +#ifdef CONFIG_SOFTMMU
> + int mem_idx = cpu_mmu_index(env, false);
> + TCGMemOp memop = MO_ALIGN | MO_TESL;
> +#endif
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> + /* MEM <= SEW <= XLEN */
> + if (width < 32 || (width > sizeof(target_ulong) * 8)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + /* if wd is set, rd is written with the old value */
> + if (vector_vtype_ill(env) ||
> + (vector_overlap_vm_common(lmul, vm, vs3) && wd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, vs2, false);
> + vector_lmul_check_reg(env, lmul, vs3, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = vs2 + (i / (VLEN / width));
> + src3 = vs3 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + int32_t tmp;
> + idx = (target_long)env->vfp.vreg[src2].s32[j];
> + addr = idx + env->gpr[rs1];
> +#ifdef CONFIG_SOFTMMU
> + tmp = helper_atomic_fetch_addl_le(env, addr,
> + env->vfp.vreg[src3].s32[j],
> + make_memop_idx(memop & ~MO_SIGN, mem_idx));
> +#else
> + tmp = helper_atomic_fetch_addl_le(env, addr,
> + env->vfp.vreg[src3].s32[j]);
> +#endif
> + if (wd) {
> + env->vfp.vreg[src3].s32[j] = tmp;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + int64_t tmp;
> + idx = (target_long)env->vfp.vreg[src2].s64[j];
> + addr = idx + env->gpr[rs1];
> +
> +#ifdef CONFIG_SOFTMMU
> + tmp = (int64_t)(int32_t)helper_atomic_fetch_addl_le(env,
> + addr, env->vfp.vreg[src3].s64[j],
> + make_memop_idx(memop & ~MO_SIGN, mem_idx));
> +#else
> + tmp = (int64_t)(int32_t)helper_atomic_fetch_addl_le(env,
> + addr, env->vfp.vreg[src3].s64[j]);
> +#endif
> + if (wd) {
> + env->vfp.vreg[src3].s64[j] = tmp;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_amo(env, src3, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vamoaddd_v)(CPURISCVState *env, uint32_t wd, uint32_t vm,
> + uint32_t rs1, uint32_t vs2, uint32_t vs3)
> +{
> + int i, j, vl;
> + target_long idx;
> + uint32_t lmul, width, src2, src3, vlmax;
> + target_ulong addr;
> +#ifdef CONFIG_SOFTMMU
> + int mem_idx = cpu_mmu_index(env, false);
> + TCGMemOp memop = MO_ALIGN | MO_TEQ;
> +#endif
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> + /* MEM <= SEW <= XLEN */
> + if (width < 64 || (width > sizeof(target_ulong) * 8)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + /* if wd is set, rd is written with the old value */
> + if (vector_vtype_ill(env) ||
> + (vector_overlap_vm_common(lmul, vm, vs3) && wd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, vs2, false);
> + vector_lmul_check_reg(env, lmul, vs3, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = vs2 + (i / (VLEN / width));
> + src3 = vs3 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + int64_t tmp;
> + idx = (target_long)env->vfp.vreg[src2].s64[j];
> + addr = idx + env->gpr[rs1];
> +
> +#ifdef CONFIG_SOFTMMU
> + tmp = helper_atomic_fetch_addq_le(env, addr,
> + env->vfp.vreg[src3].s64[j],
> + make_memop_idx(memop & ~MO_SIGN, mem_idx));
> +#else
> + tmp = helper_atomic_fetch_addq_le(env, addr,
> + env->vfp.vreg[src3].s64[j]);
> +#endif
> + if (wd) {
> + env->vfp.vreg[src3].s64[j] = tmp;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_amo(env, src3, j, width);
> + }
> + }
> +
> + env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vamoxorw_v)(CPURISCVState *env, uint32_t wd, uint32_t vm,
> + uint32_t rs1, uint32_t vs2, uint32_t vs3)
> +{
> + int i, j, vl;
> + target_long idx;
> + uint32_t lmul, width, src2, src3, vlmax;
> + target_ulong addr;
> +#ifdef CONFIG_SOFTMMU
> + int mem_idx = cpu_mmu_index(env, false);
> + TCGMemOp memop = MO_ALIGN | MO_TESL;
> +#endif
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> + /* MEM <= SEW <= XLEN */
> + if (width < 32 || (width > sizeof(target_ulong) * 8)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + /* if wd is set, rd is written with the old value */
> + if (vector_vtype_ill(env) ||
> + (vector_overlap_vm_common(lmul, vm, vs3) && wd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, vs2, false);
> + vector_lmul_check_reg(env, lmul, vs3, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = vs2 + (i / (VLEN / width));
> + src3 = vs3 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + int32_t tmp;
> + idx = (target_long)env->vfp.vreg[src2].s32[j];
> + addr = idx + env->gpr[rs1];
> +#ifdef CONFIG_SOFTMMU
> + tmp = helper_atomic_fetch_xorl_le(env, addr,
> + env->vfp.vreg[src3].s32[j],
> + make_memop_idx(memop & ~MO_SIGN, mem_idx));
> +#else
> + tmp = helper_atomic_fetch_xorl_le(env, addr,
> + env->vfp.vreg[src3].s32[j]);
> +#endif
> + if (wd) {
> + env->vfp.vreg[src3].s32[j] = tmp;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + int64_t tmp;
> + idx = (target_long)env->vfp.vreg[src2].s64[j];
> + addr = idx + env->gpr[rs1];
> +
> +#ifdef CONFIG_SOFTMMU
> + tmp = (int64_t)(int32_t)helper_atomic_fetch_xorl_le(env,
> + addr, env->vfp.vreg[src3].s64[j],
> + make_memop_idx(memop & ~MO_SIGN, mem_idx));
> +#else
> + tmp = (int64_t)(int32_t)helper_atomic_fetch_xorl_le(env,
> + addr, env->vfp.vreg[src3].s64[j]);
> +#endif
> + if (wd) {
> + env->vfp.vreg[src3].s64[j] = tmp;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_amo(env, src3, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +
> +void VECTOR_HELPER(vamoxord_v)(CPURISCVState *env, uint32_t wd, uint32_t vm,
> + uint32_t rs1, uint32_t vs2, uint32_t vs3)
> +{
> + int i, j, vl;
> + target_long idx;
> + uint32_t lmul, width, src2, src3, vlmax;
> + target_ulong addr;
> +#ifdef CONFIG_SOFTMMU
> + int mem_idx = cpu_mmu_index(env, false);
> + TCGMemOp memop = MO_ALIGN | MO_TESL;
> +#endif
> +
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> + /* MEM <= SEW <= XLEN */
> + if (width < 64 || (width > sizeof(target_ulong) * 8)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + /* if wd is set, vd is written with the old memory value */
> + if (vector_vtype_ill(env) ||
> + (vector_overlap_vm_common(lmul, vm, vs3) && wd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, vs2, false);
> + vector_lmul_check_reg(env, lmul, vs3, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = vs2 + (i / (VLEN / width));
> + src3 = vs3 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + int64_t tmp;
> + idx = (target_long)env->vfp.vreg[src2].s64[j];
> + addr = idx + env->gpr[rs1];
> +
> +#ifdef CONFIG_SOFTMMU
> + tmp = helper_atomic_fetch_xorq_le(env, addr,
> + env->vfp.vreg[src3].s64[j],
> + make_memop_idx(memop & ~MO_SIGN, mem_idx));
> +#else
> + tmp = helper_atomic_fetch_xorq_le(env, addr,
> + env->vfp.vreg[src3].s64[j]);
> +#endif
> + if (wd) {
> + env->vfp.vreg[src3].s64[j] = tmp;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_amo(env, src3, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vamoandw_v)(CPURISCVState *env, uint32_t wd, uint32_t vm,
> + uint32_t rs1, uint32_t vs2, uint32_t vs3)
> +{
> + int i, j, vl;
> + target_long idx;
> + uint32_t lmul, width, src2, src3, vlmax;
> + target_ulong addr;
> +#ifdef CONFIG_SOFTMMU
> + int mem_idx = cpu_mmu_index(env, false);
> + TCGMemOp memop = MO_ALIGN | MO_TESL;
> +#endif
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> + /* MEM <= SEW <= XLEN */
> + if (width < 32 || (width > sizeof(target_ulong) * 8)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + /* if wd is set, vd is written with the old memory value */
> + if (vector_vtype_ill(env) ||
> + (vector_overlap_vm_common(lmul, vm, vs3) && wd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, vs2, false);
> + vector_lmul_check_reg(env, lmul, vs3, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = vs2 + (i / (VLEN / width));
> + src3 = vs3 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + int32_t tmp;
> + idx = (target_long)env->vfp.vreg[src2].s32[j];
> + addr = idx + env->gpr[rs1];
> +#ifdef CONFIG_SOFTMMU
> + tmp = helper_atomic_fetch_andl_le(env, addr,
> + env->vfp.vreg[src3].s32[j],
> + make_memop_idx(memop & ~MO_SIGN, mem_idx));
> +#else
> + tmp = helper_atomic_fetch_andl_le(env, addr,
> + env->vfp.vreg[src3].s32[j]);
> +#endif
> + if (wd) {
> + env->vfp.vreg[src3].s32[j] = tmp;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + int64_t tmp;
> + idx = (target_long)env->vfp.vreg[src2].s64[j];
> + addr = idx + env->gpr[rs1];
> +
> +#ifdef CONFIG_SOFTMMU
> + tmp = (int64_t)(int32_t)helper_atomic_fetch_andl_le(env,
> + addr, env->vfp.vreg[src3].s64[j],
> + make_memop_idx(memop & ~MO_SIGN, mem_idx));
> +#else
> + tmp = (int64_t)(int32_t)helper_atomic_fetch_andl_le(env,
> + addr, env->vfp.vreg[src3].s64[j]);
> +#endif
> + if (wd) {
> + env->vfp.vreg[src3].s64[j] = tmp;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_amo(env, src3, j, width);
> + }
> + }
> +
> + env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vamoandd_v)(CPURISCVState *env, uint32_t wd,
> uint32_t vm,
> + uint32_t rs1, uint32_t vs2, uint32_t vs3)
> +{
> + int i, j, vl;
> + target_long idx;
> + uint32_t lmul, width, src2, src3, vlmax;
> + target_ulong addr;
> +#ifdef CONFIG_SOFTMMU
> + int mem_idx = cpu_mmu_index(env, false);
> + TCGMemOp memop = MO_ALIGN | MO_TEQ;
> +#endif
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> + /* MEM <= SEW <= XLEN */
> + if (width < 64 || (width > sizeof(target_ulong) * 8)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + /* if wd is set, vd is written with the old memory value */
> + if (vector_vtype_ill(env) ||
> + (vector_overlap_vm_common(lmul, vm, vs3) && wd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, vs2, false);
> + vector_lmul_check_reg(env, lmul, vs3, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = vs2 + (i / (VLEN / width));
> + src3 = vs3 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + int64_t tmp;
> + idx = (target_long)env->vfp.vreg[src2].s64[j];
> + addr = idx + env->gpr[rs1];
> +
> +#ifdef CONFIG_SOFTMMU
> + tmp = helper_atomic_fetch_andq_le(env, addr,
> + env->vfp.vreg[src3].s64[j],
> + make_memop_idx(memop & ~MO_SIGN, mem_idx));
> +#else
> + tmp = helper_atomic_fetch_andq_le(env, addr,
> + env->vfp.vreg[src3].s64[j]);
> +#endif
> + if (wd) {
> + env->vfp.vreg[src3].s64[j] = tmp;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_amo(env, src3, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vamoorw_v)(CPURISCVState *env, uint32_t wd, uint32_t vm,
> + uint32_t rs1, uint32_t vs2, uint32_t vs3)
> +{
> + int i, j, vl;
> + target_long idx;
> + uint32_t lmul, width, src2, src3, vlmax;
> + target_ulong addr;
> +#ifdef CONFIG_SOFTMMU
> + int mem_idx = cpu_mmu_index(env, false);
> + TCGMemOp memop = MO_ALIGN | MO_TESL;
> +#endif
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> + /* MEM <= SEW <= XLEN */
> + if (width < 32 || (width > sizeof(target_ulong) * 8)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + /* if wd is set, vd is written with the old memory value */
> + if (vector_vtype_ill(env) ||
> + (vector_overlap_vm_common(lmul, vm, vs3) && wd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, vs2, false);
> + vector_lmul_check_reg(env, lmul, vs3, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = vs2 + (i / (VLEN / width));
> + src3 = vs3 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + int32_t tmp;
> + idx = (target_long)env->vfp.vreg[src2].s32[j];
> + addr = idx + env->gpr[rs1];
> +#ifdef CONFIG_SOFTMMU
> + tmp = helper_atomic_fetch_orl_le(env, addr,
> + env->vfp.vreg[src3].s32[j],
> + make_memop_idx(memop & ~MO_SIGN, mem_idx));
> +#else
> + tmp = helper_atomic_fetch_orl_le(env, addr,
> + env->vfp.vreg[src3].s32[j]);
> +#endif
> + if (wd) {
> + env->vfp.vreg[src3].s32[j] = tmp;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + int64_t tmp;
> + idx = (target_long)env->vfp.vreg[src2].s64[j];
> + addr = idx + env->gpr[rs1];
> +
> +#ifdef CONFIG_SOFTMMU
> + tmp = (int64_t)(int32_t)helper_atomic_fetch_orl_le(env,
> + addr, env->vfp.vreg[src3].s64[j],
> + make_memop_idx(memop & ~MO_SIGN, mem_idx));
> +#else
> + tmp = (int64_t)(int32_t)helper_atomic_fetch_orl_le(env,
> + addr, env->vfp.vreg[src3].s64[j]);
> +#endif
> + if (wd) {
> + env->vfp.vreg[src3].s64[j] = tmp;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_amo(env, src3, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vamoord_v)(CPURISCVState *env, uint32_t wd, uint32_t vm,
> + uint32_t rs1, uint32_t vs2, uint32_t vs3)
> +{
> + int i, j, vl;
> + target_long idx;
> + uint32_t lmul, width, src2, src3, vlmax;
> + target_ulong addr;
> +#ifdef CONFIG_SOFTMMU
> + int mem_idx = cpu_mmu_index(env, false);
> + TCGMemOp memop = MO_ALIGN | MO_TEQ;
> +#endif
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> + /* MEM <= SEW <= XLEN */
> + if (width < 64 || (width > sizeof(target_ulong) * 8)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + /* if wd is set, vd is written with the old memory value */
> + if (vector_vtype_ill(env) ||
> + (vector_overlap_vm_common(lmul, vm, vs3) && wd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, vs2, false);
> + vector_lmul_check_reg(env, lmul, vs3, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = vs2 + (i / (VLEN / width));
> + src3 = vs3 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + int64_t tmp;
> + idx = (target_long)env->vfp.vreg[src2].s64[j];
> + addr = idx + env->gpr[rs1];
> +
> +#ifdef CONFIG_SOFTMMU
> + tmp = helper_atomic_fetch_orq_le(env, addr,
> + env->vfp.vreg[src3].s64[j],
> + make_memop_idx(memop & ~MO_SIGN, mem_idx));
> +#else
> + tmp = helper_atomic_fetch_orq_le(env, addr,
> + env->vfp.vreg[src3].s64[j]);
> +#endif
> + if (wd) {
> + env->vfp.vreg[src3].s64[j] = tmp;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_amo(env, src3, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vamominw_v)(CPURISCVState *env, uint32_t wd, uint32_t vm,
> + uint32_t rs1, uint32_t vs2, uint32_t vs3)
> +{
> + int i, j, vl;
> + target_long idx;
> + uint32_t lmul, width, src2, src3, vlmax;
> + target_ulong addr;
> +#ifdef CONFIG_SOFTMMU
> + int mem_idx = cpu_mmu_index(env, false);
> + TCGMemOp memop = MO_ALIGN | MO_TESL;
> +#endif
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> + /* MEM <= SEW <= XLEN */
> + if (width < 32 || (width > sizeof(target_ulong) * 8)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + /* if wd is set, vd is written with the old memory value */
> + if (vector_vtype_ill(env) ||
> + (vector_overlap_vm_common(lmul, vm, vs3) && wd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, vs2, false);
> + vector_lmul_check_reg(env, lmul, vs3, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = vs2 + (i / (VLEN / width));
> + src3 = vs3 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + int32_t tmp;
> + idx = (target_long)env->vfp.vreg[src2].s32[j];
> + addr = idx + env->gpr[rs1];
> +#ifdef CONFIG_SOFTMMU
> + tmp = helper_atomic_fetch_sminl_le(env, addr,
> + env->vfp.vreg[src3].s32[j],
> + make_memop_idx(memop & ~MO_SIGN, mem_idx));
> +#else
> + tmp = helper_atomic_fetch_sminl_le(env, addr,
> + env->vfp.vreg[src3].s32[j]);
> +#endif
> + if (wd) {
> + env->vfp.vreg[src3].s32[j] = tmp;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + int64_t tmp;
> + idx = (target_long)env->vfp.vreg[src2].s64[j];
> + addr = idx + env->gpr[rs1];
> +
> +#ifdef CONFIG_SOFTMMU
> + tmp = (int64_t)(int32_t)helper_atomic_fetch_sminl_le(env,
> + addr, env->vfp.vreg[src3].s64[j],
> + make_memop_idx(memop & ~MO_SIGN, mem_idx));
> +#else
> + tmp = (int64_t)(int32_t)helper_atomic_fetch_sminl_le(env,
> + addr, env->vfp.vreg[src3].s64[j]);
> +#endif
> + if (wd) {
> + env->vfp.vreg[src3].s64[j] = tmp;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_amo(env, src3, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +
> +void VECTOR_HELPER(vamomind_v)(CPURISCVState *env, uint32_t wd, uint32_t vm,
> + uint32_t rs1, uint32_t vs2, uint32_t vs3)
> +{
> + int i, j, vl;
> + target_long idx;
> + uint32_t lmul, width, src2, src3, vlmax;
> + target_ulong addr;
> +#ifdef CONFIG_SOFTMMU
> + int mem_idx = cpu_mmu_index(env, false);
> + TCGMemOp memop = MO_ALIGN | MO_TEQ;
> +#endif
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> + /* MEM <= SEW <= XLEN */
> + if (width < 64 || (width > sizeof(target_ulong) * 8)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + /* if wd is set, vd is written with the old memory value */
> + if (vector_vtype_ill(env) ||
> + (vector_overlap_vm_common(lmul, vm, vs3) && wd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, vs2, false);
> + vector_lmul_check_reg(env, lmul, vs3, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = vs2 + (i / (VLEN / width));
> + src3 = vs3 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + int64_t tmp;
> + idx = (target_long)env->vfp.vreg[src2].s64[j];
> + addr = idx + env->gpr[rs1];
> +
> +#ifdef CONFIG_SOFTMMU
> + tmp = helper_atomic_fetch_sminq_le(env, addr,
> + env->vfp.vreg[src3].s64[j],
> + make_memop_idx(memop & ~MO_SIGN, mem_idx));
> +#else
> + tmp = helper_atomic_fetch_sminq_le(env, addr,
> + env->vfp.vreg[src3].s64[j]);
> +#endif
> + if (wd) {
> + env->vfp.vreg[src3].s64[j] = tmp;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_amo(env, src3, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vamomaxw_v)(CPURISCVState *env, uint32_t wd, uint32_t vm,
> + uint32_t rs1, uint32_t vs2, uint32_t vs3)
> +{
> + int i, j, vl;
> + target_long idx;
> + uint32_t lmul, width, src2, src3, vlmax;
> + target_ulong addr;
> +#ifdef CONFIG_SOFTMMU
> + int mem_idx = cpu_mmu_index(env, false);
> + TCGMemOp memop = MO_ALIGN | MO_TESL;
> +#endif
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> + /* MEM <= SEW <= XLEN */
> + if (width < 32 || (width > sizeof(target_ulong) * 8)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + /* if wd is set, vd is written with the old memory value */
> + if (vector_vtype_ill(env) ||
> + (vector_overlap_vm_common(lmul, vm, vs3) && wd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, vs2, false);
> + vector_lmul_check_reg(env, lmul, vs3, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = vs2 + (i / (VLEN / width));
> + src3 = vs3 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + int32_t tmp;
> + idx = (target_long)env->vfp.vreg[src2].s32[j];
> + addr = idx + env->gpr[rs1];
> +#ifdef CONFIG_SOFTMMU
> + tmp = helper_atomic_fetch_smaxl_le(env, addr,
> + env->vfp.vreg[src3].s32[j],
> + make_memop_idx(memop & ~MO_SIGN, mem_idx));
> +#else
> + tmp = helper_atomic_fetch_smaxl_le(env, addr,
> + env->vfp.vreg[src3].s32[j]);
> +#endif
> + if (wd) {
> + env->vfp.vreg[src3].s32[j] = tmp;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + int64_t tmp;
> + idx = (target_long)env->vfp.vreg[src2].s64[j];
> + addr = idx + env->gpr[rs1];
> +
> +#ifdef CONFIG_SOFTMMU
> + tmp = (int64_t)(int32_t)helper_atomic_fetch_smaxl_le(env,
> + addr, env->vfp.vreg[src3].s64[j],
> + make_memop_idx(memop & ~MO_SIGN, mem_idx));
> +#else
> + tmp = (int64_t)(int32_t)helper_atomic_fetch_smaxl_le(env,
> + addr, env->vfp.vreg[src3].s64[j]);
> +#endif
> + if (wd) {
> + env->vfp.vreg[src3].s64[j] = tmp;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_amo(env, src3, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +
> +void VECTOR_HELPER(vamomaxd_v)(CPURISCVState *env, uint32_t wd, uint32_t vm,
> + uint32_t rs1, uint32_t vs2, uint32_t vs3)
> +{
> + int i, j, vl;
> + target_long idx;
> + uint32_t lmul, width, src2, src3, vlmax;
> + target_ulong addr;
> +#ifdef CONFIG_SOFTMMU
> + int mem_idx = cpu_mmu_index(env, false);
> + TCGMemOp memop = MO_ALIGN | MO_TEQ;
> +#endif
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> + /* MEM <= SEW <= XLEN */
> + if (width < 64 || (width > sizeof(target_ulong) * 8)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + /* if wd is set, vd is written with the old memory value */
> + if (vector_vtype_ill(env) ||
> + (vector_overlap_vm_common(lmul, vm, vs3) && wd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, vs2, false);
> + vector_lmul_check_reg(env, lmul, vs3, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = vs2 + (i / (VLEN / width));
> + src3 = vs3 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + int64_t tmp;
> + idx = (target_long)env->vfp.vreg[src2].s64[j];
> + addr = idx + env->gpr[rs1];
> +
> +#ifdef CONFIG_SOFTMMU
> + tmp = helper_atomic_fetch_smaxq_le(env, addr,
> + env->vfp.vreg[src3].s64[j],
> + make_memop_idx(memop & ~MO_SIGN, mem_idx));
> +#else
> + tmp = helper_atomic_fetch_smaxq_le(env, addr,
> + env->vfp.vreg[src3].s64[j]);
> +#endif
> + if (wd) {
> + env->vfp.vreg[src3].s64[j] = tmp;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_amo(env, src3, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vamominuw_v)(CPURISCVState *env, uint32_t wd, uint32_t vm,
> + uint32_t rs1, uint32_t vs2, uint32_t vs3)
> +{
> + int i, j, vl;
> + target_long idx;
> + uint32_t lmul, width, src2, src3, vlmax;
> + target_ulong addr;
> +#ifdef CONFIG_SOFTMMU
> + int mem_idx = cpu_mmu_index(env, false);
> + TCGMemOp memop = MO_ALIGN | MO_TESL;
> +#endif
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> + /* MEM <= SEW <= XLEN */
> + if (width < 32 || (width > sizeof(target_ulong) * 8)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + /* if wd is set, vd is written with the old memory value */
> + if (vector_vtype_ill(env) ||
> + (vector_overlap_vm_common(lmul, vm, vs3) && wd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, vs2, false);
> + vector_lmul_check_reg(env, lmul, vs3, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = vs2 + (i / (VLEN / width));
> + src3 = vs3 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + uint32_t tmp;
> + idx = (target_long)env->vfp.vreg[src2].s32[j];
> + addr = idx + env->gpr[rs1];
> +#ifdef CONFIG_SOFTMMU
> + tmp = helper_atomic_fetch_uminl_le(env, addr,
> + env->vfp.vreg[src3].s32[j],
> + make_memop_idx(memop & ~MO_SIGN, mem_idx));
> +#else
> + tmp = helper_atomic_fetch_uminl_le(env, addr,
> + env->vfp.vreg[src3].s32[j]);
> +#endif
> + if (wd) {
> + env->vfp.vreg[src3].s32[j] = tmp;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + uint64_t tmp;
> + idx = (target_long)env->vfp.vreg[src2].s64[j];
> + addr = idx + env->gpr[rs1];
> +
> +#ifdef CONFIG_SOFTMMU
> + tmp = (int64_t)(int32_t)helper_atomic_fetch_uminl_le(
> + env, addr, env->vfp.vreg[src3].s64[j],
> + make_memop_idx(memop & ~MO_SIGN, mem_idx));
> +#else
> + tmp = (int64_t)(int32_t)helper_atomic_fetch_uminl_le(
> + env, addr, env->vfp.vreg[src3].s64[j]);
> +#endif
> + if (wd) {
> + env->vfp.vreg[src3].s64[j] = tmp;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_amo(env, src3, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +
> +void VECTOR_HELPER(vamominud_v)(CPURISCVState *env, uint32_t wd, uint32_t vm,
> + uint32_t rs1, uint32_t vs2, uint32_t vs3)
> +{
> + int i, j, vl;
> + target_long idx;
> + uint32_t lmul, width, src2, src3, vlmax;
> + target_ulong addr;
> +#ifdef CONFIG_SOFTMMU
> + int mem_idx = cpu_mmu_index(env, false);
> + TCGMemOp memop = MO_ALIGN | MO_TEQ;
> +#endif
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> + /* MEM <= SEW <= XLEN */
> + if (width < 64 || (width > sizeof(target_ulong) * 8)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + /* if wd is set, vd is written with the old memory value */
> + if (vector_vtype_ill(env) ||
> + (vector_overlap_vm_common(lmul, vm, vs3) && wd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, vs2, false);
> + vector_lmul_check_reg(env, lmul, vs3, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = vs2 + (i / (VLEN / width));
> + src3 = vs3 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + uint64_t tmp;
> + idx = (target_long)env->vfp.vreg[src2].s64[j];
> + addr = idx + env->gpr[rs1];
> +
> +#ifdef CONFIG_SOFTMMU
> + tmp = helper_atomic_fetch_uminq_le(
> + env, addr, env->vfp.vreg[src3].s64[j],
> + make_memop_idx(memop & ~MO_SIGN, mem_idx));
> +#else
> + tmp = helper_atomic_fetch_uminq_le(env, addr,
> + env->vfp.vreg[src3].s64[j]);
> +#endif
> + if (wd) {
> + env->vfp.vreg[src3].s64[j] = tmp;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_amo(env, src3, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vamomaxuw_v)(CPURISCVState *env, uint32_t wd, uint32_t vm,
> + uint32_t rs1, uint32_t vs2, uint32_t vs3)
> +{
> + int i, j, vl;
> + target_long idx;
> + uint32_t lmul, width, src2, src3, vlmax;
> + target_ulong addr;
> +#ifdef CONFIG_SOFTMMU
> + int mem_idx = cpu_mmu_index(env, false);
> + TCGMemOp memop = MO_ALIGN | MO_TESL;
> +#endif
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> + /* MEM <= SEW <= XLEN */
> + if (width < 32 || (width > sizeof(target_ulong) * 8)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + /* if wd is set, vd is written with the old memory value */
> + if (vector_vtype_ill(env) ||
> + (vector_overlap_vm_common(lmul, vm, vs3) && wd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, vs2, false);
> + vector_lmul_check_reg(env, lmul, vs3, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = vs2 + (i / (VLEN / width));
> + src3 = vs3 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + uint32_t tmp;
> + idx = (target_long)env->vfp.vreg[src2].s32[j];
> + addr = idx + env->gpr[rs1];
> +#ifdef CONFIG_SOFTMMU
> + tmp = helper_atomic_fetch_umaxl_le(env, addr,
> + env->vfp.vreg[src3].s32[j],
> + make_memop_idx(memop & ~MO_SIGN, mem_idx));
> +#else
> + tmp = helper_atomic_fetch_umaxl_le(env, addr,
> + env->vfp.vreg[src3].s32[j]);
> +#endif
> + if (wd) {
> + env->vfp.vreg[src3].s32[j] = tmp;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + uint64_t tmp;
> + idx = (target_long)env->vfp.vreg[src2].s64[j];
> + addr = idx + env->gpr[rs1];
> +
> +#ifdef CONFIG_SOFTMMU
> + tmp = (int64_t)(int32_t)helper_atomic_fetch_umaxl_le(
> + env, addr, env->vfp.vreg[src3].s64[j],
> + make_memop_idx(memop & ~MO_SIGN, mem_idx));
> +#else
> + tmp = (int64_t)(int32_t)helper_atomic_fetch_umaxl_le(
> + env, addr, env->vfp.vreg[src3].s64[j]);
> +#endif
> + if (wd) {
> + env->vfp.vreg[src3].s64[j] = tmp;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_amo(env, src3, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vamomaxud_v)(CPURISCVState *env, uint32_t wd, uint32_t vm,
> + uint32_t rs1, uint32_t vs2, uint32_t vs3)
> +{
> + int i, j, vl;
> + target_long idx;
> + uint32_t lmul, width, src2, src3, vlmax;
> + target_ulong addr;
> +#ifdef CONFIG_SOFTMMU
> + int mem_idx = cpu_mmu_index(env, false);
> + TCGMemOp memop = MO_ALIGN | MO_TEQ;
> +#endif
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> + /* MEM <= SEW <= XLEN */
> + if (width < 64 || (width > sizeof(target_ulong) * 8)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + /* if wd is set, vd is written with the old memory value */
> + if (vector_vtype_ill(env) ||
> + (vector_overlap_vm_common(lmul, vm, vs3) && wd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, vs2, false);
> + vector_lmul_check_reg(env, lmul, vs3, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = vs2 + (i / (VLEN / width));
> + src3 = vs3 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + uint64_t tmp;
> + idx = (target_long)env->vfp.vreg[src2].s64[j];
> + addr = idx + env->gpr[rs1];
> +
> +#ifdef CONFIG_SOFTMMU
> + tmp = helper_atomic_fetch_umaxq_le(
> + env, addr, env->vfp.vreg[src3].s64[j],
> + make_memop_idx(memop & ~MO_SIGN, mem_idx));
> +#else
> + tmp = helper_atomic_fetch_umaxq_le(env, addr,
> + env->vfp.vreg[src3].s64[j]);
> +#endif
> + if (wd) {
> + env->vfp.vreg[src3].s64[j] = tmp;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_amo(env, src3, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> --
> 2.7.4
>
>
[-- Attachment #2: Type: text/html, Size: 1707750 bytes --]
^ permalink raw reply [flat|nested] 52+ messages in thread
* Re: [Qemu-devel] [PATCH] RISCV: support riscv vector extension 0.7.1
2019-08-28 9:08 ` [Qemu-riscv] " Alex Bennée
@ 2019-08-29 13:35 ` liuzhiwei
-1 siblings, 0 replies; 52+ messages in thread
From: liuzhiwei @ 2019-08-29 13:35 UTC (permalink / raw)
To: Alex Bennée
Cc: peter.maydell, palmer, qemu-riscv, sagark, kbastian, riku.voipio,
qemu-devel, laurent, Alistair.Francis, aurelien
Hi, Alex
On 2019/8/28 5:08 PM, Alex Bennée wrote:
> liuzhiwei <zhiwei_liu@c-sky.com> writes:
>
>> Change-Id: I3cf891bc400713b95f47ecca82b1bf773f3dcb25
>> Signed-off-by: liuzhiwei <zhiwei_liu@c-sky.com>
>> ---
>> fpu/softfloat.c | 119 +
>> include/fpu/softfloat.h | 4 +
> Changes to softfloat should be in a separate patch, but see below.
>
>> linux-user/riscv/cpu_loop.c | 8 +-
>> target/riscv/Makefile.objs | 2 +-
>> target/riscv/cpu.h | 30 +
>> target/riscv/cpu_bits.h | 15 +
>> target/riscv/cpu_helper.c | 7 +
>> target/riscv/csr.c | 65 +-
>> target/riscv/helper.h | 354 +
>> target/riscv/insn32.decode | 374 +-
>> target/riscv/insn_trans/trans_rvv.inc.c | 484 +
>> target/riscv/translate.c | 1 +
>> target/riscv/vector_helper.c | 26563 ++++++++++++++++++++++++++++++
> This is likely too big to be reviewed. Is it possible to split the patch
> up into more discrete chunks, for example support pieces and then maybe
> a class at a time?
Yes, a patch set with a cover letter will be sent later.
>
>> 13 files changed, 28017 insertions(+), 9 deletions(-)
>> create mode 100644 target/riscv/insn_trans/trans_rvv.inc.c
>> create mode 100644 target/riscv/vector_helper.c
>>
>> diff --git a/fpu/softfloat.c b/fpu/softfloat.c
>> index 2ba36ec..da155ea 100644
>> --- a/fpu/softfloat.c
>> +++ b/fpu/softfloat.c
>> @@ -433,6 +433,16 @@ static inline int extractFloat16Exp(float16 a)
>> }
>>
>> /*----------------------------------------------------------------------------
>> +| Returns the sign bit of the half-precision floating-point value `a'.
>> +*----------------------------------------------------------------------------*/
>> +
>> +static inline flag extractFloat16Sign(float16 a)
>> +{
>> + return float16_val(a) >> 0xf;
>> +}
>> +
> We are trying to avoid this sort of bit fiddling for new code when we
> already have generic decompose functions that can extract all the parts
> into a common format.
>
>> +
>> +/*----------------------------------------------------------------------------
>> | Returns the fraction bits of the single-precision floating-point value `a'.
>> *----------------------------------------------------------------------------*/
>>
>> @@ -4790,6 +4800,35 @@ int float32_eq(float32 a, float32 b, float_status *status)
>> }
>>
>> /*----------------------------------------------------------------------------
>> +| Returns 1 if the half-precision floating-point value `a' is less than
>> +| or equal to the corresponding value `b', and 0 otherwise. The invalid
>> +| exception is raised if either operand is a NaN. The comparison is performed
>> +| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
>> +*----------------------------------------------------------------------------*/
>> +
>> +int float16_le(float16 a, float16 b, float_status *status)
>> +{
>> + flag aSign, bSign;
>> + uint16_t av, bv;
>> + a = float16_squash_input_denormal(a, status);
>> + b = float16_squash_input_denormal(b, status);
>> +
>> + if ( ( ( extractFloat16Exp( a ) == 0x1F ) && extractFloat16Frac( a ) )
>> + || ( ( extractFloat16Exp( b ) == 0x1F ) && extractFloat16Frac( b ) )
>> + ) {
>> + float_raise(float_flag_invalid, status);
>> + return 0;
>> + }
>> + aSign = extractFloat16Sign( a );
>> + bSign = extractFloat16Sign( b );
>> + av = float16_val(a);
>> + bv = float16_val(b);
>> + if ( aSign != bSign ) return aSign || ( (uint16_t) ( ( av | bv )<<1 ) == 0 );
>> + return ( av == bv ) || ( aSign ^ ( av < bv ) );
>> +
>> +}
> What does this provide that:
>
> float16_compare(a, b, status) == float_relation_less;
>
> doesn't?
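Nothing, I think. A minimal sketch on top of the generic compare API
(untested; float16_le_alt is just an illustrative name) would be:

    static int float16_le_alt(float16 a, float16 b, float_status *s)
    {
        int r = float16_compare(a, b, s);   /* signaling compare */
        return r == float_relation_less || r == float_relation_equal;
    }

The hand-rolled comparisons will be dropped in v2.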
>
>> +
>> +/*----------------------------------------------------------------------------
>> | Returns 1 if the single-precision floating-point value `a' is less than
>> | or equal to the corresponding value `b', and 0 otherwise. The invalid
>> | exception is raised if either operand is a NaN. The comparison is performed
>> @@ -4825,6 +4864,35 @@ int float32_le(float32 a, float32 b, float_status *status)
>> | to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
>> *----------------------------------------------------------------------------*/
>>
>> +int float16_lt(float16 a, float16 b, float_status *status)
>> +{
>> + flag aSign, bSign;
>> + uint16_t av, bv;
>> + a = float16_squash_input_denormal(a, status);
>> + b = float16_squash_input_denormal(b, status);
>> +
>> + if ( ( ( extractFloat16Exp( a ) == 0x1F ) && extractFloat16Frac( a ) )
>> + || ( ( extractFloat16Exp( b ) == 0x1F ) && extractFloat16Frac( b ) )
>> + ) {
>> + float_raise(float_flag_invalid, status);
>> + return 0;
>> + }
>> + aSign = extractFloat16Sign( a );
>> + bSign = extractFloat16Sign( b );
>> + av = float16_val(a);
>> + bv = float16_val(b);
>> + if ( aSign != bSign ) return aSign && ( (uint16_t) ( ( av | bv )<<1 ) != 0 );
>> + return ( av != bv ) && ( aSign ^ ( av < bv ) );
>> +
>> +}
>> +
>> +/*----------------------------------------------------------------------------
>> +| Returns 1 if the single-precision floating-point value `a' is less than
>> +| the corresponding value `b', and 0 otherwise. The invalid exception is
>> +| raised if either operand is a NaN. The comparison is performed according
>> +| to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
>> +*----------------------------------------------------------------------------*/
>> +
>> int float32_lt(float32 a, float32 b, float_status *status)
>> {
>> flag aSign, bSign;
>> @@ -4869,6 +4937,32 @@ int float32_unordered(float32 a, float32 b, float_status *status)
>> }
>>
>> /*----------------------------------------------------------------------------
>> +| Returns 1 if the half-precision floating-point value `a' is equal to
>> +| the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an
>> +| exception. The comparison is performed according to the IEC/IEEE Standard
>> +| for Binary Floating-Point Arithmetic.
>> +*----------------------------------------------------------------------------*/
>> +
>> +int float16_eq_quiet(float16 a, float16 b, float_status *status)
>> +{
>> + a = float16_squash_input_denormal(a, status);
>> + b = float16_squash_input_denormal(b, status);
>> +
>> + if ( ( ( extractFloat16Exp( a ) == 0x1F ) && extractFloat16Frac( a ) )
>> + || ( ( extractFloat16Exp( b ) == 0x1F ) && extractFloat16Frac( b ) )
>> + ) {
>> + if (float16_is_signaling_nan(a, status)
>> + || float16_is_signaling_nan(b, status)) {
>> + float_raise(float_flag_invalid, status);
>> + }
>> + return 0;
>> + }
>> + return ( float16_val(a) == float16_val(b) ) ||
>> + ( (uint16_t) ( ( float16_val(a) | float16_val(b) )<<1 ) == 0 );
>> +}
>> +
> See also float16_compare_quiet
Thank you for reminding me. I didn't find the float16_compare and
float16_compare_quiet interfaces before.
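A quiet equality test can likewise be written as a one-liner (sketch):

    float16_compare_quiet(a, b, status) == float_relation_equal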
>> +
>> +/*----------------------------------------------------------------------------
>> | Returns 1 if the single-precision floating-point value `a' is equal to
>> | the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an
>> | exception. The comparison is performed according to the IEC/IEEE Standard
>> @@ -4958,6 +5052,31 @@ int float32_lt_quiet(float32 a, float32 b, float_status *status)
>> }
>>
>> /*----------------------------------------------------------------------------
>> +| Returns 1 if the half-precision floating-point values `a' and `b' cannot
>> +| be compared, and 0 otherwise. Quiet NaNs do not cause an exception. The
>> +| comparison is performed according to the IEC/IEEE Standard for Binary
>> +| Floating-Point Arithmetic.
>> +*----------------------------------------------------------------------------*/
>> +
>> +int float16_unordered_quiet(float16 a, float16 b, float_status *status)
>> +{
>> + a = float16_squash_input_denormal(a, status);
>> + b = float16_squash_input_denormal(b, status);
>> +
>> + if ( ( ( extractFloat16Exp( a ) == 0x1F ) && extractFloat16Frac( a ) )
>> + || ( ( extractFloat16Exp( b ) == 0x1F ) && extractFloat16Frac( b ) )
>> + ) {
>> + if (float16_is_signaling_nan(a, status)
>> + || float16_is_signaling_nan(b, status)) {
>> + float_raise(float_flag_invalid, status);
>> + }
>> + return 1;
>> + }
>> + return 0;
>> +}
>> +
>> +
>> +/*----------------------------------------------------------------------------
>> | Returns 1 if the single-precision floating-point values `a' and `b' cannot
>> | be compared, and 0 otherwise. Quiet NaNs do not cause an exception. The
>> | comparison is performed according to the IEC/IEEE Standard for Binary
>> diff --git a/include/fpu/softfloat.h b/include/fpu/softfloat.h
>> index 3ff3fa5..3b0754c 100644
>> --- a/include/fpu/softfloat.h
>> +++ b/include/fpu/softfloat.h
>> @@ -293,6 +293,10 @@ float16 float16_maxnummag(float16, float16, float_status *status);
>> float16 float16_sqrt(float16, float_status *status);
>> int float16_compare(float16, float16, float_status *status);
>> int float16_compare_quiet(float16, float16, float_status *status);
>> +int float16_unordered_quiet(float16, float16, float_status *status);
>> +int float16_le(float16, float16, float_status *status);
>> +int float16_lt(float16, float16, float_status *status);
>> +int float16_eq_quiet(float16, float16, float_status *status);
>>
>> int float16_is_quiet_nan(float16, float_status *status);
>> int float16_is_signaling_nan(float16, float_status *status);
>> diff --git a/linux-user/riscv/cpu_loop.c b/linux-user/riscv/cpu_loop.c
>> index 12aa3c0..b01548a 100644
>> --- a/linux-user/riscv/cpu_loop.c
>> +++ b/linux-user/riscv/cpu_loop.c
>> @@ -40,7 +40,13 @@ void cpu_loop(CPURISCVState *env)
>> signum = 0;
>> sigcode = 0;
>> sigaddr = 0;
>> -
>> + if (env->foflag) {
>> + if (env->vfp.vl != 0) {
>> + env->foflag = false;
>> + env->pc += 4;
>> + continue;
>> + }
>> + }
> What is this trying to do?
It handles the fault-only-first exception: when a vleff load faults past
the first element, the trap is not delivered as a signal; vl has already
been trimmed, so execution simply resumes at the next instruction.
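A minimal sketch of the intended semantics (load_ok, base, step and
cause are illustrative placeholders, not the actual helper code):

    /* fault-only-first: element 0 faults normally; a fault on a
     * later element trims vl and suppresses the trap. */
    for (i = env->vfp.vstart; i < env->vfp.vl; i++) {
        if (!load_ok(env, base + i * step)) {
            if (i == 0) {
                riscv_raise_exception(env, cause, GETPC());
            }
            env->vfp.vl = i;   /* keep only the elements that loaded */
            break;
        }
    }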
>
>> switch (trapnr) {
>> case EXCP_INTERRUPT:
>> /* just indicate that signals should be handled asap */
>> diff --git a/target/riscv/Makefile.objs b/target/riscv/Makefile.objs
>> index b1c79bc..d577cef 100644
>> --- a/target/riscv/Makefile.objs
>> +++ b/target/riscv/Makefile.objs
>> @@ -1,4 +1,4 @@
>> -obj-y += translate.o op_helper.o cpu_helper.o cpu.o csr.o fpu_helper.o gdbstub.o pmp.o
>> +obj-y += translate.o op_helper.o cpu_helper.o cpu.o csr.o fpu_helper.o vector_helper.o gdbstub.o pmp.o
>>
>> DECODETREE = $(SRC_PATH)/scripts/decodetree.py
>>
>> diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h
>> index 0adb307..5a93aa2 100644
>> --- a/target/riscv/cpu.h
>> +++ b/target/riscv/cpu.h
>> @@ -67,6 +67,7 @@
>> #define RVC RV('C')
>> #define RVS RV('S')
>> #define RVU RV('U')
>> +#define RVV RV('V')
>>
>> /* S extension denotes that Supervisor mode exists, however it is possible
>> to have a core that support S mode but does not have an MMU and there
>> @@ -93,9 +94,38 @@ typedef struct CPURISCVState CPURISCVState;
>>
>> #include "pmp.h"
>>
>> +#define VLEN 128
>> +#define VUNIT(x) (VLEN / x)
>> +
> If you want to do vectors I suggest you look at the TCGvec types for
> passing pointers to vector registers to helpers. In this case you will
> want to ensure your vector registers are properly aligned.
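Thanks, I will look at the TCG vector types. For the alignment, the
existing QEMU_ALIGNED attribute should be enough (16 bytes here is an
assumption matching VLEN = 128):

    union VECTOR {
        uint64_t u64[VUNIT(64)];
        /* ... other element views ... */
    } vreg[32] QEMU_ALIGNED(16);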
>
>> struct CPURISCVState {
>> target_ulong gpr[32];
>> uint64_t fpr[32]; /* assume both F and D extensions */
>> +
>> + /* vector coprocessor state. */
>> + struct {
>> + union VECTOR {
>> + float64 f64[VUNIT(64)];
>> + float32 f32[VUNIT(32)];
>> + float16 f16[VUNIT(16)];
>> + target_ulong ul[VUNIT(sizeof(target_ulong))];
>> + uint64_t u64[VUNIT(64)];
>> + int64_t s64[VUNIT(64)];
>> + uint32_t u32[VUNIT(32)];
>> + int32_t s32[VUNIT(32)];
>> + uint16_t u16[VUNIT(16)];
>> + int16_t s16[VUNIT(16)];
>> + uint8_t u8[VUNIT(8)];
>> + int8_t s8[VUNIT(8)];
>> + } vreg[32];
>> + target_ulong vxrm;
>> + target_ulong vxsat;
>> + target_ulong vl;
>> + target_ulong vstart;
>> + target_ulong vtype;
>> + float_status fp_status;
>> + } vfp;
>> +
>> + bool foflag;
> Again I have no idea what foflag is here.
>
>> target_ulong pc;
>> target_ulong load_res;
>> target_ulong load_val;
>> diff --git a/target/riscv/cpu_bits.h b/target/riscv/cpu_bits.h
>> index 11f971a..9eb43ec 100644
>> --- a/target/riscv/cpu_bits.h
>> +++ b/target/riscv/cpu_bits.h
>> @@ -29,6 +29,14 @@
>> #define FSR_NXA (FPEXC_NX << FSR_AEXC_SHIFT)
>> #define FSR_AEXC (FSR_NVA | FSR_OFA | FSR_UFA | FSR_DZA | FSR_NXA)
>>
>> +/* Vector Fixed-Point round model */
>> +#define FSR_VXRM_SHIFT 9
>> +#define FSR_VXRM (0x3 << FSR_VXRM_SHIFT)
>> +
>> +/* Vector Fixed-Point saturation flag */
>> +#define FSR_VXSAT_SHIFT 8
>> +#define FSR_VXSAT (0x1 << FSR_VXSAT_SHIFT)
>> +
>> /* Control and Status Registers */
>>
>> /* User Trap Setup */
>> @@ -48,6 +56,13 @@
>> #define CSR_FRM 0x002
>> #define CSR_FCSR 0x003
>>
>> +/* User Vector CSRs */
>> +#define CSR_VSTART 0x008
>> +#define CSR_VXSAT 0x009
>> +#define CSR_VXRM 0x00a
>> +#define CSR_VL 0xc20
>> +#define CSR_VTYPE 0xc21
>> +
>> /* User Timers and Counters */
>> #define CSR_CYCLE 0xc00
>> #define CSR_TIME 0xc01
>> diff --git a/target/riscv/cpu_helper.c b/target/riscv/cpu_helper.c
>> index e32b612..405caf6 100644
>> --- a/target/riscv/cpu_helper.c
>> +++ b/target/riscv/cpu_helper.c
>> @@ -521,6 +521,13 @@ void riscv_cpu_do_interrupt(CPUState *cs)
>> [PRV_H] = RISCV_EXCP_H_ECALL,
>> [PRV_M] = RISCV_EXCP_M_ECALL
>> };
>> + if (env->foflag) {
>> + if (env->vfp.vl != 0) {
>> + env->foflag = false;
>> + env->pc += 4;
>> + return;
>> + }
>> + }
>>
>> if (!async) {
>> /* set tval to badaddr for traps with address information */
>> diff --git a/target/riscv/csr.c b/target/riscv/csr.c
>> index e0d4586..a6131ff 100644
>> --- a/target/riscv/csr.c
>> +++ b/target/riscv/csr.c
>> @@ -87,12 +87,12 @@ static int ctr(CPURISCVState *env, int csrno)
>> return 0;
>> }
>>
>> -#if !defined(CONFIG_USER_ONLY)
>> static int any(CPURISCVState *env, int csrno)
>> {
>> return 0;
>> }
>>
>> +#if !defined(CONFIG_USER_ONLY)
>> static int smode(CPURISCVState *env, int csrno)
>> {
>> return -!riscv_has_ext(env, RVS);
>> @@ -158,8 +158,10 @@ static int read_fcsr(CPURISCVState *env, int csrno, target_ulong *val)
>> return -1;
>> }
>> #endif
>> - *val = (riscv_cpu_get_fflags(env) << FSR_AEXC_SHIFT)
>> - | (env->frm << FSR_RD_SHIFT);
>> + *val = (env->vfp.vxrm << FSR_VXRM_SHIFT)
>> + | (env->vfp.vxsat << FSR_VXSAT_SHIFT)
>> + | (riscv_cpu_get_fflags(env) << FSR_AEXC_SHIFT)
>> + | (env->frm << FSR_RD_SHIFT);
>> return 0;
>> }
>>
>> @@ -172,10 +174,60 @@ static int write_fcsr(CPURISCVState *env, int csrno, target_ulong val)
>> env->mstatus |= MSTATUS_FS;
>> #endif
>> env->frm = (val & FSR_RD) >> FSR_RD_SHIFT;
>> + env->vfp.vxrm = (val & FSR_VXRM) >> FSR_VXRM_SHIFT;
>> + env->vfp.vxsat = (val & FSR_VXSAT) >> FSR_VXSAT_SHIFT;
>> riscv_cpu_set_fflags(env, (val & FSR_AEXC) >> FSR_AEXC_SHIFT);
>> return 0;
>> }
>>
>> +static int read_vtype(CPURISCVState *env, int csrno, target_ulong *val)
>> +{
>> + *val = env->vfp.vtype;
>> + return 0;
>> +}
>> +
>> +static int read_vl(CPURISCVState *env, int csrno, target_ulong *val)
>> +{
>> + *val = env->vfp.vl;
>> + return 0;
>> +}
>> +
>> +static int read_vxrm(CPURISCVState *env, int csrno, target_ulong *val)
>> +{
>> + *val = env->vfp.vxrm;
>> + return 0;
>> +}
>> +
>> +static int read_vxsat(CPURISCVState *env, int csrno, target_ulong *val)
>> +{
>> + *val = env->vfp.vxsat;
>> + return 0;
>> +}
>> +
>> +static int read_vstart(CPURISCVState *env, int csrno, target_ulong *val)
>> +{
>> + *val = env->vfp.vstart;
>> + return 0;
>> +}
>> +
>> +static int write_vxrm(CPURISCVState *env, int csrno, target_ulong val)
>> +{
>> + env->vfp.vxrm = val;
>> + return 0;
>> +}
>> +
>> +static int write_vxsat(CPURISCVState *env, int csrno, target_ulong val)
>> +{
>> + env->vfp.vxsat = val;
>> + return 0;
>> +}
>> +
>> +static int write_vstart(CPURISCVState *env, int csrno, target_ulong val)
>> +{
>> + env->vfp.vstart = val;
>> + return 0;
>> +}
> A fixed return value makes me think these should be void functions.
Good!
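For example, a sketch of the void form (the write hook type in
riscv_csr_operations would need the matching change):

    static void write_vstart(CPURISCVState *env, int csrno, target_ulong val)
    {
        env->vfp.vstart = val;
    }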
>
>> +
>> /* User Timers and Counters */
>> static int read_instret(CPURISCVState *env, int csrno, target_ulong *val)
>> {
>> @@ -873,7 +925,12 @@ static riscv_csr_operations csr_ops[CSR_TABLE_SIZE] = {
>> [CSR_FFLAGS] = { fs, read_fflags, write_fflags },
>> [CSR_FRM] = { fs, read_frm, write_frm },
>> [CSR_FCSR] = { fs, read_fcsr, write_fcsr },
>> -
>> + /* Vector CSRs */
>> + [CSR_VSTART] = { any, read_vstart, write_vstart },
>> + [CSR_VXSAT] = { any, read_vxsat, write_vxsat },
>> + [CSR_VXRM] = { any, read_vxrm, write_vxrm },
>> + [CSR_VL] = { any, read_vl },
>> + [CSR_VTYPE] = { any, read_vtype },
>> /* User Timers and Counters */
>> [CSR_CYCLE] = { ctr, read_instret },
>> [CSR_INSTRET] = { ctr, read_instret },
>> diff --git a/target/riscv/helper.h b/target/riscv/helper.h
>> index debb22a..fee02c0 100644
>> --- a/target/riscv/helper.h
>> +++ b/target/riscv/helper.h
>> @@ -76,3 +76,357 @@ DEF_HELPER_2(mret, tl, env, tl)
>> DEF_HELPER_1(wfi, void, env)
>> DEF_HELPER_1(tlb_flush, void, env)
>> #endif
>> +/* Vector functions */
> Think about how you could split this patch up to introduce a group of
> instructions at a time. This will make it a lot easier review.
>
> I'm going to leave review of the specifics to the RISCV maintainers but
> I suspect they will want to wait until a v2 of the series. However it
> looks like a good first pass at implementing vectors.
>
> --
> Alex Bennée
Patch v2 will not change softfloat. Thank you again for your review!
Best Regards,
Zhiwei
>
^ permalink raw reply [flat|nested] 52+ messages in thread
* Re: [Qemu-riscv] [PATCH] RISCV: support riscv vector extension 0.7.1
@ 2019-08-29 13:35 ` liuzhiwei
0 siblings, 0 replies; 52+ messages in thread
From: liuzhiwei @ 2019-08-29 13:35 UTC (permalink / raw)
To: Alex Bennée
Cc: qemu-devel, qemu-riscv, aurelien, peter.maydell, riku.voipio,
laurent, palmer, Alistair.Francis, sagark, kbastian
Hi, Alex
On 2019/8/28 5:08 PM, Alex Bennée wrote:
> liuzhiwei <zhiwei_liu@c-sky.com> writes:
>
>> Change-Id: I3cf891bc400713b95f47ecca82b1bf773f3dcb25
>> Signed-off-by: liuzhiwei <zhiwei_liu@c-sky.com>
>> ---
>> fpu/softfloat.c | 119 +
>> include/fpu/softfloat.h | 4 +
> Changes to softfloat should be in a separate patch, but see below.
>
>> linux-user/riscv/cpu_loop.c | 8 +-
>> target/riscv/Makefile.objs | 2 +-
>> target/riscv/cpu.h | 30 +
>> target/riscv/cpu_bits.h | 15 +
>> target/riscv/cpu_helper.c | 7 +
>> target/riscv/csr.c | 65 +-
>> target/riscv/helper.h | 354 +
>> target/riscv/insn32.decode | 374 +-
>> target/riscv/insn_trans/trans_rvv.inc.c | 484 +
>> target/riscv/translate.c | 1 +
>> target/riscv/vector_helper.c | 26563 ++++++++++++++++++++++++++++++
> This is likely too big to be reviewed. Is it possible to split the patch
> up into more discrete chunks, for example support pieces and then maybe
> a class at a time?
Yes, a patch set with a cover letter will be sent later.
>
>> 13 files changed, 28017 insertions(+), 9 deletions(-)
>> create mode 100644 target/riscv/insn_trans/trans_rvv.inc.c
>> create mode 100644 target/riscv/vector_helper.c
>>
>> diff --git a/fpu/softfloat.c b/fpu/softfloat.c
>> index 2ba36ec..da155ea 100644
>> --- a/fpu/softfloat.c
>> +++ b/fpu/softfloat.c
>> @@ -433,6 +433,16 @@ static inline int extractFloat16Exp(float16 a)
>> }
>>
>> /*----------------------------------------------------------------------------
>> +| Returns the sign bit of the half-precision floating-point value `a'.
>> +*----------------------------------------------------------------------------*/
>> +
>> +static inline flag extractFloat16Sign(float16 a)
>> +{
>> + return float16_val(a) >> 0xf;
>> +}
>> +
> We are trying to avoid this sort of bit fiddling for new code when we
> already have generic decompose functions that can extract all the parts
> into a common format.
>
>> +
>> +/*----------------------------------------------------------------------------
>> | Returns the fraction bits of the single-precision floating-point value `a'.
>> *----------------------------------------------------------------------------*/
>>
>> @@ -4790,6 +4800,35 @@ int float32_eq(float32 a, float32 b, float_status *status)
>> }
>>
>> /*----------------------------------------------------------------------------
>> +| Returns 1 if the half-precision floating-point value `a' is less than
>> +| or equal to the corresponding value `b', and 0 otherwise. The invalid
>> +| exception is raised if either operand is a NaN. The comparison is performed
>> +| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
>> +*----------------------------------------------------------------------------*/
>> +
>> +int float16_le(float16 a, float16 b, float_status *status)
>> +{
>> + flag aSign, bSign;
>> + uint16_t av, bv;
>> + a = float16_squash_input_denormal(a, status);
>> + b = float16_squash_input_denormal(b, status);
>> +
>> + if ( ( ( extractFloat16Exp( a ) == 0x1F ) && extractFloat16Frac( a ) )
>> + || ( ( extractFloat16Exp( b ) == 0x1F ) && extractFloat16Frac( b ) )
>> + ) {
>> + float_raise(float_flag_invalid, status);
>> + return 0;
>> + }
>> + aSign = extractFloat16Sign( a );
>> + bSign = extractFloat16Sign( b );
>> + av = float16_val(a);
>> + bv = float16_val(b);
>> + if ( aSign != bSign ) return aSign || ( (uint16_t) ( ( av | bv )<<1 ) == 0 );
>> + return ( av == bv ) || ( aSign ^ ( av < bv ) );
>> +
>> +}
> What does this provide that:
>
> float16_compare(a, b, status) == float_relation_less;
>
> doesn't?
>
>> +
>> +/*----------------------------------------------------------------------------
>> | Returns 1 if the single-precision floating-point value `a' is less than
>> | or equal to the corresponding value `b', and 0 otherwise. The invalid
>> | exception is raised if either operand is a NaN. The comparison is performed
>> @@ -4825,6 +4864,35 @@ int float32_le(float32 a, float32 b, float_status *status)
>> | to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
>> *----------------------------------------------------------------------------*/
>>
>> +int float16_lt(float16 a, float16 b, float_status *status)
>> +{
>> + flag aSign, bSign;
>> + uint16_t av, bv;
>> + a = float16_squash_input_denormal(a, status);
>> + b = float16_squash_input_denormal(b, status);
>> +
>> + if ( ( ( extractFloat16Exp( a ) == 0x1F ) && extractFloat16Frac( a ) )
>> + || ( ( extractFloat16Exp( b ) == 0x1F ) && extractFloat16Frac( b ) )
>> + ) {
>> + float_raise(float_flag_invalid, status);
>> + return 0;
>> + }
>> + aSign = extractFloat16Sign( a );
>> + bSign = extractFloat16Sign( b );
>> + av = float16_val(a);
>> + bv = float16_val(b);
>> + if ( aSign != bSign ) return aSign && ( (uint16_t) ( ( av | bv )<<1 ) != 0 );
>> + return ( av != bv ) && ( aSign ^ ( av < bv ) );
>> +
>> +}
>> +
>> +/*----------------------------------------------------------------------------
>> +| Returns 1 if the single-precision floating-point value `a' is less than
>> +| the corresponding value `b', and 0 otherwise. The invalid exception is
>> +| raised if either operand is a NaN. The comparison is performed according
>> +| to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
>> +*----------------------------------------------------------------------------*/
>> +
>> int float32_lt(float32 a, float32 b, float_status *status)
>> {
>> flag aSign, bSign;
>> @@ -4869,6 +4937,32 @@ int float32_unordered(float32 a, float32 b, float_status *status)
>> }
>>
>> /*----------------------------------------------------------------------------
>> +| Returns 1 if the half-precision floating-point value `a' is equal to
>> +| the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an
>> +| exception. The comparison is performed according to the IEC/IEEE Standard
>> +| for Binary Floating-Point Arithmetic.
>> +*----------------------------------------------------------------------------*/
>> +
>> +int float16_eq_quiet(float16 a, float16 b, float_status *status)
>> +{
>> + a = float16_squash_input_denormal(a, status);
>> + b = float16_squash_input_denormal(b, status);
>> +
>> + if ( ( ( extractFloat16Exp( a ) == 0x1F ) && extractFloat16Frac( a ) )
>> + || ( ( extractFloat16Exp( b ) == 0x1F ) && extractFloat16Frac( b ) )
>> + ) {
>> + if (float16_is_signaling_nan(a, status)
>> + || float16_is_signaling_nan(b, status)) {
>> + float_raise(float_flag_invalid, status);
>> + }
>> + return 0;
>> + }
>> + return ( float16_val(a) == float16_val(b) ) ||
>> + ( (uint16_t) ( ( float16_val(a) | float16_val(b) )<<1 ) == 0 );
>> +}
>> +
> See also float16_compare_quiet
Thank you for reminding me. I didn't find the float16_compare and
float16_compare_quiet interfaces before.
>> +
>> +/*----------------------------------------------------------------------------
>> | Returns 1 if the single-precision floating-point value `a' is equal to
>> | the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an
>> | exception. The comparison is performed according to the IEC/IEEE Standard
>> @@ -4958,6 +5052,31 @@ int float32_lt_quiet(float32 a, float32 b, float_status *status)
>> }
>>
>> /*----------------------------------------------------------------------------
>> +| Returns 1 if the half-precision floating-point values `a' and `b' cannot
>> +| be compared, and 0 otherwise. Quiet NaNs do not cause an exception. The
>> +| comparison is performed according to the IEC/IEEE Standard for Binary
>> +| Floating-Point Arithmetic.
>> +*----------------------------------------------------------------------------*/
>> +
>> +int float16_unordered_quiet(float16 a, float16 b, float_status *status)
>> +{
>> + a = float16_squash_input_denormal(a, status);
>> + b = float16_squash_input_denormal(b, status);
>> +
>> + if ( ( ( extractFloat16Exp( a ) == 0x1F ) && extractFloat16Frac( a ) )
>> + || ( ( extractFloat16Exp( b ) == 0x1F ) && extractFloat16Frac( b ) )
>> + ) {
>> + if (float16_is_signaling_nan(a, status)
>> + || float16_is_signaling_nan(b, status)) {
>> + float_raise(float_flag_invalid, status);
>> + }
>> + return 1;
>> + }
>> + return 0;
>> +}
>> +
>> +
>> +/*----------------------------------------------------------------------------
>> | Returns 1 if the single-precision floating-point values `a' and `b' cannot
>> | be compared, and 0 otherwise. Quiet NaNs do not cause an exception. The
>> | comparison is performed according to the IEC/IEEE Standard for Binary
>> diff --git a/include/fpu/softfloat.h b/include/fpu/softfloat.h
>> index 3ff3fa5..3b0754c 100644
>> --- a/include/fpu/softfloat.h
>> +++ b/include/fpu/softfloat.h
>> @@ -293,6 +293,10 @@ float16 float16_maxnummag(float16, float16, float_status *status);
>> float16 float16_sqrt(float16, float_status *status);
>> int float16_compare(float16, float16, float_status *status);
>> int float16_compare_quiet(float16, float16, float_status *status);
>> +int float16_unordered_quiet(float16, float16, float_status *status);
>> +int float16_le(float16, float16, float_status *status);
>> +int float16_lt(float16, float16, float_status *status);
>> +int float16_eq_quiet(float16, float16, float_status *status);
>>
>> int float16_is_quiet_nan(float16, float_status *status);
>> int float16_is_signaling_nan(float16, float_status *status);
>> diff --git a/linux-user/riscv/cpu_loop.c b/linux-user/riscv/cpu_loop.c
>> index 12aa3c0..b01548a 100644
>> --- a/linux-user/riscv/cpu_loop.c
>> +++ b/linux-user/riscv/cpu_loop.c
>> @@ -40,7 +40,13 @@ void cpu_loop(CPURISCVState *env)
>> signum = 0;
>> sigcode = 0;
>> sigaddr = 0;
>> -
>> + if (env->foflag) {
>> + if (env->vfp.vl != 0) {
>> + env->foflag = false;
>> + env->pc += 4;
>> + continue;
>> + }
>> + }
> What is this trying to do?
It handles the fault-only-first exception: the trap taken by a
fault-only-first load is suppressed here and execution resumes at the next
instruction.
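For context, a rough sketch of the 0.7.1 fault-only-first semantics; the
probe helper here is hypothetical, not the patch's code:

    /* vlbff.v and friends: element 0 must trap normally, but a fault on
     * any later element shrinks vl and completes the instruction. */
    for (i = env->vfp.vstart; i < env->vfp.vl; i++) {
        if (!load_ok(env, addr + i)) {              /* hypothetical probe */
            if (i == 0) {
                riscv_raise_exception(env, RISCV_EXCP_LOAD_PAGE_FAULT,
                                      GETPC());
            }
            env->vfp.vl = i;        /* truncate vl instead of trapping */
            break;
        }
        /* ... load element i into vd ... */
    }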
>
>> switch (trapnr) {
>> case EXCP_INTERRUPT:
>> /* just indicate that signals should be handled asap */
>> diff --git a/target/riscv/Makefile.objs b/target/riscv/Makefile.objs
>> index b1c79bc..d577cef 100644
>> --- a/target/riscv/Makefile.objs
>> +++ b/target/riscv/Makefile.objs
>> @@ -1,4 +1,4 @@
>> -obj-y += translate.o op_helper.o cpu_helper.o cpu.o csr.o fpu_helper.o gdbstub.o pmp.o
>> +obj-y += translate.o op_helper.o cpu_helper.o cpu.o csr.o fpu_helper.o vector_helper.o gdbstub.o pmp.o
>>
>> DECODETREE = $(SRC_PATH)/scripts/decodetree.py
>>
>> diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h
>> index 0adb307..5a93aa2 100644
>> --- a/target/riscv/cpu.h
>> +++ b/target/riscv/cpu.h
>> @@ -67,6 +67,7 @@
>> #define RVC RV('C')
>> #define RVS RV('S')
>> #define RVU RV('U')
>> +#define RVV RV('V')
>>
>> /* S extension denotes that Supervisor mode exists, however it is possible
>> to have a core that support S mode but does not have an MMU and there
>> @@ -93,9 +94,38 @@ typedef struct CPURISCVState CPURISCVState;
>>
>> #include "pmp.h"
>>
>> +#define VLEN 128
>> +#define VUNIT(x) (VLEN / x)
>> +
> If you want to do vectors I suggest you look at the TCGvec types for
> passing pointers to vector registers to helpers. In this case you will
> want to ensure your vector registers are properly aligned.
>
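For reference, the flat, host-aligned layout that the gvec/TCGv_vec
infrastructure expects looks roughly like the following (a sketch, not the
patch's layout):

    /* One contiguous, 16-byte-aligned block for all 32 vector registers,
     * so helpers can take offsets into it and host SIMD accesses stay
     * aligned. */
    uint64_t vreg[32 * VLEN / 64] QEMU_ALIGNED(16);
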
>> struct CPURISCVState {
>> target_ulong gpr[32];
>> uint64_t fpr[32]; /* assume both F and D extensions */
>> +
>> + /* vector coprocessor state. */
>> + struct {
>> + union VECTOR {
>> + float64 f64[VUNIT(64)];
>> + float32 f32[VUNIT(32)];
>> + float16 f16[VUNIT(16)];
>> + target_ulong ul[VUNIT(sizeof(target_ulong))];
>> + uint64_t u64[VUNIT(64)];
>> + int64_t s64[VUNIT(64)];
>> + uint32_t u32[VUNIT(32)];
>> + int32_t s32[VUNIT(32)];
>> + uint16_t u16[VUNIT(16)];
>> + int16_t s16[VUNIT(16)];
>> + uint8_t u8[VUNIT(8)];
>> + int8_t s8[VUNIT(8)];
>> + } vreg[32];
>> + target_ulong vxrm;
>> + target_ulong vxsat;
>> + target_ulong vl;
>> + target_ulong vstart;
>> + target_ulong vtype;
>> + float_status fp_status;
>> + } vfp;
>> +
>> + bool foflag;
> Again I have no idea what foflag is here.
>
>> target_ulong pc;
>> target_ulong load_res;
>> target_ulong load_val;
>> diff --git a/target/riscv/cpu_bits.h b/target/riscv/cpu_bits.h
>> index 11f971a..9eb43ec 100644
>> --- a/target/riscv/cpu_bits.h
>> +++ b/target/riscv/cpu_bits.h
>> @@ -29,6 +29,14 @@
>> #define FSR_NXA (FPEXC_NX << FSR_AEXC_SHIFT)
>> #define FSR_AEXC (FSR_NVA | FSR_OFA | FSR_UFA | FSR_DZA | FSR_NXA)
>>
>> +/* Vector Fixed-Point round model */
>> +#define FSR_VXRM_SHIFT 9
>> +#define FSR_VXRM (0x3 << FSR_VXRM_SHIFT)
>> +
>> +/* Vector Fixed-Point saturation flag */
>> +#define FSR_VXSAT_SHIFT 8
>> +#define FSR_VXSAT (0x1 << FSR_VXSAT_SHIFT)
>> +
>> /* Control and Status Registers */
>>
>> /* User Trap Setup */
>> @@ -48,6 +56,13 @@
>> #define CSR_FRM 0x002
>> #define CSR_FCSR 0x003
>>
>> +/* User Vector CSRs */
>> +#define CSR_VSTART 0x008
>> +#define CSR_VXSAT 0x009
>> +#define CSR_VXRM 0x00a
>> +#define CSR_VL 0xc20
>> +#define CSR_VTYPE 0xc21
>> +
>> /* User Timers and Counters */
>> #define CSR_CYCLE 0xc00
>> #define CSR_TIME 0xc01
>> diff --git a/target/riscv/cpu_helper.c b/target/riscv/cpu_helper.c
>> index e32b612..405caf6 100644
>> --- a/target/riscv/cpu_helper.c
>> +++ b/target/riscv/cpu_helper.c
>> @@ -521,6 +521,13 @@ void riscv_cpu_do_interrupt(CPUState *cs)
>> [PRV_H] = RISCV_EXCP_H_ECALL,
>> [PRV_M] = RISCV_EXCP_M_ECALL
>> };
>> + if (env->foflag) {
>> + if (env->vfp.vl != 0) {
>> + env->foflag = false;
>> + env->pc += 4;
>> + return;
>> + }
>> + }
>>
>> if (!async) {
>> /* set tval to badaddr for traps with address information */
>> diff --git a/target/riscv/csr.c b/target/riscv/csr.c
>> index e0d4586..a6131ff 100644
>> --- a/target/riscv/csr.c
>> +++ b/target/riscv/csr.c
>> @@ -87,12 +87,12 @@ static int ctr(CPURISCVState *env, int csrno)
>> return 0;
>> }
>>
>> -#if !defined(CONFIG_USER_ONLY)
>> static int any(CPURISCVState *env, int csrno)
>> {
>> return 0;
>> }
>>
>> +#if !defined(CONFIG_USER_ONLY)
>> static int smode(CPURISCVState *env, int csrno)
>> {
>> return -!riscv_has_ext(env, RVS);
>> @@ -158,8 +158,10 @@ static int read_fcsr(CPURISCVState *env, int csrno, target_ulong *val)
>> return -1;
>> }
>> #endif
>> - *val = (riscv_cpu_get_fflags(env) << FSR_AEXC_SHIFT)
>> - | (env->frm << FSR_RD_SHIFT);
>> + *val = (env->vfp.vxrm << FSR_VXRM_SHIFT)
>> + | (env->vfp.vxsat << FSR_VXSAT_SHIFT)
>> + | (riscv_cpu_get_fflags(env) << FSR_AEXC_SHIFT)
>> + | (env->frm << FSR_RD_SHIFT);
>> return 0;
>> }
>>
>> @@ -172,10 +174,60 @@ static int write_fcsr(CPURISCVState *env, int csrno, target_ulong val)
>> env->mstatus |= MSTATUS_FS;
>> #endif
>> env->frm = (val & FSR_RD) >> FSR_RD_SHIFT;
>> + env->vfp.vxrm = (val & FSR_VXRM) >> FSR_VXRM_SHIFT;
>> + env->vfp.vxsat = (val & FSR_VXSAT) >> FSR_VXSAT_SHIFT;
>> riscv_cpu_set_fflags(env, (val & FSR_AEXC) >> FSR_AEXC_SHIFT);
>> return 0;
>> }
>>
>> +static int read_vtype(CPURISCVState *env, int csrno, target_ulong *val)
>> +{
>> + *val = env->vfp.vtype;
>> + return 0;
>> +}
>> +
>> +static int read_vl(CPURISCVState *env, int csrno, target_ulong *val)
>> +{
>> + *val = env->vfp.vl;
>> + return 0;
>> +}
>> +
>> +static int read_vxrm(CPURISCVState *env, int csrno, target_ulong *val)
>> +{
>> + *val = env->vfp.vxrm;
>> + return 0;
>> +}
>> +
>> +static int read_vxsat(CPURISCVState *env, int csrno, target_ulong *val)
>> +{
>> + *val = env->vfp.vxsat;
>> + return 0;
>> +}
>> +
>> +static int read_vstart(CPURISCVState *env, int csrno, target_ulong *val)
>> +{
>> + *val = env->vfp.vstart;
>> + return 0;
>> +}
>> +
>> +static int write_vxrm(CPURISCVState *env, int csrno, target_ulong val)
>> +{
>> + env->vfp.vxrm = val;
>> + return 0;
>> +}
>> +
>> +static int write_vxsat(CPURISCVState *env, int csrno, target_ulong val)
>> +{
>> + env->vfp.vxsat = val;
>> + return 0;
>> +}
>> +
>> +static int write_vstart(CPURISCVState *env, int csrno, target_ulong val)
>> +{
>> + env->vfp.vstart = val;
>> + return 0;
>> +}
> A fixed return value makes me think these should be void functions.
Good!
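The suggested shape, for illustration:

    static void write_vstart(CPURISCVState *env, int csrno, target_ulong val)
    {
        env->vfp.vstart = val;
    }

though the riscv_csr_operations table would then need a matching
function-pointer type (or keep int for a uniform error path).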
>
>> +
>> /* User Timers and Counters */
>> static int read_instret(CPURISCVState *env, int csrno, target_ulong *val)
>> {
>> @@ -873,7 +925,12 @@ static riscv_csr_operations csr_ops[CSR_TABLE_SIZE] = {
>> [CSR_FFLAGS] = { fs, read_fflags, write_fflags },
>> [CSR_FRM] = { fs, read_frm, write_frm },
>> [CSR_FCSR] = { fs, read_fcsr, write_fcsr },
>> -
>> + /* Vector CSRs */
>> + [CSR_VSTART] = { any, read_vstart, write_vstart },
>> + [CSR_VXSAT] = { any, read_vxsat, write_vxsat },
>> + [CSR_VXRM] = { any, read_vxrm, write_vxrm },
>> + [CSR_VL] = { any, read_vl },
>> + [CSR_VTYPE] = { any, read_vtype },
>> /* User Timers and Counters */
>> [CSR_CYCLE] = { ctr, read_instret },
>> [CSR_INSTRET] = { ctr, read_instret },
>> diff --git a/target/riscv/helper.h b/target/riscv/helper.h
>> index debb22a..fee02c0 100644
>> --- a/target/riscv/helper.h
>> +++ b/target/riscv/helper.h
>> @@ -76,3 +76,357 @@ DEF_HELPER_2(mret, tl, env, tl)
>> DEF_HELPER_1(wfi, void, env)
>> DEF_HELPER_1(tlb_flush, void, env)
>> #endif
>> +/* Vector functions */
> Think about how you could split this patch up to introduce a group of
> instructions at a time. This will make it a lot easier review.
>
> I'm going to leave review of the specifics to the RISCV maintainers but
> I suspect they will want to wait until a v2 of the series. However it
> looks like a good first pass at implementing vectors.
>
> --
> Alex Bennée
Patch v2 will not change softfloat. Thank you again for your review!
Best Regards,
Zhiwei
>
* Re: [Qemu-riscv] [PATCH] RISCV: support riscv vector extension 0.7.1
2019-08-28 2:36 [Qemu-riscv] [PATCH] RISCV: support riscv vector extension 0.7.1 liuzhiwei
` (3 preceding siblings ...)
2019-08-28 21:34 ` [Qemu-riscv] " Alistair Francis
@ 2019-08-29 14:06 ` Chih-Min Chao
2019-09-02 8:17 ` liuzhiwei
4 siblings, 1 reply; 52+ messages in thread
From: Chih-Min Chao @ 2019-08-29 14:06 UTC (permalink / raw)
To: liuzhiwei
Cc: qemu-devel@nongnu.org Developers, open list:RISC-V,
Peter Maydell, Palmer Dabbelt, Sagar Karandikar,
Bastian Koppelmann, riku.voipio, laurent, Alistair Francis,
Alex Bennée, aurelien
Hi Liuzhiwei,
Some comments:
1. The vector extension allows flexible implementations. It would be better
to describe the limitations of the current implementation (such as
vlen/elen/slen), which sections are supported, and which features are not.
2. There should be a cfg.ext_v flag so the vector extension can be turned
on from the command line (see the sketch after this list).
3. Regarding the license: it should be "Copyright (c) 2019 C-SKY Limited,
All rights reserved.", not "2011 ~ 2019".
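A sketch of what point 2 could look like, following the property pattern
used elsewhere in target/riscv/cpu.c (the property name and field are
illustrative):

    /* in the CPU's property list: off by default, enabled with
     * e.g. "-cpu rv64,v=true" on the command line */
    DEFINE_PROP_BOOL("v", RISCVCPU, cfg.ext_v, false),

    /* at realize time, fold it into MISA (names illustrative): */
    if (cpu->cfg.ext_v) {
        target_misa |= RVV;
    }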
It is a huge piece of work; thanks for your contribution.
chihmin
On Wed, Aug 28, 2019 at 3:06 PM liuzhiwei <zhiwei_liu@c-sky.com> wrote:
> Change-Id: I3cf891bc400713b95f47ecca82b1bf773f3dcb25
> Signed-off-by: liuzhiwei <zhiwei_liu@c-sky.com>
> ---
> fpu/softfloat.c | 119 +
> include/fpu/softfloat.h | 4 +
> linux-user/riscv/cpu_loop.c | 8 +-
> target/riscv/Makefile.objs | 2 +-
> target/riscv/cpu.h | 30 +
> target/riscv/cpu_bits.h | 15 +
> target/riscv/cpu_helper.c | 7 +
> target/riscv/csr.c | 65 +-
> target/riscv/helper.h | 354 +
> target/riscv/insn32.decode | 374 +-
> target/riscv/insn_trans/trans_rvv.inc.c | 484 +
> target/riscv/translate.c | 1 +
> target/riscv/vector_helper.c | 26563 ++++++++++++++++++++++++++++++
> 13 files changed, 28017 insertions(+), 9 deletions(-)
> create mode 100644 target/riscv/insn_trans/trans_rvv.inc.c
> create mode 100644 target/riscv/vector_helper.c
>
> diff --git a/fpu/softfloat.c b/fpu/softfloat.c
> index 2ba36ec..da155ea 100644
> --- a/fpu/softfloat.c
> +++ b/fpu/softfloat.c
> @@ -433,6 +433,16 @@ static inline int extractFloat16Exp(float16 a)
> }
>
> /*----------------------------------------------------------------------------
> +| Returns the sign bit of the half-precision floating-point value `a'.
> +*----------------------------------------------------------------------------*/
> +
> +static inline flag extractFloat16Sign(float16 a)
> +{
> + return float16_val(a) >> 0xf;
> +}
> +
> +
> +/*----------------------------------------------------------------------------
> | Returns the fraction bits of the single-precision floating-point value `a'.
> *----------------------------------------------------------------------------*/
>
> @@ -4790,6 +4800,35 @@ int float32_eq(float32 a, float32 b, float_status *status)
> }
>
> /*----------------------------------------------------------------------------
> +| Returns 1 if the half-precision floating-point value `a' is less than
> +| or equal to the corresponding value `b', and 0 otherwise. The invalid
> +| exception is raised if either operand is a NaN. The comparison is performed
> +| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
> +*----------------------------------------------------------------------------*/
> +
> +int float16_le(float16 a, float16 b, float_status *status)
> +{
> + flag aSign, bSign;
> + uint16_t av, bv;
> + a = float16_squash_input_denormal(a, status);
> + b = float16_squash_input_denormal(b, status);
> +
> + if ( ( ( extractFloat16Exp( a ) == 0x1F ) && extractFloat16Frac( a ) )
> + || ( ( extractFloat16Exp( b ) == 0x1F ) && extractFloat16Frac( b ) )
> + ) {
> + float_raise(float_flag_invalid, status);
> + return 0;
> + }
> + aSign = extractFloat16Sign( a );
> + bSign = extractFloat16Sign( b );
> + av = float16_val(a);
> + bv = float16_val(b);
> + if ( aSign != bSign ) return aSign || ( (uint16_t) ( ( av | bv )<<1 ) == 0 );
> + return ( av == bv ) || ( aSign ^ ( av < bv ) );
> +
> +}
> +
> +/*----------------------------------------------------------------------------
> | Returns 1 if the single-precision floating-point value `a' is less than
> | or equal to the corresponding value `b', and 0 otherwise. The invalid
> | exception is raised if either operand is a NaN. The comparison is performed
> @@ -4825,6 +4864,35 @@ int float32_le(float32 a, float32 b, float_status *status)
> | to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
> *----------------------------------------------------------------------------*/
>
> +int float16_lt(float16 a, float16 b, float_status *status)
> +{
> + flag aSign, bSign;
> + uint16_t av, bv;
> + a = float16_squash_input_denormal(a, status);
> + b = float16_squash_input_denormal(b, status);
> +
> + if ( ( ( extractFloat16Exp( a ) == 0x1F ) && extractFloat16Frac( a ) )
> + || ( ( extractFloat16Exp( b ) == 0x1F ) && extractFloat16Frac( b ) )
> + ) {
> + float_raise(float_flag_invalid, status);
> + return 0;
> + }
> + aSign = extractFloat16Sign( a );
> + bSign = extractFloat16Sign( b );
> + av = float16_val(a);
> + bv = float16_val(b);
> + if ( aSign != bSign ) return aSign && ( (uint16_t) ( ( av | bv )<<1 ) != 0 );
> + return ( av != bv ) && ( aSign ^ ( av < bv ) );
> +
> +}
> +
> +/*----------------------------------------------------------------------------
> +| Returns 1 if the single-precision floating-point value `a' is less than
> +| the corresponding value `b', and 0 otherwise. The invalid exception is
> +| raised if either operand is a NaN. The comparison is performed according
> +| to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
> +*----------------------------------------------------------------------------*/
> +
> int float32_lt(float32 a, float32 b, float_status *status)
> {
> flag aSign, bSign;
> @@ -4869,6 +4937,32 @@ int float32_unordered(float32 a, float32 b, float_status *status)
> }
>
> /*----------------------------------------------------------------------------
> +| Returns 1 if the half-precision floating-point value `a' is equal to
> +| the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an
> +| exception. The comparison is performed according to the IEC/IEEE Standard
> +| for Binary Floating-Point Arithmetic.
> +*----------------------------------------------------------------------------*/
> +
> +int float16_eq_quiet(float16 a, float16 b, float_status *status)
> +{
> + a = float16_squash_input_denormal(a, status);
> + b = float16_squash_input_denormal(b, status);
> +
> + if ( ( ( extractFloat16Exp( a ) == 0x1F ) && extractFloat16Frac( a ) )
> + || ( ( extractFloat16Exp( b ) == 0x1F ) && extractFloat16Frac( b ) )
> + ) {
> + if (float16_is_signaling_nan(a, status)
> + || float16_is_signaling_nan(b, status)) {
> + float_raise(float_flag_invalid, status);
> + }
> + return 0;
> + }
> + return ( float16_val(a) == float16_val(b) ) ||
> + ( (uint16_t) ( ( float16_val(a) | float16_val(b) )<<1 ) == 0 );
> +}
> +
> +
> +/*----------------------------------------------------------------------------
> | Returns 1 if the single-precision floating-point value `a' is equal to
> | the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an
> | exception. The comparison is performed according to the IEC/IEEE Standard
> @@ -4958,6 +5052,31 @@ int float32_lt_quiet(float32 a, float32 b, float_status *status)
> }
>
> /*----------------------------------------------------------------------------
> +| Returns 1 if the half-precision floating-point values `a' and `b' cannot
> +| be compared, and 0 otherwise. Quiet NaNs do not cause an exception. The
> +| comparison is performed according to the IEC/IEEE Standard for Binary
> +| Floating-Point Arithmetic.
> +*----------------------------------------------------------------------------*/
> +
> +int float16_unordered_quiet(float16 a, float16 b, float_status *status)
> +{
> + a = float16_squash_input_denormal(a, status);
> + b = float16_squash_input_denormal(b, status);
> +
> + if ( ( ( extractFloat16Exp( a ) == 0x1F ) && extractFloat16Frac( a ) )
> + || ( ( extractFloat16Exp( b ) == 0x1F ) && extractFloat16Frac( b ) )
> + ) {
> + if (float16_is_signaling_nan(a, status)
> + || float16_is_signaling_nan(b, status)) {
> + float_raise(float_flag_invalid, status);
> + }
> + return 1;
> + }
> + return 0;
> +}
> +
> +
> +/*----------------------------------------------------------------------------
> | Returns 1 if the single-precision floating-point values `a' and `b' cannot
> | be compared, and 0 otherwise. Quiet NaNs do not cause an exception. The
> | comparison is performed according to the IEC/IEEE Standard for Binary
> diff --git a/include/fpu/softfloat.h b/include/fpu/softfloat.h
> index 3ff3fa5..3b0754c 100644
> --- a/include/fpu/softfloat.h
> +++ b/include/fpu/softfloat.h
> @@ -293,6 +293,10 @@ float16 float16_maxnummag(float16, float16, float_status *status);
> float16 float16_sqrt(float16, float_status *status);
> int float16_compare(float16, float16, float_status *status);
> int float16_compare_quiet(float16, float16, float_status *status);
> +int float16_unordered_quiet(float16, float16, float_status *status);
> +int float16_le(float16, float16, float_status *status);
> +int float16_lt(float16, float16, float_status *status);
> +int float16_eq_quiet(float16, float16, float_status *status);
>
> int float16_is_quiet_nan(float16, float_status *status);
> int float16_is_signaling_nan(float16, float_status *status);
> diff --git a/linux-user/riscv/cpu_loop.c b/linux-user/riscv/cpu_loop.c
> index 12aa3c0..b01548a 100644
> --- a/linux-user/riscv/cpu_loop.c
> +++ b/linux-user/riscv/cpu_loop.c
> @@ -40,7 +40,13 @@ void cpu_loop(CPURISCVState *env)
> signum = 0;
> sigcode = 0;
> sigaddr = 0;
> -
> + if (env->foflag) {
> + if (env->vfp.vl != 0) {
> + env->foflag = false;
> + env->pc += 4;
> + continue;
> + }
> + }
> switch (trapnr) {
> case EXCP_INTERRUPT:
> /* just indicate that signals should be handled asap */
> diff --git a/target/riscv/Makefile.objs b/target/riscv/Makefile.objs
> index b1c79bc..d577cef 100644
> --- a/target/riscv/Makefile.objs
> +++ b/target/riscv/Makefile.objs
> @@ -1,4 +1,4 @@
> -obj-y += translate.o op_helper.o cpu_helper.o cpu.o csr.o fpu_helper.o gdbstub.o pmp.o
> +obj-y += translate.o op_helper.o cpu_helper.o cpu.o csr.o fpu_helper.o vector_helper.o gdbstub.o pmp.o
>
> DECODETREE = $(SRC_PATH)/scripts/decodetree.py
>
> diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h
> index 0adb307..5a93aa2 100644
> --- a/target/riscv/cpu.h
> +++ b/target/riscv/cpu.h
> @@ -67,6 +67,7 @@
> #define RVC RV('C')
> #define RVS RV('S')
> #define RVU RV('U')
> +#define RVV RV('V')
>
> /* S extension denotes that Supervisor mode exists, however it is possible
> to have a core that support S mode but does not have an MMU and there
> @@ -93,9 +94,38 @@ typedef struct CPURISCVState CPURISCVState;
>
> #include "pmp.h"
>
> +#define VLEN 128
> +#define VUNIT(x) (VLEN / x)
> +
> struct CPURISCVState {
> target_ulong gpr[32];
> uint64_t fpr[32]; /* assume both F and D extensions */
> +
> + /* vector coprocessor state. */
> + struct {
> + union VECTOR {
> + float64 f64[VUNIT(64)];
> + float32 f32[VUNIT(32)];
> + float16 f16[VUNIT(16)];
> + target_ulong ul[VUNIT(sizeof(target_ulong))];
> + uint64_t u64[VUNIT(64)];
> + int64_t s64[VUNIT(64)];
> + uint32_t u32[VUNIT(32)];
> + int32_t s32[VUNIT(32)];
> + uint16_t u16[VUNIT(16)];
> + int16_t s16[VUNIT(16)];
> + uint8_t u8[VUNIT(8)];
> + int8_t s8[VUNIT(8)];
> + } vreg[32];
> + target_ulong vxrm;
> + target_ulong vxsat;
> + target_ulong vl;
> + target_ulong vstart;
> + target_ulong vtype;
> + float_status fp_status;
> + } vfp;
> +
> + bool foflag;
> target_ulong pc;
> target_ulong load_res;
> target_ulong load_val;
> diff --git a/target/riscv/cpu_bits.h b/target/riscv/cpu_bits.h
> index 11f971a..9eb43ec 100644
> --- a/target/riscv/cpu_bits.h
> +++ b/target/riscv/cpu_bits.h
> @@ -29,6 +29,14 @@
> #define FSR_NXA (FPEXC_NX << FSR_AEXC_SHIFT)
> #define FSR_AEXC (FSR_NVA | FSR_OFA | FSR_UFA | FSR_DZA | FSR_NXA)
>
> +/* Vector Fixed-Point round model */
> +#define FSR_VXRM_SHIFT 9
> +#define FSR_VXRM (0x3 << FSR_VXRM_SHIFT)
> +
> +/* Vector Fixed-Point saturation flag */
> +#define FSR_VXSAT_SHIFT 8
> +#define FSR_VXSAT (0x1 << FSR_VXSAT_SHIFT)
> +
> /* Control and Status Registers */
>
> /* User Trap Setup */
> @@ -48,6 +56,13 @@
> #define CSR_FRM 0x002
> #define CSR_FCSR 0x003
>
> +/* User Vector CSRs */
> +#define CSR_VSTART 0x008
> +#define CSR_VXSAT 0x009
> +#define CSR_VXRM 0x00a
> +#define CSR_VL 0xc20
> +#define CSR_VTYPE 0xc21
> +
> /* User Timers and Counters */
> #define CSR_CYCLE 0xc00
> #define CSR_TIME 0xc01
> diff --git a/target/riscv/cpu_helper.c b/target/riscv/cpu_helper.c
> index e32b612..405caf6 100644
> --- a/target/riscv/cpu_helper.c
> +++ b/target/riscv/cpu_helper.c
> @@ -521,6 +521,13 @@ void riscv_cpu_do_interrupt(CPUState *cs)
> [PRV_H] = RISCV_EXCP_H_ECALL,
> [PRV_M] = RISCV_EXCP_M_ECALL
> };
> + if (env->foflag) {
> + if (env->vfp.vl != 0) {
> + env->foflag = false;
> + env->pc += 4;
> + return;
> + }
> + }
>
> if (!async) {
> /* set tval to badaddr for traps with address information */
> diff --git a/target/riscv/csr.c b/target/riscv/csr.c
> index e0d4586..a6131ff 100644
> --- a/target/riscv/csr.c
> +++ b/target/riscv/csr.c
> @@ -87,12 +87,12 @@ static int ctr(CPURISCVState *env, int csrno)
> return 0;
> }
>
> -#if !defined(CONFIG_USER_ONLY)
> static int any(CPURISCVState *env, int csrno)
> {
> return 0;
> }
>
> +#if !defined(CONFIG_USER_ONLY)
> static int smode(CPURISCVState *env, int csrno)
> {
> return -!riscv_has_ext(env, RVS);
> @@ -158,8 +158,10 @@ static int read_fcsr(CPURISCVState *env, int csrno, target_ulong *val)
> return -1;
> }
> #endif
> - *val = (riscv_cpu_get_fflags(env) << FSR_AEXC_SHIFT)
> - | (env->frm << FSR_RD_SHIFT);
> + *val = (env->vfp.vxrm << FSR_VXRM_SHIFT)
> + | (env->vfp.vxsat << FSR_VXSAT_SHIFT)
> + | (riscv_cpu_get_fflags(env) << FSR_AEXC_SHIFT)
> + | (env->frm << FSR_RD_SHIFT);
> return 0;
> }
>
> @@ -172,10 +174,60 @@ static int write_fcsr(CPURISCVState *env, int csrno, target_ulong val)
> env->mstatus |= MSTATUS_FS;
> #endif
> env->frm = (val & FSR_RD) >> FSR_RD_SHIFT;
> + env->vfp.vxrm = (val & FSR_VXRM) >> FSR_VXRM_SHIFT;
> + env->vfp.vxsat = (val & FSR_VXSAT) >> FSR_VXSAT_SHIFT;
> riscv_cpu_set_fflags(env, (val & FSR_AEXC) >> FSR_AEXC_SHIFT);
> return 0;
> }
>
> +static int read_vtype(CPURISCVState *env, int csrno, target_ulong *val)
> +{
> + *val = env->vfp.vtype;
> + return 0;
> +}
> +
> +static int read_vl(CPURISCVState *env, int csrno, target_ulong *val)
> +{
> + *val = env->vfp.vl;
> + return 0;
> +}
> +
> +static int read_vxrm(CPURISCVState *env, int csrno, target_ulong *val)
> +{
> + *val = env->vfp.vxrm;
> + return 0;
> +}
> +
> +static int read_vxsat(CPURISCVState *env, int csrno, target_ulong *val)
> +{
> + *val = env->vfp.vxsat;
> + return 0;
> +}
> +
> +static int read_vstart(CPURISCVState *env, int csrno, target_ulong *val)
> +{
> + *val = env->vfp.vstart;
> + return 0;
> +}
> +
> +static int write_vxrm(CPURISCVState *env, int csrno, target_ulong val)
> +{
> + env->vfp.vxrm = val;
> + return 0;
> +}
> +
> +static int write_vxsat(CPURISCVState *env, int csrno, target_ulong val)
> +{
> + env->vfp.vxsat = val;
> + return 0;
> +}
> +
> +static int write_vstart(CPURISCVState *env, int csrno, target_ulong val)
> +{
> + env->vfp.vstart = val;
> + return 0;
> +}
> +
> /* User Timers and Counters */
> static int read_instret(CPURISCVState *env, int csrno, target_ulong *val)
> {
> @@ -873,7 +925,12 @@ static riscv_csr_operations csr_ops[CSR_TABLE_SIZE] = {
> [CSR_FFLAGS] = { fs, read_fflags, write_fflags },
> [CSR_FRM] = { fs, read_frm, write_frm },
> [CSR_FCSR] = { fs, read_fcsr, write_fcsr },
> -
> + /* Vector CSRs */
> + [CSR_VSTART] = { any, read_vstart, write_vstart },
> + [CSR_VXSAT] = { any, read_vxsat, write_vxsat },
> + [CSR_VXRM] = { any, read_vxrm, write_vxrm },
> + [CSR_VL] = { any, read_vl },
> + [CSR_VTYPE] = { any, read_vtype },
> /* User Timers and Counters */
> [CSR_CYCLE] = { ctr, read_instret },
> [CSR_INSTRET] = { ctr, read_instret },
> diff --git a/target/riscv/helper.h b/target/riscv/helper.h
> index debb22a..fee02c0 100644
> --- a/target/riscv/helper.h
> +++ b/target/riscv/helper.h
> @@ -76,3 +76,357 @@ DEF_HELPER_2(mret, tl, env, tl)
> DEF_HELPER_1(wfi, void, env)
> DEF_HELPER_1(tlb_flush, void, env)
> #endif
> +/* Vector functions */
> +DEF_HELPER_5(vector_vlb_v, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vlh_v, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vlw_v, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vle_v, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vlbu_v, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vlhu_v, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vlwu_v, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vlbff_v, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vlhff_v, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vlwff_v, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vleff_v, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vlbuff_v, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vlhuff_v, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vlwuff_v, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vsb_v, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vsh_v, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vsw_v, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vse_v, void, env, i32, i32, i32, i32)
> +DEF_HELPER_6(vector_vlsb_v, void, env, i32, i32, i32, i32, i32)
> +DEF_HELPER_6(vector_vlsh_v, void, env, i32, i32, i32, i32, i32)
> +DEF_HELPER_6(vector_vlsw_v, void, env, i32, i32, i32, i32, i32)
> +DEF_HELPER_6(vector_vlse_v, void, env, i32, i32, i32, i32, i32)
> +DEF_HELPER_6(vector_vlsbu_v, void, env, i32, i32, i32, i32, i32)
> +DEF_HELPER_6(vector_vlshu_v, void, env, i32, i32, i32, i32, i32)
> +DEF_HELPER_6(vector_vlswu_v, void, env, i32, i32, i32, i32, i32)
> +DEF_HELPER_6(vector_vssb_v, void, env, i32, i32, i32, i32, i32)
> +DEF_HELPER_6(vector_vssh_v, void, env, i32, i32, i32, i32, i32)
> +DEF_HELPER_6(vector_vssw_v, void, env, i32, i32, i32, i32, i32)
> +DEF_HELPER_6(vector_vsse_v, void, env, i32, i32, i32, i32, i32)
> +DEF_HELPER_6(vector_vlxb_v, void, env, i32, i32, i32, i32, i32)
> +DEF_HELPER_6(vector_vlxh_v, void, env, i32, i32, i32, i32, i32)
> +DEF_HELPER_6(vector_vlxw_v, void, env, i32, i32, i32, i32, i32)
> +DEF_HELPER_6(vector_vlxe_v, void, env, i32, i32, i32, i32, i32)
> +DEF_HELPER_6(vector_vlxbu_v, void, env, i32, i32, i32, i32, i32)
> +DEF_HELPER_6(vector_vlxhu_v, void, env, i32, i32, i32, i32, i32)
> +DEF_HELPER_6(vector_vlxwu_v, void, env, i32, i32, i32, i32, i32)
> +DEF_HELPER_6(vector_vsxb_v, void, env, i32, i32, i32, i32, i32)
> +DEF_HELPER_6(vector_vsxh_v, void, env, i32, i32, i32, i32, i32)
> +DEF_HELPER_6(vector_vsxw_v, void, env, i32, i32, i32, i32, i32)
> +DEF_HELPER_6(vector_vsxe_v, void, env, i32, i32, i32, i32, i32)
> +DEF_HELPER_6(vector_vsuxb_v, void, env, i32, i32, i32, i32, i32)
> +DEF_HELPER_6(vector_vsuxh_v, void, env, i32, i32, i32, i32, i32)
> +DEF_HELPER_6(vector_vsuxw_v, void, env, i32, i32, i32, i32, i32)
> +DEF_HELPER_6(vector_vsuxe_v, void, env, i32, i32, i32, i32, i32)
> +DEF_HELPER_6(vector_vamoswapw_v, void, env, i32, i32, i32, i32, i32)
> +DEF_HELPER_6(vector_vamoswapd_v, void, env, i32, i32, i32, i32, i32)
> +DEF_HELPER_6(vector_vamoaddw_v, void, env, i32, i32, i32, i32, i32)
> +DEF_HELPER_6(vector_vamoaddd_v, void, env, i32, i32, i32, i32, i32)
> +DEF_HELPER_6(vector_vamoxorw_v, void, env, i32, i32, i32, i32, i32)
> +DEF_HELPER_6(vector_vamoxord_v, void, env, i32, i32, i32, i32, i32)
> +DEF_HELPER_6(vector_vamoandw_v, void, env, i32, i32, i32, i32, i32)
> +DEF_HELPER_6(vector_vamoandd_v, void, env, i32, i32, i32, i32, i32)
> +DEF_HELPER_6(vector_vamoorw_v, void, env, i32, i32, i32, i32, i32)
> +DEF_HELPER_6(vector_vamoord_v, void, env, i32, i32, i32, i32, i32)
> +DEF_HELPER_6(vector_vamominw_v, void, env, i32, i32, i32, i32, i32)
> +DEF_HELPER_6(vector_vamomind_v, void, env, i32, i32, i32, i32, i32)
> +DEF_HELPER_6(vector_vamomaxw_v, void, env, i32, i32, i32, i32, i32)
> +DEF_HELPER_6(vector_vamomaxd_v, void, env, i32, i32, i32, i32, i32)
> +DEF_HELPER_6(vector_vamominuw_v, void, env, i32, i32, i32, i32, i32)
> +DEF_HELPER_6(vector_vamominud_v, void, env, i32, i32, i32, i32, i32)
> +DEF_HELPER_6(vector_vamomaxuw_v, void, env, i32, i32, i32, i32, i32)
> +DEF_HELPER_6(vector_vamomaxud_v, void, env, i32, i32, i32, i32, i32)
> +DEF_HELPER_4(vector_vext_x_v, void, env, i32, i32, i32)
> +DEF_HELPER_4(vector_vfmv_f_s, void, env, i32, i32, i32)
> +DEF_HELPER_4(vector_vmv_s_x, void, env, i32, i32, i32)
> +DEF_HELPER_4(vector_vfmv_s_f, void, env, i32, i32, i32)
> +DEF_HELPER_4(vector_vadc_vvm, void, env, i32, i32, i32)
> +DEF_HELPER_4(vector_vadc_vxm, void, env, i32, i32, i32)
> +DEF_HELPER_4(vector_vadc_vim, void, env, i32, i32, i32)
> +DEF_HELPER_4(vector_vmadc_vvm, void, env, i32, i32, i32)
> +DEF_HELPER_4(vector_vmadc_vxm, void, env, i32, i32, i32)
> +DEF_HELPER_4(vector_vmadc_vim, void, env, i32, i32, i32)
> +DEF_HELPER_4(vector_vsbc_vvm, void, env, i32, i32, i32)
> +DEF_HELPER_4(vector_vsbc_vxm, void, env, i32, i32, i32)
> +DEF_HELPER_4(vector_vmsbc_vvm, void, env, i32, i32, i32)
> +DEF_HELPER_4(vector_vmsbc_vxm, void, env, i32, i32, i32)
> +DEF_HELPER_4(vector_vmpopc_m, void, env, i32, i32, i32)
> +DEF_HELPER_4(vector_vmfirst_m, void, env, i32, i32, i32)
> +DEF_HELPER_4(vector_vcompress_vm, void, env, i32, i32, i32)
> +DEF_HELPER_4(vector_vmandnot_mm, void, env, i32, i32, i32)
> +DEF_HELPER_4(vector_vmand_mm, void, env, i32, i32, i32)
> +DEF_HELPER_4(vector_vmor_mm, void, env, i32, i32, i32)
> +DEF_HELPER_4(vector_vmxor_mm, void, env, i32, i32, i32)
> +DEF_HELPER_4(vector_vmornot_mm, void, env, i32, i32, i32)
> +DEF_HELPER_4(vector_vmnand_mm, void, env, i32, i32, i32)
> +DEF_HELPER_4(vector_vmnor_mm, void, env, i32, i32, i32)
> +DEF_HELPER_4(vector_vmxnor_mm, void, env, i32, i32, i32)
> +DEF_HELPER_4(vector_vmsbf_m, void, env, i32, i32, i32)
> +DEF_HELPER_4(vector_vmsof_m, void, env, i32, i32, i32)
> +DEF_HELPER_4(vector_vmsif_m, void, env, i32, i32, i32)
> +DEF_HELPER_4(vector_viota_m, void, env, i32, i32, i32)
> +DEF_HELPER_3(vector_vid_v, void, env, i32, i32)
> +DEF_HELPER_4(vector_vfcvt_xu_f_v, void, env, i32, i32, i32)
> +DEF_HELPER_4(vector_vfcvt_x_f_v, void, env, i32, i32, i32)
> +DEF_HELPER_4(vector_vfcvt_f_xu_v, void, env, i32, i32, i32)
> +DEF_HELPER_4(vector_vfcvt_f_x_v, void, env, i32, i32, i32)
> +DEF_HELPER_4(vector_vfwcvt_xu_f_v, void, env, i32, i32, i32)
> +DEF_HELPER_4(vector_vfwcvt_x_f_v, void, env, i32, i32, i32)
> +DEF_HELPER_4(vector_vfwcvt_f_xu_v, void, env, i32, i32, i32)
> +DEF_HELPER_4(vector_vfwcvt_f_x_v, void, env, i32, i32, i32)
> +DEF_HELPER_4(vector_vfwcvt_f_f_v, void, env, i32, i32, i32)
> +DEF_HELPER_4(vector_vfncvt_xu_f_v, void, env, i32, i32, i32)
> +DEF_HELPER_4(vector_vfncvt_x_f_v, void, env, i32, i32, i32)
> +DEF_HELPER_4(vector_vfncvt_f_xu_v, void, env, i32, i32, i32)
> +DEF_HELPER_4(vector_vfncvt_f_x_v, void, env, i32, i32, i32)
> +DEF_HELPER_4(vector_vfncvt_f_f_v, void, env, i32, i32, i32)
> +DEF_HELPER_4(vector_vfsqrt_v, void, env, i32, i32, i32)
> +DEF_HELPER_4(vector_vfclass_v, void, env, i32, i32, i32)
> +DEF_HELPER_5(vector_vadd_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vadd_vx, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vadd_vi, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vredsum_vs, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vfadd_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vfadd_vf, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vredand_vs, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vfredsum_vs, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vsub_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vsub_vx, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vredor_vs, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vfsub_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vfsub_vf, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vrsub_vx, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vrsub_vi, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vredxor_vs, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vfredosum_vs, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vminu_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vminu_vx, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vredminu_vs, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vfmin_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vfmin_vf, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vmin_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vmin_vx, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vredmin_vs, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vfredmin_vs, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vmaxu_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vmaxu_vx, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vredmaxu_vs, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vfmax_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vfmax_vf, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vmax_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vmax_vx, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vredmax_vs, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vfredmax_vs, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vfsgnj_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vfsgnj_vf, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vand_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vand_vx, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vand_vi, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vfsgnjn_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vfsgnjn_vf, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vor_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vor_vx, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vor_vi, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vfsgnjx_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vfsgnjx_vf, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vxor_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vxor_vx, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vxor_vi, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vrgather_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vrgather_vx, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vrgather_vi, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vslideup_vx, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vslideup_vi, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vslide1up_vx, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vslidedown_vx, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vslidedown_vi, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vslide1down_vx, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vmerge_vvm, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vmerge_vxm, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vmerge_vim, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vfmerge_vfm, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vmseq_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vmseq_vx, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vmseq_vi, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vmfeq_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vmfeq_vf, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vmsne_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vmsne_vx, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vmsne_vi, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vmfle_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vmfle_vf, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vmsltu_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vmsltu_vx, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vmford_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vmford_vf, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vmslt_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vmslt_vx, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vmflt_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vmflt_vf, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vmsleu_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vmsleu_vx, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vmsleu_vi, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vmfne_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vmfne_vf, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vmsle_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vmsle_vx, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vmsle_vi, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vmfgt_vf, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vmsgtu_vx, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vmsgtu_vi, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vmsgt_vx, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vmsgt_vi, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vmfge_vf, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vsaddu_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vsaddu_vx, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vsaddu_vi, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vdivu_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vdivu_vx, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vfdiv_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vfdiv_vf, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vsadd_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vsadd_vx, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vsadd_vi, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vdiv_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vdiv_vx, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vfrdiv_vf, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vssubu_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vssubu_vx, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vremu_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vremu_vx, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vssub_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vssub_vx, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vrem_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vrem_vx, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vaadd_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vaadd_vx, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vaadd_vi, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vmulhu_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vmulhu_vx, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vfmul_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vfmul_vf, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vsll_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vsll_vx, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vsll_vi, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vmul_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vmul_vx, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vasub_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vasub_vx, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vmulhsu_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vmulhsu_vx, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vsmul_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vsmul_vx, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vmulh_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vmulh_vx, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vfrsub_vf, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vsrl_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vsrl_vx, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vsrl_vi, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vfmadd_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vfmadd_vf, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vsra_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vsra_vx, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vsra_vi, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vmadd_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vmadd_vx, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vfnmadd_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vfnmadd_vf, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vssrl_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vssrl_vx, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vssrl_vi, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vfmsub_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vfmsub_vf, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vssra_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vssra_vx, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vssra_vi, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vnmsub_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vnmsub_vx, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vfnmsub_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vfnmsub_vf, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vnsrl_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vnsrl_vx, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vnsrl_vi, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vfmacc_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vfmacc_vf, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vnsra_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vnsra_vx, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vnsra_vi, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vmacc_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vmacc_vx, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vfnmacc_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vfnmacc_vf, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vnclipu_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vnclipu_vx, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vnclipu_vi, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vfmsac_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vfmsac_vf, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vnclip_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vnclip_vx, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vnclip_vi, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vnmsac_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vnmsac_vx, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vfnmsac_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vfnmsac_vf, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vwredsumu_vs, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vwaddu_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vwaddu_vx, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vfwadd_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vfwadd_vf, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vwredsum_vs, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vwadd_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vwadd_vx, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vfwredsum_vs, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vwsubu_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vwsubu_vx, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vfwsub_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vfwsub_vf, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vwsub_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vwsub_vx, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vfwredosum_vs, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vwaddu_wv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vwaddu_wx, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vfwadd_wv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vfwadd_wf, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vwadd_wv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vwadd_wx, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vwsubu_wv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vwsubu_wx, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vfwsub_wv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vfwsub_wf, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vwsub_wv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vwsub_wx, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vwmulu_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vwmulu_vx, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vfwmul_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vfwmul_vf, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vwmulsu_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vwmulsu_vx, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vwmul_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vwmul_vx, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vwsmaccu_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vwsmaccu_vx, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vwmaccu_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vwmaccu_vx, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vfwmacc_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vfwmacc_vf, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vwsmacc_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vwsmacc_vx, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vwmacc_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vwmacc_vx, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vfwnmacc_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vfwnmacc_vf, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vwsmaccsu_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vwsmaccsu_vx, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vwmaccsu_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vwmaccsu_vx, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vfwmsac_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vfwmsac_vf, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vwsmaccus_vx, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vwmaccus_vx, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vfwnmsac_vv, void, env, i32, i32, i32, i32)
> +DEF_HELPER_5(vector_vfwnmsac_vf, void, env, i32, i32, i32, i32)
> +DEF_HELPER_4(vector_vsetvli, void, env, i32, i32, i32)
> +DEF_HELPER_4(vector_vsetvl, void, env, i32, i32, i32)
> diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode
> index 77f794e..d125ff9 100644
> --- a/target/riscv/insn32.decode
> +++ b/target/riscv/insn32.decode
> @@ -25,7 +25,7 @@
> %sh10 20:10
> %csr 20:12
> %rm 12:3
> -
> +%nf 29:3
> # immediates:
> %imm_i 20:s12
> %imm_s 25:s7 7:5
> @@ -43,7 +43,6 @@
> &u imm rd
> &shift shamt rs1 rd
> &atomic aq rl rs2 rs1 rd
> -
> # Formats 32:
> @r ....... ..... ..... ... ..... ....... &r %rs2 %rs1 %rd
> @i ............ ..... ... ..... ....... &i imm=%imm_i %rs1 %rd
> @@ -62,11 +61,17 @@
> @r_rm ....... ..... ..... ... ..... ....... %rs2 %rs1 %rm %rd
> @r2_rm ....... ..... ..... ... ..... ....... %rs1 %rm %rd
> @r2 ....... ..... ..... ... ..... ....... %rs1 %rd
> +@r_vm ...... vm:1 ..... ..... ... ..... ....... %rs2 %rs1 %rd
> +@r_wdvm ..... wd:1 vm:1 ..... ..... ... ..... ....... %rs2 %rs1 %rd
> +@r_nfvm nf:3 ... vm:1 ..... ..... ... ..... ....... %rs2 %rs1 %rd
> +@r2_nfvm nf:3 ... vm:1 ..... ..... ... ..... ....... %rs1 %rd
> +@r2_vm ...... vm:1 ..... ..... ... ..... ....... %rs2 %rd
> +@r1_vm ...... vm:1 ..... ..... ... ..... ....... %rd
> +@r2_zimm . zimm:11 ..... ... ..... ....... %rs1 %rd
>
> @sfence_vma ....... ..... ..... ... ..... ....... %rs2 %rs1
> @sfence_vm ....... ..... ..... ... ..... ....... %rs1
>
> -
> # *** Privileged Instructions ***
> ecall 000000000000 00000 000 00000 1110011
> ebreak 000000000001 00000 000 00000 1110011
> @@ -203,3 +208,366 @@ fcvt_w_d 1100001 00000 ..... ... ..... 1010011 @r2_rm
> fcvt_wu_d 1100001 00001 ..... ... ..... 1010011 @r2_rm
> fcvt_d_w 1101001 00000 ..... ... ..... 1010011 @r2_rm
> fcvt_d_wu 1101001 00001 ..... ... ..... 1010011 @r2_rm
> +
> +# *** RV32V Standard Extension ***
> +
> +# *** Vector loads and stores are encoded within LOADFP/STORE-FP ***
> +vlb_v ... 100 . 00000 ..... 000 ..... 0000111 @r2_nfvm
> +vlh_v ... 100 . 00000 ..... 101 ..... 0000111 @r2_nfvm
> +vlw_v ... 100 . 00000 ..... 110 ..... 0000111 @r2_nfvm
> +vle_v ... 000 . 00000 ..... 111 ..... 0000111 @r2_nfvm
> +vlbu_v ... 000 . 00000 ..... 000 ..... 0000111 @r2_nfvm
> +vlhu_v ... 000 . 00000 ..... 101 ..... 0000111 @r2_nfvm
> +vlwu_v ... 000 . 00000 ..... 110 ..... 0000111 @r2_nfvm
> +vlbff_v ... 100 . 10000 ..... 000 ..... 0000111 @r2_nfvm
> +vlhff_v ... 100 . 10000 ..... 101 ..... 0000111 @r2_nfvm
> +vlwff_v ... 100 . 10000 ..... 110 ..... 0000111 @r2_nfvm
> +vleff_v ... 000 . 10000 ..... 111 ..... 0000111 @r2_nfvm
> +vlbuff_v ... 000 . 10000 ..... 000 ..... 0000111 @r2_nfvm
> +vlhuff_v ... 000 . 10000 ..... 101 ..... 0000111 @r2_nfvm
> +vlwuff_v ... 000 . 10000 ..... 110 ..... 0000111 @r2_nfvm
> +vsb_v ... 000 . 00000 ..... 000 ..... 0100111 @r2_nfvm
> +vsh_v ... 000 . 00000 ..... 101 ..... 0100111 @r2_nfvm
> +vsw_v ... 000 . 00000 ..... 110 ..... 0100111 @r2_nfvm
> +vse_v ... 000 . 00000 ..... 111 ..... 0100111 @r2_nfvm
> +
> +vlsb_v ... 110 . ..... ..... 000 ..... 0000111 @r_nfvm
> +vlsh_v ... 110 . ..... ..... 101 ..... 0000111 @r_nfvm
> +vlsw_v ... 110 . ..... ..... 110 ..... 0000111 @r_nfvm
> +vlse_v ... 010 . ..... ..... 111 ..... 0000111 @r_nfvm
> +vlsbu_v ... 010 . ..... ..... 000 ..... 0000111 @r_nfvm
> +vlshu_v ... 010 . ..... ..... 101 ..... 0000111 @r_nfvm
> +vlswu_v ... 010 . ..... ..... 110 ..... 0000111 @r_nfvm
> +vssb_v ... 010 . ..... ..... 000 ..... 0100111 @r_nfvm
> +vssh_v ... 010 . ..... ..... 101 ..... 0100111 @r_nfvm
> +vssw_v ... 010 . ..... ..... 110 ..... 0100111 @r_nfvm
> +vsse_v ... 010 . ..... ..... 111 ..... 0100111 @r_nfvm
> +
> +vlxb_v ... 111 . ..... ..... 000 ..... 0000111 @r_nfvm
> +vlxh_v ... 111 . ..... ..... 101 ..... 0000111 @r_nfvm
> +vlxw_v ... 111 . ..... ..... 110 ..... 0000111 @r_nfvm
> +vlxe_v ... 011 . ..... ..... 111 ..... 0000111 @r_nfvm
> +vlxbu_v ... 011 . ..... ..... 000 ..... 0000111 @r_nfvm
> +vlxhu_v ... 011 . ..... ..... 101 ..... 0000111 @r_nfvm
> +vlxwu_v ... 011 . ..... ..... 110 ..... 0000111 @r_nfvm
> +vsxb_v ... 011 . ..... ..... 000 ..... 0100111 @r_nfvm
> +vsxh_v ... 011 . ..... ..... 101 ..... 0100111 @r_nfvm
> +vsxw_v ... 011 . ..... ..... 110 ..... 0100111 @r_nfvm
> +vsxe_v ... 011 . ..... ..... 111 ..... 0100111 @r_nfvm
> +vsuxb_v ... 111 . ..... ..... 000 ..... 0100111 @r_nfvm
> +vsuxh_v ... 111 . ..... ..... 101 ..... 0100111 @r_nfvm
> +vsuxw_v ... 111 . ..... ..... 110 ..... 0100111 @r_nfvm
> +vsuxe_v ... 111 . ..... ..... 111 ..... 0100111 @r_nfvm
> +
> +#*** Vector AMO operations are encoded under the standard AMO major opcode.***
> +vamoswapw_v 00001 . . ..... ..... 110 ..... 0101111 @r_wdvm
> +vamoswapd_v 00001 . . ..... ..... 111 ..... 0101111 @r_wdvm
> +vamoaddw_v 00000 . . ..... ..... 110 ..... 0101111 @r_wdvm
> +vamoaddd_v 00000 . . ..... ..... 111 ..... 0101111 @r_wdvm
> +vamoxorw_v 00100 . . ..... ..... 110 ..... 0101111 @r_wdvm
> +vamoxord_v 00100 . . ..... ..... 111 ..... 0101111 @r_wdvm
> +vamoandw_v 01100 . . ..... ..... 110 ..... 0101111 @r_wdvm
> +vamoandd_v 01100 . . ..... ..... 111 ..... 0101111 @r_wdvm
> +vamoorw_v 01000 . . ..... ..... 110 ..... 0101111 @r_wdvm
> +vamoord_v 01000 . . ..... ..... 111 ..... 0101111 @r_wdvm
> +vamominw_v 10000 . . ..... ..... 110 ..... 0101111 @r_wdvm
> +vamomind_v 10000 . . ..... ..... 111 ..... 0101111 @r_wdvm
> +vamomaxw_v 10100 . . ..... ..... 110 ..... 0101111 @r_wdvm
> +vamomaxd_v 10100 . . ..... ..... 111 ..... 0101111 @r_wdvm
> +vamominuw_v 11000 . . ..... ..... 110 ..... 0101111 @r_wdvm
> +vamominud_v 11000 . . ..... ..... 111 ..... 0101111 @r_wdvm
> +vamomaxuw_v 11100 . . ..... ..... 110 ..... 0101111 @r_wdvm
> +vamomaxud_v 11100 . . ..... ..... 111 ..... 0101111 @r_wdvm
> +
> +#*** new major opcode OP-V ***
> +vadd_vv 000000 . ..... ..... 000 ..... 1010111 @r_vm
> +vadd_vx 000000 . ..... ..... 100 ..... 1010111 @r_vm
> +vadd_vi 000000 . ..... ..... 011 ..... 1010111 @r_vm
> +vredsum_vs 000000 . ..... ..... 010 ..... 1010111 @r_vm
> +vfadd_vv 000000 . ..... ..... 001 ..... 1010111 @r_vm
> +vfadd_vf 000000 . ..... ..... 101 ..... 1010111 @r_vm
> +vredand_vs 000001 . ..... ..... 010 ..... 1010111 @r_vm
> +vfredsum_vs 000001 . ..... ..... 001 ..... 1010111 @r_vm
> +vsub_vv 000010 . ..... ..... 000 ..... 1010111 @r_vm
> +vsub_vx 000010 . ..... ..... 100 ..... 1010111 @r_vm
> +vredor_vs 000010 . ..... ..... 010 ..... 1010111 @r_vm
> +vfsub_vv 000010 . ..... ..... 001 ..... 1010111 @r_vm
> +vfsub_vf 000010 . ..... ..... 101 ..... 1010111 @r_vm
> +vrsub_vx 000011 . ..... ..... 100 ..... 1010111 @r_vm
> +vrsub_vi 000011 . ..... ..... 011 ..... 1010111 @r_vm
> +vredxor_vs 000011 . ..... ..... 010 ..... 1010111 @r_vm
> +vfredosum_vs 000011 . ..... ..... 001 ..... 1010111 @r_vm
> +vminu_vv 000100 . ..... ..... 000 ..... 1010111 @r_vm
> +vminu_vx 000100 . ..... ..... 100 ..... 1010111 @r_vm
> +vredminu_vs 000100 . ..... ..... 010 ..... 1010111 @r_vm
> +vfmin_vv 000100 . ..... ..... 001 ..... 1010111 @r_vm
> +vfmin_vf 000100 . ..... ..... 101 ..... 1010111 @r_vm
> +vmin_vv 000101 . ..... ..... 000 ..... 1010111 @r_vm
> +vmin_vx 000101 . ..... ..... 100 ..... 1010111 @r_vm
> +vredmin_vs 000101 . ..... ..... 010 ..... 1010111 @r_vm
> +vfredmin_vs 000101 . ..... ..... 001 ..... 1010111 @r_vm
> +vmaxu_vv 000110 . ..... ..... 000 ..... 1010111 @r_vm
> +vmaxu_vx 000110 . ..... ..... 100 ..... 1010111 @r_vm
> +vredmaxu_vs 000110 . ..... ..... 010 ..... 1010111 @r_vm
> +vfmax_vv 000110 . ..... ..... 001 ..... 1010111 @r_vm
> +vfmax_vf 000110 . ..... ..... 101 ..... 1010111 @r_vm
> +vmax_vv 000111 . ..... ..... 000 ..... 1010111 @r_vm
> +vmax_vx 000111 . ..... ..... 100 ..... 1010111 @r_vm
> +vredmax_vs 000111 . ..... ..... 010 ..... 1010111 @r_vm
> +vfredmax_vs 000111 . ..... ..... 001 ..... 1010111 @r_vm
> +vfsgnj_vv 001000 . ..... ..... 001 ..... 1010111 @r_vm
> +vfsgnj_vf 001000 . ..... ..... 101 ..... 1010111 @r_vm
> +vand_vv 001001 . ..... ..... 000 ..... 1010111 @r_vm
> +vand_vx 001001 . ..... ..... 100 ..... 1010111 @r_vm
> +vand_vi 001001 . ..... ..... 011 ..... 1010111 @r_vm
> +vfsgnjn_vv 001001 . ..... ..... 001 ..... 1010111 @r_vm
> +vfsgnjn_vf 001001 . ..... ..... 101 ..... 1010111 @r_vm
> +vor_vv 001010 . ..... ..... 000 ..... 1010111 @r_vm
> +vor_vx 001010 . ..... ..... 100 ..... 1010111 @r_vm
> +vor_vi 001010 . ..... ..... 011 ..... 1010111 @r_vm
> +vfsgnjx_vv 001010 . ..... ..... 001 ..... 1010111 @r_vm
> +vfsgnjx_vf 001010 . ..... ..... 101 ..... 1010111 @r_vm
> +vxor_vv 001011 . ..... ..... 000 ..... 1010111 @r_vm
> +vxor_vx 001011 . ..... ..... 100 ..... 1010111 @r_vm
> +vxor_vi 001011 . ..... ..... 011 ..... 1010111 @r_vm
> +vrgather_vv 001100 . ..... ..... 000 ..... 1010111 @r_vm
> +vrgather_vx 001100 . ..... ..... 100 ..... 1010111 @r_vm
> +vrgather_vi 001100 . ..... ..... 011 ..... 1010111 @r_vm
> +vext_x_v 001100 1 ..... ..... 010 ..... 1010111 @r
> +vfmv_f_s 001100 1 ..... ..... 001 ..... 1010111 @r
> +vmv_s_x 001101 1 ..... ..... 110 ..... 1010111 @r
> +vfmv_s_f 001101 1 ..... ..... 101 ..... 1010111 @r
> +vslideup_vx 001110 . ..... ..... 100 ..... 1010111 @r_vm
> +vslideup_vi 001110 . ..... ..... 011 ..... 1010111 @r_vm
> +vslide1up_vx 001110 . ..... ..... 110 ..... 1010111 @r_vm
> +vslidedown_vx 001111 . ..... ..... 100 ..... 1010111 @r_vm
> +vslidedown_vi 001111 . ..... ..... 011 ..... 1010111 @r_vm
> +vslide1down_vx 001111 . ..... ..... 110 ..... 1010111 @r_vm
> +vadc_vvm 010000 1 ..... ..... 000 ..... 1010111 @r
> +vadc_vxm 010000 1 ..... ..... 100 ..... 1010111 @r
> +vadc_vim 010000 1 ..... ..... 011 ..... 1010111 @r
> +vmadc_vvm 010001 1 ..... ..... 000 ..... 1010111 @r
> +vmadc_vxm 010001 1 ..... ..... 100 ..... 1010111 @r
> +vmadc_vim 010001 1 ..... ..... 011 ..... 1010111 @r
> +vsbc_vvm 010010 1 ..... ..... 000 ..... 1010111 @r
> +vsbc_vxm 010010 1 ..... ..... 100 ..... 1010111 @r
> +vmsbc_vvm 010011 1 ..... ..... 000 ..... 1010111 @r
> +vmsbc_vxm 010011 1 ..... ..... 100 ..... 1010111 @r
> +vmpopc_m 010100 . ..... ----- 010 ..... 1010111 @r2_vm
> +vmfirst_m 010101 . ..... ----- 010 ..... 1010111 @r2_vm
> +vmsbf_m 010110 . ..... 00001 010 ..... 1010111 @r2_vm
> +vmsof_m 010110 . ..... 00010 010 ..... 1010111 @r2_vm
> +vmsif_m 010110 . ..... 00011 010 ..... 1010111 @r2_vm
> +viota_m 010110 . ..... 10000 010 ..... 1010111 @r2_vm
> +vid_v 010110 . 00000 10001 010 ..... 1010111 @r1_vm
> +vmerge_vvm 010111 . ..... ..... 000 ..... 1010111 @r_vm
> +vmerge_vxm 010111 . ..... ..... 100 ..... 1010111 @r_vm
> +vmerge_vim 010111 . ..... ..... 011 ..... 1010111 @r_vm
> +vcompress_vm 010111 - ..... ..... 010 ..... 1010111 @r
> +vfmerge_vfm 010111 . ..... ..... 101 ..... 1010111 @r_vm
> +vmseq_vv 011000 . ..... ..... 000 ..... 1010111 @r_vm
> +vmseq_vx 011000 . ..... ..... 100 ..... 1010111 @r_vm
> +vmseq_vi 011000 . ..... ..... 011 ..... 1010111 @r_vm
> +vmandnot_mm 011000 - ..... ..... 010 ..... 1010111 @r
> +vmfeq_vv 011000 . ..... ..... 001 ..... 1010111 @r_vm
> +vmfeq_vf 011000 . ..... ..... 101 ..... 1010111 @r_vm
> +vmsne_vv 011001 . ..... ..... 000 ..... 1010111 @r_vm
> +vmsne_vx 011001 . ..... ..... 100 ..... 1010111 @r_vm
> +vmsne_vi 011001 . ..... ..... 011 ..... 1010111 @r_vm
> +vmand_mm 011001 - ..... ..... 010 ..... 1010111 @r
> +vmfle_vv 011001 . ..... ..... 001 ..... 1010111 @r_vm
> +vmfle_vf 011001 . ..... ..... 101 ..... 1010111 @r_vm
> +vmsltu_vv 011010 . ..... ..... 000 ..... 1010111 @r_vm
> +vmsltu_vx 011010 . ..... ..... 100 ..... 1010111 @r_vm
> +vmor_mm 011010 - ..... ..... 010 ..... 1010111 @r
> +vmford_vv 011010 . ..... ..... 001 ..... 1010111 @r_vm
> +vmford_vf 011010 . ..... ..... 101 ..... 1010111 @r_vm
> +vmslt_vv 011011 . ..... ..... 000 ..... 1010111 @r_vm
> +vmslt_vx 011011 . ..... ..... 100 ..... 1010111 @r_vm
> +vmxor_mm 011011 - ..... ..... 010 ..... 1010111 @r
> +vmflt_vv 011011 . ..... ..... 001 ..... 1010111 @r_vm
> +vmflt_vf 011011 . ..... ..... 101 ..... 1010111 @r_vm
> +vmsleu_vv 011100 . ..... ..... 000 ..... 1010111 @r_vm
> +vmsleu_vx 011100 . ..... ..... 100 ..... 1010111 @r_vm
> +vmsleu_vi 011100 . ..... ..... 011 ..... 1010111 @r_vm
> +vmornot_mm 011100 - ..... ..... 010 ..... 1010111 @r
> +vmfne_vv 011100 . ..... ..... 001 ..... 1010111 @r_vm
> +vmfne_vf 011100 . ..... ..... 101 ..... 1010111 @r_vm
> +vmsle_vv 011101 . ..... ..... 000 ..... 1010111 @r_vm
> +vmsle_vx 011101 . ..... ..... 100 ..... 1010111 @r_vm
> +vmsle_vi 011101 . ..... ..... 011 ..... 1010111 @r_vm
> +vmnand_mm 011101 - ..... ..... 010 ..... 1010111 @r
> +vmfgt_vf 011101 . ..... ..... 101 ..... 1010111 @r_vm
> +vmsgtu_vx 011110 . ..... ..... 100 ..... 1010111 @r_vm
> +vmsgtu_vi 011110 . ..... ..... 011 ..... 1010111 @r_vm
> +vmnor_mm 011110 - ..... ..... 010 ..... 1010111 @r
> +vmsgt_vx 011111 . ..... ..... 100 ..... 1010111 @r_vm
> +vmsgt_vi 011111 . ..... ..... 011 ..... 1010111 @r_vm
> +vmxnor_mm 011111 - ..... ..... 010 ..... 1010111 @r
> +vmfge_vf 011111 . ..... ..... 101 ..... 1010111 @r_vm
> +vsaddu_vv 100000 . ..... ..... 000 ..... 1010111 @r_vm
> +vsaddu_vx 100000 . ..... ..... 100 ..... 1010111 @r_vm
> +vsaddu_vi 100000 . ..... ..... 011 ..... 1010111 @r_vm
> +vdivu_vv 100000 . ..... ..... 010 ..... 1010111 @r_vm
> +vdivu_vx 100000 . ..... ..... 110 ..... 1010111 @r_vm
> +vfdiv_vv 100000 . ..... ..... 001 ..... 1010111 @r_vm
> +vfdiv_vf 100000 . ..... ..... 101 ..... 1010111 @r_vm
> +vsadd_vv 100001 . ..... ..... 000 ..... 1010111 @r_vm
> +vsadd_vx 100001 . ..... ..... 100 ..... 1010111 @r_vm
> +vsadd_vi 100001 . ..... ..... 011 ..... 1010111 @r_vm
> +vdiv_vv 100001 . ..... ..... 010 ..... 1010111 @r_vm
> +vdiv_vx 100001 . ..... ..... 110 ..... 1010111 @r_vm
> +vfrdiv_vf 100001 . ..... ..... 101 ..... 1010111 @r_vm
> +vssubu_vv 100010 . ..... ..... 000 ..... 1010111 @r_vm
> +vssubu_vx 100010 . ..... ..... 100 ..... 1010111 @r_vm
> +vremu_vv 100010 . ..... ..... 010 ..... 1010111 @r_vm
> +vremu_vx 100010 . ..... ..... 110 ..... 1010111 @r_vm
> +vfcvt_xu_f_v 100010 . ..... 00000 001 ..... 1010111 @r2_vm
> +vfcvt_x_f_v 100010 . ..... 00001 001 ..... 1010111 @r2_vm
> +vfcvt_f_xu_v 100010 . ..... 00010 001 ..... 1010111 @r2_vm
> +vfcvt_f_x_v 100010 . ..... 00011 001 ..... 1010111 @r2_vm
> +vfwcvt_xu_f_v 100010 . ..... 01000 001 ..... 1010111 @r2_vm
> +vfwcvt_x_f_v 100010 . ..... 01001 001 ..... 1010111 @r2_vm
> +vfwcvt_f_xu_v 100010 . ..... 01010 001 ..... 1010111 @r2_vm
> +vfwcvt_f_x_v 100010 . ..... 01011 001 ..... 1010111 @r2_vm
> +vfwcvt_f_f_v 100010 . ..... 01100 001 ..... 1010111 @r2_vm
> +vfncvt_xu_f_v 100010 . ..... 10000 001 ..... 1010111 @r2_vm
> +vfncvt_x_f_v 100010 . ..... 10001 001 ..... 1010111 @r2_vm
> +vfncvt_f_xu_v 100010 . ..... 10010 001 ..... 1010111 @r2_vm
> +vfncvt_f_x_v 100010 . ..... 10011 001 ..... 1010111 @r2_vm
> +vfncvt_f_f_v 100010 . ..... 10100 001 ..... 1010111 @r2_vm
> +vssub_vv 100011 . ..... ..... 000 ..... 1010111 @r_vm
> +vssub_vx 100011 . ..... ..... 100 ..... 1010111 @r_vm
> +vrem_vv 100011 . ..... ..... 010 ..... 1010111 @r_vm
> +vrem_vx 100011 . ..... ..... 110 ..... 1010111 @r_vm
> +vfsqrt_v 100011 . ..... 00000 001 ..... 1010111 @r2_vm
> +vfclass_v 100011 . ..... 10000 001 ..... 1010111 @r2_vm
> +vaadd_vv 100100 . ..... ..... 000 ..... 1010111 @r_vm
> +vaadd_vx 100100 . ..... ..... 100 ..... 1010111 @r_vm
> +vaadd_vi 100100 . ..... ..... 011 ..... 1010111 @r_vm
> +vmulhu_vv 100100 . ..... ..... 010 ..... 1010111 @r_vm
> +vmulhu_vx 100100 . ..... ..... 110 ..... 1010111 @r_vm
> +vfmul_vv 100100 . ..... ..... 001 ..... 1010111 @r_vm
> +vfmul_vf 100100 . ..... ..... 101 ..... 1010111 @r_vm
> +vsll_vv 100101 . ..... ..... 000 ..... 1010111 @r_vm
> +vsll_vx 100101 . ..... ..... 100 ..... 1010111 @r_vm
> +vsll_vi 100101 . ..... ..... 011 ..... 1010111 @r_vm
> +vmul_vv 100101 . ..... ..... 010 ..... 1010111 @r_vm
> +vmul_vx 100101 . ..... ..... 110 ..... 1010111 @r_vm
> +vasub_vv 100110 . ..... ..... 000 ..... 1010111 @r_vm
> +vasub_vx 100110 . ..... ..... 100 ..... 1010111 @r_vm
> +vmulhsu_vv 100110 . ..... ..... 010 ..... 1010111 @r_vm
> +vmulhsu_vx 100110 . ..... ..... 110 ..... 1010111 @r_vm
> +vsmul_vv 100111 . ..... ..... 000 ..... 1010111 @r_vm
> +vsmul_vx 100111 . ..... ..... 100 ..... 1010111 @r_vm
> +vmulh_vv 100111 . ..... ..... 010 ..... 1010111 @r_vm
> +vmulh_vx 100111 . ..... ..... 110 ..... 1010111 @r_vm
> +vfrsub_vf 100111 . ..... ..... 101 ..... 1010111 @r_vm
> +vsrl_vv 101000 . ..... ..... 000 ..... 1010111 @r_vm
> +vsrl_vx 101000 . ..... ..... 100 ..... 1010111 @r_vm
> +vsrl_vi 101000 . ..... ..... 011 ..... 1010111 @r_vm
> +vfmadd_vv 101000 . ..... ..... 001 ..... 1010111 @r_vm
> +vfmadd_vf 101000 . ..... ..... 101 ..... 1010111 @r_vm
> +vsra_vv 101001 . ..... ..... 000 ..... 1010111 @r_vm
> +vsra_vx 101001 . ..... ..... 100 ..... 1010111 @r_vm
> +vsra_vi 101001 . ..... ..... 011 ..... 1010111 @r_vm
> +vmadd_vv 101001 . ..... ..... 010 ..... 1010111 @r_vm
> +vmadd_vx 101001 . ..... ..... 110 ..... 1010111 @r_vm
> +vfnmadd_vv 101001 . ..... ..... 001 ..... 1010111 @r_vm
> +vfnmadd_vf 101001 . ..... ..... 101 ..... 1010111 @r_vm
> +vssrl_vv 101010 . ..... ..... 000 ..... 1010111 @r_vm
> +vssrl_vx 101010 . ..... ..... 100 ..... 1010111 @r_vm
> +vssrl_vi 101010 . ..... ..... 011 ..... 1010111 @r_vm
> +vfmsub_vv 101010 . ..... ..... 001 ..... 1010111 @r_vm
> +vfmsub_vf 101010 . ..... ..... 101 ..... 1010111 @r_vm
> +vssra_vv 101011 . ..... ..... 000 ..... 1010111 @r_vm
> +vssra_vx 101011 . ..... ..... 100 ..... 1010111 @r_vm
> +vssra_vi 101011 . ..... ..... 011 ..... 1010111 @r_vm
> +vnmsub_vv 101011 . ..... ..... 010 ..... 1010111 @r_vm
> +vnmsub_vx 101011 . ..... ..... 110 ..... 1010111 @r_vm
> +vfnmsub_vv 101011 . ..... ..... 001 ..... 1010111 @r_vm
> +vfnmsub_vf 101011 . ..... ..... 101 ..... 1010111 @r_vm
> +vnsrl_vv 101100 . ..... ..... 000 ..... 1010111 @r_vm
> +vnsrl_vx 101100 . ..... ..... 100 ..... 1010111 @r_vm
> +vnsrl_vi 101100 . ..... ..... 011 ..... 1010111 @r_vm
> +vfmacc_vv 101100 . ..... ..... 001 ..... 1010111 @r_vm
> +vfmacc_vf 101100 . ..... ..... 101 ..... 1010111 @r_vm
> +vnsra_vv 101101 . ..... ..... 000 ..... 1010111 @r_vm
> +vnsra_vx 101101 . ..... ..... 100 ..... 1010111 @r_vm
> +vnsra_vi 101101 . ..... ..... 011 ..... 1010111 @r_vm
> +vmacc_vv 101101 . ..... ..... 010 ..... 1010111 @r_vm
> +vmacc_vx 101101 . ..... ..... 110 ..... 1010111 @r_vm
> +vfnmacc_vv 101101 . ..... ..... 001 ..... 1010111 @r_vm
> +vfnmacc_vf 101101 . ..... ..... 101 ..... 1010111 @r_vm
> +vnclipu_vv 101110 . ..... ..... 000 ..... 1010111 @r_vm
> +vnclipu_vx 101110 . ..... ..... 100 ..... 1010111 @r_vm
> +vnclipu_vi 101110 . ..... ..... 011 ..... 1010111 @r_vm
> +vfmsac_vv 101110 . ..... ..... 001 ..... 1010111 @r_vm
> +vfmsac_vf 101110 . ..... ..... 101 ..... 1010111 @r_vm
> +vnclip_vv 101111 . ..... ..... 000 ..... 1010111 @r_vm
> +vnclip_vx 101111 . ..... ..... 100 ..... 1010111 @r_vm
> +vnclip_vi 101111 . ..... ..... 011 ..... 1010111 @r_vm
> +vnmsac_vv 101111 . ..... ..... 010 ..... 1010111 @r_vm
> +vnmsac_vx 101111 . ..... ..... 110 ..... 1010111 @r_vm
> +vfnmsac_vv 101111 . ..... ..... 001 ..... 1010111 @r_vm
> +vfnmsac_vf 101111 . ..... ..... 101 ..... 1010111 @r_vm
> +vwredsumu_vs 110000 . ..... ..... 000 ..... 1010111 @r_vm
> +vwaddu_vv 110000 . ..... ..... 010 ..... 1010111 @r_vm
> +vwaddu_vx 110000 . ..... ..... 110 ..... 1010111 @r_vm
> +vfwadd_vv 110000 . ..... ..... 001 ..... 1010111 @r_vm
> +vfwadd_vf 110000 . ..... ..... 101 ..... 1010111 @r_vm
> +vwredsum_vs 110001 . ..... ..... 000 ..... 1010111 @r_vm
> +vwadd_vv 110001 . ..... ..... 010 ..... 1010111 @r_vm
> +vwadd_vx 110001 . ..... ..... 110 ..... 1010111 @r_vm
> +vfwredsum_vs 110001 . ..... ..... 001 ..... 1010111 @r_vm
> +vwsubu_vv 110010 . ..... ..... 010 ..... 1010111 @r_vm
> +vwsubu_vx 110010 . ..... ..... 110 ..... 1010111 @r_vm
> +vfwsub_vv 110010 . ..... ..... 001 ..... 1010111 @r_vm
> +vfwsub_vf 110010 . ..... ..... 101 ..... 1010111 @r_vm
> +vwsub_vv 110011 . ..... ..... 010 ..... 1010111 @r_vm
> +vwsub_vx 110011 . ..... ..... 110 ..... 1010111 @r_vm
> +vfwredosum_vs 110011 . ..... ..... 001 ..... 1010111 @r_vm
> +vwaddu_wv 110100 . ..... ..... 010 ..... 1010111 @r_vm
> +vwaddu_wx 110100 . ..... ..... 110 ..... 1010111 @r_vm
> +vfwadd_wv 110100 . ..... ..... 001 ..... 1010111 @r_vm
> +vfwadd_wf 110100 . ..... ..... 101 ..... 1010111 @r_vm
> +vwadd_wv 110101 . ..... ..... 010 ..... 1010111 @r_vm
> +vwadd_wx 110101 . ..... ..... 110 ..... 1010111 @r_vm
> +vwsubu_wv 110110 . ..... ..... 010 ..... 1010111 @r_vm
> +vwsubu_wx 110110 . ..... ..... 110 ..... 1010111 @r_vm
> +vfwsub_wv 110110 . ..... ..... 001 ..... 1010111 @r_vm
> +vfwsub_wf 110110 . ..... ..... 101 ..... 1010111 @r_vm
> +vwsub_wv 110111 . ..... ..... 010 ..... 1010111 @r_vm
> +vwsub_wx 110111 . ..... ..... 110 ..... 1010111 @r_vm
> +vwmulu_vv 111000 . ..... ..... 010 ..... 1010111 @r_vm
> +vwmulu_vx 111000 . ..... ..... 110 ..... 1010111 @r_vm
> +vfwmul_vv 111000 . ..... ..... 001 ..... 1010111 @r_vm
> +vfwmul_vf 111000 . ..... ..... 101 ..... 1010111 @r_vm
> +vwmulsu_vv 111010 . ..... ..... 010 ..... 1010111 @r_vm
> +vwmulsu_vx 111010 . ..... ..... 110 ..... 1010111 @r_vm
> +vwmul_vv 111011 . ..... ..... 010 ..... 1010111 @r_vm
> +vwmul_vx 111011 . ..... ..... 110 ..... 1010111 @r_vm
> +vwsmaccu_vv 111100 . ..... ..... 000 ..... 1010111 @r_vm
> +vwsmaccu_vx 111100 . ..... ..... 100 ..... 1010111 @r_vm
> +vwmaccu_vv 111100 . ..... ..... 010 ..... 1010111 @r_vm
> +vwmaccu_vx 111100 . ..... ..... 110 ..... 1010111 @r_vm
> +vfwmacc_vv 111100 . ..... ..... 001 ..... 1010111 @r_vm
> +vfwmacc_vf 111100 . ..... ..... 101 ..... 1010111 @r_vm
> +vwsmacc_vv 111101 . ..... ..... 000 ..... 1010111 @r_vm
> +vwsmacc_vx 111101 . ..... ..... 100 ..... 1010111 @r_vm
> +vwmacc_vv 111101 . ..... ..... 010 ..... 1010111 @r_vm
> +vwmacc_vx 111101 . ..... ..... 110 ..... 1010111 @r_vm
> +vfwnmacc_vv 111101 . ..... ..... 001 ..... 1010111 @r_vm
> +vfwnmacc_vf 111101 . ..... ..... 101 ..... 1010111 @r_vm
> +vwsmaccsu_vv 111110 . ..... ..... 000 ..... 1010111 @r_vm
> +vwsmaccsu_vx 111110 . ..... ..... 100 ..... 1010111 @r_vm
> +vwmaccsu_vv 111110 . ..... ..... 010 ..... 1010111 @r_vm
> +vwmaccsu_vx 111110 . ..... ..... 110 ..... 1010111 @r_vm
> +vfwmsac_vv 111110 . ..... ..... 001 ..... 1010111 @r_vm
> +vfwmsac_vf 111110 . ..... ..... 101 ..... 1010111 @r_vm
> +vwsmaccus_vx 111111 . ..... ..... 100 ..... 1010111 @r_vm
> +vwmaccus_vx 111111 . ..... ..... 110 ..... 1010111 @r_vm
> +vfwnmsac_vv 111111 . ..... ..... 001 ..... 1010111 @r_vm
> +vfwnmsac_vf 111111 . ..... ..... 101 ..... 1010111 @r_vm
> +vsetvli 0 ........... ..... 111 ..... 1010111 @r2_zimm
> +vsetvl 1000000 ..... ..... 111 ..... 1010111 @r
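
These are raw decodetree patterns, so a quick orientation note for reviewers: the leftmost six bits are funct6 (insn[31:26]), the lone `.` is the vm bit (insn[25]), then rs2, rs1, funct3, rd and the 1010111 major opcode. The trans_* macros in the new trans_rvv.inc.c dereference a->rd, a->rs1, a->rs2 and a->vm, so the argument struct decodetree generates for the @r_vm format must look roughly like this for, say, vsub_vv (a sketch, not the literal generated code):

    typedef struct {
        int rd;   /* insn[11:7]  */
        int rs1;  /* insn[19:15] */
        int rs2;  /* insn[24:20] */
        int vm;   /* insn[25]    */
    } arg_vsub_vv;
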
> diff --git a/target/riscv/insn_trans/trans_rvv.inc.c b/target/riscv/insn_trans/trans_rvv.inc.c
> new file mode 100644
> index 0000000..dc8e6ce
> --- /dev/null
> +++ b/target/riscv/insn_trans/trans_rvv.inc.c
> @@ -0,0 +1,484 @@
> +/*
> + * RISC-V translation routines for the RVV Standard Extension.
> + *
> + * Copyright (c) 2011-2019 C-SKY Limited. All rights reserved.
> + *
> + * This program is free software; you can redistribute it and/or modify it
> + * under the terms and conditions of the GNU General Public License,
> + * version 2 or later, as published by the Free Software Foundation.
> + *
> + * This program is distributed in the hope it will be useful, but WITHOUT
> + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
> + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
> + * more details.
> + *
> + * You should have received a copy of the GNU General Public License along with
> + * this program. If not, see <http://www.gnu.org/licenses/>.
> + */
> +
> +#define GEN_VECTOR_R2_NFVM(INSN) \
> +static bool trans_##INSN(DisasContext *ctx, arg_##INSN * a) \
> +{ \
> + TCGv_i32 s1 = tcg_const_i32(a->rs1); \
> + TCGv_i32 d = tcg_const_i32(a->rd); \
> + TCGv_i32 nf = tcg_const_i32(a->nf); \
> + TCGv_i32 vm = tcg_const_i32(a->vm); \
> + gen_helper_vector_##INSN(cpu_env, nf, vm, s1, d); \
> + tcg_temp_free_i32(s1); \
> + tcg_temp_free_i32(d); \
> + tcg_temp_free_i32(nf); \
> + tcg_temp_free_i32(vm); \
> + return true; \
> +}
> +
> +#define GEN_VECTOR_R_NFVM(INSN) \
> +static bool trans_##INSN(DisasContext *ctx, arg_##INSN * a) \
> +{ \
> + TCGv_i32 s1 = tcg_const_i32(a->rs1); \
> + TCGv_i32 s2 = tcg_const_i32(a->rs2); \
> + TCGv_i32 d = tcg_const_i32(a->rd); \
> + TCGv_i32 nf = tcg_const_i32(a->nf); \
> + TCGv_i32 vm = tcg_const_i32(a->vm); \
> + gen_helper_vector_##INSN(cpu_env, nf, vm, s1, s2, d);\
> + tcg_temp_free_i32(s1); \
> + tcg_temp_free_i32(s2); \
> + tcg_temp_free_i32(d); \
> + tcg_temp_free_i32(nf); \
> + tcg_temp_free_i32(vm); \
> + return true; \
> +}
> +
> +#define GEN_VECTOR_R_WDVM(INSN) \
> +static bool trans_##INSN(DisasContext *ctx, arg_##INSN * a) \
> +{ \
> + TCGv_i32 s1 = tcg_const_i32(a->rs1); \
> + TCGv_i32 s2 = tcg_const_i32(a->rs2); \
> + TCGv_i32 d = tcg_const_i32(a->rd); \
> + TCGv_i32 wd = tcg_const_i32(a->wd); \
> + TCGv_i32 vm = tcg_const_i32(a->vm); \
> + gen_helper_vector_##INSN(cpu_env, wd, vm, s1, s2, d);\
> + tcg_temp_free_i32(s1); \
> + tcg_temp_free_i32(s2); \
> + tcg_temp_free_i32(d); \
> + tcg_temp_free_i32(wd); \
> + tcg_temp_free_i32(vm); \
> + return true; \
> +}
> +
> +#define GEN_VECTOR_R(INSN) \
> +static bool trans_##INSN(DisasContext *ctx, arg_##INSN * a) \
> +{ \
> + TCGv_i32 s1 = tcg_const_i32(a->rs1); \
> + TCGv_i32 s2 = tcg_const_i32(a->rs2); \
> + TCGv_i32 d = tcg_const_i32(a->rd); \
> + gen_helper_vector_##INSN(cpu_env, s1, s2, d); \
> + tcg_temp_free_i32(s1); \
> + tcg_temp_free_i32(s2); \
> + tcg_temp_free_i32(d); \
> + return true; \
> +}
> +
> +#define GEN_VECTOR_R2_VM(INSN) \
> +static bool trans_##INSN(DisasContext *ctx, arg_##INSN * a) \
> +{ \
> + TCGv_i32 s2 = tcg_const_i32(a->rs2); \
> + TCGv_i32 d = tcg_const_i32(a->rd); \
> + TCGv_i32 vm = tcg_const_i32(a->vm); \
> + gen_helper_vector_##INSN(cpu_env, vm, s2, d); \
> + tcg_temp_free_i32(s2); \
> + tcg_temp_free_i32(d); \
> + tcg_temp_free_i32(vm); \
> + return true; \
> +}
> +
> +#define GEN_VECTOR_R1_VM(INSN) \
> +static bool trans_##INSN(DisasContext *ctx, arg_##INSN * a) \
> +{ \
> + TCGv_i32 d = tcg_const_i32(a->rd); \
> + TCGv_i32 vm = tcg_const_i32(a->vm); \
> + gen_helper_vector_##INSN(cpu_env, vm, d); \
> + tcg_temp_free_i32(d); \
> + tcg_temp_free_i32(vm); \
> + return true; \
> +}
> +
> +#define GEN_VECTOR_R_VM(INSN) \
> +static bool trans_##INSN(DisasContext *ctx, arg_##INSN * a) \
> +{ \
> + TCGv_i32 s1 = tcg_const_i32(a->rs1); \
> + TCGv_i32 s2 = tcg_const_i32(a->rs2); \
> + TCGv_i32 d = tcg_const_i32(a->rd); \
> + TCGv_i32 vm = tcg_const_i32(a->vm); \
> + gen_helper_vector_##INSN(cpu_env, vm, s1, s2, d); \
> + tcg_temp_free_i32(s1); \
> + tcg_temp_free_i32(s2); \
> + tcg_temp_free_i32(d); \
> + tcg_temp_free_i32(vm); \
> + return true; \
> +}
> +
> +#define GEN_VECTOR_R2_ZIMM(INSN) \
> +static bool trans_##INSN(DisasContext *ctx, arg_##INSN * a) \
> +{ \
> + TCGv_i32 s1 = tcg_const_i32(a->rs1); \
> + TCGv_i32 zimm = tcg_const_i32(a->zimm); \
> + TCGv_i32 d = tcg_const_i32(a->rd); \
> + gen_helper_vector_##INSN(cpu_env, s1, zimm, d); \
> + tcg_temp_free_i32(s1); \
> + tcg_temp_free_i32(zimm); \
> + tcg_temp_free_i32(d); \
> + return true; \
> +}
> +
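
Expanding one of the instantiations below by hand may help review; GEN_VECTOR_R_VM(vadd_vv) becomes, modulo whitespace:

    static bool trans_vadd_vv(DisasContext *ctx, arg_vadd_vv *a)
    {
        TCGv_i32 s1 = tcg_const_i32(a->rs1);
        TCGv_i32 s2 = tcg_const_i32(a->rs2);
        TCGv_i32 d = tcg_const_i32(a->rd);
        TCGv_i32 vm = tcg_const_i32(a->vm);
        gen_helper_vector_vadd_vv(cpu_env, vm, s1, s2, d);
        tcg_temp_free_i32(s1);
        tcg_temp_free_i32(s2);
        tcg_temp_free_i32(d);
        tcg_temp_free_i32(vm);
        return true;
    }

So every instruction passes its register numbers as constants to a C helper that does all the work at execution time; none of this is expressed as TCG vector ops, which is worth keeping in mind when reviewing for performance.
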
> +GEN_VECTOR_R2_NFVM(vlb_v)
> +GEN_VECTOR_R2_NFVM(vlh_v)
> +GEN_VECTOR_R2_NFVM(vlw_v)
> +GEN_VECTOR_R2_NFVM(vle_v)
> +GEN_VECTOR_R2_NFVM(vlbu_v)
> +GEN_VECTOR_R2_NFVM(vlhu_v)
> +GEN_VECTOR_R2_NFVM(vlwu_v)
> +GEN_VECTOR_R2_NFVM(vlbff_v)
> +GEN_VECTOR_R2_NFVM(vlhff_v)
> +GEN_VECTOR_R2_NFVM(vlwff_v)
> +GEN_VECTOR_R2_NFVM(vleff_v)
> +GEN_VECTOR_R2_NFVM(vlbuff_v)
> +GEN_VECTOR_R2_NFVM(vlhuff_v)
> +GEN_VECTOR_R2_NFVM(vlwuff_v)
> +GEN_VECTOR_R2_NFVM(vsb_v)
> +GEN_VECTOR_R2_NFVM(vsh_v)
> +GEN_VECTOR_R2_NFVM(vsw_v)
> +GEN_VECTOR_R2_NFVM(vse_v)
> +
> +GEN_VECTOR_R_NFVM(vlsb_v)
> +GEN_VECTOR_R_NFVM(vlsh_v)
> +GEN_VECTOR_R_NFVM(vlsw_v)
> +GEN_VECTOR_R_NFVM(vlse_v)
> +GEN_VECTOR_R_NFVM(vlsbu_v)
> +GEN_VECTOR_R_NFVM(vlshu_v)
> +GEN_VECTOR_R_NFVM(vlswu_v)
> +GEN_VECTOR_R_NFVM(vssb_v)
> +GEN_VECTOR_R_NFVM(vssh_v)
> +GEN_VECTOR_R_NFVM(vssw_v)
> +GEN_VECTOR_R_NFVM(vsse_v)
> +GEN_VECTOR_R_NFVM(vlxb_v)
> +GEN_VECTOR_R_NFVM(vlxh_v)
> +GEN_VECTOR_R_NFVM(vlxw_v)
> +GEN_VECTOR_R_NFVM(vlxe_v)
> +GEN_VECTOR_R_NFVM(vlxbu_v)
> +GEN_VECTOR_R_NFVM(vlxhu_v)
> +GEN_VECTOR_R_NFVM(vlxwu_v)
> +GEN_VECTOR_R_NFVM(vsxb_v)
> +GEN_VECTOR_R_NFVM(vsxh_v)
> +GEN_VECTOR_R_NFVM(vsxw_v)
> +GEN_VECTOR_R_NFVM(vsxe_v)
> +GEN_VECTOR_R_NFVM(vsuxb_v)
> +GEN_VECTOR_R_NFVM(vsuxh_v)
> +GEN_VECTOR_R_NFVM(vsuxw_v)
> +GEN_VECTOR_R_NFVM(vsuxe_v)
> +
> +GEN_VECTOR_R_WDVM(vamoswapw_v)
> +GEN_VECTOR_R_WDVM(vamoswapd_v)
> +GEN_VECTOR_R_WDVM(vamoaddw_v)
> +GEN_VECTOR_R_WDVM(vamoaddd_v)
> +GEN_VECTOR_R_WDVM(vamoxorw_v)
> +GEN_VECTOR_R_WDVM(vamoxord_v)
> +GEN_VECTOR_R_WDVM(vamoandw_v)
> +GEN_VECTOR_R_WDVM(vamoandd_v)
> +GEN_VECTOR_R_WDVM(vamoorw_v)
> +GEN_VECTOR_R_WDVM(vamoord_v)
> +GEN_VECTOR_R_WDVM(vamominw_v)
> +GEN_VECTOR_R_WDVM(vamomind_v)
> +GEN_VECTOR_R_WDVM(vamomaxw_v)
> +GEN_VECTOR_R_WDVM(vamomaxd_v)
> +GEN_VECTOR_R_WDVM(vamominuw_v)
> +GEN_VECTOR_R_WDVM(vamominud_v)
> +GEN_VECTOR_R_WDVM(vamomaxuw_v)
> +GEN_VECTOR_R_WDVM(vamomaxud_v)
> +
> +GEN_VECTOR_R(vext_x_v)
> +GEN_VECTOR_R(vfmv_f_s)
> +GEN_VECTOR_R(vmv_s_x)
> +GEN_VECTOR_R(vfmv_s_f)
> +GEN_VECTOR_R(vadc_vvm)
> +GEN_VECTOR_R(vadc_vxm)
> +GEN_VECTOR_R(vadc_vim)
> +GEN_VECTOR_R(vmadc_vvm)
> +GEN_VECTOR_R(vmadc_vxm)
> +GEN_VECTOR_R(vmadc_vim)
> +GEN_VECTOR_R(vsbc_vvm)
> +GEN_VECTOR_R(vsbc_vxm)
> +GEN_VECTOR_R(vmsbc_vvm)
> +GEN_VECTOR_R(vmsbc_vxm)
> +GEN_VECTOR_R2_VM(vmpopc_m)
> +GEN_VECTOR_R2_VM(vmfirst_m)
> +GEN_VECTOR_R(vcompress_vm)
> +GEN_VECTOR_R(vmandnot_mm)
> +GEN_VECTOR_R(vmand_mm)
> +GEN_VECTOR_R(vmor_mm)
> +GEN_VECTOR_R(vmxor_mm)
> +GEN_VECTOR_R(vmornot_mm)
> +GEN_VECTOR_R(vmnand_mm)
> +GEN_VECTOR_R(vmnor_mm)
> +GEN_VECTOR_R(vmxnor_mm)
> +GEN_VECTOR_R2_VM(vmsbf_m)
> +GEN_VECTOR_R2_VM(vmsof_m)
> +GEN_VECTOR_R2_VM(vmsif_m)
> +GEN_VECTOR_R2_VM(viota_m)
> +GEN_VECTOR_R1_VM(vid_v)
> +GEN_VECTOR_R2_VM(vfcvt_xu_f_v)
> +GEN_VECTOR_R2_VM(vfcvt_x_f_v)
> +GEN_VECTOR_R2_VM(vfcvt_f_xu_v)
> +GEN_VECTOR_R2_VM(vfcvt_f_x_v)
> +GEN_VECTOR_R2_VM(vfwcvt_xu_f_v)
> +GEN_VECTOR_R2_VM(vfwcvt_x_f_v)
> +GEN_VECTOR_R2_VM(vfwcvt_f_xu_v)
> +GEN_VECTOR_R2_VM(vfwcvt_f_x_v)
> +GEN_VECTOR_R2_VM(vfwcvt_f_f_v)
> +GEN_VECTOR_R2_VM(vfncvt_xu_f_v)
> +GEN_VECTOR_R2_VM(vfncvt_x_f_v)
> +GEN_VECTOR_R2_VM(vfncvt_f_xu_v)
> +GEN_VECTOR_R2_VM(vfncvt_f_x_v)
> +GEN_VECTOR_R2_VM(vfncvt_f_f_v)
> +GEN_VECTOR_R2_VM(vfsqrt_v)
> +GEN_VECTOR_R2_VM(vfclass_v)
> +
> +GEN_VECTOR_R_VM(vadd_vv)
> +GEN_VECTOR_R_VM(vadd_vx)
> +GEN_VECTOR_R_VM(vadd_vi)
> +GEN_VECTOR_R_VM(vredsum_vs)
> +GEN_VECTOR_R_VM(vfadd_vv)
> +GEN_VECTOR_R_VM(vfadd_vf)
> +GEN_VECTOR_R_VM(vredand_vs)
> +GEN_VECTOR_R_VM(vfredsum_vs)
> +GEN_VECTOR_R_VM(vsub_vv)
> +GEN_VECTOR_R_VM(vsub_vx)
> +GEN_VECTOR_R_VM(vredor_vs)
> +GEN_VECTOR_R_VM(vfsub_vv)
> +GEN_VECTOR_R_VM(vfsub_vf)
> +GEN_VECTOR_R_VM(vrsub_vx)
> +GEN_VECTOR_R_VM(vrsub_vi)
> +GEN_VECTOR_R_VM(vredxor_vs)
> +GEN_VECTOR_R_VM(vfredosum_vs)
> +GEN_VECTOR_R_VM(vminu_vv)
> +GEN_VECTOR_R_VM(vminu_vx)
> +GEN_VECTOR_R_VM(vredminu_vs)
> +GEN_VECTOR_R_VM(vfmin_vv)
> +GEN_VECTOR_R_VM(vfmin_vf)
> +GEN_VECTOR_R_VM(vmin_vv)
> +GEN_VECTOR_R_VM(vmin_vx)
> +GEN_VECTOR_R_VM(vredmin_vs)
> +GEN_VECTOR_R_VM(vfredmin_vs)
> +GEN_VECTOR_R_VM(vmaxu_vv)
> +GEN_VECTOR_R_VM(vmaxu_vx)
> +GEN_VECTOR_R_VM(vredmaxu_vs)
> +GEN_VECTOR_R_VM(vfmax_vv)
> +GEN_VECTOR_R_VM(vfmax_vf)
> +GEN_VECTOR_R_VM(vmax_vv)
> +GEN_VECTOR_R_VM(vmax_vx)
> +GEN_VECTOR_R_VM(vredmax_vs)
> +GEN_VECTOR_R_VM(vfredmax_vs)
> +GEN_VECTOR_R_VM(vfsgnj_vv)
> +GEN_VECTOR_R_VM(vfsgnj_vf)
> +GEN_VECTOR_R_VM(vand_vv)
> +GEN_VECTOR_R_VM(vand_vx)
> +GEN_VECTOR_R_VM(vand_vi)
> +GEN_VECTOR_R_VM(vfsgnjn_vv)
> +GEN_VECTOR_R_VM(vfsgnjn_vf)
> +GEN_VECTOR_R_VM(vor_vv)
> +GEN_VECTOR_R_VM(vor_vx)
> +GEN_VECTOR_R_VM(vor_vi)
> +GEN_VECTOR_R_VM(vfsgnjx_vv)
> +GEN_VECTOR_R_VM(vfsgnjx_vf)
> +GEN_VECTOR_R_VM(vxor_vv)
> +GEN_VECTOR_R_VM(vxor_vx)
> +GEN_VECTOR_R_VM(vxor_vi)
> +GEN_VECTOR_R_VM(vrgather_vv)
> +GEN_VECTOR_R_VM(vrgather_vx)
> +GEN_VECTOR_R_VM(vrgather_vi)
> +GEN_VECTOR_R_VM(vslideup_vx)
> +GEN_VECTOR_R_VM(vslideup_vi)
> +GEN_VECTOR_R_VM(vslide1up_vx)
> +GEN_VECTOR_R_VM(vslidedown_vx)
> +GEN_VECTOR_R_VM(vslidedown_vi)
> +GEN_VECTOR_R_VM(vslide1down_vx)
> +GEN_VECTOR_R_VM(vmerge_vvm)
> +GEN_VECTOR_R_VM(vmerge_vxm)
> +GEN_VECTOR_R_VM(vmerge_vim)
> +GEN_VECTOR_R_VM(vfmerge_vfm)
> +GEN_VECTOR_R_VM(vmseq_vv)
> +GEN_VECTOR_R_VM(vmseq_vx)
> +GEN_VECTOR_R_VM(vmseq_vi)
> +GEN_VECTOR_R_VM(vmfeq_vv)
> +GEN_VECTOR_R_VM(vmfeq_vf)
> +GEN_VECTOR_R_VM(vmsne_vv)
> +GEN_VECTOR_R_VM(vmsne_vx)
> +GEN_VECTOR_R_VM(vmsne_vi)
> +GEN_VECTOR_R_VM(vmfle_vv)
> +GEN_VECTOR_R_VM(vmfle_vf)
> +GEN_VECTOR_R_VM(vmsltu_vv)
> +GEN_VECTOR_R_VM(vmsltu_vx)
> +GEN_VECTOR_R_VM(vmford_vv)
> +GEN_VECTOR_R_VM(vmford_vf)
> +GEN_VECTOR_R_VM(vmslt_vv)
> +GEN_VECTOR_R_VM(vmslt_vx)
> +GEN_VECTOR_R_VM(vmflt_vv)
> +GEN_VECTOR_R_VM(vmflt_vf)
> +GEN_VECTOR_R_VM(vmsleu_vv)
> +GEN_VECTOR_R_VM(vmsleu_vx)
> +GEN_VECTOR_R_VM(vmsleu_vi)
> +GEN_VECTOR_R_VM(vmfne_vv)
> +GEN_VECTOR_R_VM(vmfne_vf)
> +GEN_VECTOR_R_VM(vmsle_vv)
> +GEN_VECTOR_R_VM(vmsle_vx)
> +GEN_VECTOR_R_VM(vmsle_vi)
> +GEN_VECTOR_R_VM(vmfgt_vf)
> +GEN_VECTOR_R_VM(vmsgtu_vx)
> +GEN_VECTOR_R_VM(vmsgtu_vi)
> +GEN_VECTOR_R_VM(vmsgt_vx)
> +GEN_VECTOR_R_VM(vmsgt_vi)
> +GEN_VECTOR_R_VM(vmfge_vf)
> +GEN_VECTOR_R_VM(vsaddu_vv)
> +GEN_VECTOR_R_VM(vsaddu_vx)
> +GEN_VECTOR_R_VM(vsaddu_vi)
> +GEN_VECTOR_R_VM(vdivu_vv)
> +GEN_VECTOR_R_VM(vdivu_vx)
> +GEN_VECTOR_R_VM(vfdiv_vv)
> +GEN_VECTOR_R_VM(vfdiv_vf)
> +GEN_VECTOR_R_VM(vsadd_vv)
> +GEN_VECTOR_R_VM(vsadd_vx)
> +GEN_VECTOR_R_VM(vsadd_vi)
> +GEN_VECTOR_R_VM(vdiv_vv)
> +GEN_VECTOR_R_VM(vdiv_vx)
> +GEN_VECTOR_R_VM(vfrdiv_vf)
> +GEN_VECTOR_R_VM(vssubu_vv)
> +GEN_VECTOR_R_VM(vssubu_vx)
> +GEN_VECTOR_R_VM(vremu_vv)
> +GEN_VECTOR_R_VM(vremu_vx)
> +GEN_VECTOR_R_VM(vssub_vv)
> +GEN_VECTOR_R_VM(vssub_vx)
> +GEN_VECTOR_R_VM(vrem_vv)
> +GEN_VECTOR_R_VM(vrem_vx)
> +GEN_VECTOR_R_VM(vaadd_vv)
> +GEN_VECTOR_R_VM(vaadd_vx)
> +GEN_VECTOR_R_VM(vaadd_vi)
> +GEN_VECTOR_R_VM(vmulhu_vv)
> +GEN_VECTOR_R_VM(vmulhu_vx)
> +GEN_VECTOR_R_VM(vfmul_vv)
> +GEN_VECTOR_R_VM(vfmul_vf)
> +GEN_VECTOR_R_VM(vsll_vv)
> +GEN_VECTOR_R_VM(vsll_vx)
> +GEN_VECTOR_R_VM(vsll_vi)
> +GEN_VECTOR_R_VM(vmul_vv)
> +GEN_VECTOR_R_VM(vmul_vx)
> +GEN_VECTOR_R_VM(vasub_vv)
> +GEN_VECTOR_R_VM(vasub_vx)
> +GEN_VECTOR_R_VM(vmulhsu_vv)
> +GEN_VECTOR_R_VM(vmulhsu_vx)
> +GEN_VECTOR_R_VM(vsmul_vv)
> +GEN_VECTOR_R_VM(vsmul_vx)
> +GEN_VECTOR_R_VM(vmulh_vv)
> +GEN_VECTOR_R_VM(vmulh_vx)
> +GEN_VECTOR_R_VM(vfrsub_vf)
> +GEN_VECTOR_R_VM(vsrl_vv)
> +GEN_VECTOR_R_VM(vsrl_vx)
> +GEN_VECTOR_R_VM(vsrl_vi)
> +GEN_VECTOR_R_VM(vfmadd_vv)
> +GEN_VECTOR_R_VM(vfmadd_vf)
> +GEN_VECTOR_R_VM(vsra_vv)
> +GEN_VECTOR_R_VM(vsra_vx)
> +GEN_VECTOR_R_VM(vsra_vi)
> +GEN_VECTOR_R_VM(vmadd_vv)
> +GEN_VECTOR_R_VM(vmadd_vx)
> +GEN_VECTOR_R_VM(vfnmadd_vv)
> +GEN_VECTOR_R_VM(vfnmadd_vf)
> +GEN_VECTOR_R_VM(vssrl_vv)
> +GEN_VECTOR_R_VM(vssrl_vx)
> +GEN_VECTOR_R_VM(vssrl_vi)
> +GEN_VECTOR_R_VM(vfmsub_vv)
> +GEN_VECTOR_R_VM(vfmsub_vf)
> +GEN_VECTOR_R_VM(vssra_vv)
> +GEN_VECTOR_R_VM(vssra_vx)
> +GEN_VECTOR_R_VM(vssra_vi)
> +GEN_VECTOR_R_VM(vnmsub_vv)
> +GEN_VECTOR_R_VM(vnmsub_vx)
> +GEN_VECTOR_R_VM(vfnmsub_vv)
> +GEN_VECTOR_R_VM(vfnmsub_vf)
> +GEN_VECTOR_R_VM(vnsrl_vv)
> +GEN_VECTOR_R_VM(vnsrl_vx)
> +GEN_VECTOR_R_VM(vnsrl_vi)
> +GEN_VECTOR_R_VM(vfmacc_vv)
> +GEN_VECTOR_R_VM(vfmacc_vf)
> +GEN_VECTOR_R_VM(vnsra_vv)
> +GEN_VECTOR_R_VM(vnsra_vx)
> +GEN_VECTOR_R_VM(vnsra_vi)
> +GEN_VECTOR_R_VM(vmacc_vv)
> +GEN_VECTOR_R_VM(vmacc_vx)
> +GEN_VECTOR_R_VM(vfnmacc_vv)
> +GEN_VECTOR_R_VM(vfnmacc_vf)
> +GEN_VECTOR_R_VM(vnclipu_vv)
> +GEN_VECTOR_R_VM(vnclipu_vx)
> +GEN_VECTOR_R_VM(vnclipu_vi)
> +GEN_VECTOR_R_VM(vfmsac_vv)
> +GEN_VECTOR_R_VM(vfmsac_vf)
> +GEN_VECTOR_R_VM(vnclip_vv)
> +GEN_VECTOR_R_VM(vnclip_vx)
> +GEN_VECTOR_R_VM(vnclip_vi)
> +GEN_VECTOR_R_VM(vnmsac_vv)
> +GEN_VECTOR_R_VM(vnmsac_vx)
> +GEN_VECTOR_R_VM(vfnmsac_vv)
> +GEN_VECTOR_R_VM(vfnmsac_vf)
> +GEN_VECTOR_R_VM(vwredsumu_vs)
> +GEN_VECTOR_R_VM(vwaddu_vv)
> +GEN_VECTOR_R_VM(vwaddu_vx)
> +GEN_VECTOR_R_VM(vfwadd_vv)
> +GEN_VECTOR_R_VM(vfwadd_vf)
> +GEN_VECTOR_R_VM(vwredsum_vs)
> +GEN_VECTOR_R_VM(vwadd_vv)
> +GEN_VECTOR_R_VM(vwadd_vx)
> +GEN_VECTOR_R_VM(vfwredsum_vs)
> +GEN_VECTOR_R_VM(vwsubu_vv)
> +GEN_VECTOR_R_VM(vwsubu_vx)
> +GEN_VECTOR_R_VM(vfwsub_vv)
> +GEN_VECTOR_R_VM(vfwsub_vf)
> +GEN_VECTOR_R_VM(vwsub_vv)
> +GEN_VECTOR_R_VM(vwsub_vx)
> +GEN_VECTOR_R_VM(vfwredosum_vs)
> +GEN_VECTOR_R_VM(vwaddu_wv)
> +GEN_VECTOR_R_VM(vwaddu_wx)
> +GEN_VECTOR_R_VM(vfwadd_wv)
> +GEN_VECTOR_R_VM(vfwadd_wf)
> +GEN_VECTOR_R_VM(vwadd_wv)
> +GEN_VECTOR_R_VM(vwadd_wx)
> +GEN_VECTOR_R_VM(vwsubu_wv)
> +GEN_VECTOR_R_VM(vwsubu_wx)
> +GEN_VECTOR_R_VM(vfwsub_wv)
> +GEN_VECTOR_R_VM(vfwsub_wf)
> +GEN_VECTOR_R_VM(vwsub_wv)
> +GEN_VECTOR_R_VM(vwsub_wx)
> +GEN_VECTOR_R_VM(vwmulu_vv)
> +GEN_VECTOR_R_VM(vwmulu_vx)
> +GEN_VECTOR_R_VM(vfwmul_vv)
> +GEN_VECTOR_R_VM(vfwmul_vf)
> +GEN_VECTOR_R_VM(vwmulsu_vv)
> +GEN_VECTOR_R_VM(vwmulsu_vx)
> +GEN_VECTOR_R_VM(vwmul_vv)
> +GEN_VECTOR_R_VM(vwmul_vx)
> +GEN_VECTOR_R_VM(vwsmaccu_vv)
> +GEN_VECTOR_R_VM(vwsmaccu_vx)
> +GEN_VECTOR_R_VM(vwmaccu_vv)
> +GEN_VECTOR_R_VM(vwmaccu_vx)
> +GEN_VECTOR_R_VM(vfwmacc_vv)
> +GEN_VECTOR_R_VM(vfwmacc_vf)
> +GEN_VECTOR_R_VM(vwsmacc_vv)
> +GEN_VECTOR_R_VM(vwsmacc_vx)
> +GEN_VECTOR_R_VM(vwmacc_vv)
> +GEN_VECTOR_R_VM(vwmacc_vx)
> +GEN_VECTOR_R_VM(vfwnmacc_vv)
> +GEN_VECTOR_R_VM(vfwnmacc_vf)
> +GEN_VECTOR_R_VM(vwsmaccsu_vv)
> +GEN_VECTOR_R_VM(vwsmaccsu_vx)
> +GEN_VECTOR_R_VM(vwmaccsu_vv)
> +GEN_VECTOR_R_VM(vwmaccsu_vx)
> +GEN_VECTOR_R_VM(vfwmsac_vv)
> +GEN_VECTOR_R_VM(vfwmsac_vf)
> +GEN_VECTOR_R_VM(vwsmaccus_vx)
> +GEN_VECTOR_R_VM(vwmaccus_vx)
> +GEN_VECTOR_R_VM(vfwnmsac_vv)
> +GEN_VECTOR_R_VM(vfwnmsac_vf)
> +GEN_VECTOR_R2_ZIMM(vsetvli)
> +GEN_VECTOR_R(vsetvl)
> diff --git a/target/riscv/translate.c b/target/riscv/translate.c
> index 8d6ab73..587c23e 100644
> --- a/target/riscv/translate.c
> +++ b/target/riscv/translate.c
> @@ -706,6 +706,7 @@ static bool gen_shift(DisasContext *ctx, arg_r *a,
> #include "insn_trans/trans_rva.inc.c"
> #include "insn_trans/trans_rvf.inc.c"
> #include "insn_trans/trans_rvd.inc.c"
> +#include "insn_trans/trans_rvv.inc.c"
> #include "insn_trans/trans_privileged.inc.c"
>
> /*
> diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c
> new file mode 100644
> index 0000000..1f8f1ec
> --- /dev/null
> +++ b/target/riscv/vector_helper.c
> @@ -0,0 +1,26563 @@
> +/*
> + * RISC-V Vector Extension Helpers for QEMU.
> + *
> + * Copyright (c) 2011-2019 C-SKY Limited. All rights reserved.
> + *
> + * This program is free software; you can redistribute it and/or modify it
> + * under the terms and conditions of the GNU General Public License,
> + * version 2 or later, as published by the Free Software Foundation.
> + *
> + * This program is distributed in the hope it will be useful, but WITHOUT
> + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
> + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
> + * more details.
> + *
> + * You should have received a copy of the GNU General Public License along with
> + * this program. If not, see <http://www.gnu.org/licenses/>.
> + */
> +
> +#include "qemu/osdep.h"
> +#include "qemu/log.h"
> +#include "cpu.h"
> +#include "qemu/main-loop.h"
> +#include "exec/exec-all.h"
> +#include "exec/helper-proto.h"
> +#include "exec/translator.h"
> +#include "exec/cpu_ldst.h"
> +#include <math.h>
> +#include "instmap.h"
> +
> +#define VECTOR_HELPER(name) HELPER(glue(vector_, name))
> +#define SIGNBIT8 (1 << 7)
> +#define MAX_U8 ((uint8_t)0xff)
> +#define MIN_U8 ((uint8_t)0x0)
> +#define MAX_S8 ((int8_t)0x7f)
> +#define MIN_S8 ((int8_t)0x80)
> +#define SIGNBIT16 (1 << 15)
> +#define MAX_U16 ((uint16_t)0xffff)
> +#define MIN_U16 ((uint16_t)0x0)
> +#define MAX_S16 ((int16_t)0x7fff)
> +#define MIN_S16 ((int16_t)0x8000)
> +#define SIGNBIT32 (1 << 31)
> +#define MAX_U32 ((uint32_t)0xffffffff)
> +#define MIN_U32 ((uint32_t)0x0)
> +#define MAX_S32 ((int32_t)0x7fffffff)
> +#define MIN_S32 ((int32_t)0x80000000)
> +#define SIGNBIT64 ((uint64_t)1 << 63)
> +#define MAX_U64 ((uint64_t)0xffffffffffffffff)
> +#define MIN_U64 ((uint64_t)0x0)
> +#define MAX_S64 ((int64_t)0x7fffffffffffffff)
> +#define MIN_S64 ((int64_t)0x8000000000000000)
> +
> +static int64_t sign_extend(int64_t a, int8_t width)
> +{
> + return a << (64 - width) >> (64 - width);
> +}
> +
> +static int64_t extend_gpr(target_ulong reg)
> +{
> + return sign_extend(reg, sizeof(target_ulong) * 8);
> +}
> +
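
sign_extend() is the usual shift-up/shift-down idiom; two spot checks:

    sign_extend(0x80, 8);   /* bit 7 -> bit 63 and back: 0xffffffffffffff80 */
    sign_extend(0x7f, 8);   /* positive value unchanged: 0x7f               */

Strictly, left-shifting a negative int64_t is not defined by ISO C; QEMU conventionally relies on two's-complement compilers here, but an unsigned shift plus an explicit sign test would be the pedantic form.
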
> +static target_ulong vector_get_index(CPURISCVState *env, int rs1, int rs2,
> + int index, int mem, int width, int nf)
> +{
> + target_ulong abs_off, base = env->gpr[rs1];
> + target_long offset;
> + switch (width) {
> + case 8:
> + offset = sign_extend(env->vfp.vreg[rs2].s8[index], 8) + nf * mem;
> + break;
> + case 16:
> + offset = sign_extend(env->vfp.vreg[rs2].s16[index], 16) + nf * mem;
> + break;
> + case 32:
> + offset = sign_extend(env->vfp.vreg[rs2].s32[index], 32) + nf * mem;
> + break;
> + case 64:
> + offset = env->vfp.vreg[rs2].s64[index] + nf * mem;
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return 0;
> + }
> + if (offset < 0) {
> + abs_off = ~offset + 1;
> + if (base >= abs_off) {
> + return base - abs_off;
> + }
> + } else {
> + if ((target_ulong)((target_ulong)offset + base) >= base) {
> + return (target_ulong)offset + base;
> + }
> + }
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return 0;
> +}
> +
> +/* ADD/SUB/COMPARE instructions. */
> +static inline uint8_t sat_add_u8(CPURISCVState *env, uint8_t a, uint8_t b)
> +{
> + uint8_t res = a + b;
> + if (res < a) {
> + res = MAX_U8;
> + env->vfp.vxsat = 0x1;
> + }
> + return res;
> +}
> +
> +static inline uint16_t sat_add_u16(CPURISCVState *env, uint16_t a, uint16_t b)
> +{
> + uint16_t res = a + b;
> + if (res < a) {
> + res = MAX_U16;
> + env->vfp.vxsat = 0x1;
> + }
> + return res;
> +}
> +
> +static inline uint32_t sat_add_u32(CPURISCVState *env, uint32_t a, uint32_t b)
> +{
> + uint32_t res = a + b;
> + if (res < a) {
> + res = MAX_U32;
> + env->vfp.vxsat = 0x1;
> + }
> + return res;
> +}
> +
> +static inline uint64_t sat_add_u64(CPURISCVState *env, uint64_t a, uint64_t b)
> +{
> + uint64_t res = a + b;
> + if (res < a) {
> + res = MAX_U64;
> + env->vfp.vxsat = 0x1;
> + }
> + return res;
> +}
> +
> +static inline uint8_t sat_add_s8(CPURISCVState *env, uint8_t a, uint8_t b)
> +{
> + uint8_t res = a + b;
> + if (((res ^ a) & SIGNBIT8) && !((a ^ b) & SIGNBIT8)) {
> + res = ~(((int8_t)a >> 7) ^ SIGNBIT8);
> + env->vfp.vxsat = 0x1;
> + }
> + return res;
> +}
> +
> +static inline uint16_t sat_add_s16(CPURISCVState *env, uint16_t a, uint16_t b)
> +{
> + uint16_t res = a + b;
> + if (((res ^ a) & SIGNBIT16) && !((a ^ b) & SIGNBIT16)) {
> + res = ~(((int16_t)a >> 15) ^ SIGNBIT16);
> + env->vfp.vxsat = 0x1;
> + }
> + return res;
> +}
> +
> +static inline uint32_t sat_add_s32(CPURISCVState *env, uint32_t a, uint32_t b)
> +{
> + uint32_t res = a + b;
> + if (((res ^ a) & SIGNBIT32) && !((a ^ b) & SIGNBIT32)) {
> + res = ~(((int32_t)a >> 31) ^ SIGNBIT32);
> + env->vfp.vxsat = 0x1;
> + }
> + return res;
> +}
> +
> +static inline uint64_t sat_add_s64(CPURISCVState *env, uint64_t a, uint64_t b)
> +{
> + uint64_t res = a + b;
> + if (((res ^ a) & SIGNBIT64) && !((a ^ b) & SIGNBIT64)) {
> + res = ~(((int64_t)a >> 63) ^ SIGNBIT64);
> + env->vfp.vxsat = 0x1;
> + }
> + return res;
> +}
> +
> +static inline uint8_t sat_sub_u8(CPURISCVState *env, uint8_t a, uint8_t b)
> +{
> + uint8_t res = a - b;
> + if (res > a) {
> + res = 0;
> + env->vfp.vxsat = 0x1;
> + }
> + return res;
> +}
> +
> +static inline uint16_t sat_sub_u16(CPURISCVState *env, uint16_t a, uint16_t b)
> +{
> + uint16_t res = a - b;
> + if (res > a) {
> + res = 0;
> + env->vfp.vxsat = 0x1;
> + }
> + return res;
> +}
> +
> +static inline uint32_t sat_sub_u32(CPURISCVState *env, uint32_t a, uint32_t b)
> +{
> + uint32_t res = a - b;
> + if (res > a) {
> + res = 0;
> + env->vfp.vxsat = 0x1;
> + }
> + return res;
> +}
> +
> +static inline uint64_t sat_sub_u64(CPURISCVState *env, uint64_t a, uint64_t b)
> +{
> + uint64_t res = a - b;
> + if (res > a) {
> + res = 0;
> + env->vfp.vxsat = 0x1;
> + }
> + return res;
> +}
> +
> +static inline uint8_t sat_sub_s8(CPURISCVState *env, uint8_t a, uint8_t b)
> +{
> + uint8_t res = a - b;
> + if (((res ^ a) & SIGNBIT8) && ((a ^ b) & SIGNBIT8)) {
> + res = ~(((int8_t)a >> 7) ^ SIGNBIT8);
> + env->vfp.vxsat = 0x1;
> + }
> + return res;
> +}
> +
> +static inline uint16_t sat_sub_s16(CPURISCVState *env, uint16_t a, uint16_t b)
> +{
> + uint16_t res = a - b;
> + if (((res ^ a) & SIGNBIT16) && ((a ^ b) & SIGNBIT16)) {
> + res = ~(((int16_t)a >> 15) ^ SIGNBIT16);
> + env->vfp.vxsat = 0x1;
> + }
> + return res;
> +}
> +
> +static inline uint32_t sat_sub_s32(CPURISCVState *env, uint32_t a, uint32_t b)
> +{
> + uint32_t res = a - b;
> + if (((res ^ a) & SIGNBIT32) && ((a ^ b) & SIGNBIT32)) {
> + res = ~(((int32_t)a >> 31) ^ SIGNBIT32);
> + env->vfp.vxsat = 0x1;
> + }
> + return res;
> +}
> +
> +static inline uint64_t sat_sub_s64(CPURISCVState *env, uint64_t a, uint64_t b)
> +{
> + uint64_t res = a - b;
> + if (((res ^ a) & SIGNBIT64) && ((a ^ b) & SIGNBIT64)) {
> + res = ~(((int64_t)a >> 63) ^ SIGNBIT64);
> + env->vfp.vxsat = 0x1;
> + }
> + return res;
> +}
> +
> +static uint64_t fix_data_round(CPURISCVState *env, uint64_t result,
> + uint8_t shift)
> +{
> + uint64_t lsb_1 = (uint64_t)1 << shift;
> + int mod = env->vfp.vxrm;
> + int mask = ((uint64_t)1 << shift) - 1;
> +
> + if (mod == 0x0) { /* rnu */
> + return lsb_1 >> 1;
> + } else if (mod == 0x1) { /* rne */
> + if ((result & mask) > (lsb_1 >> 1) ||
> + (((result & mask) == (lsb_1 >> 1)) &&
> + (((result >> shift) & 0x1)) == 1)) {
> + return lsb_1 >> 1;
> + }
> + } else if (mod == 0x3) { /* rod */
> + if (((result & mask) >= 0x1) && (((result >> shift) & 0x1) == 0)) {
> + return lsb_1;
> + }
> + }
> + return 0;
> +}
> +
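
fix_data_round() returns the increment that callers add before the right shift, so all four vxrm modes reduce to "add, then shift". A worked example with result = 22 (0b10110) and shift = 2, i.e. discarded bits 0b10, exactly one half:

    /* vxrm 0 (rnu): round = 2 -> (22 + 2) >> 2 = 6                         */
    /* vxrm 1 (rne): 22 >> 2 = 5 is odd, so round = 2 -> 6;                 */
    /*               for 18 (0b10010), 18 >> 2 = 4 is even -> round 0 -> 4  */
    /* vxrm 2:       no case matches, round = 0 -> 22 >> 2 = 5 (truncate)   */
    /* vxrm 3 (rod): 5 is already odd -> round 0 -> 5;                      */
    /*               for 18, quotient 4 is even -> round = 4 -> (18+4)>>2=5 */
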
> +static int8_t saturate_s8(CPURISCVState *env, int16_t res)
> +{
> + if (res > MAX_S8) {
> + env->vfp.vxsat = 0x1;
> + return MAX_S8;
> + } else if (res < MIN_S8) {
> + env->vfp.vxsat = 0x1;
> + return MIN_S8;
> + } else {
> + return res;
> + }
> +}
> +
> +static uint8_t saturate_u8(CPURISCVState *env, uint16_t res)
> +{
> + if (res > MAX_U8) {
> + env->vfp.vxsat = 0x1;
> + return MAX_U8;
> + } else {
> + return res;
> + }
> +}
> +
> +static uint16_t saturate_u16(CPURISCVState *env, uint32_t res)
> +{
> + if (res > MAX_U16) {
> + env->vfp.vxsat = 0x1;
> + return MAX_U16;
> + } else {
> + return res;
> + }
> +}
> +
> +static uint32_t saturate_u32(CPURISCVState *env, uint64_t res)
> +{
> + if (res > MAX_U32) {
> + env->vfp.vxsat = 0x1;
> + return MAX_U32;
> + } else {
> + return res;
> + }
> +}
> +
> +static int16_t saturate_s16(CPURISCVState *env, int32_t res)
> +{
> + if (res > MAX_S16) {
> + env->vfp.vxsat = 0x1;
> + return MAX_S16;
> + } else if (res < MIN_S16) {
> + env->vfp.vxsat = 0x1;
> + return MIN_S16;
> + } else {
> + return res;
> + }
> +}
> +
> +static int32_t saturate_s32(CPURISCVState *env, int64_t res)
> +{
> + if (res > MAX_S32) {
> + env->vfp.vxsat = 0x1;
> + return MAX_S32;
> + } else if (res < MIN_S32) {
> + env->vfp.vxsat = 0x1;
> + return MIN_S32;
> + } else {
> + return res;
> + }
> +}
> +
> +static uint16_t vwsmaccu_8(CPURISCVState *env, uint8_t a, uint8_t b,
> + uint16_t c)
> +{
> + uint16_t round, res;
> + uint16_t product = (uint16_t)a * (uint16_t)b;
> +
> + round = (uint16_t)fix_data_round(env, (uint64_t)product, 4);
> + res = (round + product) >> 4;
> + return sat_add_u16(env, c, res);
> +}
> +
> +static uint32_t vwsmaccu_16(CPURISCVState *env, uint16_t a, uint16_t b,
> + uint32_t c)
> +{
> + uint32_t round, res;
> + uint32_t product = (uint32_t)a * (uint32_t)b;
> +
> + round = (uint32_t)fix_data_round(env, (uint64_t)product, 8);
> + res = (round + product) >> 8;
> + return sat_add_u32(env, c, res);
> +}
> +
> +static uint64_t vwsmaccu_32(CPURISCVState *env, uint32_t a, uint32_t b,
> + uint64_t c)
> +{
> + uint64_t round, res;
> + uint64_t product = (uint64_t)a * (uint64_t)b;
> +
> + round = (uint64_t)fix_data_round(env, (uint64_t)product, 16);
> + res = (round + product) >> 16;
> + return sat_add_u64(env, c, res);
> +}
> +
> +static int16_t vwsmacc_8(CPURISCVState *env, int8_t a, int8_t b,
> + int16_t c)
> +{
> + int16_t round, res;
> + int16_t product = (int16_t)a * (int16_t)b;
> +
> + round = (int16_t)fix_data_round(env, (uint64_t)product, 4);
> + res = (int16_t)(round + product) >> 4;
> + return sat_add_s16(env, c, res);
> +}
> +
> +static int32_t vwsmacc_16(CPURISCVState *env, int16_t a, int16_t b,
> + int32_t c)
> +{
> + int32_t round, res;
> + int32_t product = (int32_t)a * (int32_t)b;
> +
> + round = (int32_t)fix_data_round(env, (uint64_t)product, 8);
> + res = (int32_t)(round + product) >> 8;
> + return sat_add_s32(env, c, res);
> +}
> +
> +static int64_t vwsmacc_32(CPURISCVState *env, int32_t a, int32_t b,
> + int64_t c)
> +{
> + int64_t round, res;
> + int64_t product = (int64_t)a * (int64_t)b;
> +
> + round = (int64_t)fix_data_round(env, (uint64_t)product, 16);
> + res = (int64_t)(round + product) >> 16;
> + return sat_add_s64(env, c, res);
> +}
> +
> +static int16_t vwsmaccsu_8(CPURISCVState *env, uint8_t a, int8_t b,
> + int16_t c)
> +{
> + int16_t round, res;
> + int16_t product = (uint16_t)a * (int16_t)b;
> +
> + round = (int16_t)fix_data_round(env, (uint64_t)product, 4);
> + res = (round + product) >> 4;
> + return sat_add_s16(env, c, res);
> +}
> +
> +static int32_t vwsmaccsu_16(CPURISCVState *env, uint16_t a, int16_t b,
> + int32_t c)
> +{
> + int32_t round, res;
> + int32_t product = (uint32_t)a * (int32_t)b;
> +
> + round = (int32_t)fix_data_round(env, (uint64_t)product, 8);
> + res = (round + product) >> 8;
> + return sat_add_s32(env, c, res);
> +}
> +
> +static int64_t vwsmaccsu_32(CPURISCVState *env, uint32_t a, int32_t b,
> + int64_t c)
> +{
> + int64_t round, res;
> + int64_t product = (uint64_t)a * (int64_t)b;
> +
> + round = (int64_t)fix_data_round(env, (uint64_t)product, 16);
> + res = (round + product) >> 16;
> + return sat_add_s64(env, c, res);
> +}
> +
> +static int16_t vwsmaccus_8(CPURISCVState *env, int8_t a, uint8_t b,
> + int16_t c)
> +{
> + int16_t round, res;
> + int16_t product = (int16_t)a * (uint16_t)b;
> +
> + round = (int16_t)fix_data_round(env, (uint64_t)product, 4);
> + res = (round + product) >> 4;
> + return sat_add_s16(env, c, res);
> +}
> +
> +static int32_t vwsmaccus_16(CPURISCVState *env, int16_t a, uint16_t b,
> + int32_t c)
> +{
> + int32_t round, res;
> + int32_t product = (int32_t)a * (uint32_t)b;
> +
> + round = (int32_t)fix_data_round(env, (uint64_t)product, 8);
> + res = (round + product) >> 8;
> + return sat_add_s32(env, c, res);
> +}
> +
> +static int64_t vwsmaccus_32(CPURISCVState *env, int32_t a, uint32_t b,
> + int64_t c)
> +{
> + int64_t round, res;
> + int64_t product = (int64_t)a * (uint64_t)b;
> +
> + round = (int64_t)fix_data_round(env, (uint64_t)product, 16);
> + res = (round + product) >> 16;
> + return sat_add_s64(env, c, res);
> +}
> +
> +static int8_t vssra_8(CPURISCVState *env, int8_t a, uint8_t b)
> +{
> + int16_t round, res;
> + uint8_t shift = b & 0x7;
> +
> + round = (int16_t)fix_data_round(env, (uint64_t)a, shift);
> + res = (a + round) >> shift;
> +
> + return res;
> +}
> +
> +static int16_t vssra_16(CPURISCVState *env, int16_t a, uint16_t b)
> +{
> + int32_t round, res;
> + uint8_t shift = b & 0xf;
> +
> + round = (int32_t)fix_data_round(env, (uint64_t)a, shift);
> + res = (a + round) >> shift;
> + return res;
> +}
> +
> +static int32_t vssra_32(CPURISCVState *env, int32_t a, uint32_t b)
> +{
> + int64_t round, res;
> + uint8_t shift = b & 0x1f;
> +
> + round = (int64_t)fix_data_round(env, (uint64_t)a, shift);
> + res = (a + round) >> shift;
> + return res;
> +}
> +
> +static int64_t vssra_64(CPURISCVState *env, int64_t a, uint64_t b)
> +{
> + int64_t round, res;
> + uint8_t shift = b & 0x3f;
> +
> + if (shift == 0) {
> + return a;
> + }
> + round = (int64_t)fix_data_round(env, (uint64_t)a, shift);
> + res = (a >> (shift - 1)) + (round >> (shift - 1));
> + return res >> 1;
> +}
> +
> +static int8_t vssrai_8(CPURISCVState *env, int8_t a, uint8_t b)
> +{
> + int16_t round, res;
> +
> + round = (int16_t)fix_data_round(env, (uint64_t)a, b);
> + res = (a + round) >> b;
> + return res;
> +}
> +
> +static int16_t vssrai_16(CPURISCVState *env, int16_t a, uint8_t b)
> +{
> + int32_t round, res;
> +
> + round = (int32_t)fix_data_round(env, (uint64_t)a, b);
> + res = (a + round) >> b;
> + return res;
> +}
> +
> +static int32_t vssrai_32(CPURISCVState *env, int32_t a, uint8_t b)
> +{
> + int64_t round, res;
> +
> + round = (int64_t)fix_data_round(env, (uint64_t)a, b);
> + res = (a + round) >> b;
> + return res;
> +}
> +
> +static int64_t vssrai_64(CPURISCVState *env, int64_t a, uint8_t b)
> +{
> + int64_t round, res;
> +
> + if (b == 0) {
> + return a;
> + }
> + round = (int64_t)fix_data_round(env, (uint64_t)a, b);
> + res = (a >> (b - 1)) + (round >> (b - 1));
> + return res >> 1;
> +}
> +
> +static int8_t vnclip_16(CPURISCVState *env, int16_t a, uint8_t b)
> +{
> + int16_t round, res;
> + uint8_t shift = b & 0xf;
> +
> + round = (int16_t)fix_data_round(env, (uint64_t)a, shift);
> + res = (a + round) >> shift;
> +
> + return saturate_s8(env, res);
> +}
> +
> +static int16_t vnclip_32(CPURISCVState *env, int32_t a, uint16_t b)
> +{
> + int32_t round, res;
> + uint8_t shift = b & 0x1f;
> +
> + round = (int32_t)fix_data_round(env, (uint64_t)a, shift);
> + res = (a + round) >> shift;
> + return saturate_s16(env, res);
> +}
> +
> +static int32_t vnclip_64(CPURISCVState *env, int64_t a, uint32_t b)
> +{
> + int64_t round, res;
> + uint8_t shift = b & 0x3f;
> +
> + round = (int64_t)fix_data_round(env, (uint64_t)a, shift);
> + res = (a + round) >> shift;
> +
> + return saturate_s32(env, res);
> +}
> +
> +static int8_t vnclipi_16(CPURISCVState *env, int16_t a, uint8_t b)
> +{
> + int16_t round, res;
> +
> + round = (int16_t)fix_data_round(env, (uint64_t)a, b);
> + res = (a + round) >> b;
> +
> + return saturate_s8(env, res);
> +}
> +
> +static int16_t vnclipi_32(CPURISCVState *env, int32_t a, uint8_t b)
> +{
> + int32_t round, res;
> +
> + round = (int32_t)fix_data_round(env, (uint64_t)a, b);
> + res = (a + round) >> b;
> +
> + return saturate_s16(env, res);
> +}
> +
> +static int32_t vnclipi_64(CPURISCVState *env, int64_t a, uint8_t b)
> +{
> + int64_t round, res;
> +
> + round = (int64_t)fix_data_round(env, (uint64_t)a, b);
> + res = (a + round) >> b;
> +
> + return saturate_s32(env, res);
> +}
> +
> +static uint8_t vnclipu_16(CPURISCVState *env, uint16_t a, uint8_t b)
> +{
> + uint16_t round, res;
> + uint8_t shift = b & 0xf;
> +
> + round = (uint16_t)fix_data_round(env, (uint64_t)a, shift);
> + res = (a + round) >> shift;
> +
> + return saturate_u8(env, res);
> +}
> +
> +static uint16_t vnclipu_32(CPURISCVState *env, uint32_t a, uint16_t b)
> +{
> + uint32_t round, res;
> + uint8_t shift = b & 0x1f;
> +
> + round = (uint32_t)fix_data_round(env, (uint64_t)a, shift);
> + res = (a + round) >> shift;
> +
> + return saturate_u16(env, res);
> +}
> +
> +static uint32_t vnclipu_64(CPURISCVState *env, uint64_t a, uint32_t b)
> +{
> + uint64_t round, res;
> + uint8_t shift = b & 0x3f;
> +
> + round = (uint64_t)fix_data_round(env, (uint64_t)a, shift);
> + res = (a + round) >> shift;
> +
> + return saturate_u32(env, res);
> +}
> +
> +static uint8_t vnclipui_16(CPURISCVState *env, uint16_t a, uint8_t b)
> +{
> + uint16_t round, res;
> +
> + round = (uint16_t)fix_data_round(env, (uint64_t)a, b);
> + res = (a + round) >> b;
> +
> + return saturate_u8(env, res);
> +}
> +
> +static uint16_t vnclipui_32(CPURISCVState *env, uint32_t a, uint8_t b)
> +{
> + uint32_t round, res;
> +
> + round = (uint32_t)fix_data_round(env, (uint64_t)a, b);
> + res = (a + round) >> b;
> +
> + return saturate_u16(env, res);
> +}
> +
> +static uint32_t vnclipui_64(CPURISCVState *env, uint64_t a, uint8_t b)
> +{
> + uint64_t round, res;
> +
> + round = (uint64_t)fix_data_round(env, (uint64_t)a, b);
> + res = (a + round) >> b;
> +
> + return saturate_u32(env, res);
> +}
> +
> +static uint8_t vssrl_8(CPURISCVState *env, uint8_t a, uint8_t b)
> +{
> + uint16_t round, res;
> + uint8_t shift = b & 0x7;
> +
> + round = (uint16_t)fix_data_round(env, (uint64_t)a, shift);
> + res = (a + round) >> shift;
> + return res;
> +}
> +
> +static uint16_t vssrl_16(CPURISCVState *env, uint16_t a, uint16_t b)
> +{
> + uint32_t round, res;
> + uint8_t shift = b & 0xf;
> +
> + round = (uint32_t)fix_data_round(env, (uint64_t)a, shift);
> + res = (a + round) >> shift;
> + return res;
> +}
> +
> +static uint32_t vssrl_32(CPURISCVState *env, uint32_t a, uint32_t b)
> +{
> + uint64_t round, res;
> + uint8_t shift = b & 0x1f;
> +
> + round = (uint64_t)fix_data_round(env, (uint64_t)a, shift);
> + res = (a + round) >> shift;
> + return res;
> +}
> +
> +static uint64_t vssrl_64(CPURISCVState *env, uint64_t a, uint64_t b)
> +{
> + uint64_t round, res;
> + uint8_t shift = b & 0x3f;
> +
> + if (shift == 0) {
> + return a;
> + }
> + round = (uint64_t)fix_data_round(env, (uint64_t)a, shift);
> + res = (a >> (shift - 1)) + (round >> (shift - 1));
> + return res >> 1;
> +}
> +
> +static uint8_t vssrli_8(CPURISCVState *env, uint8_t a, uint8_t b)
> +{
> + uint16_t round, res;
> +
> + round = (uint16_t)fix_data_round(env, (uint64_t)a, b);
> + res = (a + round) >> b;
> + return res;
> +}
> +
> +static uint16_t vssrli_16(CPURISCVState *env, uint16_t a, uint8_t b)
> +{
> + uint32_t round, res;
> +
> + round = (uint32_t)fix_data_round(env, (uint64_t)a, b);
> + res = (a + round) >> b;
> + return res;
> +}
> +
> +static uint32_t vssrli_32(CPURISCVState *env, uint32_t a, uint8_t b)
> +{
> + uint64_t round, res;
> +
> + round = (uint64_t)fix_data_round(env, (uint64_t)a, b);
> + res = (a + round) >> b;
> + return res;
> +}
> +
> +static uint64_t vssrli_64(CPURISCVState *env, uint64_t a, uint8_t b)
> +{
> + uint64_t round, res;
> +
> + if (b == 0) {
> + return a;
> + }
> + round = (uint64_t)fix_data_round(env, (uint64_t)a, b);
> + res = (a >> (b - 1)) + (round >> (b - 1));
> + return res >> 1;
> +}
> +
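
The _64 variants of vssra/vssrl (and their immediate forms) deliberately differ from the narrower ones: with no wider intermediate type available, `a + round` itself could overflow, so they pre-shift both terms by shift - 1 and finish with a final >> 1. A spot check for vssrl_64 under rnu with shift = 2 and a = UINT64_MAX (round = 2):

    /* naive: (a + 2) >> 2 wraps to 0                           */
    /* here:  (a >> 1) + (2 >> 1) = 0x8000000000000000,         */
    /*        then >> 1           = 0x4000000000000000 (exact)  */

For shift == 1 the pre-shift degenerates to a full-width add, so a value at the very top of the range can still wrap; a rare corner, but it may deserve a comment or a dedicated path.
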
> +static int8_t vsmul_8(CPURISCVState *env, int8_t a, int8_t b)
> +{
> + int16_t round;
> + int8_t res;
> + int16_t product = (int16_t)a * (int16_t)b;
> +
> + if (a == MIN_S8 && b == MIN_S8) {
> + env->vfp.vxsat = 1;
> + return MAX_S8;
> + }
> +
> + round = (int16_t)fix_data_round(env, (uint64_t)product, 7);
> + res = sat_add_s16(env, product, round) >> 7;
> + return res;
> +}
> +
> +static int16_t vsmul_16(CPURISCVState *env, int16_t a, int16_t b)
> +{
> + int32_t round;
> + int16_t res;
> + int32_t product = (int32_t)a * (int32_t)b;
> +
> + if (a == MIN_S16 && b == MIN_S16) {
> + env->vfp.vxsat = 1;
> + return MAX_S16;
> + }
> +
> + round = (int32_t)fix_data_round(env, (uint64_t)product, 15);
> + res = sat_add_s32(env, product, round) >> 15;
> + return res;
> +}
> +
> +static int32_t vsmul_32(CPURISCVState *env, int32_t a, int32_t b)
> +{
> + int64_t round;
> + int32_t res;
> + int64_t product = (int64_t)a * (int64_t)b;
> +
> + if (a == MIN_S32 && b == MIN_S32) {
> + env->vfp.vxsat = 1;
> + return MAX_S32;
> + }
> +
> + round = (int64_t)fix_data_round(env, (uint64_t)product, 31);
> + res = sat_add_s64(env, product, round) >> 31;
> + return res;
> +}
> +
> +static int64_t vsmul_64(CPURISCVState *env, int64_t a, int64_t b)
> +{
> + int64_t res;
> + uint64_t abs_a = a, abs_b = b;
> + uint64_t lo_64, hi_64, carry, round;
> +
> + if (a == MIN_S64 && b == MIN_S64) {
> + env->vfp.vxsat = 1;
> + return MAX_S64;
> + }
> +
> + if (a < 0) {
> + abs_a = ~a + 1;
> + }
> + if (b < 0) {
> + abs_b = ~b + 1;
> + }
> +
> + /* first get the whole product in {hi_64, lo_64} */
> + uint64_t a_hi = abs_a >> 32;
> + uint64_t a_lo = (uint32_t)abs_a;
> + uint64_t b_hi = abs_b >> 32;
> + uint64_t b_lo = (uint32_t)abs_b;
> +
> + /*
> + * abs_a * abs_b = (a_hi << 32 + a_lo) * (b_hi << 32 + b_lo)
> + * = (a_hi * b_hi) << 64 + (a_hi * b_lo) << 32 +
> + * (a_lo * b_hi) << 32 + a_lo * b_lo
> + * = {hi_64, lo_64}
> + * hi_64 = ((a_hi * b_lo) << 32 + (a_lo * b_hi) << 32 + (a_lo * b_lo)) >> 64
> + * = (a_hi * b_lo) >> 32 + (a_lo * b_hi) >> 32 + carry
> + * carry = ((uint64_t)(uint32_t)(a_hi * b_lo) +
> + * (uint64_t)(uint32_t)(a_lo * b_hi) + (a_lo * b_lo) >> 32) >> 32
> + */
> +
> + lo_64 = abs_a * abs_b;
> + carry = ((uint64_t)(uint32_t)(a_hi * b_lo) +
> + (uint64_t)(uint32_t)(a_lo * b_hi) +
> + ((a_lo * b_lo) >> 32)) >> 32;
> +
> + hi_64 = a_hi * b_hi +
> + ((a_hi * b_lo) >> 32) + ((a_lo * b_hi) >> 32) +
> + carry;
> +
> + if ((a ^ b) & SIGNBIT64) {
> + lo_64 = ~lo_64;
> + hi_64 = ~hi_64;
> + if (lo_64 == MAX_U64) {
> + lo_64 = 0;
> + hi_64 += 1;
> + } else {
> + lo_64 += 1;
> + }
> + }
> +
> + /* apply the rounding increment, propagating any carry into hi_64 */
> + round = fix_data_round(env, lo_64, 63);
> + if ((lo_64 + round) < lo_64) {
> + hi_64 += 1;
> + res = (hi_64 << 1);
> + } else {
> + res = (hi_64 << 1) | ((lo_64 + round) >> 63);
> + }
> +
> + return res;
> +}
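
The open-coded 32x32 decomposition above re-derives what include/qemu/host-utils.h already exports as muls64(). A sketch of the same routine on top of it (vsmul_64_alt is my name; behaviour intended to be identical):

    static int64_t vsmul_64_alt(CPURISCVState *env, int64_t a, int64_t b)
    {
        uint64_t lo_64, hi_64, round;

        if (a == MIN_S64 && b == MIN_S64) {
            env->vfp.vxsat = 1;
            return MAX_S64;
        }
        muls64(&lo_64, &hi_64, a, b);      /* full 128-bit signed product */
        round = fix_data_round(env, lo_64, 63);
        if (lo_64 + round < lo_64) {       /* carry from the low half */
            hi_64 += 1;
            return hi_64 << 1;
        }
        return (hi_64 << 1) | ((lo_64 + round) >> 63);
    }
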
> +static inline int8_t avg_round_s8(CPURISCVState *env, int8_t a, int8_t b)
> +{
> + int16_t round;
> + int8_t res;
> + int16_t sum = a + b;
> +
> + round = (int16_t)fix_data_round(env, (uint64_t)sum, 1);
> + res = (sum + round) >> 1;
> +
> + return res;
> +}
> +
> +static inline int16_t avg_round_s16(CPURISCVState *env, int16_t a, int16_t b)
> +{
> + int32_t round;
> + int16_t res;
> + int32_t sum = a + b;
> +
> + round = (int32_t)fix_data_round(env, (uint64_t)sum, 1);
> + res = (sum + round) >> 1;
> +
> + return res;
> +}
> +
> +static inline int32_t avg_round_s32(CPURISCVState *env, int32_t a, int32_t b)
> +{
> + int64_t round;
> + int32_t res;
> + int64_t sum = a + b;
> +
> + round = (int64_t)fix_data_round(env, (uint64_t)sum, 1);
> + res = (sum + round) >> 1;
> +
> + return res;
> +}
> +
> +static inline int64_t avg_round_s64(CPURISCVState *env, int64_t a, int64_t b)
> +{
> + int64_t rem = (a & 0x1) + (b & 0x1);
> + int64_t res = (a >> 1) + (b >> 1) + (rem >> 1);
> + int mod = env->vfp.vxrm;
> +
> + if (mod == 0x0) { /* rnu */
> + if (rem == 0x1) {
> + return res + 1;
> + }
> + } else if (mod == 0x1) { /* rne */
> + if ((rem & 0x1) == 1 && ((res & 0x1) == 1)) {
> + return res + 1;
> + }
> + } else if (mod == 0x3) { /* rod */
> + if (((rem & 0x1) >= 0x1) && (res & 0x1) == 0) {
> + return res + 1;
> + }
> + }
> + return res;
> +}
> +
> +static target_ulong helper_fclass_h(uint64_t frs1)
> +{
> + float16 f = frs1;
> + bool sign = float16_is_neg(f);
> +
> + if (float16_is_infinity(f)) {
> + return sign ? 1 << 0 : 1 << 7;
> + } else if (float16_is_zero(f)) {
> + return sign ? 1 << 3 : 1 << 4;
> + } else if (float16_is_zero_or_denormal(f)) {
> + return sign ? 1 << 2 : 1 << 5;
> + } else if (float16_is_any_nan(f)) {
> + float_status s = { }; /* for snan_bit_is_one */
> + return float16_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8;
> + } else {
> + return sign ? 1 << 1 : 1 << 6;
> + }
> +}
> +
> +static inline bool vector_vtype_ill(CPURISCVState *env)
> +{
> + if ((env->vfp.vtype >> (sizeof(target_ulong) * 8 - 1)) & 0x1) {
> + return true;
> + }
> + return false;
> +}
> +
> +static inline void vector_vtype_set_ill(CPURISCVState *env)
> +{
> + env->vfp.vtype = ((target_ulong)1) << (sizeof(target_ulong) * 8 - 1);
> + return;
> +}
> +
> +static inline int vector_vtype_get_sew(CPURISCVState *env)
> +{
> + return (env->vfp.vtype >> 2) & 0x7;
> +}
> +
> +static inline int vector_get_width(CPURISCVState *env)
> +{
> + return 8 * (1 << vector_vtype_get_sew(env));
> +}
> +
> +static inline int vector_get_lmul(CPURISCVState *env)
> +{
> + return 1 << (env->vfp.vtype & 0x3);
> +}
> +
> +static inline int vector_get_vlmax(CPURISCVState *env)
> +{
> + return vector_get_lmul(env) * VLEN / vector_get_width(env);
> +}
> +
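
These helpers decode vtype[1:0] as LMUL and vtype[4:2] as SEW; a concrete decode, taking VLEN = 128 for illustration (the value I believe this patch uses; worth confirming against cpu.h):

    /* vtype = 0b01001 */
    vector_get_lmul(env);   /* 1 << 0b01        = 2  (LMUL)         */
    vector_get_width(env);  /* 8 * (1 << 0b010) = 32 (SEW in bits)  */
    vector_get_vlmax(env);  /* 2 * 128 / 32     = 8  elements       */
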
> +static inline int vector_elem_mask(CPURISCVState *env, uint32_t vm, int width,
> + int lmul, int index)
> +{
> + int mlen = width / lmul;
> + int idx = (index * mlen) / 8;
> + int pos = (index * mlen) % 8;
> +
> + return vm || ((env->vfp.vreg[0].u8[idx] >> pos) & 0x1);
> +}
> +
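
vector_elem_mask() hard-codes the 0.7.1 mask layout, where each element owns mlen = SEW/LMUL bits of v0 and only the lowest bit of its group is consulted; e.g. with SEW = 32 and LMUL = 1:

    /* mlen = 32; element 2's mask bit: idx = (2 * 32) / 8 = byte 8, pos = 0 */

and for vm = 1 (unmasked) it short-circuits to true for every element.
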
> +static inline bool vector_overlap_vm_common(int lmul, int vm, int rd)
> +{
> + if (lmul > 1 && vm == 0 && rd == 0) {
> + return true;
> + }
> + return false;
> +}
> +
> +static inline bool vector_overlap_vm_force(int vm, int rd)
> +{
> + if (vm == 0 && rd == 0) {
> + return true;
> + }
> + return false;
> +}
> +
> +static inline bool vector_overlap_carry(int lmul, int rd)
> +{
> + if (lmul > 1 && rd == 0) {
> + return true;
> + }
> + return false;
> +}
> +
> +static inline bool vector_overlap_dstgp_srcgp(int rd, int dlen, int rs,
> + int slen)
> +{
> + if ((rd >= rs && rd < rs + slen) || (rs >= rd && rs < rd + dlen)) {
> + return true;
> + }
> + return false;
> +}
> +
> +static inline uint64_t vector_get_mask(int start, int end)
> +{
> + return ((uint64_t)(~((uint64_t)0))) << (63 - end + start) >> (63 - end);
> +}
> +
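
vector_get_mask(start, end) builds an inclusive mask of bits end..start; two spot checks:

    vector_get_mask(4, 7);    /* 0x00000000000000f0 */
    vector_get_mask(0, 63);   /* 0xffffffffffffffff */
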
> +/* fetch unsigned element by width */
> +static inline uint64_t vector_get_iu_elem(CPURISCVState *env, uint32_t width,
> + uint32_t rs2, uint32_t index)
> +{
> + uint64_t elem;
> + if (width == 8) {
> + elem = env->vfp.vreg[rs2].u8[index];
> + } else if (width == 16) {
> + elem = env->vfp.vreg[rs2].u16[index];
> + } else if (width == 32) {
> + elem = env->vfp.vreg[rs2].u32[index];
> + } else if (width == 64) {
> + elem = env->vfp.vreg[rs2].u64[index];
> + } else { /* max(XLEN, FLEN) is at most 64, so wider accesses are illegal */
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return 0;
> + }
> + return elem;
> +}
> +
> +static inline int vector_mask_reg(CPURISCVState *env, uint32_t reg, int width,
> + int lmul, int index)
> +{
> + int mlen = width / lmul;
> + int idx = (index * mlen) / 8;
> + int pos = (index * mlen) % 8;
> + return (env->vfp.vreg[reg].u8[idx] >> pos) & 0x1;
> +}
> +
> +static inline void vector_mask_result(CPURISCVState *env, uint32_t reg,
> + int width, int lmul, int index, uint32_t result)
> +{
> + int mlen = width / lmul;
> + int idx = (index * mlen) / width;
> + int pos = (index * mlen) % width;
> + uint64_t mask = ~((((uint64_t)1 << mlen) - 1) << pos);
> +
> + switch (width) {
> + case 8:
> + env->vfp.vreg[reg].u8[idx] = (env->vfp.vreg[reg].u8[idx] & mask)
> + | (result << pos);
> + break;
> + case 16:
> + env->vfp.vreg[reg].u16[idx] = (env->vfp.vreg[reg].u16[idx] & mask)
> + | (result << pos);
> + break;
> + case 32:
> + env->vfp.vreg[reg].u32[idx] = (env->vfp.vreg[reg].u32[idx] & mask)
> + | (result << pos);
> + break;
> + case 64:
> + env->vfp.vreg[reg].u64[idx] = (env->vfp.vreg[reg].u64[idx] & mask)
> +                                    | ((uint64_t)result << pos);
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> +
> + return;
> +}
> +
> +/**
> + * deposit16:
> + * @value: initial value to insert bit field into
> + * @start: the lowest bit in the bit field (numbered from 0)
> + * @length: the length of the bit field
> + * @fieldval: the value to insert into the bit field
> + *
> + * Deposit @fieldval into the 16 bit @value at the bit field specified
> + * by the @start and @length parameters, and return the modified
> + * @value. Bits of @value outside the bit field are not modified.
> + * Bits of @fieldval above the least significant @length bits are
> + * ignored. The bit field must lie entirely within the 16 bit word.
> + * It is valid to request that all 16 bits are modified (ie @length
> + * 16 and @start 0).
> + *
> + * Returns: the modified @value.
> + */
> +static inline uint16_t deposit16(uint16_t value, int start, int length,
> + uint16_t fieldval)
> +{
> + uint16_t mask;
> + assert(start >= 0 && length > 0 && length <= 16 - start);
> +    mask = (~0U >> (32 - length)) << start;
> + return (value & ~mask) | ((fieldval << start) & mask);
> +}
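
Usage sketch for deposit16(); note that ~0U is 32 bits wide, hence the
32 - length shift count above:

    #include <assert.h>
    #include <stdint.h>

    static uint16_t deposit16(uint16_t value, int start, int length,
                              uint16_t fieldval)
    {
        uint16_t mask = (~0U >> (32 - length)) << start;
        return (value & ~mask) | ((fieldval << start) & mask);
    }

    int main(void)
    {
        /* replace the 4-bit field at bits [11:8] of 0xABCD with 0x7 */
        assert(deposit16(0xABCD, 8, 4, 0x7) == 0xA7CD);
        return 0;
    }
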
> +
> +static void vector_tail_amo(CPURISCVState *env, int vreg, int index,
> +                            int width)
> +{
> + switch (width) {
> + case 32:
> + env->vfp.vreg[vreg].u32[index] = 0;
> + break;
> + case 64:
> + env->vfp.vreg[vreg].u64[index] = 0;
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +}
> +
> +static void vector_tail_common(CPURISCVState *env, int vreg, int index,
> + int width)
> +{
> + switch (width) {
> + case 8:
> + env->vfp.vreg[vreg].u8[index] = 0;
> + break;
> + case 16:
> + env->vfp.vreg[vreg].u16[index] = 0;
> + break;
> + case 32:
> + env->vfp.vreg[vreg].u32[index] = 0;
> + break;
> + case 64:
> + env->vfp.vreg[vreg].u64[index] = 0;
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +}
> +
> +static void vector_tail_segment(CPURISCVState *env, int vreg, int index,
> + int width, int nf, int lmul)
> +{
> + switch (width) {
> + case 8:
> + while (nf >= 0) {
> + env->vfp.vreg[vreg + nf * lmul].u8[index] = 0;
> + nf--;
> + }
> + break;
> + case 16:
> + while (nf >= 0) {
> + env->vfp.vreg[vreg + nf * lmul].u16[index] = 0;
> + nf--;
> + }
> + break;
> + case 32:
> + while (nf >= 0) {
> + env->vfp.vreg[vreg + nf * lmul].u32[index] = 0;
> + nf--;
> + }
> + break;
> + case 64:
> + while (nf >= 0) {
> + env->vfp.vreg[vreg + nf * lmul].u64[index] = 0;
> + nf--;
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +}
> +
> +static void vector_tail_widen(CPURISCVState *env, int vreg, int index,
> + int width)
> +{
> + switch (width) {
> + case 8:
> + env->vfp.vreg[vreg].u16[index] = 0;
> + break;
> + case 16:
> + env->vfp.vreg[vreg].u32[index] = 0;
> + break;
> + case 32:
> + env->vfp.vreg[vreg].u64[index] = 0;
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +}
> +
> +static void vector_tail_narrow(CPURISCVState *env, int vreg, int index,
> + int width)
> +{
> + switch (width) {
> + case 8:
> + env->vfp.vreg[vreg].u8[index] = 0;
> + break;
> + case 16:
> + env->vfp.vreg[vreg].u16[index] = 0;
> + break;
> + case 32:
> + env->vfp.vreg[vreg].u32[index] = 0;
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +}
> +
> +static void vector_tail_fcommon(CPURISCVState *env, int vreg, int index,
> + int width)
> +{
> + switch (width) {
> + case 16:
> + env->vfp.vreg[vreg].u16[index] = 0;
> + break;
> + case 32:
> + env->vfp.vreg[vreg].u32[index] = 0;
> + break;
> + case 64:
> + env->vfp.vreg[vreg].u64[index] = 0;
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +}
> +
> +static void vector_tail_fwiden(CPURISCVState *env, int vreg, int index,
> + int width)
> +{
> + switch (width) {
> + case 16:
> + env->vfp.vreg[vreg].u32[index] = 0;
> + break;
> + case 32:
> + env->vfp.vreg[vreg].u64[index] = 0;
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +}
> +
> +static void vector_tail_fnarrow(CPURISCVState *env, int vreg, int index,
> + int width)
> +{
> + switch (width) {
> + case 16:
> + env->vfp.vreg[vreg].u16[index] = 0;
> + break;
> + case 32:
> + env->vfp.vreg[vreg].u32[index] = 0;
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +}
> +
> +static inline int vector_get_carry(CPURISCVState *env, int width, int lmul,
> +                                   int index)
> +{
> + int mlen = width / lmul;
> + int idx = (index * mlen) / 8;
> + int pos = (index * mlen) % 8;
> +
> + return (env->vfp.vreg[0].u8[idx] >> pos) & 0x1;
> +}
> +
> +static inline void vector_get_layout(CPURISCVState *env, int width,
> +                                     int lmul, int index, int *idx, int *pos)
> +{
> + int mlen = width / lmul;
> + *idx = (index * mlen) / 8;
> + *pos = (index * mlen) % 8;
> +}
> +
> +static bool vector_lmul_check_reg(CPURISCVState *env, uint32_t lmul,
> + uint32_t reg, bool widen)
> +{
> + int legal = widen ? (lmul * 2) : lmul;
> +
> + if ((lmul != 1 && lmul != 2 && lmul != 4 && lmul != 8) ||
> + (lmul == 8 && widen)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return false;
> + }
> +
> + if (reg % legal != 0) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return false;
> + }
> + return true;
> +}
> +
> +static inline uint64_t u64xu64_lh(uint64_t a, uint64_t b)
> +{
> + uint64_t hi_64, carry;
> +
> + /* first get the whole product in {hi_64, lo_64} */
> + uint64_t a_hi = a >> 32;
> + uint64_t a_lo = (uint32_t)a;
> + uint64_t b_hi = b >> 32;
> + uint64_t b_lo = (uint32_t)b;
> +
> +    /*
> +     * a * b = (a_hi << 32 + a_lo) * (b_hi << 32 + b_lo)
> +     *       = (a_hi * b_hi) << 64 + (a_hi * b_lo) << 32 +
> +     *         (a_lo * b_hi) << 32 + a_lo * b_lo
> +     *       = {hi_64, lo_64}
> +     * hi_64 = a_hi * b_hi + (a_hi * b_lo) >> 32 + (a_lo * b_hi) >> 32 + carry
> +     * carry = ((uint64_t)(uint32_t)(a_hi * b_lo) +
> +     *          (uint64_t)(uint32_t)(a_lo * b_hi) +
> +     *          ((a_lo * b_lo) >> 32)) >> 32
> +     */
> +
> + carry = ((uint64_t)(uint32_t)(a_hi * b_lo) +
> + (uint64_t)(uint32_t)(a_lo * b_hi) +
> + ((a_lo * b_lo) >> 32)) >> 32;
> +
> + hi_64 = a_hi * b_hi +
> + ((a_hi * b_lo) >> 32) + ((a_lo * b_hi) >> 32) +
> + carry;
> +
> + return hi_64;
> +}
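
The 32x32 decomposition can be cross-checked against a compiler-provided
128-bit multiply. A test sketch (GCC/Clang __int128 assumed; not part of
the patch):

    #include <assert.h>
    #include <stdint.h>

    /* same decomposition as u64xu64_lh() above */
    static uint64_t mulhu_portable(uint64_t a, uint64_t b)
    {
        uint64_t a_hi = a >> 32, a_lo = (uint32_t)a;
        uint64_t b_hi = b >> 32, b_lo = (uint32_t)b;
        uint64_t carry = ((uint64_t)(uint32_t)(a_hi * b_lo) +
                          (uint64_t)(uint32_t)(a_lo * b_hi) +
                          ((a_lo * b_lo) >> 32)) >> 32;
        return a_hi * b_hi + ((a_hi * b_lo) >> 32) + ((a_lo * b_hi) >> 32)
               + carry;
    }

    int main(void)
    {
        uint64_t a = 0xDEADBEEFCAFEF00DULL, b = 0x123456789ABCDEF0ULL;
        assert(mulhu_portable(a, b) ==
               (uint64_t)(((unsigned __int128)a * b) >> 64));
        return 0;
    }
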
> +
> +static inline int64_t s64xu64_lh(int64_t a, uint64_t b)
> +{
> + uint64_t abs_a = a;
> + uint64_t lo_64, hi_64, carry;
> +
> + if (a < 0) {
> + abs_a = ~a + 1;
> + }
> +
> + /* first get the whole product in {hi_64, lo_64} */
> + uint64_t a_hi = abs_a >> 32;
> + uint64_t a_lo = (uint32_t)abs_a;
> + uint64_t b_hi = b >> 32;
> + uint64_t b_lo = (uint32_t)b;
> +
> +    /*
> +     * abs_a * b = (a_hi << 32 + a_lo) * (b_hi << 32 + b_lo)
> +     *           = (a_hi * b_hi) << 64 + (a_hi * b_lo) << 32 +
> +     *             (a_lo * b_hi) << 32 + a_lo * b_lo
> +     *           = {hi_64, lo_64}
> +     * hi_64 = a_hi * b_hi + (a_hi * b_lo) >> 32 + (a_lo * b_hi) >> 32 + carry
> +     * carry = ((uint64_t)(uint32_t)(a_hi * b_lo) +
> +     *          (uint64_t)(uint32_t)(a_lo * b_hi) +
> +     *          ((a_lo * b_lo) >> 32)) >> 32
> +     */
> +
> + lo_64 = abs_a * b;
> + carry = ((uint64_t)(uint32_t)(a_hi * b_lo) +
> + (uint64_t)(uint32_t)(a_lo * b_hi) +
> + ((a_lo * b_lo) >> 32)) >> 32;
> +
> + hi_64 = a_hi * b_hi +
> + ((a_hi * b_lo) >> 32) + ((a_lo * b_hi) >> 32) +
> + carry;
> +    if (a < 0) {    /* b is unsigned: only a determines the result sign */
> + lo_64 = ~lo_64;
> + hi_64 = ~hi_64;
> + if (lo_64 == MAX_U64) {
> + lo_64 = 0;
> + hi_64 += 1;
> + } else {
> + lo_64 += 1;
> + }
> + }
> + return hi_64;
> +}
> +
> +static inline int64_t s64xs64_lh(int64_t a, int64_t b)
> +{
> + uint64_t abs_a = a, abs_b = b;
> + uint64_t lo_64, hi_64, carry;
> +
> + if (a < 0) {
> + abs_a = ~a + 1;
> + }
> + if (b < 0) {
> + abs_b = ~b + 1;
> + }
> +
> + /* first get the whole product in {hi_64, lo_64} */
> + uint64_t a_hi = abs_a >> 32;
> + uint64_t a_lo = (uint32_t)abs_a;
> + uint64_t b_hi = abs_b >> 32;
> + uint64_t b_lo = (uint32_t)abs_b;
> +
> +    /*
> +     * abs_a * abs_b = (a_hi << 32 + a_lo) * (b_hi << 32 + b_lo)
> +     *               = (a_hi * b_hi) << 64 + (a_hi * b_lo) << 32 +
> +     *                 (a_lo * b_hi) << 32 + a_lo * b_lo
> +     *               = {hi_64, lo_64}
> +     * hi_64 = a_hi * b_hi + (a_hi * b_lo) >> 32 + (a_lo * b_hi) >> 32 + carry
> +     * carry = ((uint64_t)(uint32_t)(a_hi * b_lo) +
> +     *          (uint64_t)(uint32_t)(a_lo * b_hi) +
> +     *          ((a_lo * b_lo) >> 32)) >> 32
> +     */
> +
> + lo_64 = abs_a * abs_b;
> + carry = ((uint64_t)(uint32_t)(a_hi * b_lo) +
> + (uint64_t)(uint32_t)(a_lo * b_hi) +
> + ((a_lo * b_lo) >> 32)) >> 32;
> +
> + hi_64 = a_hi * b_hi +
> + ((a_hi * b_lo) >> 32) + ((a_lo * b_hi) >> 32) +
> + carry;
> +
> + if ((a ^ b) & SIGNBIT64) {
> + lo_64 = ~lo_64;
> + hi_64 = ~hi_64;
> + if (lo_64 == MAX_U64) {
> + lo_64 = 0;
> + hi_64 += 1;
> + } else {
> + lo_64 += 1;
> + }
> + }
> + return hi_64;
> +}
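
The sign fixup in the two signed variants implements -(hi:lo) in 128-bit
two's complement: invert both halves, add 1 to the low half, and carry
into the high half when the low half wraps. A minimal check of that
carry case using __int128 (test sketch only):

    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
        unsigned __int128 p = (unsigned __int128)2 << 64; /* hi=2, lo=0 */
        unsigned __int128 n = -p;                         /* 128-bit negate */
        assert((uint64_t)n == 0);                         /* low half wraps */
        assert((uint64_t)(n >> 64) == ~(uint64_t)2 + 1);  /* hi takes carry */
        return 0;
    }
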
> +
> +void VECTOR_HELPER(vsetvl)(CPURISCVState *env, uint32_t rs1, uint32_t rs2,
> + uint32_t rd)
> +{
> + int sew, max_sew, vlmax, vl;
> +
> + if (rs2 == 0) {
> + vector_vtype_set_ill(env);
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + env->vfp.vtype = env->gpr[rs2];
> +    sew = vector_get_width(env) / 8;
> + max_sew = sizeof(target_ulong);
> +
> + if (env->misa & RVD) {
> + max_sew = max_sew > 8 ? max_sew : 8;
> + } else if (env->misa & RVF) {
> + max_sew = max_sew > 4 ? max_sew : 4;
> + }
> + if (sew > max_sew) {
> + vector_vtype_set_ill(env);
> + return;
> + }
> +
> + vlmax = vector_get_vlmax(env);
> + if (rs1 == 0) {
> + vl = vlmax;
> + } else if (env->gpr[rs1] <= vlmax) {
> + vl = env->gpr[rs1];
> + } else if (env->gpr[rs1] < 2 * vlmax) {
> +        vl = (env->gpr[rs1] + 1) / 2;   /* ceil(AVL / 2) in integers */
> + } else {
> + vl = vlmax;
> + }
> + env->vfp.vl = vl;
> + env->gpr[rd] = vl;
> +    env->vfp.vstart = 0;
> +}
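
The vl selection rule above is the v0.7.1 even-split heuristic: take AVL
when it fits, split an AVL below 2*VLMAX roughly in half, and cap at
VLMAX otherwise. As a standalone function with example values (vlmax = 8
assumed for illustration):

    #include <assert.h>

    static int choose_vl(int avl, int vlmax)
    {
        if (avl <= vlmax) {
            return avl;
        } else if (avl < 2 * vlmax) {
            return (avl + 1) / 2;   /* ceil(avl / 2) */
        }
        return vlmax;
    }

    int main(void)
    {
        assert(choose_vl(5, 8)  == 5);   /* fits in one pass */
        assert(choose_vl(13, 8) == 7);   /* split 13 as 7 + 6 */
        assert(choose_vl(40, 8) == 8);   /* capped at VLMAX */
        return 0;
    }
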
> +
> +void VECTOR_HELPER(vsetvli)(CPURISCVState *env, uint32_t rs1, uint32_t zimm,
> +                            uint32_t rd)
> +{
> + int sew, max_sew, vlmax, vl;
> +
> + env->vfp.vtype = zimm;
> + sew = vector_get_width(env) / 8;
> + max_sew = sizeof(target_ulong);
> +
> + if (env->misa & RVD) {
> + max_sew = max_sew > 8 ? max_sew : 8;
> + } else if (env->misa & RVF) {
> + max_sew = max_sew > 4 ? max_sew : 4;
> + }
> + if (sew > max_sew) {
> + vector_vtype_set_ill(env);
> + return;
> + }
> +
> + vlmax = vector_get_vlmax(env);
> + if (rs1 == 0) {
> + vl = vlmax;
> + } else if (env->gpr[rs1] <= vlmax) {
> + vl = env->gpr[rs1];
> + } else if (env->gpr[rs1] < 2 * vlmax) {
> +        vl = (env->gpr[rs1] + 1) / 2;   /* ceil(AVL / 2) in integers */
> + } else {
> + vl = vlmax;
> + }
> + env->vfp.vl = vl;
> + env->gpr[rd] = vl;
> +    env->vfp.vstart = 0;
> +}
> +
> +/*
> + * vrgather.vv vd, vs2, vs1, vm #
> + * vd[i] = (vs1[i] >= VLMAX) ? 0 : vs2[vs1[i]];
> + */
> +void VECTOR_HELPER(vrgather_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> +                                uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest, src, src1;
> + uint32_t index;
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> + if (vector_vtype_ill(env)
> + || vector_overlap_vm_force(vm, rd)
> + || vector_overlap_dstgp_srcgp(rd, lmul, rs1, lmul)
> + || vector_overlap_dstgp_srcgp(rd, lmul, rs2, lmul)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src1 = rs1 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + index = env->vfp.vreg[src1].u8[j];
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (index >= vlmax) {
> + env->vfp.vreg[dest].u8[j] = 0;
> + } else {
> + src = rs2 + (index / (VLEN / width));
> + index = index % (VLEN / width);
> + env->vfp.vreg[dest].u8[j] =
> + env->vfp.vreg[src].u8[index];
> + }
> + }
> + break;
> + case 16:
> + index = env->vfp.vreg[src1].u16[j];
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (index >= vlmax) {
> + env->vfp.vreg[dest].u16[j] = 0;
> + } else {
> + src = rs2 + (index / (VLEN / width));
> + index = index % (VLEN / width);
> + env->vfp.vreg[dest].u16[j] =
> + env->vfp.vreg[src].u16[index];
> + }
> + }
> + break;
> + case 32:
> + index = env->vfp.vreg[src1].u32[j];
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (index >= vlmax) {
> + env->vfp.vreg[dest].u32[j] = 0;
> + } else {
> + src = rs2 + (index / (VLEN / width));
> + index = index % (VLEN / width);
> + env->vfp.vreg[dest].u32[j] =
> + env->vfp.vreg[src].u32[index];
> + }
> + }
> + break;
> + case 64:
> + index = env->vfp.vreg[src1].u64[j];
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (index >= vlmax) {
> + env->vfp.vreg[dest].u64[j] = 0;
> + } else {
> + src = rs2 + (index / (VLEN / width));
> + index = index % (VLEN / width);
> + env->vfp.vreg[dest].u64[j] =
> + env->vfp.vreg[src].u64[index];
> + }
> + }
> + break;
> + default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> +    env->vfp.vstart = 0;
> +}
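
Stripped of the register-group bookkeeping, the loop above is the usual
scalar gather. A reference sketch for one SEW-8 register (illustrative
only):

    #include <stdint.h>

    /* vd[i] = vs1[i] >= vlmax ? 0 : vs2[vs1[i]] */
    static void vrgather_ref(uint8_t *vd, const uint8_t *vs2,
                             const uint8_t *vs1, int vl, int vlmax)
    {
        for (int i = 0; i < vl; i++) {
            vd[i] = (vs1[i] >= vlmax) ? 0 : vs2[vs1[i]];
        }
    }
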
> +
> +/* vrgather.vx vd, vs2, rs1, vm # vd[i] = (x[rs1] >= VLMAX) ? 0 : vs2[x[rs1]] */
> +void VECTOR_HELPER(vrgather_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> +                                uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest, src;
> + uint32_t index;
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> + if (vector_vtype_ill(env)
> + || vector_overlap_vm_force(vm, rd)
> + || vector_overlap_dstgp_srcgp(rd, lmul, rs2, lmul)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + index = env->gpr[rs1];
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (index >= vlmax) {
> + env->vfp.vreg[dest].u8[j] = 0;
> + } else {
> + src = rs2 + (index / (VLEN / width));
> + index = index % (VLEN / width);
> + env->vfp.vreg[dest].u8[j] =
> + env->vfp.vreg[src].u8[index];
> + }
> + }
> + break;
> + case 16:
> + index = env->gpr[rs1];
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (index >= vlmax) {
> + env->vfp.vreg[dest].u16[j] = 0;
> + } else {
> + src = rs2 + (index / (VLEN / width));
> + index = index % (VLEN / width);
> + env->vfp.vreg[dest].u16[j] =
> + env->vfp.vreg[src].u16[index];
> + }
> + }
> + break;
> + case 32:
> + index = env->gpr[rs1];
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (index >= vlmax) {
> + env->vfp.vreg[dest].u32[j] = 0;
> + } else {
> + src = rs2 + (index / (VLEN / width));
> + index = index % (VLEN / width);
> + env->vfp.vreg[dest].u32[j] =
> + env->vfp.vreg[src].u32[index];
> + }
> + }
> + break;
> + case 64:
> + index = env->gpr[rs1];
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (index >= vlmax) {
> + env->vfp.vreg[dest].u64[j] = 0;
> + } else {
> + src = rs2 + (index / (VLEN / width));
> + index = index % (VLEN / width);
> + env->vfp.vreg[dest].u64[j] =
> + env->vfp.vreg[src].u64[index];
> + }
> + }
> + break;
> + default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> +    env->vfp.vstart = 0;
> +}
> +
> +/* vrgather.vi vd, vs2, imm, vm # vd[i] = (imm >= VLMAX) ? 0 : vs2[imm] */
> +void VECTOR_HELPER(vrgather_vi)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> +                                uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest, src;
> + uint32_t index;
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> + if (vector_vtype_ill(env)
> + || vector_overlap_vm_force(vm, rd)
> + || vector_overlap_dstgp_srcgp(rd, lmul, rs2, lmul)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + index = rs1;
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (index >= vlmax) {
> + env->vfp.vreg[dest].u8[j] = 0;
> + } else {
> + src = rs2 + (index / (VLEN / width));
> + index = index % (VLEN / width);
> + env->vfp.vreg[dest].u8[j] =
> + env->vfp.vreg[src].u8[index];
> + }
> + }
> + break;
> + case 16:
> + index = rs1;
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (index >= vlmax) {
> + env->vfp.vreg[dest].u16[j] = 0;
> + } else {
> + src = rs2 + (index / (VLEN / width));
> + index = index % (VLEN / width);
> + env->vfp.vreg[dest].u16[j] =
> + env->vfp.vreg[src].u16[index];
> + }
> + }
> + break;
> + case 32:
> + index = rs1;
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (index >= vlmax) {
> + env->vfp.vreg[dest].u32[j] = 0;
> + } else {
> + src = rs2 + (index / (VLEN / width));
> + index = index % (VLEN / width);
> + env->vfp.vreg[dest].u32[j] =
> + env->vfp.vreg[src].u32[index];
> + }
> + }
> + break;
> + case 64:
> + index = rs1;
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (index >= vlmax) {
> + env->vfp.vreg[dest].u64[j] = 0;
> + } else {
> + src = rs2 + (index / (VLEN / width));
> + index = index % (VLEN / width);
> + env->vfp.vreg[dest].u64[j] =
> + env->vfp.vreg[src].u64[index];
> + }
> + }
> + break;
> + default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> +    env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vext_x_v)(CPURISCVState *env, uint32_t rs1, uint32_t rs2,
> +                             uint32_t rd)
> +{
> +    int width;
> + target_ulong index = env->gpr[rs1];
> +
> + if (vector_vtype_ill(env)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + width = vector_get_width(env);
> +
> +    if (index >= VLEN / width) { /* out-of-range index reads as zero */
> +        env->gpr[rd] = 0;
> +    } else {
> +        env->gpr[rd] = vector_get_iu_elem(env, width, rs2, index);
> +    }
> +    env->vfp.vstart = 0;
> +}
> +
> +/* vfmv.f.s rd, vs2 # rd = vs2[0] (rs1=0) */
> +void VECTOR_HELPER(vfmv_f_s)(CPURISCVState *env, uint32_t rs1, uint32_t rs2,
> +                             uint32_t rd)
> +{
> + int width, flen;
> + uint64_t mask;
> +
> + if (vector_vtype_ill(env)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + if (env->misa & RVD) {
> + flen = 8;
> + } else if (env->misa & RVF) {
> + flen = 4;
> + } else {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + width = vector_get_width(env);
> + mask = (~((uint64_t)0)) << width;
> +
> + if (width == 8) {
> + env->fpr[rd] = (uint64_t)env->vfp.vreg[rs2].s8[0] | mask;
> + } else if (width == 16) {
> + env->fpr[rd] = (uint64_t)env->vfp.vreg[rs2].s16[0] | mask;
> + } else if (width == 32) {
> + env->fpr[rd] = (uint64_t)env->vfp.vreg[rs2].s32[0] | mask;
> + } else if (width == 64) {
> + if (flen == 4) {
> + env->fpr[rd] = env->vfp.vreg[rs2].s64[0] & 0xffffffff;
> + } else {
> + env->fpr[rd] = env->vfp.vreg[rs2].s64[0];
> + }
> + } else {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +    env->vfp.vstart = 0;
> +}
> +
> +/* vmv.s.x vd, rs1 # vd[0] = rs1 */
> +void VECTOR_HELPER(vmv_s_x)(CPURISCVState *env, uint32_t rs1, uint32_t rs2,
> +                            uint32_t rd)
> +{
> + int width;
> + if (vector_vtype_ill(env)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + if (env->vfp.vstart >= env->vfp.vl) {
> + return;
> + }
> +
> + memset(&env->vfp.vreg[rd].u8[0], 0, VLEN / 8);
> + width = vector_get_width(env);
> +
> + if (width == 8) {
> + env->vfp.vreg[rd].u8[0] = env->gpr[rs1];
> + } else if (width == 16) {
> + env->vfp.vreg[rd].u16[0] = env->gpr[rs1];
> + } else if (width == 32) {
> + env->vfp.vreg[rd].u32[0] = env->gpr[rs1];
> + } else if (width == 64) {
> + env->vfp.vreg[rd].u64[0] = env->gpr[rs1];
> + } else {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +    env->vfp.vstart = 0;
> +}
> +
> +/* vfmv.s.f vd, rs1 # vd[0] = rs1 (vs2 = 0) */
> +void VECTOR_HELPER(vfmv_s_f)(CPURISCVState *env, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, flen;
> +
> + if (vector_vtype_ill(env)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + if (env->vfp.vstart >= env->vfp.vl) {
> + return;
> + }
> + if (env->misa & RVD) {
> + flen = 8;
> + } else if (env->misa & RVF) {
> + flen = 4;
> + } else {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + width = vector_get_width(env);
> +
> + if (width == 8) {
> + env->vfp.vreg[rd].u8[0] = env->fpr[rs1];
> + } else if (width == 16) {
> + env->vfp.vreg[rd].u16[0] = env->fpr[rs1];
> + } else if (width == 32) {
> + env->vfp.vreg[rd].u32[0] = env->fpr[rs1];
> + } else if (width == 64) {
> + if (flen == 4) { /* 1-extended to FLEN bits */
> + env->vfp.vreg[rd].u64[0] = (uint64_t)env->fpr[rs1]
> + | 0xffffffff00000000;
> + } else {
> + env->vfp.vreg[rd].u64[0] = env->fpr[rs1];
> + }
> + } else {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +    env->vfp.vstart = 0;
> +}
> +
> +/* vslideup.vx vd, vs2, rs1, vm # vd[i+rs1] = vs2[i] */
> +void VECTOR_HELPER(vslideup_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> +                                uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax, offset;
> + int i, j, dest, src, k;
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> + if (vector_vtype_ill(env)
> + || vector_overlap_vm_force(vm, rd)
> + || vector_overlap_dstgp_srcgp(rd, lmul, rs2, lmul)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> + offset = env->gpr[rs1];
> +
> + if (offset < env->vfp.vstart) {
> + offset = env->vfp.vstart;
> + }
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src = rs2 + ((i - offset) / (VLEN / width));
> + j = i % (VLEN / width);
> + k = (i - offset) % (VLEN / width);
> + if (i < offset) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u8[j] =
> + env->vfp.vreg[src].u8[k];
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u16[j] =
> + env->vfp.vreg[src].u16[k];
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u32[j] =
> + env->vfp.vreg[src].u32[k];
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u64[j] =
> + env->vfp.vreg[src].u64[k];
> + }
> + break;
> + default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> +    env->vfp.vstart = 0;
> +}
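
The net effect of the loop above, minus masking and register-group
bookkeeping, is a plain shift-up copy; elements below the offset keep
their previous contents. A reference sketch (illustrative only):

    #include <stdint.h>

    /* vd[i + offset] = vs2[i], i.e. vd[i] = vs2[i - offset] for i >= offset */
    static void vslideup_ref(uint8_t *vd, const uint8_t *vs2,
                             int offset, int vl)
    {
        for (int i = offset; i < vl; i++) {
            vd[i] = vs2[i - offset];
        }
    }
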
> +
> +/* vslideup.vi vd, vs2, imm, vm # vd[i+imm] = vs2[i] */
> +void VECTOR_HELPER(vslideup_vi)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> +                                uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax, offset;
> + int i, j, dest, src, k;
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> + if (vector_vtype_ill(env)
> + || vector_overlap_vm_force(vm, rd)
> + || vector_overlap_dstgp_srcgp(rd, lmul, rs2, lmul)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> + offset = rs1;
> +
> + if (offset < env->vfp.vstart) {
> + offset = env->vfp.vstart;
> + }
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src = rs2 + ((i - offset) / (VLEN / width));
> + j = i % (VLEN / width);
> + k = (i - offset) % (VLEN / width);
> + if (i < offset) {
> + continue;
> + } else if (i < vl) {
> + if (width == 8) {
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u8[j] =
> + env->vfp.vreg[src].u8[k];
> + }
> + } else if (width == 16) {
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u16[j] =
> + env->vfp.vreg[src].u16[k];
> + }
> + } else if (width == 32) {
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u32[j] =
> + env->vfp.vreg[src].u32[k];
> + }
> + } else if (width == 64) {
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u64[j] =
> + env->vfp.vreg[src].u64[k];
> + }
> + } else {
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> +    env->vfp.vstart = 0;
> +}
> +
> +/* vslide1up.vx vd, vs2, rs1, vm # vd[0]=x[rs1], vd[i+1] = vs2[i] */
> +void VECTOR_HELPER(vslide1up_vx)(CPURISCVState *env, uint32_t vm,
> +                                 uint32_t rs1, uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest, src, k;
> + uint64_t s1;
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> + if (vector_vtype_ill(env)
> + || vector_overlap_vm_force(vm, rd)
> + || vector_overlap_dstgp_srcgp(rd, lmul, rs2, lmul)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> + s1 = env->gpr[rs1];
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src = rs2 + ((i - 1) / (VLEN / width));
> + j = i % (VLEN / width);
> + k = (i - 1) % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i == 0 && env->vfp.vstart == 0) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u8[j] = s1;
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u16[j] = s1;
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u32[j] = s1;
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u64[j] = s1;
> + }
> + break;
> + default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u8[j] = env->vfp.vreg[src].u8[k];
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u16[j] =
> + env->vfp.vreg[src].u16[k];
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u32[j] =
> + env->vfp.vreg[src].u32[k];
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u64[j] =
> + env->vfp.vreg[src].u64[k];
> + }
> + break;
> + default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> +    env->vfp.vstart = 0;
> +}
> +
> +/* vslidedown.vx vd, vs2, rs1, vm # vd[i] = vs2[i + rs1] */
> +void VECTOR_HELPER(vslidedown_vx)(CPURISCVState *env, uint32_t vm,
> + uint32_t rs1, uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax, offset;
> + int i, j, dest, src, k;
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_force(vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> + offset = env->gpr[rs1];
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src = rs2 + ((i + offset) / (VLEN / width));
> + j = i % (VLEN / width);
> + k = (i + offset) % (VLEN / width);
> + if (i < offset) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (i + offset < vlmax) {
> + env->vfp.vreg[dest].u8[j] =
> + env->vfp.vreg[src].u8[k];
> + } else {
> + env->vfp.vreg[dest].u8[j] = 0;
> + }
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (i + offset < vlmax) {
> + env->vfp.vreg[dest].u16[j] =
> + env->vfp.vreg[src].u16[k];
> + } else {
> + env->vfp.vreg[dest].u16[j] = 0;
> + }
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (i + offset < vlmax) {
> + env->vfp.vreg[dest].u32[j] =
> + env->vfp.vreg[src].u32[k];
> + } else {
> + env->vfp.vreg[dest].u32[j] = 0;
> + }
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (i + offset < vlmax) {
> + env->vfp.vreg[dest].u64[j] =
> + env->vfp.vreg[src].u64[k];
> + } else {
> + env->vfp.vreg[dest].u64[j] = 0;
> + }
> + }
> + break;
> + default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> +    env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vslidedown_vi)(CPURISCVState *env, uint32_t vm,
> + uint32_t rs1, uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax, offset;
> + int i, j, dest, src, k;
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_force(vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> + offset = rs1;
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src = rs2 + ((i + offset) / (VLEN / width));
> + j = i % (VLEN / width);
> + k = (i + offset) % (VLEN / width);
> + if (i < offset) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (i + offset < vlmax) {
> + env->vfp.vreg[dest].u8[j] =
> + env->vfp.vreg[src].u8[k];
> + } else {
> + env->vfp.vreg[dest].u8[j] = 0;
> + }
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (i + offset < vlmax) {
> + env->vfp.vreg[dest].u16[j] =
> + env->vfp.vreg[src].u16[k];
> + } else {
> + env->vfp.vreg[dest].u16[j] = 0;
> + }
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (i + offset < vlmax) {
> + env->vfp.vreg[dest].u32[j] =
> + env->vfp.vreg[src].u32[k];
> + } else {
> + env->vfp.vreg[dest].u32[j] = 0;
> + }
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (i + offset < vlmax) {
> + env->vfp.vreg[dest].u64[j] =
> + env->vfp.vreg[src].u64[k];
> + } else {
> + env->vfp.vreg[dest].u64[j] = 0;
> + }
> + }
> + break;
> + default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> +    env->vfp.vstart = 0;
> +}
> +
> +/* vslide1down.vx vd, vs2, rs1, vm # vd[vl - 1] = x[rs1], vd[i] = vs2[i + 1] */
> +void VECTOR_HELPER(vslide1down_vx)(CPURISCVState *env, uint32_t vm,
> + uint32_t rs1, uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest, src, k;
> + uint64_t s1;
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_force(vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> + s1 = env->gpr[rs1];
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src = rs2 + ((i + 1) / (VLEN / width));
> + j = i % (VLEN / width);
> + k = (i + 1) % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i == vl - 1 && i >= env->vfp.vstart) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u8[j] = s1;
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u16[j] = s1;
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u32[j] = s1;
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u64[j] = s1;
> + }
> + break;
> + default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else if (i < vl - 1) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u8[j] = env->vfp.vreg[src].u8[k];
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u16[j] =
> + env->vfp.vreg[src].u16[k];
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u32[j] =
> + env->vfp.vreg[src].u32[k];
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u64[j] =
> + env->vfp.vreg[src].u64[k];
> + }
> + break;
> + default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> +    env->vfp.vstart = 0;
> +}
> +
> +/*
> + * vcompress.vm vd, vs2, vs1
> + * Compress into vd elements of vs2 where vs1 is enabled
> + */
> +void VECTOR_HELPER(vcompress_vm)(CPURISCVState *env, uint32_t rs1,
> +                                 uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest, src;
> + uint32_t vd_idx, num = 0;
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> + if (vector_vtype_ill(env)
> + || vector_overlap_dstgp_srcgp(rd, lmul, rs1, 1)
> + || vector_overlap_dstgp_srcgp(rd, lmul, rs2, lmul)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + if (env->vfp.vstart != 0) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> +    /* zero all destination elements first */
> + for (i = 0; i < lmul; i++) {
> + memset(&env->vfp.vreg[rd + i].u64[0], 0, VLEN / 8);
> + }
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (num / (VLEN / width));
> + src = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + vd_idx = num % (VLEN / width);
> + if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_mask_reg(env, rs1, width, lmul, i)) {
> + env->vfp.vreg[dest].u8[vd_idx] =
> + env->vfp.vreg[src].u8[j];
> + num++;
> + }
> + break;
> + case 16:
> + if (vector_mask_reg(env, rs1, width, lmul, i)) {
> + env->vfp.vreg[dest].u16[vd_idx] =
> + env->vfp.vreg[src].u16[j];
> + num++;
> + }
> + break;
> + case 32:
> + if (vector_mask_reg(env, rs1, width, lmul, i)) {
> + env->vfp.vreg[dest].u32[vd_idx] =
> + env->vfp.vreg[src].u32[j];
> + num++;
> + }
> + break;
> + case 64:
> + if (vector_mask_reg(env, rs1, width, lmul, i)) {
> + env->vfp.vreg[dest].u64[vd_idx] =
> + env->vfp.vreg[src].u64[j];
> + num++;
> + }
> + break;
> + default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + }
> + }
> +    env->vfp.vstart = 0;
> +}
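
The num counter above packs selected elements densely from index 0. A
reference sketch of the same operation (illustrative only):

    #include <stdint.h>
    #include <stdbool.h>

    /* pack the vs2 elements whose mask bit is set into the low part of vd;
     * returns how many elements were written */
    static int vcompress_ref(uint8_t *vd, const uint8_t *vs2,
                             const bool *mask, int vl)
    {
        int num = 0;
        for (int i = 0; i < vl; i++) {
            if (mask[i]) {
                vd[num++] = vs2[i];
            }
        }
        return num;
    }
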
> +
> +void VECTOR_HELPER(vadd_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src1, src2, dest, vlmax;
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src1 = rs1 + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u8[j] = env->vfp.vreg[src1].u8[j]
> + + env->vfp.vreg[src2].u8[j];
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    env->vfp.vreg[dest].u16[j] = env->vfp.vreg[src1].u16[j]
> + + env->vfp.vreg[src2].u16[j];
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    env->vfp.vreg[dest].u32[j] = env->vfp.vreg[src1].u32[j]
> + + env->vfp.vreg[src2].u32[j];
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    env->vfp.vreg[dest].u64[j] = env->vfp.vreg[src1].u64[j]
> + + env->vfp.vreg[src2].u64[j];
> + }
> + break;
> + default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vadd_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src2, dest, vlmax;
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u8[j] = env->gpr[rs1]
> + + env->vfp.vreg[src2].u8[j];
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u16[j] = env->gpr[rs1]
> + + env->vfp.vreg[src2].u16[j];
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u32[j] = env->gpr[rs1]
> + + env->vfp.vreg[src2].u32[j];
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u64[j] =
> + (uint64_t)extend_gpr(env->gpr[rs1])
> + + env->vfp.vreg[src2].u64[j];
> + }
> + break;
> + default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vadd_vi)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> +                            uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src2, dest, vlmax;
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s8[j] = sign_extend(rs1, 5)
> + + env->vfp.vreg[src2].s8[j];
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s16[j] = sign_extend(rs1, 5)
> + + env->vfp.vreg[src2].s16[j];
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s32[j] = sign_extend(rs1, 5)
> + + env->vfp.vreg[src2].s32[j];
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s64[j] = sign_extend(rs1, 5)
> + + env->vfp.vreg[src2].s64[j];
> + }
> + break;
> + default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vredsum.vs vd, vs2, vs1, vm # vd[0] = sum(vs1[0] , vs2[*]) */
> +void VECTOR_HELPER(vredsum_vs)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> +                               uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, src2;
> + uint64_t sum = 0;
> +
> + lmul = vector_get_lmul(env);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + if (env->vfp.vstart != 0) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vl = env->vfp.vl;
> + if (vl == 0) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < VLEN / 64; i++) {
> + env->vfp.vreg[rd].u64[i] = 0;
> + }
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> +
> + if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + sum += env->vfp.vreg[src2].u8[j];
> + }
> + if (i == 0) {
> + sum += env->vfp.vreg[rs1].u8[0];
> + }
> + if (i == vl - 1) {
> + env->vfp.vreg[rd].u8[0] = sum;
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + sum += env->vfp.vreg[src2].u16[j];
> + }
> + if (i == 0) {
> + sum += env->vfp.vreg[rs1].u16[0];
> + }
> + if (i == vl - 1) {
> + env->vfp.vreg[rd].u16[0] = sum;
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + sum += env->vfp.vreg[src2].u32[j];
> + }
> + if (i == 0) {
> + sum += env->vfp.vreg[rs1].u32[0];
> + }
> + if (i == vl - 1) {
> + env->vfp.vreg[rd].u32[0] = sum;
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + sum += env->vfp.vreg[src2].u64[j];
> + }
> + if (i == 0) {
> + sum += env->vfp.vreg[rs1].u64[0];
> + }
> + if (i == vl - 1) {
> + env->vfp.vreg[rd].u64[0] = sum;
> + }
> + break;
> + default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + }
> + }
> +    env->vfp.vstart = 0;
> +}
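
Behaviourally the reduction above is: seed the accumulator with vs1[0],
add every active vs2 element, write the result to vd[0]. A reference
sketch (illustrative only):

    #include <stdint.h>
    #include <stdbool.h>

    static uint64_t vredsum_ref(uint64_t seed, const uint64_t *vs2,
                                const bool *mask, int vl)
    {
        uint64_t sum = seed;            /* vs1[0] */
        for (int i = 0; i < vl; i++) {
            if (mask[i]) {
                sum += vs2[i];
            }
        }
        return sum;                     /* written to vd[0] */
    }
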
> +
> +/* vfadd.vv vd, vs2, vs1, vm # Vector-vector */
> +void VECTOR_HELPER(vfadd_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> +                             uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest, src1, src2;
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + src1 = rs1 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> +            case 16:
> +                if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    env->vfp.vreg[dest].f16[j] = float16_add(
> +                                                     env->vfp.vreg[src1].f16[j],
> +                                                     env->vfp.vreg[src2].f16[j],
> +                                                     &env->fp_status);
> +                }
> +                break;
> +            case 32:
> +                if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    env->vfp.vreg[dest].f32[j] = float32_add(
> +                                                     env->vfp.vreg[src1].f32[j],
> +                                                     env->vfp.vreg[src2].f32[j],
> +                                                     &env->fp_status);
> +                }
> +                break;
> +            case 64:
> +                if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    env->vfp.vreg[dest].f64[j] = float64_add(
> +                                                     env->vfp.vreg[src1].f64[j],
> +                                                     env->vfp.vreg[src2].f64[j],
> +                                                     &env->fp_status);
> +                }
> +                break;
> +            default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> +                return;
> + }
> + } else {
> + vector_tail_fcommon(env, dest, j, width);
> + }
> + }
> +    env->vfp.vstart = 0;
> +}
> +
> +/* vfadd.vf vd, vs2, rs1, vm # vector-scalar */
> +void VECTOR_HELPER(vfadd_vf)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> +                             uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest, src2;
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> +            case 16:
> +                if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    env->vfp.vreg[dest].f16[j] = float16_add(
> +                                                     env->fpr[rs1],
> +                                                     env->vfp.vreg[src2].f16[j],
> +                                                     &env->fp_status);
> +                }
> +                break;
> +            case 32:
> +                if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    env->vfp.vreg[dest].f32[j] = float32_add(
> +                                                     env->fpr[rs1],
> +                                                     env->vfp.vreg[src2].f32[j],
> +                                                     &env->fp_status);
> +                }
> +                break;
> +            case 64:
> +                if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    env->vfp.vreg[dest].f64[j] = float64_add(
> +                                                     env->fpr[rs1],
> +                                                     env->vfp.vreg[src2].f64[j],
> +                                                     &env->fp_status);
> +                }
> +                break;
> +            default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> +                return;
> + }
> + } else {
> + vector_tail_fcommon(env, dest, j, width);
> + }
> + }
> +    env->vfp.vstart = 0;
> +}
> +
> +/* vredand.vs vd, vs2, vs1, vm # vd[0] = and( vs1[0] , vs2[*] ) */
> +void VECTOR_HELPER(vredand_vs)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> +                               uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, src2;
> + uint64_t res = 0;
> +
> + lmul = vector_get_lmul(env);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + if (env->vfp.vstart != 0) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vl = env->vfp.vl;
> + if (vl == 0) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < VLEN / 64; i++) {
> + env->vfp.vreg[rd].u64[i] = 0;
> + }
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> +
> + if (i < vl) {
> + switch (width) {
> + case 8:
> + if (i == 0) {
> + res = env->vfp.vreg[rs1].u8[0];
> + }
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + res &= env->vfp.vreg[src2].u8[j];
> + }
> + if (i == vl - 1) {
> + env->vfp.vreg[rd].u8[0] = res;
> + }
> + break;
> + case 16:
> + if (i == 0) {
> + res = env->vfp.vreg[rs1].u16[0];
> + }
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + res &= env->vfp.vreg[src2].u16[j];
> + }
> + if (i == vl - 1) {
> + env->vfp.vreg[rd].u16[0] = res;
> + }
> + break;
> + case 32:
> + if (i == 0) {
> + res = env->vfp.vreg[rs1].u32[0];
> + }
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + res &= env->vfp.vreg[src2].u32[j];
> + }
> + if (i == vl - 1) {
> + env->vfp.vreg[rd].u32[0] = res;
> + }
> + break;
> + case 64:
> + if (i == 0) {
> + res = env->vfp.vreg[rs1].u64[0];
> + }
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + res &= env->vfp.vreg[src2].u64[j];
> + }
> + if (i == vl - 1) {
> + env->vfp.vreg[rd].u64[0] = res;
> + }
> + break;
> + default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + }
> + }
> +    env->vfp.vstart = 0;
> +}
> +
> +/* vfredsum.vs vd, vs2, vs1, vm # Unordered sum */
> +void VECTOR_HELPER(vfredsum_vs)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> +                                uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, src2;
> +    float16 sum16 = 0;    /* softfloat types are integer bit patterns */
> +    float32 sum32 = 0;
> +    float64 sum64 = 0;
> +
> + lmul = vector_get_lmul(env);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> +
> + if (vector_vtype_ill(env)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + if (env->vfp.vstart != 0) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vl = env->vfp.vl;
> + if (vl == 0) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < VLEN / 64; i++) {
> + env->vfp.vreg[rd].u64[i] = 0;
> + }
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> +
> + if (i < vl) {
> + switch (width) {
> + case 16:
> + if (i == 0) {
> + sum16 = env->vfp.vreg[rs1].f16[0];
> + }
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + sum16 = float16_add(sum16, env->vfp.vreg[src2].f16[j],
> + &env->fp_status);
> + }
> + if (i == vl - 1) {
> + env->vfp.vreg[rd].f16[0] = sum16;
> + }
> + break;
> + case 32:
> + if (i == 0) {
> + sum32 = env->vfp.vreg[rs1].f32[0];
> + }
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + sum32 = float32_add(sum32, env->vfp.vreg[src2].f32[j],
> + &env->fp_status);
> + }
> + if (i == vl - 1) {
> + env->vfp.vreg[rd].f32[0] = sum32;
> + }
> + break;
> + case 64:
> + if (i == 0) {
> + sum64 = env->vfp.vreg[rs1].f64[0];
> + }
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + sum64 = float64_add(sum64, env->vfp.vreg[src2].f64[j],
> + &env->fp_status);
> + }
> + if (i == vl - 1) {
> + env->vfp.vreg[rd].f64[0] = sum64;
> + }
> + break;
> + default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + }
> + }
> +    env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vsub_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src1, src2, dest, vlmax;
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src1 = rs1 + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u8[j] = env->vfp.vreg[src2].u8[j]
> + - env->vfp.vreg[src1].u8[j];
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    env->vfp.vreg[dest].u16[j] = env->vfp.vreg[src2].u16[j]
> + - env->vfp.vreg[src1].u16[j];
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    env->vfp.vreg[dest].u32[j] = env->vfp.vreg[src2].u32[j]
> + - env->vfp.vreg[src1].u32[j];
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    env->vfp.vreg[dest].u64[j] = env->vfp.vreg[src2].u64[j]
> + - env->vfp.vreg[src1].u64[j];
> + }
> + break;
> + default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> +
> + env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vsub_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src2, dest, vlmax;
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u8[j] = env->vfp.vreg[src2].u8[j]
> + - env->gpr[rs1];
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    env->vfp.vreg[dest].u16[j] = env->vfp.vreg[src2].u16[j]
> + - env->gpr[rs1];
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    env->vfp.vreg[dest].u32[j] = env->vfp.vreg[src2].u32[j]
> + - env->gpr[rs1];
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    env->vfp.vreg[dest].u64[j] = env->vfp.vreg[src2].u64[j]
> + - (uint64_t)extend_gpr(env->gpr[rs1]);
> + }
> + break;
> + default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vredor.vs vd, vs2, vs1, vm # vd[0] = or( vs1[0] , vs2[*] ) */
> +void VECTOR_HELPER(vredor_vs)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> +                              uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, src2;
> + uint64_t res = 0;
> +
> + lmul = vector_get_lmul(env);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + if (env->vfp.vstart != 0) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vl = env->vfp.vl;
> + if (vl == 0) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < VLEN / 64; i++) {
> + env->vfp.vreg[rd].u64[i] = 0;
> + }
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> +
> + if (i < vl) {
> + switch (width) {
> + case 8:
> + if (i == 0) {
> + res = env->vfp.vreg[rs1].u8[0];
> + }
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + res |= env->vfp.vreg[src2].u8[j];
> + }
> + if (i == vl - 1) {
> + env->vfp.vreg[rd].u8[0] = res;
> + }
> + break;
> + case 16:
> + if (i == 0) {
> + res = env->vfp.vreg[rs1].u16[0];
> + }
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + res |= env->vfp.vreg[src2].u16[j];
> + }
> + if (i == vl - 1) {
> + env->vfp.vreg[rd].u16[0] = res;
> + }
> + break;
> + case 32:
> + if (i == 0) {
> + res = env->vfp.vreg[rs1].u32[0];
> + }
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + res |= env->vfp.vreg[src2].u32[j];
> + }
> + if (i == vl - 1) {
> + env->vfp.vreg[rd].u32[0] = res;
> + }
> + break;
> + case 64:
> + if (i == 0) {
> + res = env->vfp.vreg[rs1].u64[0];
> + }
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + res |= env->vfp.vreg[src2].u64[j];
> + }
> + if (i == vl - 1) {
> + env->vfp.vreg[rd].u64[0] = res;
> + }
> + break;
> + default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST,
> +                                      GETPC());
> + return;
> + }
> + }
> + }
> +    env->vfp.vstart = 0;
> +}
> +
> +/* vfsub.vv vd, vs2, vs1, vm # Vector-vector */
> +void VECTOR_HELPER(vfsub_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest, src1, src2;
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + src1 = rs1 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f16[j] = float16_sub(
> +                                                        env->vfp.vreg[src2].f16[j],
> +                                                        env->vfp.vreg[src1].f16[j],
> + &env->fp_status);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f32[j] = float32_sub(
> +                                                        env->vfp.vreg[src2].f32[j],
> +                                                        env->vfp.vreg[src1].f32[j],
> + &env->fp_status);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f64[j] = float64_sub(
> +                                                        env->vfp.vreg[src2].f64[j],
> +                                                        env->vfp.vreg[src1].f64[j],
> + &env->fp_status);
> + }
> + break;
> + default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST,
> +                                      GETPC());
> + return;
> + }
> + } else {
> + vector_tail_fcommon(env, dest, j, width);
> + }
> + }
> +    env->vfp.vstart = 0;
> +}
> +
> +/* vfsub.vf vd, vs2, rs1, vm # Vector-scalar vd[i] = vs2[i] - f[rs1] */
> +void VECTOR_HELPER(vfsub_vf)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest, src2;
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f16[j] = float16_sub(
> +                                                          env->vfp.vreg[src2].f16[j],
> + env->fpr[rs1],
> + &env->fp_status);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f32[j] = float32_sub(
> +                                                          env->vfp.vreg[src2].f32[j],
> + env->fpr[rs1],
> + &env->fp_status);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f64[j] = float64_sub(
> +                                                          env->vfp.vreg[src2].f64[j],
> + env->fpr[rs1],
> + &env->fp_status);
> + }
> + break;
> + default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST,
> +                                      GETPC());
> + return;
> + }
> + } else {
> + vector_tail_fcommon(env, dest, j, width);
> + }
> + }
> +    env->vfp.vstart = 0;
> +}
> +
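> +/* vrsub.vx vd, vs2, rs1, vm # vd[i] = x[rs1] - vs2[i] */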
> +void VECTOR_HELPER(vrsub_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src2, dest, vlmax;
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u8[j] = env->gpr[rs1]
> + - env->vfp.vreg[src2].u8[j];
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u16[j] = env->gpr[rs1]
> + - env->vfp.vreg[src2].u16[j];
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u32[j] = env->gpr[rs1]
> + - env->vfp.vreg[src2].u32[j];
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u64[j] =
> + (uint64_t)extend_gpr(env->gpr[rs1])
> + - env->vfp.vreg[src2].u64[j];
> + }
> + break;
> + default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST,
> +                                      GETPC());
> + break;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
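> +/* vrsub.vi vd, vs2, imm, vm # vd[i] = imm - vs2[i] */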
> +void VECTOR_HELPER(vrsub_vi)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src2, dest, vlmax;
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s8[j] = sign_extend(rs1, 5)
> + - env->vfp.vreg[src2].s8[j];
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s16[j] = sign_extend(rs1, 5)
> + - env->vfp.vreg[src2].s16[j];
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s32[j] = sign_extend(rs1, 5)
> + - env->vfp.vreg[src2].s32[j];
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s64[j] = sign_extend(rs1, 5)
> + - env->vfp.vreg[src2].s64[j];
> + }
> + break;
> + default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST,
> +                                      GETPC());
> + break;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vredxor.vs vd, vs2, vs1, vm # vd[0] = xor( vs1[0] , vs2[*] ) */
> +void VECTOR_HELPER(vredxor_vs)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, src2;
> + uint64_t res = 0;
> +
> + lmul = vector_get_lmul(env);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + if (env->vfp.vstart != 0) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vl = env->vfp.vl;
> + if (vl == 0) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < VLEN / 64; i++) {
> + env->vfp.vreg[rd].u64[i] = 0;
> + }
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> +
> + if (i < vl) {
> + switch (width) {
> + case 8:
> + if (i == 0) {
> + res = env->vfp.vreg[rs1].u8[0];
> + }
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + res ^= env->vfp.vreg[src2].u8[j];
> + }
> + if (i == vl - 1) {
> + env->vfp.vreg[rd].u8[0] = res;
> + }
> + break;
> + case 16:
> + if (i == 0) {
> + res = env->vfp.vreg[rs1].u16[0];
> + }
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + res ^= env->vfp.vreg[src2].u16[j];
> + }
> + if (i == vl - 1) {
> + env->vfp.vreg[rd].u16[0] = res;
> + }
> + break;
> + case 32:
> + if (i == 0) {
> + res = env->vfp.vreg[rs1].u32[0];
> + }
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + res ^= env->vfp.vreg[src2].u32[j];
> + }
> + if (i == vl - 1) {
> + env->vfp.vreg[rd].u32[0] = res;
> + }
> + break;
> + case 64:
> + if (i == 0) {
> + res = env->vfp.vreg[rs1].u64[0];
> + }
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + res ^= env->vfp.vreg[src2].u64[j];
> + }
> + if (i == vl - 1) {
> + env->vfp.vreg[rd].u64[0] = res;
> + }
> + break;
> + default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST,
> +                                      GETPC());
> + return;
> + }
> + }
> + }
> +    env->vfp.vstart = 0;
> +}
> +
> +/* vfredosum.vs vd, vs2, vs1, vm # Ordered sum */
> +void VECTOR_HELPER(vfredosum_vs)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + helper_vector_vfredsum_vs(env, vm, rs1, rs2, rd);
> +    env->vfp.vstart = 0;
> +}
> +
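> +/* vminu.vv vd, vs2, vs1, vm # vd[i] = min(vs2[i], vs1[i]), unsigned */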
> +void VECTOR_HELPER(vminu_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src1, src2, dest, vlmax;
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src1 = rs1 + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src1].u8[j] <=
> + env->vfp.vreg[src2].u8[j]) {
> + env->vfp.vreg[dest].u8[j] =
> + env->vfp.vreg[src1].u8[j];
> + } else {
> + env->vfp.vreg[dest].u8[j] =
> + env->vfp.vreg[src2].u8[j];
> + }
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src1].u16[j] <=
> + env->vfp.vreg[src2].u16[j]) {
> + env->vfp.vreg[dest].u16[j] =
> + env->vfp.vreg[src1].u16[j];
> + } else {
> + env->vfp.vreg[dest].u16[j] =
> + env->vfp.vreg[src2].u16[j];
> + }
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src1].u32[j] <=
> + env->vfp.vreg[src2].u32[j]) {
> + env->vfp.vreg[dest].u32[j] =
> + env->vfp.vreg[src1].u32[j];
> + } else {
> + env->vfp.vreg[dest].u32[j] =
> + env->vfp.vreg[src2].u32[j];
> + }
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src1].u64[j] <=
> + env->vfp.vreg[src2].u64[j]) {
> + env->vfp.vreg[dest].u64[j] =
> + env->vfp.vreg[src1].u64[j];
> + } else {
> + env->vfp.vreg[dest].u64[j] =
> + env->vfp.vreg[src2].u64[j];
> + }
> + }
> + break;
> + default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST,
> +                                      GETPC());
> + break;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
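> +/* vminu.vx vd, vs2, rs1, vm # vd[i] = min(vs2[i], x[rs1]), unsigned */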
> +void VECTOR_HELPER(vminu_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src2, dest, vlmax;
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if ((uint8_t)env->gpr[rs1] <=
> + env->vfp.vreg[src2].u8[j]) {
> + env->vfp.vreg[dest].u8[j] =
> + env->gpr[rs1];
> + } else {
> + env->vfp.vreg[dest].u8[j] =
> + env->vfp.vreg[src2].u8[j];
> + }
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if ((uint16_t)env->gpr[rs1] <=
> + env->vfp.vreg[src2].u16[j]) {
> + env->vfp.vreg[dest].u16[j] =
> + env->gpr[rs1];
> + } else {
> + env->vfp.vreg[dest].u16[j] =
> + env->vfp.vreg[src2].u16[j];
> + }
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if ((uint32_t)env->gpr[rs1] <=
> + env->vfp.vreg[src2].u32[j]) {
> + env->vfp.vreg[dest].u32[j] =
> + env->gpr[rs1];
> + } else {
> + env->vfp.vreg[dest].u32[j] =
> + env->vfp.vreg[src2].u32[j];
> + }
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if ((uint64_t)extend_gpr(env->gpr[rs1]) <=
> + env->vfp.vreg[src2].u64[j]) {
> + env->vfp.vreg[dest].u64[j] =
> + (uint64_t)extend_gpr(env->gpr[rs1]);
> + } else {
> + env->vfp.vreg[dest].u64[j] =
> + env->vfp.vreg[src2].u64[j];
> + }
> + }
> + break;
> + default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST,
> +                                      GETPC());
> + break;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vredminu.vs vd, vs2, vs1, vm # vd[0] = minu( vs1[0] , vs2[*] ) */
> +void VECTOR_HELPER(vredminu_vs)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, src2;
> + uint64_t minu = 0;
> +
> + lmul = vector_get_lmul(env);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> +
> + if (vector_vtype_ill(env)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + if (env->vfp.vstart != 0) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vl = env->vfp.vl;
> + if (vl == 0) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < VLEN / 64; i++) {
> + env->vfp.vreg[rd].u64[i] = 0;
> + }
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> +
> + if (i < vl) {
> + switch (width) {
> + case 8:
> + if (i == 0) {
> + minu = env->vfp.vreg[rs1].u8[0];
> + }
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (minu > env->vfp.vreg[src2].u8[j]) {
> + minu = env->vfp.vreg[src2].u8[j];
> + }
> + }
> + if (i == vl - 1) {
> + env->vfp.vreg[rd].u8[0] = minu;
> + }
> + break;
> + case 16:
> + if (i == 0) {
> + minu = env->vfp.vreg[rs1].u16[0];
> + }
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (minu > env->vfp.vreg[src2].u16[j]) {
> + minu = env->vfp.vreg[src2].u16[j];
> + }
> + }
> + if (i == vl - 1) {
> + env->vfp.vreg[rd].u16[0] = minu;
> + }
> + break;
> + case 32:
> + if (i == 0) {
> + minu = env->vfp.vreg[rs1].u32[0];
> + }
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (minu > env->vfp.vreg[src2].u32[j]) {
> + minu = env->vfp.vreg[src2].u32[j];
> + }
> + }
> + if (i == vl - 1) {
> + env->vfp.vreg[rd].u32[0] = minu;
> + }
> + break;
> + case 64:
> + if (i == 0) {
> + minu = env->vfp.vreg[rs1].u64[0];
> + }
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (minu > env->vfp.vreg[src2].u64[j]) {
> + minu = env->vfp.vreg[src2].u64[j];
> + }
> + }
> + if (i == vl - 1) {
> + env->vfp.vreg[rd].u64[0] = minu;
> + }
> + break;
> + default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST,
> +                                      GETPC());
> + return;
> + }
> + }
> + }
> +    env->vfp.vstart = 0;
> +}
> +
> +/* vfmin.vv vd, vs2, vs1, vm # Vector-vector */
> +void VECTOR_HELPER(vfmin_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest, src1, src2;
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + src1 = rs1 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f16[j] = float16_minnum(
> +                                                        env->vfp.vreg[src1].f16[j],
> +                                                        env->vfp.vreg[src2].f16[j],
> + &env->fp_status);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f32[j] = float32_minnum(
> +                                                        env->vfp.vreg[src1].f32[j],
> +                                                        env->vfp.vreg[src2].f32[j],
> + &env->fp_status);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f64[j] = float64_minnum(
> +                                                        env->vfp.vreg[src1].f64[j],
> +                                                        env->vfp.vreg[src2].f64[j],
> + &env->fp_status);
> + }
> + break;
> + default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST,
> +                                      GETPC());
> + return;
> + }
> + } else {
> +            switch (width) {
> +            case 16:
> +                env->vfp.vreg[dest].f16[j] = 0;
> +                break;
> +            case 32:
> +                env->vfp.vreg[dest].f32[j] = 0;
> +                break;
> +            case 64:
> +                env->vfp.vreg[dest].f64[j] = 0;
> +                break;
> +            default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST,
> +                                      GETPC());
> +                return;
> +            }
> + }
> + }
> +    env->vfp.vstart = 0;
> +}
> +
> +/* vfmin.vf vd, vs2, rs1, vm # vector-scalar */
> +void VECTOR_HELPER(vfmin_vf)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest, src2;
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f16[j] = float16_minnum(
> + env->fpr[rs1],
> +                                                          env->vfp.vreg[src2].f16[j],
> + &env->fp_status);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f32[j] = float32_minnum(
> + env->fpr[rs1],
> +                                                          env->vfp.vreg[src2].f32[j],
> + &env->fp_status);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f64[j] = float64_minnum(
> + env->fpr[rs1],
> +                                                          env->vfp.vreg[src2].f64[j],
> + &env->fp_status);
> + }
> + break;
> + default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST,
> +                                      GETPC());
> + return;
> + }
> + } else {
> +            switch (width) {
> +            case 16:
> +                env->vfp.vreg[dest].f16[j] = 0;
> +                break;
> +            case 32:
> +                env->vfp.vreg[dest].f32[j] = 0;
> +                break;
> +            case 64:
> +                env->vfp.vreg[dest].f64[j] = 0;
> +                break;
> +            default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST,
> +                                      GETPC());
> +                return;
> +            }
> + }
> + }
> +    env->vfp.vstart = 0;
> +}
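> +
> +/* vmin.vv vd, vs2, vs1, vm # vd[i] = min(vs2[i], vs1[i]), signed */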
> +void VECTOR_HELPER(vmin_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src1, src2, dest, vlmax;
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src1 = rs1 + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src1].s8[j] <=
> + env->vfp.vreg[src2].s8[j]) {
> + env->vfp.vreg[dest].s8[j] =
> + env->vfp.vreg[src1].s8[j];
> + } else {
> + env->vfp.vreg[dest].s8[j] =
> + env->vfp.vreg[src2].s8[j];
> + }
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src1].s16[j] <=
> + env->vfp.vreg[src2].s16[j]) {
> + env->vfp.vreg[dest].s16[j] =
> + env->vfp.vreg[src1].s16[j];
> + } else {
> + env->vfp.vreg[dest].s16[j] =
> + env->vfp.vreg[src2].s16[j];
> + }
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src1].s32[j] <=
> + env->vfp.vreg[src2].s32[j]) {
> + env->vfp.vreg[dest].s32[j] =
> + env->vfp.vreg[src1].s32[j];
> + } else {
> + env->vfp.vreg[dest].s32[j] =
> + env->vfp.vreg[src2].s32[j];
> + }
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src1].s64[j] <=
> + env->vfp.vreg[src2].s64[j]) {
> + env->vfp.vreg[dest].s64[j] =
> + env->vfp.vreg[src1].s64[j];
> + } else {
> + env->vfp.vreg[dest].s64[j] =
> + env->vfp.vreg[src2].s64[j];
> + }
> + }
> + break;
> + default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST,
> +                                      GETPC());
> + break;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
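> +
> +/* vmin.vx vd, vs2, rs1, vm # vd[i] = min(vs2[i], x[rs1]), signed */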
> +void VECTOR_HELPER(vmin_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src2, dest, vlmax;
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if ((int8_t)env->gpr[rs1] <=
> + env->vfp.vreg[src2].s8[j]) {
> + env->vfp.vreg[dest].s8[j] =
> + env->gpr[rs1];
> + } else {
> + env->vfp.vreg[dest].s8[j] =
> + env->vfp.vreg[src2].s8[j];
> + }
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if ((int16_t)env->gpr[rs1] <=
> + env->vfp.vreg[src2].s16[j]) {
> + env->vfp.vreg[dest].s16[j] =
> + env->gpr[rs1];
> + } else {
> + env->vfp.vreg[dest].s16[j] =
> + env->vfp.vreg[src2].s16[j];
> + }
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if ((int32_t)env->gpr[rs1] <=
> + env->vfp.vreg[src2].s32[j]) {
> + env->vfp.vreg[dest].s32[j] =
> + env->gpr[rs1];
> + } else {
> + env->vfp.vreg[dest].s32[j] =
> + env->vfp.vreg[src2].s32[j];
> + }
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if ((int64_t)extend_gpr(env->gpr[rs1]) <=
> + env->vfp.vreg[src2].s64[j]) {
> + env->vfp.vreg[dest].s64[j] =
> + (int64_t)extend_gpr(env->gpr[rs1]);
> + } else {
> + env->vfp.vreg[dest].s64[j] =
> + env->vfp.vreg[src2].s64[j];
> + }
> + }
> + break;
> + default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST,
> +                                      GETPC());
> + break;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vredmin.vs vd, vs2, vs1, vm # vd[0] = min( vs1[0] , vs2[*] ) */
> +void VECTOR_HELPER(vredmin_vs)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, src2;
> + int64_t min = 0;
> +
> + lmul = vector_get_lmul(env);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> +
> + if (vector_vtype_ill(env)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + if (env->vfp.vstart != 0) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vl = env->vfp.vl;
> + if (vl == 0) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < VLEN / 64; i++) {
> + env->vfp.vreg[rd].u64[i] = 0;
> + }
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> +
> + if (i < vl) {
> + switch (width) {
> + case 8:
> + if (i == 0) {
> + min = env->vfp.vreg[rs1].s8[0];
> + }
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (min > env->vfp.vreg[src2].s8[j]) {
> + min = env->vfp.vreg[src2].s8[j];
> + }
> + }
> + if (i == vl - 1) {
> + env->vfp.vreg[rd].s8[0] = min;
> + }
> + break;
> + case 16:
> + if (i == 0) {
> + min = env->vfp.vreg[rs1].s16[0];
> + }
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (min > env->vfp.vreg[src2].s16[j]) {
> + min = env->vfp.vreg[src2].s16[j];
> + }
> + }
> + if (i == vl - 1) {
> + env->vfp.vreg[rd].s16[0] = min;
> + }
> + break;
> + case 32:
> + if (i == 0) {
> + min = env->vfp.vreg[rs1].s32[0];
> + }
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (min > env->vfp.vreg[src2].s32[j]) {
> + min = env->vfp.vreg[src2].s32[j];
> + }
> + }
> + if (i == vl - 1) {
> + env->vfp.vreg[rd].s32[0] = min;
> + }
> + break;
> + case 64:
> + if (i == 0) {
> + min = env->vfp.vreg[rs1].s64[0];
> + }
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (min > env->vfp.vreg[src2].s64[j]) {
> + min = env->vfp.vreg[src2].s64[j];
> + }
> + }
> + if (i == vl - 1) {
> + env->vfp.vreg[rd].s64[0] = min;
> + }
> + break;
> + default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST,
> +                                      GETPC());
> + return;
> + }
> + }
> + }
> +    env->vfp.vstart = 0;
> +}
> +
> +/* vfredmin.vs vd, vs2, vs1, vm # Minimum value */
> +void VECTOR_HELPER(vfredmin_vs)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, src2;
> + float16 min16 = 0.0f;
> + float32 min32 = 0.0f;
> + float64 min64 = 0.0f;
> +
> + lmul = vector_get_lmul(env);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> +
> + if (vector_vtype_ill(env)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + if (env->vfp.vstart != 0) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vl = env->vfp.vl;
> + if (vl == 0) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < VLEN / 64; i++) {
> + env->vfp.vreg[rd].u64[i] = 0;
> + }
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> +
> + if (i < vl) {
> + switch (width) {
> + case 16:
> + if (i == 0) {
> + min16 = env->vfp.vreg[rs1].f16[0];
> + }
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    min16 = float16_minnum(min16, env->vfp.vreg[src2].f16[j],
> + &env->fp_status);
> + }
> + if (i == vl - 1) {
> + env->vfp.vreg[rd].f16[0] = min16;
> + }
> + break;
> + case 32:
> + if (i == 0) {
> + min32 = env->vfp.vreg[rs1].f32[0];
> + }
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    min32 = float32_minnum(min32, env->vfp.vreg[src2].f32[j],
> + &env->fp_status);
> + }
> + if (i == vl - 1) {
> + env->vfp.vreg[rd].f32[0] = min32;
> + }
> + break;
> + case 64:
> + if (i == 0) {
> + min64 = env->vfp.vreg[rs1].f64[0];
> + }
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    min64 = float64_minnum(min64, env->vfp.vreg[src2].f64[j],
> + &env->fp_status);
> + }
> + if (i == vl - 1) {
> + env->vfp.vreg[rd].f64[0] = min64;
> + }
> + break;
> + default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST,
> +                                      GETPC());
> + return;
> + }
> + }
> + }
> +    env->vfp.vstart = 0;
> +}
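> +
> +/* vmaxu.vv vd, vs2, vs1, vm # vd[i] = max(vs2[i], vs1[i]), unsigned */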
> +void VECTOR_HELPER(vmaxu_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src1, src2, dest, vlmax;
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src1 = rs1 + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src1].u8[j] >=
> + env->vfp.vreg[src2].u8[j]) {
> + env->vfp.vreg[dest].u8[j] =
> + env->vfp.vreg[src1].u8[j];
> + } else {
> + env->vfp.vreg[dest].u8[j] =
> + env->vfp.vreg[src2].u8[j];
> + }
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src1].u16[j] >=
> + env->vfp.vreg[src2].u16[j]) {
> + env->vfp.vreg[dest].u16[j] =
> + env->vfp.vreg[src1].u16[j];
> + } else {
> + env->vfp.vreg[dest].u16[j] =
> + env->vfp.vreg[src2].u16[j];
> + }
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src1].u32[j] >=
> + env->vfp.vreg[src2].u32[j]) {
> + env->vfp.vreg[dest].u32[j] =
> + env->vfp.vreg[src1].u32[j];
> + } else {
> + env->vfp.vreg[dest].u32[j] =
> + env->vfp.vreg[src2].u32[j];
> + }
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src1].u64[j] >=
> + env->vfp.vreg[src2].u64[j]) {
> + env->vfp.vreg[dest].u64[j] =
> + env->vfp.vreg[src1].u64[j];
> + } else {
> + env->vfp.vreg[dest].u64[j] =
> + env->vfp.vreg[src2].u64[j];
> + }
> + }
> + break;
> + default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST,
> +                                      GETPC());
> + break;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> +
> + env->vfp.vstart = 0;
> +}
> +
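> +/* vmaxu.vx vd, vs2, rs1, vm # vd[i] = max(vs2[i], x[rs1]), unsigned */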
> +void VECTOR_HELPER(vmaxu_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src2, dest, vlmax;
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if ((uint8_t)env->gpr[rs1] >=
> + env->vfp.vreg[src2].u8[j]) {
> + env->vfp.vreg[dest].u8[j] =
> + env->gpr[rs1];
> + } else {
> + env->vfp.vreg[dest].u8[j] =
> + env->vfp.vreg[src2].u8[j];
> + }
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if ((uint16_t)env->gpr[rs1] >=
> + env->vfp.vreg[src2].u16[j]) {
> + env->vfp.vreg[dest].u16[j] =
> + env->gpr[rs1];
> + } else {
> + env->vfp.vreg[dest].u16[j] =
> + env->vfp.vreg[src2].u16[j];
> + }
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if ((uint32_t)env->gpr[rs1] >=
> + env->vfp.vreg[src2].u32[j]) {
> + env->vfp.vreg[dest].u32[j] =
> + env->gpr[rs1];
> + } else {
> + env->vfp.vreg[dest].u32[j] =
> + env->vfp.vreg[src2].u32[j];
> + }
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if ((uint64_t)extend_gpr(env->gpr[rs1]) >=
> + env->vfp.vreg[src2].u64[j]) {
> + env->vfp.vreg[dest].u64[j] =
> + (uint64_t)extend_gpr(env->gpr[rs1]);
> + } else {
> + env->vfp.vreg[dest].u64[j] =
> + env->vfp.vreg[src2].u64[j];
> + }
> + }
> + break;
> + default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST,
> +                                      GETPC());
> + break;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vredmaxu.vs vd, vs2, vs1, vm # vd[0] = maxu( vs1[0] , vs2[*] ) */
> +void VECTOR_HELPER(vredmaxu_vs)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, src2;
> + uint64_t maxu = 0;
> +
> + lmul = vector_get_lmul(env);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> +
> + if (vector_vtype_ill(env)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + if (env->vfp.vstart != 0) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vl = env->vfp.vl;
> + if (vl == 0) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < VLEN / 64; i++) {
> + env->vfp.vreg[rd].u64[i] = 0;
> + }
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> +
> + if (i < vl) {
> + switch (width) {
> + case 8:
> + if (i == 0) {
> + maxu = env->vfp.vreg[rs1].u8[0];
> + }
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (maxu < env->vfp.vreg[src2].u8[j]) {
> + maxu = env->vfp.vreg[src2].u8[j];
> + }
> + }
> + if (i == vl - 1) {
> + env->vfp.vreg[rd].u8[0] = maxu;
> + }
> + break;
> + case 16:
> + if (i == 0) {
> + maxu = env->vfp.vreg[rs1].u16[0];
> + }
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (maxu < env->vfp.vreg[src2].u16[j]) {
> + maxu = env->vfp.vreg[src2].u16[j];
> + }
> + }
> + if (i == vl - 1) {
> + env->vfp.vreg[rd].u16[0] = maxu;
> + }
> + break;
> + case 32:
> + if (i == 0) {
> + maxu = env->vfp.vreg[rs1].u32[0];
> + }
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (maxu < env->vfp.vreg[src2].u32[j]) {
> + maxu = env->vfp.vreg[src2].u32[j];
> + }
> + }
> + if (i == vl - 1) {
> + env->vfp.vreg[rd].u32[0] = maxu;
> + }
> + break;
> + case 64:
> + if (i == 0) {
> + maxu = env->vfp.vreg[rs1].u64[0];
> + }
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (maxu < env->vfp.vreg[src2].u64[j]) {
> + maxu = env->vfp.vreg[src2].u64[j];
> + }
> + }
> + if (i == vl - 1) {
> + env->vfp.vreg[rd].u64[0] = maxu;
> + }
> + break;
> + default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST,
> +                                      GETPC());
> + return;
> + }
> + }
> + }
> +    env->vfp.vstart = 0;
> +}
> +
> +/* vfmax.vv vd, vs2, vs1, vm # Vector-vector */
> +void VECTOR_HELPER(vfmax_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest, src1, src2;
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + src1 = rs1 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f16[j] = float16_maxnum(
> +                                                        env->vfp.vreg[src1].f16[j],
> +                                                        env->vfp.vreg[src2].f16[j],
> + &env->fp_status);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f32[j] = float32_maxnum(
> +                                                        env->vfp.vreg[src1].f32[j],
> +                                                        env->vfp.vreg[src2].f32[j],
> + &env->fp_status);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f64[j] = float64_maxnum(
> +                                                        env->vfp.vreg[src1].f64[j],
> +                                                        env->vfp.vreg[src2].f64[j],
> + &env->fp_status);
> + }
> + break;
> + default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST,
> +                                      GETPC());
> + return;
> + }
> + } else {
> +            switch (width) {
> +            case 16:
> +                env->vfp.vreg[dest].f16[j] = 0;
> +                break;
> +            case 32:
> +                env->vfp.vreg[dest].f32[j] = 0;
> +                break;
> +            case 64:
> +                env->vfp.vreg[dest].f64[j] = 0;
> +                break;
> +            default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST,
> +                                      GETPC());
> +                return;
> +            }
> + }
> + }
> +    env->vfp.vstart = 0;
> +}
> +
> +/* vfmax.vf vd, vs2, rs1, vm # vector-scalar */
> +void VECTOR_HELPER(vfmax_vf)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest, src2;
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> +    if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> +        riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> +        return;
> +    }
> +    vector_lmul_check_reg(env, lmul, rs2, false);
> +    vector_lmul_check_reg(env, lmul, rd, false);
> +
> +    if (env->vfp.vstart >= vl) {
> +        return;
> +    }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f16[j] = float16_maxnum(
> + env->fpr[rs1],
> +                                                          env->vfp.vreg[src2].f16[j],
> + &env->fp_status);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f32[j] = float32_maxnum(
> + env->fpr[rs1],
> +                                                          env->vfp.vreg[src2].f32[j],
> + &env->fp_status);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f64[j] = float64_maxnum(
> + env->fpr[rs1],
> +                                                          env->vfp.vreg[src2].f64[j],
> + &env->fp_status);
> + }
> + break;
> + default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST,
> +                                      GETPC());
> + return;
> + }
> + } else {
> +            switch (width) {
> +            case 16:
> +                env->vfp.vreg[dest].f16[j] = 0;
> +                break;
> +            case 32:
> +                env->vfp.vreg[dest].f32[j] = 0;
> +                break;
> +            case 64:
> +                env->vfp.vreg[dest].f64[j] = 0;
> +                break;
> +            default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST,
> +                                      GETPC());
> +                return;
> +            }
> + }
> + }
> +    env->vfp.vstart = 0;
> +}
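> +
> +/* vmax.vv vd, vs2, vs1, vm # vd[i] = max(vs2[i], vs1[i]), signed */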
> +void VECTOR_HELPER(vmax_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src1, src2, dest, vlmax;
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src1 = rs1 + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src1].s8[j] >=
> + env->vfp.vreg[src2].s8[j]) {
> + env->vfp.vreg[dest].s8[j] =
> + env->vfp.vreg[src1].s8[j];
> + } else {
> + env->vfp.vreg[dest].s8[j] =
> + env->vfp.vreg[src2].s8[j];
> + }
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src1].s16[j] >=
> + env->vfp.vreg[src2].s16[j]) {
> + env->vfp.vreg[dest].s16[j] =
> + env->vfp.vreg[src1].s16[j];
> + } else {
> + env->vfp.vreg[dest].s16[j] =
> + env->vfp.vreg[src2].s16[j];
> + }
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src1].s32[j] >=
> + env->vfp.vreg[src2].s32[j]) {
> + env->vfp.vreg[dest].s32[j] =
> + env->vfp.vreg[src1].s32[j];
> + } else {
> + env->vfp.vreg[dest].s32[j] =
> + env->vfp.vreg[src2].s32[j];
> + }
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src1].s64[j] >=
> + env->vfp.vreg[src2].s64[j]) {
> + env->vfp.vreg[dest].s64[j] =
> + env->vfp.vreg[src1].s64[j];
> + } else {
> + env->vfp.vreg[dest].s64[j] =
> + env->vfp.vreg[src2].s64[j];
> + }
> + }
> + break;
> + default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST,
> +                                      GETPC());
> + break;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
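> +
> +/* vmax.vx vd, vs2, rs1, vm # vd[i] = max(vs2[i], x[rs1]), signed */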
> +void VECTOR_HELPER(vmax_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src2, dest, vlmax;
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if ((int8_t)env->gpr[rs1] >=
> + env->vfp.vreg[src2].s8[j]) {
> + env->vfp.vreg[dest].s8[j] =
> + env->gpr[rs1];
> + } else {
> + env->vfp.vreg[dest].s8[j] =
> + env->vfp.vreg[src2].s8[j];
> + }
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if ((int16_t)env->gpr[rs1] >=
> + env->vfp.vreg[src2].s16[j]) {
> + env->vfp.vreg[dest].s16[j] =
> + env->gpr[rs1];
> + } else {
> + env->vfp.vreg[dest].s16[j] =
> + env->vfp.vreg[src2].s16[j];
> + }
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if ((int32_t)env->gpr[rs1] >=
> + env->vfp.vreg[src2].s32[j]) {
> + env->vfp.vreg[dest].s32[j] =
> + env->gpr[rs1];
> + } else {
> + env->vfp.vreg[dest].s32[j] =
> + env->vfp.vreg[src2].s32[j];
> + }
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if ((int64_t)extend_gpr(env->gpr[rs1]) >=
> + env->vfp.vreg[src2].s64[j]) {
> + env->vfp.vreg[dest].s64[j] =
> + (int64_t)extend_gpr(env->gpr[rs1]);
> + } else {
> + env->vfp.vreg[dest].s64[j] =
> + env->vfp.vreg[src2].s64[j];
> + }
> + }
> + break;
> + default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST,
> +                                      GETPC());
> + break;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vredmax.vs vd, vs2, vs1, vm # vd[0] = max( vs1[0] , vs2[*] ) */
> +void VECTOR_HELPER(vredmax_vs)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, src2;
> + int64_t max = 0;
> +
> + lmul = vector_get_lmul(env);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> +
> + if (vector_vtype_ill(env)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + if (env->vfp.vstart != 0) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vl = env->vfp.vl;
> + if (vl == 0) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < VLEN / 64; i++) {
> + env->vfp.vreg[rd].u64[i] = 0;
> + }
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> +
> + if (i < vl) {
> + switch (width) {
> + case 8:
> + if (i == 0) {
> + max = env->vfp.vreg[rs1].s8[0];
> + }
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (max < env->vfp.vreg[src2].s8[j]) {
> + max = env->vfp.vreg[src2].s8[j];
> + }
> + }
> + if (i == vl - 1) {
> + env->vfp.vreg[rd].s8[0] = max;
> + }
> + break;
> + case 16:
> + if (i == 0) {
> + max = env->vfp.vreg[rs1].s16[0];
> + }
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (max < env->vfp.vreg[src2].s16[j]) {
> + max = env->vfp.vreg[src2].s16[j];
> + }
> + }
> + if (i == vl - 1) {
> + env->vfp.vreg[rd].s16[0] = max;
> + }
> + break;
> + case 32:
> + if (i == 0) {
> + max = env->vfp.vreg[rs1].s32[0];
> + }
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (max < env->vfp.vreg[src2].s32[j]) {
> + max = env->vfp.vreg[src2].s32[j];
> + }
> + }
> + if (i == vl - 1) {
> + env->vfp.vreg[rd].s32[0] = max;
> + }
> + break;
> + case 64:
> + if (i == 0) {
> + max = env->vfp.vreg[rs1].s64[0];
> + }
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (max < env->vfp.vreg[src2].s64[j]) {
> + max = env->vfp.vreg[src2].s64[j];
> + }
> + }
> + if (i == vl - 1) {
> + env->vfp.vreg[rd].s64[0] = max;
> + }
> + break;
> + default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST,
> +                                      GETPC());
> + return;
> + }
> + }
> + }
> +    env->vfp.vstart = 0;
> +}
> +
> +/* vfredmax.vs vd, vs2, vs1, vm # Maximum value */
> +void VECTOR_HELPER(vfredmax_vs)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, src2;
> + float16 max16 = 0.0f;
> + float32 max32 = 0.0f;
> + float64 max64 = 0.0f;
> +
> + lmul = vector_get_lmul(env);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> +
> + if (vector_vtype_ill(env)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + if (env->vfp.vstart != 0) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vl = env->vfp.vl;
> + if (vl == 0) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < VLEN / 64; i++) {
> + env->vfp.vreg[rd].u64[i] = 0;
> + }
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> +
> + if (i < vl) {
> + switch (width) {
> + case 16:
> + if (i == 0) {
> + max16 = env->vfp.vreg[rs1].f16[0];
> + }
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    max16 = float16_maxnum(max16, env->vfp.vreg[src2].f16[j],
> + &env->fp_status);
> + }
> + if (i == vl - 1) {
> + env->vfp.vreg[rd].f16[0] = max16;
> + }
> + break;
> + case 32:
> + if (i == 0) {
> + max32 = env->vfp.vreg[rs1].f32[0];
> + }
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    max32 = float32_maxnum(max32, env->vfp.vreg[src2].f32[j],
> + &env->fp_status);
> + }
> + if (i == vl - 1) {
> + env->vfp.vreg[rd].f32[0] = max32;
> + }
> + break;
> + case 64:
> + if (i == 0) {
> + max64 = env->vfp.vreg[rs1].f64[0];
> + }
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    max64 = float64_maxnum(max64, env->vfp.vreg[src2].f64[j],
> + &env->fp_status);
> + }
> + if (i == vl - 1) {
> + env->vfp.vreg[rd].f64[0] = max64;
> + }
> + break;
> + default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST,
> +                                      GETPC());
> + return;
> + }
> + }
> + }
> +    env->vfp.vstart = 0;
> +}
> +
> +/* vfsgnj.vv vd, vs2, vs1, vm # Vector-vector */
> +void VECTOR_HELPER(vfsgnj_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest, src1, src2;
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + src1 = rs1 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f16[j] = deposit16(
> +                                                    env->vfp.vreg[src1].f16[j],
> +                                                    0,
> +                                                    15,
> +                                                    env->vfp.vreg[src2].f16[j]);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f32[j] = deposit32(
> +                                                    env->vfp.vreg[src1].f32[j],
> +                                                    0,
> +                                                    31,
> +                                                    env->vfp.vreg[src2].f32[j]);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f64[j] = deposit64(
> +                                                    env->vfp.vreg[src1].f64[j],
> +                                                    0,
> +                                                    63,
> +                                                    env->vfp.vreg[src2].f64[j]);
> + }
> + break;
> + default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST,
> +                                      GETPC());
> + return;
> + }
> + } else {
> +            switch (width) {
> +            case 16:
> +                env->vfp.vreg[dest].f16[j] = 0;
> +                break;
> +            case 32:
> +                env->vfp.vreg[dest].f32[j] = 0;
> +                break;
> +            case 64:
> +                env->vfp.vreg[dest].f64[j] = 0;
> +                break;
> +            default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST,
> +                                      GETPC());
> +                return;
> +            }
> + }
> + }
> +    env->vfp.vstart = 0;
> +}
> +
> +/* vfsgnj.vf vd, vs2, rs1, vm # vector-scalar */
> +void VECTOR_HELPER(vfsgnj_vf)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest, src2;
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f16[j] = deposit16(
> + env->fpr[rs1],
> + 0,
> + 15,
> +                                                   env->vfp.vreg[src2].f16[j]);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f32[j] = deposit32(
> + env->fpr[rs1],
> + 0,
> + 31,
> +                                                   env->vfp.vreg[src2].f32[j]);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f64[j] = deposit64(
> + env->fpr[rs1],
> + 0,
> + 63,
> +                                                   env->vfp.vreg[src2].f64[j]);
> + }
> + break;
> + default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST,
> +                                      GETPC());
> + return;
> + }
> + } else {
> +            switch (width) {
> +            case 16:
> +                env->vfp.vreg[dest].f16[j] = 0;
> +                break;
> +            case 32:
> +                env->vfp.vreg[dest].f32[j] = 0;
> +                break;
> +            case 64:
> +                env->vfp.vreg[dest].f64[j] = 0;
> +                break;
> +            default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST,
> +                                      GETPC());
> +                return;
> +            }
> + }
> + }
> +    env->vfp.vstart = 0;
> +}
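> +
> +/* vand.vv vd, vs2, vs1, vm # vd[i] = vs2[i] & vs1[i] */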
> +void VECTOR_HELPER(vand_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src1, src2, dest, vlmax;
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src1 = rs1 + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u8[j] = env->vfp.vreg[src1].u8[j]
> + & env->vfp.vreg[src2].u8[j];
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    env->vfp.vreg[dest].u16[j] = env->vfp.vreg[src1].u16[j]
> + & env->vfp.vreg[src2].u16[j];
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    env->vfp.vreg[dest].u32[j] = env->vfp.vreg[src1].u32[j]
> + & env->vfp.vreg[src2].u32[j];
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    env->vfp.vreg[dest].u64[j] = env->vfp.vreg[src1].u64[j]
> + & env->vfp.vreg[src2].u64[j];
> + }
> + break;
> + default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST,
> +                                      GETPC());
> + break;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> +
> + env->vfp.vstart = 0;
> +}
> +
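> +/* vand.vx vd, vs2, rs1, vm # vd[i] = vs2[i] & x[rs1] */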
> +void VECTOR_HELPER(vand_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src2, dest, vlmax;
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u8[j] = env->gpr[rs1]
> + & env->vfp.vreg[src2].u8[j];
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u16[j] = env->gpr[rs1]
> + & env->vfp.vreg[src2].u16[j];
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u32[j] = env->gpr[rs1]
> + & env->vfp.vreg[src2].u32[j];
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u64[j] =
> + (uint64_t)extend_gpr(env->gpr[rs1])
> + & env->vfp.vreg[src2].u64[j];
> + }
> + break;
> + default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST,
> +                                      GETPC());
> + break;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vand_vi)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src2, dest, vlmax;
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s8[j] = sign_extend(rs1, 5)
> + & env->vfp.vreg[src2].s8[j];
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s16[j] = sign_extend(rs1, 5)
> + & env->vfp.vreg[src2].s16[j];
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s32[j] = sign_extend(rs1, 5)
> + & env->vfp.vreg[src2].s32[j];
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s64[j] = sign_extend(rs1, 5)
> + & env->vfp.vreg[src2].s64[j];
> + }
> + break;
> + default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST,
> +                                      GETPC());
> + break;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
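
The .vi forms widen the 5-bit immediate field by sign extension before the
bitwise operation, so the usable immediate range is -16..15. QEMU's
sextract64() from "qemu/bitops.h" already expresses this; a freestanding
sketch of the same idea:

    #include <stdint.h>

    /* Sign-extend the low `bits` bits of x, as sign_extend(rs1, 5) does
     * for the simm5 field (e.g. 0x1f -> -1, 0x10 -> -16). */
    static inline int64_t sext(uint64_t x, unsigned bits)
    {
        return (int64_t)(x << (64 - bits)) >> (64 - bits);
    }
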
> +
> +/* vfsgnjn.vv vd, vs2, vs1, vm # Vector-vector */
> +void VECTOR_HELPER(vfsgnjn_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest, src1, src2;
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + src1 = rs1 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> +            case 16:
> +                if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    env->vfp.vreg[dest].f16[j] = deposit16(
> +                        ~env->vfp.vreg[src1].f16[j], 0, 15,
> +                        env->vfp.vreg[src2].f16[j]);
> +                }
> +                break;
> +            case 32:
> +                if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    env->vfp.vreg[dest].f32[j] = deposit32(
> +                        ~env->vfp.vreg[src1].f32[j], 0, 31,
> +                        env->vfp.vreg[src2].f32[j]);
> +                }
> +                break;
> +            case 64:
> +                if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    env->vfp.vreg[dest].f64[j] = deposit64(
> +                        ~env->vfp.vreg[src1].f64[j], 0, 63,
> +                        env->vfp.vreg[src2].f64[j]);
> +                }
> +                break;
> +            default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST,
> +                                      GETPC());
> +                return;
> +            }
> +        } else {
> +            switch (width) {
> +            case 16:
> +                env->vfp.vreg[dest].f16[j] = 0;
> +                break;
> +            case 32:
> +                env->vfp.vreg[dest].f32[j] = 0;
> +                break;
> +            case 64:
> +                env->vfp.vreg[dest].f64[j] = 0;
> +                break;
> +            default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST,
> +                                      GETPC());
> +                return;
> +            }
> +        }
> +    }
> +    env->vfp.vstart = 0;
> +}
> +/* vfsgnjn.vf vd, vs2, rs1, vm # vector-scalar */
> +void VECTOR_HELPER(vfsgnjn_vf)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest, src2;
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> +            case 16:
> +                if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    env->vfp.vreg[dest].f16[j] = deposit16(
> +                        ~env->fpr[rs1], 0, 15,
> +                        env->vfp.vreg[src2].f16[j]);
> +                }
> +                break;
> +            case 32:
> +                if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    env->vfp.vreg[dest].f32[j] = deposit32(
> +                        ~env->fpr[rs1], 0, 31,
> +                        env->vfp.vreg[src2].f32[j]);
> +                }
> +                break;
> +            case 64:
> +                if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    env->vfp.vreg[dest].f64[j] = deposit64(
> +                        ~env->fpr[rs1], 0, 63,
> +                        env->vfp.vreg[src2].f64[j]);
> +                }
> +                break;
> +            default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST,
> +                                      GETPC());
> +                return;
> +            }
> +        } else {
> +            switch (width) {
> +            case 16:
> +                env->vfp.vreg[dest].f16[j] = 0;
> +                break;
> +            case 32:
> +                env->vfp.vreg[dest].f32[j] = 0;
> +                break;
> +            case 64:
> +                env->vfp.vreg[dest].f64[j] = 0;
> +                break;
> +            default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST,
> +                                      GETPC());
> +                return;
> +            }
> +        }
> +    }
> +    env->vfp.vstart = 0;
> +}
> +void VECTOR_HELPER(vor_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src1, src2, dest, vlmax;
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src1 = rs1 + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u8[j] = env->vfp.vreg[src1].u8[j]
> + | env->vfp.vreg[src2].u8[j];
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    env->vfp.vreg[dest].u16[j] = env->vfp.vreg[src1].u16[j]
> +                        | env->vfp.vreg[src2].u16[j];
> +                }
> +                break;
> +            case 32:
> +                if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    env->vfp.vreg[dest].u32[j] = env->vfp.vreg[src1].u32[j]
> +                        | env->vfp.vreg[src2].u32[j];
> +                }
> +                break;
> +            case 64:
> +                if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    env->vfp.vreg[dest].u64[j] = env->vfp.vreg[src1].u64[j]
> +                        | env->vfp.vreg[src2].u64[j];
> +                }
> +                break;
> +            default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST,
> +                                      GETPC());
> + break;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +void VECTOR_HELPER(vor_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src2, dest, vlmax;
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u8[j] = env->gpr[rs1]
> + | env->vfp.vreg[src2].u8[j];
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u16[j] = env->gpr[rs1]
> + | env->vfp.vreg[src2].u16[j];
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u32[j] = env->gpr[rs1]
> + | env->vfp.vreg[src2].u32[j];
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u64[j] =
> + (uint64_t)extend_gpr(env->gpr[rs1])
> + | env->vfp.vreg[src2].u64[j];
> + }
> + break;
> + default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST,
> +                                      GETPC());
> + break;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +void VECTOR_HELPER(vor_vi)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src2, dest, vlmax;
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s8[j] = sign_extend(rs1, 5)
> + | env->vfp.vreg[src2].s8[j];
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s16[j] = sign_extend(rs1, 5)
> + | env->vfp.vreg[src2].s16[j];
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s32[j] = sign_extend(rs1, 5)
> + | env->vfp.vreg[src2].s32[j];
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s64[j] = sign_extend(rs1, 5)
> + | env->vfp.vreg[src2].s64[j];
> + }
> + break;
> + default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST,
> +                                      GETPC());
> + break;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vfsgnjx.vv vd, vs2, vs1, vm # Vector-vector */
> +void VECTOR_HELPER(vfsgnjx_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest, src1, src2;
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + src1 = rs1 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> +            case 16:
> +                if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    env->vfp.vreg[dest].f16[j] = deposit16(
> +                        env->vfp.vreg[src1].f16[j] ^
> +                        env->vfp.vreg[src2].f16[j], 0, 15,
> +                        env->vfp.vreg[src2].f16[j]);
> +                }
> +                break;
> +            case 32:
> +                if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    env->vfp.vreg[dest].f32[j] = deposit32(
> +                        env->vfp.vreg[src1].f32[j] ^
> +                        env->vfp.vreg[src2].f32[j], 0, 31,
> +                        env->vfp.vreg[src2].f32[j]);
> +                }
> +                break;
> +            case 64:
> +                if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    env->vfp.vreg[dest].f64[j] = deposit64(
> +                        env->vfp.vreg[src1].f64[j] ^
> +                        env->vfp.vreg[src2].f64[j], 0, 63,
> +                        env->vfp.vreg[src2].f64[j]);
> +                }
> +                break;
> +            default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST,
> +                                      GETPC());
> +                return;
> +            }
> +        } else {
> +            switch (width) {
> +            case 16:
> +                env->vfp.vreg[dest].f16[j] = 0;
> +                break;
> +            case 32:
> +                env->vfp.vreg[dest].f32[j] = 0;
> +                break;
> +            case 64:
> +                env->vfp.vreg[dest].f64[j] = 0;
> +                break;
> +            default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST,
> +                                      GETPC());
> +                return;
> +            }
> +        }
> +    }
> +    env->vfp.vstart = 0;
> +}
> +
> +/* vfsgnjx.vf vd, vs2, rs1, vm # vector-scalar */
> +void VECTOR_HELPER(vfsgnjx_vf)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest, src2;
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> +            case 16:
> +                if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    env->vfp.vreg[dest].f16[j] = deposit16(
> +                        env->fpr[rs1] ^
> +                        env->vfp.vreg[src2].f16[j], 0, 15,
> +                        env->vfp.vreg[src2].f16[j]);
> +                }
> +                break;
> +            case 32:
> +                if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    env->vfp.vreg[dest].f32[j] = deposit32(
> +                        env->fpr[rs1] ^
> +                        env->vfp.vreg[src2].f32[j], 0, 31,
> +                        env->vfp.vreg[src2].f32[j]);
> +                }
> +                break;
> +            case 64:
> +                if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    env->vfp.vreg[dest].f64[j] = deposit64(
> +                        env->fpr[rs1] ^
> +                        env->vfp.vreg[src2].f64[j], 0, 63,
> +                        env->vfp.vreg[src2].f64[j]);
> +                }
> +                break;
> +            default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST,
> +                                      GETPC());
> +                return;
> +            }
> +        } else {
> +            switch (width) {
> +            case 16:
> +                env->vfp.vreg[dest].f16[j] = 0;
> +                break;
> +            case 32:
> +                env->vfp.vreg[dest].f32[j] = 0;
> +                break;
> +            case 64:
> +                env->vfp.vreg[dest].f64[j] = 0;
> +                break;
> +            default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST,
> +                                      GETPC());
> +                return;
> +            }
> +        }
> +    }
> +    env->vfp.vstart = 0;
> +}
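
The three injection flavours line up with the scalar F-extension identities:
fsgnj(a, a) is a move, fsgnjn(a, a) is negation and fsgnjx(a, a) is absolute
value, since the injected sign is sign(a), ~sign(a) and sign(a)^sign(a)
respectively. In single-precision bit patterns:

    #include <stdint.h>

    /* fsgnjn.s a, a == fneg.s: flip the sign bit. */
    static inline uint32_t fneg32(uint32_t a) { return a ^ 0x80000000u; }
    /* fsgnjx.s a, a == fabs.s: clear the sign bit. */
    static inline uint32_t fabs32(uint32_t a) { return a & 0x7fffffffu; }
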
> +void VECTOR_HELPER(vxor_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src1, src2, dest, vlmax;
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src1 = rs1 + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u8[j] = env->vfp.vreg[src1].u8[j]
> + ^ env->vfp.vreg[src2].u8[j];
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    env->vfp.vreg[dest].u16[j] = env->vfp.vreg[src1].u16[j]
> +                        ^ env->vfp.vreg[src2].u16[j];
> +                }
> +                break;
> +            case 32:
> +                if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    env->vfp.vreg[dest].u32[j] = env->vfp.vreg[src1].u32[j]
> +                        ^ env->vfp.vreg[src2].u32[j];
> +                }
> +                break;
> +            case 64:
> +                if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    env->vfp.vreg[dest].u64[j] = env->vfp.vreg[src1].u64[j]
> +                        ^ env->vfp.vreg[src2].u64[j];
> +                }
> +                break;
> +            default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST,
> +                                      GETPC());
> + break;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +void VECTOR_HELPER(vxor_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src2, dest, vlmax;
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u8[j] = env->gpr[rs1]
> + ^ env->vfp.vreg[src2].u8[j];
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u16[j] = env->gpr[rs1]
> + ^ env->vfp.vreg[src2].u16[j];
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u32[j] = env->gpr[rs1]
> + ^ env->vfp.vreg[src2].u32[j];
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u64[j] =
> + (uint64_t)extend_gpr(env->gpr[rs1])
> + ^ env->vfp.vreg[src2].u64[j];
> + }
> + break;
> + default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST,
> +                                      GETPC());
> + break;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vxor_vi)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src2, dest, vlmax;
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s8[j] = sign_extend(rs1, 5)
> + ^ env->vfp.vreg[src2].s8[j];
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s16[j] = sign_extend(rs1, 5)
> + ^ env->vfp.vreg[src2].s16[j];
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s32[j] = sign_extend(rs1, 5)
> + ^ env->vfp.vreg[src2].s32[j];
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s64[j] = sign_extend(rs1, 5)
> + ^ env->vfp.vreg[src2].s64[j];
> + }
> + break;
> + default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST,
> +                                      GETPC());
> + break;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
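
Worth noting: because the immediate is sign-extended, vxor.vi vd, vs2, -1
flips every bit of each element, which is how the vnot.v pseudo-instruction
is encoded. For SEW=8:

    #include <stdint.h>

    /* vnot.v vd, vs2 == vxor.vi vd, vs2, -1 (e.g. 0x5a -> 0xa5). */
    static inline uint8_t vnot8(uint8_t x) { return x ^ 0xff; }
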
> +
> +void VECTOR_HELPER(vadc_vvm)(CPURISCVState *env, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src1, src2, dest, vlmax, carry;
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_carry(lmul, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src1 = rs1 + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + carry = vector_get_carry(env, width, lmul, i);
> + env->vfp.vreg[dest].u8[j] = env->vfp.vreg[src1].u8[j]
> + + env->vfp.vreg[src2].u8[j] + carry;
> + break;
> + case 16:
> + carry = vector_get_carry(env, width, lmul, i);
> + env->vfp.vreg[dest].u16[j] = env->vfp.vreg[src1].u16[j]
> + + env->vfp.vreg[src2].u16[j] + carry;
> + break;
> + case 32:
> + carry = vector_get_carry(env, width, lmul, i);
> + env->vfp.vreg[dest].u32[j] = env->vfp.vreg[src1].u32[j]
> + + env->vfp.vreg[src2].u32[j] + carry;
> + break;
> + case 64:
> + carry = vector_get_carry(env, width, lmul, i);
> + env->vfp.vreg[dest].u64[j] = env->vfp.vreg[src1].u64[j]
> + + env->vfp.vreg[src2].u64[j] + carry;
> + break;
> + default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST,
> +                                      GETPC());
> + break;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +void VECTOR_HELPER(vadc_vxm)(CPURISCVState *env, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src2, dest, vlmax, carry;
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_carry(lmul, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + carry = vector_get_carry(env, width, lmul, i);
> + env->vfp.vreg[dest].u8[j] = env->gpr[rs1]
> + + env->vfp.vreg[src2].u8[j] + carry;
> + break;
> + case 16:
> + carry = vector_get_carry(env, width, lmul, i);
> + env->vfp.vreg[dest].u16[j] = env->gpr[rs1]
> + + env->vfp.vreg[src2].u16[j] + carry;
> + break;
> + case 32:
> + carry = vector_get_carry(env, width, lmul, i);
> + env->vfp.vreg[dest].u32[j] = env->gpr[rs1]
> + + env->vfp.vreg[src2].u32[j] + carry;
> + break;
> + case 64:
> + carry = vector_get_carry(env, width, lmul, i);
> +                env->vfp.vreg[dest].u64[j] =
> +                    (uint64_t)extend_gpr(env->gpr[rs1])
> + + env->vfp.vreg[src2].u64[j] + carry;
> + break;
> + default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST,
> +                                      GETPC());
> + break;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vadc_vim)(CPURISCVState *env, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src2, dest, vlmax, carry;
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_carry(lmul, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + carry = vector_get_carry(env, width, lmul, i);
> + env->vfp.vreg[dest].u8[j] = sign_extend(rs1, 5)
> + + env->vfp.vreg[src2].u8[j] + carry;
> + break;
> + case 16:
> + carry = vector_get_carry(env, width, lmul, i);
> + env->vfp.vreg[dest].u16[j] = sign_extend(rs1, 5)
> + + env->vfp.vreg[src2].u16[j] + carry;
> + break;
> + case 32:
> + carry = vector_get_carry(env, width, lmul, i);
> + env->vfp.vreg[dest].u32[j] = sign_extend(rs1, 5)
> + + env->vfp.vreg[src2].u32[j] + carry;
> + break;
> + case 64:
> + carry = vector_get_carry(env, width, lmul, i);
> + env->vfp.vreg[dest].u64[j] = sign_extend(rs1, 5)
> + + env->vfp.vreg[src2].u64[j] + carry;
> + break;
> + default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST,
> +                                      GETPC());
> + break;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vmadc_vvm)(CPURISCVState *env, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src1, src2, vlmax, carry;
> + uint64_t tmp;
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> +
> + if (vector_vtype_ill(env)
> + || vector_overlap_dstgp_srcgp(rd, 1, rs1, lmul)
> + || vector_overlap_dstgp_srcgp(rd, 1, rs2, lmul)
> + || (rd == 0)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src1 = rs1 + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + carry = vector_get_carry(env, width, lmul, i);
> + tmp = env->vfp.vreg[src1].u8[j]
> + + env->vfp.vreg[src2].u8[j] + carry;
> + tmp = tmp >> width;
> +
> + vector_mask_result(env, rd, width, lmul, i, tmp);
> + break;
> + case 16:
> + carry = vector_get_carry(env, width, lmul, i);
> + tmp = env->vfp.vreg[src1].u16[j]
> + + env->vfp.vreg[src2].u16[j] + carry;
> + tmp = tmp >> width;
> + vector_mask_result(env, rd, width, lmul, i, tmp);
> + break;
> + case 32:
> + carry = vector_get_carry(env, width, lmul, i);
> + tmp = (uint64_t)env->vfp.vreg[src1].u32[j]
> + + (uint64_t)env->vfp.vreg[src2].u32[j] + carry;
> + tmp = tmp >> width;
> + vector_mask_result(env, rd, width, lmul, i, tmp);
> + break;
> + case 64:
> + carry = vector_get_carry(env, width, lmul, i);
> + tmp = env->vfp.vreg[src1].u64[j]
> + + env->vfp.vreg[src2].u64[j] + carry;
> +
> + if ((tmp < env->vfp.vreg[src1].u64[j] ||
> + tmp < env->vfp.vreg[src2].u64[j])
> + || (env->vfp.vreg[src1].u64[j] == MAX_U64 &&
> + env->vfp.vreg[src2].u64[j] == MAX_U64)) {
> + tmp = 1;
> + } else {
> + tmp = 0;
> + }
> + vector_mask_result(env, rd, width, lmul, i, tmp);
> + break;
> + default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST,
> +                                      GETPC());
> + break;
> + }
> + } else {
> + if (width <= 64) {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + } else {
> +                    riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST,
> +                                          GETPC());
> + return;
> + }
> + }
> + }
> + env->vfp.vstart = 0;
> +}
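
The narrower cases recover the carry by shifting the widened sum, but the
64-bit case has no wider type to widen into, hence the comparisons above. A
compact equivalent (a sketch, not code from the patch):

    #include <stdint.h>

    /* Carry-out of a + b + cin (cin in {0,1}) without a wider type. */
    static inline int carry_out64(uint64_t a, uint64_t b, int cin)
    {
        uint64_t s = a + b;
        int c = s < a;            /* overflow in a + b */
        s += cin;
        c |= s < (uint64_t)cin;   /* overflow when folding in cin */
        return c;
    }
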
> +void VECTOR_HELPER(vmadc_vxm)(CPURISCVState *env, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src2, vlmax, carry;
> + uint64_t tmp, extend_rs1;
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env)
> + || vector_overlap_dstgp_srcgp(rd, 1, rs2, lmul)
> + || (rd == 0)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + carry = vector_get_carry(env, width, lmul, i);
> + tmp = (uint8_t)env->gpr[rs1]
> + + env->vfp.vreg[src2].u8[j] + carry;
> + tmp = tmp >> width;
> +
> + vector_mask_result(env, rd, width, lmul, i, tmp);
> + break;
> + case 16:
> + carry = vector_get_carry(env, width, lmul, i);
> + tmp = (uint16_t)env->gpr[rs1]
> + + env->vfp.vreg[src2].u16[j] + carry;
> + tmp = tmp >> width;
> + vector_mask_result(env, rd, width, lmul, i, tmp);
> + break;
> + case 32:
> + carry = vector_get_carry(env, width, lmul, i);
> + tmp = (uint64_t)((uint32_t)env->gpr[rs1])
> + + (uint64_t)env->vfp.vreg[src2].u32[j] + carry;
> + tmp = tmp >> width;
> + vector_mask_result(env, rd, width, lmul, i, tmp);
> + break;
> + case 64:
> + carry = vector_get_carry(env, width, lmul, i);
> +
> + extend_rs1 = (uint64_t)extend_gpr(env->gpr[rs1]);
> + tmp = extend_rs1 + env->vfp.vreg[src2].u64[j] + carry;
> + if ((tmp < extend_rs1) ||
> + (carry && (env->vfp.vreg[src2].u64[j] == MAX_U64))) {
> + tmp = 1;
> + } else {
> + tmp = 0;
> + }
> + vector_mask_result(env, rd, width, lmul, i, tmp);
> + break;
> + default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST,
> +                                      GETPC());
> + break;
> + }
> + } else {
> + if (width <= 64) {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + } else {
> +                    riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST,
> +                                          GETPC());
> + return;
> + }
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vmadc_vim)(CPURISCVState *env, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src2, vlmax, carry;
> + uint64_t tmp;
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> +
> + if (vector_vtype_ill(env)
> + || vector_overlap_dstgp_srcgp(rd, 1, rs2, lmul)
> + || (rd == 0)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + carry = vector_get_carry(env, width, lmul, i);
> + tmp = (uint8_t)sign_extend(rs1, 5)
> + + env->vfp.vreg[src2].u8[j] + carry;
> + tmp = tmp >> width;
> +
> + vector_mask_result(env, rd, width, lmul, i, tmp);
> + break;
> + case 16:
> + carry = vector_get_carry(env, width, lmul, i);
> + tmp = (uint16_t)sign_extend(rs1, 5)
> + + env->vfp.vreg[src2].u16[j] + carry;
> + tmp = tmp >> width;
> + vector_mask_result(env, rd, width, lmul, i, tmp);
> + break;
> + case 32:
> + carry = vector_get_carry(env, width, lmul, i);
> + tmp = (uint64_t)((uint32_t)sign_extend(rs1, 5))
> + + (uint64_t)env->vfp.vreg[src2].u32[j] + carry;
> + tmp = tmp >> width;
> + vector_mask_result(env, rd, width, lmul, i, tmp);
> + break;
> + case 64:
> + carry = vector_get_carry(env, width, lmul, i);
> + tmp = (uint64_t)sign_extend(rs1, 5)
> + + env->vfp.vreg[src2].u64[j] + carry;
> +
> + if ((tmp < (uint64_t)sign_extend(rs1, 5) ||
> + tmp < env->vfp.vreg[src2].u64[j])
> + || ((uint64_t)sign_extend(rs1, 5) == MAX_U64 &&
> + env->vfp.vreg[src2].u64[j] == MAX_U64)) {
> + tmp = 1;
> + } else {
> + tmp = 0;
> + }
> + vector_mask_result(env, rd, width, lmul, i, tmp);
> + break;
> + default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST,
> +                                      GETPC());
> + break;
> + }
> + } else {
> + if (width <= 64) {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + } else {
> +                    riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST,
> +                                          GETPC());
> + return;
> + }
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vsbc_vvm)(CPURISCVState *env, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src1, src2, dest, vlmax, carry;
> +
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_carry(lmul, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src1 = rs1 + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + carry = vector_get_carry(env, width, lmul, i);
> + env->vfp.vreg[dest].u8[j] = env->vfp.vreg[src2].u8[j]
> + - env->vfp.vreg[src1].u8[j] - carry;
> + break;
> + case 16:
> + carry = vector_get_carry(env, width, lmul, i);
> + env->vfp.vreg[dest].u16[j] = env->vfp.vreg[src2].u16[j]
> + - env->vfp.vreg[src1].u16[j] - carry;
> + break;
> + case 32:
> + carry = vector_get_carry(env, width, lmul, i);
> + env->vfp.vreg[dest].u32[j] = env->vfp.vreg[src2].u32[j]
> + - env->vfp.vreg[src1].u32[j] - carry;
> + break;
> + case 64:
> + carry = vector_get_carry(env, width, lmul, i);
> + env->vfp.vreg[dest].u64[j] = env->vfp.vreg[src2].u64[j]
> + - env->vfp.vreg[src1].u64[j] - carry;
> + break;
> + default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST,
> +                                      GETPC());
> + break;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +void VECTOR_HELPER(vsbc_vxm)(CPURISCVState *env, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src2, dest, vlmax, carry;
> +
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_carry(lmul, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + carry = vector_get_carry(env, width, lmul, i);
> + env->vfp.vreg[dest].u8[j] = env->vfp.vreg[src2].u8[j]
> + - env->gpr[rs1] - carry;
> + break;
> + case 16:
> + carry = vector_get_carry(env, width, lmul, i);
> + env->vfp.vreg[dest].u16[j] = env->vfp.vreg[src2].u16[j]
> + - env->gpr[rs1] - carry;
> + break;
> + case 32:
> + carry = vector_get_carry(env, width, lmul, i);
> + env->vfp.vreg[dest].u32[j] = env->vfp.vreg[src2].u32[j]
> + - env->gpr[rs1] - carry;
> + break;
> + case 64:
> + carry = vector_get_carry(env, width, lmul, i);
> + env->vfp.vreg[dest].u64[j] = env->vfp.vreg[src2].u64[j]
> + - (uint64_t)extend_gpr(env->gpr[rs1]) - carry;
> + break;
> + default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST,
> +                                      GETPC());
> + break;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
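
vsbc itself is plain modular arithmetic; only the vmsbc variants below need
explicit borrow detection. For SEW=8, for instance, 0x00 - 0x01 - 1 wraps to
0xfe:

    #include <stdint.h>

    /* Element operation used by vsbc: modular subtract-with-borrow. */
    static inline uint8_t sbc8(uint8_t a, uint8_t b, int bin)
    {
        return (uint8_t)(a - b - bin);
    }
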
> +void VECTOR_HELPER(vmsbc_vvm)(CPURISCVState *env, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src1, src2, vlmax, carry;
> + uint64_t tmp;
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> +
> + if (vector_vtype_ill(env)
> + || vector_overlap_dstgp_srcgp(rd, 1, rs1, lmul)
> + || vector_overlap_dstgp_srcgp(rd, 1, rs2, lmul)
> + || (rd == 0)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src1 = rs1 + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + carry = vector_get_carry(env, width, lmul, i);
> + tmp = env->vfp.vreg[src2].u8[j]
> + - env->vfp.vreg[src1].u8[j] - carry;
> + tmp = (tmp >> width) & 0x1;
> +
> + vector_mask_result(env, rd, width, lmul, i, tmp);
> + break;
> + case 16:
> + carry = vector_get_carry(env, width, lmul, i);
> + tmp = env->vfp.vreg[src2].u16[j]
> + - env->vfp.vreg[src1].u16[j] - carry;
> + tmp = (tmp >> width) & 0x1;
> + vector_mask_result(env, rd, width, lmul, i, tmp);
> + break;
> + case 32:
> + carry = vector_get_carry(env, width, lmul, i);
> + tmp = (uint64_t)env->vfp.vreg[src2].u32[j]
> + - (uint64_t)env->vfp.vreg[src1].u32[j] - carry;
> + tmp = (tmp >> width) & 0x1;
> + vector_mask_result(env, rd, width, lmul, i, tmp);
> + break;
> + case 64:
> + carry = vector_get_carry(env, width, lmul, i);
> + tmp = env->vfp.vreg[src2].u64[j]
> + - env->vfp.vreg[src1].u64[j] - carry;
> +
> + if (((env->vfp.vreg[src1].u64[j] == MAX_U64) && carry) ||
> + env->vfp.vreg[src2].u64[j] <
> + (env->vfp.vreg[src1].u64[j] + carry)) {
> + tmp = 1;
> + } else {
> + tmp = 0;
> + }
> + vector_mask_result(env, rd, width, lmul, i, tmp);
> + break;
> + default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST,
> +                                      GETPC());
> + break;
> + }
> + } else {
> + if (width <= 64) {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + } else {
> +                    riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST,
> +                                          GETPC());
> + return;
> + }
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +void VECTOR_HELPER(vmsbc_vxm)(CPURISCVState *env, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src2, vlmax, carry;
> + uint64_t tmp, extend_rs1;
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> +
> + if (vector_vtype_ill(env)
> + || vector_overlap_dstgp_srcgp(rd, 1, rs2, lmul)
> + || (rd == 0)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + carry = vector_get_carry(env, width, lmul, i);
> + tmp = env->vfp.vreg[src2].u8[j]
> + - (uint8_t)env->gpr[rs1] - carry;
> + tmp = (tmp >> width) & 0x1;
> + vector_mask_result(env, rd, width, lmul, i, tmp);
> + break;
> + case 16:
> + carry = vector_get_carry(env, width, lmul, i);
> + tmp = env->vfp.vreg[src2].u16[j]
> + - (uint16_t)env->gpr[rs1] - carry;
> + tmp = (tmp >> width) & 0x1;
> + vector_mask_result(env, rd, width, lmul, i, tmp);
> + break;
> + case 32:
> + carry = vector_get_carry(env, width, lmul, i);
> + tmp = (uint64_t)env->vfp.vreg[src2].u32[j]
> + - (uint64_t)((uint32_t)env->gpr[rs1]) - carry;
> + tmp = (tmp >> width) & 0x1;
> + vector_mask_result(env, rd, width, lmul, i, tmp);
> + break;
> + case 64:
> + carry = vector_get_carry(env, width, lmul, i);
> +
> + extend_rs1 = (uint64_t)extend_gpr(env->gpr[rs1]);
> + tmp = env->vfp.vreg[src2].u64[j] - extend_rs1 - carry;
> +
> + if ((tmp > env->vfp.vreg[src2].u64[j]) ||
> + ((extend_rs1 == MAX_U64) && carry)) {
> + tmp = 1;
> + } else {
> + tmp = 0;
> + }
> + vector_mask_result(env, rd, width, lmul, i, tmp);
> + break;
> +
> + default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST,
> +                                      GETPC());
> + break;
> + }
> + } else {
> + if (width <= 64) {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + } else {
> +                    riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST,
> +                                          GETPC());
> + return;
> + }
> + }
> + }
> + env->vfp.vstart = 0;
> +}
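
Written out, the 64-bit borrow test reduces to a single comparison: a - b -
bin borrows exactly when a < b + bin, with the b == UINT64_MAX && bin == 1
case handled separately because b + bin would wrap:

    #include <stdint.h>

    /* Borrow-out of a - b - bin (bin in {0,1}). */
    static inline int borrow_out64(uint64_t a, uint64_t b, int bin)
    {
        return (a < b) || (a == b && bin);
    }
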
> +
> +/* vmpopc.m rd, vs2, v0.t # x[rd] = sum_i ( vs2[i].LSB && v0[i].LSB ) */
> +void VECTOR_HELPER(vmpopc_m)(CPURISCVState *env, uint32_t vm,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i;
> + if (vector_vtype_ill(env)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> + if (env->vfp.vstart != 0) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> + env->gpr[rd] = 0;
> +
> + for (i = 0; i < vlmax; i++) {
> + if (i < vl) {
> + if (vector_mask_reg(env, rs2, width, lmul, i) &&
> + vector_elem_mask(env, vm, width, lmul, i)) {
> + env->gpr[rd]++;
> + }
> + }
> + }
> +    env->vfp.vstart = 0;
> +}
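
A scalar model of vmpopc.m, assuming, purely for illustration, a packed
one-bit-per-element mask; the 0.7.1 layout implemented here actually strides
mask bits by SEW/LMUL, which vector_mask_reg()/vector_elem_mask() hide:

    #include <stdint.h>

    /* Count elements i < vl whose bit is set in both vs2 and v0. */
    static inline int mpopc(const uint8_t *vs2, const uint8_t *v0, int vl)
    {
        int i, n = 0;
        for (i = 0; i < vl; i++) {
            n += (vs2[i / 8] >> (i % 8)) & (v0[i / 8] >> (i % 8)) & 1;
        }
        return n;
    }
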
> +
> +/* vmfirst.m rd, vs2, vm */
> +void VECTOR_HELPER(vmfirst_m)(CPURISCVState *env, uint32_t vm,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i;
> + if (vector_vtype_ill(env)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> + if (env->vfp.vstart != 0) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> +    env->gpr[rd] = -1;
> +    for (i = 0; i < vlmax; i++) {
> +        if (i < vl) {
> +            if (vector_mask_reg(env, rs2, width, lmul, i) &&
> +                vector_elem_mask(env, vm, width, lmul, i)) {
> +                env->gpr[rd] = i;
> +                break;
> +            }
> +        }
> +    }
> +
> +    env->vfp.vstart = 0;
> +}
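
vmfirst.m returns -1 when no active element has its LSB set, so rd has to
start out as -1: writing it only in the tail iterations would leave it
unchanged when vl == vlmax and the mask is all zero. The scalar shape, under
the same packed-mask illustration as above:

    #include <stdint.h>

    /* Index of the first set bit among active elements, or -1. */
    static inline long mfirst(const uint8_t *vs2, const uint8_t *v0, int vl)
    {
        long i;
        for (i = 0; i < vl; i++) {
            if ((vs2[i / 8] >> (i % 8)) & (v0[i / 8] >> (i % 8)) & 1) {
                return i;
            }
        }
        return -1;
    }
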
> +
> +void VECTOR_HELPER(vmerge_vvm)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl, idx, pos;
> + uint32_t lmul, width, src1, src2, dest, vlmax;
> +
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src1 = rs1 + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vm == 0) {
> + vector_get_layout(env, width, lmul, i, &idx, &pos);
> + if (((env->vfp.vreg[0].u8[idx] >> pos) & 0x1) == 0) {
> + env->vfp.vreg[dest].u8[j] =
> + env->vfp.vreg[src2].u8[j];
> + } else {
> + env->vfp.vreg[dest].u8[j] =
> + env->vfp.vreg[src1].u8[j];
> + }
> + } else {
> + if (rs2 != 0) {
> + riscv_raise_exception(env,
> + RISCV_EXCP_ILLEGAL_INST, GETPC());
> + }
> + env->vfp.vreg[dest].u8[j] = env->vfp.vreg[src1].u8[j];
> + }
> + break;
> + case 16:
> + if (vm == 0) {
> + vector_get_layout(env, width, lmul, i, &idx, &pos);
> + if (((env->vfp.vreg[0].u8[idx] >> pos) & 0x1) == 0) {
> + env->vfp.vreg[dest].u16[j] =
> + env->vfp.vreg[src2].u16[j];
> + } else {
> + env->vfp.vreg[dest].u16[j] =
> + env->vfp.vreg[src1].u16[j];
> + }
> + } else {
> + if (rs2 != 0) {
> + riscv_raise_exception(env,
> + RISCV_EXCP_ILLEGAL_INST, GETPC());
> + }
> +                    env->vfp.vreg[dest].u16[j] =
> +                        env->vfp.vreg[src1].u16[j];
> + }
> + break;
> + case 32:
> + if (vm == 0) {
> + vector_get_layout(env, width, lmul, i, &idx, &pos);
> + if (((env->vfp.vreg[0].u8[idx] >> pos) & 0x1) == 0) {
> + env->vfp.vreg[dest].u32[j] =
> + env->vfp.vreg[src2].u32[j];
> + } else {
> + env->vfp.vreg[dest].u32[j] =
> + env->vfp.vreg[src1].u32[j];
> + }
> + } else {
> + if (rs2 != 0) {
> + riscv_raise_exception(env,
> + RISCV_EXCP_ILLEGAL_INST, GETPC());
> + }
> +                    env->vfp.vreg[dest].u32[j] =
> +                        env->vfp.vreg[src1].u32[j];
> + }
> + break;
> + case 64:
> + if (vm == 0) {
> + vector_get_layout(env, width, lmul, i, &idx, &pos);
> + if (((env->vfp.vreg[0].u8[idx] >> pos) & 0x1) == 0) {
> + env->vfp.vreg[dest].u64[j] =
> + env->vfp.vreg[src2].u64[j];
> + } else {
> + env->vfp.vreg[dest].u64[j] =
> + env->vfp.vreg[src1].u64[j];
> + }
> + } else {
> + if (rs2 != 0) {
> + riscv_raise_exception(env,
> + RISCV_EXCP_ILLEGAL_INST, GETPC());
> + }
> +                    env->vfp.vreg[dest].u64[j] =
> +                        env->vfp.vreg[src1].u64[j];
> + }
> + break;
> + default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST,
> +                                      GETPC());
> + break;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +void VECTOR_HELPER(vmerge_vxm)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl, idx, pos;
> + uint32_t lmul, width, src2, dest, vlmax;
> +
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> +    if (vector_vtype_ill(env)) {
> +        riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> +        return;
> +    }
> +
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vm == 0) {
> + vector_get_layout(env, width, lmul, i, &idx, &pos);
> + if (((env->vfp.vreg[0].u8[idx] >> pos) & 0x1) == 0) {
> + env->vfp.vreg[dest].u8[j] =
> + env->vfp.vreg[src2].u8[j];
> + } else {
> + env->vfp.vreg[dest].u8[j] = env->gpr[rs1];
> + }
> + } else {
> + if (rs2 != 0) {
> + riscv_raise_exception(env,
> + RISCV_EXCP_ILLEGAL_INST, GETPC());
> + }
> + env->vfp.vreg[dest].u8[j] = env->gpr[rs1];
> + }
> + break;
> + case 16:
> + if (vm == 0) {
> + vector_get_layout(env, width, lmul, i, &idx, &pos);
> + if (((env->vfp.vreg[0].u8[idx] >> pos) & 0x1) == 0) {
> + env->vfp.vreg[dest].u16[j] =
> + env->vfp.vreg[src2].u16[j];
> + } else {
> + env->vfp.vreg[dest].u16[j] = env->gpr[rs1];
> + }
> + } else {
> + if (rs2 != 0) {
> + riscv_raise_exception(env,
> + RISCV_EXCP_ILLEGAL_INST, GETPC());
> + }
> + env->vfp.vreg[dest].u16[j] = env->gpr[rs1];
> + }
> + break;
> + case 32:
> + if (vm == 0) {
> + vector_get_layout(env, width, lmul, i, &idx, &pos);
> + if (((env->vfp.vreg[0].u8[idx] >> pos) & 0x1) == 0) {
> + env->vfp.vreg[dest].u32[j] =
> + env->vfp.vreg[src2].u32[j];
> + } else {
> + env->vfp.vreg[dest].u32[j] = env->gpr[rs1];
> + }
> + } else {
> + if (rs2 != 0) {
> + riscv_raise_exception(env,
> + RISCV_EXCP_ILLEGAL_INST, GETPC());
> + }
> + env->vfp.vreg[dest].u32[j] = env->gpr[rs1];
> + }
> + break;
> + case 64:
> + if (vm == 0) {
> + vector_get_layout(env, width, lmul, i, &idx, &pos);
> + if (((env->vfp.vreg[0].u8[idx] >> pos) & 0x1) == 0) {
> + env->vfp.vreg[dest].u64[j] =
> + env->vfp.vreg[src2].u64[j];
> + } else {
> + env->vfp.vreg[dest].u64[j] =
> + (uint64_t)extend_gpr(env->gpr[rs1]);
> + }
> + } else {
> + if (rs2 != 0) {
> + riscv_raise_exception(env,
> + RISCV_EXCP_ILLEGAL_INST, GETPC());
> + }
> + env->vfp.vreg[dest].u64[j] =
> + (uint64_t)extend_gpr(env->gpr[rs1]);
> + }
> + break;
> + default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST,
> +                                      GETPC());
> + break;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +void VECTOR_HELPER(vmerge_vim)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl, idx, pos;
> + uint32_t lmul, width, src2, dest, vlmax;
> +
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vm == 0) {
> + vector_get_layout(env, width, lmul, i, &idx, &pos);
> + if (((env->vfp.vreg[0].u8[idx] >> pos) & 0x1) == 0) {
> + env->vfp.vreg[dest].u8[j] =
> + env->vfp.vreg[src2].u8[j];
> + } else {
> + env->vfp.vreg[dest].u8[j] =
> + (uint8_t)sign_extend(rs1, 5);
> + }
> + } else {
> + if (rs2 != 0) {
> + riscv_raise_exception(env,
> + RISCV_EXCP_ILLEGAL_INST, GETPC());
> + }
> +                    env->vfp.vreg[dest].u8[j] =
> +                        (uint8_t)sign_extend(rs1, 5);
> + }
> + break;
> + case 16:
> + if (vm == 0) {
> + vector_get_layout(env, width, lmul, i, &idx, &pos);
> + if (((env->vfp.vreg[0].u8[idx] >> pos) & 0x1) == 0) {
> + env->vfp.vreg[dest].u16[j] =
> + env->vfp.vreg[src2].u16[j];
> + } else {
> + env->vfp.vreg[dest].u16[j] =
> + (uint16_t)sign_extend(rs1, 5);
> + }
> + } else {
> + if (rs2 != 0) {
> + riscv_raise_exception(env,
> + RISCV_EXCP_ILLEGAL_INST, GETPC());
> + }
> +                    env->vfp.vreg[dest].u16[j] =
> +                        (uint16_t)sign_extend(rs1, 5);
> + }
> + break;
> + case 32:
> + if (vm == 0) {
> + vector_get_layout(env, width, lmul, i, &idx, &pos);
> + if (((env->vfp.vreg[0].u8[idx] >> pos) & 0x1) == 0) {
> + env->vfp.vreg[dest].u32[j] =
> + env->vfp.vreg[src2].u32[j];
> + } else {
> + env->vfp.vreg[dest].u32[j] =
> + (uint32_t)sign_extend(rs1, 5);
> + }
> + } else {
> + if (rs2 != 0) {
> + riscv_raise_exception(env,
> + RISCV_EXCP_ILLEGAL_INST, GETPC());
> + }
> +                    env->vfp.vreg[dest].u32[j] =
> +                        (uint32_t)sign_extend(rs1, 5);
> + }
> + break;
> + case 64:
> + if (vm == 0) {
> + vector_get_layout(env, width, lmul, i, &idx, &pos);
> + if (((env->vfp.vreg[0].u8[idx] >> pos) & 0x1) == 0) {
> + env->vfp.vreg[dest].u64[j] =
> + env->vfp.vreg[src2].u64[j];
> + } else {
> + env->vfp.vreg[dest].u64[j] =
> + (uint64_t)sign_extend(rs1, 5);
> + }
> + } else {
> + if (rs2 != 0) {
> + riscv_raise_exception(env,
> + RISCV_EXCP_ILLEGAL_INST, GETPC());
> + }
> +                    env->vfp.vreg[dest].u64[j] =
> +                        (uint64_t)sign_extend(rs1, 5);
> + }
> + break;
> + default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST,
> +                                      GETPC());
> + break;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vfmerge.vfm vd, vs2, rs1, v0 # vd[i] = v0[i].LSB ? f[rs1] : vs2[i] */
> +void VECTOR_HELPER(vfmerge_vfm)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest, src2;
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + /* vfmv.v.f vd, rs1 # vd[i] = f[rs1]; */
> + if (vm && (rs2 != 0)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f16[j] = env->fpr[rs1];
> + } else {
> +                    env->vfp.vreg[dest].f16[j] =
> +                        env->vfp.vreg[src2].f16[j];
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f32[j] = env->fpr[rs1];
> + } else {
> +                    env->vfp.vreg[dest].f32[j] =
> +                        env->vfp.vreg[src2].f32[j];
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f64[j] = env->fpr[rs1];
> + } else {
> +                    env->vfp.vreg[dest].f64[j] =
> +                        env->vfp.vreg[src2].f64[j];
> + }
> + break;
> + default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST,
> +                                      GETPC());
> + return;
> + }
> + } else {
> + vector_tail_fcommon(env, dest, j, width);
> + }
> + }
> +    env->vfp.vstart = 0;
> +}
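
All the merge forms compute vd[i] = mask-bit ? first-operand : vs2[i], and
vm == 1 (which the encoding pairs with vs2 == v0) degenerates into a splat
of the first operand, i.e. vmv.v.v / vmv.v.x / vmv.v.i / vfmv.v.f. A
one-line scalar model:

    #include <stdint.h>

    /* vd[i] = v0[i].LSB ? s1 : s2; with vm == 1 every element takes s1. */
    static inline uint64_t merge(int mbit, uint64_t s1, uint64_t s2)
    {
        return mbit ? s1 : s2;
    }
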
> +
> +void VECTOR_HELPER(vmseq_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src1, src2, vlmax;
> +
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src1 = rs1 + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src1].u8[j] ==
> + env->vfp.vreg[src2].u8[j]) {
> + vector_mask_result(env, rd, width, lmul, i, 1);
> + } else {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src1].u16[j] ==
> + env->vfp.vreg[src2].u16[j]) {
> + vector_mask_result(env, rd, width, lmul, i, 1);
> + } else {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src1].u32[j] ==
> + env->vfp.vreg[src2].u32[j]) {
> + vector_mask_result(env, rd, width, lmul, i, 1);
> + } else {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src1].u64[j] ==
> + env->vfp.vreg[src2].u64[j]) {
> + vector_mask_result(env, rd, width, lmul, i, 1);
> + } else {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST,
> +                                      GETPC());
> + break;
> + }
> + } else {
> + if (width <= 64) {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + } else {
> +                    riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST,
> +                                          GETPC());
> + return;
> + }
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +void VECTOR_HELPER(vmseq_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src2, vlmax;
> +
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs2, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    if ((uint8_t)env->gpr[rs1] ==
> +                        env->vfp.vreg[src2].u8[j]) {
> + vector_mask_result(env, rd, width, lmul, i, 1);
> + } else {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if ((uint16_t)env->gpr[rs1] == env->vfp.vreg[src2].u16[j]) {
> + vector_mask_result(env, rd, width, lmul, i, 1);
> + } else {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if ((uint32_t)env->gpr[rs1] == env->vfp.vreg[src2].u32[j]) {
> + vector_mask_result(env, rd, width, lmul, i, 1);
> + } else {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if ((uint64_t)extend_gpr(env->gpr[rs1]) ==
> + env->vfp.vreg[src2].u64[j]) {
> + vector_mask_result(env, rd, width, lmul, i, 1);
> + } else {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + if (width <= 64) {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + } else {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + }
> + }
> + env->vfp.vstart = 0;
> +}
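> +
> +/* vmseq.vi vd, vs2, imm, vm # Set if equal, vector-immediate */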
> +void VECTOR_HELPER(vmseq_vi)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src2, vlmax;
> +
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs2, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if ((uint8_t)sign_extend(rs1, 5)
> + == env->vfp.vreg[src2].u8[j]) {
> + vector_mask_result(env, rd, width, lmul, i, 1);
> + } else {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if ((uint16_t)sign_extend(rs1, 5)
> + == env->vfp.vreg[src2].u16[j]) {
> + vector_mask_result(env, rd, width, lmul, i, 1);
> + } else {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if ((uint32_t)sign_extend(rs1, 5)
> + == env->vfp.vreg[src2].u32[j]) {
> + vector_mask_result(env, rd, width, lmul, i, 1);
> + } else {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if ((uint64_t)sign_extend(rs1, 5) ==
> + env->vfp.vreg[src2].u64[j]) {
> + vector_mask_result(env, rd, width, lmul, i, 1);
> + } else {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + if (width <= 64) {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + } else {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vmandnot.mm vd, vs2, vs1 # vd = vs2 & ~vs1 */
> +void VECTOR_HELPER(vmandnot_mm)(CPURISCVState *env, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, i, vlmax;
> + uint32_t tmp;
> +
> + if (vector_vtype_ill(env)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + vl = env->vfp.vl;
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + for (i = 0; i < vlmax; i++) {
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + tmp = ~vector_mask_reg(env, rs1, width, lmul, i) &
> + vector_mask_reg(env, rs2, width, lmul, i);
> + vector_mask_result(env, rd, width, lmul, i, tmp);
> + } else {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> +
> + env->vfp.vstart = 0;
> +}
> +
> +/* vmfeq.vv vd, vs2, vs1, vm # Vector-vector */
> +void VECTOR_HELPER(vmfeq_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, src1, src2, result;
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + src1 = rs1 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + result = float16_eq_quiet(env->vfp.vreg[src1].f16[j],
> + env->vfp.vreg[src2].f16[j],
> + &env->fp_status);
> + vector_mask_result(env, rd, width, lmul, i, result);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + result = float32_eq_quiet(env->vfp.vreg[src1].f32[j],
> + env->vfp.vreg[src2].f32[j],
> + &env->fp_status);
> + vector_mask_result(env, rd, width, lmul, i, result);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + result = float64_eq_quiet(env->vfp.vreg[src1].f64[j],
> + env->vfp.vreg[src2].f64[j],
> + &env->fp_status);
> + vector_mask_result(env, rd, width, lmul, i, result);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + switch (width) {
> + case 16:
> + case 32:
> + case 64:
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vmfeq.vf vd, vs2, rs1, vm # vector-scalar */
> +void VECTOR_HELPER(vmfeq_vf)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, src2, result;
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + result = float16_eq_quiet(env->fpr[rs1],
> + env->vfp.vreg[src2].f16[j],
> + &env->fp_status);
> + vector_mask_result(env, rd, width, lmul, i, result);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + result = float32_eq_quiet(env->fpr[rs1],
> + env->vfp.vreg[src2].f32[j],
> + &env->fp_status);
> + vector_mask_result(env, rd, width, lmul, i, result);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + result = float64_eq_quiet(env->fpr[rs1],
> + env->vfp.vreg[src2].f64[j],
> + &env->fp_status);
> + vector_mask_result(env, rd, width, lmul, i, result);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + switch (width) {
> + case 16:
> + case 32:
> + case 64:
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
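> +/* vmsne.vv vd, vs2, vs1, vm # Set if not equal, vector-vector */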
> +void VECTOR_HELPER(vmsne_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src1, src2, vlmax;
> +
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src1 = rs1 + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src1].u8[j] !=
> + env->vfp.vreg[src2].u8[j]) {
> + vector_mask_result(env, rd, width, lmul, i, 1);
> + } else {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src1].u16[j] !=
> + env->vfp.vreg[src2].u16[j]) {
> + vector_mask_result(env, rd, width, lmul, i, 1);
> + } else {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src1].u32[j] !=
> + env->vfp.vreg[src2].u32[j]) {
> + vector_mask_result(env, rd, width, lmul, i, 1);
> + } else {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src1].u64[j] !=
> + env->vfp.vreg[src2].u64[j]) {
> + vector_mask_result(env, rd, width, lmul, i, 1);
> + } else {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + if (width <= 64) {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + } else {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + }
> + }
> + env->vfp.vstart = 0;
> +}
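> +
> +/* vmsne.vx vd, vs2, rs1, vm # Set if not equal, vector-scalar */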
> +void VECTOR_HELPER(vmsne_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src2, vlmax;
> +
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs2, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if ((uint8_t)env->gpr[rs1] != env->vfp.vreg[src2].u8[j]) {
> + vector_mask_result(env, rd, width, lmul, i, 1);
> + } else {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if ((uint16_t)env->gpr[rs1] != env->vfp.vreg[src2].u16[j]) {
> + vector_mask_result(env, rd, width, lmul, i, 1);
> + } else {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if ((uint32_t)env->gpr[rs1] != env->vfp.vreg[src2].u32[j]) {
> + vector_mask_result(env, rd, width, lmul, i, 1);
> + } else {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if ((uint64_t)extend_gpr(env->gpr[rs1]) !=
> + env->vfp.vreg[src2].u64[j]) {
> + vector_mask_result(env, rd, width, lmul, i, 1);
> + } else {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + if (width <= 64) {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + } else {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + }
> + }
> + env->vfp.vstart = 0;
> +}
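> +
> +/* vmsne.vi vd, vs2, imm, vm # Set if not equal, vector-immediate */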
> +void VECTOR_HELPER(vmsne_vi)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src2, vlmax;
> +
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs2, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if ((uint8_t)sign_extend(rs1, 5)
> + != env->vfp.vreg[src2].u8[j]) {
> + vector_mask_result(env, rd, width, lmul, i, 1);
> + } else {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if ((uint16_t)sign_extend(rs1, 5)
> + != env->vfp.vreg[src2].u16[j]) {
> + vector_mask_result(env, rd, width, lmul, i, 1);
> + } else {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if ((uint32_t)sign_extend(rs1, 5)
> + != env->vfp.vreg[src2].u32[j]) {
> + vector_mask_result(env, rd, width, lmul, i, 1);
> + } else {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if ((uint64_t)sign_extend(rs1, 5) !=
> + env->vfp.vreg[src2].u64[j]) {
> + vector_mask_result(env, rd, width, lmul, i, 1);
> + } else {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + if (width <= 64) {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + } else {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vmand.mm vd, vs2, vs1 # vd = vs2 & vs1 */
> +void VECTOR_HELPER(vmand_mm)(CPURISCVState *env, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, i, vlmax;
> + uint32_t tmp;
> +
> + if (vector_vtype_ill(env)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + vl = env->vfp.vl;
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + for (i = 0; i < vlmax; i++) {
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + tmp = vector_mask_reg(env, rs1, width, lmul, i) &
> + vector_mask_reg(env, rs2, width, lmul, i);
> + vector_mask_result(env, rd, width, lmul, i, tmp);
> + } else {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> +
> + env->vfp.vstart = 0;
> +}
> +
> +/* vmfle.vv vd, vs2, vs1, vm # Vector-vector */
> +void VECTOR_HELPER(vmfle_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, src1, src2, result;
> +
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + src1 = rs1 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + result = float16_le(env->vfp.vreg[src2].f16[j],
> + env->vfp.vreg[src1].f16[j],
> + &env->fp_status);
> + vector_mask_result(env, rd, width, lmul, i, result);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + result = float32_le(env->vfp.vreg[src2].f32[j],
> + env->vfp.vreg[src1].f32[j],
> + &env->fp_status);
> + vector_mask_result(env, rd, width, lmul, i, result);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + result = float64_le(env->vfp.vreg[src2].f64[j],
> + env->vfp.vreg[src1].f64[j],
> + &env->fp_status);
> + vector_mask_result(env, rd, width, lmul, i, result);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + switch (width) {
> + case 16:
> + case 32:
> + case 64:
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vmfle.vf vd, vs2, rs1, vm # vector-scalar */
> +void VECTOR_HELPER(vmfle_vf)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, src2, result;
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs2, false);
> +
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + result = float16_le(env->vfp.vreg[src2].f16[j],
> + env->fpr[rs1],
> + &env->fp_status);
> + vector_mask_result(env, rd, width, lmul, i, result);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + result = float32_le(env->vfp.vreg[src2].f32[j],
> + env->fpr[rs1],
> + &env->fp_status);
> + vector_mask_result(env, rd, width, lmul, i, result);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + result = float64_le(env->vfp.vreg[src2].f64[j],
> + env->fpr[rs1],
> + &env->fp_status);
> + vector_mask_result(env, rd, width, lmul, i, result);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + switch (width) {
> + case 16:
> + case 32:
> + case 64:
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + }
> + }
> + env->vfp.vstart = 0;
> +}
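> +
> +/* vmsltu.vv vd, vs2, vs1, vm # Set if less than, unsigned, vector-vector */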
> +void VECTOR_HELPER(vmsltu_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src1, src2, vlmax;
> +
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src1 = rs1 + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src2].u8[j] <
> + env->vfp.vreg[src1].u8[j]) {
> + vector_mask_result(env, rd, width, lmul, i, 1);
> + } else {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src2].u16[j] <
> + env->vfp.vreg[src1].u16[j]) {
> + vector_mask_result(env, rd, width, lmul, i, 1);
> + } else {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src2].u32[j] <
> + env->vfp.vreg[src1].u32[j]) {
> + vector_mask_result(env, rd, width, lmul, i, 1);
> + } else {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src2].u64[j] <
> + env->vfp.vreg[src1].u64[j]) {
> + vector_mask_result(env, rd, width, lmul, i, 1);
> + } else {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + if (width <= 64) {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + } else {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + }
> + }
> + env->vfp.vstart = 0;
> +}
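> +
> +/* vmsltu.vx vd, vs2, rs1, vm # Set if less than, unsigned, vector-scalar */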
> +void VECTOR_HELPER(vmsltu_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src2, vlmax;
> +
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs2, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src2].u8[j] < (uint8_t)env->gpr[rs1]) {
> + vector_mask_result(env, rd, width, lmul, i, 1);
> + } else {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src2].u16[j] < (uint16_t)env->gpr[rs1]) {
> + vector_mask_result(env, rd, width, lmul, i, 1);
> + } else {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src2].u32[j] < (uint32_t)env->gpr[rs1]) {
> + vector_mask_result(env, rd, width, lmul, i, 1);
> + } else {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src2].u64[j] <
> + (uint64_t)extend_gpr(env->gpr[rs1])) {
> + vector_mask_result(env, rd, width, lmul, i, 1);
> + } else {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + if (width <= 64) {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + } else {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vmor.mm vd, vs2, vs1 # vd = vs2 | vs1 */
> +void VECTOR_HELPER(vmor_mm)(CPURISCVState *env, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, i, vlmax;
> + uint32_t tmp;
> +
> + if (vector_vtype_ill(env)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + vl = env->vfp.vl;
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + for (i = 0; i < vlmax; i++) {
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + tmp = vector_mask_reg(env, rs1, width, lmul, i) |
> + vector_mask_reg(env, rs2, width, lmul, i);
> + vector_mask_result(env, rd, width, lmul, i, tmp & 0x1);
> + } else {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> +
> + env->vfp.vstart = 0;
> +}
> +
> +/* vmford.vv vd, vs2, vs1, vm # Vector-vector */
> +void VECTOR_HELPER(vmford_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, src1, src2, result;
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + src1 = rs1 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + result = float16_unordered_quiet(env->vfp.vreg[src1].f16[j],
> + env->vfp.vreg[src2].f16[j],
> + &env->fp_status);
> + vector_mask_result(env, rd, width, lmul, i, !result);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + result = float32_unordered_quiet(env->vfp.vreg[src1].f32[j],
> + env->vfp.vreg[src2].f32[j],
> + &env->fp_status);
> + vector_mask_result(env, rd, width, lmul, i, !result);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + result = float64_unordered_quiet(env->vfp.vreg[src1].f64[j],
> + env->vfp.vreg[src2].f64[j],
> + &env->fp_status);
> + vector_mask_result(env, rd, width, lmul, i, !result);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + switch (width) {
> + case 16:
> + case 32:
> + case 64:
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vmford.vf vd, vs2, rs1, vm # Vector-scalar */
> +void VECTOR_HELPER(vmford_vf)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, src2, result;
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs2, false);
> +
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + result = float16_unordered_quiet(env->vfp.vreg[src2].f16[j],
> + env->fpr[rs1],
> + &env->fp_status);
> + vector_mask_result(env, rd, width, lmul, i, !result);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + result = float32_unordered_quiet(env->vfp.vreg[src2].f32[j],
> + env->fpr[rs1],
> + &env->fp_status);
> + vector_mask_result(env, rd, width, lmul, i, !result);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + result = float64_unordered_quiet(env->vfp.vreg[src2].f64[j],
> + env->fpr[rs1],
> + &env->fp_status);
> + vector_mask_result(env, rd, width, lmul, i, !result);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + switch (width) {
> + case 16:
> + case 32:
> + case 64:
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + }
> + }
> + env->vfp.vstart = 0;
> +}
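> +
> +/* vmslt.vv vd, vs2, vs1, vm # Set if less than, signed, vector-vector */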
> +void VECTOR_HELPER(vmslt_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src1, src2, vlmax;
> +
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src1 = rs1 + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src2].s8[j] <
> + env->vfp.vreg[src1].s8[j]) {
> + vector_mask_result(env, rd, width, lmul, i, 1);
> + } else {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src2].s16[j] <
> + env->vfp.vreg[src1].s16[j]) {
> + vector_mask_result(env, rd, width, lmul, i, 1);
> + } else {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src2].s32[j] <
> + env->vfp.vreg[src1].s32[j]) {
> + vector_mask_result(env, rd, width, lmul, i, 1);
> + } else {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src2].s64[j] <
> + env->vfp.vreg[src1].s64[j]) {
> + vector_mask_result(env, rd, width, lmul, i, 1);
> + } else {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + if (width <= 64) {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + } else {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + }
> + }
> + env->vfp.vstart = 0;
> +}
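> +
> +/* vmslt.vx vd, vs2, rs1, vm # Set if less than, signed, vector-scalar */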
> +void VECTOR_HELPER(vmslt_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src2, vlmax;
> +
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs2, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src2].s8[j] < (int8_t)env->gpr[rs1]) {
> + vector_mask_result(env, rd, width, lmul, i, 1);
> + } else {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src2].s16[j] < (int16_t)env->gpr[rs1]) {
> + vector_mask_result(env, rd, width, lmul, i, 1);
> + } else {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src2].s32[j] < (int32_t)env->gpr[rs1]) {
> + vector_mask_result(env, rd, width, lmul, i, 1);
> + } else {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src2].s64[j] <
> + (int64_t)extend_gpr(env->gpr[rs1])) {
> + vector_mask_result(env, rd, width, lmul, i, 1);
> + } else {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + if (width <= 64) {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + } else {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vmxor.mm vd, vs2, vs1 # vd = vs2 ^ vs1 */
> +void VECTOR_HELPER(vmxor_mm)(CPURISCVState *env, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, i, vlmax;
> + uint32_t tmp;
> +
> + if (vector_vtype_ill(env)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + vl = env->vfp.vl;
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + for (i = 0; i < vlmax; i++) {
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + tmp = vector_mask_reg(env, rs1, width, lmul, i) ^
> + vector_mask_reg(env, rs2, width, lmul, i);
> + vector_mask_result(env, rd, width, lmul, i, tmp & 0x1);
> + } else {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> +
> + env->vfp.vstart = 0;
> +}
> +
> +/* vmflt.vv vd, vs2, vs1, vm # Vector-vector */
> +void VECTOR_HELPER(vmflt_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, src1, src2, result;
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + src1 = rs1 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + result = float16_lt(env->vfp.vreg[src2].f16[j],
> + env->vfp.vreg[src1].f16[j],
> + &env->fp_status);
> + vector_mask_result(env, rd, width, lmul, i, result);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + result = float32_lt(env->vfp.vreg[src2].f32[j],
> + env->vfp.vreg[src1].f32[j],
> + &env->fp_status);
> + vector_mask_result(env, rd, width, lmul, i, result);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + result = float64_lt(env->vfp.vreg[src2].f64[j],
> + env->vfp.vreg[src1].f64[j],
> + &env->fp_status);
> + vector_mask_result(env, rd, width, lmul, i, result);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + switch (width) {
> + case 16:
> + case 32:
> + case 64:
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vmflt.vf vd, vs2, rs1, vm # vector-scalar */
> +void VECTOR_HELPER(vmflt_vf)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, src2, result;
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs2, false);
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + result = float16_lt(env->vfp.vreg[src2].f16[j],
> + env->fpr[rs1],
> + &env->fp_status);
> + vector_mask_result(env, rd, width, lmul, i, result);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + result = float32_lt(env->vfp.vreg[src2].f32[j],
> + env->fpr[rs1],
> + &env->fp_status);
> + vector_mask_result(env, rd, width, lmul, i, result);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + result = float64_lt(env->vfp.vreg[src2].f64[j],
> + env->fpr[rs1],
> + &env->fp_status);
> + vector_mask_result(env, rd, width, lmul, i, result);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + switch (width) {
> + case 16:
> + case 32:
> + case 64:
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + }
> + }
> + env->vfp.vstart = 0;
> +}
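> +
> +/* vmsleu.vv vd, vs2, vs1, vm # Set if less than or equal, unsigned */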
> +void VECTOR_HELPER(vmsleu_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src1, src2, vlmax;
> +
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src1 = rs1 + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src2].u8[j] <=
> + env->vfp.vreg[src1].u8[j]) {
> + vector_mask_result(env, rd, width, lmul, i, 1);
> + } else {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src2].u16[j] <=
> + env->vfp.vreg[src1].u16[j]) {
> + vector_mask_result(env, rd, width, lmul, i, 1);
> + } else {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src2].u32[j] <=
> + env->vfp.vreg[src1].u32[j]) {
> + vector_mask_result(env, rd, width, lmul, i, 1);
> + } else {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src2].u64[j] <=
> + env->vfp.vreg[src1].u64[j]) {
> + vector_mask_result(env, rd, width, lmul, i, 1);
> + } else {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + if (width <= 64) {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + } else {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + }
> + }
> + env->vfp.vstart = 0;
> +}
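> +
> +/* vmsleu.vx vd, vs2, rs1, vm # Set if less than or equal, unsigned, vector-scalar */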
> +void VECTOR_HELPER(vmsleu_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src2, vlmax;
> +
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs2, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src2].u8[j] <= (uint8_t)env->gpr[rs1]) {
> + vector_mask_result(env, rd, width, lmul, i, 1);
> + } else {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src2].u16[j] <= (uint16_t)env->gpr[rs1]) {
> + vector_mask_result(env, rd, width, lmul, i, 1);
> + } else {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src2].u32[j] <= (uint32_t)env->gpr[rs1]) {
> + vector_mask_result(env, rd, width, lmul, i, 1);
> + } else {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src2].u64[j] <=
> + (uint64_t)extend_gpr(env->gpr[rs1])) {
> + vector_mask_result(env, rd, width, lmul, i, 1);
> + } else {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + if (width <= 64) {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + } else {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + }
> + }
> + env->vfp.vstart = 0;
> +}
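> +
> +/* vmsleu.vi vd, vs2, imm, vm # Set if less than or equal, unsigned, vector-immediate */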
> +void VECTOR_HELPER(vmsleu_vi)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src2, vlmax;
> +
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs2, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src2].u8[j] <= (uint8_t)rs1) {
> + vector_mask_result(env, rd, width, lmul, i, 1);
> + } else {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src2].u16[j] <= (uint16_t)rs1) {
> + vector_mask_result(env, rd, width, lmul, i, 1);
> + } else {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src2].u32[j] <= (uint32_t)rs1) {
> + vector_mask_result(env, rd, width, lmul, i, 1);
> + } else {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src2].u64[j] <=
> + (uint64_t)rs1) {
> + vector_mask_result(env, rd, width, lmul, i, 1);
> + } else {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + if (width <= 64) {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + } else {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vmornot.mm vd, vs2, vs1 # vd = vs2 | ~vs1 */
> +void VECTOR_HELPER(vmornot_mm)(CPURISCVState *env, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, i, vlmax;
> + uint32_t tmp;
> +
> + if (vector_vtype_ill(env)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + vl = env->vfp.vl;
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + for (i = 0; i < vlmax; i++) {
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + tmp = ~vector_mask_reg(env, rs1, width, lmul, i) |
> + vector_mask_reg(env, rs2, width, lmul, i);
> + vector_mask_result(env, rd, width, lmul, i, tmp & 0x1);
> + } else {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> +
> + env->vfp.vstart = 0;
> +}
> +
> +/* vmfne.vv vd, vs2, vs1, vm # Vector-vector */
> +void VECTOR_HELPER(vmfne_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, src1, src2, result;
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + src1 = rs1 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + result = float16_eq_quiet(env->vfp.vreg[src1].f16[j],
> + env->vfp.vreg[src2].f16[j],
> + &env->fp_status);
> + vector_mask_result(env, rd, width, lmul, i, !result);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + result = float32_eq_quiet(env->vfp.vreg[src1].f32[j],
> + env->vfp.vreg[src2].f32[j],
> + &env->fp_status);
> + vector_mask_result(env, rd, width, lmul, i, !result);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + result = float64_eq_quiet(env->vfp.vreg[src1].f64[j],
> + env->vfp.vreg[src2].f64[j],
> + &env->fp_status);
> + vector_mask_result(env, rd, width, lmul, i, !result);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + switch (width) {
> + case 16:
> + case 32:
> + case 64:
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vmfne.vf vd, vs2, rs1, vm # vector-scalar */
> +void VECTOR_HELPER(vmfne_vf)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, src2, result;
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs2, false);
> +
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + result = float16_eq_quiet(env->fpr[rs1],
> + env->vfp.vreg[src2].f16[j],
> + &env->fp_status);
> + vector_mask_result(env, rd, width, lmul, i, !result);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + result = float32_eq_quiet(env->fpr[rs1],
> + env->vfp.vreg[src2].f32[j],
> + &env->fp_status);
> + vector_mask_result(env, rd, width, lmul, i, !result);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + result = float64_eq_quiet(env->fpr[rs1],
> + env->vfp.vreg[src2].f64[j],
> + &env->fp_status);
> + vector_mask_result(env, rd, width, lmul, i, !result);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + switch (width) {
> + case 16:
> + case 32:
> + case 64:
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + }
> + }
> + env->vfp.vstart = 0;
> +}
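> +
> +/* vmsle.vv vd, vs2, vs1, vm # Set if less than or equal, signed */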
> +void VECTOR_HELPER(vmsle_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src1, src2, vlmax;
> +
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src1 = rs1 + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src2].s8[j] <=
> + env->vfp.vreg[src1].s8[j]) {
> + vector_mask_result(env, rd, width, lmul, i, 1);
> + } else {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src2].s16[j] <=
> + env->vfp.vreg[src1].s16[j]) {
> + vector_mask_result(env, rd, width, lmul, i, 1);
> + } else {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src2].s32[j] <=
> + env->vfp.vreg[src1].s32[j]) {
> + vector_mask_result(env, rd, width, lmul, i, 1);
> + } else {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src2].s64[j] <=
> + env->vfp.vreg[src1].s64[j]) {
> + vector_mask_result(env, rd, width, lmul, i, 1);
> + } else {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + if (width <= 64) {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + } else {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + }
> + }
> + env->vfp.vstart = 0;
> +}
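> +
> +/* vmsle.vx vd, vs2, rs1, vm # Set if less than or equal, signed, vector-scalar */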
> +void VECTOR_HELPER(vmsle_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src2, vlmax;
> +
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs2, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src2].s8[j] <= (int8_t)env->gpr[rs1]) {
> + vector_mask_result(env, rd, width, lmul, i, 1);
> + } else {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src2].s16[j] <= (int16_t)env->gpr[rs1]) {
> + vector_mask_result(env, rd, width, lmul, i, 1);
> + } else {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src2].s32[j] <= (int32_t)env->gpr[rs1]) {
> + vector_mask_result(env, rd, width, lmul, i, 1);
> + } else {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src2].s64[j] <=
> + (int64_t)extend_gpr(env->gpr[rs1])) {
> + vector_mask_result(env, rd, width, lmul, i, 1);
> + } else {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + if (width <= 64) {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + } else {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + }
> + }
> + env->vfp.vstart = 0;
> +}
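> +
> +/* vmsle.vi vd, vs2, imm, vm # Set if less than or equal, signed, vector-immediate */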
> +void VECTOR_HELPER(vmsle_vi)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src2, vlmax;
> +
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs2, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src2].s8[j] <=
> + (int8_t)sign_extend(rs1, 5)) {
> + vector_mask_result(env, rd, width, lmul, i, 1);
> + } else {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src2].s16[j] <=
> + (int16_t)sign_extend(rs1, 5)) {
> + vector_mask_result(env, rd, width, lmul, i, 1);
> + } else {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src2].s32[j] <=
> + (int32_t)sign_extend(rs1, 5)) {
> + vector_mask_result(env, rd, width, lmul, i, 1);
> + } else {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src2].s64[j] <=
> + sign_extend(rs1, 5)) {
> + vector_mask_result(env, rd, width, lmul, i, 1);
> + } else {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + if (width <= 64) {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + } else {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vmnand.mm vd, vs2, vs1 # vd = ~(vs2 & vs1) */
> +void VECTOR_HELPER(vmnand_mm)(CPURISCVState *env, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, i, vlmax;
> + uint32_t tmp;
> +
> + if (vector_vtype_ill(env)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + vl = env->vfp.vl;
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + for (i = 0; i < vlmax; i++) {
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + tmp = vector_mask_reg(env, rs1, width, lmul, i) &
> + vector_mask_reg(env, rs2, width, lmul, i);
> + vector_mask_result(env, rd, width, lmul, i, (~tmp & 0x1));
> + } else {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> +
> + env->vfp.vstart = 0;
> +}
> +
> +/* vmfgt.vf vd, vs2, rs1, vm # vector-scalar */
> +void VECTOR_HELPER(vmfgt_vf)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, src2, result;
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs2, false);
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + result = float16_le(env->vfp.vreg[src2].f16[j],
> + env->fpr[rs1],
> + &env->fp_status);
> + vector_mask_result(env, rd, width, lmul, i, !result);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + result = float32_le(env->vfp.vreg[src2].f32[j],
> + env->fpr[rs1],
> + &env->fp_status);
> + vector_mask_result(env, rd, width, lmul, i, !result);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + result = float64_le(env->vfp.vreg[src2].f64[j],
> + env->fpr[rs1],
> + &env->fp_status);
> + vector_mask_result(env, rd, width, lmul, i, !result);
> + }
> + break;
> + default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + switch (width) {
> + case 16:
> + case 32:
> + case 64:
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + break;
> + default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + }
> + }
> +    env->vfp.vstart = 0;
> +}
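> +
> +/* vmsgtu.vx vd, vs2, rs1, vm # vector-scalar, unsigned compare */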
> +void VECTOR_HELPER(vmsgtu_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src2, vlmax;
> +
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs2, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    if (env->vfp.vreg[src2].u8[j] > (uint8_t)env->gpr[rs1]) {
> + vector_mask_result(env, rd, width, lmul, i, 1);
> + } else {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    if (env->vfp.vreg[src2].u16[j] > (uint16_t)env->gpr[rs1]) {
> + vector_mask_result(env, rd, width, lmul, i, 1);
> + } else {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    if (env->vfp.vreg[src2].u32[j] > (uint32_t)env->gpr[rs1]) {
> + vector_mask_result(env, rd, width, lmul, i, 1);
> + } else {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src2].u64[j] >
> + (uint64_t)extend_gpr(env->gpr[rs1])) {
> + vector_mask_result(env, rd, width, lmul, i, 1);
> + } else {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + if (width <= 64) {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + } else {
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + }
> + }
> +
> + env->vfp.vstart = 0;
> +}
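> +
> +/* vmsgtu.vi vd, vs2, imm, vm # vector-immediate, unsigned compare */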
> +void VECTOR_HELPER(vmsgtu_vi)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src2, vlmax;
> +
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs2, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src2].u8[j] > (uint8_t)rs1) {
> + vector_mask_result(env, rd, width, lmul, i, 1);
> + } else {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src2].u16[j] > (uint16_t)rs1) {
> + vector_mask_result(env, rd, width, lmul, i, 1);
> + } else {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src2].u32[j] > (uint32_t)rs1) {
> + vector_mask_result(env, rd, width, lmul, i, 1);
> + } else {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src2].u64[j] >
> + (uint64_t)rs1) {
> + vector_mask_result(env, rd, width, lmul, i, 1);
> + } else {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + if (width <= 64) {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + } else {
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vmnor.mm vd, vs2, vs1 # vd = ~(vs2 | vs1) */
> +void VECTOR_HELPER(vmnor_mm)(CPURISCVState *env, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, i, vlmax;
> + uint32_t tmp;
> +
> + if (vector_vtype_ill(env)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + vl = env->vfp.vl;
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + for (i = 0; i < vlmax; i++) {
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + tmp = vector_mask_reg(env, rs1, width, lmul, i) |
> + vector_mask_reg(env, rs2, width, lmul, i);
> + vector_mask_result(env, rd, width, lmul, i, ~tmp & 0x1);
> + } else {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> +
> + env->vfp.vstart = 0;
> +}
> +
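> +/* vmsgt.vx vd, vs2, rs1, vm # vector-scalar, signed compare */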
> +void VECTOR_HELPER(vmsgt_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src2, vlmax;
> +
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs2, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    if (env->vfp.vreg[src2].s8[j] > (int8_t)env->gpr[rs1]) {
> + vector_mask_result(env, rd, width, lmul, i, 1);
> + } else {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    if (env->vfp.vreg[src2].s16[j] > (int16_t)env->gpr[rs1]) {
> + vector_mask_result(env, rd, width, lmul, i, 1);
> + } else {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    if (env->vfp.vreg[src2].s32[j] > (int32_t)env->gpr[rs1]) {
> + vector_mask_result(env, rd, width, lmul, i, 1);
> + } else {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src2].s64[j] >
> + (int64_t)extend_gpr(env->gpr[rs1])) {
> + vector_mask_result(env, rd, width, lmul, i, 1);
> + } else {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + if (width <= 64) {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + } else {
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + }
> + }
> + env->vfp.vstart = 0;
> +}
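> +
> +/* vmsgt.vi vd, vs2, imm, vm # vector-immediate, signed compare */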
> +void VECTOR_HELPER(vmsgt_vi)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src2, vlmax;
> +
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs2, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src2].s8[j] >
> + (int8_t)sign_extend(rs1, 5)) {
> + vector_mask_result(env, rd, width, lmul, i, 1);
> + } else {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src2].s16[j] >
> + (int16_t)sign_extend(rs1, 5)) {
> + vector_mask_result(env, rd, width, lmul, i, 1);
> + } else {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src2].s32[j] >
> + (int32_t)sign_extend(rs1, 5)) {
> + vector_mask_result(env, rd, width, lmul, i, 1);
> + } else {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src2].s64[j] >
> + sign_extend(rs1, 5)) {
> + vector_mask_result(env, rd, width, lmul, i, 1);
> + } else {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + break;
> + default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + if (width <= 64) {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + } else {
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +
> +/* vmxnor.mm vd, vs2, vs1 # vd = ~(vs2 ^ vs1) */
> +void VECTOR_HELPER(vmxnor_mm)(CPURISCVState *env, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, i, vlmax;
> + uint32_t tmp;
> +
> + if (vector_vtype_ill(env)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + vl = env->vfp.vl;
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + for (i = 0; i < vlmax; i++) {
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + tmp = vector_mask_reg(env, rs1, width, lmul, i) ^
> + vector_mask_reg(env, rs2, width, lmul, i);
> + vector_mask_result(env, rd, width, lmul, i, ~tmp & 0x1);
> + } else {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> +
> + env->vfp.vstart = 0;
> +}
> +
> +/* vmfge.vf vd, vs2, rs1, vm # vector-scalar */
> +void VECTOR_HELPER(vmfge_vf)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, src2, result;
> +
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs2, false);
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + result = float16_lt(env->vfp.vreg[src2].f16[j],
> + env->fpr[rs1],
> + &env->fp_status);
> + vector_mask_result(env, rd, width, lmul, i, !result);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + result = float32_lt(env->vfp.vreg[src2].f32[j],
> + env->fpr[rs1],
> + &env->fp_status);
> + vector_mask_result(env, rd, width, lmul, i, !result);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + result = float64_lt(env->vfp.vreg[src2].f64[j],
> + env->fpr[rs1],
> + &env->fp_status);
> + vector_mask_result(env, rd, width, lmul, i, !result);
> + }
> + break;
> + default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + switch (width) {
> + case 16:
> + case 32:
> + case 64:
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + break;
> + default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + }
> + }
> +    env->vfp.vstart = 0;
> +}
> +
> +/* vsaddu.vv vd, vs2, vs1, vm # Vector-vector */
> +void VECTOR_HELPER(vsaddu_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest, src1, src2;
> +
> + lmul = vector_get_lmul(env);
> +
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + vl = env->vfp.vl;
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + src1 = rs1 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u8[j] = sat_add_u8(env,
> +                        env->vfp.vreg[src1].u8[j], env->vfp.vreg[src2].u8[j]);
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u16[j] = sat_add_u16(env,
> +                        env->vfp.vreg[src1].u16[j], env->vfp.vreg[src2].u16[j]);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u32[j] = sat_add_u32(env,
> +                        env->vfp.vreg[src1].u32[j], env->vfp.vreg[src2].u32[j]);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u64[j] = sat_add_u64(env,
> +                        env->vfp.vreg[src1].u64[j], env->vfp.vreg[src2].u64[j]);
> + }
> + break;
> + default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> +    env->vfp.vstart = 0;
> +}
> +
> +/* vsaddu.vx vd, vs2, rs1, vm # vector-scalar */
> +void VECTOR_HELPER(vsaddu_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest, src2;
> +
> + lmul = vector_get_lmul(env);
> +
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + vl = env->vfp.vl;
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u8[j] = sat_add_u8(env,
> + env->vfp.vreg[src2].u8[j], env->gpr[rs1]);
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u16[j] = sat_add_u16(env,
> + env->vfp.vreg[src2].u16[j], env->gpr[rs1]);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u32[j] = sat_add_u32(env,
> + env->vfp.vreg[src2].u32[j], env->gpr[rs1]);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u64[j] = sat_add_u64(env,
> + env->vfp.vreg[src2].u64[j], env->gpr[rs1]);
> + }
> + break;
> + default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> +    env->vfp.vstart = 0;
> +}
> +
> +/* vsaddu.vi vd, vs2, imm, vm # vector-immediate */
> +void VECTOR_HELPER(vsaddu_vi)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest, src2;
> +
> + lmul = vector_get_lmul(env);
> +
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> +
> + vl = env->vfp.vl;
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u8[j] = sat_add_u8(env,
> + env->vfp.vreg[src2].u8[j], rs1);
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u16[j] = sat_add_u16(env,
> + env->vfp.vreg[src2].u16[j], rs1);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u32[j] = sat_add_u32(env,
> + env->vfp.vreg[src2].u32[j], rs1);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u64[j] = sat_add_u64(env,
> + env->vfp.vreg[src2].u64[j], rs1);
> + }
> + break;
> + default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> +    env->vfp.vstart = 0;
> +}
> +
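> +/* vdivu.vv vd, vs2, vs1, vm # Vector-vector, unsigned divide */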
> +void VECTOR_HELPER(vdivu_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src1, src2, dest, vlmax;
> +
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src1 = rs1 + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src1].u8[j] == 0) {
> + env->vfp.vreg[dest].u8[j] = MAX_U8;
> + } else {
> +                        env->vfp.vreg[dest].u8[j] = env->vfp.vreg[src2].u8[j] /
> + env->vfp.vreg[src1].u8[j];
> + }
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src1].u16[j] == 0) {
> + env->vfp.vreg[dest].u16[j] = MAX_U16;
> + } else {
> +                        env->vfp.vreg[dest].u16[j] = env->vfp.vreg[src2].u16[j]
> + / env->vfp.vreg[src1].u16[j];
> + }
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src1].u32[j] == 0) {
> + env->vfp.vreg[dest].u32[j] = MAX_U32;
> + } else {
> +                        env->vfp.vreg[dest].u32[j] = env->vfp.vreg[src2].u32[j]
> + / env->vfp.vreg[src1].u32[j];
> + }
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src1].u64[j] == 0) {
> + env->vfp.vreg[dest].u64[j] = MAX_U64;
> + } else {
> +                        env->vfp.vreg[dest].u64[j] = env->vfp.vreg[src2].u64[j]
> + / env->vfp.vreg[src1].u64[j];
> + }
> + }
> + break;
> + default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
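> +/* vdivu.vx vd, vs2, rs1, vm # vector-scalar, unsigned divide */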
> +void VECTOR_HELPER(vdivu_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src2, dest, vlmax;
> +
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if ((uint8_t)env->gpr[rs1] == 0) {
> + env->vfp.vreg[dest].u8[j] = MAX_U8;
> + } else {
> +                        env->vfp.vreg[dest].u8[j] = env->vfp.vreg[src2].u8[j] /
> + (uint8_t)env->gpr[rs1];
> + }
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if ((uint16_t)env->gpr[rs1] == 0) {
> + env->vfp.vreg[dest].u16[j] = MAX_U16;
> + } else {
> +                        env->vfp.vreg[dest].u16[j] = env->vfp.vreg[src2].u16[j]
> + / (uint16_t)env->gpr[rs1];
> + }
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if ((uint32_t)env->gpr[rs1] == 0) {
> + env->vfp.vreg[dest].u32[j] = MAX_U32;
> + } else {
> +                        env->vfp.vreg[dest].u32[j] = env->vfp.vreg[src2].u32[j]
> + / (uint32_t)env->gpr[rs1];
> + }
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if ((uint64_t)extend_gpr(env->gpr[rs1]) == 0) {
> + env->vfp.vreg[dest].u64[j] = MAX_U64;
> + } else {
> +                        env->vfp.vreg[dest].u64[j] = env->vfp.vreg[src2].u64[j]
> + / (uint64_t)extend_gpr(env->gpr[rs1]);
> + }
> + }
> + break;
> + default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vfdiv.vv vd, vs2, vs1, vm # Vector-vector */
> +void VECTOR_HELPER(vfdiv_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest, src1, src2;
> +
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + src1 = rs1 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    env->vfp.vreg[dest].f16[j] = float16_div(
> +                        env->vfp.vreg[src2].f16[j],
> +                        env->vfp.vreg[src1].f16[j],
> +                        &env->fp_status);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    env->vfp.vreg[dest].f32[j] = float32_div(
> +                        env->vfp.vreg[src2].f32[j],
> +                        env->vfp.vreg[src1].f32[j],
> +                        &env->fp_status);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    env->vfp.vreg[dest].f64[j] = float64_div(
> +                        env->vfp.vreg[src2].f64[j],
> +                        env->vfp.vreg[src1].f64[j],
> +                        &env->fp_status);
> + }
> + break;
> + default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_fcommon(env, dest, j, width);
> + }
> + }
> +    env->vfp.vstart = 0;
> +}
> +
> +/* vfdiv.vf vd, vs2, rs1, vm # vector-scalar */
> +void VECTOR_HELPER(vfdiv_vf)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest, src2;
> +
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f16[j] = float16_div(
> +                        env->vfp.vreg[src2].f16[j],
> + env->fpr[rs1],
> + &env->fp_status);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f32[j] = float32_div(
> +                        env->vfp.vreg[src2].f32[j],
> + env->fpr[rs1],
> + &env->fp_status);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f64[j] = float64_div(
> +                        env->vfp.vreg[src2].f64[j],
> + env->fpr[rs1],
> + &env->fp_status);
> + }
> + break;
> + default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_fcommon(env, dest, j, width);
> + }
> + }
> +    env->vfp.vstart = 0;
> +}
> +
> +/* vsadd.vv vd, vs2, vs1, vm # Vector-vector */
> +void VECTOR_HELPER(vsadd_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest, src1, src2;
> +
> + lmul = vector_get_lmul(env);
> +
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + vl = env->vfp.vl;
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + src1 = rs1 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s8[j] = sat_add_s8(env,
> +                        env->vfp.vreg[src1].s8[j], env->vfp.vreg[src2].s8[j]);
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s16[j] = sat_add_s16(env,
> +                        env->vfp.vreg[src1].s16[j], env->vfp.vreg[src2].s16[j]);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s32[j] = sat_add_s32(env,
> +                        env->vfp.vreg[src1].s32[j], env->vfp.vreg[src2].s32[j]);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s64[j] = sat_add_s64(env,
> +                        env->vfp.vreg[src1].s64[j], env->vfp.vreg[src2].s64[j]);
> + }
> + break;
> + default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> +    env->vfp.vstart = 0;
> +}
> +
> +/* vsadd.vx vd, vs2, rs1, vm # vector-scalar */
> +void VECTOR_HELPER(vsadd_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest, src2;
> +
> + lmul = vector_get_lmul(env);
> +
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + vl = env->vfp.vl;
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s8[j] = sat_add_s8(env,
> + env->vfp.vreg[src2].s8[j], env->gpr[rs1]);
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s16[j] = sat_add_s16(env,
> + env->vfp.vreg[src2].s16[j], env->gpr[rs1]);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s32[j] = sat_add_s32(env,
> + env->vfp.vreg[src2].s32[j], env->gpr[rs1]);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s64[j] = sat_add_s64(env,
> + env->vfp.vreg[src2].s64[j], env->gpr[rs1]);
> + }
> + break;
> + default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> +    env->vfp.vstart = 0;
> +}
> +
> +/* vsadd.vi vd, vs2, imm, vm # vector-immediate */
> +void VECTOR_HELPER(vsadd_vi)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest, src2;
> +
> + lmul = vector_get_lmul(env);
> +
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + vl = env->vfp.vl;
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s8[j] = sat_add_s8(env,
> + env->vfp.vreg[src2].s8[j], sign_extend(rs1, 5));
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s16[j] = sat_add_s16(env,
> + env->vfp.vreg[src2].s16[j], sign_extend(rs1, 5));
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s32[j] = sat_add_s32(env,
> + env->vfp.vreg[src2].s32[j], sign_extend(rs1, 5));
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s64[j] = sat_add_s64(env,
> + env->vfp.vreg[src2].s64[j], sign_extend(rs1, 5));
> + }
> + break;
> + default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> +    env->vfp.vstart = 0;
> +}
> +
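> +/* vdiv.vv vd, vs2, vs1, vm # Vector-vector, signed divide */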
> +void VECTOR_HELPER(vdiv_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src1, src2, dest, vlmax;
> +
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src1 = rs1 + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src1].s8[j] == 0) {
> + env->vfp.vreg[dest].s8[j] = -1;
> + } else if ((env->vfp.vreg[src2].s8[j] == MIN_S8) &&
> + (env->vfp.vreg[src1].s8[j] == (int8_t)(-1))) {
> + env->vfp.vreg[dest].s8[j] = MIN_S8;
> + } else {
> +                        env->vfp.vreg[dest].s8[j] = env->vfp.vreg[src2].s8[j] /
> + env->vfp.vreg[src1].s8[j];
> + }
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src1].s16[j] == 0) {
> + env->vfp.vreg[dest].s16[j] = -1;
> + } else if ((env->vfp.vreg[src2].s16[j] == MIN_S16) &&
> + (env->vfp.vreg[src1].s16[j] == (int16_t)(-1))) {
> + env->vfp.vreg[dest].s16[j] = MIN_S16;
> + } else {
> +                        env->vfp.vreg[dest].s16[j] = env->vfp.vreg[src2].s16[j]
> + / env->vfp.vreg[src1].s16[j];
> + }
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src1].s32[j] == 0) {
> + env->vfp.vreg[dest].s32[j] = -1;
> + } else if ((env->vfp.vreg[src2].s32[j] == MIN_S32) &&
> + (env->vfp.vreg[src1].s32[j] == (int32_t)(-1))) {
> + env->vfp.vreg[dest].s32[j] = MIN_S32;
> + } else {
> +                        env->vfp.vreg[dest].s32[j] = env->vfp.vreg[src2].s32[j]
> + / env->vfp.vreg[src1].s32[j];
> + }
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src1].s64[j] == 0) {
> + env->vfp.vreg[dest].s64[j] = -1;
> + } else if ((env->vfp.vreg[src2].s64[j] == MIN_S64) &&
> + (env->vfp.vreg[src1].s64[j] == (int64_t)(-1))) {
> + env->vfp.vreg[dest].s64[j] = MIN_S64;
> + } else {
> +                        env->vfp.vreg[dest].s64[j] = env->vfp.vreg[src2].s64[j]
> + / env->vfp.vreg[src1].s64[j];
> + }
> + }
> + break;
> + default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
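> +
> +/* vdiv.vx vd, vs2, rs1, vm # vector-scalar, signed divide */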
> +void VECTOR_HELPER(vdiv_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src2, dest, vlmax;
> +
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if ((int8_t)env->gpr[rs1] == 0) {
> + env->vfp.vreg[dest].s8[j] = -1;
> + } else if ((env->vfp.vreg[src2].s8[j] == MIN_S8) &&
> + ((int8_t)env->gpr[rs1] == (int8_t)(-1))) {
> + env->vfp.vreg[dest].s8[j] = MIN_S8;
> + } else {
> +                        env->vfp.vreg[dest].s8[j] = env->vfp.vreg[src2].s8[j] /
> + (int8_t)env->gpr[rs1];
> + }
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if ((int16_t)env->gpr[rs1] == 0) {
> + env->vfp.vreg[dest].s16[j] = -1;
> + } else if ((env->vfp.vreg[src2].s16[j] == MIN_S16) &&
> + ((int16_t)env->gpr[rs1] == (int16_t)(-1))) {
> + env->vfp.vreg[dest].s16[j] = MIN_S16;
> + } else {
> +                        env->vfp.vreg[dest].s16[j] = env->vfp.vreg[src2].s16[j]
> + / (int16_t)env->gpr[rs1];
> + }
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if ((int32_t)env->gpr[rs1] == 0) {
> + env->vfp.vreg[dest].s32[j] = -1;
> + } else if ((env->vfp.vreg[src2].s32[j] == MIN_S32) &&
> + ((int32_t)env->gpr[rs1] == (int32_t)(-1))) {
> + env->vfp.vreg[dest].s32[j] = MIN_S32;
> + } else {
> +                        env->vfp.vreg[dest].s32[j] = env->vfp.vreg[src2].s32[j]
> + / (int32_t)env->gpr[rs1];
> + }
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if ((int64_t)extend_gpr(env->gpr[rs1]) == 0) {
> + env->vfp.vreg[dest].s64[j] = -1;
> + } else if ((env->vfp.vreg[src2].s64[j] == MIN_S64) &&
> +                        ((int64_t)extend_gpr(env->gpr[rs1]) == (int64_t)(-1))) {
> + env->vfp.vreg[dest].s64[j] = MIN_S64;
> + } else {
> +                        env->vfp.vreg[dest].s64[j] = env->vfp.vreg[src2].s64[j]
> + / (int64_t)extend_gpr(env->gpr[rs1]);
> + }
> + }
> + break;
> + default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vfrdiv.vf vd, vs2, rs1, vm # scalar-vector, vd[i] = f[rs1]/vs2[i] */
> +void VECTOR_HELPER(vfrdiv_vf)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest, src2;
> +
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f16[j] = float16_div(
> + env->fpr[rs1],
> +                        env->vfp.vreg[src2].f16[j],
> + &env->fp_status);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f32[j] = float32_div(
> + env->fpr[rs1],
> +                        env->vfp.vreg[src2].f32[j],
> + &env->fp_status);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f64[j] = float64_div(
> + env->fpr[rs1],
> +                        env->vfp.vreg[src2].f64[j],
> + &env->fp_status);
> + }
> + break;
> + default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_fcommon(env, dest, j, width);
> + }
> + }
> +    env->vfp.vstart = 0;
> +}
> +
> +/* vssubu.vv vd, vs2, vs1, vm # Vector-vector */
> +void VECTOR_HELPER(vssubu_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest, src1, src2;
> +
> + lmul = vector_get_lmul(env);
> +
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + vl = env->vfp.vl;
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + src1 = rs1 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u8[j] = sat_sub_u8(env,
> +                        env->vfp.vreg[src2].u8[j], env->vfp.vreg[src1].u8[j]);
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u16[j] = sat_sub_u16(env,
> +                        env->vfp.vreg[src2].u16[j], env->vfp.vreg[src1].u16[j]);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u32[j] = sat_sub_u32(env,
> +                        env->vfp.vreg[src2].u32[j], env->vfp.vreg[src1].u32[j]);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u64[j] = sat_sub_u64(env,
> +                        env->vfp.vreg[src2].u64[j], env->vfp.vreg[src1].u64[j]);
> + }
> + break;
> + default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> +    env->vfp.vstart = 0;
> +}
> +
> +/* vssubu.vx vd, vs2, rs1, vm # vector-scalar */
> +void VECTOR_HELPER(vssubu_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest, src2;
> +
> + lmul = vector_get_lmul(env);
> +
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + vl = env->vfp.vl;
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u8[j] = sat_sub_u8(env,
> + env->vfp.vreg[src2].u8[j], env->gpr[rs1]);
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u16[j] = sat_sub_u16(env,
> + env->vfp.vreg[src2].u16[j], env->gpr[rs1]);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u32[j] = sat_sub_u32(env,
> + env->vfp.vreg[src2].u32[j], env->gpr[rs1]);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u64[j] = sat_sub_u64(env,
> + env->vfp.vreg[src2].u64[j], env->gpr[rs1]);
> + }
> + break;
> + default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> +    env->vfp.vstart = 0;
> +}
> +
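> +/* vremu.vv vd, vs2, vs1, vm # Vector-vector, unsigned remainder */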
> +void VECTOR_HELPER(vremu_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src1, src2, dest, vlmax;
> +
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src1 = rs1 + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src1].u8[j] == 0) {
> +                        env->vfp.vreg[dest].u8[j] = env->vfp.vreg[src2].u8[j];
> +                    } else {
> +                        env->vfp.vreg[dest].u8[j] = env->vfp.vreg[src2].u8[j] %
> + env->vfp.vreg[src1].u8[j];
> + }
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src1].u16[j] == 0) {
> +                        env->vfp.vreg[dest].u16[j] = env->vfp.vreg[src2].u16[j];
> +                    } else {
> +                        env->vfp.vreg[dest].u16[j] = env->vfp.vreg[src2].u16[j]
> + % env->vfp.vreg[src1].u16[j];
> + }
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src1].u32[j] == 0) {
> +                        env->vfp.vreg[dest].u32[j] = env->vfp.vreg[src2].u32[j];
> +                    } else {
> +                        env->vfp.vreg[dest].u32[j] = env->vfp.vreg[src2].u32[j]
> + % env->vfp.vreg[src1].u32[j];
> + }
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src1].u64[j] == 0) {
> +                        env->vfp.vreg[dest].u64[j] = env->vfp.vreg[src2].u64[j];
> +                    } else {
> +                        env->vfp.vreg[dest].u64[j] = env->vfp.vreg[src2].u64[j]
> + % env->vfp.vreg[src1].u64[j];
> + }
> + }
> + break;
> + default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> +
> + env->vfp.vstart = 0;
> +}
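> +
> +/* vremu.vx vd, vs2, rs1, vm # vector-scalar, unsigned remainder */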
> +void VECTOR_HELPER(vremu_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src2, dest, vlmax;
> +
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if ((uint8_t)env->gpr[rs1] == 0) {
> +                        env->vfp.vreg[dest].u8[j] = env->vfp.vreg[src2].u8[j];
> +                    } else {
> +                        env->vfp.vreg[dest].u8[j] = env->vfp.vreg[src2].u8[j] %
> + (uint8_t)env->gpr[rs1];
> + }
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if ((uint16_t)env->gpr[rs1] == 0) {
> +                        env->vfp.vreg[dest].u16[j] = env->vfp.vreg[src2].u16[j];
> +                    } else {
> +                        env->vfp.vreg[dest].u16[j] = env->vfp.vreg[src2].u16[j]
> + % (uint16_t)env->gpr[rs1];
> + }
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if ((uint32_t)env->gpr[rs1] == 0) {
> +                        env->vfp.vreg[dest].u32[j] = env->vfp.vreg[src2].u32[j];
> +                    } else {
> +                        env->vfp.vreg[dest].u32[j] = env->vfp.vreg[src2].u32[j]
> + % (uint32_t)env->gpr[rs1];
> + }
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if ((uint64_t)extend_gpr(env->gpr[rs1]) == 0) {
> +                        env->vfp.vreg[dest].u64[j] = env->vfp.vreg[src2].u64[j];
> +                    } else {
> +                        env->vfp.vreg[dest].u64[j] = env->vfp.vreg[src2].u64[j]
> + % (uint64_t)extend_gpr(env->gpr[rs1]);
> + }
> + }
> + break;
> + default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vmsbf.m vd, vs2, vm # set-before-first mask bit */
> +void VECTOR_HELPER(vmsbf_m)(CPURISCVState *env, uint32_t vm,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i;
> + bool first_mask_bit = false;
> + if (vector_vtype_ill(env)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> + if (env->vfp.vstart != 0) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + if (i < vl) {
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (first_mask_bit) {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + continue;
> + }
> + if (!vector_mask_reg(env, rs2, width, lmul, i)) {
> + vector_mask_result(env, rd, width, lmul, i, 1);
> + } else {
> + first_mask_bit = true;
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> + } else {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> +    env->vfp.vstart = 0;
> +}
> +
> +/* vmsif.m vd, vs2, vm # set-including-first mask bit */
> +void VECTOR_HELPER(vmsif_m)(CPURISCVState *env, uint32_t vm,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i;
> + bool first_mask_bit = false;
> + if (vector_vtype_ill(env)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> + if (env->vfp.vstart != 0) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + if (i < vl) {
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (first_mask_bit) {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + continue;
> + }
> + if (!vector_mask_reg(env, rs2, width, lmul, i)) {
> + vector_mask_result(env, rd, width, lmul, i, 1);
> + } else {
> + first_mask_bit = true;
> + vector_mask_result(env, rd, width, lmul, i, 1);
> + }
> + }
> + } else {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> +    env->vfp.vstart = 0;
> +}
> +
> +/* vmsof.m vd, vs2, vm # set-only-first mask bit */
> +void VECTOR_HELPER(vmsof_m)(CPURISCVState *env, uint32_t vm,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i;
> + bool first_mask_bit = false;
> + if (vector_vtype_ill(env)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> + if (env->vfp.vstart != 0) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + if (i < vl) {
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (first_mask_bit) {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + continue;
> + }
> + if (!vector_mask_reg(env, rs2, width, lmul, i)) {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + } else {
> + first_mask_bit = true;
> + vector_mask_result(env, rd, width, lmul, i, 1);
> + }
> + }
> + } else {
> + vector_mask_result(env, rd, width, lmul, i, 0);
> + }
> + }
> +    env->vfp.vstart = 0;
> +}
> +
> +/* viota.m v4, v2, v0.t */
> +void VECTOR_HELPER(viota_m)(CPURISCVState *env, uint32_t vm, uint32_t rs2,
> + uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest;
> + uint32_t sum = 0;
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> + if (vector_vtype_ill(env)
> + || vector_overlap_vm_force(vm, rd)
> + || vector_overlap_dstgp_srcgp(rd, lmul, rs2, 1)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + if (env->vfp.vstart != 0) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u8[j] = sum;
> + if (vector_mask_reg(env, rs2, width, lmul, i)) {
> + sum++;
> + }
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u16[j] = sum;
> + if (vector_mask_reg(env, rs2, width, lmul, i)) {
> + sum++;
> + }
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u32[j] = sum;
> + if (vector_mask_reg(env, rs2, width, lmul, i)) {
> + sum++;
> + }
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u64[j] = sum;
> + if (vector_mask_reg(env, rs2, width, lmul, i)) {
> + sum++;
> + }
> + }
> + break;
> + default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> +    env->vfp.vstart = 0;
> +}
> +
> +/* vid.v vd, vm # Write element ID to destination. */
> +void VECTOR_HELPER(vid_v)(CPURISCVState *env, uint32_t vm, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest;
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rd, false);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u8[j] = i;
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u16[j] = i;
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u32[j] = i;
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u64[j] = i;
> + }
> + break;
> + default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> +    env->vfp.vstart = 0;
> +}
> +
> +/* vssub.vv vd, vs2, vs1, vm # Vector-vector */
> +void VECTOR_HELPER(vssub_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest, src1, src2;
> +
> + lmul = vector_get_lmul(env);
> +
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + vl = env->vfp.vl;
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + src1 = rs1 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s8[j] = sat_sub_s8(env,
> + env->vfp.vreg[src2].s8[j], env->vfp.vreg[src1].s8[j]);
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s16[j] = sat_sub_s16(env,
> + env->vfp.vreg[src2].s16[j], env->vfp.vreg[src1].s16[j]);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s32[j] = sat_sub_s32(env,
> + env->vfp.vreg[src2].s32[j], env->vfp.vreg[src1].s32[j]);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s64[j] = sat_sub_s64(env,
> + env->vfp.vreg[src2].s64[j], env->vfp.vreg[src1].s64[j]);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vssub.vx vd, vs2, rs1, vm # vector-scalar */
> +void VECTOR_HELPER(vssub_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest, src2;
> +
> + lmul = vector_get_lmul(env);
> +
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + vl = env->vfp.vl;
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s8[j] = sat_sub_s8(env,
> + env->vfp.vreg[src2].s8[j], env->gpr[rs1]);
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s16[j] = sat_sub_s16(env,
> + env->vfp.vreg[src2].s16[j], env->gpr[rs1]);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s32[j] = sat_sub_s32(env,
> + env->vfp.vreg[src2].s32[j], env->gpr[rs1]);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s64[j] = sat_sub_s64(env,
> + env->vfp.vreg[src2].s64[j], env->gpr[rs1]);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
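sat_sub_s8/s16/s32/s64 are defined earlier in this file (not visible in this
hunk). Assuming they follow the usual signed-saturation pattern, the 8-bit
case would look roughly like the sketch below; the real helpers also take env
so they can raise the vxsat saturation flag, which is omitted here:

    #include <stdint.h>

    /* Sketch: signed saturating subtract, widened so overflow is visible. */
    static int8_t sat_sub_s8_sketch(int8_t a, int8_t b)
    {
        int16_t r = (int16_t)a - (int16_t)b;
        if (r > INT8_MAX) {
            return INT8_MAX;    /* clamp high; real helper also sets vxsat */
        }
        if (r < INT8_MIN) {
            return INT8_MIN;    /* clamp low */
        }
        return (int8_t)r;
    }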
> +
> +void VECTOR_HELPER(vrem_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src1, src2, dest, vlmax;
> +
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src1 = rs1 + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src1].s8[j] == 0) {
> + env->vfp.vreg[dest].s8[j] = env->vfp.vreg[src2].s8[j];
> + } else if ((env->vfp.vreg[src2].s8[j] == MIN_S8) &&
> + (env->vfp.vreg[src1].s8[j] == (int8_t)(-1))) {
> + env->vfp.vreg[dest].s8[j] = 0;
> + } else {
> + env->vfp.vreg[dest].s8[j] = env->vfp.vreg[src2].s8[j] %
> + env->vfp.vreg[src1].s8[j];
> + }
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src1].s16[j] == 0) {
> + env->vfp.vreg[dest].s16[j] = env->vfp.vreg[src2].s16[j];
> + } else if ((env->vfp.vreg[src2].s16[j] == MIN_S16) &&
> + (env->vfp.vreg[src1].s16[j] == (int16_t)(-1))) {
> + env->vfp.vreg[dest].s16[j] = 0;
> + } else {
> + env->vfp.vreg[dest].s16[j] = env->vfp.vreg[src2].s16[j]
> + % env->vfp.vreg[src1].s16[j];
> + }
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src1].s32[j] == 0) {
> + env->vfp.vreg[dest].s32[j] = env->vfp.vreg[src2].s32[j];
> + } else if ((env->vfp.vreg[src2].s32[j] == MIN_S32) &&
> + (env->vfp.vreg[src1].s32[j] == (int32_t)(-1))) {
> + env->vfp.vreg[dest].s32[j] = 0;
> + } else {
> + env->vfp.vreg[dest].s32[j] = env->vfp.vreg[src2].s32[j]
> + % env->vfp.vreg[src1].s32[j];
> + }
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if (env->vfp.vreg[src1].s64[j] == 0) {
> + env->vfp.vreg[dest].s64[j] = env->vfp.vreg[src2].s64[j];
> + } else if ((env->vfp.vreg[src2].s64[j] == MIN_S64) &&
> + (env->vfp.vreg[src1].s64[j] == (int64_t)(-1))) {
> + env->vfp.vreg[dest].s64[j] = 0;
> + } else {
> + env->vfp.vreg[dest].s64[j] = env->vfp.vreg[src2].s64[j]
> + % env->vfp.vreg[src1].s64[j];
> + }
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +void VECTOR_HELPER(vrem_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src2, dest, vlmax;
> +
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if ((int8_t)env->gpr[rs1] == 0) {
> + env->vfp.vreg[dest].s8[j] = env->vfp.vreg[src2].s8[j];
> + } else if ((env->vfp.vreg[src2].s8[j] == MIN_S8) &&
> + ((int8_t)env->gpr[rs1] == (int8_t)(-1))) {
> + env->vfp.vreg[dest].s8[j] = 0;
> + } else {
> + env->vfp.vreg[dest].s8[j] = env->vfp.vreg[src2].s8[j] %
> + (int8_t)env->gpr[rs1];
> + }
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if ((int16_t)env->gpr[rs1] == 0) {
> + env->vfp.vreg[dest].s16[j] = env->vfp.vreg[src2].s16[j];
> + } else if ((env->vfp.vreg[src2].s16[j] == MIN_S16) &&
> + ((int16_t)env->gpr[rs1] == (int16_t)(-1))) {
> + env->vfp.vreg[dest].s16[j] = 0;
> + } else {
> + env->vfp.vreg[dest].s16[j] = env->vfp.vreg[src2].s16[j]
> + % (int16_t)env->gpr[rs1];
> + }
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if ((int32_t)env->gpr[rs1] == 0) {
> + env->vfp.vreg[dest].s32[j] = env->vfp.vreg[src2].s32[j];
> + } else if ((env->vfp.vreg[src2].s32[j] == MIN_S32) &&
> + ((int32_t)env->gpr[rs1] == (int32_t)(-1))) {
> + env->vfp.vreg[dest].s32[j] = 0;
> + } else {
> + env->vfp.vreg[dest].s32[j] = env->vfp.vreg[src2].s32[j]
> + % (int32_t)env->gpr[rs1];
> + }
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + if ((int64_t)extend_gpr(env->gpr[rs1]) == 0) {
> + env->vfp.vreg[dest].s64[j] = env->vfp.vreg[src2].s64[j];
> + } else if ((env->vfp.vreg[src2].s64[j] == MIN_S64) &&
> + ((int64_t)extend_gpr(env->gpr[rs1]) == (int64_t)(-1))) {
> + env->vfp.vreg[dest].s64[j] = 0;
> + } else {
> + env->vfp.vreg[dest].s64[j] = env->vfp.vreg[src2].s64[j]
> + % (int64_t)extend_gpr(env->gpr[rs1]);
> + }
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> +
> + env->vfp.vstart = 0;
> +}
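The two special cases above mirror the scalar RISC-V REM rules: remainder by
zero yields the dividend, and the overflow case (most-negative dividend,
divisor of -1) yields 0 — which conveniently also dodges the undefined
behaviour a bare % would have there. Condensed, for the 8-bit lane:

    #include <stdint.h>

    /* Sketch of the signed-remainder rules implemented above. */
    static int8_t rem_s8_sketch(int8_t dividend, int8_t divisor)
    {
        if (divisor == 0) {
            return dividend;              /* x % 0 == x */
        }
        if (dividend == INT8_MIN && divisor == -1) {
            return 0;                     /* overflow: INT8_MIN % -1 */
        }
        return dividend % divisor;        /* safe for all other inputs */
    }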
> +
> +/* vaadd.vv vd, vs2, vs1, vm # Vector-vector */
> +void VECTOR_HELPER(vaadd_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest, src1, src2;
> +
> + lmul = vector_get_lmul(env);
> +
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + vl = env->vfp.vl;
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + src1 = rs1 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s8[j] = avg_round_s8(env,
> + env->vfp.vreg[src1].s8[j], env->vfp.vreg[src2].s8[j]);
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s16[j] = avg_round_s16(env,
> + env->vfp.vreg[src1].s16[j], env->vfp.vreg[src2].s16[j]);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s32[j] = avg_round_s32(env,
> + env->vfp.vreg[src1].s32[j], env->vfp.vreg[src2].s32[j]);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s64[j] = avg_round_s64(env,
> + env->vfp.vreg[src1].s64[j], env->vfp.vreg[src2].s64[j]);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vaadd.vx vd, vs2, rs1, vm # vector-scalar */
> +void VECTOR_HELPER(vaadd_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest, src2;
> +
> + lmul = vector_get_lmul(env);
> +
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + vl = env->vfp.vl;
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s8[j] = avg_round_s8(env,
> + env->gpr[rs1], env->vfp.vreg[src2].s8[j]);
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s16[j] = avg_round_s16(env,
> + env->gpr[rs1], env->vfp.vreg[src2].s16[j]);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s32[j] = avg_round_s32(env,
> + env->gpr[rs1], env->vfp.vreg[src2].s32[j]);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s64[j] = avg_round_s64(env,
> + env->gpr[rs1], env->vfp.vreg[src2].s64[j]);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vaadd.vi vd, vs2, imm, vm # vector-immediate */
> +void VECTOR_HELPER(vaadd_vi)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest, src2;
> +
> + lmul = vector_get_lmul(env);
> +
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + vl = env->vfp.vl;
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s8[j] = avg_round_s8(env,
> + rs1, env->vfp.vreg[src2].s8[j]);
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s16[j] = avg_round_s16(env,
> + rs1, env->vfp.vreg[src2].s16[j]);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s32[j] = avg_round_s32(env,
> + rs1, env->vfp.vreg[src2].s32[j]);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s64[j] = avg_round_s64(env,
> + rs1, env->vfp.vreg[src2].s64[j]);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
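avg_round_s8/s16/s32/s64 implement the averaging add, (a + b + rounding) >> 1,
in a wider type so the intermediate sum cannot overflow. The sketch below
hard-codes round-to-nearest-up; the in-patch helpers take env, presumably so
they can honour the vxrm fixed-point rounding mode instead:

    #include <stdint.h>

    /* Sketch: averaging add for the 8-bit lane, widened to 16 bits. */
    static int8_t avg_round_s8_sketch(int8_t a, int8_t b)
    {
        int16_t sum = (int16_t)a + (int16_t)b;  /* cannot overflow in 16 bits */
        return (int8_t)((sum + 1) >> 1);        /* add rounding bit, halve */
    }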
> +
> +void VECTOR_HELPER(vmulhu_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src1, src2, dest, vlmax;
> +
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src1 = rs1 + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u8[j] =
> + ((uint16_t)env->vfp.vreg[src1].u8[j]
> + * (uint16_t)env->vfp.vreg[src2].u8[j]) >> width;
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u16[j] =
> + ((uint32_t)env->vfp.vreg[src1].u16[j]
> + * (uint32_t)env->vfp.vreg[src2].u16[j]) >> width;
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u32[j] =
> + ((uint64_t)env->vfp.vreg[src1].u32[j]
> + * (uint64_t)env->vfp.vreg[src2].u32[j]) >> width;
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u64[j] = u64xu64_lh(
> + env->vfp.vreg[src1].u64[j], env->vfp.vreg[src2].u64[j]);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +void VECTOR_HELPER(vmulhu_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src2, dest, vlmax;
> +
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u8[j] =
> + ((uint16_t)(uint8_t)env->gpr[rs1]
> + * (uint16_t)env->vfp.vreg[src2].u8[j]) >> width;
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u16[j] =
> + ((uint32_t)(uint16_t)env->gpr[rs1]
> + * (uint32_t)env->vfp.vreg[src2].u16[j]) >> width;
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u32[j] =
> + ((uint64_t)(uint32_t)env->gpr[rs1]
> + * (uint64_t)env->vfp.vreg[src2].u32[j]) >> width;
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u64[j] = u64xu64_lh(
> + (uint64_t)extend_gpr(env->gpr[rs1]), env->vfp.vreg[src2].u64[j]);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> +
> + env->vfp.vstart = 0;
> +}
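For SEW of 8/16/32 the high half of the product falls out of a widening
multiply followed by `>> width`, as above; only the 64-bit lane needs the
dedicated u64xu64_lh helper, since there is no wider native type to promote
into (u64xu64_lh presumably splits the operands into 32-bit halves). The
widening trick in isolation:

    #include <stdint.h>

    /* Sketch: high half of an 8-bit unsigned multiply via widening. */
    static uint8_t mulhu_u8_sketch(uint8_t a, uint8_t b)
    {
        uint16_t prod = (uint16_t)a * (uint16_t)b;  /* full 16-bit product */
        return (uint8_t)(prod >> 8);                /* keep the upper half */
    }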
> +
> +/* vfmul.vv vd, vs2, vs1, vm # Vector-vector */
> +void VECTOR_HELPER(vfmul_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest, src1, src2;
> +
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + src1 = rs1 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f16[j] = float16_mul(
> + env->vfp.vreg[src1].f16[j],
> + env->vfp.vreg[src2].f16[j],
> + &env->fp_status);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f32[j] = float32_mul(
> + env->vfp.vreg[src1].f32[j],
> + env->vfp.vreg[src2].f32[j],
> + &env->fp_status);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f64[j] = float64_mul(
> + env->vfp.vreg[src1].f64[j],
> + env->vfp.vreg[src2].f64[j],
> + &env->fp_status);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_fcommon(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vfmul.vf vd, vs2, rs1, vm # vector-scalar */
> +void VECTOR_HELPER(vfmul_vf)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest, src2;
> +
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f16[j] = float16_mul(
> + env->fpr[rs1],
> + env->vfp.vreg[src2].f16[j],
> + &env->fp_status);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f32[j] = float32_mul(
> + env->fpr[rs1],
> + env->vfp.vreg[src2].f32[j],
> + &env->fp_status);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f64[j] = float64_mul(
> + env->fpr[rs1],
> + env->vfp.vreg[src2].f64[j],
> + &env->fp_status);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_fcommon(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +void VECTOR_HELPER(vsll_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src1, src2, dest, vlmax;
> +
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> +
> + for (i = 0; i < vlmax; i++) {
> + src1 = rs1 + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u8[j] = env->vfp.vreg[src2].u8[j]
> + << (env->vfp.vreg[src1].u8[j] & 0x7);
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u16[j] = env->vfp.vreg[src2].u16[j]
> + << (env->vfp.vreg[src1].u16[j] & 0xf);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u32[j] = env->vfp.vreg[src2].u32[j]
> + << (env->vfp.vreg[src1].u32[j] & 0x1f);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u64[j] = env->vfp.vreg[src2].u64[j]
> + << (env->vfp.vreg[src1].u64[j] & 0x3f);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vsll_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src2, dest, vlmax;
> +
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u8[j] = env->vfp.vreg[src2].u8[j]
> + << (env->gpr[rs1] & 0x7);
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u16[j] = env->vfp.vreg[src2].u16[j]
> + << (env->gpr[rs1] & 0xf);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u32[j] = env->vfp.vreg[src2].u32[j]
> + << (env->gpr[rs1] & 0x1f);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u64[j] = env->vfp.vreg[src2].u64[j]
> + << ((uint64_t)extend_gpr(env->gpr[rs1]) & 0x3f);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vsll_vi)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src2, dest, vlmax;
> +
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u8[j] = env->vfp.vreg[src2].u8[j]
> + << (rs1);
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u16[j] = env->vfp.vreg[src2].u16[j]
> + << (rs1);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u32[j] = env->vfp.vreg[src2].u32[j]
> + << (rs1);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u64[j] = env->vfp.vreg[src2].u64[j]
> + << (rs1);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vmul_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src1, src2, dest, vlmax;
> +
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src1 = rs1 + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s8[j] = env->vfp.vreg[src1].s8[j]
> + * env->vfp.vreg[src2].s8[j];
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s16[j] = env->vfp.vreg[src1].s16[j]
> + * env->vfp.vreg[src2].s16[j];
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s32[j] = env->vfp.vreg[src1].s32[j]
> + * env->vfp.vreg[src2].s32[j];
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s64[j] = env->vfp.vreg[src1].s64[j]
> + * env->vfp.vreg[src2].s64[j];
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +void VECTOR_HELPER(vmul_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src2, dest, vlmax;
> +
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s8[j] = env->gpr[rs1]
> + * env->vfp.vreg[src2].s8[j];
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s16[j] = env->gpr[rs1]
> + * env->vfp.vreg[src2].s16[j];
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s32[j] = env->gpr[rs1]
> + * env->vfp.vreg[src2].s32[j];
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s64[j] =
> + (int64_t)extend_gpr(env->gpr[rs1])
> + * env->vfp.vreg[src2].s64[j];
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vasub.vv vd, vs2, vs1, vm # Vector-vector */
> +void VECTOR_HELPER(vasub_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest, src1, src2;
> +
> + lmul = vector_get_lmul(env);
> +
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + vl = env->vfp.vl;
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + src1 = rs1 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s8[j] = avg_round_s8(
> + env,
> + ~env->vfp.vreg[src1].s8[j] + 1,
> + env->vfp.vreg[src2].s8[j]);
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s16[j] = avg_round_s16(
> + env,
> + ~env->vfp.vreg[src1].s16[j] + 1,
> + env->vfp.vreg[src2].s16[j]);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s32[j] = avg_round_s32(
> + env,
> + ~env->vfp.vreg[src1].s32[j] + 1,
> + env->vfp.vreg[src2].s32[j]);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s64[j] = avg_round_s64(
> + env,
> + ~env->vfp.vreg[src1].s64[j] + 1,
> + env->vfp.vreg[src2].s64[j]);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vasub.vx vd, vs2, rs1, vm # vector-scalar */
> +void VECTOR_HELPER(vasub_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest, src2;
> +
> + lmul = vector_get_lmul(env);
> +
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + vl = env->vfp.vl;
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s8[j] = avg_round_s8(
> + env, ~env->gpr[rs1] + 1, env->vfp.vreg[src2].s8[j]);
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s16[j] = avg_round_s16(
> + env, ~env->gpr[rs1] + 1, env->vfp.vreg[src2].s16[j]);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s32[j] = avg_round_s32(
> + env, ~env->gpr[rs1] + 1, env->vfp.vreg[src2].s32[j]);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s64[j] = avg_round_s64(
> + env, ~env->gpr[rs1] + 1, env->vfp.vreg[src2].s64[j]);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
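One thing worth double-checking in vasub: the subtrahend is negated with
`~x + 1` before it reaches avg_round_*. If the helper's parameters are the
narrow element type, the most negative input (e.g. an s8 of -128) wraps back
onto itself under that negation. Forming the difference in the widened type
avoids the corner case — a sketch, assuming avg_round_s8 narrows its
arguments (its actual signature is not visible in this hunk):

    #include <stdint.h>

    /* Sketch: averaging subtract (vs2 - vs1 order, as in the calls above),
     * with the difference formed in 16 bits so -128 negates correctly. */
    static int8_t asub_round_s8_sketch(int8_t vs1, int8_t vs2)
    {
        int16_t diff = (int16_t)vs2 - (int16_t)vs1;
        return (int8_t)((diff + 1) >> 1);
    }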
> +
> +void VECTOR_HELPER(vmulhsu_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src1, src2, dest, vlmax;
> +
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src1 = rs1 + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s8[j] =
> + ((uint16_t)env->vfp.vreg[src1].u8[j]
> + * (int16_t)env->vfp.vreg[src2].s8[j]) >> width;
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s16[j] =
> + ((uint32_t)env->vfp.vreg[src1].u16[j]
> + * (int32_t)env->vfp.vreg[src2].s16[j]) >> width;
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s32[j] =
> + ((uint64_t)env->vfp.vreg[src1].u32[j]
> + * (int64_t)env->vfp.vreg[src2].s32[j]) >> width;
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s64[j] = s64xu64_lh(
> + env->vfp.vreg[src2].s64[j], env->vfp.vreg[src1].u64[j]);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +void VECTOR_HELPER(vmulhsu_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src2, dest, vlmax;
> +
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s8[j] =
> + ((uint16_t)(uint8_t)env->gpr[rs1]
> + * (int16_t)env->vfp.vreg[src2].s8[j]) >> width;
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s16[j] =
> + ((uint32_t)(uint16_t)env->gpr[rs1]
> + * (int32_t)env->vfp.vreg[src2].s16[j]) >> width;
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s32[j] =
> + ((uint64_t)(uint32_t)env->gpr[rs1]
> + * (int64_t)env->vfp.vreg[src2].s32[j]) >> width;
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s64[j] = s64xu64_lh(
> + env->vfp.vreg[src2].s64[j],
> + (uint64_t)extend_gpr(env->gpr[rs1]));
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vsmul.vv vd, vs2, vs1, vm # vd[i] = clip((vs2[i]*vs1[i]+round)>>(SEW-1)) */
> +void VECTOR_HELPER(vsmul_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest, src1, src2;
> + if (vector_vtype_ill(env)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> + if ((!(vm)) && rd == 0) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + src1 = rs1 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s8[j] = vsmul_8(env,
> + env->vfp.vreg[src1].s8[j], env->vfp.vreg[src2].s8[j]);
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s16[j] = vsmul_16(env,
> + env->vfp.vreg[src1].s16[j], env->vfp.vreg[src2].s16[j]);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s32[j] = vsmul_32(env,
> + env->vfp.vreg[src1].s32[j], env->vfp.vreg[src2].s32[j]);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s64[j] = vsmul_64(env,
> + env->vfp.vreg[src1].s64[j], env->vfp.vreg[src2].s64[j]);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vsmul.vx vd, vs2, rs1, vm # vd[i] = clip((vs2[i]*x[rs1]+round)>>(SEW-1)) */
> +void VECTOR_HELPER(vsmul_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest, src2;
> + if (vector_vtype_ill(env)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> + if ((!(vm)) && rd == 0) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s8[j] = vsmul_8(env,
> + env->vfp.vreg[src2].s8[j], env->gpr[rs1]);
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s16[j] = vsmul_16(env,
> + env->vfp.vreg[src2].s16[j], env->gpr[rs1]);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s32[j] = vsmul_32(env,
> + env->vfp.vreg[src2].s32[j], env->gpr[rs1]);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s64[j] = vsmul_64(env,
> + env->vfp.vreg[src2].s64[j], env->gpr[rs1]);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
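Per the header comment, vsmul_8/16/32/64 compute the signed fixed-point
multiply clip((a*b + round) >> (SEW-1)). For SEW=8 that is a Q7 x Q7 product
rescaled back to Q7, and only (-128 x -128) can exceed the destination range.
A sketch with round-to-nearest hard-coded (the in-patch helpers take env,
presumably for the vxrm rounding mode and the vxsat flag):

    #include <stdint.h>

    /* Sketch: vd = clip((a * b + 2^6) >> 7) for the 8-bit lane. */
    static int8_t vsmul_8_sketch(int8_t a, int8_t b)
    {
        int16_t prod = (int16_t)a * (int16_t)b;  /* Q7 * Q7 -> Q14 */
        int16_t r = (prod + (1 << 6)) >> 7;      /* round, rescale to Q7 */
        if (r > INT8_MAX) {
            r = INT8_MAX;    /* only INT8_MIN * INT8_MIN lands here */
        }
        return (int8_t)r;
    }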
> +
> +void VECTOR_HELPER(vmulh_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src1, src2, dest, vlmax;
> +
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src1 = rs1 + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s8[j] =
> + ((int16_t)env->vfp.vreg[src1].s8[j]
> + * (int16_t)env->vfp.vreg[src2].s8[j]) >> width;
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s16[j] =
> + ((int32_t)env->vfp.vreg[src1].s16[j]
> + * (int32_t)env->vfp.vreg[src2].s16[j]) >> width;
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s32[j] =
> + ((int64_t)env->vfp.vreg[src1].s32[j]
> + * (int64_t)env->vfp.vreg[src2].s32[j]) >> width;
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s64[j] = s64xs64_lh(
> + env->vfp.vreg[src1].s64[j], env->vfp.vreg[src2].s64[j]);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +void VECTOR_HELPER(vmulh_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src2, dest, vlmax;
> +
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s8[j] =
> + ((int16_t)(int8_t)env->gpr[rs1]
> + * (int16_t)env->vfp.vreg[src2].s8[j]) >> width;
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s16[j] =
> + ((int32_t)(int16_t)env->gpr[rs1]
> + * (int32_t)env->vfp.vreg[src2].s16[j]) >> width;
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s32[j] =
> + ((int64_t)(int32_t)env->gpr[rs1]
> + * (int64_t)env->vfp.vreg[src2].s32[j]) >> width;
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s64[j] = s64xs64_lh(
> + (int64_t)extend_gpr(env->gpr[rs1]), env->vfp.vreg[src2].s64[j]);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vfrsub.vf vd, vs2, rs1, vm # Scalar-vector vd[i] = f[rs1] - vs2[i] */
> +void VECTOR_HELPER(vfrsub_vf)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest, src2;
> +
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f16[j] = float16_sub(
> + env->fpr[rs1],
> + env->vfp.vreg[src2].f16[j],
> + &env->fp_status);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f32[j] = float32_sub(
> + env->fpr[rs1],
> + env->vfp.vreg[src2].f32[j],
> + &env->fp_status);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f64[j] = float64_sub(
> + env->fpr[rs1],
> + env->vfp.vreg[src2].f64[j],
> + &env->fp_status);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_fcommon(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +void VECTOR_HELPER(vsrl_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src1, src2, dest, vlmax;
> +
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> +
> + for (i = 0; i < vlmax; i++) {
> + src1 = rs1 + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u8[j] = env->vfp.vreg[src2].u8[j]
> + >> (env->vfp.vreg[src1].u8[j] & 0x7);
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u16[j] = env->vfp.vreg[src2].u16[j]
> + >> (env->vfp.vreg[src1].u16[j] & 0xf);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u32[j] = env->vfp.vreg[src2].u32[j]
> + >> (env->vfp.vreg[src1].u32[j] & 0x1f);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u64[j] = env->vfp.vreg[src2].u64[j]
> + >> (env->vfp.vreg[src1].u64[j] & 0x3f);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +void VECTOR_HELPER(vsrl_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src2, dest, vlmax;
> +
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u8[j] = env->vfp.vreg[src2].u8[j]
> + >> (env->gpr[rs1] & 0x7);
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u16[j] = env->vfp.vreg[src2].u16[j]
> + >> (env->gpr[rs1] & 0xf);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u32[j] = env->vfp.vreg[src2].u32[j]
> + >> (env->gpr[rs1] & 0x1f);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u64[j] = env->vfp.vreg[src2].u64[j]
> + >> ((uint64_t)extend_gpr(env->gpr[rs1]) & 0x3f);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +void VECTOR_HELPER(vsrl_vi)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src2, dest, vlmax;
> +
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u8[j] = env->vfp.vreg[src2].u8[j]
> + >> (rs1);
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u16[j] = env->vfp.vreg[src2].u16[j]
> + >> (rs1);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u32[j] = env->vfp.vreg[src2].u32[j]
> + >> (rs1);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u64[j] = env->vfp.vreg[src2].u64[j]
> + >> (rs1);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vfmadd.vv vd, vs1, vs2, vm # vd[i] = +(vs1[i] * vd[i]) + vs2[i] */
> +void VECTOR_HELPER(vfmadd_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest, src1, src2;
> +
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + src1 = rs1 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f16[j] = float16_muladd(
> + env->vfp.vreg[src1].f16[j],
> + env->vfp.vreg[dest].f16[j],
> + env->vfp.vreg[src2].f16[j],
> + 0,
> + &env->fp_status);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f32[j] = float32_muladd(
> + env->vfp.vreg[src1].f32[j],
> + env->vfp.vreg[dest].f32[j],
> + env->vfp.vreg[src2].f32[j],
> + 0,
> + &env->fp_status);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f64[j] = float64_muladd(
> + env->vfp.vreg[src1].f64[j],
> + env->vfp.vreg[dest].f64[j],
> + env->vfp.vreg[src2].f64[j],
> + 0,
> + &env->fp_status);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_fcommon(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vfmadd.vf vd, rs1, vs2, vm # vd[i] = +(f[rs1] * vd[i]) + vs2[i] */
> +void VECTOR_HELPER(vfmadd_vf)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest, src2;
> +
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f16[j] = float16_muladd(
> + env->fpr[rs1],
> + env->vfp.vreg[dest].f16[j],
> + env->vfp.vreg[src2].f16[j],
> + 0,
> + &env->fp_status);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f32[j] = float32_muladd(
> + env->fpr[rs1],
> + env->vfp.vreg[dest].f32[j],
> + env->vfp.vreg[src2].f32[j],
> + 0,
> + &env->fp_status);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f64[j] = float64_muladd(
> + env->fpr[rs1],
> + env->vfp.vreg[dest].f64[j],
> + env->vfp.vreg[src2].f64[j],
> + 0,
> + &env->fp_status);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_fcommon(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
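For reference, each element update above collapses to one fused softfloat call;
a minimal standalone sketch of the SEW=32 vfmadd.vf case (the helper name and
free-standing form are illustrative, not part of the patch):

#include "fpu/softfloat.h"

/* vfmadd.vf element, SEW=32: vd = (f[rs1] * vd) + vs2, rounded once. */
static float32 vfmadd_vf_elem_32(float32 fs1, float32 vd, float32 vs2,
                                 float_status *fs)
{
    /* flags == 0: no operand negated, single rounding at the end */
    return float32_muladd(fs1, vd, vs2, 0, fs);
}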
> +void VECTOR_HELPER(vsra_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src1, src2, dest, vlmax;
> +
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> +
> + for (i = 0; i < vlmax; i++) {
> + src1 = rs1 + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s8[j] = env->vfp.vreg[src2].s8[j]
> + >> (env->vfp.vreg[src1].s8[j] & 0x7);
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s16[j] = env->vfp.vreg[src2].s16[j]
> + >> (env->vfp.vreg[src1].s16[j] & 0xf);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s32[j] = env->vfp.vreg[src2].s32[j]
> + >> (env->vfp.vreg[src1].s32[j] & 0x1f);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s64[j] = env->vfp.vreg[src2].s64[j]
> + >> (env->vfp.vreg[src1].s64[j] & 0x3f);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vsra_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src2, dest, vlmax;
> +
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s8[j] = env->vfp.vreg[src2].s8[j]
> + >> (env->gpr[rs1] & 0x7);
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s16[j] = env->vfp.vreg[src2].s16[j]
> + >> (env->gpr[rs1] & 0xf);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s32[j] = env->vfp.vreg[src2].s32[j]
> + >> (env->gpr[rs1] & 0x1f);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s64[j] = env->vfp.vreg[src2].s64[j]
> + >> ((uint64_t)extend_gpr(env->gpr[rs1]) & 0x3f);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vsra_vi)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src2, dest, vlmax;
> +
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s8[j] = env->vfp.vreg[src2].s8[j]
> + >> (rs1);
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s16[j] = env->vfp.vreg[src2].s16[j]
> + >> (rs1);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s32[j] = env->vfp.vreg[src2].s32[j]
> + >> (rs1);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s64[j] = env->vfp.vreg[src2].s64[j]
> + >> (rs1);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
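One behavioural detail worth calling out in the three vsra forms: .vv and .vx
mask the shift amount down to log2(SEW) bits, while .vi shifts by the raw
5-bit immediate. A minimal sketch of the masked SEW=8 element operation
(function name illustrative):

#include <stdint.h>

/* vsra.vv element, SEW=8: arithmetic right shift, amount taken mod 8. */
static int8_t vsra_vv_elem_8(int8_t vs2, uint8_t vs1)
{
    return vs2 >> (vs1 & 0x7); /* only the low three bits participate */
}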
> +void VECTOR_HELPER(vmadd_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src1, src2, dest, vlmax;
> +
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src1 = rs1 + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s8[j] = env->vfp.vreg[src1].s8[j]
> + * env->vfp.vreg[dest].s8[j]
> + + env->vfp.vreg[src2].s8[j];
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s16[j] = env->vfp.vreg[src1].s16[j]
> + * env->vfp.vreg[dest].s16[j]
> + + env->vfp.vreg[src2].s16[j];
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s32[j] = env->vfp.vreg[src1].s32[j]
> + * env->vfp.vreg[dest].s32[j]
> + + env->vfp.vreg[src2].s32[j];
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s64[j] = env->vfp.vreg[src1].s64[j]
> + * env->vfp.vreg[dest].s64[j]
> + + env->vfp.vreg[src2].s64[j];
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +void VECTOR_HELPER(vmadd_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src2, dest, vlmax;
> +
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s8[j] = env->gpr[rs1]
> + * env->vfp.vreg[dest].s8[j]
> + + env->vfp.vreg[src2].s8[j];
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s16[j] = env->gpr[rs1]
> + * env->vfp.vreg[dest].s16[j]
> + + env->vfp.vreg[src2].s16[j];
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s32[j] = env->gpr[rs1]
> + * env->vfp.vreg[dest].s32[j]
> + + env->vfp.vreg[src2].s32[j];
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s64[j] =
> + (int64_t)extend_gpr(env->gpr[rs1])
> + * env->vfp.vreg[dest].s64[j]
> + + env->vfp.vreg[src2].s64[j];
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> +
> + env->vfp.vstart = 0;
> +}
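The integer multiply-add forms deliberately wrap modulo 2^SEW; the truncating
store into the narrow element field provides that. A scalar sketch of one
vmadd.vx element at SEW=8, done in unsigned arithmetic so the wrap is well
defined in C (names illustrative):

#include <stdint.h>

/* vmadd.vx element, SEW=8: vd = (x[rs1] * vd) + vs2, wrapping mod 2^8. */
static int8_t vmadd_vx_elem_8(int64_t x_rs1, int8_t vd, int8_t vs2)
{
    uint64_t r = (uint64_t)x_rs1 * (uint64_t)(int64_t)vd
               + (uint64_t)(int64_t)vs2;
    return (int8_t)(uint8_t)r; /* truncation gives the modular wrap */
}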
> +
> +/* vfnmadd.vv vd, vs1, vs2, vm # vd[i] = -(vs1[i] * vd[i]) - vs2[i] */
> +void VECTOR_HELPER(vfnmadd_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest, src1, src2;
> +
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + src1 = rs1 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f16[j] = float16_muladd(
> + env->vfp.vreg[src1].f16[j],
> + env->vfp.vreg[dest].f16[j],
> + env->vfp.vreg[src2].f16[j],
> + float_muladd_negate_c | float_muladd_negate_product,
> + &env->fp_status);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f32[j] = float32_muladd(
> + env->vfp.vreg[src1].f32[j],
> + env->vfp.vreg[dest].f32[j],
> + env->vfp.vreg[src2].f32[j],
> + float_muladd_negate_c | float_muladd_negate_product,
> + &env->fp_status);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f64[j] = float64_muladd(
> + env->vfp.vreg[src1].f64[j],
> + env->vfp.vreg[dest].f64[j],
> + env->vfp.vreg[src2].f64[j],
> + float_muladd_negate_c | float_muladd_negate_product,
> + &env->fp_status);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_fcommon(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vfnmadd.vf vd, rs1, vs2, vm # vd[i] = -(f[rs1] * vd[i]) - vs2[i] */
> +void VECTOR_HELPER(vfnmadd_vf)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest, src2;
> +
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f16[j] = float16_muladd(
> + env->fpr[rs1],
> + env->vfp.vreg[dest].f16[j],
> + env->vfp.vreg[src2].f16[j],
> + float_muladd_negate_c | float_muladd_negate_product,
> + &env->fp_status);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f32[j] = float32_muladd(
> + env->fpr[rs1],
> + env->vfp.vreg[dest].f32[j],
> + env->vfp.vreg[src2].f32[j],
> + float_muladd_negate_c | float_muladd_negate_product,
> + &env->fp_status);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f64[j] = float64_muladd(
> + env->fpr[rs1],
> + env->vfp.vreg[dest].f64[j],
> + env->vfp.vreg[src2].f64[j],
> + float_muladd_negate_c | float_muladd_negate_product,
> + &env->fp_status);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_fcommon(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
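The two negate flags compose: float_muladd_negate_product flips the sign of
the product and float_muladd_negate_c flips the addend, so their combination
gives -(a * b) - c with a single rounding, which is exactly vfnmadd. A sketch
of the SEW=64 element (name illustrative):

#include "fpu/softfloat.h"

/* vfnmadd.vf element, SEW=64: vd = -(f[rs1] * vd) - vs2 */
static float64 vfnmadd_vf_elem_64(float64 fs1, float64 vd, float64 vs2,
                                  float_status *fs)
{
    return float64_muladd(fs1, vd, vs2,
                          float_muladd_negate_c | float_muladd_negate_product,
                          fs);
}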
> +
> +/* vssrl.vv vd, vs2, vs1, vm # vd[i] = ((vs2[i] + round)>>vs1[i]) */
> +void VECTOR_HELPER(vssrl_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest, src1, src2;
> +
> + lmul = vector_get_lmul(env);
> +
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + vl = env->vfp.vl;
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + src1 = rs1 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u8[j] = vssrl_8(env,
> + env->vfp.vreg[src2].u8[j], env->vfp.vreg[src1].u8[j]);
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u16[j] = vssrl_16(env,
> + env->vfp.vreg[src2].u16[j], env->vfp.vreg[src1].u16[j]);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u32[j] = vssrl_32(env,
> + env->vfp.vreg[src2].u32[j], env->vfp.vreg[src1].u32[j]);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u64[j] = vssrl_64(env,
> + env->vfp.vreg[src2].u64[j], env->vfp.vreg[src1].u64[j]);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vssrl.vx vd, vs2, rs1, vm # vd[i] = ((vs2[i] + round)>>x[rs1]) */
> +void VECTOR_HELPER(vssrl_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest, src2;
> +
> + lmul = vector_get_lmul(env);
> +
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + vl = env->vfp.vl;
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u8[j] = vssrl_8(env,
> + env->vfp.vreg[src2].u8[j], env->gpr[rs1]);
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u16[j] = vssrl_16(env,
> + env->vfp.vreg[src2].u16[j], env->gpr[rs1]);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u32[j] = vssrl_32(env,
> + env->vfp.vreg[src2].u32[j], env->gpr[rs1]);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u64[j] = vssrl_64(env,
> + env->vfp.vreg[src2].u64[j], env->gpr[rs1]);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vssrl.vi vd, vs2, imm, vm # vd[i] = ((vs2[i] + round)>>imm) */
> +void VECTOR_HELPER(vssrl_vi)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest, src2;
> +
> + lmul = vector_get_lmul(env);
> +
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + vl = env->vfp.vl;
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u8[j] = vssrli_8(env,
> + env->vfp.vreg[src2].u8[j], rs1);
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u16[j] = vssrli_16(env,
> + env->vfp.vreg[src2].u16[j], rs1);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u32[j] = vssrli_32(env,
> + env->vfp.vreg[src2].u32[j], rs1);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u64[j] = vssrli_64(env,
> + env->vfp.vreg[src2].u64[j], rs1);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
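vssrl_8/vssrli_8 and the wider variants are defined earlier in this file;
under round-to-nearest-up (the rnu vxrm mode) the scaling shift reduces to
adding half of the discarded range before shifting. A sketch under that
assumption only (the real helpers also consult vxrm for the other rounding
modes):

#include <stdint.h>

/* Rounding logical right shift, SEW=8, rnu mode only (sketch). */
static uint8_t vssrl8_rnu(uint8_t v, uint8_t shift)
{
    shift &= 0x7;
    if (shift == 0) {
        return v;
    }
    /* bit (shift-1) of v is the round bit; adding it rounds to nearest up */
    return (uint8_t)(((uint16_t)v + (1u << (shift - 1))) >> shift);
}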
> +
> +/* vfmsub.vv vd, vs1, vs2, vm # vd[i] = +(vs1[i] * vd[i]) - vs2[i] */
> +void VECTOR_HELPER(vfmsub_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest, src1, src2;
> +
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + src1 = rs1 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f16[j] = float16_muladd(
> + env->vfp.vreg[src1].f16[j],
> + env->vfp.vreg[dest].f16[j],
> + env->vfp.vreg[src2].f16[j],
> + float_muladd_negate_c, &env->fp_status);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f32[j] = float32_muladd(
> + env->vfp.vreg[src1].f32[j],
> + env->vfp.vreg[dest].f32[j],
> + env->vfp.vreg[src2].f32[j],
> + float_muladd_negate_c, &env->fp_status);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f64[j] = float64_muladd(
> + env->vfp.vreg[src1].f64[j],
> + env->vfp.vreg[dest].f64[j],
> + env->vfp.vreg[src2].f64[j],
> + float_muladd_negate_c, &env->fp_status);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_fcommon(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vfmsub.vf vd, rs1, vs2, vm # vd[i] = +(f[rs1] * vd[i]) - vs2[i] */
> +void VECTOR_HELPER(vfmsub_vf)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest, src2;
> +
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f16[j] = float16_muladd(
> + env->fpr[rs1],
> + env->vfp.vreg[dest].f16[j],
> + env->vfp.vreg[src2].f16[j],
> + float_muladd_negate_c, &env->fp_status);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f32[j] = float32_muladd(
> + env->fpr[rs1],
> + env->vfp.vreg[dest].f32[j],
> + env->vfp.vreg[src2].f32[j],
> + float_muladd_negate_c, &env->fp_status);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f64[j] = float64_muladd(
> + env->fpr[rs1],
> + env->vfp.vreg[dest].f64[j],
> + env->vfp.vreg[src2].f64[j],
> + float_muladd_negate_c, &env->fp_status);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_fcommon(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vssra.vv vd, vs2, vs1, vm # vd[i] = ((vs2[i] + round)>>vs1[i]) */
> +void VECTOR_HELPER(vssra_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest, src1, src2;
> +
> + lmul = vector_get_lmul(env);
> +
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + vl = env->vfp.vl;
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + src1 = rs1 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s8[j] = vssra_8(env,
> + env->vfp.vreg[src2].s8[j], env->vfp.vreg[src1].u8[j]);
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s16[j] = vssra_16(env,
> + env->vfp.vreg[src2].s16[j], env->vfp.vreg[src1].u16[j]);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s32[j] = vssra_32(env,
> + env->vfp.vreg[src2].s32[j], env->vfp.vreg[src1].u32[j]);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s64[j] = vssra_64(env,
> + env->vfp.vreg[src2].s64[j], env->vfp.vreg[src1].u64[j]);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vssra.vx vd, vs2, rs1, vm # vd[i] = ((vs2[i] + round)>>x[rs1]) */
> +void VECTOR_HELPER(vssra_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest, src2;
> +
> + lmul = vector_get_lmul(env);
> +
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + vl = env->vfp.vl;
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s8[j] = vssra_8(env,
> + env->vfp.vreg[src2].s8[j], env->gpr[rs1]);
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s16[j] = vssra_16(env,
> + env->vfp.vreg[src2].s16[j], env->gpr[rs1]);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s32[j] = vssra_32(env,
> + env->vfp.vreg[src2].s32[j], env->gpr[rs1]);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s64[j] = vssra_64(env,
> + env->vfp.vreg[src2].s64[j], env->gpr[rs1]);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vssra.vi vd, vs2, imm, vm # vd[i] = ((vs2[i] + round)>>imm) */
> +void VECTOR_HELPER(vssra_vi)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest, src2;
> +
> + lmul = vector_get_lmul(env);
> +
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + vl = env->vfp.vl;
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s8[j] = vssrai_8(env,
> + env->vfp.vreg[src2].s8[j], rs1);
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s16[j] = vssrai_16(env,
> + env->vfp.vreg[src2].s16[j], rs1);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s32[j] = vssrai_32(env,
> + env->vfp.vreg[src2].s32[j], rs1);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s64[j] = vssrai_64(env,
> + env->vfp.vreg[src2].s64[j], rs1);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vnmsub_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src1, src2, dest, vlmax;
> +
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src1 = rs1 + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s8[j] = env->vfp.vreg[src2].s8[j]
> + - env->vfp.vreg[src1].s8[j]
> + * env->vfp.vreg[dest].s8[j];
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s16[j] = env->vfp.vreg[src2].s16[j]
> + - env->vfp.vreg[src1].s16[j]
> + * env->vfp.vreg[dest].s16[j];
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s32[j] = env->vfp.vreg[src2].s32[j]
> + - env->vfp.vreg[src1].s32[j]
> + * env->vfp.vreg[dest].s32[j];
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s64[j] = env->vfp.vreg[src2].s64[j]
> + - env->vfp.vreg[src1].s64[j]
> + * env->vfp.vreg[dest].s64[j];
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +void VECTOR_HELPER(vnmsub_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src2, dest, vlmax;
> +
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s8[j] = env->vfp.vreg[src2].s8[j]
> + - env->gpr[rs1]
> + * env->vfp.vreg[dest].s8[j];
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s16[j] = env->vfp.vreg[src2].s16[j]
> + - env->gpr[rs1]
> + * env->vfp.vreg[dest].s16[j];
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s32[j] = env->vfp.vreg[src2].s32[j]
> + - env->gpr[rs1]
> + * env->vfp.vreg[dest].s32[j];
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s64[j] = env->vfp.vreg[src2].s64[j]
> + - (int64_t)extend_gpr(env->gpr[rs1])
> + * env->vfp.vreg[dest].s64[j];
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
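vnmsub mirrors vmadd with the product negated: the destination is overwritten
with vs2 minus the product rather than accumulated into. A scalar sketch of
the SEW=32 element, in unsigned arithmetic for a well-defined wrap (name
illustrative):

#include <stdint.h>

/* vnmsub.vv element, SEW=32: vd = -(vs1 * vd) + vs2, wrapping mod 2^32. */
static int32_t vnmsub_vv_elem_32(int32_t vs1, int32_t vd, int32_t vs2)
{
    return (int32_t)((uint32_t)vs2 - (uint32_t)vs1 * (uint32_t)vd);
}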
> +
> +/* vfnmsub.vv vd, vs1, vs2, vm # vd[i] = -(vs1[i] * vd[i]) + vs2[i] */
> +void VECTOR_HELPER(vfnmsub_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest, src1, src2;
> +
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + src1 = rs1 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f16[j] = float16_muladd(
> + env->vfp.vreg[src1].f16[j],
> + env->vfp.vreg[dest].f16[j],
> + env->vfp.vreg[src2].f16[j],
> + float_muladd_negate_product, &env->fp_status);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f32[j] = float32_muladd(
> + env->vfp.vreg[src1].f32[j],
> + env->vfp.vreg[dest].f32[j],
> + env->vfp.vreg[src2].f32[j],
> + float_muladd_negate_product, &env->fp_status);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f64[j] = float64_muladd(
> + env->vfp.vreg[src1].f64[j],
> + env->vfp.vreg[dest].f64[j],
> + env->vfp.vreg[src2].f64[j],
> + float_muladd_negate_product, &env->fp_status);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_fcommon(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vfnmsub.vf vd, rs1, vs2, vm # vd[i] = -(f[rs1] * vd[i]) + vs2[i] */
> +void VECTOR_HELPER(vfnmsub_vf)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest, src2;
> +
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f16[j] = float16_muladd(
> + env->fpr[rs1],
> + env->vfp.vreg[dest].f16[j],
> + env->vfp.vreg[src2].f16[j],
> + float_muladd_negate_product, &env->fp_status);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f32[j] = float32_muladd(
> + env->fpr[rs1],
> + env->vfp.vreg[dest].f32[j],
> + env->vfp.vreg[src2].f32[j],
> + float_muladd_negate_product, &env->fp_status);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f64[j] = float64_muladd(
> + env->fpr[rs1],
> + env->vfp.vreg[dest].f64[j],
> + env->vfp.vreg[src2].f64[j],
> + float_muladd_negate_product, &env->fp_status);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_fcommon(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vnsrl_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, k, vl;
> + uint32_t lmul, width, src1, src2, dest, vlmax;
> +
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) ||
> + vector_overlap_vm_common(lmul, vm, rd) ||
> + vector_overlap_dstgp_srcgp(rd, lmul, rs2, 2 * lmul)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, true);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> +
> + for (i = 0; i < vlmax; i++) {
> + src1 = rs1 + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / (2 * width)));
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + k = i % (VLEN / (2 * width));
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u8[j] = env->vfp.vreg[src2].u16[k]
> + >> (env->vfp.vreg[src1].u8[j] & 0xf);
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u16[j] = env->vfp.vreg[src2].u32[k]
> + >> (env->vfp.vreg[src1].u16[j] & 0x1f);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u32[j] = env->vfp.vreg[src2].u64[k]
> + >> (env->vfp.vreg[src1].u32[j] & 0x3f);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_narrow(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +void VECTOR_HELPER(vnsrl_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, k, vl;
> + uint32_t lmul, width, src2, dest, vlmax;
> +
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) ||
> + vector_overlap_vm_common(lmul, vm, rd) ||
> + vector_overlap_dstgp_srcgp(rd, lmul, rs2, 2 * lmul)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, true);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / (2 * width)));
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + k = i % (VLEN / (2 * width));
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u8[j] = env->vfp.vreg[src2].u16[k]
> + >> (env->gpr[rs1] & 0xf);
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u16[j] = env->vfp.vreg[src2].u32[k]
> + >> (env->gpr[rs1] & 0x1f);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u32[j] = env->vfp.vreg[src2].u64[k]
> + >> (env->gpr[rs1] & 0x3f);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_narrow(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +void VECTOR_HELPER(vnsrl_vi)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, k, vl;
> + uint32_t lmul, width, src2, dest, vlmax;
> +
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) ||
> + vector_overlap_vm_common(lmul, vm, rd) ||
> + vector_overlap_dstgp_srcgp(rd, lmul, rs2, 2 * lmul)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, true);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / (2 * width)));
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + k = i % (VLEN / (2 * width));
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u8[j] = env->vfp.vreg[src2].u16[k]
> + >> (rs1);
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u16[j] = env->vfp.vreg[src2].u32[k]
> + >> (rs1);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u32[j] = env->vfp.vreg[src2].u64[k]
> + >> (rs1);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_narrow(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
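In the three vnsrl forms the source group is read at 2*SEW and the result
written back at SEW, which is why rs2 is checked with the doubled lmul and
indexed with k rather than j. A sketch of one SEW=8 element (name
illustrative):

#include <stdint.h>

/* vnsrl element, SEW=8: narrow a 16-bit source by a logical right shift. */
static uint8_t vnsrl_elem_8(uint16_t vs2, uint8_t shift)
{
    return (uint8_t)(vs2 >> (shift & 0xf)); /* shift amount taken mod 2*SEW */
}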
> +
> +/* vfmacc.vv vd, vs1, vs2, vm # vd[i] = +(vs1[i] * vs2[i]) + vd[i] */
> +void VECTOR_HELPER(vfmacc_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest, src1, src2;
> +
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + src1 = rs1 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f16[j] = float16_muladd(
> + env->vfp.vreg[src1].f16[j],
> + env->vfp.vreg[src2].f16[j],
> + env->vfp.vreg[dest].f16[j],
> + 0, &env->fp_status);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f32[j] = float32_muladd(
> + env->vfp.vreg[src1].f32[j],
> + env->vfp.vreg[src2].f32[j],
> + env->vfp.vreg[dest].f32[j],
> + 0, &env->fp_status);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f64[j] = float64_muladd(
> + env->vfp.vreg[src1].f64[j],
> + env->vfp.vreg[src2].f64[j],
> + env->vfp.vreg[dest].f64[j],
> + 0, &env->fp_status);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_fcommon(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vfmacc.vf vd, rs1, vs2, vm # vd[i] = +(f[rs1] * vs2[i]) + vd[i] */
> +void VECTOR_HELPER(vfmacc_vf)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest, src2;
> +
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f16[j] = float16_muladd(
> + env->fpr[rs1],
> + env->vfp.vreg[src2].f16[j],
> + env->vfp.vreg[dest].f16[j],
> + 0, &env->fp_status);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f32[j] = float32_muladd(
> + env->fpr[rs1],
> + env->vfp.vreg[src2].f32[j],
> + env->vfp.vreg[dest].f32[j],
> + 0, &env->fp_status);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f64[j] = float64_muladd(
> + env->fpr[rs1],
> + env->vfp.vreg[src2].f64[j],
> + env->vfp.vreg[dest].f64[j],
> + 0, &env->fp_status);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_fcommon(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
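vfmacc differs from vfmadd only in operand roles: the product is vs1 * vs2 and
the addend is the old vd, so the accumulator survives in place. Side by side
for SEW=32 (names illustrative):

#include "fpu/softfloat.h"

/* vfmadd: vd = (vs1 * vd) + vs2 ; vfmacc: vd = (vs1 * vs2) + vd */
static float32 vfmadd_elem_32(float32 vs1, float32 vd, float32 vs2,
                              float_status *fs)
{
    return float32_muladd(vs1, vd, vs2, 0, fs);
}

static float32 vfmacc_elem_32(float32 vs1, float32 vd, float32 vs2,
                              float_status *fs)
{
    return float32_muladd(vs1, vs2, vd, 0, fs);
}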
> +void VECTOR_HELPER(vnsra_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, k, vl;
> + uint32_t lmul, width, src1, src2, dest, vlmax;
> +
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) ||
> + vector_overlap_vm_common(lmul, vm, rd) ||
> + vector_overlap_dstgp_srcgp(rd, lmul, rs2, 2 * lmul)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, true);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> +
> + for (i = 0; i < vlmax; i++) {
> + src1 = rs1 + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / (2 * width)));
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + k = i % (VLEN / (2 * width));
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s8[j] = env->vfp.vreg[src2].s16[k]
> + >> (env->vfp.vreg[src1].s8[j] & 0xf);
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s16[j] = env->vfp.vreg[src2].s32[k]
> + >> (env->vfp.vreg[src1].s16[j] & 0x1f);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s32[j] = env->vfp.vreg[src2].s64[k]
> + >> (env->vfp.vreg[src1].s32[j] & 0x3f);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_narrow(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +void VECTOR_HELPER(vnsra_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, k, vl;
> + uint32_t lmul, width, src2, dest, vlmax;
> +
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) ||
> + vector_overlap_vm_common(lmul, vm, rd) ||
> + vector_overlap_dstgp_srcgp(rd, lmul, rs2, 2 * lmul)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, true);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / (2 * width)));
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + k = i % (VLEN / (2 * width));
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s8[j] = env->vfp.vreg[src2].s16[k]
> + >> (env->gpr[rs1] & 0xf);
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s16[j] = env->vfp.vreg[src2].s32[k]
> + >> (env->gpr[rs1] & 0x1f);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s32[j] = env->vfp.vreg[src2].s64[k]
> + >> (env->gpr[rs1] & 0x3f);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_narrow(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +void VECTOR_HELPER(vnsra_vi)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, k, vl;
> + uint32_t lmul, width, src2, dest, vlmax;
> +
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) ||
> + vector_overlap_vm_common(lmul, vm, rd) ||
> + vector_overlap_dstgp_srcgp(rd, lmul, rs2, 2 * lmul)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, true);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / (2 * width)));
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + k = i % (VLEN / (2 * width));
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s8[j] = env->vfp.vreg[src2].s16[k]
> + >> (rs1);
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s16[j] = env->vfp.vreg[src2].s32[k]
> + >> (rs1);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s32[j] = env->vfp.vreg[src2].s64[k]
> + >> (rs1);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_narrow(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +void VECTOR_HELPER(vmacc_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src1, src2, dest, vlmax;
> +
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src1 = rs1 + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s8[j] += env->vfp.vreg[src1].s8[j]
> + * env->vfp.vreg[src2].s8[j];
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s16[j] += env->vfp.vreg[src1].s16[j]
> + * env->vfp.vreg[src2].s16[j];
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s32[j] += env->vfp.vreg[src1].s32[j]
> + * env->vfp.vreg[src2].s32[j];
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s64[j] += env->vfp.vreg[src1].s64[j]
> + * env->vfp.vreg[src2].s64[j];
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +void VECTOR_HELPER(vmacc_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src2, dest, vlmax;
> +
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s8[j] += env->gpr[rs1]
> + * env->vfp.vreg[src2].s8[j];
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s16[j] += env->gpr[rs1]
> + * env->vfp.vreg[src2].s16[j];
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s32[j] += env->gpr[rs1]
> + * env->vfp.vreg[src2].s32[j];
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s64[j] +=
> + (int64_t)extend_gpr(env->gpr[rs1])
> + * env->vfp.vreg[src2].s64[j];
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
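vmacc accumulates into vd with +=, whereas vmadd above overwrites vd while
using its old value as a multiplicand; the SEW=64 lane of the .vx forms widens
the scalar through extend_gpr first. A scalar sketch of one vmacc.vx element
at SEW=32, in unsigned arithmetic for a well-defined wrap (name illustrative):

#include <stdint.h>

/* vmacc.vx element, SEW=32: vd += x[rs1] * vs2, wrapping mod 2^32. */
static int32_t vmacc_vx_elem_32(int64_t x_rs1, int32_t vd, int32_t vs2)
{
    return (int32_t)((uint32_t)vd + (uint32_t)x_rs1 * (uint32_t)vs2);
}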
> +
> +/* vfnmacc.vv vd, vs1, vs2, vm # vd[i] = -(vs1[i] * vs2[i]) - vd[i] */
> +void VECTOR_HELPER(vfnmacc_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest, src1, src2;
> +
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + src1 = rs1 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f16[j] = float16_muladd(
> + env->vfp.vreg[src1].f16[j],
> + env->vfp.vreg[src2].f16[j],
> + env->vfp.vreg[dest].f16[j],
> + float_muladd_negate_c | float_muladd_negate_product,
> + &env->fp_status);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f32[j] = float32_muladd(
> + env->vfp.vreg[src1].f32[j],
> + env->vfp.vreg[src2].f32[j],
> + env->vfp.vreg[dest].f32[j],
> + float_muladd_negate_c | float_muladd_negate_product,
> + &env->fp_status);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f64[j] = float64_muladd(
> + env->vfp.vreg[src1].f64[j],
> + env->vfp.vreg[src2].f64[j],
> + env->vfp.vreg[dest].f64[j],
> + float_muladd_negate_c | float_muladd_negate_product,
> + &env->fp_status);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_fcommon(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vfnmacc.vf vd, rs1, vs2, vm # vd[i] = -(f[rs1] * vs2[i]) - vd[i] */
> +void VECTOR_HELPER(vfnmacc_vf)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest, src2;
> +
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    env->vfp.vreg[dest].f16[j] = float16_muladd(
> +                        env->fpr[rs1],
> +                        env->vfp.vreg[src2].f16[j],
> +                        env->vfp.vreg[dest].f16[j],
> +                        float_muladd_negate_c | float_muladd_negate_product,
> +                        &env->fp_status);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    env->vfp.vreg[dest].f32[j] = float32_muladd(
> +                        env->fpr[rs1],
> +                        env->vfp.vreg[src2].f32[j],
> +                        env->vfp.vreg[dest].f32[j],
> +                        float_muladd_negate_c | float_muladd_negate_product,
> +                        &env->fp_status);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    env->vfp.vreg[dest].f64[j] = float64_muladd(
> +                        env->fpr[rs1],
> +                        env->vfp.vreg[src2].f64[j],
> +                        env->vfp.vreg[dest].f64[j],
> +                        float_muladd_negate_c | float_muladd_negate_product,
> +                        &env->fp_status);
> + }
> + break;
> + default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_fcommon(env, dest, j, width);
> + }
> + }
> +    env->vfp.vstart = 0;
> +}
> +
> +/* vnclipu.vv vd, vs2, vs1, vm # vector-vector */
> +void VECTOR_HELPER(vnclipu_vv)(CPURISCVState *env, uint32_t vm,
> +    uint32_t rs1, uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest, k, src1, src2;
> +
> + lmul = vector_get_lmul(env);
> +
> +
> + if (vector_vtype_ill(env)
> + || vector_overlap_dstgp_srcgp(rd, lmul, rs2, 2 * lmul)
> + || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, true);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> +
> + vl = env->vfp.vl;
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / (2 * width)));
> + src1 = rs1 + (i / (VLEN / width));
> + j = i % (VLEN / (2 * width));
> + k = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u8[k] = vnclipu_16(env,
> +                        env->vfp.vreg[src2].u16[j],
> +                        env->vfp.vreg[src1].u8[k]);
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u16[k] = vnclipu_32(env,
> +                        env->vfp.vreg[src2].u32[j],
> +                        env->vfp.vreg[src1].u16[k]);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u32[k] = vnclipu_64(env,
> +                        env->vfp.vreg[src2].u64[j],
> +                        env->vfp.vreg[src1].u32[k]);
> + }
> + break;
> + default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_narrow(env, dest, k, width);
> + }
> + }
> +    env->vfp.vstart = 0;
> +}
> +
> +/* vnclipu.vx vd, vs2, rs1, vm # vector-scalar */
> +void VECTOR_HELPER(vnclipu_vx)(CPURISCVState *env, uint32_t vm,
> +    uint32_t rs1, uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, k, dest, src2;
> +
> + lmul = vector_get_lmul(env);
> +
> +
> + if (vector_vtype_ill(env)
> + || vector_overlap_dstgp_srcgp(rd, lmul, rs2, 2 * lmul)
> + || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs2, true);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + vl = env->vfp.vl;
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / (2 * width)));
> + j = i % (VLEN / (2 * width));
> + k = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u8[k] = vnclipu_16(env,
> + env->vfp.vreg[src2].u16[j], env->gpr[rs1]);
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u16[k] = vnclipu_32(env,
> + env->vfp.vreg[src2].u32[j], env->gpr[rs1]);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u32[k] = vnclipu_64(env,
> + env->vfp.vreg[src2].u64[j], env->gpr[rs1]);
> + }
> + break;
> + default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_narrow(env, dest, k, width);
> + }
> + }
> +    env->vfp.vstart = 0;
> +}
> +
> +
> +/* vnclipu.vi vd, vs2, imm, vm # vector-immediate */
> +void VECTOR_HELPER(vnclipu_vi)(CPURISCVState *env, uint32_t vm,
> +    uint32_t rs1, uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, k, dest, src2;
> +
> + lmul = vector_get_lmul(env);
> +
> +
> + if (vector_vtype_ill(env)
> + || vector_overlap_dstgp_srcgp(rd, lmul, rs2, 2 * lmul)
> + || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs2, true);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + vl = env->vfp.vl;
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / (2 * width)));
> + j = i % (VLEN / (2 * width));
> + k = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u8[k] = vnclipui_16(env,
> + env->vfp.vreg[src2].u16[j], rs1);
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u16[k] = vnclipui_32(env,
> + env->vfp.vreg[src2].u32[j], rs1);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u32[k] = vnclipui_64(env,
> + env->vfp.vreg[src2].u64[j], rs1);
> + }
> + break;
> + default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_narrow(env, dest, k, width);
> + }
> + }
> +    env->vfp.vstart = 0;
> +}
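
On the narrowing clips: each result element takes a 2*SEW source element,
shifts it right, and saturates it to SEW bits. A scalar sketch of the
unsigned 16-to-8-bit case (hypothetical name; the patch's vnclipu_16 also
applies vxrm-controlled rounding, which this sketch leaves out):

    #include <stdint.h>

    static uint8_t ref_vnclipu_16(uint16_t src, uint8_t shift)
    {
        uint16_t v = src >> (shift & 0xf);          /* logical shift */
        return v > UINT8_MAX ? UINT8_MAX : (uint8_t)v;
    }
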
> +
> +/* vfmsac.vv vd, vs1, vs2, vm # vd[i] = +(vs1[i] * vs2[i]) - vd[i] */
> +void VECTOR_HELPER(vfmsac_vv)(CPURISCVState *env, uint32_t vm,
> +    uint32_t rs1, uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest, src1, src2;
> +
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + src1 = rs1 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    env->vfp.vreg[dest].f16[j] = float16_muladd(
> +                        env->vfp.vreg[src1].f16[j],
> +                        env->vfp.vreg[src2].f16[j],
> +                        env->vfp.vreg[dest].f16[j],
> +                        float_muladd_negate_c,
> +                        &env->fp_status);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    env->vfp.vreg[dest].f32[j] = float32_muladd(
> +                        env->vfp.vreg[src1].f32[j],
> +                        env->vfp.vreg[src2].f32[j],
> +                        env->vfp.vreg[dest].f32[j],
> +                        float_muladd_negate_c,
> +                        &env->fp_status);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    env->vfp.vreg[dest].f64[j] = float64_muladd(
> +                        env->vfp.vreg[src1].f64[j],
> +                        env->vfp.vreg[src2].f64[j],
> +                        env->vfp.vreg[dest].f64[j],
> +                        float_muladd_negate_c,
> +                        &env->fp_status);
> + }
> + break;
> + default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_fcommon(env, dest, j, width);
> + }
> + }
> +    env->vfp.vstart = 0;
> +}
> +
> +/* vfmsac.vf vd, rs1, vs2, vm # vd[i] = +(f[rs1] * vs2[i]) - vd[i] */
> +void VECTOR_HELPER(vfmsac_vf)(CPURISCVState *env, uint32_t vm,
> +    uint32_t rs1, uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest, src2;
> +
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    env->vfp.vreg[dest].f16[j] = float16_muladd(
> +                        env->fpr[rs1],
> +                        env->vfp.vreg[src2].f16[j],
> +                        env->vfp.vreg[dest].f16[j],
> +                        float_muladd_negate_c,
> +                        &env->fp_status);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    env->vfp.vreg[dest].f32[j] = float32_muladd(
> +                        env->fpr[rs1],
> +                        env->vfp.vreg[src2].f32[j],
> +                        env->vfp.vreg[dest].f32[j],
> +                        float_muladd_negate_c,
> +                        &env->fp_status);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    env->vfp.vreg[dest].f64[j] = float64_muladd(
> +                        env->fpr[rs1],
> +                        env->vfp.vreg[src2].f64[j],
> +                        env->vfp.vreg[dest].f64[j],
> +                        float_muladd_negate_c,
> +                        &env->fp_status);
> + }
> + break;
> + default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_fcommon(env, dest, j, width);
> + }
> + }
> +    env->vfp.vstart = 0;
> +}
> +
> +/* vnclip.vv vd, vs2, vs1, vm # vector-vector */
> +void VECTOR_HELPER(vnclip_vv)(CPURISCVState *env, uint32_t vm,
> +    uint32_t rs1, uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest, k, src1, src2;
> +
> + lmul = vector_get_lmul(env);
> +
> +
> + if (vector_vtype_ill(env)
> + || vector_overlap_dstgp_srcgp(rd, lmul, rs2, 2 * lmul)
> + || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, true);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + vl = env->vfp.vl;
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / (2 * width)));
> + src1 = rs1 + (i / (VLEN / width));
> + j = i % (VLEN / (2 * width));
> + k = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s8[k] = vnclip_16(env,
> +                        env->vfp.vreg[src2].s16[j],
> +                        env->vfp.vreg[src1].u8[k]);
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s16[k] = vnclip_32(env,
> +                        env->vfp.vreg[src2].s32[j],
> +                        env->vfp.vreg[src1].u16[k]);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s32[k] = vnclip_64(env,
> +                        env->vfp.vreg[src2].s64[j],
> +                        env->vfp.vreg[src1].u32[k]);
> + }
> + break;
> + default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_narrow(env, dest, k, width);
> + }
> + }
> +    env->vfp.vstart = 0;
> +}
> +
> +/* vnclip.vx vd, vs2, rs1, vm # vector-scalar */
> +void VECTOR_HELPER(vnclip_vx)(CPURISCVState *env, uint32_t vm,
> +    uint32_t rs1, uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest, k, src2;
> +
> + lmul = vector_get_lmul(env);
> +
> +
> + if (vector_vtype_ill(env)
> + || vector_overlap_dstgp_srcgp(rd, lmul, rs2, 2 * lmul)
> + || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs2, true);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + vl = env->vfp.vl;
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / (2 * width)));
> + j = i % (VLEN / (2 * width));
> + k = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s8[k] = vnclip_16(env,
> + env->vfp.vreg[src2].s16[j], env->gpr[rs1]);
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s16[k] = vnclip_32(env,
> + env->vfp.vreg[src2].s32[j], env->gpr[rs1]);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s32[k] = vnclip_64(env,
> + env->vfp.vreg[src2].s64[j], env->gpr[rs1]);
> + }
> + break;
> + default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_narrow(env, dest, k, width);
> + }
> + }
> +    env->vfp.vstart = 0;
> +}
> +
> +/* vnclip.vi vd, vs2, imm, vm # vector-immediate */
> +void VECTOR_HELPER(vnclip_vi)(CPURISCVState *env, uint32_t vm,
> +    uint32_t rs1, uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest, k, src2;
> +
> + lmul = vector_get_lmul(env);
> +
> +
> + if (vector_vtype_ill(env)
> + || vector_overlap_dstgp_srcgp(rd, lmul, rs2, 2 * lmul)
> + || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs2, true);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + vl = env->vfp.vl;
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / (2 * width)));
> + j = i % (VLEN / (2 * width));
> + k = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s8[k] = vnclipi_16(env,
> + env->vfp.vreg[src2].s16[j], rs1);
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s16[k] = vnclipi_32(env,
> + env->vfp.vreg[src2].s32[j], rs1);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s32[k] = vnclipi_64(env,
> + env->vfp.vreg[src2].s64[j], rs1);
> + }
> + break;
> + default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_narrow(env, dest, k, width);
> + }
> + }
> +    env->vfp.vstart = 0;
> +}
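
The signed clip saturates to [INT8_MIN, INT8_MAX] and shifts
arithmetically; same caveats as the unsigned sketch above (hypothetical
name, vxrm rounding omitted):

    #include <stdint.h>

    static int8_t ref_vnclip_16(int16_t src, uint8_t shift)
    {
        int16_t v = src >> (shift & 0xf);           /* arithmetic shift */
        if (v > INT8_MAX) {
            return INT8_MAX;
        }
        if (v < INT8_MIN) {
            return INT8_MIN;
        }
        return (int8_t)v;
    }
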
> +
> +void VECTOR_HELPER(vnmsac_vv)(CPURISCVState *env, uint32_t vm,
> +    uint32_t rs1, uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src1, src2, dest, vlmax;
> +
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src1 = rs1 + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s8[j] -= env->vfp.vreg[src1].s8[j]
> + * env->vfp.vreg[src2].s8[j];
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    env->vfp.vreg[dest].s16[j] -= env->vfp.vreg[src1].s16[j]
> + * env->vfp.vreg[src2].s16[j];
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    env->vfp.vreg[dest].s32[j] -= env->vfp.vreg[src1].s32[j]
> + * env->vfp.vreg[src2].s32[j];
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    env->vfp.vreg[dest].s64[j] -= env->vfp.vreg[src1].s64[j]
> + * env->vfp.vreg[src2].s64[j];
> + }
> + break;
> + default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +void VECTOR_HELPER(vnmsac_vx)(CPURISCVState *env, uint32_t vm,
> +    uint32_t rs1, uint32_t rs2, uint32_t rd)
> +{
> + int i, j, vl;
> + uint32_t lmul, width, src2, dest, vlmax;
> +
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s8[j] -= env->gpr[rs1]
> + * env->vfp.vreg[src2].s8[j];
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s16[j] -= env->gpr[rs1]
> + * env->vfp.vreg[src2].s16[j];
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s32[j] -= env->gpr[rs1]
> + * env->vfp.vreg[src2].s32[j];
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s64[j] -=
> + (int64_t)extend_gpr(env->gpr[rs1])
> + * env->vfp.vreg[src2].s64[j];
> + }
> + break;
> + default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_common(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
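
One thing worth flagging on the integer multiply-accumulate helpers:
`vd -= vs1 * vs2` on signed element types is undefined behaviour in C when
the product overflows, while the architecture wants modulo-2^SEW
wraparound. A portable reference model (hypothetical name) would compute
in unsigned and convert back:

    #include <stdint.h>

    static int32_t ref_vnmsac_w(int32_t vd, int32_t vs1, int32_t vs2)
    {
        /* unsigned arithmetic wraps mod 2^32 by definition */
        return (int32_t)((uint32_t)vd - (uint32_t)vs1 * (uint32_t)vs2);
    }
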
> +
> +/* vfnmsac.vv vd, vs1, vs2, vm # vd[i] = -(vs1[i] * vs2[i]) + vd[i] */
> +void VECTOR_HELPER(vfnmsac_vv)(CPURISCVState *env, uint32_t vm,
> +    uint32_t rs1, uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest, src1, src2;
> +
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + src1 = rs1 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    env->vfp.vreg[dest].f16[j] = float16_muladd(
> +                        env->vfp.vreg[src1].f16[j],
> +                        env->vfp.vreg[src2].f16[j],
> +                        env->vfp.vreg[dest].f16[j],
> +                        float_muladd_negate_product,
> +                        &env->fp_status);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    env->vfp.vreg[dest].f32[j] = float32_muladd(
> +                        env->vfp.vreg[src1].f32[j],
> +                        env->vfp.vreg[src2].f32[j],
> +                        env->vfp.vreg[dest].f32[j],
> +                        float_muladd_negate_product,
> +                        &env->fp_status);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    env->vfp.vreg[dest].f64[j] = float64_muladd(
> +                        env->vfp.vreg[src1].f64[j],
> +                        env->vfp.vreg[src2].f64[j],
> +                        env->vfp.vreg[dest].f64[j],
> +                        float_muladd_negate_product,
> +                        &env->fp_status);
> + }
> + break;
> + default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_fcommon(env, dest, j, width);
> + }
> + }
> +    env->vfp.vstart = 0;
> +}
> +
> +/* vfnmsac.vf vd, rs1, vs2, vm # vd[i] = -(f[rs1] * vs2[i]) + vd[i] */
> +void VECTOR_HELPER(vfnmsac_vf)(CPURISCVState *env, uint32_t vm,
> +    uint32_t rs1, uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest, src2;
> +
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    env->vfp.vreg[dest].f16[j] = float16_muladd(
> +                        env->fpr[rs1],
> +                        env->vfp.vreg[src2].f16[j],
> +                        env->vfp.vreg[dest].f16[j],
> +                        float_muladd_negate_product,
> +                        &env->fp_status);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    env->vfp.vreg[dest].f32[j] = float32_muladd(
> +                        env->fpr[rs1],
> +                        env->vfp.vreg[src2].f32[j],
> +                        env->vfp.vreg[dest].f32[j],
> +                        float_muladd_negate_product,
> +                        &env->fp_status);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    env->vfp.vreg[dest].f64[j] = float64_muladd(
> +                        env->fpr[rs1],
> +                        env->vfp.vreg[src2].f64[j],
> +                        env->vfp.vreg[dest].f64[j],
> +                        float_muladd_negate_product,
> +                        &env->fp_status);
> + }
> + break;
> + default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_fcommon(env, dest, j, width);
> + }
> + }
> +    env->vfp.vstart = 0;
> +}
> +
> +/* vwredsumu.vs vd, vs2, vs1, vm # 2*SEW = 2*SEW + sum(zero-extend(SEW)) */
> +void VECTOR_HELPER(vwredsumu_vs)(CPURISCVState *env, uint32_t vm,
> +    uint32_t rs1, uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, src2;
> + uint64_t sum = 0;
> +
> + lmul = vector_get_lmul(env);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> +
> + if (vector_vtype_ill(env)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + if (env->vfp.vstart != 0) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vl = env->vfp.vl;
> + if (vl == 0) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < VLEN / 64; i++) {
> + env->vfp.vreg[rd].u64[i] = 0;
> + }
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> +
> + if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + sum += env->vfp.vreg[src2].u8[j];
> + }
> + if (i == 0) {
> + sum += env->vfp.vreg[rs1].u16[0];
> + }
> + if (i == vl - 1) {
> + env->vfp.vreg[rd].u16[0] = sum;
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + sum += env->vfp.vreg[src2].u16[j];
> + }
> + if (i == 0) {
> + sum += env->vfp.vreg[rs1].u32[0];
> + }
> + if (i == vl - 1) {
> + env->vfp.vreg[rd].u32[0] = sum;
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + sum += env->vfp.vreg[src2].u32[j];
> + }
> + if (i == 0) {
> + sum += env->vfp.vreg[rs1].u64[0];
> + }
> + if (i == vl - 1) {
> + env->vfp.vreg[rd].u64[0] = sum;
> + }
> + break;
> + default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + }
> + }
> +    env->vfp.vstart = 0;
> +}
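
Shape of the widening reductions, restated: the accumulator is 2*SEW wide,
is seeded from element 0 of vs1, only unmasked elements of vs2 contribute,
and only element 0 of vd is written. A scalar sketch at SEW=8 (hypothetical
name; 'mask' stands in for the v0 mask bits):

    #include <stdint.h>

    static uint16_t ref_vwredsumu_b(const uint8_t *vs2, uint16_t vs1_0,
                                    const uint8_t *mask, int vl)
    {
        uint16_t sum = vs1_0;                /* 2*SEW accumulator */
        for (int i = 0; i < vl; i++) {
            if (mask[i]) {
                sum += vs2[i];               /* zero-extended to 16 bits */
            }
        }
        return sum;                          /* goes to vd[0] only */
    }
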
> +
> +void VECTOR_HELPER(vwaddu_vv)(CPURISCVState *env, uint32_t vm,
> +    uint32_t rs1, uint32_t rs2, uint32_t rd)
> +{
> + int i, j, k, vl;
> + uint32_t lmul, width, src1, src2, dest, vlmax;
> +
> + vl = env->vfp.vl;
> +
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env)
> + || vector_overlap_vm_force(vm, rd)
> + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs1, lmul)
> + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)
> + ) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, true);
> +
> + for (i = 0; i < vlmax; i++) {
> + src1 = rs1 + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / (2 * width)));
> + j = i % (VLEN / width);
> + k = i % (VLEN / (2 * width));
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u16[k] =
> + (uint16_t)env->vfp.vreg[src1].u8[j] +
> + (uint16_t)env->vfp.vreg[src2].u8[j];
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u32[k] =
> + (uint32_t)env->vfp.vreg[src1].u16[j] +
> + (uint32_t)env->vfp.vreg[src2].u16[j];
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u64[k] =
> + (uint64_t)env->vfp.vreg[src1].u32[j] +
> + (uint64_t)env->vfp.vreg[src2].u32[j];
> + }
> + break;
> + default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_widen(env, dest, k, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vwaddu_vx)(CPURISCVState *env, uint32_t vm,
> +    uint32_t rs1, uint32_t rs2, uint32_t rd)
> +{
> + int i, j, k, vl;
> + uint32_t lmul, width, src2, dest, vlmax;
> +
> + vl = env->vfp.vl;
> +
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env)
> + || vector_overlap_vm_force(vm, rd)
> + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)
> + ) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, true);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / (2 * width)));
> + j = i % (VLEN / width);
> + k = i % (VLEN / (2 * width));
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u16[k] =
> + (uint16_t)env->vfp.vreg[src2].u8[j] +
> + (uint16_t)((uint8_t)env->gpr[rs1]);
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u32[k] =
> + (uint32_t)env->vfp.vreg[src2].u16[j] +
> + (uint32_t)((uint16_t)env->gpr[rs1]);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u64[k] =
> + (uint64_t)env->vfp.vreg[src2].u32[j] +
> + (uint64_t)((uint32_t)env->gpr[rs1]);
> + }
> + break;
> + default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_widen(env, dest, k, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
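
The explicit casts ahead of the '+' are what make these widening: the sum
must be formed at 2*SEW so the carry out of SEW bits survives. At SEW=8 or
16 C's integer promotions would widen anyway, but at SEW=32 they would not:

    #include <stdint.h>

    static uint64_t ref_vwaddu_w(uint32_t a, uint32_t b)
    {
        /* without the casts 0xffffffff + 1 wraps to 0 in 32-bit
         * arithmetic; with them the result is 0x100000000 */
        return (uint64_t)a + (uint64_t)b;
    }
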
> +
> +/* vfwadd.vv vd, vs2, vs1, vm # vector-vector */
> +void VECTOR_HELPER(vfwadd_vv)(CPURISCVState *env, uint32_t vm,
> +    uint32_t rs1, uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, k, dest, src1, src2;
> +
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> + if (vector_vtype_ill(env)
> + || vector_overlap_vm_force(vm, rd)
> + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs1, lmul)
> + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, true);
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / (2 * width)));
> + src2 = rs2 + (i / (VLEN / width));
> + src1 = rs1 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + k = i % (VLEN / (2 * width));
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f32[k] = float32_add(
> +                        float16_to_float32(env->vfp.vreg[src2].f16[j],
> +                                           true, &env->fp_status),
> +                        float16_to_float32(env->vfp.vreg[src1].f16[j],
> +                                           true, &env->fp_status),
> + &env->fp_status);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f64[k] = float64_add(
> + float32_to_float64(env->vfp.vreg[src2].f32[j],
> + &env->fp_status),
> + float32_to_float64(env->vfp.vreg[src1].f32[j],
> + &env->fp_status),
> + &env->fp_status);
> + }
> + break;
> + default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_fwiden(env, dest, k, width);
> + }
> + }
> +    env->vfp.vstart = 0;
> +}
> +
> +/* vfwadd.vf vd, vs2, rs1, vm # vector-scalar */
> +void VECTOR_HELPER(vfwadd_vf)(CPURISCVState *env, uint32_t vm,
> +    uint32_t rs1, uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, k, dest, src2;
> +
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> + if (vector_vtype_ill(env)
> + || vector_overlap_vm_force(vm, rd)
> + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, true);
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / (2 * width)));
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + k = i % (VLEN / (2 * width));
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f32[k] = float32_add(
> +                        float16_to_float32(env->vfp.vreg[src2].f16[j],
> +                                           true, &env->fp_status),
> + float16_to_float32(env->fpr[rs1], true,
> + &env->fp_status),
> + &env->fp_status);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f64[k] = float64_add(
> + float32_to_float64(env->vfp.vreg[src2].f32[j],
> + &env->fp_status),
> +                        float32_to_float64(env->fpr[rs1], &env->fp_status),
> + &env->fp_status);
> + }
> + break;
> + default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_fwiden(env, dest, k, width);
> + }
> + }
> +    env->vfp.vstart = 0;
> +}
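
Condensing the FP widening pattern used above: each SEW input is promoted
with the softfloat conversion (the 'true' argument selects IEEE
half-precision rather than the alternative format), and the add then
rounds once at 2*SEW. A sketch of the per-element operation (hypothetical
name, same softfloat calls as the patch):

    static float32 ref_vfwadd_h(float16 a, float16 b, float_status *s)
    {
        return float32_add(float16_to_float32(a, true, s),
                           float16_to_float32(b, true, s),
                           s);
    }
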
> +
> +/* vwredsum.vs vd, vs2, vs1, vm # 2*SEW = 2*SEW + sum(sign-extend(SEW)) */
> +void VECTOR_HELPER(vwredsum_vs)(CPURISCVState *env, uint32_t vm,
> +    uint32_t rs1, uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, src2;
> + int64_t sum = 0;
> +
> + lmul = vector_get_lmul(env);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> +
> + if (vector_vtype_ill(env)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + if (env->vfp.vstart != 0) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vl = env->vfp.vl;
> + if (vl == 0) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < VLEN / 64; i++) {
> + env->vfp.vreg[rd].u64[i] = 0;
> + }
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> +
> + if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + sum += (int16_t)env->vfp.vreg[src2].s8[j] << 8 >> 8;
> + }
> + if (i == 0) {
> + sum += env->vfp.vreg[rs1].s16[0];
> + }
> + if (i == vl - 1) {
> + env->vfp.vreg[rd].s16[0] = sum;
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    sum += (int32_t)env->vfp.vreg[src2].s16[j] << 16 >> 16;
> + }
> + if (i == 0) {
> + sum += env->vfp.vreg[rs1].s32[0];
> + }
> + if (i == vl - 1) {
> + env->vfp.vreg[rd].s32[0] = sum;
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    sum += (int64_t)env->vfp.vreg[src2].s32[j] << 32 >> 32;
> + }
> + if (i == 0) {
> + sum += env->vfp.vreg[rs1].s64[0];
> + }
> + if (i == vl - 1) {
> + env->vfp.vreg[rd].s64[0] = sum;
> + }
> + break;
> + default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + }
> + }
> +    env->vfp.vstart = 0;
> +}
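
On the `(int16_t)...s8[j] << 8 >> 8` idiom above: the s8/s16/s32 fields are
already signed, so the cast alone sign-extends and the shift pair adds
nothing; strictly speaking, left-shifting a negative value is undefined in
standard C and right-shifting one is implementation-defined. The plain cast
is the safe equivalent:

    #include <stdint.h>

    static int16_t sext8_16(int8_t x)
    {
        return (int16_t)x;    /* e.g. -5 stays -5; no shifts needed */
    }
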
> +
> +void VECTOR_HELPER(vwadd_vv)(CPURISCVState *env, uint32_t vm,
> +    uint32_t rs1, uint32_t rs2, uint32_t rd)
> +{
> + int i, j, k, vl;
> + uint32_t lmul, width, src1, src2, dest, vlmax;
> +
> + vl = env->vfp.vl;
> +
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env)
> + || vector_overlap_vm_force(vm, rd)
> + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs1, lmul)
> + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, true);
> +
> + for (i = 0; i < vlmax; i++) {
> + src1 = rs1 + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / (2 * width)));
> + j = i % (VLEN / width);
> + k = i % (VLEN / (2 * width));
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s16[k] =
> + (int16_t)env->vfp.vreg[src1].s8[j] +
> + (int16_t)env->vfp.vreg[src2].s8[j];
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s32[k] =
> + (int32_t)env->vfp.vreg[src1].s16[j] +
> + (int32_t)env->vfp.vreg[src2].s16[j];
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s64[k] =
> + (int64_t)env->vfp.vreg[src1].s32[j] +
> + (int64_t)env->vfp.vreg[src2].s32[j];
> + }
> + break;
> + default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_widen(env, dest, k, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vwadd_vx)(CPURISCVState *env, uint32_t vm,
> +    uint32_t rs1, uint32_t rs2, uint32_t rd)
> +{
> + int i, j, k, vl;
> + uint32_t lmul, width, src2, dest, vlmax;
> +
> + vl = env->vfp.vl;
> +
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env)
> + || vector_overlap_vm_force(vm, rd)
> + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, true);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / (2 * width)));
> + j = i % (VLEN / width);
> + k = i % (VLEN / (2 * width));
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s16[k] =
> + (int16_t)((int8_t)env->vfp.vreg[src2].s8[j]) +
> + (int16_t)((int8_t)env->gpr[rs1]);
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s32[k] =
> + (int32_t)((int16_t)env->vfp.vreg[src2].s16[j]) +
> + (int32_t)((int16_t)env->gpr[rs1]);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s64[k] =
> + (int64_t)((int32_t)env->vfp.vreg[src2].s32[j]) +
> + (int64_t)((int32_t)env->gpr[rs1]);
> + }
> + break;
> + default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_widen(env, dest, k, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vd, vs2, vs1, vm # Unordered reduce 2*SEW = 2*SEW + sum(promote(SEW)) */
> +void VECTOR_HELPER(vfwredsum_vs)(CPURISCVState *env, uint32_t vm,
> +    uint32_t rs1, uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, src2;
> +    float32 sum32 = float32_zero;
> +    float64 sum64 = float64_zero;
> +
> + lmul = vector_get_lmul(env);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> +
> + if (vector_vtype_ill(env)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + if (env->vfp.vstart != 0) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vl = env->vfp.vl;
> + if (vl == 0) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < VLEN / 64; i++) {
> + env->vfp.vreg[rd].u64[i] = 0;
> + }
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> +
> + if (i < vl) {
> + switch (width) {
> + case 16:
> + if (i == 0) {
> + sum32 = env->vfp.vreg[rs1].f32[0];
> + }
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + sum32 = float32_add(sum32,
> +                        float16_to_float32(env->vfp.vreg[src2].f16[j],
> +                                           true, &env->fp_status),
> +                        &env->fp_status);
> + }
> + if (i == vl - 1) {
> + env->vfp.vreg[rd].f32[0] = sum32;
> + }
> + break;
> + case 32:
> + if (i == 0) {
> + sum64 = env->vfp.vreg[rs1].f64[0];
> + }
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + sum64 = float64_add(sum64,
> +                        float32_to_float64(env->vfp.vreg[src2].f32[j],
> +                                           &env->fp_status),
> +                        &env->fp_status);
> + }
> + if (i == vl - 1) {
> + env->vfp.vreg[rd].f64[0] = sum64;
> + }
> + break;
> + default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + }
> + }
> +    env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vwsubu_vv)(CPURISCVState *env, uint32_t vm,
> +    uint32_t rs1, uint32_t rs2, uint32_t rd)
> +{
> + int i, j, k, vl;
> + uint32_t lmul, width, src1, src2, dest, vlmax;
> +
> + vl = env->vfp.vl;
> +
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env)
> + || vector_overlap_vm_force(vm, rd)
> + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs1, lmul)
> + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)
> + ) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, true);
> +
> + for (i = 0; i < vlmax; i++) {
> + src1 = rs1 + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / (2 * width)));
> + j = i % (VLEN / width);
> + k = i % (VLEN / (2 * width));
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u16[k] =
> + (uint16_t)env->vfp.vreg[src2].u8[j] -
> + (uint16_t)env->vfp.vreg[src1].u8[j];
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u32[k] =
> + (uint32_t)env->vfp.vreg[src2].u16[j] -
> + (uint32_t)env->vfp.vreg[src1].u16[j];
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u64[k] =
> + (uint64_t)env->vfp.vreg[src2].u32[j] -
> + (uint64_t)env->vfp.vreg[src1].u32[j];
> + }
> + break;
> + default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_widen(env, dest, k, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vwsubu_vx)(CPURISCVState *env, uint32_t vm,
> +    uint32_t rs1, uint32_t rs2, uint32_t rd)
> +{
> + int i, j, k, vl;
> + uint32_t lmul, width, src2, dest, vlmax;
> +
> + vl = env->vfp.vl;
> +
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env)
> + || vector_overlap_vm_force(vm, rd)
> + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)
> + ) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, true);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / (2 * width)));
> + j = i % (VLEN / width);
> + k = i % (VLEN / (2 * width));
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u16[k] =
> + (uint16_t)env->vfp.vreg[src2].u8[j] -
> + (uint16_t)((uint8_t)env->gpr[rs1]);
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u32[k] =
> + (uint32_t)env->vfp.vreg[src2].u16[j] -
> + (uint32_t)((uint16_t)env->gpr[rs1]);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u64[k] =
> + (uint64_t)env->vfp.vreg[src2].u32[j] -
> + (uint64_t)((uint32_t)env->gpr[rs1]);
> + }
> + break;
> + default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_widen(env, dest, k, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vfwsub.vv vd, vs2, vs1, vm # vector-vector */
> +void VECTOR_HELPER(vfwsub_vv)(CPURISCVState *env, uint32_t vm,
> +    uint32_t rs1, uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, k, dest, src1, src2;
> +
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> + if (vector_vtype_ill(env)
> + || vector_overlap_vm_force(vm, rd)
> + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs1, lmul)
> + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, true);
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / (2 * width)));
> + src2 = rs2 + (i / (VLEN / width));
> + src1 = rs1 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + k = i % (VLEN / (2 * width));
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f32[k] = float32_sub(
> +                        float16_to_float32(env->vfp.vreg[src2].f16[j],
> +                                           true, &env->fp_status),
> +                        float16_to_float32(env->vfp.vreg[src1].f16[j],
> +                                           true, &env->fp_status),
> + &env->fp_status);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f64[k] = float64_sub(
> + float32_to_float64(env->vfp.vreg[src2].f32[j],
> + &env->fp_status),
> + float32_to_float64(env->vfp.vreg[src1].f32[j],
> + &env->fp_status),
> + &env->fp_status);
> + }
> + break;
> + default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_fwiden(env, dest, k, width);
> + }
> + }
> +    env->vfp.vstart = 0;
> +}
> +
> +/* vfwsub.vf vd, vs2, rs1, vm # vector-scalar */
> +void VECTOR_HELPER(vfwsub_vf)(CPURISCVState *env, uint32_t vm,
> +    uint32_t rs1, uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, k, dest, src2;
> +
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> + if (vector_vtype_ill(env)
> + || vector_overlap_vm_force(vm, rd)
> + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, true);
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / (2 * width)));
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + k = i % (VLEN / (2 * width));
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f32[k] = float32_sub(
> +                        float16_to_float32(env->vfp.vreg[src2].f16[j],
> +                                           true, &env->fp_status),
> + float16_to_float32(env->fpr[rs1], true,
> + &env->fp_status),
> + &env->fp_status);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f64[k] = float64_sub(
> + float32_to_float64(env->vfp.vreg[src2].f32[j],
> + &env->fp_status),
> +                        float32_to_float64(env->fpr[rs1], &env->fp_status),
> + &env->fp_status);
> + }
> + break;
> + default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_fwiden(env, dest, k, width);
> + }
> + }
> +    env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vwsub_vv)(CPURISCVState *env, uint32_t vm,
> +    uint32_t rs1, uint32_t rs2, uint32_t rd)
> +{
> + int i, j, k, vl;
> + uint32_t lmul, width, src1, src2, dest, vlmax;
> +
> + vl = env->vfp.vl;
> +
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env)
> + || vector_overlap_vm_force(vm, rd)
> + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs1, lmul)
> + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)
> + ) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, true);
> +
> + for (i = 0; i < vlmax; i++) {
> + src1 = rs1 + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / (2 * width)));
> + j = i % (VLEN / width);
> + k = i % (VLEN / (2 * width));
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s16[k] =
> + (int16_t)env->vfp.vreg[src2].s8[j] -
> + (int16_t)env->vfp.vreg[src1].s8[j];
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s32[k] =
> + (int32_t)env->vfp.vreg[src2].s16[j] -
> + (int32_t)env->vfp.vreg[src1].s16[j];
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s64[k] =
> + (int64_t)env->vfp.vreg[src2].s32[j] -
> + (int64_t)env->vfp.vreg[src1].s32[j];
> + }
> + break;
> + default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_widen(env, dest, k, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vwsub_vx)(CPURISCVState *env, uint32_t vm,
> +    uint32_t rs1, uint32_t rs2, uint32_t rd)
> +{
> + int i, j, k, vl;
> + uint32_t lmul, width, src2, dest, vlmax;
> +
> + vl = env->vfp.vl;
> +
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env)
> + || vector_overlap_vm_force(vm, rd)
> + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)
> + ) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, true);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / (2 * width)));
> + j = i % (VLEN / width);
> + k = i % (VLEN / (2 * width));
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s16[k] =
> + (int16_t)((int8_t)env->vfp.vreg[src2].s8[j]) -
> + (int16_t)((int8_t)env->gpr[rs1]);
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s32[k] =
> + (int32_t)((int16_t)env->vfp.vreg[src2].s16[j]) -
> + (int32_t)((int16_t)env->gpr[rs1]);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s64[k] =
> + (int64_t)((int32_t)env->vfp.vreg[src2].s32[j]) -
> + (int64_t)((int32_t)env->gpr[rs1]);
> + }
> + break;
> + default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_widen(env, dest, k, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/*
> + * vfwredosum.vs vd, vs2, vs1, vm #
> + * Ordered reduce 2*SEW = 2*SEW + sum(promote(SEW))
> + */
> +void VECTOR_HELPER(vfwredosum_vs)(CPURISCVState *env, uint32_t vm,
> + uint32_t rs1, uint32_t rs2, uint32_t rd)
> +{
> +    helper_vector_vfwredsum_vs(env, vm, rs1, rs2, rd);
> +}
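
Mapping the ordered reduction onto the unordered helper works here because
that helper already sums in element order, but the distinction exists
because FP addition is not associative, so different summation orders can
round differently:

    #include <stdio.h>

    int main(void)
    {
        double a = 1e16, b = -1e16, c = 1.0;
        /* (a + b) + c == 1, but a + (b + c) == 0 in binary64 */
        printf("%g %g\n", (a + b) + c, a + (b + c));
        return 0;
    }
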
> +
> +void VECTOR_HELPER(vwaddu_wv)(CPURISCVState *env, uint32_t vm,
> +    uint32_t rs1, uint32_t rs2, uint32_t rd)
> +{
> + int i, j, k, vl;
> + uint32_t lmul, width, src1, src2, dest, vlmax;
> +
> + vl = env->vfp.vl;
> +
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env)
> + || vector_overlap_vm_force(vm, rd)
> + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs1, lmul)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, true);
> + vector_lmul_check_reg(env, lmul, rd, true);
> +
> + for (i = 0; i < vlmax; i++) {
> + src1 = rs1 + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / (2 * width)));
> + dest = rd + (i / (VLEN / (2 * width)));
> + j = i % (VLEN / width);
> + k = i % (VLEN / (2 * width));
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u16[k] =
> + (uint16_t)env->vfp.vreg[src1].u8[j] +
> + (uint16_t)env->vfp.vreg[src2].u16[k];
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u32[k] =
> + (uint32_t)env->vfp.vreg[src1].u16[j] +
> + (uint32_t)env->vfp.vreg[src2].u32[k];
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u64[k] =
> + (uint64_t)env->vfp.vreg[src1].u32[j] +
> + (uint64_t)env->vfp.vreg[src2].u64[k];
> + }
> + break;
> + default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_widen(env, dest, k, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vwaddu_wx)(CPURISCVState *env, uint32_t vm,
> +    uint32_t rs1, uint32_t rs2, uint32_t rd)
> +{
> + int i, k, vl;
> + uint32_t lmul, width, src2, dest, vlmax;
> +
> + vl = env->vfp.vl;
> +
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env)
> + || vector_overlap_vm_force(vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, true);
> + vector_lmul_check_reg(env, lmul, rd, true);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / (2 * width)));
> + dest = rd + (i / (VLEN / (2 * width)));
> + k = i % (VLEN / (2 * width));
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u16[k] =
> + (uint16_t)env->vfp.vreg[src2].u16[k] +
> + (uint16_t)((uint8_t)env->gpr[rs1]);
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u32[k] =
> + (uint32_t)env->vfp.vreg[src2].u32[k] +
> + (uint32_t)((uint16_t)env->gpr[rs1]);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u64[k] =
> + (uint64_t)env->vfp.vreg[src2].u64[k] +
> + (uint64_t)((uint32_t)env->gpr[rs1]);
> + }
> + break;
> + default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_widen(env, dest, k, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
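
The .wv/.wx forms take the first source already at 2*SEW, so only vs1 (or
the scalar) is widened before the operation; per element this reduces to
(hypothetical name, SEW=8):

    #include <stdint.h>

    static uint16_t ref_vwaddu_wx(uint16_t vs2_wide, uint8_t rs1)
    {
        return vs2_wide + (uint16_t)rs1;   /* vs2 element already 16-bit */
    }
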
> +
> +/* vfwadd.wv vd, vs2, vs1, vm # vector-vector */
> +void VECTOR_HELPER(vfwadd_wv)(CPURISCVState *env, uint32_t vm,
> +    uint32_t rs1, uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, k, dest, src1, src2;
> +
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> + if (vector_vtype_ill(env)
> + || vector_overlap_vm_force(vm, rd)
> + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs1, lmul)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, true);
> + vector_lmul_check_reg(env, lmul, rd, true);
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / (2 * width)));
> + src2 = rs2 + (i / (VLEN / (2 * width)));
> + src1 = rs1 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + k = i % (VLEN / (2 * width));
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f32[k] = float32_add(
> + env->vfp.vreg[src2].f32[k],
> +                        float16_to_float32(env->vfp.vreg[src1].f16[j],
> +                                           true, &env->fp_status),
> + &env->fp_status);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f64[k] = float64_add(
> + env->vfp.vreg[src2].f64[k],
> + float32_to_float64(env->vfp.vreg[src1].f32[j],
> + &env->fp_status),
> + &env->fp_status);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_widen(env, dest, k, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
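
In the .wv forms only the vs1 element is narrow; the vs2 element is already
2*SEW wide, so a single conversion feeds the add. A minimal sketch of the
per-element softfloat pattern for SEW=16 (the helper name is mine; QEMU's
helper plumbing is omitted):

  #include "qemu/osdep.h"
  #include "fpu/softfloat.h"

  /* vfwadd.wv element step: widen the half-precision operand (ieee=true
   * selects IEEE half format), then add in single precision, with
   * exception flags accumulating in *fs. */
  static float32 fwadd_wv_elem16(float32 wide2, float16 narrow1,
                                 float_status *fs)
  {
      return float32_add(wide2, float16_to_float32(narrow1, true, fs), fs);
  }
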
> +
> +/* vfwadd.wf vd, vs2, rs1, vm # vector-scalar */
> +void VECTOR_HELPER(vfwadd_wf)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, k, dest, src2;
> +
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> + if (vector_vtype_ill(env)
> + || vector_overlap_vm_force(vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs2, true);
> + vector_lmul_check_reg(env, lmul, rd, true);
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / (2 * width)));
> + src2 = rs2 + (i / (VLEN / (2 * width)));
> + k = i % (VLEN / (2 * width));
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f32[k] = float32_add(
> + env->vfp.vreg[src2].f32[k],
> + float16_to_float32(env->fpr[rs1], true,
> + &env->fp_status),
> + &env->fp_status);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f64[k] = float64_add(
> + env->vfp.vreg[src2].f64[k],
> + float32_to_float64(env->fpr[rs1], &env->fp_status),
> + &env->fp_status);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_widen(env, dest, k, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vwadd_wv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, k, vl;
> + uint32_t lmul, width, src1, src2, dest, vlmax;
> +
> + vl = env->vfp.vl;
> +
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env)
> + || vector_overlap_vm_force(vm, rd)
> + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs1, lmul)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, true);
> + vector_lmul_check_reg(env, lmul, rd, true);
> +
> + for (i = 0; i < vlmax; i++) {
> + src1 = rs1 + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / (2 * width)));
> + dest = rd + (i / (VLEN / (2 * width)));
> + j = i % (VLEN / width);
> + k = i % (VLEN / (2 * width));
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s16[k] =
> + (int16_t)((int8_t)env->vfp.vreg[src1].s8[j]) +
> + (int16_t)env->vfp.vreg[src2].s16[k];
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s32[k] =
> + (int32_t)((int16_t)env->vfp.vreg[src1].s16[j]) +
> + (int32_t)env->vfp.vreg[src2].s32[k];
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s64[k] =
> + (int64_t)((int32_t)env->vfp.vreg[src1].s32[j]) +
> + (int64_t)env->vfp.vreg[src2].s64[k];
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_widen(env, dest, k, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vwadd_wx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, k, vl;
> + uint32_t lmul, width, src2, dest, vlmax;
> +
> + vl = env->vfp.vl;
> +
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env)
> + || vector_overlap_vm_force(vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, true);
> + vector_lmul_check_reg(env, lmul, rd, true);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / (2 * width)));
> + dest = rd + (i / (VLEN / (2 * width)));
> + k = i % (VLEN / (2 * width));
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s16[k] =
> + (int16_t)env->vfp.vreg[src2].s16[k] +
> + (int16_t)((int8_t)env->gpr[rs1]);
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s32[k] =
> + (int32_t)env->vfp.vreg[src2].s32[k] +
> + (int32_t)((int16_t)env->gpr[rs1]);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s64[k] =
> + (int64_t)env->vfp.vreg[src2].s64[k] +
> + (int64_t)((int32_t)env->gpr[rs1]);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_widen(env, dest, k, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vwsubu_wv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, k, vl;
> + uint32_t lmul, width, src1, src2, dest, vlmax;
> +
> + vl = env->vfp.vl;
> +
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env)
> + || vector_overlap_vm_force(vm, rd)
> + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs1, lmul)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, true);
> + vector_lmul_check_reg(env, lmul, rd, true);
> +
> + for (i = 0; i < vlmax; i++) {
> + src1 = rs1 + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / (2 * width)));
> + dest = rd + (i / (VLEN / (2 * width)));
> + j = i % (VLEN / width);
> + k = i % (VLEN / (2 * width));
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u16[k] =
> + (uint16_t)env->vfp.vreg[src2].u16[k] -
> + (uint16_t)env->vfp.vreg[src1].u8[j];
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u32[k] =
> + (uint32_t)env->vfp.vreg[src2].u32[k] -
> + (uint32_t)env->vfp.vreg[src1].u16[j];
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u64[k] =
> + (uint64_t)env->vfp.vreg[src2].u64[k] -
> + (uint64_t)env->vfp.vreg[src1].u32[j];
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_widen(env, dest, k, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vwsubu_wx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, k, vl;
> + uint32_t lmul, width, src2, dest, vlmax;
> +
> + vl = env->vfp.vl;
> +
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env)
> + || vector_overlap_vm_force(vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, true);
> + vector_lmul_check_reg(env, lmul, rd, true);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / (2 * width)));
> + dest = rd + (i / (VLEN / (2 * width)));
> + k = i % (VLEN / (2 * width));
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u16[k] =
> + (uint16_t)env->vfp.vreg[src2].u16[k] -
> + (uint16_t)((uint8_t)env->gpr[rs1]);
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u32[k] =
> + (uint32_t)env->vfp.vreg[src2].u32[k] -
> + (uint32_t)((uint16_t)env->gpr[rs1]);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u64[k] =
> + (uint64_t)env->vfp.vreg[src2].u64[k] -
> + (uint64_t)((uint32_t)env->gpr[rs1]);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_widen(env, dest, k, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vfwsub.wv vd, vs2, vs1, vm # vector-vector */
> +void VECTOR_HELPER(vfwsub_wv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, k, dest, src1, src2;
> +
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> + if (vector_vtype_ill(env)
> + || vector_overlap_vm_force(vm, rd)
> + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs1, lmul)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, true);
> + vector_lmul_check_reg(env, lmul, rd, true);
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / (2 * width)));
> + src2 = rs2 + (i / (VLEN / (2 * width)));
> + src1 = rs1 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + k = i % (VLEN / (2 * width));
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f32[k] = float32_sub(
> + env->vfp.vreg[src2].f32[k],
> + float16_to_float32(env->vfp.vreg[src1].f16[j], true,
> + &env->fp_status),
> + &env->fp_status);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f64[k] = float64_sub(
> + env->vfp.vreg[src2].f64[k],
> + float32_to_float64(env->vfp.vreg[src1].f32[j],
> + &env->fp_status),
> + &env->fp_status);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_fwiden(env, dest, k, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vfwsub.wf vd, vs2, rs1, vm # vector-scalar */
> +void VECTOR_HELPER(vfwsub_wf)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, k, dest, src2;
> +
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> + if (vector_vtype_ill(env)
> + || vector_overlap_vm_force(vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs2, true);
> + vector_lmul_check_reg(env, lmul, rd, true);
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / (2 * width)));
> + src2 = rs2 + (i / (VLEN / (2 * width)));
> + k = i % (VLEN / (2 * width));
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f32[k] = float32_sub(
> + env->vfp.vreg[src2].f32[k],
> + float16_to_float32(env->fpr[rs1], true,
> + &env->fp_status),
> + &env->fp_status);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f64[k] = float64_sub(
> + env->vfp.vreg[src2].f64[k],
> + float32_to_float64(env->fpr[rs1], &env->fp_status),
> + &env->fp_status);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_fwiden(env, dest, k, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vwsub_wv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, k, vl;
> + uint32_t lmul, width, src1, src2, dest, vlmax;
> +
> + vl = env->vfp.vl;
> +
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env)
> + || vector_overlap_vm_force(vm, rd)
> + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs1, lmul)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, true);
> + vector_lmul_check_reg(env, lmul, rd, true);
> +
> + for (i = 0; i < vlmax; i++) {
> + src1 = rs1 + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / (2 * width)));
> + dest = rd + (i / (VLEN / (2 * width)));
> + j = i % (VLEN / width);
> + k = i % (VLEN / (2 * width));
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s16[k] =
> + (int16_t)env->vfp.vreg[src2].s16[k] -
> + (int16_t)((int8_t)env->vfp.vreg[src1].s8[j]);
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s32[k] =
> + (int32_t)env->vfp.vreg[src2].s32[k] -
> + (int32_t)((int16_t)env->vfp.vreg[src1].s16[j]);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s64[k] =
> + (int64_t)env->vfp.vreg[src2].s64[k] -
> + (int64_t)((int32_t)env->vfp.vreg[src1].s32[j]);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_widen(env, dest, k, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vwsub_wx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, k, vl;
> + uint32_t lmul, width, src2, dest, vlmax;
> +
> + vl = env->vfp.vl;
> +
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env)
> + || vector_overlap_vm_force(vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, true);
> + vector_lmul_check_reg(env, lmul, rd, true);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / (2 * width)));
> + dest = rd + (i / (VLEN / (2 * width)));
> + k = i % (VLEN / (2 * width));
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s16[k] =
> + (int16_t)env->vfp.vreg[src2].s16[k] -
> + (int16_t)((int8_t)env->gpr[rs1]);
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s32[k] =
> + (int32_t)env->vfp.vreg[src2].s32[k] -
> + (int32_t)((int16_t)env->gpr[rs1]);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s64[k] =
> + (int64_t)env->vfp.vreg[src2].s64[k] -
> + (int64_t)((int32_t)env->gpr[rs1]);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_widen(env, dest, k, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vwmulu_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, k, vl;
> + uint32_t lmul, width, src1, src2, dest, vlmax;
> +
> + vl = env->vfp.vl;
> +
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env)
> + || vector_overlap_vm_force(vm, rd)
> + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs1, lmul)
> + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, true);
> +
> + for (i = 0; i < vlmax; i++) {
> + src1 = rs1 + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / (2 * width)));
> + j = i % (VLEN / width);
> + k = i % (VLEN / (2 * width));
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u16[k] =
> + (uint16_t)env->vfp.vreg[src1].u8[j] *
> + (uint16_t)env->vfp.vreg[src2].u8[j];
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u32[k] =
> + (uint32_t)env->vfp.vreg[src1].u16[j] *
> + (uint32_t)env->vfp.vreg[src2].u16[j];
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u64[k] =
> + (uint64_t)env->vfp.vreg[src1].u32[j] *
> + (uint64_t)env->vfp.vreg[src2].u32[j];
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_widen(env, dest, k, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
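
One detail worth keeping through any rework: both multiplicands are cast up
to the doubled type before the multiply, so the full 2*SEW-bit product is
computed (for the 32-bit case this is what keeps the high 32 bits, and for
the narrower cases it also avoids surprises from C's promotion to signed
int). A standalone sketch of the idiom:

  #include <stdint.h>
  #include <assert.h>

  /* Widening unsigned multiply, as in the vwmulu cases above: do the
   * multiply in the doubled type so no product bits are lost. */
  static uint64_t wmulu32(uint32_t a, uint32_t b)
  {
      return (uint64_t)a * (uint64_t)b;
  }

  int main(void)
  {
      /* 0xffffffff * 0xffffffff would truncate in 32 bits. */
      assert(wmulu32(0xffffffffu, 0xffffffffu) == 0xfffffffe00000001ull);
      return 0;
  }
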
> +
> +void VECTOR_HELPER(vwmulu_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, k, vl;
> + uint32_t lmul, width, src2, dest, vlmax;
> +
> + vl = env->vfp.vl;
> +
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env)
> + || vector_overlap_vm_force(vm, rd)
> + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, true);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / (2 * width)));
> + j = i % (VLEN / width);
> + k = i % (VLEN / (2 * width));
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u16[k] =
> + (uint16_t)env->vfp.vreg[src2].u8[j] *
> + (uint16_t)((uint8_t)env->gpr[rs1]);
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u32[k] =
> + (uint32_t)env->vfp.vreg[src2].u16[j] *
> + (uint32_t)((uint16_t)env->gpr[rs1]);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u64[k] =
> + (uint64_t)env->vfp.vreg[src2].u32[j] *
> + (uint64_t)((uint32_t)env->gpr[rs1]);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_widen(env, dest, k, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vfwmul.vv vd, vs2, vs1, vm # vector-vector */
> +void VECTOR_HELPER(vfwmul_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, k, dest, src1, src2;
> +
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> + if (vector_vtype_ill(env)
> + || vector_overlap_vm_force(vm, rd)
> + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs1, lmul)
> + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, true);
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / (2 * width)));
> + src2 = rs2 + (i / (VLEN / width));
> + src1 = rs1 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + k = i % (VLEN / (2 * width));
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f32[k] = float32_mul(
> + float16_to_float32(env->vfp.vreg[src2].f16[j], true,
> + &env->fp_status),
> + float16_to_float32(env->vfp.vreg[src1].f16[j], true,
> + &env->fp_status),
> + &env->fp_status);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f64[k] = float64_mul(
> + float32_to_float64(env->vfp.vreg[src2].f32[j],
> + &env->fp_status),
> + float32_to_float64(env->vfp.vreg[src1].f32[j],
> + &env->fp_status),
> + &env->fp_status);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_fwiden(env, dest, k, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vfwmul.vf vd, vs2, rs1, vm # vector-scalar */
> +void VECTOR_HELPER(vfwmul_vf)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, k, dest, src2;
> +
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> + if (vector_vtype_ill(env)
> + || vector_overlap_vm_force(vm, rd)
> + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, true);
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / (2 * width)));
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + k = i % (VLEN / (2 * width));
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f32[k] = float32_mul(
> + float16_to_float32(env->vfp.vreg[src2].f16[j], true,
> + &env->fp_status),
> + float16_to_float32(env->fpr[rs1], true,
> + &env->fp_status),
> + &env->fp_status);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f64[k] = float64_mul(
> + float32_to_float64(env->vfp.vreg[src2].f32[j],
> + &env->fp_status),
> + float32_to_float64(env->fpr[rs1], &env->fp_status),
> + &env->fp_status);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_fwiden(env, dest, k, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vwmulsu_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, k, vl;
> + uint32_t lmul, width, src1, src2, dest, vlmax;
> +
> + vl = env->vfp.vl;
> +
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env)
> + || vector_overlap_vm_force(vm, rd)
> + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs1, lmul)
> + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, true);
> +
> + for (i = 0; i < vlmax; i++) {
> + src1 = rs1 + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / (2 * width)));
> + j = i % (VLEN / width);
> + k = i % (VLEN / (2 * width));
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s16[k] =
> + (int16_t)env->vfp.vreg[src2].s8[j] *
> + (uint16_t)env->vfp.vreg[src1].u8[j];
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s32[k] =
> + (int32_t)env->vfp.vreg[src2].s16[j] *
> + (uint32_t)env->vfp.vreg[src1].u16[j];
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s64[k] =
> + (int64_t)env->vfp.vreg[src2].s32[j] *
> + (uint64_t)env->vfp.vreg[src1].u32[j];
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_widen(env, dest, k, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
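
The mixed casts here (signed on vs2, unsigned on vs1) look odd, but they do
implement a signed-by-unsigned widening multiply: in the 32-bit case both
operands end up converted to uint64_t, and two's-complement wraparound
modulo 2^64 still yields the correct low 64 bits of the signed product. A
small self-check sketch (helper name is mine):

  #include <stdint.h>
  #include <assert.h>

  /* vwmulsu-style product: sign-extend a, zero-extend b, multiply in
   * the doubled width. */
  static int64_t wmulsu32(int32_t a, uint32_t b)
  {
      return (int64_t)((uint64_t)(int64_t)a * (uint64_t)b);
  }

  int main(void)
  {
      assert(wmulsu32(-2, 3u) == -6);
      assert(wmulsu32(-1, 0xffffffffu) == -(int64_t)0xffffffffu);
      return 0;
  }
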
> +
> +void VECTOR_HELPER(vwmulsu_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, k, vl;
> + uint32_t lmul, width, src2, dest, vlmax;
> +
> + vl = env->vfp.vl;
> +
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env)
> + || vector_overlap_vm_force(vm, rd)
> + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, true);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / (2 * width)));
> + j = i % (VLEN / width);
> + k = i % (VLEN / (2 * width));
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s16[k] =
> + (int16_t)((int8_t)env->vfp.vreg[src2].s8[j]) *
> + (uint16_t)((uint8_t)env->gpr[rs1]);
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s32[k] =
> + (int32_t)((int16_t)env->vfp.vreg[src2].s16[j]) *
> + (uint32_t)((uint16_t)env->gpr[rs1]);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s64[k] =
> + (int64_t)((int32_t)env->vfp.vreg[src2].s32[j]) *
> + (uint64_t)((uint32_t)env->gpr[rs1]);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_widen(env, dest, k, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vwmul_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, k, vl;
> + uint32_t lmul, width, src1, src2, dest, vlmax;
> +
> + vl = env->vfp.vl;
> +
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env)
> + || vector_overlap_vm_force(vm, rd)
> + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs1, lmul)
> + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, true);
> +
> + for (i = 0; i < vlmax; i++) {
> + src1 = rs1 + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / (2 * width)));
> + j = i % (VLEN / width);
> + k = i % (VLEN / (2 * width));
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s16[k] =
> + (int16_t)env->vfp.vreg[src1].s8[j] *
> + (int16_t)env->vfp.vreg[src2].s8[j];
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s32[k] =
> + (int32_t)env->vfp.vreg[src1].s16[j] *
> + (int32_t)env->vfp.vreg[src2].s16[j];
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s64[k] =
> + (int64_t)env->vfp.vreg[src1].s32[j] *
> + (int64_t)env->vfp.vreg[src2].s32[j];
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_widen(env, dest, k, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vwmul_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, k, vl;
> + uint32_t lmul, width, src2, dest, vlmax;
> +
> + vl = env->vfp.vl;
> +
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env)
> + || vector_overlap_vm_force(vm, rd)
> + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, true);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / (2 * width)));
> + j = i % (VLEN / width);
> + k = i % (VLEN / (2 * width));
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s16[k] =
> + (int16_t)((int8_t)env->vfp.vreg[src2].s8[j]) *
> + (int16_t)((int8_t)env->gpr[rs1]);
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s32[k] =
> + (int32_t)((int16_t)env->vfp.vreg[src2].s16[j]) *
> + (int32_t)((int16_t)env->gpr[rs1]);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s64[k] =
> + (int64_t)((int32_t)env->vfp.vreg[src2].s32[j]) *
> + (int64_t)((int32_t)env->gpr[rs1]);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_widen(env, dest, k, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/*
> + * vwsmaccu.vv vd, vs1, vs2, vm #
> + * vd[i] = clipu((+(vs1[i]*vs2[i]+round)>>SEW/2)+vd[i])
> + */
> +void VECTOR_HELPER(vwsmaccu_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, k, dest, src1, src2;
> +
> + lmul = vector_get_lmul(env);
> +
> +
> + if (vector_vtype_ill(env)
> + || vector_overlap_vm_force(vm, rd)
> + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs1, lmul)
> + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, true);
> +
> + vl = env->vfp.vl;
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / (2 * width)));
> + src2 = rs2 + (i / (VLEN / width));
> + src1 = rs1 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + k = i % (VLEN / (2 * width));
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u16[k] = vwsmaccu_8(env,
> + env->vfp.vreg[src2].u8[j],
> + env->vfp.vreg[src1].u8[j],
> + env->vfp.vreg[dest].u16[k]);
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u32[k] = vwsmaccu_16(env,
> + env->vfp.vreg[src2].u16[j],
> + env->vfp.vreg[src1].u16[j],
> + env->vfp.vreg[dest].u32[k]);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u64[k] = vwsmaccu_32(env,
> + env->vfp.vreg[src2].u32[j],
> + env->vfp.vreg[src1].u32[j],
> + env->vfp.vreg[dest].u64[k]);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_widen(env, dest, k, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
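
The vwsmaccu_8/16/32 helpers are defined earlier in vector_helper.c and are
not part of this hunk. Going by the formula in the comment, each one must do
a widening multiply, add a rounding increment, shift right by SEW/2, then
accumulate and saturate. A rough sketch of my reading of the 8-bit flavor --
the round-to-nearest-up rounding and the saturation bound are assumptions
(the real helper presumably consults vxrm and sets vxsat):

  #include <stdint.h>

  static uint16_t wsmaccu8_sketch(uint8_t a, uint8_t b, uint16_t acc)
  {
      uint32_t prod = (uint32_t)a * (uint32_t)b;     /* up to 16 bits */
      uint32_t shifted = (prod + (1u << 3)) >> 4;    /* round, >> SEW/2 */
      uint32_t sum = shifted + acc;
      return sum > UINT16_MAX ? UINT16_MAX : (uint16_t)sum;  /* clipu */
  }
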
> +
> +/*
> + * vwsmaccu.vx vd, rs1, vs2, vm #
> + * vd[i] = clipu((+(x[rs1]*vs2[i]+round)>>SEW/2)+vd[i])
> + */
> +void VECTOR_HELPER(vwsmaccu_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, k, dest, src2;
> +
> + lmul = vector_get_lmul(env);
> +
> +
> + if (vector_vtype_ill(env)
> + || vector_overlap_vm_force(vm, rd)
> + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, true);
> +
> + vl = env->vfp.vl;
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / (2 * width)));
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + k = i % (VLEN / (2 * width));
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u16[k] = vwsmaccu_8(env,
> + env->vfp.vreg[src2].u8[j],
> + env->gpr[rs1],
> + env->vfp.vreg[dest].u16[k]);
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u32[k] = vwsmaccu_16(env,
> + env->vfp.vreg[src2].u16[j],
> + env->gpr[rs1],
> + env->vfp.vreg[dest].u32[k]);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u64[k] = vwsmaccu_32(env,
> + env->vfp.vreg[src2].u32[j],
> + env->gpr[rs1],
> + env->vfp.vreg[dest].u64[k]);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_widen(env, dest, k, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vwmaccu_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, k, vl;
> + uint32_t lmul, width, src1, src2, dest, vlmax;
> +
> + vl = env->vfp.vl;
> +
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env)
> + || vector_overlap_vm_force(vm, rd)
> + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs1, lmul)
> + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, true);
> +
> + for (i = 0; i < vlmax; i++) {
> + src1 = rs1 + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / (2 * width)));
> + j = i % (VLEN / width);
> + k = i % (VLEN / (2 * width));
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u16[k] +=
> + (uint16_t)env->vfp.vreg[src1].u8[j] *
> + (uint16_t)env->vfp.vreg[src2].u8[j];
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u32[k] +=
> + (uint32_t)env->vfp.vreg[src1].u16[j] *
> + (uint32_t)env->vfp.vreg[src2].u16[j];
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u64[k] +=
> + (uint64_t)env->vfp.vreg[src1].u32[j] *
> + (uint64_t)env->vfp.vreg[src2].u32[j];
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_widen(env, dest, k, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vwmaccu_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, k, vl;
> + uint32_t lmul, width, src2, dest, vlmax;
> +
> + vl = env->vfp.vl;
> +
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env)
> + || vector_overlap_vm_force(vm, rd)
> + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, true);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / (2 * width)));
> + j = i % (VLEN / width);
> + k = i % (VLEN / (2 * width));
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u16[k] +=
> + (uint16_t)env->vfp.vreg[src2].u8[j] *
> + (uint16_t)((uint8_t)env->gpr[rs1]);
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u32[k] +=
> + (uint32_t)env->vfp.vreg[src2].u16[j] *
> + (uint32_t)((uint16_t)env->gpr[rs1]);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u64[k] +=
> + (uint64_t)env->vfp.vreg[src2].u32[j] *
> + (uint64_t)((uint32_t)env->gpr[rs1]);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_widen(env, dest, k, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vfwmacc.vv vd, vs1, vs2, vm # vd[i] = +(vs1[i] * vs2[i]) + vd[i] */
> +void VECTOR_HELPER(vfwmacc_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, k, dest, src1, src2;
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> + if (vector_vtype_ill(env)
> + || vector_overlap_vm_force(vm, rd)
> + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs1, lmul)
> + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, true);
> +
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / (2 * width)));
> + src2 = rs2 + (i / (VLEN / width));
> + src1 = rs1 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + k = i % (VLEN / (2 * width));
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f32[k] = float32_muladd(
> + float16_to_float32(env->vfp.vreg[src1].f16[j], true,
> + &env->fp_status),
> + float16_to_float32(env->vfp.vreg[src2].f16[j], true,
> + &env->fp_status),
> + env->vfp.vreg[dest].f32[k],
> + 0,
> + &env->fp_status);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f64[k] = float64_muladd(
> + float32_to_float64(env->vfp.vreg[src1].f32[j],
> + &env->fp_status),
> + float32_to_float64(env->vfp.vreg[src2].f32[j],
> + &env->fp_status),
> + env->vfp.vreg[dest].f64[k],
> + 0,
> + &env->fp_status);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + switch (width) {
> + case 16:
> + env->vfp.vreg[dest].f32[k] = 0;
> + break;
> + case 32:
> + env->vfp.vreg[dest].f64[k] = 0;
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + }
> + }
> + env->vfp.vstart = 0;
> +}
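
All the vfw*macc variants funnel into float32_muladd/float64_muladd, so the
fused semantics (one rounding at the end) come directly from softfloat
rather than from a separate multiply and add. A minimal sketch of the call
shape on already-widened operands (helper name is mine):

  #include "qemu/osdep.h"
  #include "fpu/softfloat.h"

  /* vfwmacc element step on widened operands: d = (a * b) + d, fused,
   * with a single rounding and flags accumulated in *fs. */
  static float32 fwmacc_elem(float32 a, float32 b, float32 d,
                             float_status *fs)
  {
      return float32_muladd(a, b, d, 0, fs);
  }
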
> +
> +/* vfwmacc.vf vd, rs1, vs2, vm # vd[i] = +(f[rs1] * vs2[i]) + vd[i] */
> +void VECTOR_HELPER(vfwmacc_vf)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, k, dest, src2;
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> + if (vector_vtype_ill(env)
> + || vector_overlap_vm_force(vm, rd)
> + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, true);
> +
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / (2 * width)));
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + k = i % (VLEN / (2 * width));
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f32[k] = float32_muladd(
> + float16_to_float32(env->fpr[rs1], true,
> + &env->fp_status),
> + float16_to_float32(env->vfp.vreg[src2].f16[j], true,
> + &env->fp_status),
> + env->vfp.vreg[dest].f32[k],
> + 0,
> + &env->fp_status);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f64[k] = float64_muladd(
> + float32_to_float64(env->fpr[rs1], &env->fp_status),
> + float32_to_float64(env->vfp.vreg[src2].f32[j],
> + &env->fp_status),
> + env->vfp.vreg[dest].f64[k],
> + 0,
> + &env->fp_status);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + switch (width) {
> + case 16:
> + env->vfp.vreg[dest].f32[k] = 0;
> + break;
> + case 32:
> + env->vfp.vreg[dest].f64[k] = 0;
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/*
> + * vwsmacc.vv vd, vs1, vs2, vm #
> + * vd[i] = clip((+(vs1[i]*vs2[i]+round)>>SEW/2)+vd[i])
> + */
> +void VECTOR_HELPER(vwsmacc_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, k, dest, src1, src2;
> +
> + lmul = vector_get_lmul(env);
> +
> +
> + if (vector_vtype_ill(env)
> + || vector_overlap_vm_force(vm, rd)
> + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs1, lmul)
> + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, true);
> +
> + vl = env->vfp.vl;
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / (2 * width)));
> + src2 = rs2 + (i / (VLEN / width));
> + src1 = rs1 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + k = i % (VLEN / (2 * width));
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s16[k] = vwsmacc_8(env,
> + env->vfp.vreg[src2].s8[j],
> + env->vfp.vreg[src1].s8[j],
> + env->vfp.vreg[dest].s16[k]);
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s32[k] = vwsmacc_16(env,
> + env->vfp.vreg[src2].s16[j],
> + env->vfp.vreg[src1].s16[j],
> + env->vfp.vreg[dest].s32[k]);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s64[k] = vwsmacc_32(env,
> + env->vfp.vreg[src2].s32[j],
> + env->vfp.vreg[src1].s32[j],
> + env->vfp.vreg[dest].s64[k]);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_widen(env, dest, k, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/*
> + * vwsmacc.vx vd, rs1, vs2, vm #
> + * vd[i] = clip((+(x[rs1]*vs2[i]+round)>>SEW/2)+vd[i])
> + */
> +void VECTOR_HELPER(vwsmacc_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, k, dest, src2;
> +
> + lmul = vector_get_lmul(env);
> +
> +
> + if (vector_vtype_ill(env)
> + || vector_overlap_vm_force(vm, rd)
> + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, true);
> +
> + vl = env->vfp.vl;
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / (2 * width)));
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + k = i % (VLEN / (2 * width));
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s16[k] = vwsmacc_8(env,
> + env->vfp.vreg[src2].s8[j],
> + env->gpr[rs1],
> + env->vfp.vreg[dest].s16[k]);
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s32[k] = vwsmacc_16(env,
> + env->vfp.vreg[src2].s16[j],
> + env->gpr[rs1],
> + env->vfp.vreg[dest].s32[k]);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s64[k] = vwsmacc_32(env,
> + env->vfp.vreg[src2].s32[j],
> + env->gpr[rs1],
> + env->vfp.vreg[dest].s64[k]);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_widen(env, dest, k, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/*
> + * vwsmaccsu.vv vd, vs1, vs2, vm
> + * # vd[i] = clip(-((signed(vs1[i])*unsigned(vs2[i])+round)>>SEW/2)+vd[i])
> + */
> +void VECTOR_HELPER(vwsmaccsu_vv)(CPURISCVState *env, uint32_t vm,
> + uint32_t rs1, uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, k, dest, src1, src2;
> +
> + lmul = vector_get_lmul(env);
> +
> +
> + if (vector_vtype_ill(env)
> + || vector_overlap_vm_force(vm, rd)
> + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs1, lmul)
> + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, true);
> +
> + vl = env->vfp.vl;
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / (2 * width)));
> + src2 = rs2 + (i / (VLEN / width));
> + src1 = rs1 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + k = i % (VLEN / (2 * width));
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s16[k] = vwsmaccsu_8(env,
> + env->vfp.vreg[src2].u8[j],
> + env->vfp.vreg[src1].s8[j],
> + env->vfp.vreg[dest].s16[k]);
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s32[k] = vwsmaccsu_16(env,
> + env->vfp.vreg[src2].u16[j],
> + env->vfp.vreg[src1].s16[j],
> + env->vfp.vreg[dest].s32[k]);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s64[k] = vwsmaccsu_32(env,
> + env->vfp.vreg[src2].u32[j],
> + env->vfp.vreg[src1].s32[j],
> + env->vfp.vreg[dest].s64[k]);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_widen(env, dest, k, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/*
> + * vwsmaccsu.vx vd, rs1, vs2, vm
> + * # vd[i] = clip(-((signed(x[rs1])*unsigned(vs2[i])+round)>>SEW/2)+vd[i])
> + */
> +void VECTOR_HELPER(vwsmaccsu_vx)(CPURISCVState *env, uint32_t vm,
> + uint32_t rs1, uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, k, dest, src2;
> +
> + lmul = vector_get_lmul(env);
> +
> +
> + if (vector_vtype_ill(env)
> + || vector_overlap_vm_force(vm, rd)
> + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, true);
> +
> + vl = env->vfp.vl;
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / (2 * width)));
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + k = i % (VLEN / (2 * width));
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s16[k] = vwsmaccsu_8(env,
> + env->vfp.vreg[src2].u8[j],
> + env->gpr[rs1],
> + env->vfp.vreg[dest].s16[k]);
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s32[k] = vwsmaccsu_16(env,
> + env->vfp.vreg[src2].u16[j],
> + env->gpr[rs1],
> + env->vfp.vreg[dest].s32[k]);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s64[k] = vwsmaccsu_32(env,
> + env->vfp.vreg[src2].u32[j],
> + env->gpr[rs1],
> + env->vfp.vreg[dest].s64[k]);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_widen(env, dest, k, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/*
> + * vwsmaccus.vx vd, rs1, vs2, vm
> + * # vd[i] = clip(-((unsigned(x[rs1])*signed(vs2[i])+round)>>SEW/2)+vd[i])
> + */
> +void VECTOR_HELPER(vwsmaccus_vx)(CPURISCVState *env, uint32_t vm,
> + uint32_t rs1, uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, k, dest, src2;
> +
> + lmul = vector_get_lmul(env);
> +
> +
> + if (vector_vtype_ill(env)
> + || vector_overlap_vm_force(vm, rd)
> + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, true);
> +
> + vl = env->vfp.vl;
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / (2 * width)));
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + k = i % (VLEN / (2 * width));
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s16[k] = vwsmaccus_8(env,
> + env->vfp.vreg[src2].s8[j],
> + env->gpr[rs1],
> + env->vfp.vreg[dest].s16[k]);
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s32[k] = vwsmaccus_16(env,
> + env->vfp.vreg[src2].s16[j],
> + env->gpr[rs1],
> + env->vfp.vreg[dest].s32[k]);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s64[k] = vwsmaccus_32(env,
> + env->vfp.vreg[src2].s32[j],
> + env->gpr[rs1],
> + env->vfp.vreg[dest].s64[k]);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_widen(env, dest, k, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +
> +void VECTOR_HELPER(vwmacc_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, k, vl;
> + uint32_t lmul, width, src1, src2, dest, vlmax;
> +
> + vl = env->vfp.vl;
> +
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env)
> + || vector_overlap_vm_force(vm, rd)
> + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs1, lmul)
> + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, true);
> +
> + for (i = 0; i < vlmax; i++) {
> + src1 = rs1 + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / (2 * width)));
> + j = i % (VLEN / width);
> + k = i % (VLEN / (2 * width));
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s16[k] +=
> + (int16_t)env->vfp.vreg[src1].s8[j]
> + * (int16_t)env->vfp.vreg[src2].s8[j];
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s32[k] +=
> + (int32_t)env->vfp.vreg[src1].s16[j] *
> + (int32_t)env->vfp.vreg[src2].s16[j];
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s64[k] +=
> + (int64_t)env->vfp.vreg[src1].s32[j] *
> + (int64_t)env->vfp.vreg[src2].s32[j];
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_widen(env, dest, k, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vwmacc_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, k, vl;
> + uint32_t lmul, width, src2, dest, vlmax;
> +
> + vl = env->vfp.vl;
> +
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env)
> + || vector_overlap_vm_force(vm, rd)
> + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, true);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / (2 * width)));
> + j = i % (VLEN / width);
> + k = i % (VLEN / (2 * width));
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s16[k] +=
> + (int16_t)((int8_t)env->vfp.vreg[src2].s8[j]) *
> + (int16_t)((int8_t)env->gpr[rs1]);
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s32[k] +=
> + (int32_t)((int16_t)env->vfp.vreg[src2].s16[j]) *
> + (int32_t)((int16_t)env->gpr[rs1]);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s64[k] +=
> + (int64_t)((int32_t)env->vfp.vreg[src2].s32[j]) *
> + (int64_t)((int32_t)env->gpr[rs1]);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_widen(env, dest, k, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vfwnmacc.vv vd, vs1, vs2, vm # vd[i] = -(vs1[i] * vs2[i]) - vd[i] */
> +void VECTOR_HELPER(vfwnmacc_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, k, dest, src1, src2;
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> + if (vector_vtype_ill(env)
> + || vector_overlap_vm_force(vm, rd)
> + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs1, lmul)
> + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, true);
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / (2 * width)));
> + src2 = rs2 + (i / (VLEN / width));
> + src1 = rs1 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + k = i % (VLEN / (2 * width));
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + /* vd is the double-width accumulator; read it back at 2*SEW */
> + env->vfp.vreg[dest].f32[k] = float32_muladd(
> + float16_to_float32(env->vfp.vreg[src1].f16[j], true, &env->fp_status),
> + float16_to_float32(env->vfp.vreg[src2].f16[j], true, &env->fp_status),
> + env->vfp.vreg[dest].f32[k],
> + float_muladd_negate_c | float_muladd_negate_product,
> + &env->fp_status);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f64[k] = float64_muladd(
> + float32_to_float64(env->vfp.vreg[src1].f32[j], &env->fp_status),
> + float32_to_float64(env->vfp.vreg[src2].f32[j], &env->fp_status),
> + env->vfp.vreg[dest].f64[k],
> + float_muladd_negate_c | float_muladd_negate_product,
> + &env->fp_status);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + switch (width) {
> + case 16:
> + env->vfp.vreg[dest].f32[k] = 0;
> + break;
> + case 32:
> + env->vfp.vreg[dest].f64[k] = 0;
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + }
> + }
> + env->vfp.vstart = 0;
> +}
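> +
> +/*
> + * Note on the muladd flags: softfloat computes (a * b) + c, so
> + * float_muladd_negate_product | float_muladd_negate_c evaluates
> + * -(a * b) - c, matching the vfwnmacc definition with the old
> + * double-width vd element supplied as c.
> + */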
> +
> +/* vfwnmacc.vf vd, rs1, vs2, vm # vd[i] = -(f[rs1] * vs2[i]) - vd[i] */
> +void VECTOR_HELPER(vfwnmacc_vf)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, k, dest, src2;
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> + if (vector_vtype_ill(env)
> + || vector_overlap_vm_force(vm, rd)
> + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, true);
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / (2 * width)));
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + k = i % (VLEN / (2 * width));
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + /* the scalar is single-width; unbox and widen it first */
> + env->vfp.vreg[dest].f32[k] = float32_muladd(
> + float16_to_float32((uint16_t)env->fpr[rs1], true, &env->fp_status),
> + float16_to_float32(env->vfp.vreg[src2].f16[j], true, &env->fp_status),
> + env->vfp.vreg[dest].f32[k],
> + float_muladd_negate_c | float_muladd_negate_product,
> + &env->fp_status);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f64[k] = float64_muladd(
> + float32_to_float64((uint32_t)env->fpr[rs1], &env->fp_status),
> + float32_to_float64(env->vfp.vreg[src2].f32[j], &env->fp_status),
> + env->vfp.vreg[dest].f64[k],
> + float_muladd_negate_c | float_muladd_negate_product,
> + &env->fp_status);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + switch (width) {
> + case 16:
> + env->vfp.vreg[dest].f32[k] = 0;
> + break;
> + case 32:
> + env->vfp.vreg[dest].f64[k] = 0;
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + }
> + }
> + env->vfp.vstart = 0;
> +}
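> +
> +/*
> + * The (uint16_t)/(uint32_t) casts on env->fpr[rs1] above are a sketch
> + * that assumes the single-width scalar sits in the low bits of the FP
> + * register (NaN-boxing); it is extracted and widened so the .vf form
> + * behaves like the .vv form.
> + */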
> +
> +void VECTOR_HELPER(vwmaccsu_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, k, vl;
> + uint32_t lmul, width, src1, src2, dest, vlmax;
> +
> + vl = env->vfp.vl;
> +
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env)
> + || vector_overlap_vm_force(vm, rd)
> + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs1, lmul)
> + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, true);
> +
> + for (i = 0; i < vlmax; i++) {
> + src1 = rs1 + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / (2 * width)));
> + j = i % (VLEN / width);
> + k = i % (VLEN / (2 * width));
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s16[k] +=
> + (int16_t)env->vfp.vreg[src1].s8[j]
> + * (uint16_t)env->vfp.vreg[src2].u8[j];
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s32[k] +=
> + (int32_t)env->vfp.vreg[src1].s16[j] *
> + (uint32_t)env->vfp.vreg[src2].u16[j];
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s64[k] +=
> + (int64_t)env->vfp.vreg[src1].s32[j] *
> + (uint64_t)env->vfp.vreg[src2].u32[j];
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_widen(env, dest, k, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
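> +
> +/*
> + * Worked example for the signed * unsigned widening above (width = 8):
> + * vs1[i] = -2 (0xfe) sign-extends to (int16_t)-2 while vs2[i] = 200
> + * (0xc8) zero-extends to (uint16_t)200, so the double-width
> + * accumulator gains -2 * 200 = -400.
> + */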
> +void VECTOR_HELPER(vwmaccsu_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, k, vl;
> + uint32_t lmul, width, src2, dest, vlmax;
> +
> + vl = env->vfp.vl;
> +
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env)
> + || vector_overlap_vm_force(vm, rd)
> + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, true);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / (2 * width)));
> + j = i % (VLEN / width);
> + k = i % (VLEN / (2 * width));
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s16[k] +=
> + (uint16_t)((uint8_t)env->vfp.vreg[src2].u8[j]) *
> + (int16_t)((int8_t)env->gpr[rs1]);
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s32[k] +=
> + (uint32_t)((uint16_t)env->vfp.vreg[src2].u16[j]) *
> + (int32_t)((int16_t)env->gpr[rs1]);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s64[k] +=
> + (uint64_t)((uint32_t)env->vfp.vreg[src2].u32[j]) *
> + (int64_t)((int32_t)env->gpr[rs1]);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_widen(env, dest, k, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vfwmsac.vv vd, vs1, vs2, vm # vd[i] = +(vs1[i] * vs2[i]) - vd[i] */
> +void VECTOR_HELPER(vfwmsac_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, k, dest, src1, src2;
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> + if (vector_vtype_ill(env)
> + || vector_overlap_vm_force(vm, rd)
> + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs1, lmul)
> + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, true);
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / (2 * width)));
> + src2 = rs2 + (i / (VLEN / width));
> + src1 = rs1 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + k = i % (VLEN / (2 * width));
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f32[k] = float32_muladd(
> + float16_to_float32(env->vfp.vreg[src1].f16[j], true, &env->fp_status),
> + float16_to_float32(env->vfp.vreg[src2].f16[j], true, &env->fp_status),
> + env->vfp.vreg[dest].f32[k],
> + float_muladd_negate_c,
> + &env->fp_status);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f64[k] = float64_muladd(
> + float32_to_float64(env->vfp.vreg[src1].f32[j], &env->fp_status),
> + float32_to_float64(env->vfp.vreg[src2].f32[j], &env->fp_status),
> + env->vfp.vreg[dest].f64[k],
> + float_muladd_negate_c,
> + &env->fp_status);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + switch (width) {
> + case 16:
> + env->vfp.vreg[dest].f32[k] = 0;
> + break;
> + case 32:
> + env->vfp.vreg[dest].f64[k] = 0;
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vfwmsac.vf vd, rs1, vs2, vm # vd[i] = +(f[rs1] * vs2[i]) - vd[i] */
> +void VECTOR_HELPER(vfwmsac_vf)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, k, dest, src2;
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> + if (vector_vtype_ill(env)
> + || vector_overlap_vm_force(vm, rd)
> + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, true);
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / (2 * width)));
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + k = i % (VLEN / (2 * width));
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f32[k] = float32_muladd(
> + float16_to_float32((uint16_t)env->fpr[rs1], true, &env->fp_status),
> + float16_to_float32(env->vfp.vreg[src2].f16[j], true, &env->fp_status),
> + env->vfp.vreg[dest].f32[k],
> + float_muladd_negate_c,
> + &env->fp_status);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f64[k] = float64_muladd(
> + float32_to_float64((uint32_t)env->fpr[rs1], &env->fp_status),
> + float32_to_float64(env->vfp.vreg[src2].f32[j], &env->fp_status),
> + env->vfp.vreg[dest].f64[k],
> + float_muladd_negate_c,
> + &env->fp_status);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + switch (width) {
> + case 16:
> + env->vfp.vreg[dest].f32[k] = 0;
> + break;
> + case 32:
> + env->vfp.vreg[dest].f64[k] = 0;
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vwmaccus_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int i, j, k, vl;
> + uint32_t lmul, width, src2, dest, vlmax;
> +
> + vl = env->vfp.vl;
> +
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env)
> + || vector_overlap_vm_force(vm, rd)
> + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, true);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = rs2 + (i / (VLEN / width));
> + dest = rd + (i / (VLEN / (2 * width)));
> + j = i % (VLEN / width);
> + k = i % (VLEN / (2 * width));
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s16[k] +=
> + (int16_t)((int8_t)env->vfp.vreg[src2].s8[j]) *
> + (uint16_t)((uint8_t)env->gpr[rs1]);
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s32[k] +=
> + (int32_t)((int16_t)env->vfp.vreg[src2].s16[j]) *
> + (uint32_t)((uint16_t)env->gpr[rs1]);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s64[k] +=
> + (int64_t)((int32_t)env->vfp.vreg[src2].s32[j]) *
> + (uint64_t)((uint32_t)env->gpr[rs1]);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_widen(env, dest, k, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
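> +
> +/*
> + * vwmaccus.vx mirrors vwmaccsu.vx with the roles swapped: the scalar
> + * x[rs1] is treated as unsigned and the vector operand vs2 as signed,
> + * so vs2[i] = -2 with x[rs1] = 200 again accumulates -400.
> + */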
> +
> +/* vfwnmsac.vv vd, vs1, vs2, vm # vd[i] = -(vs1[i] * vs2[i]) + vd[i] */
> +void VECTOR_HELPER(vfwnmsac_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, k, dest, src1, src2;
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> + if (vector_vtype_ill(env)
> + || vector_overlap_vm_force(vm, rd)
> + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs1, lmul)
> + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs1, false);
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, true);
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / (2 * width)));
> + src2 = rs2 + (i / (VLEN / width));
> + src1 = rs1 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + k = i % (VLEN / (2 * width));
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f32[k] = float32_muladd(
> + float16_to_float32(env->vfp.vreg[src1].f16[j], true, &env->fp_status),
> + float16_to_float32(env->vfp.vreg[src2].f16[j], true, &env->fp_status),
> + env->vfp.vreg[dest].f32[k],
> + float_muladd_negate_product,
> + &env->fp_status);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f64[k] = float64_muladd(
> + float32_to_float64(env->vfp.vreg[src1].f32[j], &env->fp_status),
> + float32_to_float64(env->vfp.vreg[src2].f32[j], &env->fp_status),
> + env->vfp.vreg[dest].f64[k],
> + float_muladd_negate_product,
> + &env->fp_status);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + switch (width) {
> + case 16:
> + env->vfp.vreg[dest].f32[k] = 0;
> + break;
> + case 32:
> + env->vfp.vreg[dest].f64[k] = 0;
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vfwnmsac.vf vd, rs1, vs2, vm # vd[i] = -(f[rs1] * vs2[i]) + vd[i] */
> +void VECTOR_HELPER(vfwnmsac_vf)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, k, dest, src2;
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> + if (vector_vtype_ill(env)
> + || vector_overlap_vm_force(vm, rd)
> + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, true);
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / (2 * width)));
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + k = i % (VLEN / (2 * width));
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f32[k] = float32_muladd(
> + float16_to_float32((uint16_t)env->fpr[rs1], true, &env->fp_status),
> + float16_to_float32(env->vfp.vreg[src2].f16[j], true, &env->fp_status),
> + env->vfp.vreg[dest].f32[k],
> + float_muladd_negate_product,
> + &env->fp_status);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f64[k] = float64_muladd(
> + float32_to_float64((uint32_t)env->fpr[rs1], &env->fp_status),
> + float32_to_float64(env->vfp.vreg[src2].f32[j], &env->fp_status),
> + env->vfp.vreg[dest].f64[k],
> + float_muladd_negate_product,
> + &env->fp_status);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + switch (width) {
> + case 16:
> + env->vfp.vreg[dest].f32[k] = 0;
> + break;
> + case 32:
> + env->vfp.vreg[dest].f64[k] = 0;
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +
> +/* vfsqrt.v vd, vs2, vm # Vector-vector square root */
> +void VECTOR_HELPER(vfsqrt_v)(CPURISCVState *env, uint32_t vm, uint32_t rs2,
> + uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest, src2;
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f16[j] = float16_sqrt(
> + env->vfp.vreg[src2].f16[j], &env->fp_status);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f32[j] = float32_sqrt(
> + env->vfp.vreg[src2].f32[j], &env->fp_status);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f64[j] = float64_sqrt(
> + env->vfp.vreg[src2].f64[j], &env->fp_status);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + switch (width) {
> + case 16:
> + env->vfp.vreg[dest].f16[j] = 0;
> + break;
> + case 32:
> + env->vfp.vreg[dest].f32[j] = 0;
> + break;
> + case 64:
> + env->vfp.vreg[dest].f64[j] = 0;
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vfclass.v vd, vs2, vm # Vector-vector */
> +void VECTOR_HELPER(vfclass_v)(CPURISCVState *env, uint32_t vm, uint32_t rs2,
> + uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest, src2;
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u16[j] = helper_fclass_h(
> + env->vfp.vreg[src2].f16[j]);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u32[j] = helper_fclass_s(
> + env->vfp.vreg[src2].f32[j]);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u64[j] = helper_fclass_d(
> + env->vfp.vreg[src2].f64[j]);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_fcommon(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
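> +
> +/*
> + * The fclass result is a one-hot mask as in scalar FCLASS: bit 0 -inf,
> + * 1 negative normal, 2 negative subnormal, 3 -0, 4 +0, 5 positive
> + * subnormal, 6 positive normal, 7 +inf, 8 signaling NaN, 9 quiet NaN.
> + * Classifying -0.0, for example, yields 0x8.
> + */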
> +
> +/* vfcvt.xu.f.v vd, vs2, vm # Convert float to unsigned integer. */
> +void VECTOR_HELPER(vfcvt_xu_f_v)(CPURISCVState *env, uint32_t vm,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest, src2;
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u16[j] = float16_to_uint16(
> + env->vfp.vreg[src2].f16[j], &env->fp_status);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u32[j] = float32_to_uint32(
> + env->vfp.vreg[src2].f32[j], &env->fp_status);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u64[j] = float64_to_uint64(
> + env->vfp.vreg[src2].f64[j], &env->fp_status);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_fcommon(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vfcvt.x.f.v vd, vs2, vm # Convert float to signed integer. */
> +void VECTOR_HELPER(vfcvt_x_f_v)(CPURISCVState *env, uint32_t vm,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest, src2;
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s16[j] = float16_to_int16(
> + env->vfp.vreg[src2].f16[j], &env->fp_status);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s32[j] = float32_to_int32(
> + env->vfp.vreg[src2].f32[j], &env->fp_status);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s64[j] = float64_to_int64(
> + env->vfp.vreg[src2].f64[j], &env->fp_status);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_fcommon(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vfcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to float. */
> +void VECTOR_HELPER(vfcvt_f_xu_v)(CPURISCVState *env, uint32_t vm,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest, src2;
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f16[j] = uint16_to_float16(
> + env->vfp.vreg[src2].u16[j], &env->fp_status);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f32[j] = uint32_to_float32(
> + env->vfp.vreg[src2].u32[j], &env->fp_status);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f64[j] = uint64_to_float64(
> + env->vfp.vreg[src2].u64[j], &env->fp_status);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_fcommon(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vfcvt.f.x.v vd, vs2, vm # Convert integer to float. */
> +void VECTOR_HELPER(vfcvt_f_x_v)(CPURISCVState *env, uint32_t vm,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, dest, src2;
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f16[j] = int16_to_float16(
> + env->vfp.vreg[src2].s16[j], &env->fp_status);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f32[j] = int32_to_float32(
> + env->vfp.vreg[src2].s32[j], &env->fp_status);
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f64[j] = int64_to_float64(
> + env->vfp.vreg[src2].s64[j], &env->fp_status);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_fcommon(env, dest, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
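> +
> +/*
> + * All four single-width conversions above round according to the
> + * dynamic rounding mode held in env->fp_status: float32_to_int32(3.5)
> + * gives 4 under round-to-nearest-even but 3 under round-towards-zero,
> + * and inexact/invalid flags accumulate in the same status word.
> + */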
> +
> +/* vfwcvt.xu.f.v vd, vs2, vm # Convert float to double-width unsigned integer. */
> +void VECTOR_HELPER(vfwcvt_xu_f_v)(CPURISCVState *env, uint32_t vm,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, k, dest, src2;
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> + if (vector_vtype_ill(env)
> + || vector_overlap_vm_force(vm, rd)
> + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, true);
> +
> + if (lmul > 4) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / (2 * width)));
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + k = i % (VLEN / (2 * width));
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u32[k] = float16_to_uint32(
> + env->vfp.vreg[src2].f16[j], &env->fp_status);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u64[k] = float32_to_uint64(
> + env->vfp.vreg[src2].f32[j], &env->fp_status);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + }
> + } else {
> + vector_tail_fwiden(env, dest, k, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vfwcvt.x.f.v vd, vs2, vm # Convert float to double-width signed integer. */
> +void VECTOR_HELPER(vfwcvt_x_f_v)(CPURISCVState *env, uint32_t vm,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, k, dest, src2;
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> + if (vector_vtype_ill(env)
> + || vector_overlap_vm_force(vm, rd)
> + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, true);
> +
> + if (lmul > 4) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / (2 * width)));
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + k = i % (VLEN / (2 * width));
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s32[k] = float16_to_int32(
> + env->vfp.vreg[src2].f16[j], &env->fp_status);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s64[k] = float32_to_int64(
> + env->vfp.vreg[src2].f32[j], &env->fp_status);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_fwiden(env, dest, k, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vfwcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to double-width float. */
> +void VECTOR_HELPER(vfwcvt_f_xu_v)(CPURISCVState *env, uint32_t vm,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, k, dest, src2;
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> + if (vector_vtype_ill(env)
> + || vector_overlap_vm_force(vm, rd)
> + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, true);
> +
> + if (lmul > 4) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / (2 * width)));
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + k = i % (VLEN / (2 * width));
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f32[k] = uint16_to_float32(
> + env->vfp.vreg[src2].u16[j], &env->fp_status);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f64[k] = uint32_to_float64(
> + env->vfp.vreg[src2].u32[j], &env->fp_status);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_fwiden(env, dest, k, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vfwcvt.f.x.v vd, vs2, vm # Convert integer to double-width float. */
> +void VECTOR_HELPER(vfwcvt_f_x_v)(CPURISCVState *env, uint32_t vm,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, k, dest, src2;
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> + if (vector_vtype_ill(env)
> + || vector_overlap_vm_force(vm, rd)
> + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, true);
> +
> + if (lmul > 4) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / (2 * width)));
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + k = i % (VLEN / (2 * width));
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f32[k] = int16_to_float32(
> + env->vfp.vreg[src2].s16[j], &env->fp_status);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f64[k] = int32_to_float64(
> + env->vfp.vreg[src2].s32[j], &env->fp_status);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_fwiden(env, dest, k, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/*
> + * vfwcvt.f.f.v vd, vs2, vm #
> + * Convert single-width float to double-width float.
> + */
> +void VECTOR_HELPER(vfwcvt_f_f_v)(CPURISCVState *env, uint32_t vm,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, k, dest, src2;
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> + if (vector_vtype_ill(env)
> + || vector_overlap_vm_force(vm, rd)
> + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs2, false);
> + vector_lmul_check_reg(env, lmul, rd, true);
> +
> + if (lmul > 4) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / (2 * width)));
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + k = i % (VLEN / (2 * width));
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f32[k] = float16_to_float32(
> + env->vfp.vreg[src2].f16[j], true, &env->fp_status);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f64[k] = float32_to_float64(
> + env->vfp.vreg[src2].f32[j], &env->fp_status);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_fwiden(env, dest, k, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
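> +
> +/*
> + * The `true` passed to float16_to_float32()/float32_to_float16()
> + * selects IEEE 754 half-precision (with infinities and NaNs) rather
> + * than the ARM alternative format, consistent with the other f16
> + * paths in this file.
> + */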
> +
> +/* vfncvt.xu.f.v vd, vs2, vm # Convert double-width float to unsigned integer. */
> +void VECTOR_HELPER(vfncvt_xu_f_v)(CPURISCVState *env, uint32_t vm,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, k, dest, src2;
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> + if (vector_vtype_ill(env) ||
> + vector_overlap_vm_common(lmul, vm, rd) ||
> + vector_overlap_dstgp_srcgp(rd, lmul, rs2, 2 * lmul)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs2, true);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + if (lmul > 4) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / (2 * width)));
> + k = i % (VLEN / width);
> + j = i % (VLEN / (2 * width));
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u16[k] = float32_to_uint16(
> + env->vfp.vreg[src2].f32[j], &env->fp_status);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].u32[k] = float64_to_uint32(
> + env->vfp.vreg[src2].f64[j], &env->fp_status);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_fnarrow(env, dest, k, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vfncvt.x.f.v vd, vs2, vm # Convert double-width float to signed integer. */
> +void VECTOR_HELPER(vfncvt_x_f_v)(CPURISCVState *env, uint32_t vm,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, k, dest, src2;
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> + if (vector_vtype_ill(env) ||
> + vector_overlap_vm_common(lmul, vm, rd) ||
> + vector_overlap_dstgp_srcgp(rd, lmul, rs2, 2 * lmul)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs2, true);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + if (lmul > 4) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / (2 * width)));
> + k = i % (VLEN / width);
> + j = i % (VLEN / (2 * width));
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s16[k] = float32_to_int16(
> + env->vfp.vreg[src2].f32[j], &env->fp_status);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].s32[k] = float64_to_int32(
> + env->vfp.vreg[src2].f64[j], &env->fp_status);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_fnarrow(env, dest, k, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vfncvt.f.xu.v vd, vs2, vm # Convert double-width unsigned integer to float. */
> +void VECTOR_HELPER(vfncvt_f_xu_v)(CPURISCVState *env, uint32_t vm,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, k, dest, src2;
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> +
> + if (vector_vtype_ill(env) ||
> + vector_overlap_vm_common(lmul, vm, rd) ||
> + vector_overlap_dstgp_srcgp(rd, lmul, rs2, 2 * lmul)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rs2, true);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + if (lmul > 4) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / (2 * width)));
> + k = i % (VLEN / width);
> + j = i % (VLEN / (2 * width));
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f16[k] = uint32_to_float16(
> + env->vfp.vreg[src2].u32[j], &env->fp_status);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f32[k] = uint64_to_float32(
> + env->vfp.vreg[src2].u64[j], &env->fp_status);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_fnarrow(env, dest, k, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vfncvt.f.x.v vd, vs2, vm # Convert double-width integer to float. */
> +void VECTOR_HELPER(vfncvt_f_x_v)(CPURISCVState *env, uint32_t vm,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, k, dest, src2;
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> + if (vector_vtype_ill(env) ||
> + vector_overlap_vm_common(lmul, vm, rd) ||
> + vector_overlap_dstgp_srcgp(rd, lmul, rs2, 2 * lmul)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs2, true);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + if (lmul > 4) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / (2 * width)));
> + k = i % (VLEN / width);
> + j = i % (VLEN / (2 * width));
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f16[k] = int32_to_float16(
> + env->vfp.vreg[src2].s32[j], &env->fp_status);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f32[k] = int64_to_float32(
> + env->vfp.vreg[src2].s64[j], &env->fp_status);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_fnarrow(env, dest, k, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +/* vfncvt.f.f.v vd, vs2, vm # Convert double-width float to single-width float. */
> +void VECTOR_HELPER(vfncvt_f_f_v)(CPURISCVState *env, uint32_t vm,
> + uint32_t rs2, uint32_t rd)
> +{
> + int width, lmul, vl, vlmax;
> + int i, j, k, dest, src2;
> +
> + lmul = vector_get_lmul(env);
> + vl = env->vfp.vl;
> + if (vector_vtype_ill(env) ||
> + vector_overlap_vm_common(lmul, vm, rd) ||
> + vector_overlap_dstgp_srcgp(rd, lmul, rs2, 2 * lmul)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rs2, true);
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + if (lmul > 4) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + if (env->vfp.vstart >= vl) {
> + return;
> + }
> +
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / (2 * width)));
> + k = i % (VLEN / width);
> + j = i % (VLEN / (2 * width));
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f16[k] = float32_to_float16(
> + env->vfp.vreg[src2].f32[j], true, &env->fp_status);
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + env->vfp.vreg[dest].f32[k] = float64_to_float32(
> + env->vfp.vreg[src2].f64[j], &env->fp_status);
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_fnarrow(env, dest, k, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
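> +
> +/*
> + * Narrowing conversions can overflow the destination format; softfloat
> + * then returns an infinity (or the largest finite value, depending on
> + * the rounding mode) and records overflow/inexact in env->fp_status,
> + * so the loops above need no extra range checks.
> + */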
> +
> +void VECTOR_HELPER(vlbu_v)(CPURISCVState *env, uint32_t nf, uint32_t vm,
> + uint32_t rs1, uint32_t rd)
> +{
> + int i, j, k, vl, vlmax, lmul, width, dest, read;
> +
> + vl = env->vfp.vl;
> +
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + if (lmul * (nf + 1) > 32) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + k = nf;
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + read = i * (nf + 1) + k;
> + env->vfp.vreg[dest + k * lmul].u8[j] =
> + cpu_ldub_data(env, env->gpr[rs1] + read);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + read = i * (nf + 1) + k;
> + env->vfp.vreg[dest + k * lmul].u16[j] =
> + cpu_ldub_data(env, env->gpr[rs1] + read);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + read = i * (nf + 1) + k;
> + env->vfp.vreg[dest + k * lmul].u32[j] =
> + cpu_ldub_data(env, env->gpr[rs1] + read);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + read = i * (nf + 1) + k;
> + env->vfp.vreg[dest + k * lmul].u64[j] =
> + cpu_ldub_data(env, env->gpr[rs1] + read);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_segment(env, dest, j, width, k, lmul);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
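> +
> +/*
> + * Segment addressing sketch: for the unit-stride forms the byte read
> + * for element i, field k is at base + i * (nf + 1) + k, and field k
> + * lands in register group rd + k * lmul. With nf = 0 this degenerates
> + * to a plain unit-stride byte load.
> + */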
> +
> +void VECTOR_HELPER(vlb_v)(CPURISCVState *env, uint32_t nf, uint32_t vm,
> + uint32_t rs1, uint32_t rd)
> +{
> + int i, j, k, vl, vlmax, lmul, width, dest, read;
> +
> + vl = env->vfp.vl;
> +
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + if (lmul * (nf + 1) > 32) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + k = nf;
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + read = i * (nf + 1) + k;
> + env->vfp.vreg[dest + k * lmul].s8[j] =
> + cpu_ldsb_data(env, env->gpr[rs1] + read);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + read = i * (nf + 1) + k;
> + env->vfp.vreg[dest + k * lmul].s16[j] = sign_extend(
> + cpu_ldsb_data(env, env->gpr[rs1] + read), 8);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + read = i * (nf + 1) + k;
> + env->vfp.vreg[dest + k * lmul].s32[j] = sign_extend(
> + cpu_ldsb_data(env, env->gpr[rs1] + read), 8);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + read = i * (nf + 1) + k;
> + env->vfp.vreg[dest + k * lmul].s64[j] = sign_extend(
> + cpu_ldsb_data(env, env->gpr[rs1] + read), 8);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_segment(env, dest, j, width, k, lmul);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vlsbu_v)(CPURISCVState *env, uint32_t nf, uint32_t vm,
> + uint32_t rs1, uint32_t rs2, uint32_t rd)
> +{
> + int i, j, k, vl, vlmax, lmul, width, dest, read;
> +
> + vl = env->vfp.vl;
> +
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + if (lmul * (nf + 1) > 32) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + k = nf;
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + read = i * env->gpr[rs2] + k;
> + env->vfp.vreg[dest + k * lmul].u8[j] =
> + cpu_ldub_data(env, env->gpr[rs1] + read);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + read = i * env->gpr[rs2] + k;
> + env->vfp.vreg[dest + k * lmul].u16[j] =
> + cpu_ldub_data(env, env->gpr[rs1] + read);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + read = i * env->gpr[rs2] + k;
> + env->vfp.vreg[dest + k * lmul].u32[j] =
> + cpu_ldub_data(env, env->gpr[rs1] + read);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + read = i * env->gpr[rs2] + k;
> + env->vfp.vreg[dest + k * lmul].u64[j] =
> + cpu_ldub_data(env, env->gpr[rs1] + read);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_segment(env, dest, j, width, k, lmul);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
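> +
> +/*
> + * For the strided forms the byte offset is i * x[rs2] + k instead, so
> + * a stride of 4 with nf = 0 reads every fourth byte; a stride smaller
> + * than nf + 1 makes the fields of consecutive segments overlap.
> + */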
> +
> +void VECTOR_HELPER(vlsb_v)(CPURISCVState *env, uint32_t nf, uint32_t vm,
> + uint32_t rs1, uint32_t rs2, uint32_t rd)
> +{
> + int i, j, k, vl, vlmax, lmul, width, dest, read;
> +
> + vl = env->vfp.vl;
> +
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + if (lmul * (nf + 1) > 32) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + k = nf;
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + read = i * env->gpr[rs2] + k;
> + env->vfp.vreg[dest + k * lmul].s8[j] =
> + cpu_ldsb_data(env, env->gpr[rs1] + read);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + read = i * env->gpr[rs2] + k;
> + env->vfp.vreg[dest + k * lmul].s16[j] = sign_extend(
> + cpu_ldsb_data(env, env->gpr[rs1] + read), 8);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + read = i * env->gpr[rs2] + k;
> + env->vfp.vreg[dest + k * lmul].s32[j] = sign_extend(
> + cpu_ldsb_data(env, env->gpr[rs1] + read), 8);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + read = i * env->gpr[rs2] + k;
> + env->vfp.vreg[dest + k * lmul].s64[j] = sign_extend(
> + cpu_ldsb_data(env, env->gpr[rs1] + read), 8);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_segment(env, dest, j, width, k, lmul);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vlxbu_v)(CPURISCVState *env, uint32_t nf, uint32_t vm,
> + uint32_t rs1, uint32_t rs2, uint32_t rd)
> +{
> + int i, j, k, vl, vlmax, lmul, width, dest, src2;
> + target_ulong addr;
> +
> + vl = env->vfp.vl;
> +
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + if (lmul * (nf + 1) > 32) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + k = nf;
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + addr = vector_get_index(env, rs1, src2, j, 1, width, k);
> + env->vfp.vreg[dest + k * lmul].u8[j] =
> + cpu_ldub_data(env, addr);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + addr = vector_get_index(env, rs1, src2, j, 1, width, k);
> + env->vfp.vreg[dest + k * lmul].u16[j] =
> + cpu_ldub_data(env, addr);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + addr = vector_get_index(env, rs1, src2, j, 1, width, k);
> + env->vfp.vreg[dest + k * lmul].u32[j] =
> + cpu_ldub_data(env, addr);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + addr = vector_get_index(env, rs1, src2, j, 1, width, k);
> + env->vfp.vreg[dest + k * lmul].u64[j] =
> + cpu_ldub_data(env, addr);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_segment(env, dest, j, width, k, lmul);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
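> +/*
> + * vlxb.v: as vlxbu.v above, but the loaded byte is sign-extended to the
> + * element width with sign_extend().
> + */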
> +void VECTOR_HELPER(vlxb_v)(CPURISCVState *env, uint32_t nf, uint32_t vm,
> + uint32_t rs1, uint32_t rs2, uint32_t rd)
> +{
> + int i, j, k, vl, vlmax, lmul, width, dest, src2;
> + target_ulong addr;
> +
> + vl = env->vfp.vl;
> +
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + if (lmul * (nf + 1) > 32) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + k = nf;
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + addr = vector_get_index(env, rs1, src2, j, 1, width, k);
> + env->vfp.vreg[dest + k * lmul].s8[j] =
> + cpu_ldsb_data(env, addr);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + addr = vector_get_index(env, rs1, src2, j, 1, width, k);
> + env->vfp.vreg[dest + k * lmul].s16[j] = sign_extend(
> + cpu_ldsb_data(env, addr), 8);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + addr = vector_get_index(env, rs1, src2, j, 1, width, k);
> + env->vfp.vreg[dest + k * lmul].s32[j] = sign_extend(
> + cpu_ldsb_data(env, addr), 8);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + addr = vector_get_index(env, rs1, src2, j, 1, width, k);
> + env->vfp.vreg[dest + k * lmul].s64[j] = sign_extend(
> + cpu_ldsb_data(env, addr), 8);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_segment(env, dest, j, width, k, lmul);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
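> +/*
> + * vlbuff.v: unit-stride fault-only-first load of unsigned bytes.  foflag
> + * marks the fault-only-first case for the exception path; vl is cleared
> + * and counted back up per element, so if an access traps the helper
> + * unwinds before the final restore and vl is left at the number of
> + * elements that completed.
> + */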
> +void VECTOR_HELPER(vlbuff_v)(CPURISCVState *env, uint32_t nf, uint32_t vm,
> + uint32_t rs1, uint32_t rd)
> +{
> + int i, j, k, vl, vlmax, lmul, width, dest, read;
> +
> + vl = env->vfp.vl;
> +
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + if (lmul * (nf + 1) > 32) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + env->foflag = true;
> + env->vfp.vl = 0;
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + k = nf;
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + read = i * (nf + 1) + k;
> + env->vfp.vreg[dest + k * lmul].u8[j] =
> + cpu_ldub_data(env, env->gpr[rs1] + read);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + env->vfp.vl++;
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + read = i * (nf + 1) + k;
> + env->vfp.vreg[dest + k * lmul].u16[j] =
> + cpu_ldub_data(env, env->gpr[rs1] + read);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + env->vfp.vl++;
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + read = i * (nf + 1) + k;
> + env->vfp.vreg[dest + k * lmul].u32[j] =
> + cpu_ldub_data(env, env->gpr[rs1] + read);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + env->vfp.vl++;
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + read = i * (nf + 1) + k;
> + env->vfp.vreg[dest + k * lmul].u64[j] =
> + cpu_ldub_data(env, env->gpr[rs1] + read);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + env->vfp.vl++;
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_segment(env, dest, j, width, k, lmul);
> + }
> + }
> + env->foflag = false;
> + env->vfp.vl = vl;
> + env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vlbff_v)(CPURISCVState *env, uint32_t nf, uint32_t vm,
> + uint32_t rs1, uint32_t rd)
> +{
> + int i, j, k, vl, vlmax, lmul, width, dest, read;
> +
> + vl = env->vfp.vl;
> +
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + if (lmul * (nf + 1) > 32) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rd, false);
> + env->foflag = true;
> + env->vfp.vl = 0;
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + k = nf;
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + read = i * (nf + 1) + k;
> + env->vfp.vreg[dest + k * lmul].s8[j] =
> + cpu_ldsb_data(env, env->gpr[rs1] + read);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + env->vfp.vl++;
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + read = i * (nf + 1) + k;
> + env->vfp.vreg[dest + k * lmul].s16[j] = sign_extend(
> + cpu_ldsb_data(env, env->gpr[rs1] + read), 8);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + env->vfp.vl++;
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + read = i * (nf + 1) + k;
> + env->vfp.vreg[dest + k * lmul].s32[j] = sign_extend(
> + cpu_ldsb_data(env, env->gpr[rs1] + read), 8);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + env->vfp.vl++;
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + read = i * (nf + 1) + k;
> + env->vfp.vreg[dest + k * lmul].s64[j] = sign_extend(
> + cpu_ldsb_data(env, env->gpr[rs1] + read), 8);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + env->vfp.vl++;
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_segment(env, dest, j, width, k, lmul);
> + }
> + }
> + env->foflag = false;
> + env->vfp.vl = vl;
> + env->vfp.vstart = 0;
> +}
> +
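> +/*
> + * vlhu.v: unit-stride segment load of unsigned halfwords.  Field k of
> + * element i is read from byte offset (i * (nf + 1) + k) * 2 and
> + * zero-extended to the element width.
> + */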
> +void VECTOR_HELPER(vlhu_v)(CPURISCVState *env, uint32_t nf, uint32_t vm,
> + uint32_t rs1, uint32_t rd)
> +{
> + int i, j, k, vl, vlmax, lmul, width, dest, read;
> +
> + vl = env->vfp.vl;
> +
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + if (lmul * (nf + 1) > 32) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + k = nf;
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + read = (i * (nf + 1) + k) * 2;
> + env->vfp.vreg[dest + k * lmul].u16[j] =
> + cpu_lduw_data(env, env->gpr[rs1] + read);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + read = (i * (nf + 1) + k) * 2;
> + env->vfp.vreg[dest + k * lmul].u32[j] =
> + cpu_lduw_data(env, env->gpr[rs1] + read);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + read = (i * (nf + 1) + k) * 2;
> + env->vfp.vreg[dest + k * lmul].u64[j] =
> + cpu_lduw_data(env, env->gpr[rs1] + read);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_segment(env, dest, j, width, k, lmul);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vlh_v)(CPURISCVState *env, uint32_t nf, uint32_t vm,
> + uint32_t rs1, uint32_t rd)
> +{
> + int i, j, k, vl, vlmax, lmul, width, dest, read;
> +
> + vl = env->vfp.vl;
> +
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + if (lmul * (nf + 1) > 32) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + k = nf;
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + read = (i * (nf + 1) + k) * 2;
> + env->vfp.vreg[dest + k * lmul].s16[j] =
> + cpu_ldsw_data(env, env->gpr[rs1] + read);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + read = (i * (nf + 1) + k) * 2;
> + env->vfp.vreg[dest + k * lmul].s32[j] = sign_extend(
> + cpu_ldsw_data(env, env->gpr[rs1] + read), 16);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + read = (i * (nf + 1) + k) * 2;
> + env->vfp.vreg[dest + k * lmul].s64[j] = sign_extend(
> + cpu_ldsw_data(env, env->gpr[rs1] + read), 16);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_segment(env, dest, j, width, k, lmul);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
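> +/*
> + * vlshu.v: strided load of unsigned halfwords.  gpr[rs2] holds the byte
> + * stride between segments, so field k of element i is read from
> + * gpr[rs1] + i * gpr[rs2] + k * 2.
> + */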
> +void VECTOR_HELPER(vlshu_v)(CPURISCVState *env, uint32_t nf, uint32_t vm,
> + uint32_t rs1, uint32_t rs2, uint32_t rd)
> +{
> + int i, j, k, vl, vlmax, lmul, width, dest, read;
> +
> + vl = env->vfp.vl;
> +
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + if (lmul * (nf + 1) > 32) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + k = nf;
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + read = i * env->gpr[rs2] + k * 2;
> + env->vfp.vreg[dest + k * lmul].u16[j] =
> + cpu_lduw_data(env, env->gpr[rs1] + read);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + read = i * env->gpr[rs2] + k * 2;
> + env->vfp.vreg[dest + k * lmul].u32[j] =
> + cpu_lduw_data(env, env->gpr[rs1] + read);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + read = i * env->gpr[rs2] + k * 2;
> + env->vfp.vreg[dest + k * lmul].u64[j] =
> + cpu_lduw_data(env, env->gpr[rs1] + read);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_segment(env, dest, j, width, k, lmul);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vlsh_v)(CPURISCVState *env, uint32_t nf, uint32_t vm,
> + uint32_t rs1, uint32_t rs2, uint32_t rd)
> +{
> + int i, j, k, vl, vlmax, lmul, width, dest, read;
> +
> + vl = env->vfp.vl;
> +
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + if (lmul * (nf + 1) > 32) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + k = nf;
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + read = i * env->gpr[rs2] + k * 2;
> + env->vfp.vreg[dest + k * lmul].s16[j] =
> + cpu_ldsw_data(env, env->gpr[rs1] + read);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + read = i * env->gpr[rs2] + k * 2;
> + env->vfp.vreg[dest + k * lmul].s32[j] = sign_extend(
> + cpu_ldsw_data(env, env->gpr[rs1] + read), 16);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + read = i * env->gpr[rs2] + k * 2;
> + env->vfp.vreg[dest + k * lmul].s64[j] = sign_extend(
> + cpu_ldsw_data(env, env->gpr[rs1] + read), 16);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_segment(env, dest, j, width, k, lmul);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vlxhu_v)(CPURISCVState *env, uint32_t nf, uint32_t vm,
> + uint32_t rs1, uint32_t rs2, uint32_t rd)
> +{
> + int i, j, k, vl, vlmax, lmul, width, dest, src2;
> + target_ulong addr;
> +
> + vl = env->vfp.vl;
> +
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + if (lmul * (nf + 1) > 32) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + k = nf;
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + addr = vector_get_index(env, rs1, src2, j, 2, width, k);
> + env->vfp.vreg[dest + k * lmul].u16[j] =
> + cpu_lduw_data(env, addr);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + addr = vector_get_index(env, rs1, src2, j, 2, width, k);
> + env->vfp.vreg[dest + k * lmul].u32[j] =
> + cpu_lduw_data(env, addr);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + addr = vector_get_index(env, rs1, src2, j, 2, width, k);
> + env->vfp.vreg[dest + k * lmul].u64[j] =
> + cpu_lduw_data(env, addr);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_segment(env, dest, j, width, k, lmul);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vlxh_v)(CPURISCVState *env, uint32_t nf, uint32_t vm,
> + uint32_t rs1, uint32_t rs2, uint32_t rd)
> +{
> + int i, j, k, vl, vlmax, lmul, width, dest, src2;
> + target_ulong addr;
> +
> + vl = env->vfp.vl;
> +
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + if (lmul * (nf + 1) > 32) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + k = nf;
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + addr = vector_get_index(env, rs1, src2, j, 2, width, k);
> + env->vfp.vreg[dest + k * lmul].s16[j] =
> + cpu_ldsw_data(env, addr);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + addr = vector_get_index(env, rs1, src2, j, 2, width, k);
> + env->vfp.vreg[dest + k * lmul].s32[j] = sign_extend(
> + cpu_ldsw_data(env, addr), 16);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + addr = vector_get_index(env, rs1, src2, j, 2, width, k);
> + env->vfp.vreg[dest + k * lmul].s64[j] = sign_extend(
> + cpu_ldsw_data(env, addr), 16);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_segment(env, dest, j, width, k, lmul);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vlhuff_v)(CPURISCVState *env, uint32_t nf, uint32_t vm,
> + uint32_t rs1, uint32_t rd)
> +{
> + int i, j, k, vl, vlmax, lmul, width, dest, read;
> +
> + vl = env->vfp.vl;
> +
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + if (lmul * (nf + 1) > 32) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rd, false);
> + env->foflag = true;
> + env->vfp.vl = 0;
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + k = nf;
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + read = (i * (nf + 1) + k) * 2;
> + env->vfp.vreg[dest + k * lmul].u16[j] =
> + cpu_lduw_data(env, env->gpr[rs1] + read);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + env->vfp.vl++;
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + read = (i * (nf + 1) + k) * 2;
> + env->vfp.vreg[dest + k * lmul].u32[j] =
> + cpu_lduw_data(env, env->gpr[rs1] + read);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + env->vfp.vl++;
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + read = (i * (nf + 1) + k) * 2;
> + env->vfp.vreg[dest + k * lmul].u64[j] =
> + cpu_lduw_data(env, env->gpr[rs1] + read);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + env->vfp.vl++;
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_segment(env, dest, j, width, k, lmul);
> + }
> + }
> + env->foflag = false;
> + env->vfp.vl = vl;
> + env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vlhff_v)(CPURISCVState *env, uint32_t nf, uint32_t vm,
> + uint32_t rs1, uint32_t rd)
> +{
> + int i, j, k, vl, vlmax, lmul, width, dest, read;
> +
> + vl = env->vfp.vl;
> +
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + if (lmul * (nf + 1) > 32) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rd, false);
> + env->foflag = true;
> + env->vfp.vl = 0;
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + k = nf;
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + read = (i * (nf + 1) + k) * 2;
> + env->vfp.vreg[dest + k * lmul].s16[j] =
> + cpu_ldsw_data(env, env->gpr[rs1] + read);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + env->vfp.vl++;
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + read = (i * (nf + 1) + k) * 2;
> + env->vfp.vreg[dest + k * lmul].s32[j] = sign_extend(
> + cpu_ldsw_data(env, env->gpr[rs1] + read), 16);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + env->vfp.vl++;
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + read = (i * (nf + 1) + k) * 2;
> + env->vfp.vreg[dest + k * lmul].s64[j] = sign_extend(
> + cpu_ldsw_data(env, env->gpr[rs1] + read), 16);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + env->vfp.vl++;
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_segment(env, dest, j, width, k, lmul);
> + }
> + }
> + env->foflag = false;
> + env->vfp.vl = vl;
> + env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vlw_v)(CPURISCVState *env, uint32_t nf, uint32_t vm,
> + uint32_t rs1, uint32_t rd)
> +{
> + int i, j, k, vl, vlmax, lmul, width, dest, read;
> +
> + vl = env->vfp.vl;
> +
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + if (lmul * (nf + 1) > 32) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + k = nf;
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + read = (i * (nf + 1) + k) * 4;
> + env->vfp.vreg[dest + k * lmul].s32[j] =
> + cpu_ldl_data(env, env->gpr[rs1] + read);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + read = (i * (nf + 1) + k) * 4;
> + env->vfp.vreg[dest + k * lmul].s64[j] = sign_extend(
> + cpu_ldl_data(env, env->gpr[rs1] + read), 32);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_segment(env, dest, j, width, k, lmul);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vlwu_v)(CPURISCVState *env, uint32_t nf, uint32_t vm,
> + uint32_t rs1, uint32_t rd)
> +{
> + int i, j, k, vl, vlmax, lmul, width, dest, read;
> +
> + vl = env->vfp.vl;
> +
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + if (lmul * (nf + 1) > 32) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + k = nf;
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + read = (i * (nf + 1) + k) * 4;
> + env->vfp.vreg[dest + k * lmul].u32[j] =
> + cpu_ldl_data(env, env->gpr[rs1] + read);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + read = (i * (nf + 1) + k) * 4;
> + env->vfp.vreg[dest + k * lmul].u64[j] =
> + cpu_ldl_data(env, env->gpr[rs1] + read);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_segment(env, dest, j, width, k, lmul);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vlswu_v)(CPURISCVState *env, uint32_t nf, uint32_t vm,
> + uint32_t rs1, uint32_t rs2, uint32_t rd)
> +{
> + int i, j, k, vl, vlmax, lmul, width, dest, read;
> +
> + vl = env->vfp.vl;
> +
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + if (lmul * (nf + 1) > 32) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + k = nf;
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + read = i * env->gpr[rs2] + k * 4;
> + env->vfp.vreg[dest + k * lmul].u32[j] =
> + cpu_ldl_data(env, env->gpr[rs1] + read);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + read = i * env->gpr[rs2] + k * 4;
> + env->vfp.vreg[dest + k * lmul].u64[j] =
> + cpu_ldl_data(env, env->gpr[rs1] + read);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_segment(env, dest, j, width, k, lmul);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vlsw_v)(CPURISCVState *env, uint32_t nf, uint32_t vm,
> + uint32_t rs1, uint32_t rs2, uint32_t rd)
> +{
> + int i, j, k, vl, vlmax, lmul, width, dest, read;
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + if (lmul * (nf + 1) > 32) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + k = nf;
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + read = i * env->gpr[rs2] + k * 4;
> + env->vfp.vreg[dest + k * lmul].s32[j] =
> + cpu_ldl_data(env, env->gpr[rs1] + read);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + read = i * env->gpr[rs2] + k * 4;
> + env->vfp.vreg[dest + k * lmul].s64[j] = sign_extend(
> + cpu_ldl_data(env, env->gpr[rs1] + read), 32);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_segment(env, dest, j, width, k, lmul);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vlxwu_v)(CPURISCVState *env, uint32_t nf, uint32_t vm,
> + uint32_t rs1, uint32_t rs2, uint32_t rd)
> +{
> + int i, j, k, vl, vlmax, lmul, width, dest, src2;
> + target_ulong addr;
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + if (lmul * (nf + 1) > 32) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + k = nf;
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + addr = vector_get_index(env, rs1, src2, j, 4, width, k);
> + env->vfp.vreg[dest + k * lmul].u32[j] =
> + cpu_ldl_data(env, addr);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + addr = vector_get_index(env, rs1, src2, j, 4, width, k);
> + env->vfp.vreg[dest + k * lmul].u64[j] =
> + cpu_ldl_data(env, addr);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_segment(env, dest, j, width, k, lmul);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vlxw_v)(CPURISCVState *env, uint32_t nf, uint32_t vm,
> + uint32_t rs1, uint32_t rs2, uint32_t rd)
> +{
> + int i, j, k, vl, vlmax, lmul, width, dest, src2;
> + target_ulong addr;
> +
> + vl = env->vfp.vl;
> +
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + if (lmul * (nf + 1) > 32) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + k = nf;
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + addr = vector_get_index(env, rs1, src2, j, 4, width, k);
> + env->vfp.vreg[dest + k * lmul].s32[j] =
> + cpu_ldl_data(env, addr);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + addr = vector_get_index(env, rs1, src2, j, 4, width, k);
> + env->vfp.vreg[dest + k * lmul].s64[j] = sign_extend(
> + cpu_ldl_data(env, addr), 32);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_segment(env, dest, j, width, k, lmul);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vlwuff_v)(CPURISCVState *env, uint32_t nf, uint32_t vm,
> + uint32_t rs1, uint32_t rd)
> +{
> + int i, j, k, vl, vlmax, lmul, width, dest, read;
> +
> + vl = env->vfp.vl;
> +
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + if (lmul * (nf + 1) > 32) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rd, false);
> + env->foflag = true;
> + env->vfp.vl = 0;
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + k = nf;
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + read = (i * (nf + 1) + k) * 4;
> + env->vfp.vreg[dest + k * lmul].u32[j] =
> + cpu_ldl_data(env, env->gpr[rs1] + read);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + env->vfp.vl++;
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + read = (i * (nf + 1) + k) * 4;
> + env->vfp.vreg[dest + k * lmul].u64[j] =
> + cpu_ldl_data(env, env->gpr[rs1] + read);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + env->vfp.vl++;
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_segment(env, dest, j, width, k, lmul);
> + }
> + }
> + env->foflag = false;
> + env->vfp.vl = vl;
> + env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vlwff_v)(CPURISCVState *env, uint32_t nf, uint32_t vm,
> + uint32_t rs1, uint32_t rd)
> +{
> + int i, j, k, vl, vlmax, lmul, width, dest, read;
> +
> + vl = env->vfp.vl;
> +
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + if (lmul * (nf + 1) > 32) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + vector_lmul_check_reg(env, lmul, rd, false);
> + env->foflag = true;
> + env->vfp.vl = 0;
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + k = nf;
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + read = (i * (nf + 1) + k) * 4;
> + env->vfp.vreg[dest + k * lmul].s32[j] =
> + cpu_ldl_data(env, env->gpr[rs1] + read);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + env->vfp.vl++;
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + read = (i * (nf + 1) + k) * 4;
> + env->vfp.vreg[dest + k * lmul].s64[j] = sign_extend(
> + cpu_ldl_data(env, env->gpr[rs1] + read), 32);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + env->vfp.vl++;
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_segment(env, dest, j, width, k, lmul);
> + }
> + }
> + env->foflag = false;
> + env->vfp.vl = vl;
> + env->vfp.vstart = 0;
> +}
> +
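> +/*
> + * vle.v: unit-stride load of SEW-sized elements; the access size is taken
> + * from the width configured in vtype rather than fixed by the opcode.
> + */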
> +void VECTOR_HELPER(vle_v)(CPURISCVState *env, uint32_t nf, uint32_t vm,
> + uint32_t rs1, uint32_t rd)
> +{
> + int i, j, k, vl, vlmax, lmul, width, dest, read;
> +
> + vl = env->vfp.vl;
> +
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + if (lmul * (nf + 1) > 32) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + k = nf;
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + read = i * (nf + 1) + k;
> + env->vfp.vreg[dest + k * lmul].u8[j] =
> + cpu_ldub_data(env, env->gpr[rs1] + read);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + read = (i * (nf + 1) + k) * 2;
> + env->vfp.vreg[dest + k * lmul].u16[j] =
> + cpu_lduw_data(env, env->gpr[rs1] + read);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + read = (i * (nf + 1) + k) * 4;
> + env->vfp.vreg[dest + k * lmul].u32[j] =
> + cpu_ldl_data(env, env->gpr[rs1] + read);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + read = (i * (nf + 1) + k) * 8;
> + env->vfp.vreg[dest + k * lmul].u64[j] =
> + cpu_ldq_data(env, env->gpr[rs1] + read);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_segment(env, dest, j, width, k, lmul);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vlse_v)(CPURISCVState *env, uint32_t nf, uint32_t vm,
> + uint32_t rs1, uint32_t rs2, uint32_t rd)
> +{
> + int i, j, k, vl, vlmax, lmul, width, dest, read;
> +
> + vl = env->vfp.vl;
> +
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + if (lmul * (nf + 1) > 32) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + k = nf;
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + read = i * env->gpr[rs2] + k;
> + env->vfp.vreg[dest + k * lmul].u8[j] =
> + cpu_ldub_data(env, env->gpr[rs1] + read);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + read = i * env->gpr[rs2] + k * 2;
> + env->vfp.vreg[dest + k * lmul].u16[j] =
> + cpu_lduw_data(env, env->gpr[rs1] + read);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + read = i * env->gpr[rs2] + k * 4;
> + env->vfp.vreg[dest + k * lmul].u32[j] =
> + cpu_ldl_data(env, env->gpr[rs1] + read);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + read = i * env->gpr[rs2] + k * 8;
> + env->vfp.vreg[dest + k * lmul].u64[j] =
> + cpu_ldq_data(env, env->gpr[rs1] + read);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_segment(env, dest, j, width, k, lmul);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
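> +/*
> + * vlxe.v: indexed load of SEW-sized elements.  The index scale passed to
> + * vector_get_index() (1/2/4/8) matches the element size.
> + */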
> +void VECTOR_HELPER(vlxe_v)(CPURISCVState *env, uint32_t nf, uint32_t vm,
> + uint32_t rs1, uint32_t rs2, uint32_t rd)
> +{
> + int i, j, k, vl, vlmax, lmul, width, dest, src2;
> + target_ulong addr;
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + if (lmul * (nf + 1) > 32) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + k = nf;
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + addr = vector_get_index(env, rs1, src2, j, 1, width, k);
> + env->vfp.vreg[dest + k * lmul].u8[j] =
> + cpu_ldub_data(env, addr);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + addr = vector_get_index(env, rs1, src2, j, 2, width, k);
> + env->vfp.vreg[dest + k * lmul].u16[j] =
> + cpu_lduw_data(env, addr);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + addr = vector_get_index(env, rs1, src2, j, 4, width, k);
> + env->vfp.vreg[dest + k * lmul].u32[j] =
> + cpu_ldl_data(env, addr);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + addr = vector_get_index(env, rs1, src2, j, 8, width, k);
> + env->vfp.vreg[dest + k * lmul].u64[j] =
> + cpu_ldq_data(env, addr);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_segment(env, dest, j, width, k, lmul);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vleff_v)(CPURISCVState *env, uint32_t nf, uint32_t vm,
> + uint32_t rs1, uint32_t rd)
> +{
> + int i, j, k, vl, vlmax, lmul, width, dest, read;
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + if (lmul * (nf + 1) > 32) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rd, false);
> + env->vfp.vl = 0;
> + env->foflag = true;
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + k = nf;
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + read = i * (nf + 1) + k;
> + env->vfp.vreg[dest + k * lmul].u8[j] =
> + cpu_ldub_data(env, env->gpr[rs1] + read);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + env->vfp.vl++;
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + read = (i * (nf + 1) + k) * 2;
> + env->vfp.vreg[dest + k * lmul].u16[j] =
> + cpu_lduw_data(env, env->gpr[rs1] + read);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + env->vfp.vl++;
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + read = (i * (nf + 1) + k) * 4;
> + env->vfp.vreg[dest + k * lmul].u32[j] =
> + cpu_ldl_data(env, env->gpr[rs1] + read);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + env->vfp.vl++;
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + read = (i * (nf + 1) + k) * 8;
> + env->vfp.vreg[dest + k * lmul].u64[j] =
> + cpu_ldq_data(env, env->gpr[rs1] + read);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + env->vfp.vl++;
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + } else {
> + vector_tail_segment(env, dest, j, width, k, lmul);
> + }
> + }
> + env->foflag = false;
> + env->vfp.vl = vl;
> + env->vfp.vstart = 0;
> +}
> +
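> +/*
> + * vsb.v: unit-stride segment store of the low byte of each element.
> + * Unlike the loads there is no tail clause: masked-off and tail elements
> + * leave memory untouched.
> + */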
> +void VECTOR_HELPER(vsb_v)(CPURISCVState *env, uint32_t nf, uint32_t vm,
> + uint32_t rs1, uint32_t rd)
> +{
> + int i, j, k, vl, vlmax, lmul, width, dest, wrote;
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + if (lmul * (nf + 1) > 32) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + k = nf;
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + wrote = i * (nf + 1) + k;
> + cpu_stb_data(env, env->gpr[rs1] + wrote,
> + env->vfp.vreg[dest + k * lmul].s8[j]);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + wrote = i * (nf + 1) + k;
> + cpu_stb_data(env, env->gpr[rs1] + wrote,
> + env->vfp.vreg[dest + k * lmul].s16[j]);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + wrote = i * (nf + 1) + k;
> + cpu_stb_data(env, env->gpr[rs1] + wrote,
> + env->vfp.vreg[dest + k * lmul].s32[j]);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + wrote = i * (nf + 1) + k;
> + cpu_stb_data(env, env->gpr[rs1] + wrote,
> + env->vfp.vreg[dest + k * lmul].s64[j]);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
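> +/*
> + * vssb.v: strided byte store; gpr[rs2] supplies the byte stride between
> + * segments, mirroring the strided loads above.
> + */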
> +void VECTOR_HELPER(vssb_v)(CPURISCVState *env, uint32_t nf, uint32_t vm,
> + uint32_t rs1, uint32_t rs2, uint32_t rd)
> +{
> + int i, j, k, vl, vlmax, lmul, width, dest, wrote;
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + if (lmul * (nf + 1) > 32) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + k = nf;
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + wrote = i * env->gpr[rs2] + k;
> + cpu_stb_data(env, env->gpr[rs1] + wrote,
> + env->vfp.vreg[dest + k * lmul].s8[j]);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + wrote = i * env->gpr[rs2] + k;
> + cpu_stb_data(env, env->gpr[rs1] + wrote,
> + env->vfp.vreg[dest + k * lmul].s16[j]);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + wrote = i * env->gpr[rs2] + k;
> + cpu_stb_data(env, env->gpr[rs1] + wrote,
> + env->vfp.vreg[dest + k * lmul].s32[j]);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + wrote = i * env->gpr[rs2] + k;
> + cpu_stb_data(env, env->gpr[rs1] + wrote,
> + env->vfp.vreg[dest + k * lmul].s64[j]);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
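> +/*
> + * vsxb.v: indexed (scatter) byte store.  The target address of field k of
> + * element i comes from vector_get_index(), as in the indexed loads.
> + */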
> +void VECTOR_HELPER(vsxb_v)(CPURISCVState *env, uint32_t nf, uint32_t vm,
> + uint32_t rs1, uint32_t rs2, uint32_t rd)
> +{
> + int i, j, k, vl, vlmax, lmul, width, dest, src2;
> + target_ulong addr;
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + if (lmul * (nf + 1) > 32) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + k = nf;
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + addr = vector_get_index(env, rs1, src2, j, 1, width, k);
> + cpu_stb_data(env, addr,
> + env->vfp.vreg[dest + k * lmul].s8[j]);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + addr = vector_get_index(env, rs1, src2, j, 1, width, k);
> + cpu_stb_data(env, addr,
> + env->vfp.vreg[dest + k * lmul].s16[j]);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + addr = vector_get_index(env, rs1, src2, j, 1, width, k);
> + cpu_stb_data(env, addr,
> + env->vfp.vreg[dest + k * lmul].s32[j]);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + addr = vector_get_index(env, rs1, src2, j, 1, width, k);
> + cpu_stb_data(env, addr,
> + env->vfp.vreg[dest + k * lmul].s64[j]);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
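> +/*
> + * The unordered indexed stores (vsux*.v) simply reuse the ordered indexed
> + * store helpers; executing them in order is an allowed implementation.
> + */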
> +void VECTOR_HELPER(vsuxb_v)(CPURISCVState *env, uint32_t nf, uint32_t vm,
> + uint32_t rs1, uint32_t rs2, uint32_t rd)
> +{
> + VECTOR_HELPER(vsxb_v)(env, nf, vm, rs1, rs2, rd);
> + env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vsh_v)(CPURISCVState *env, uint32_t nf, uint32_t vm,
> + uint32_t rs1, uint32_t rd)
> +{
> + int i, j, k, vl, vlmax, lmul, width, dest, wrote;
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + if (lmul * (nf + 1) > 32) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + k = nf;
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + wrote = (i * (nf + 1) + k) * 2;
> + cpu_stw_data(env, env->gpr[rs1] + wrote,
> + env->vfp.vreg[dest + k * lmul].s16[j]);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + wrote = (i * (nf + 1) + k) * 2;
> + cpu_stw_data(env, env->gpr[rs1] + wrote,
> + env->vfp.vreg[dest + k * lmul].s32[j]);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + wrote = (i * (nf + 1) + k) * 2;
> + cpu_stw_data(env, env->gpr[rs1] + wrote,
> + env->vfp.vreg[dest + k * lmul].s64[j]);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vssh_v)(CPURISCVState *env, uint32_t nf, uint32_t vm,
> + uint32_t rs1, uint32_t rs2, uint32_t rd)
> +{
> + int i, j, k, vl, vlmax, lmul, width, dest, wrote;
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + if (lmul * (nf + 1) > 32) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + k = nf;
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + wrote = i * env->gpr[rs2] + k * 2;
> + cpu_stw_data(env, env->gpr[rs1] + wrote,
> + env->vfp.vreg[dest + k * lmul].s16[j]);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + wrote = i * env->gpr[rs2] + k * 2;
> + cpu_stw_data(env, env->gpr[rs1] + wrote,
> + env->vfp.vreg[dest + k * lmul].s32[j]);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + wrote = i * env->gpr[rs2] + k * 2;
> + cpu_stw_data(env, env->gpr[rs1] + wrote,
> + env->vfp.vreg[dest + k * lmul].s64[j]);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vsxh_v)(CPURISCVState *env, uint32_t nf, uint32_t vm,
> + uint32_t rs1, uint32_t rs2, uint32_t rd)
> +{
> + int i, j, k, vl, vlmax, lmul, width, dest, src2;
> + target_ulong addr;
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + if (lmul * (nf + 1) > 32) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + k = nf;
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + addr = vector_get_index(env, rs1, src2, j, 2, width, k);
> + cpu_stw_data(env, addr,
> + env->vfp.vreg[dest + k * lmul].s16[j]);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + addr = vector_get_index(env, rs1, src2, j, 2, width, k);
> + cpu_stw_data(env, addr,
> + env->vfp.vreg[dest + k * lmul].s32[j]);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + addr = vector_get_index(env, rs1, src2, j, 2, width, k);
> + cpu_stw_data(env, addr,
> + env->vfp.vreg[dest + k * lmul].s64[j]);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vsuxh_v)(CPURISCVState *env, uint32_t nf, uint32_t vm,
> + uint32_t rs1, uint32_t rs2, uint32_t rd)
> +{
> + VECTOR_HELPER(vsxh_v)(env, nf, vm, rs1, rs2, rd);
> + env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vsw_v)(CPURISCVState *env, uint32_t nf, uint32_t vm,
> + uint32_t rs1, uint32_t rd)
> +{
> + int i, j, k, vl, vlmax, lmul, width, dest, wrote;
> +
> + vl = env->vfp.vl;
> +
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + if (lmul * (nf + 1) > 32) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + k = nf;
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + wrote = (i * (nf + 1) + k) * 4;
> + cpu_stl_data(env, env->gpr[rs1] + wrote,
> + env->vfp.vreg[dest + k * lmul].s32[j]);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + wrote = (i * (nf + 1) + k) * 4;
> + cpu_stl_data(env, env->gpr[rs1] + wrote,
> + env->vfp.vreg[dest + k * lmul].s64[j]);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
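> + /* vssw.v: strided store of 32-bit memory elements; the byte stride
> + * between segments is taken from rs2. */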
> +void VECTOR_HELPER(vssw_v)(CPURISCVState *env, uint32_t nf, uint32_t vm,
> + uint32_t rs1, uint32_t rs2, uint32_t rd)
> +{
> + int i, j, k, vl, vlmax, lmul, width, dest, wrote;
> +
> + vl = env->vfp.vl;
> +
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + if (lmul * (nf + 1) > 32) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + k = nf;
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + wrote = i * env->gpr[rs2] + k * 4;
> + cpu_stl_data(env, env->gpr[rs1] + wrote,
> + env->vfp.vreg[dest + k * lmul].s32[j]);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + wrote = i * env->gpr[rs2] + k * 4;
> + cpu_stl_data(env, env->gpr[rs1] + wrote,
> + env->vfp.vreg[dest + k * lmul].s64[j]);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
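> + /* vsxw.v: indexed (ordered) store of 32-bit memory elements, with
> + * per-element offsets taken from the vs2 register group. */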
> +void VECTOR_HELPER(vsxw_v)(CPURISCVState *env, uint32_t nf, uint32_t vm,
> + uint32_t rs1, uint32_t rs2, uint32_t rd)
> +{
> + int i, j, k, vl, vlmax, lmul, width, dest, src2;
> + target_ulong addr;
> +
> + vl = env->vfp.vl;
> +
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + if (lmul * (nf + 1) > 32) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + k = nf;
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + addr = vector_get_index(env, rs1, src2, j, 4, width, k);
> + cpu_stl_data(env, addr,
> + env->vfp.vreg[dest + k * lmul].s32[j]);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + addr = vector_get_index(env, rs1, src2, j, 4, width, k);
> + cpu_stl_data(env, addr,
> + env->vfp.vreg[dest + k * lmul].s64[j]);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vsuxw_v)(CPURISCVState *env, uint32_t nf, uint32_t vm,
> + uint32_t rs1, uint32_t rs2, uint32_t rd)
> +{
> + /* Unordered variant; this implementation reuses the ordered helper,
> + * which also resets vstart. */
> + VECTOR_HELPER(vsxw_v)(env, nf, vm, rs1, rs2, rd);
> +}
> +
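> + /* vse.v: unit-stride store of SEW-wide elements (8/16/32/64 bits),
> + * storing nf + 1 fields per segment. */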
> +void VECTOR_HELPER(vse_v)(CPURISCVState *env, uint32_t nf, uint32_t vm,
> + uint32_t rs1, uint32_t rd)
> +{
> + int i, j, k, vl, vlmax, lmul, width, dest, wrote;
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + if (lmul * (nf + 1) > 32) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + k = nf;
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + wrote = i * (nf + 1) + k;
> + cpu_stb_data(env, env->gpr[rs1] + wrote,
> + env->vfp.vreg[dest + k * lmul].s8[j]);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + wrote = (i * (nf + 1) + k) * 2;
> + cpu_stw_data(env, env->gpr[rs1] + wrote,
> + env->vfp.vreg[dest + k * lmul].s16[j]);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + wrote = (i * (nf + 1) + k) * 4;
> + cpu_stl_data(env, env->gpr[rs1] + wrote,
> + env->vfp.vreg[dest + k * lmul].s32[j]);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + wrote = (i * (nf + 1) + k) * 8;
> + cpu_stq_data(env, env->gpr[rs1] + wrote,
> + env->vfp.vreg[dest + k * lmul].s64[j]);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
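> + /* vsse.v: strided store of SEW-wide elements; rs2 supplies the byte
> + * stride between consecutive segments. */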
> +void VECTOR_HELPER(vsse_v)(CPURISCVState *env, uint32_t nf, uint32_t vm,
> + uint32_t rs1, uint32_t rs2, uint32_t rd)
> +{
> + int i, j, k, vl, vlmax, lmul, width, dest, wrote;
> +
> + vl = env->vfp.vl;
> +
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + if (lmul * (nf + 1) > 32) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + k = nf;
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + wrote = i * env->gpr[rs2] + k;
> + cpu_stb_data(env, env->gpr[rs1] + wrote,
> + env->vfp.vreg[dest + k * lmul].s8[j]);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + wrote = i * env->gpr[rs2] + k * 2;
> + cpu_stw_data(env, env->gpr[rs1] + wrote,
> + env->vfp.vreg[dest + k * lmul].s16[j]);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + wrote = i * env->gpr[rs2] + k * 4;
> + cpu_stl_data(env, env->gpr[rs1] + wrote,
> + env->vfp.vreg[dest + k * lmul].s32[j]);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + wrote = i * env->gpr[rs2] + k * 8;
> + cpu_stq_data(env, env->gpr[rs1] + wrote,
> + env->vfp.vreg[dest + k * lmul].s64[j]);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST,
> GETPC());
> + return;
> + }
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
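> + /* vsxe.v: indexed (ordered) store of SEW-wide elements, with offsets
> + * taken from the vs2 register group. */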
> +void VECTOR_HELPER(vsxe_v)(CPURISCVState *env, uint32_t nf, uint32_t vm,
> + uint32_t rs1, uint32_t rs2, uint32_t rd)
> +{
> + int i, j, k, vl, vlmax, lmul, width, dest, src2;
> + target_ulong addr;
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> +
> + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + if (lmul * (nf + 1) > 32) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, rd, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + dest = rd + (i / (VLEN / width));
> + src2 = rs2 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + k = nf;
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 8:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + addr = vector_get_index(env, rs1, src2, j, 1, width, k);
> + cpu_stb_data(env, addr,
> + env->vfp.vreg[dest + k * lmul].s8[j]);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 16:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + addr = vector_get_index(env, rs1, src2, j, 2, width, k);
> + cpu_stw_data(env, addr,
> + env->vfp.vreg[dest + k * lmul].s16[j]);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + addr = vector_get_index(env, rs1, src2, j, 4, width, k);
> + cpu_stl_data(env, addr,
> + env->vfp.vreg[dest + k * lmul].s32[j]);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + while (k >= 0) {
> + addr = vector_get_index(env, rs1, src2, j, 8, width, k);
> + cpu_stq_data(env, addr,
> + env->vfp.vreg[dest + k * lmul].s64[j]);
> + k--;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vsuxe_v)(CPURISCVState *env, uint32_t nf, uint32_t vm,
> + uint32_t rs1, uint32_t rs2, uint32_t rd)
> +{
> + /* Unordered variant; this implementation reuses the ordered helper,
> + * which also resets vstart. */
> + VECTOR_HELPER(vsxe_v)(env, nf, vm, rs1, rs2, rd);
> +}
> +
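> + /* vamoswapw.v: indexed atomic 32-bit swap; when wd is set, the old
> + * memory value is written back to vs3. */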
> +void VECTOR_HELPER(vamoswapw_v)(CPURISCVState *env, uint32_t wd, uint32_t vm,
> + uint32_t rs1, uint32_t vs2, uint32_t vs3)
> +{
> + int i, j, vl;
> + target_long idx;
> + uint32_t lmul, width, src2, src3, vlmax;
> + target_ulong addr;
> +#ifdef CONFIG_SOFTMMU
> + int mem_idx = cpu_mmu_index(env, false);
> + TCGMemOp memop = MO_ALIGN | MO_TESL;
> +#endif
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> + /* MEM <= SEW <= XLEN */
> + if (width < 32 || (width > sizeof(target_ulong) * 8)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + /* if wd is set, the old memory value is written back to vs3 */
> + if (vector_vtype_ill(env) ||
> + (vector_overlap_vm_common(lmul, vm, vs3) && wd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, vs2, false);
> + vector_lmul_check_reg(env, lmul, vs3, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = vs2 + (i / (VLEN / width));
> + src3 = vs3 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + int32_t tmp;
> + idx = (target_long)env->vfp.vreg[src2].s32[j];
> + addr = idx + env->gpr[rs1];
> +#ifdef CONFIG_SOFTMMU
> + tmp = helper_atomic_xchgl_le(env, addr,
> + env->vfp.vreg[src3].s32[j],
> + make_memop_idx(memop & ~MO_SIGN, mem_idx));
> +#else
> + tmp = helper_atomic_xchgl_le(env, addr,
> + env->vfp.vreg[src3].s32[j]);
> +#endif
> + if (wd) {
> + env->vfp.vreg[src3].s32[j] = tmp;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + int64_t tmp;
> + idx = (target_long)env->vfp.vreg[src2].s64[j];
> + addr = idx + env->gpr[rs1];
> +
> +#ifdef CONFIG_SOFTMMU
> + tmp = (int64_t)(int32_t)helper_atomic_xchgl_le(env, addr,
> + env->vfp.vreg[src3].s64[j],
> + make_memop_idx(memop & ~MO_SIGN, mem_idx));
> +#else
> + tmp = (int64_t)(int32_t)helper_atomic_xchgl_le(env, addr,
> + env->vfp.vreg[src3].s64[j]);
> +#endif
> + if (wd) {
> + env->vfp.vreg[src3].s64[j] = tmp;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_amo(env, src3, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vamoswapd_v)(CPURISCVState *env, uint32_t wd, uint32_t vm,
> + uint32_t rs1, uint32_t vs2, uint32_t vs3)
> +{
> + int i, j, vl;
> + target_long idx;
> + uint32_t lmul, width, src2, src3, vlmax;
> + target_ulong addr;
> +#ifdef CONFIG_SOFTMMU
> + int mem_idx = cpu_mmu_index(env, false);
> + TCGMemOp memop = MO_ALIGN | MO_TEQ;
> +#endif
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> + /* MEM <= SEW <= XLEN */
> + if (width < 64 || (width > sizeof(target_ulong) * 8)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + /* if wd is set, the old memory value is written back to vs3 */
> + if (vector_vtype_ill(env) ||
> + (vector_overlap_vm_common(lmul, vm, vs3) && wd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, vs2, false);
> + vector_lmul_check_reg(env, lmul, vs3, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = vs2 + (i / (VLEN / width));
> + src3 = vs3 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + int64_t tmp;
> + idx = (target_long)env->vfp.vreg[src2].s64[j];
> + addr = idx + env->gpr[rs1];
> +
> +#ifdef CONFIG_SOFTMMU
> + tmp = helper_atomic_xchgq_le(env, addr,
> + env->vfp.vreg[src3].s64[j],
> + make_memop_idx(memop & ~MO_SIGN, mem_idx));
> +#else
> + tmp = helper_atomic_xchgq_le(env, addr,
> + env->vfp.vreg[src3].s64[j]);
> +#endif
> +
> + if (wd) {
> + env->vfp.vreg[src3].s64[j] = tmp;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_amo(env, src3, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
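> + /* vamoaddw.v: indexed atomic 32-bit fetch-and-add. */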
> +void VECTOR_HELPER(vamoaddw_v)(CPURISCVState *env, uint32_t wd, uint32_t vm,
> + uint32_t rs1, uint32_t vs2, uint32_t vs3)
> +{
> + int i, j, vl;
> + target_long idx;
> + uint32_t lmul, width, src2, src3, vlmax;
> + target_ulong addr;
> +#ifdef CONFIG_SOFTMMU
> + int mem_idx = cpu_mmu_index(env, false);
> + TCGMemOp memop = MO_ALIGN | MO_TESL;
> +#endif
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> + /* MEM <= SEW <= XLEN */
> + if (width < 32 || (width > sizeof(target_ulong) * 8)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + /* if wd is set, the old memory value is written back to vs3 */
> + if (vector_vtype_ill(env) ||
> + (vector_overlap_vm_common(lmul, vm, vs3) && wd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, vs2, false);
> + vector_lmul_check_reg(env, lmul, vs3, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = vs2 + (i / (VLEN / width));
> + src3 = vs3 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + int32_t tmp;
> + idx = (target_long)env->vfp.vreg[src2].s32[j];
> + addr = idx + env->gpr[rs1];
> +#ifdef CONFIG_SOFTMMU
> + tmp = helper_atomic_fetch_addl_le(env, addr,
> + env->vfp.vreg[src3].s32[j],
> + make_memop_idx(memop & ~MO_SIGN, mem_idx));
> +#else
> + tmp = helper_atomic_fetch_addl_le(env, addr,
> + env->vfp.vreg[src3].s32[j]);
> +#endif
> + if (wd) {
> + env->vfp.vreg[src3].s32[j] = tmp;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + int64_t tmp;
> + idx = (target_long)env->vfp.vreg[src2].s64[j];
> + addr = idx + env->gpr[rs1];
> +
> +#ifdef CONFIG_SOFTMMU
> + tmp = (int64_t)(int32_t)helper_atomic_fetch_addl_le(env,
> + addr, env->vfp.vreg[src3].s64[j],
> + make_memop_idx(memop & ~MO_SIGN, mem_idx));
> +#else
> + tmp = (int64_t)(int32_t)helper_atomic_fetch_addl_le(env,
> + addr, env->vfp.vreg[src3].s64[j]);
> +#endif
> + if (wd) {
> + env->vfp.vreg[src3].s64[j] = tmp;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_amo(env, src3, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vamoaddd_v)(CPURISCVState *env, uint32_t wd, uint32_t vm,
> + uint32_t rs1, uint32_t vs2, uint32_t vs3)
> +{
> + int i, j, vl;
> + target_long idx;
> + uint32_t lmul, width, src2, src3, vlmax;
> + target_ulong addr;
> +#ifdef CONFIG_SOFTMMU
> + int mem_idx = cpu_mmu_index(env, false);
> + TCGMemOp memop = MO_ALIGN | MO_TEQ;
> +#endif
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> + /* MEM <= SEW <= XLEN */
> + if (width < 64 || (width > sizeof(target_ulong) * 8)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + /* if wd is set, the old memory value is written back to vs3 */
> + if (vector_vtype_ill(env) ||
> + (vector_overlap_vm_common(lmul, vm, vs3) && wd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, vs2, false);
> + vector_lmul_check_reg(env, lmul, vs3, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = vs2 + (i / (VLEN / width));
> + src3 = vs3 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + int64_t tmp;
> + idx = (target_long)env->vfp.vreg[src2].s64[j];
> + addr = idx + env->gpr[rs1];
> +
> +#ifdef CONFIG_SOFTMMU
> + tmp = helper_atomic_fetch_addq_le(env, addr,
> + env->vfp.vreg[src3].s64[j],
> + make_memop_idx(memop & ~MO_SIGN, mem_idx));
> +#else
> + tmp = helper_atomic_fetch_addq_le(env, addr,
> + env->vfp.vreg[src3].s64[j]);
> +#endif
> + if (wd) {
> + env->vfp.vreg[src3].s64[j] = tmp;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_amo(env, src3, j, width);
> + }
> + }
> +
> + env->vfp.vstart = 0;
> +}
> +
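> + /* vamoxorw.v: indexed atomic 32-bit fetch-and-xor. */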
> +void VECTOR_HELPER(vamoxorw_v)(CPURISCVState *env, uint32_t wd, uint32_t vm,
> + uint32_t rs1, uint32_t vs2, uint32_t vs3)
> +{
> + int i, j, vl;
> + target_long idx;
> + uint32_t lmul, width, src2, src3, vlmax;
> + target_ulong addr;
> +#ifdef CONFIG_SOFTMMU
> + int mem_idx = cpu_mmu_index(env, false);
> + TCGMemOp memop = MO_ALIGN | MO_TESL;
> +#endif
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> + /* MEM <= SEW <= XLEN */
> + if (width < 32 || (width > sizeof(target_ulong) * 8)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + /* if wd is set, the old memory value is written back to vs3 */
> + if (vector_vtype_ill(env) ||
> + (vector_overlap_vm_common(lmul, vm, vs3) && wd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, vs2, false);
> + vector_lmul_check_reg(env, lmul, vs3, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = vs2 + (i / (VLEN / width));
> + src3 = vs3 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + int32_t tmp;
> + idx = (target_long)env->vfp.vreg[src2].s32[j];
> + addr = idx + env->gpr[rs1];
> +#ifdef CONFIG_SOFTMMU
> + tmp = helper_atomic_fetch_xorl_le(env, addr,
> + env->vfp.vreg[src3].s32[j],
> + make_memop_idx(memop & ~MO_SIGN, mem_idx));
> +#else
> + tmp = helper_atomic_fetch_xorl_le(env, addr,
> + env->vfp.vreg[src3].s32[j]);
> +#endif
> + if (wd) {
> + env->vfp.vreg[src3].s32[j] = tmp;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + int64_t tmp;
> + idx = (target_long)env->vfp.vreg[src2].s64[j];
> + addr = idx + env->gpr[rs1];
> +
> +#ifdef CONFIG_SOFTMMU
> + tmp = (int64_t)(int32_t)helper_atomic_fetch_xorl_le(env,
> + addr, env->vfp.vreg[src3].s64[j],
> + make_memop_idx(memop & ~MO_SIGN, mem_idx));
> +#else
> + tmp = (int64_t)(int32_t)helper_atomic_fetch_xorl_le(env,
> + addr, env->vfp.vreg[src3].s64[j]);
> +#endif
> + if (wd) {
> + env->vfp.vreg[src3].s64[j] = tmp;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_amo(env, src3, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vamoxord_v)(CPURISCVState *env, uint32_t wd, uint32_t vm,
> + uint32_t rs1, uint32_t vs2, uint32_t vs3)
> +{
> + int i, j, vl;
> + target_long idx;
> + uint32_t lmul, width, src2, src3, vlmax;
> + target_ulong addr;
> +#ifdef CONFIG_SOFTMMU
> + int mem_idx = cpu_mmu_index(env, false);
> + TCGMemOp memop = MO_ALIGN | MO_TEQ;
> +#endif
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> + /* MEM <= SEW <= XLEN */
> + if (width < 64 || (width > sizeof(target_ulong) * 8)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + /* if wd is set, the old memory value is written back to vs3 */
> + if (vector_vtype_ill(env) ||
> + (vector_overlap_vm_common(lmul, vm, vs3) && wd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, vs2, false);
> + vector_lmul_check_reg(env, lmul, vs3, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = vs2 + (i / (VLEN / width));
> + src3 = vs3 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + int64_t tmp;
> + idx = (target_long)env->vfp.vreg[src2].s64[j];
> + addr = idx + env->gpr[rs1];
> +
> +#ifdef CONFIG_SOFTMMU
> + tmp = helper_atomic_fetch_xorq_le(env, addr,
> + env->vfp.vreg[src3].s64[j],
> + make_memop_idx(memop & ~MO_SIGN, mem_idx));
> +#else
> + tmp = helper_atomic_fetch_xorq_le(env, addr,
> + env->vfp.vreg[src3].s64[j]);
> +#endif
> + if (wd) {
> + env->vfp.vreg[src3].s64[j] = tmp;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_amo(env, src3, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
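> + /* vamoandw.v: indexed atomic 32-bit fetch-and-and. */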
> +void VECTOR_HELPER(vamoandw_v)(CPURISCVState *env, uint32_t wd, uint32_t vm,
> + uint32_t rs1, uint32_t vs2, uint32_t vs3)
> +{
> + int i, j, vl;
> + target_long idx;
> + uint32_t lmul, width, src2, src3, vlmax;
> + target_ulong addr;
> +#ifdef CONFIG_SOFTMMU
> + int mem_idx = cpu_mmu_index(env, false);
> + TCGMemOp memop = MO_ALIGN | MO_TESL;
> +#endif
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> + /* MEM <= SEW <= XLEN */
> + if (width < 32 || (width > sizeof(target_ulong) * 8)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + /* if wd is set, the old memory value is written back to vs3 */
> + if (vector_vtype_ill(env) ||
> + (vector_overlap_vm_common(lmul, vm, vs3) && wd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, vs2, false);
> + vector_lmul_check_reg(env, lmul, vs3, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = vs2 + (i / (VLEN / width));
> + src3 = vs3 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + int32_t tmp;
> + idx = (target_long)env->vfp.vreg[src2].s32[j];
> + addr = idx + env->gpr[rs1];
> +#ifdef CONFIG_SOFTMMU
> + tmp = helper_atomic_fetch_andl_le(env, addr,
> + env->vfp.vreg[src3].s32[j],
> + make_memop_idx(memop & ~MO_SIGN, mem_idx));
> +#else
> + tmp = helper_atomic_fetch_andl_le(env, addr,
> + env->vfp.vreg[src3].s32[j]);
> +#endif
> + if (wd) {
> + env->vfp.vreg[src3].s32[j] = tmp;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + int64_t tmp;
> + idx = (target_long)env->vfp.vreg[src2].s64[j];
> + addr = idx + env->gpr[rs1];
> +
> +#ifdef CONFIG_SOFTMMU
> + tmp = (int64_t)(int32_t)helper_atomic_fetch_andl_le(env,
> + addr, env->vfp.vreg[src3].s64[j],
> + make_memop_idx(memop & ~MO_SIGN, mem_idx));
> +#else
> + tmp = (int64_t)(int32_t)helper_atomic_fetch_andl_le(env,
> + addr, env->vfp.vreg[src3].s64[j]);
> +#endif
> + if (wd) {
> + env->vfp.vreg[src3].s64[j] = tmp;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_amo(env, src3, j, width);
> + }
> + }
> +
> + env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vamoandd_v)(CPURISCVState *env, uint32_t wd, uint32_t vm,
> + uint32_t rs1, uint32_t vs2, uint32_t vs3)
> +{
> + int i, j, vl;
> + target_long idx;
> + uint32_t lmul, width, src2, src3, vlmax;
> + target_ulong addr;
> +#ifdef CONFIG_SOFTMMU
> + int mem_idx = cpu_mmu_index(env, false);
> + TCGMemOp memop = MO_ALIGN | MO_TEQ;
> +#endif
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> + /* MEM <= SEW <= XLEN */
> + if (width < 64 || (width > sizeof(target_ulong) * 8)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + /* if wd is set, the old memory value is written back to vs3 */
> + if (vector_vtype_ill(env) ||
> + (vector_overlap_vm_common(lmul, vm, vs3) && wd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, vs2, false);
> + vector_lmul_check_reg(env, lmul, vs3, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = vs2 + (i / (VLEN / width));
> + src3 = vs3 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + int64_t tmp;
> + idx = (target_long)env->vfp.vreg[src2].s64[j];
> + addr = idx + env->gpr[rs1];
> +
> +#ifdef CONFIG_SOFTMMU
> + tmp = helper_atomic_fetch_andq_le(env, addr,
> + env->vfp.vreg[src3].s64[j],
> + make_memop_idx(memop & ~MO_SIGN, mem_idx));
> +#else
> + tmp = helper_atomic_fetch_andq_le(env, addr,
> + env->vfp.vreg[src3].s64[j]);
> +#endif
> + if (wd) {
> + env->vfp.vreg[src3].s64[j] = tmp;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_amo(env, src3, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
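> + /* vamoorw.v: indexed atomic 32-bit fetch-and-or. */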
> +void VECTOR_HELPER(vamoorw_v)(CPURISCVState *env, uint32_t wd, uint32_t vm,
> + uint32_t rs1, uint32_t vs2, uint32_t vs3)
> +{
> + int i, j, vl;
> + target_long idx;
> + uint32_t lmul, width, src2, src3, vlmax;
> + target_ulong addr;
> +#ifdef CONFIG_SOFTMMU
> + int mem_idx = cpu_mmu_index(env, false);
> + TCGMemOp memop = MO_ALIGN | MO_TESL;
> +#endif
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> + /* MEM <= SEW <= XLEN */
> + if (width < 32 || (width > sizeof(target_ulong) * 8)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + /* if wd is set, the old memory value is written back to vs3 */
> + if (vector_vtype_ill(env) ||
> + (vector_overlap_vm_common(lmul, vm, vs3) && wd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, vs2, false);
> + vector_lmul_check_reg(env, lmul, vs3, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = vs2 + (i / (VLEN / width));
> + src3 = vs3 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + int32_t tmp;
> + idx = (target_long)env->vfp.vreg[src2].s32[j];
> + addr = idx + env->gpr[rs1];
> +#ifdef CONFIG_SOFTMMU
> + tmp = helper_atomic_fetch_orl_le(env, addr,
> + env->vfp.vreg[src3].s32[j],
> + make_memop_idx(memop & ~MO_SIGN, mem_idx));
> +#else
> + tmp = helper_atomic_fetch_orl_le(env, addr,
> + env->vfp.vreg[src3].s32[j]);
> +#endif
> + if (wd) {
> + env->vfp.vreg[src3].s32[j] = tmp;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + int64_t tmp;
> + idx = (target_long)env->vfp.vreg[src2].s64[j];
> + addr = idx + env->gpr[rs1];
> +
> +#ifdef CONFIG_SOFTMMU
> + tmp = (int64_t)(int32_t)helper_atomic_fetch_orl_le(env,
> + addr, env->vfp.vreg[src3].s64[j],
> + make_memop_idx(memop & ~MO_SIGN, mem_idx));
> +#else
> + tmp = (int64_t)(int32_t)helper_atomic_fetch_orl_le(env,
> + addr, env->vfp.vreg[src3].s64[j]);
> +#endif
> + if (wd) {
> + env->vfp.vreg[src3].s64[j] = tmp;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_amo(env, src3, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vamoord_v)(CPURISCVState *env, uint32_t wd, uint32_t vm,
> + uint32_t rs1, uint32_t vs2, uint32_t vs3)
> +{
> + int i, j, vl;
> + target_long idx;
> + uint32_t lmul, width, src2, src3, vlmax;
> + target_ulong addr;
> +#ifdef CONFIG_SOFTMMU
> + int mem_idx = cpu_mmu_index(env, false);
> + TCGMemOp memop = MO_ALIGN | MO_TEQ;
> +#endif
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> + /* MEM <= SEW <= XLEN */
> + if (width < 64 || (width > sizeof(target_ulong) * 8)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + /* if wd is set, the old memory value is written back to vs3 */
> + if (vector_vtype_ill(env) ||
> + (vector_overlap_vm_common(lmul, vm, vs3) && wd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, vs2, false);
> + vector_lmul_check_reg(env, lmul, vs3, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = vs2 + (i / (VLEN / width));
> + src3 = vs3 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + int64_t tmp;
> + idx = (target_long)env->vfp.vreg[src2].s64[j];
> + addr = idx + env->gpr[rs1];
> +
> +#ifdef CONFIG_SOFTMMU
> + tmp = helper_atomic_fetch_orq_le(env, addr,
> + env->vfp.vreg[src3].s64[j],
> + make_memop_idx(memop & ~MO_SIGN, mem_idx));
> +#else
> + tmp = helper_atomic_fetch_orq_le(env, addr,
> + env->vfp.vreg[src3].s64[j]);
> +#endif
> + if (wd) {
> + env->vfp.vreg[src3].s64[j] = tmp;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_amo(env, src3, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
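> + /* vamominw.v: indexed atomic 32-bit signed minimum. */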
> +void VECTOR_HELPER(vamominw_v)(CPURISCVState *env, uint32_t wd, uint32_t vm,
> + uint32_t rs1, uint32_t vs2, uint32_t vs3)
> +{
> + int i, j, vl;
> + target_long idx;
> + uint32_t lmul, width, src2, src3, vlmax;
> + target_ulong addr;
> +#ifdef CONFIG_SOFTMMU
> + int mem_idx = cpu_mmu_index(env, false);
> + TCGMemOp memop = MO_ALIGN | MO_TESL;
> +#endif
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> + /* MEM <= SEW <= XLEN */
> + if (width < 32 || (width > sizeof(target_ulong) * 8)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + /* if wd is set, the old memory value is written back to vs3 */
> + if (vector_vtype_ill(env) ||
> + (vector_overlap_vm_common(lmul, vm, vs3) && wd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, vs2, false);
> + vector_lmul_check_reg(env, lmul, vs3, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = vs2 + (i / (VLEN / width));
> + src3 = vs3 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + int32_t tmp;
> + idx = (target_long)env->vfp.vreg[src2].s32[j];
> + addr = idx + env->gpr[rs1];
> +#ifdef CONFIG_SOFTMMU
> + tmp = helper_atomic_fetch_sminl_le(env, addr,
> + env->vfp.vreg[src3].s32[j],
> + make_memop_idx(memop & ~MO_SIGN, mem_idx));
> +#else
> + tmp = helper_atomic_fetch_sminl_le(env, addr,
> + env->vfp.vreg[src3].s32[j]);
> +#endif
> + if (wd) {
> + env->vfp.vreg[src3].s32[j] = tmp;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + int64_t tmp;
> + idx = (target_long)env->vfp.vreg[src2].s64[j];
> + addr = idx + env->gpr[rs1];
> +
> +#ifdef CONFIG_SOFTMMU
> + tmp = (int64_t)(int32_t)helper_atomic_fetch_sminl_le(env,
> + addr, env->vfp.vreg[src3].s64[j],
> + make_memop_idx(memop & ~MO_SIGN, mem_idx));
> +#else
> + tmp = (int64_t)(int32_t)helper_atomic_fetch_sminl_le(env,
> + addr, env->vfp.vreg[src3].s64[j]);
> +#endif
> + if (wd) {
> + env->vfp.vreg[src3].s64[j] = tmp;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_amo(env, src3, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vamomind_v)(CPURISCVState *env, uint32_t wd, uint32_t vm,
> + uint32_t rs1, uint32_t vs2, uint32_t vs3)
> +{
> + int i, j, vl;
> + target_long idx;
> + uint32_t lmul, width, src2, src3, vlmax;
> + target_ulong addr;
> +#ifdef CONFIG_SOFTMMU
> + int mem_idx = cpu_mmu_index(env, false);
> + TCGMemOp memop = MO_ALIGN | MO_TEQ;
> +#endif
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> + /* MEM <= SEW <= XLEN */
> + if (width < 64 || (width > sizeof(target_ulong) * 8)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + /* if wd is set, the old memory value is written back to vs3 */
> + if (vector_vtype_ill(env) ||
> + (vector_overlap_vm_common(lmul, vm, vs3) && wd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, vs2, false);
> + vector_lmul_check_reg(env, lmul, vs3, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = vs2 + (i / (VLEN / width));
> + src3 = vs3 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + int64_t tmp;
> + idx = (target_long)env->vfp.vreg[src2].s64[j];
> + addr = idx + env->gpr[rs1];
> +
> +#ifdef CONFIG_SOFTMMU
> + tmp = helper_atomic_fetch_sminq_le(env, addr,
> + env->vfp.vreg[src3].s64[j],
> + make_memop_idx(memop & ~MO_SIGN, mem_idx));
> +#else
> + tmp = helper_atomic_fetch_sminq_le(env, addr,
> + env->vfp.vreg[src3].s64[j]);
> +#endif
> + if (wd) {
> + env->vfp.vreg[src3].s64[j] = tmp;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_amo(env, src3, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
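> + /* vamomaxw.v: indexed atomic 32-bit signed maximum. */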
> +void VECTOR_HELPER(vamomaxw_v)(CPURISCVState *env, uint32_t wd, uint32_t vm,
> + uint32_t rs1, uint32_t vs2, uint32_t vs3)
> +{
> + int i, j, vl;
> + target_long idx;
> + uint32_t lmul, width, src2, src3, vlmax;
> + target_ulong addr;
> +#ifdef CONFIG_SOFTMMU
> + int mem_idx = cpu_mmu_index(env, false);
> + TCGMemOp memop = MO_ALIGN | MO_TESL;
> +#endif
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> + /* MEM <= SEW <= XLEN */
> + if (width < 32 || (width > sizeof(target_ulong) * 8)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + /* if wd is set, the old memory value is written back to vs3 */
> + if (vector_vtype_ill(env) ||
> + (vector_overlap_vm_common(lmul, vm, vs3) && wd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, vs2, false);
> + vector_lmul_check_reg(env, lmul, vs3, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = vs2 + (i / (VLEN / width));
> + src3 = vs3 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + int32_t tmp;
> + idx = (target_long)env->vfp.vreg[src2].s32[j];
> + addr = idx + env->gpr[rs1];
> +#ifdef CONFIG_SOFTMMU
> + tmp = helper_atomic_fetch_smaxl_le(env, addr,
> + env->vfp.vreg[src3].s32[j],
> + make_memop_idx(memop & ~MO_SIGN, mem_idx));
> +#else
> + tmp = helper_atomic_fetch_smaxl_le(env, addr,
> + env->vfp.vreg[src3].s32[j]);
> +#endif
> + if (wd) {
> + env->vfp.vreg[src3].s32[j] = tmp;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + int64_t tmp;
> + idx = (target_long)env->vfp.vreg[src2].s64[j];
> + addr = idx + env->gpr[rs1];
> +
> +#ifdef CONFIG_SOFTMMU
> + tmp = (int64_t)(int32_t)helper_atomic_fetch_smaxl_le(env,
> + addr, env->vfp.vreg[src3].s64[j],
> + make_memop_idx(memop & ~MO_SIGN, mem_idx));
> +#else
> + tmp = (int64_t)(int32_t)helper_atomic_fetch_smaxl_le(env,
> + addr, env->vfp.vreg[src3].s64[j]);
> +#endif
> + if (wd) {
> + env->vfp.vreg[src3].s64[j] = tmp;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_amo(env, src3, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vamomaxd_v)(CPURISCVState *env, uint32_t wd, uint32_t vm,
> + uint32_t rs1, uint32_t vs2, uint32_t vs3)
> +{
> + int i, j, vl;
> + target_long idx;
> + uint32_t lmul, width, src2, src3, vlmax;
> + target_ulong addr;
> +#ifdef CONFIG_SOFTMMU
> + int mem_idx = cpu_mmu_index(env, false);
> + TCGMemOp memop = MO_ALIGN | MO_TEQ;
> +#endif
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> + /* MEM <= SEW <= XLEN */
> + if (width < 64 || (width > sizeof(target_ulong) * 8)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + /* if wd is set, the old memory value is written back to vs3 */
> + if (vector_vtype_ill(env) ||
> + (vector_overlap_vm_common(lmul, vm, vs3) && wd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, vs2, false);
> + vector_lmul_check_reg(env, lmul, vs3, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = vs2 + (i / (VLEN / width));
> + src3 = vs3 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + int64_t tmp;
> + idx = (target_long)env->vfp.vreg[src2].s64[j];
> + addr = idx + env->gpr[rs1];
> +
> +#ifdef CONFIG_SOFTMMU
> + tmp = helper_atomic_fetch_smaxq_le(env, addr,
> + env->vfp.vreg[src3].s64[j],
> + make_memop_idx(memop & ~MO_SIGN, mem_idx));
> +#else
> + tmp = helper_atomic_fetch_smaxq_le(env, addr,
> + env->vfp.vreg[src3].s64[j]);
> +#endif
> + if (wd) {
> + env->vfp.vreg[src3].s64[j] = tmp;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_amo(env, src3, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
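> + /* vamominuw.v: indexed atomic 32-bit unsigned minimum; the 32-bit
> + * result is sign-extended when SEW = 64, matching RISC-V .w AMOs. */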
> +void VECTOR_HELPER(vamominuw_v)(CPURISCVState *env, uint32_t wd, uint32_t vm,
> + uint32_t rs1, uint32_t vs2, uint32_t vs3)
> +{
> + int i, j, vl;
> + target_long idx;
> + uint32_t lmul, width, src2, src3, vlmax;
> + target_ulong addr;
> +#ifdef CONFIG_SOFTMMU
> + int mem_idx = cpu_mmu_index(env, false);
> + TCGMemOp memop = MO_ALIGN | MO_TESL;
> +#endif
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> + /* MEM <= SEW <= XLEN */
> + if (width < 32 || (width > sizeof(target_ulong) * 8)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + /* if wd is set, the old memory value is written back to vs3 */
> + if (vector_vtype_ill(env) ||
> + (vector_overlap_vm_common(lmul, vm, vs3) && wd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, vs2, false);
> + vector_lmul_check_reg(env, lmul, vs3, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = vs2 + (i / (VLEN / width));
> + src3 = vs3 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + uint32_t tmp;
> + idx = (target_long)env->vfp.vreg[src2].s32[j];
> + addr = idx + env->gpr[rs1];
> +#ifdef CONFIG_SOFTMMU
> + tmp = helper_atomic_fetch_uminl_le(env, addr,
> + env->vfp.vreg[src3].s32[j],
> + make_memop_idx(memop & ~MO_SIGN, mem_idx));
> +#else
> + tmp = helper_atomic_fetch_uminl_le(env, addr,
> + env->vfp.vreg[src3].s32[j]);
> +#endif
> + if (wd) {
> + env->vfp.vreg[src3].s32[j] = tmp;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + uint64_t tmp;
> + idx = (target_long)env->vfp.vreg[src2].s64[j];
> + addr = idx + env->gpr[rs1];
> +
> +#ifdef CONFIG_SOFTMMU
> + tmp = (int64_t)(int32_t)helper_atomic_fetch_uminl_le(
> + env, addr, env->vfp.vreg[src3].s64[j],
> + make_memop_idx(memop & ~MO_SIGN, mem_idx));
> +#else
> + tmp = (int64_t)(int32_t)helper_atomic_fetch_uminl_le(
> + env, addr, env->vfp.vreg[src3].s64[j]);
> +#endif
> + if (wd) {
> + env->vfp.vreg[src3].s64[j] = tmp;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_amo(env, src3, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vamominud_v)(CPURISCVState *env, uint32_t wd, uint32_t vm,
> + uint32_t rs1, uint32_t vs2, uint32_t vs3)
> +{
> + int i, j, vl;
> + target_long idx;
> + uint32_t lmul, width, src2, src3, vlmax;
> + target_ulong addr;
> +#ifdef CONFIG_SOFTMMU
> + int mem_idx = cpu_mmu_index(env, false);
> + TCGMemOp memop = MO_ALIGN | MO_TEQ;
> +#endif
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> + /* MEM <= SEW <= XLEN */
> + if (width < 64 || (width > sizeof(target_ulong) * 8)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + /* if wd is set, the old memory value is written back to vs3 */
> + if (vector_vtype_ill(env) ||
> + (vector_overlap_vm_common(lmul, vm, vs3) && wd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, vs2, false);
> + vector_lmul_check_reg(env, lmul, vs3, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = vs2 + (i / (VLEN / width));
> + src3 = vs3 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + uint32_t tmp;
> + idx = (target_long)env->vfp.vreg[src2].s32[j];
> + addr = idx + env->gpr[rs1];
> +#ifdef CONFIG_SOFTMMU
> + tmp = helper_atomic_fetch_uminl_le(env, addr,
> + env->vfp.vreg[src3].s32[j],
> + make_memop_idx(memop & ~MO_SIGN, mem_idx));
> +#else
> + tmp = helper_atomic_fetch_uminl_le(env, addr,
> + env->vfp.vreg[src3].s32[j]);
> +#endif
> + if (wd) {
> + env->vfp.vreg[src3].s32[j] = tmp;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + uint64_t tmp;
> + idx = (target_long)env->vfp.vreg[src2].s64[j];
> + addr = idx + env->gpr[rs1];
> +
> +#ifdef CONFIG_SOFTMMU
> + tmp = helper_atomic_fetch_uminq_le(
> + env, addr, env->vfp.vreg[src3].s64[j],
> + make_memop_idx(memop & ~MO_SIGN, mem_idx));
> +#else
> + tmp = helper_atomic_fetch_uminq_le(env, addr,
> + env->vfp.vreg[src3].s64[j]);
> +#endif
> + if (wd) {
> + env->vfp.vreg[src3].s64[j] = tmp;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_amo(env, src3, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
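> + /* vamomaxuw.v: indexed atomic 32-bit unsigned maximum. */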
> +void VECTOR_HELPER(vamomaxuw_v)(CPURISCVState *env, uint32_t wd, uint32_t vm,
> + uint32_t rs1, uint32_t vs2, uint32_t vs3)
> +{
> + int i, j, vl;
> + target_long idx;
> + uint32_t lmul, width, src2, src3, vlmax;
> + target_ulong addr;
> +#ifdef CONFIG_SOFTMMU
> + int mem_idx = cpu_mmu_index(env, false);
> + TCGMemOp memop = MO_ALIGN | MO_TESL;
> +#endif
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> + /* MEM <= SEW <= XLEN */
> + if (width < 32 || (width > sizeof(target_ulong) * 8)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + /* if wd is set, the old memory value is written back to vs3 */
> + if (vector_vtype_ill(env) ||
> + (vector_overlap_vm_common(lmul, vm, vs3) && wd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, vs2, false);
> + vector_lmul_check_reg(env, lmul, vs3, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = vs2 + (i / (VLEN / width));
> + src3 = vs3 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 32:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + uint32_t tmp;
> + idx = (target_long)env->vfp.vreg[src2].s32[j];
> + addr = idx + env->gpr[rs1];
> +#ifdef CONFIG_SOFTMMU
> + tmp = helper_atomic_fetch_umaxl_le(env, addr,
> + env->vfp.vreg[src3].s32[j],
> + make_memop_idx(memop & ~MO_SIGN, mem_idx));
> +#else
> + tmp = helper_atomic_fetch_umaxl_le(env, addr,
> + env->vfp.vreg[src3].s32[j]);
> +#endif
> + if (wd) {
> + env->vfp.vreg[src3].s32[j] = tmp;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + uint64_t tmp;
> + idx = (target_long)env->vfp.vreg[src2].s64[j];
> + addr = idx + env->gpr[rs1];
> +
> +#ifdef CONFIG_SOFTMMU
> + tmp = (int64_t)(int32_t)helper_atomic_fetch_umaxl_le(
> + env, addr, env->vfp.vreg[src3].s64[j],
> + make_memop_idx(memop & ~MO_SIGN, mem_idx));
> +#else
> + tmp = (int64_t)(int32_t)helper_atomic_fetch_umaxl_le(
> + env, addr, env->vfp.vreg[src3].s64[j]);
> +#endif
> + if (wd) {
> + env->vfp.vreg[src3].s64[j] = tmp;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_amo(env, src3, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vamomaxud_v)(CPURISCVState *env, uint32_t wd, uint32_t vm,
> + uint32_t rs1, uint32_t vs2, uint32_t vs3)
> +{
> + int i, j, vl;
> + target_long idx;
> + uint32_t lmul, width, src2, src3, vlmax;
> + target_ulong addr;
> +#ifdef CONFIG_SOFTMMU
> + int mem_idx = cpu_mmu_index(env, false);
> + TCGMemOp memop = MO_ALIGN | MO_TEQ;
> +#endif
> +
> + vl = env->vfp.vl;
> + lmul = vector_get_lmul(env);
> + width = vector_get_width(env);
> + vlmax = vector_get_vlmax(env);
> + /* MEM <= SEW <= XLEN */
> + if (width < 64 || (width > sizeof(target_ulong) * 8)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> + /* if wd is set, the old memory value is written back to vs3 */
> + if (vector_vtype_ill(env) ||
> + (vector_overlap_vm_common(lmul, vm, vs3) && wd)) {
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + return;
> + }
> +
> + vector_lmul_check_reg(env, lmul, vs2, false);
> + vector_lmul_check_reg(env, lmul, vs3, false);
> +
> + for (i = 0; i < vlmax; i++) {
> + src2 = vs2 + (i / (VLEN / width));
> + src3 = vs3 + (i / (VLEN / width));
> + j = i % (VLEN / width);
> + if (i < env->vfp.vstart) {
> + continue;
> + } else if (i < vl) {
> + switch (width) {
> + case 64:
> + if (vector_elem_mask(env, vm, width, lmul, i)) {
> + uint64_t tmp;
> + idx = (target_long)env->vfp.vreg[src2].s64[j];
> + addr = idx + env->gpr[rs1];
> +
> +#ifdef CONFIG_SOFTMMU
> + tmp = helper_atomic_fetch_umaxq_le(
> + env, addr, env->vfp.vreg[src3].s64[j],
> + make_memop_idx(memop & ~MO_SIGN, mem_idx));
> +#else
> + tmp = helper_atomic_fetch_umaxq_le(env, addr,
> + env->vfp.vreg[src3].s64[j]);
> +#endif
> + if (wd) {
> + env->vfp.vreg[src3].s64[j] = tmp;
> + }
> + env->vfp.vstart++;
> + }
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> + break;
> + }
> + } else {
> + vector_tail_amo(env, src3, j, width);
> + }
> + }
> + env->vfp.vstart = 0;
> +}
> +
> --
> 2.7.4
* Re: [Qemu-devel] [PATCH] RISCV: support riscv vector extension 0.7.1
2019-08-29 12:45 ` [Qemu-riscv] " liuzhiwei
@ 2019-08-29 15:09 ` Richard Henderson
-1 siblings, 0 replies; 52+ messages in thread
From: Richard Henderson @ 2019-08-29 15:09 UTC (permalink / raw)
To: liuzhiwei, qemu-devel, qemu-riscv
Cc: peter.maydell, palmer, sagark, kbastian, riku.voipio, laurent,
Alistair.Francis, alex.bennee, aurelien
On 8/29/19 5:45 AM, liuzhiwei wrote:
> Even in QEMU, there may be situations where VSTART != 0. For example, a load
> instruction may take a page-fault exception at an element in the middle of the
> vector. If VSTART were always 0, elements that had already been loaded before
> the exception would be loaded once again.
Alternatively, you can validate all of the pages before performing any memory
operations, at which point there can never be an exception in the middle.
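A minimal sketch of that approach, assuming the probe_write() API QEMU
provides for softmmu helpers (the wrapper below and its name are
illustrative, not part of the patch):

    /* Touch every page the access will hit before storing anything,
     * so any page fault is raised up front, never mid-instruction.
     * Hypothetical helper; probe_write() raises the fault itself,
     * unwinding to the given return address. */
    static void vext_probe_pages(CPURISCVState *env, target_ulong base,
                                 target_ulong len, int mmu_idx,
                                 uintptr_t ra)
    {
        target_ulong addr = base & TARGET_PAGE_MASK;
        target_ulong last = base + len - 1;

        for (; addr <= last; addr += TARGET_PAGE_SIZE) {
            probe_write(env, addr, 1, mmu_idx, ra);
        }
    }

With that in place, a helper can probe [base, base + len) once and then
perform all of its element accesses knowing none of them can fault.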
As it turns out, you *must* do this in order to allow watchpoints to work
correctly. David Hildenbrand and I are at this moment fixing this aspect of
watchpoints for s390x.
See https://lists.gnu.org/archive/html/qemu-devel/2019-08/msg05979.html
r~
* Re: [Qemu-devel] [PATCH] RISCV: support riscv vector extension 0.7.1
2019-08-29 12:00 ` [Qemu-riscv] " liuzhiwei
@ 2019-08-29 15:14 ` Richard Henderson
-1 siblings, 0 replies; 52+ messages in thread
From: Richard Henderson @ 2019-08-29 15:14 UTC (permalink / raw)
To: liuzhiwei, Alistair Francis
Cc: Peter Maydell, Riku Voipio, open list:RISC-V, Sagar Karandikar,
Bastian Koppelmann, Palmer Dabbelt,
qemu-devel@nongnu.org Developers, Laurent Vivier,
Alistair Francis, Alex Bennée, Aurelien Jarno
On 8/29/19 5:00 AM, liuzhiwei wrote:
> Maybe there is a better test method or some forced test cases in QEMU. Could
> you give me some advice on testing?
If you have hardware, or another simulator, RISU is very good
for testing these sorts of things.
See https://git.linaro.org/people/pmaydell/risu.git
You'll need to write new support for RISC-V, but it's not hard
and we can help out with that.
r~
* Re: [Qemu-devel] [PATCH] RISCV: support riscv vector extension 0.7.1
2019-08-29 12:56 ` liuzhiwei
@ 2019-08-29 18:32 ` Aleksandar Markovic
0 siblings, 0 replies; 52+ messages in thread
From: Aleksandar Markovic @ 2019-08-29 18:32 UTC (permalink / raw)
To: liuzhiwei
Cc: Peter Maydell, Palmer Dabbelt, open list:RISC-V,
Sagar Karandikar, Bastian Koppelmann, Riku Voipio,
Laurent Vivier, QEMU Developers, Alistair Francis,
Alex Bennée, Aurelien Jarno
On 29.08.2019 at 15:02, "liuzhiwei" <zhiwei_liu@c-sky.com> wrote:
>
>
> On 2019/8/29 3:20 AM, Aleksandar Markovic wrote:
>>
>>
>>
>> > On Wed, Aug 28, 2019 at 9:04 AM liuzhiwei <zhiwei_liu@c-sky.com> wrote:
>>>
>>> Change-Id: I3cf891bc400713b95f47ecca82b1bf773f3dcb25
>>> Signed-off-by: liuzhiwei <zhiwei_liu@c-sky.com>
>>> ---
>>
>>
>> Such large patch and "Change-Id:
I3cf891bc400713b95f47ecca82b1bf773f3dcb25" is its entire commit message??
Horrible.
>
> Hi, Aleksandar
>
> I am so sorry. A patch set with a cover letter will be sent later.
>
> Best Regards,
>
> Zhiwei
OK, Zhiwei,
You'll soon get more used to participating in open source, and will write
much better patches.
Try to follow the guidelines described at
https://wiki.qemu.org/Contribute/SubmitAPatch
Thanks,
Aleksandar
>>
>> Aleksandar
>>
>>>
>>> fpu/softfloat.c | 119 +
>>> include/fpu/softfloat.h | 4 +
>>> linux-user/riscv/cpu_loop.c | 8 +-
>>> target/riscv/Makefile.objs | 2 +-
>>> target/riscv/cpu.h | 30 +
>>> target/riscv/cpu_bits.h | 15 +
>>> target/riscv/cpu_helper.c | 7 +
>>> target/riscv/csr.c | 65 +-
>>> target/riscv/helper.h | 354 +
>>> target/riscv/insn32.decode | 374 +-
>>> target/riscv/insn_trans/trans_rvv.inc.c | 484 +
>>> target/riscv/translate.c | 1 +
>>> target/riscv/vector_helper.c | 26563 ++++++++++++++++++++++++++++++
>>> 13 files changed, 28017 insertions(+), 9 deletions(-)
>>> create mode 100644 target/riscv/insn_trans/trans_rvv.inc.c
>>> create mode 100644 target/riscv/vector_helper.c
>>>
>>> diff --git a/fpu/softfloat.c b/fpu/softfloat.c
>>> index 2ba36ec..da155ea 100644
>>> --- a/fpu/softfloat.c
>>> +++ b/fpu/softfloat.c
>>> @@ -433,6 +433,16 @@ static inline int extractFloat16Exp(float16 a)
>>> }
>>>
>>> /*----------------------------------------------------------------------------
>>> +| Returns the sign bit of the half-precision floating-point value `a'.
>>> +*----------------------------------------------------------------------------*/
>>> +
>>> +static inline flag extractFloat16Sign(float16 a)
>>> +{
>>> + return float16_val(a) >> 0xf;
>>> +}
>>> +
>>> +
>>> +/*----------------------------------------------------------------------------
>>> | Returns the fraction bits of the single-precision floating-point value `a'.
>>> *----------------------------------------------------------------------------*/
>>>
>>> @@ -4790,6 +4800,35 @@ int float32_eq(float32 a, float32 b, float_status *status)
>>> }
>>>
>>> /*----------------------------------------------------------------------------
>>> +| Returns 1 if the half-precision floating-point value `a' is less than
>>> +| or equal to the corresponding value `b', and 0 otherwise. The invalid
>>> +| exception is raised if either operand is a NaN. The comparison is performed
>>> +| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
>>> +*----------------------------------------------------------------------------*/
>>> +
>>> +int float16_le(float16 a, float16 b, float_status *status)
>>> +{
>>> + flag aSign, bSign;
>>> + uint16_t av, bv;
>>> + a = float16_squash_input_denormal(a, status);
>>> + b = float16_squash_input_denormal(b, status);
>>> +
>>> + if ( ( ( extractFloat16Exp( a ) == 0x1F ) && extractFloat16Frac( a ) )
>>> + || ( ( extractFloat16Exp( b ) == 0x1F ) && extractFloat16Frac( b ) )
>>> + ) {
>>> + float_raise(float_flag_invalid, status);
>>> + return 0;
>>> + }
>>> + aSign = extractFloat16Sign( a );
>>> + bSign = extractFloat16Sign( b );
>>> + av = float16_val(a);
>>> + bv = float16_val(b);
>>> + if ( aSign != bSign ) return aSign || ( (uint16_t) ( ( av | bv )<<1 ) == 0 );
>>> + return ( av == bv ) || ( aSign ^ ( av < bv ) );
>>> +
>>> +}
>>> +
>>> +/*----------------------------------------------------------------------------
>>> | Returns 1 if the single-precision floating-point value `a' is less than
>>> | or equal to the corresponding value `b', and 0 otherwise. The invalid
>>> | exception is raised if either operand is a NaN. The comparison is performed
>>> @@ -4825,6 +4864,35 @@ int float32_le(float32 a, float32 b, float_status *status)
>>> | to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
>>> *----------------------------------------------------------------------------*/
>>>
>>> +int float16_lt(float16 a, float16 b, float_status *status)
>>> +{
>>> + flag aSign, bSign;
>>> + uint16_t av, bv;
>>> + a = float16_squash_input_denormal(a, status);
>>> + b = float16_squash_input_denormal(b, status);
>>> +
>>> + if ( ( ( extractFloat16Exp( a ) == 0x1F ) && extractFloat16Frac( a ) )
>>> + || ( ( extractFloat16Exp( b ) == 0x1F ) && extractFloat16Frac( b ) )
>>> + ) {
>>> + float_raise(float_flag_invalid, status);
>>> + return 0;
>>> + }
>>> + aSign = extractFloat16Sign( a );
>>> + bSign = extractFloat16Sign( b );
>>> + av = float16_val(a);
>>> + bv = float16_val(b);
>>> + if ( aSign != bSign ) return aSign && ( (uint16_t) ( ( av | bv )<<1 ) != 0 );
>>> + return ( av != bv ) && ( aSign ^ ( av < bv ) );
>>> +
>>> +}
>>> +
>>> +/*----------------------------------------------------------------------------
>>> +| Returns 1 if the single-precision floating-point value `a' is less than
>>> +| the corresponding value `b', and 0 otherwise. The invalid exception is
>>> +| raised if either operand is a NaN. The comparison is performed according
>>> +| to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
>>> +*----------------------------------------------------------------------------*/
>>> +
>>> int float32_lt(float32 a, float32 b, float_status *status)
>>> {
>>> flag aSign, bSign;
>>> @@ -4869,6 +4937,32 @@ int float32_unordered(float32 a, float32 b, float_status *status)
>>> }
>>>
>>> /*----------------------------------------------------------------------------
>>> +| Returns 1 if the half-precision floating-point value `a' is equal to
>>> +| the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an
>>> +| exception. The comparison is performed according to the IEC/IEEE Standard
>>> +| for Binary Floating-Point Arithmetic.
>>> +*----------------------------------------------------------------------------*/
>>> +
>>> +int float16_eq_quiet(float16 a, float16 b, float_status *status)
>>> +{
>>> + a = float16_squash_input_denormal(a, status);
>>> + b = float16_squash_input_denormal(b, status);
>>> +
>>> + if ( ( ( extractFloat16Exp( a ) == 0x1F ) && extractFloat16Frac( a ) )
>>> + || ( ( extractFloat16Exp( b ) == 0x1F ) && extractFloat16Frac( b ) )
>>> + ) {
>>> + if (float16_is_signaling_nan(a, status)
>>> + || float16_is_signaling_nan(b, status)) {
>>> + float_raise(float_flag_invalid, status);
>>> + }
>>> + return 0;
>>> + }
>>> + return ( float16_val(a) == float16_val(b) ) ||
>>> + ( (uint16_t) ( ( float16_val(a) | float16_val(b) )<<1 ) == 0 );
>>> +}
>>> +
>>> +
>>> +/*----------------------------------------------------------------------------
>>> | Returns 1 if the single-precision floating-point value `a' is equal to
>>> | the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an
>>> | exception. The comparison is performed according to the IEC/IEEE Standard
>>> @@ -4958,6 +5052,31 @@ int float32_lt_quiet(float32 a, float32 b, float_status *status)
>>> }
>>>
>>> /*----------------------------------------------------------------------------
>>> +| Returns 1 if the half-precision floating-point values `a' and `b' cannot
>>> +| be compared, and 0 otherwise. Quiet NaNs do not cause an exception. The
>>> +| comparison is performed according to the IEC/IEEE Standard for Binary
>>> +| Floating-Point Arithmetic.
>>> +*----------------------------------------------------------------------------*/
>>> +
>>> +int float16_unordered_quiet(float16 a, float16 b, float_status *status)
>>> +{
>>> + a = float16_squash_input_denormal(a, status);
>>> + b = float16_squash_input_denormal(b, status);
>>> +
>>> + if ( ( ( extractFloat16Exp( a ) == 0x1F ) && extractFloat16Frac( a ) )
>>> + || ( ( extractFloat16Exp( b ) == 0x1F ) && extractFloat16Frac( b ) )
>>> + ) {
>>> + if (float16_is_signaling_nan(a, status)
>>> + || float16_is_signaling_nan(b, status)) {
>>> + float_raise(float_flag_invalid, status);
>>> + }
>>> + return 1;
>>> + }
>>> + return 0;
>>> +}
>>> +
>>> +
>>> +/*----------------------------------------------------------------------------
>>> | Returns 1 if the single-precision floating-point values `a' and `b' cannot
>>> | be compared, and 0 otherwise. Quiet NaNs do not cause an exception. The
>>> | comparison is performed according to the IEC/IEEE Standard for Binary
>>> diff --git a/include/fpu/softfloat.h b/include/fpu/softfloat.h
>>> index 3ff3fa5..3b0754c 100644
>>> --- a/include/fpu/softfloat.h
>>> +++ b/include/fpu/softfloat.h
>>> @@ -293,6 +293,10 @@ float16 float16_maxnummag(float16, float16, float_status *status);
>>> float16 float16_sqrt(float16, float_status *status);
>>> int float16_compare(float16, float16, float_status *status);
>>> int float16_compare_quiet(float16, float16, float_status *status);
>>> +int float16_unordered_quiet(float16, float16, float_status *status);
>>> +int float16_le(float16, float16, float_status *status);
>>> +int float16_lt(float16, float16, float_status *status);
>>> +int float16_eq_quiet(float16, float16, float_status *status);
>>>
>>> int float16_is_quiet_nan(float16, float_status *status);
>>> int float16_is_signaling_nan(float16, float_status *status);
>>> diff --git a/linux-user/riscv/cpu_loop.c b/linux-user/riscv/cpu_loop.c
>>> index 12aa3c0..b01548a 100644
>>> --- a/linux-user/riscv/cpu_loop.c
>>> +++ b/linux-user/riscv/cpu_loop.c
>>> @@ -40,7 +40,13 @@ void cpu_loop(CPURISCVState *env)
>>> signum = 0;
>>> sigcode = 0;
>>> sigaddr = 0;
>>> -
>>> + if (env->foflag) {
>>> + if (env->vfp.vl != 0) {
>>> + env->foflag = false;
>>> + env->pc += 4;
>>> + continue;
>>> + }
>>> + }
>>> switch (trapnr) {
>>> case EXCP_INTERRUPT:
>>> /* just indicate that signals should be handled asap */
>>> diff --git a/target/riscv/Makefile.objs b/target/riscv/Makefile.objs
>>> index b1c79bc..d577cef 100644
>>> --- a/target/riscv/Makefile.objs
>>> +++ b/target/riscv/Makefile.objs
>>> @@ -1,4 +1,4 @@
>>> -obj-y += translate.o op_helper.o cpu_helper.o cpu.o csr.o fpu_helper.o gdbstub.o pmp.o
>>> +obj-y += translate.o op_helper.o cpu_helper.o cpu.o csr.o fpu_helper.o vector_helper.o gdbstub.o pmp.o
>>>
>>> DECODETREE = $(SRC_PATH)/scripts/decodetree.py
>>>
>>> diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h
>>> index 0adb307..5a93aa2 100644
>>> --- a/target/riscv/cpu.h
>>> +++ b/target/riscv/cpu.h
>>> @@ -67,6 +67,7 @@
>>> #define RVC RV('C')
>>> #define RVS RV('S')
>>> #define RVU RV('U')
>>> +#define RVV RV('V')
>>>
>>> /* S extension denotes that Supervisor mode exists, however it is possible
>>> to have a core that support S mode but does not have an MMU and there
>>> @@ -93,9 +94,38 @@ typedef struct CPURISCVState CPURISCVState;
>>>
>>> #include "pmp.h"
>>>
>>> +#define VLEN 128
>>> +#define VUNIT(x) (VLEN / x)
>>> +
>>> struct CPURISCVState {
>>> target_ulong gpr[32];
>>> uint64_t fpr[32]; /* assume both F and D extensions */
>>> +
>>> + /* vector coprocessor state. */
>>> + struct {
>>> + union VECTOR {
>>> + float64 f64[VUNIT(64)];
>>> + float32 f32[VUNIT(32)];
>>> + float16 f16[VUNIT(16)];
>>> + target_ulong ul[VUNIT(sizeof(target_ulong))];
>>> + uint64_t u64[VUNIT(64)];
>>> + int64_t s64[VUNIT(64)];
>>> + uint32_t u32[VUNIT(32)];
>>> + int32_t s32[VUNIT(32)];
>>> + uint16_t u16[VUNIT(16)];
>>> + int16_t s16[VUNIT(16)];
>>> + uint8_t u8[VUNIT(8)];
>>> + int8_t s8[VUNIT(8)];
>>> + } vreg[32];
>>> + target_ulong vxrm;
>>> + target_ulong vxsat;
>>> + target_ulong vl;
>>> + target_ulong vstart;
>>> + target_ulong vtype;
>>> + float_status fp_status;
>>> + } vfp;
>>> +
>>> + bool foflag;
>>> target_ulong pc;
>>> target_ulong load_res;
>>> target_ulong load_val;
>>> diff --git a/target/riscv/cpu_bits.h b/target/riscv/cpu_bits.h
>>> index 11f971a..9eb43ec 100644
>>> --- a/target/riscv/cpu_bits.h
>>> +++ b/target/riscv/cpu_bits.h
>>> @@ -29,6 +29,14 @@
>>> #define FSR_NXA (FPEXC_NX << FSR_AEXC_SHIFT)
>>> #define FSR_AEXC (FSR_NVA | FSR_OFA | FSR_UFA | FSR_DZA | FSR_NXA)
>>>
>>> +/* Vector Fixed-Point round model */
>>> +#define FSR_VXRM_SHIFT 9
>>> +#define FSR_VXRM (0x3 << FSR_VXRM_SHIFT)
>>> +
>>> +/* Vector Fixed-Point saturation flag */
>>> +#define FSR_VXSAT_SHIFT 8
>>> +#define FSR_VXSAT (0x1 << FSR_VXSAT_SHIFT)
>>> +
>>> /* Control and Status Registers */
>>>
>>> /* User Trap Setup */
>>> @@ -48,6 +56,13 @@
>>> #define CSR_FRM 0x002
>>> #define CSR_FCSR 0x003
>>>
>>> +/* User Vector CSRs */
>>> +#define CSR_VSTART 0x008
>>> +#define CSR_VXSAT 0x009
>>> +#define CSR_VXRM 0x00a
>>> +#define CSR_VL 0xc20
>>> +#define CSR_VTYPE 0xc21
>>> +
>>> /* User Timers and Counters */
>>> #define CSR_CYCLE 0xc00
>>> #define CSR_TIME 0xc01
>>> diff --git a/target/riscv/cpu_helper.c b/target/riscv/cpu_helper.c
>>> index e32b612..405caf6 100644
>>> --- a/target/riscv/cpu_helper.c
>>> +++ b/target/riscv/cpu_helper.c
>>> @@ -521,6 +521,13 @@ void riscv_cpu_do_interrupt(CPUState *cs)
>>> [PRV_H] = RISCV_EXCP_H_ECALL,
>>> [PRV_M] = RISCV_EXCP_M_ECALL
>>> };
>>> + if (env->foflag) {
>>> + if (env->vfp.vl != 0) {
>>> + env->foflag = false;
>>> + env->pc += 4;
>>> + return;
>>> + }
>>> + }
>>>
>>> if (!async) {
>>> /* set tval to badaddr for traps with address information */
>>> diff --git a/target/riscv/csr.c b/target/riscv/csr.c
>>> index e0d4586..a6131ff 100644
>>> --- a/target/riscv/csr.c
>>> +++ b/target/riscv/csr.c
>>> @@ -87,12 +87,12 @@ static int ctr(CPURISCVState *env, int csrno)
>>> return 0;
>>> }
>>>
>>> -#if !defined(CONFIG_USER_ONLY)
>>> static int any(CPURISCVState *env, int csrno)
>>> {
>>> return 0;
>>> }
>>>
>>> +#if !defined(CONFIG_USER_ONLY)
>>> static int smode(CPURISCVState *env, int csrno)
>>> {
>>> return -!riscv_has_ext(env, RVS);
>>> @@ -158,8 +158,10 @@ static int read_fcsr(CPURISCVState *env, int csrno, target_ulong *val)
>>> return -1;
>>> }
>>> #endif
>>> - *val = (riscv_cpu_get_fflags(env) << FSR_AEXC_SHIFT)
>>> - | (env->frm << FSR_RD_SHIFT);
>>> + *val = (env->vfp.vxrm << FSR_VXRM_SHIFT)
>>> + | (env->vfp.vxsat << FSR_VXSAT_SHIFT)
>>> + | (riscv_cpu_get_fflags(env) << FSR_AEXC_SHIFT)
>>> + | (env->frm << FSR_RD_SHIFT);
>>> return 0;
>>> }
>>>
>>> @@ -172,10 +174,60 @@ static int write_fcsr(CPURISCVState *env, int csrno, target_ulong val)
>>> env->mstatus |= MSTATUS_FS;
>>> #endif
>>> env->frm = (val & FSR_RD) >> FSR_RD_SHIFT;
>>> + env->vfp.vxrm = (val & FSR_VXRM) >> FSR_VXRM_SHIFT;
>>> + env->vfp.vxsat = (val & FSR_VXSAT) >> FSR_VXSAT_SHIFT;
>>> riscv_cpu_set_fflags(env, (val & FSR_AEXC) >> FSR_AEXC_SHIFT);
>>> return 0;
>>> }
>>>
>>> +static int read_vtype(CPURISCVState *env, int csrno, target_ulong *val)
>>> +{
>>> + *val = env->vfp.vtype;
>>> + return 0;
>>> +}
>>> +
>>> +static int read_vl(CPURISCVState *env, int csrno, target_ulong *val)
>>> +{
>>> + *val = env->vfp.vl;
>>> + return 0;
>>> +}
>>> +
>>> +static int read_vxrm(CPURISCVState *env, int csrno, target_ulong *val)
>>> +{
>>> + *val = env->vfp.vxrm;
>>> + return 0;
>>> +}
>>> +
>>> +static int read_vxsat(CPURISCVState *env, int csrno, target_ulong *val)
>>> +{
>>> + *val = env->vfp.vxsat;
>>> + return 0;
>>> +}
>>> +
>>> +static int read_vstart(CPURISCVState *env, int csrno, target_ulong *val)
>>> +{
>>> + *val = env->vfp.vstart;
>>> + return 0;
>>> +}
>>> +
>>> +static int write_vxrm(CPURISCVState *env, int csrno, target_ulong val)
>>> +{
>>> + env->vfp.vxrm = val;
>>> + return 0;
>>> +}
>>> +
>>> +static int write_vxsat(CPURISCVState *env, int csrno, target_ulong val)
>>> +{
>>> + env->vfp.vxsat = val;
>>> + return 0;
>>> +}
>>> +
>>> +static int write_vstart(CPURISCVState *env, int csrno, target_ulong val)
>>> +{
>>> + env->vfp.vstart = val;
>>> + return 0;
>>> +}
>>> +
>>> /* User Timers and Counters */
>>> static int read_instret(CPURISCVState *env, int csrno, target_ulong *val)
>>> {
>>> @@ -873,7 +925,12 @@ static riscv_csr_operations csr_ops[CSR_TABLE_SIZE] = {
>>> [CSR_FFLAGS] = { fs, read_fflags, write_fflags },
>>> [CSR_FRM] = { fs, read_frm, write_frm },
>>> [CSR_FCSR] = { fs, read_fcsr, write_fcsr },
>>> -
>>> + /* Vector CSRs */
>>> + [CSR_VSTART] = { any, read_vstart, write_vstart },
>>> + [CSR_VXSAT] = { any, read_vxsat, write_vxsat },
>>> + [CSR_VXRM] = { any, read_vxrm, write_vxrm },
>>> + [CSR_VL] = { any, read_vl },
>>> + [CSR_VTYPE] = { any, read_vtype },
>>> /* User Timers and Counters */
>>> [CSR_CYCLE] = { ctr, read_instret },
>>> [CSR_INSTRET] = { ctr, read_instret },
>>> diff --git a/target/riscv/helper.h b/target/riscv/helper.h
>>> index debb22a..fee02c0 100644
>>> --- a/target/riscv/helper.h
>>> +++ b/target/riscv/helper.h
>>> @@ -76,3 +76,357 @@ DEF_HELPER_2(mret, tl, env, tl)
>>> DEF_HELPER_1(wfi, void, env)
>>> DEF_HELPER_1(tlb_flush, void, env)
>>> #endif
>>> +/* Vector functions */
>>> +DEF_HELPER_5(vector_vlb_v, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vlh_v, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vlw_v, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vle_v, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vlbu_v, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vlhu_v, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vlwu_v, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vlbff_v, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vlhff_v, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vlwff_v, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vleff_v, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vlbuff_v, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vlhuff_v, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vlwuff_v, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vsb_v, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vsh_v, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vsw_v, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vse_v, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_6(vector_vlsb_v, void, env, i32, i32, i32, i32, i32)
>>> +DEF_HELPER_6(vector_vlsh_v, void, env, i32, i32, i32, i32, i32)
>>> +DEF_HELPER_6(vector_vlsw_v, void, env, i32, i32, i32, i32, i32)
>>> +DEF_HELPER_6(vector_vlse_v, void, env, i32, i32, i32, i32, i32)
>>> +DEF_HELPER_6(vector_vlsbu_v, void, env, i32, i32, i32, i32, i32)
>>> +DEF_HELPER_6(vector_vlshu_v, void, env, i32, i32, i32, i32, i32)
>>> +DEF_HELPER_6(vector_vlswu_v, void, env, i32, i32, i32, i32, i32)
>>> +DEF_HELPER_6(vector_vssb_v, void, env, i32, i32, i32, i32, i32)
>>> +DEF_HELPER_6(vector_vssh_v, void, env, i32, i32, i32, i32, i32)
>>> +DEF_HELPER_6(vector_vssw_v, void, env, i32, i32, i32, i32, i32)
>>> +DEF_HELPER_6(vector_vsse_v, void, env, i32, i32, i32, i32, i32)
>>> +DEF_HELPER_6(vector_vlxb_v, void, env, i32, i32, i32, i32, i32)
>>> +DEF_HELPER_6(vector_vlxh_v, void, env, i32, i32, i32, i32, i32)
>>> +DEF_HELPER_6(vector_vlxw_v, void, env, i32, i32, i32, i32, i32)
>>> +DEF_HELPER_6(vector_vlxe_v, void, env, i32, i32, i32, i32, i32)
>>> +DEF_HELPER_6(vector_vlxbu_v, void, env, i32, i32, i32, i32, i32)
>>> +DEF_HELPER_6(vector_vlxhu_v, void, env, i32, i32, i32, i32, i32)
>>> +DEF_HELPER_6(vector_vlxwu_v, void, env, i32, i32, i32, i32, i32)
>>> +DEF_HELPER_6(vector_vsxb_v, void, env, i32, i32, i32, i32, i32)
>>> +DEF_HELPER_6(vector_vsxh_v, void, env, i32, i32, i32, i32, i32)
>>> +DEF_HELPER_6(vector_vsxw_v, void, env, i32, i32, i32, i32, i32)
>>> +DEF_HELPER_6(vector_vsxe_v, void, env, i32, i32, i32, i32, i32)
>>> +DEF_HELPER_6(vector_vsuxb_v, void, env, i32, i32, i32, i32, i32)
>>> +DEF_HELPER_6(vector_vsuxh_v, void, env, i32, i32, i32, i32, i32)
>>> +DEF_HELPER_6(vector_vsuxw_v, void, env, i32, i32, i32, i32, i32)
>>> +DEF_HELPER_6(vector_vsuxe_v, void, env, i32, i32, i32, i32, i32)
>>> +DEF_HELPER_6(vector_vamoswapw_v, void, env, i32, i32, i32, i32, i32)
>>> +DEF_HELPER_6(vector_vamoswapd_v, void, env, i32, i32, i32, i32, i32)
>>> +DEF_HELPER_6(vector_vamoaddw_v, void, env, i32, i32, i32, i32, i32)
>>> +DEF_HELPER_6(vector_vamoaddd_v, void, env, i32, i32, i32, i32, i32)
>>> +DEF_HELPER_6(vector_vamoxorw_v, void, env, i32, i32, i32, i32, i32)
>>> +DEF_HELPER_6(vector_vamoxord_v, void, env, i32, i32, i32, i32, i32)
>>> +DEF_HELPER_6(vector_vamoandw_v, void, env, i32, i32, i32, i32, i32)
>>> +DEF_HELPER_6(vector_vamoandd_v, void, env, i32, i32, i32, i32, i32)
>>> +DEF_HELPER_6(vector_vamoorw_v, void, env, i32, i32, i32, i32, i32)
>>> +DEF_HELPER_6(vector_vamoord_v, void, env, i32, i32, i32, i32, i32)
>>> +DEF_HELPER_6(vector_vamominw_v, void, env, i32, i32, i32, i32, i32)
>>> +DEF_HELPER_6(vector_vamomind_v, void, env, i32, i32, i32, i32, i32)
>>> +DEF_HELPER_6(vector_vamomaxw_v, void, env, i32, i32, i32, i32, i32)
>>> +DEF_HELPER_6(vector_vamomaxd_v, void, env, i32, i32, i32, i32, i32)
>>> +DEF_HELPER_6(vector_vamominuw_v, void, env, i32, i32, i32, i32, i32)
>>> +DEF_HELPER_6(vector_vamominud_v, void, env, i32, i32, i32, i32, i32)
>>> +DEF_HELPER_6(vector_vamomaxuw_v, void, env, i32, i32, i32, i32, i32)
>>> +DEF_HELPER_6(vector_vamomaxud_v, void, env, i32, i32, i32, i32, i32)
>>> +DEF_HELPER_4(vector_vext_x_v, void, env, i32, i32, i32)
>>> +DEF_HELPER_4(vector_vfmv_f_s, void, env, i32, i32, i32)
>>> +DEF_HELPER_4(vector_vmv_s_x, void, env, i32, i32, i32)
>>> +DEF_HELPER_4(vector_vfmv_s_f, void, env, i32, i32, i32)
>>> +DEF_HELPER_4(vector_vadc_vvm, void, env, i32, i32, i32)
>>> +DEF_HELPER_4(vector_vadc_vxm, void, env, i32, i32, i32)
>>> +DEF_HELPER_4(vector_vadc_vim, void, env, i32, i32, i32)
>>> +DEF_HELPER_4(vector_vmadc_vvm, void, env, i32, i32, i32)
>>> +DEF_HELPER_4(vector_vmadc_vxm, void, env, i32, i32, i32)
>>> +DEF_HELPER_4(vector_vmadc_vim, void, env, i32, i32, i32)
>>> +DEF_HELPER_4(vector_vsbc_vvm, void, env, i32, i32, i32)
>>> +DEF_HELPER_4(vector_vsbc_vxm, void, env, i32, i32, i32)
>>> +DEF_HELPER_4(vector_vmsbc_vvm, void, env, i32, i32, i32)
>>> +DEF_HELPER_4(vector_vmsbc_vxm, void, env, i32, i32, i32)
>>> +DEF_HELPER_4(vector_vmpopc_m, void, env, i32, i32, i32)
>>> +DEF_HELPER_4(vector_vmfirst_m, void, env, i32, i32, i32)
>>> +DEF_HELPER_4(vector_vcompress_vm, void, env, i32, i32, i32)
>>> +DEF_HELPER_4(vector_vmandnot_mm, void, env, i32, i32, i32)
>>> +DEF_HELPER_4(vector_vmand_mm, void, env, i32, i32, i32)
>>> +DEF_HELPER_4(vector_vmor_mm, void, env, i32, i32, i32)
>>> +DEF_HELPER_4(vector_vmxor_mm, void, env, i32, i32, i32)
>>> +DEF_HELPER_4(vector_vmornot_mm, void, env, i32, i32, i32)
>>> +DEF_HELPER_4(vector_vmnand_mm, void, env, i32, i32, i32)
>>> +DEF_HELPER_4(vector_vmnor_mm, void, env, i32, i32, i32)
>>> +DEF_HELPER_4(vector_vmxnor_mm, void, env, i32, i32, i32)
>>> +DEF_HELPER_4(vector_vmsbf_m, void, env, i32, i32, i32)
>>> +DEF_HELPER_4(vector_vmsof_m, void, env, i32, i32, i32)
>>> +DEF_HELPER_4(vector_vmsif_m, void, env, i32, i32, i32)
>>> +DEF_HELPER_4(vector_viota_m, void, env, i32, i32, i32)
>>> +DEF_HELPER_3(vector_vid_v, void, env, i32, i32)
>>> +DEF_HELPER_4(vector_vfcvt_xu_f_v, void, env, i32, i32, i32)
>>> +DEF_HELPER_4(vector_vfcvt_x_f_v, void, env, i32, i32, i32)
>>> +DEF_HELPER_4(vector_vfcvt_f_xu_v, void, env, i32, i32, i32)
>>> +DEF_HELPER_4(vector_vfcvt_f_x_v, void, env, i32, i32, i32)
>>> +DEF_HELPER_4(vector_vfwcvt_xu_f_v, void, env, i32, i32, i32)
>>> +DEF_HELPER_4(vector_vfwcvt_x_f_v, void, env, i32, i32, i32)
>>> +DEF_HELPER_4(vector_vfwcvt_f_xu_v, void, env, i32, i32, i32)
>>> +DEF_HELPER_4(vector_vfwcvt_f_x_v, void, env, i32, i32, i32)
>>> +DEF_HELPER_4(vector_vfwcvt_f_f_v, void, env, i32, i32, i32)
>>> +DEF_HELPER_4(vector_vfncvt_xu_f_v, void, env, i32, i32, i32)
>>> +DEF_HELPER_4(vector_vfncvt_x_f_v, void, env, i32, i32, i32)
>>> +DEF_HELPER_4(vector_vfncvt_f_xu_v, void, env, i32, i32, i32)
>>> +DEF_HELPER_4(vector_vfncvt_f_x_v, void, env, i32, i32, i32)
>>> +DEF_HELPER_4(vector_vfncvt_f_f_v, void, env, i32, i32, i32)
>>> +DEF_HELPER_4(vector_vfsqrt_v, void, env, i32, i32, i32)
>>> +DEF_HELPER_4(vector_vfclass_v, void, env, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vadd_vv, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vadd_vx, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vadd_vi, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vredsum_vs, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vfadd_vv, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vfadd_vf, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vredand_vs, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vfredsum_vs, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vsub_vv, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vsub_vx, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vredor_vs, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vfsub_vv, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vfsub_vf, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vrsub_vx, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vrsub_vi, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vredxor_vs, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vfredosum_vs, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vminu_vv, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vminu_vx, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vredminu_vs, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vfmin_vv, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vfmin_vf, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vmin_vv, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vmin_vx, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vredmin_vs, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vfredmin_vs, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vmaxu_vv, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vmaxu_vx, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vredmaxu_vs, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vfmax_vv, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vfmax_vf, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vmax_vv, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vmax_vx, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vredmax_vs, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vfredmax_vs, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vfsgnj_vv, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vfsgnj_vf, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vand_vv, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vand_vx, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vand_vi, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vfsgnjn_vv, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vfsgnjn_vf, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vor_vv, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vor_vx, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vor_vi, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vfsgnjx_vv, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vfsgnjx_vf, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vxor_vv, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vxor_vx, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vxor_vi, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vrgather_vv, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vrgather_vx, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vrgather_vi, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vslideup_vx, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vslideup_vi, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vslide1up_vx, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vslidedown_vx, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vslidedown_vi, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vslide1down_vx, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vmerge_vvm, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vmerge_vxm, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vmerge_vim, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vfmerge_vfm, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vmseq_vv, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vmseq_vx, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vmseq_vi, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vmfeq_vv, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vmfeq_vf, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vmsne_vv, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vmsne_vx, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vmsne_vi, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vmfle_vv, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vmfle_vf, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vmsltu_vv, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vmsltu_vx, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vmford_vv, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vmford_vf, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vmslt_vv, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vmslt_vx, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vmflt_vv, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vmflt_vf, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vmsleu_vv, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vmsleu_vx, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vmsleu_vi, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vmfne_vv, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vmfne_vf, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vmsle_vv, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vmsle_vx, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vmsle_vi, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vmfgt_vf, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vmsgtu_vx, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vmsgtu_vi, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vmsgt_vx, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vmsgt_vi, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vmfge_vf, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vsaddu_vv, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vsaddu_vx, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vsaddu_vi, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vdivu_vv, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vdivu_vx, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vfdiv_vv, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vfdiv_vf, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vsadd_vv, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vsadd_vx, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vsadd_vi, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vdiv_vv, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vdiv_vx, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vfrdiv_vf, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vssubu_vv, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vssubu_vx, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vremu_vv, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vremu_vx, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vssub_vv, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vssub_vx, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vrem_vv, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vrem_vx, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vaadd_vv, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vaadd_vx, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vaadd_vi, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vmulhu_vv, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vmulhu_vx, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vfmul_vv, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vfmul_vf, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vsll_vv, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vsll_vx, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vsll_vi, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vmul_vv, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vmul_vx, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vasub_vv, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vasub_vx, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vmulhsu_vv, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vmulhsu_vx, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vsmul_vv, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vsmul_vx, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vmulh_vv, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vmulh_vx, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vfrsub_vf, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vsrl_vv, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vsrl_vx, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vsrl_vi, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vfmadd_vv, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vfmadd_vf, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vsra_vv, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vsra_vx, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vsra_vi, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vmadd_vv, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vmadd_vx, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vfnmadd_vv, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vfnmadd_vf, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vssrl_vv, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vssrl_vx, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vssrl_vi, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vfmsub_vv, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vfmsub_vf, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vssra_vv, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vssra_vx, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vssra_vi, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vnmsub_vv, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vnmsub_vx, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vfnmsub_vv, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vfnmsub_vf, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vnsrl_vv, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vnsrl_vx, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vnsrl_vi, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vfmacc_vv, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vfmacc_vf, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vnsra_vv, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vnsra_vx, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vnsra_vi, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vmacc_vv, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vmacc_vx, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vfnmacc_vv, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vfnmacc_vf, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vnclipu_vv, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vnclipu_vx, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vnclipu_vi, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vfmsac_vv, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vfmsac_vf, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vnclip_vv, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vnclip_vx, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vnclip_vi, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vnmsac_vv, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vnmsac_vx, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vfnmsac_vv, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vfnmsac_vf, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vwredsumu_vs, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vwaddu_vv, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vwaddu_vx, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vfwadd_vv, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vfwadd_vf, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vwredsum_vs, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vwadd_vv, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vwadd_vx, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vfwredsum_vs, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vwsubu_vv, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vwsubu_vx, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vfwsub_vv, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vfwsub_vf, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vwsub_vv, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vwsub_vx, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vfwredosum_vs, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vwaddu_wv, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vwaddu_wx, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vfwadd_wv, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vfwadd_wf, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vwadd_wv, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vwadd_wx, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vwsubu_wv, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vwsubu_wx, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vfwsub_wv, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vfwsub_wf, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vwsub_wv, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vwsub_wx, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vwmulu_vv, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vwmulu_vx, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vfwmul_vv, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vfwmul_vf, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vwmulsu_vv, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vwmulsu_vx, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vwmul_vv, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vwmul_vx, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vwsmaccu_vv, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vwsmaccu_vx, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vwmaccu_vv, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vwmaccu_vx, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vfwmacc_vv, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vfwmacc_vf, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vwsmacc_vv, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vwsmacc_vx, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vwmacc_vv, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vwmacc_vx, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vfwnmacc_vv, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vfwnmacc_vf, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vwsmaccsu_vv, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vwsmaccsu_vx, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vwmaccsu_vv, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vwmaccsu_vx, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vfwmsac_vv, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vfwmsac_vf, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vwsmaccus_vx, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vwmaccus_vx, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vfwnmsac_vv, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vfwnmsac_vf, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_4(vector_vsetvli, void, env, i32, i32, i32)
>>> +DEF_HELPER_4(vector_vsetvl, void, env, i32, i32, i32)
>>> diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode
>>> index 77f794e..d125ff9 100644
>>> --- a/target/riscv/insn32.decode
>>> +++ b/target/riscv/insn32.decode
>>> @@ -25,7 +25,7 @@
>>> %sh10 20:10
>>> %csr 20:12
>>> %rm 12:3
>>> -
>>> +%nf 29:3
>>> # immediates:
>>> %imm_i 20:s12
>>> %imm_s 25:s7 7:5
>>> @@ -43,7 +43,6 @@
>>> &u imm rd
>>> &shift shamt rs1 rd
>>> &atomic aq rl rs2 rs1 rd
>>> -
>>> # Formats 32:
>>> @r ....... ..... ..... ... ..... ....... &r %rs2 %rs1 %rd
>>> @i ............ ..... ... ..... ....... &i imm=%imm_i %rs1 %rd
>>> @@ -62,11 +61,17 @@
>>> @r_rm ....... ..... ..... ... ..... ....... %rs2 %rs1 %rm %rd
>>> @r2_rm ....... ..... ..... ... ..... ....... %rs1 %rm %rd
>>> @r2 ....... ..... ..... ... ..... ....... %rs1 %rd
>>> +@r_vm ...... vm:1 ..... ..... ... ..... ....... %rs2 %rs1 %rd
>>> +@r_wdvm ..... wd:1 vm:1 ..... ..... ... ..... ....... %rs2 %rs1 %rd
>>> +@r_nfvm nf:3 ... vm:1 ..... ..... ... ..... ....... %rs2 %rs1 %rd
>>> +@r2_nfvm nf:3 ... vm:1 ..... ..... ... ..... ....... %rs1 %rd
>>> +@r2_vm ...... vm:1 ..... ..... ... ..... ....... %rs2 %rd
>>> +@r1_vm ...... vm:1 ..... ..... ... ..... ....... %rd
>>> +@r2_zimm . zimm:11 ..... ... ..... ....... %rs1 %rd
>>>
>>> @sfence_vma ....... ..... ..... ... ..... ....... %rs2 %rs1
>>> @sfence_vm ....... ..... ..... ... ..... ....... %rs1
>>>
>>> -
>>> # *** Privileged Instructions ***
>>> ecall 000000000000 00000 000 00000 1110011
>>> ebreak 000000000001 00000 000 00000 1110011
>>> @@ -203,3 +208,366 @@ fcvt_w_d 1100001 00000 ..... ... ..... 1010011 @r2_rm
>>> fcvt_wu_d 1100001 00001 ..... ... ..... 1010011 @r2_rm
>>> fcvt_d_w 1101001 00000 ..... ... ..... 1010011 @r2_rm
>>> fcvt_d_wu 1101001 00001 ..... ... ..... 1010011 @r2_rm
>>> +
>>> +# *** RV32V Standard Extension ***
>>> +
>>> +# *** Vector loads and stores are encoded within LOADFP/STORE-FP ***
>>> +vlb_v ... 100 . 00000 ..... 000 ..... 0000111 @r2_nfvm
>>> +vlh_v ... 100 . 00000 ..... 101 ..... 0000111 @r2_nfvm
>>> +vlw_v ... 100 . 00000 ..... 110 ..... 0000111 @r2_nfvm
>>> +vle_v ... 000 . 00000 ..... 111 ..... 0000111 @r2_nfvm
>>> +vlbu_v ... 000 . 00000 ..... 000 ..... 0000111 @r2_nfvm
>>> +vlhu_v ... 000 . 00000 ..... 101 ..... 0000111 @r2_nfvm
>>> +vlwu_v ... 000 . 00000 ..... 110 ..... 0000111 @r2_nfvm
>>> +vlbff_v ... 100 . 10000 ..... 000 ..... 0000111 @r2_nfvm
>>> +vlhff_v ... 100 . 10000 ..... 101 ..... 0000111 @r2_nfvm
>>> +vlwff_v ... 100 . 10000 ..... 110 ..... 0000111 @r2_nfvm
>>> +vleff_v ... 000 . 10000 ..... 111 ..... 0000111 @r2_nfvm
>>> +vlbuff_v ... 000 . 10000 ..... 000 ..... 0000111 @r2_nfvm
>>> +vlhuff_v ... 000 . 10000 ..... 101 ..... 0000111 @r2_nfvm
>>> +vlwuff_v ... 000 . 10000 ..... 110 ..... 0000111 @r2_nfvm
>>> +vsb_v ... 000 . 00000 ..... 000 ..... 0100111 @r2_nfvm
>>> +vsh_v ... 000 . 00000 ..... 101 ..... 0100111 @r2_nfvm
>>> +vsw_v ... 000 . 00000 ..... 110 ..... 0100111 @r2_nfvm
>>> +vse_v ... 000 . 00000 ..... 111 ..... 0100111 @r2_nfvm
>>> +
>>> +vlsb_v ... 110 . ..... ..... 000 ..... 0000111 @r_nfvm
>>> +vlsh_v ... 110 . ..... ..... 101 ..... 0000111 @r_nfvm
>>> +vlsw_v ... 110 . ..... ..... 110 ..... 0000111 @r_nfvm
>>> +vlse_v ... 010 . ..... ..... 111 ..... 0000111 @r_nfvm
>>> +vlsbu_v ... 010 . ..... ..... 000 ..... 0000111 @r_nfvm
>>> +vlshu_v ... 010 . ..... ..... 101 ..... 0000111 @r_nfvm
>>> +vlswu_v ... 010 . ..... ..... 110 ..... 0000111 @r_nfvm
>>> +vssb_v ... 010 . ..... ..... 000 ..... 0100111 @r_nfvm
>>> +vssh_v ... 010 . ..... ..... 101 ..... 0100111 @r_nfvm
>>> +vssw_v ... 010 . ..... ..... 110 ..... 0100111 @r_nfvm
>>> +vsse_v ... 010 . ..... ..... 111 ..... 0100111 @r_nfvm
>>> +
>>> +vlxb_v ... 111 . ..... ..... 000 ..... 0000111 @r_nfvm
>>> +vlxh_v ... 111 . ..... ..... 101 ..... 0000111 @r_nfvm
>>> +vlxw_v ... 111 . ..... ..... 110 ..... 0000111 @r_nfvm
>>> +vlxe_v ... 011 . ..... ..... 111 ..... 0000111 @r_nfvm
>>> +vlxbu_v ... 011 . ..... ..... 000 ..... 0000111 @r_nfvm
>>> +vlxhu_v ... 011 . ..... ..... 101 ..... 0000111 @r_nfvm
>>> +vlxwu_v ... 011 . ..... ..... 110 ..... 0000111 @r_nfvm
>>> +vsxb_v ... 011 . ..... ..... 000 ..... 0100111 @r_nfvm
>>> +vsxh_v ... 011 . ..... ..... 101 ..... 0100111 @r_nfvm
>>> +vsxw_v ... 011 . ..... ..... 110 ..... 0100111 @r_nfvm
>>> +vsxe_v ... 011 . ..... ..... 111 ..... 0100111 @r_nfvm
>>> +vsuxb_v ... 111 . ..... ..... 000 ..... 0100111 @r_nfvm
>>> +vsuxh_v ... 111 . ..... ..... 101 ..... 0100111 @r_nfvm
>>> +vsuxw_v ... 111 . ..... ..... 110 ..... 0100111 @r_nfvm
>>> +vsuxe_v ... 111 . ..... ..... 111 ..... 0100111 @r_nfvm
>>> +
>>> +#*** Vector AMO operations are encoded under the standard AMO major opcode.***
>>> +vamoswapw_v 00001 . . ..... ..... 110 ..... 0101111 @r_wdvm
>>> +vamoswapd_v 00001 . . ..... ..... 111 ..... 0101111 @r_wdvm
>>> +vamoaddw_v 00000 . . ..... ..... 110 ..... 0101111 @r_wdvm
>>> +vamoaddd_v 00000 . . ..... ..... 111 ..... 0101111 @r_wdvm
>>> +vamoxorw_v 00100 . . ..... ..... 110 ..... 0101111 @r_wdvm
>>> +vamoxord_v 00100 . . ..... ..... 111 ..... 0101111 @r_wdvm
>>> +vamoandw_v 01100 . . ..... ..... 110 ..... 0101111 @r_wdvm
>>> +vamoandd_v 01100 . . ..... ..... 111 ..... 0101111 @r_wdvm
>>> +vamoorw_v 01000 . . ..... ..... 110 ..... 0101111 @r_wdvm
>>> +vamoord_v 01000 . . ..... ..... 111 ..... 0101111 @r_wdvm
>>> +vamominw_v 10000 . . ..... ..... 110 ..... 0101111 @r_wdvm
>>> +vamomind_v 10000 . . ..... ..... 111 ..... 0101111 @r_wdvm
>>> +vamomaxw_v 10100 . . ..... ..... 110 ..... 0101111 @r_wdvm
>>> +vamomaxd_v 10100 . . ..... ..... 111 ..... 0101111 @r_wdvm
>>> +vamominuw_v 11000 . . ..... ..... 110 ..... 0101111 @r_wdvm
>>> +vamominud_v 11000 . . ..... ..... 111 ..... 0101111 @r_wdvm
>>> +vamomaxuw_v 11100 . . ..... ..... 110 ..... 0101111 @r_wdvm
>>> +vamomaxud_v 11100 . . ..... ..... 111 ..... 0101111 @r_wdvm
>>> +
>>> +#*** new major opcode OP-V ***
>>> +vadd_vv 000000 . ..... ..... 000 ..... 1010111 @r_vm
>>> +vadd_vx 000000 . ..... ..... 100 ..... 1010111 @r_vm
>>> +vadd_vi 000000 . ..... ..... 011 ..... 1010111 @r_vm
>>> +vredsum_vs 000000 . ..... ..... 010 ..... 1010111 @r_vm
>>> +vfadd_vv 000000 . ..... ..... 001 ..... 1010111 @r_vm
>>> +vfadd_vf 000000 . ..... ..... 101 ..... 1010111 @r_vm
>>> +vredand_vs 000001 . ..... ..... 010 ..... 1010111 @r_vm
>>> +vfredsum_vs 000001 . ..... ..... 001 ..... 1010111 @r_vm
>>> +vsub_vv 000010 . ..... ..... 000 ..... 1010111 @r_vm
>>> +vsub_vx 000010 . ..... ..... 100 ..... 1010111 @r_vm
>>> +vredor_vs 000010 . ..... ..... 010 ..... 1010111 @r_vm
>>> +vfsub_vv 000010 . ..... ..... 001 ..... 1010111 @r_vm
>>> +vfsub_vf 000010 . ..... ..... 101 ..... 1010111 @r_vm
>>> +vrsub_vx 000011 . ..... ..... 100 ..... 1010111 @r_vm
>>> +vrsub_vi 000011 . ..... ..... 011 ..... 1010111 @r_vm
>>> +vredxor_vs 000011 . ..... ..... 010 ..... 1010111 @r_vm
>>> +vfredosum_vs 000011 . ..... ..... 001 ..... 1010111 @r_vm
>>> +vminu_vv 000100 . ..... ..... 000 ..... 1010111 @r_vm
>>> +vminu_vx 000100 . ..... ..... 100 ..... 1010111 @r_vm
>>> +vredminu_vs 000100 . ..... ..... 010 ..... 1010111 @r_vm
>>> +vfmin_vv 000100 . ..... ..... 001 ..... 1010111 @r_vm
>>> +vfmin_vf 000100 . ..... ..... 101 ..... 1010111 @r_vm
>>> +vmin_vv 000101 . ..... ..... 000 ..... 1010111 @r_vm
>>> +vmin_vx 000101 . ..... ..... 100 ..... 1010111 @r_vm
>>> +vredmin_vs 000101 . ..... ..... 010 ..... 1010111 @r_vm
>>> +vfredmin_vs 000101 . ..... ..... 001 ..... 1010111 @r_vm
>>> +vmaxu_vv 000110 . ..... ..... 000 ..... 1010111 @r_vm
>>> +vmaxu_vx 000110 . ..... ..... 100 ..... 1010111 @r_vm
>>> +vredmaxu_vs 000110 . ..... ..... 010 ..... 1010111 @r_vm
>>> +vfmax_vv 000110 . ..... ..... 001 ..... 1010111 @r_vm
>>> +vfmax_vf 000110 . ..... ..... 101 ..... 1010111 @r_vm
>>> +vmax_vv 000111 . ..... ..... 000 ..... 1010111 @r_vm
>>> +vmax_vx 000111 . ..... ..... 100 ..... 1010111 @r_vm
>>> +vredmax_vs 000111 . ..... ..... 010 ..... 1010111 @r_vm
>>> +vfredmax_vs 000111 . ..... ..... 001 ..... 1010111 @r_vm
>>> +vfsgnj_vv 001000 . ..... ..... 001 ..... 1010111 @r_vm
>>> +vfsgnj_vf 001000 . ..... ..... 101 ..... 1010111 @r_vm
>>> +vand_vv 001001 . ..... ..... 000 ..... 1010111 @r_vm
>>> +vand_vx 001001 . ..... ..... 100 ..... 1010111 @r_vm
>>> +vand_vi 001001 . ..... ..... 011 ..... 1010111 @r_vm
>>> +vfsgnjn_vv 001001 . ..... ..... 001 ..... 1010111 @r_vm
>>> +vfsgnjn_vf 001001 . ..... ..... 101 ..... 1010111 @r_vm
>>> +vor_vv 001010 . ..... ..... 000 ..... 1010111 @r_vm
>>> +vor_vx 001010 . ..... ..... 100 ..... 1010111 @r_vm
>>> +vor_vi 001010 . ..... ..... 011 ..... 1010111 @r_vm
>>> +vfsgnjx_vv 001010 . ..... ..... 001 ..... 1010111 @r_vm
>>> +vfsgnjx_vf 001010 . ..... ..... 101 ..... 1010111 @r_vm
>>> +vxor_vv 001011 . ..... ..... 000 ..... 1010111 @r_vm
>>> +vxor_vx 001011 . ..... ..... 100 ..... 1010111 @r_vm
>>> +vxor_vi 001011 . ..... ..... 011 ..... 1010111 @r_vm
>>> +vrgather_vv 001100 . ..... ..... 000 ..... 1010111 @r_vm
>>> +vrgather_vx 001100 . ..... ..... 100 ..... 1010111 @r_vm
>>> +vrgather_vi 001100 . ..... ..... 011 ..... 1010111 @r_vm
>>> +vext_x_v 001100 1 ..... ..... 010 ..... 1010111 @r
>>> +vfmv_f_s 001100 1 ..... ..... 001 ..... 1010111 @r
>>> +vmv_s_x 001101 1 ..... ..... 110 ..... 1010111 @r
>>> +vfmv_s_f 001101 1 ..... ..... 101 ..... 1010111 @r
>>> +vslideup_vx 001110 . ..... ..... 100 ..... 1010111 @r_vm
>>> +vslideup_vi 001110 . ..... ..... 011 ..... 1010111 @r_vm
>>> +vslide1up_vx 001110 . ..... ..... 110 ..... 1010111 @r_vm
>>> +vslidedown_vx 001111 . ..... ..... 100 ..... 1010111 @r_vm
>>> +vslidedown_vi 001111 . ..... ..... 011 ..... 1010111 @r_vm
>>> +vslide1down_vx 001111 . ..... ..... 110 ..... 1010111 @r_vm
>>> +vadc_vvm 010000 1 ..... ..... 000 ..... 1010111 @r
>>> +vadc_vxm 010000 1 ..... ..... 100 ..... 1010111 @r
>>> +vadc_vim 010000 1 ..... ..... 011 ..... 1010111 @r
>>> +vmadc_vvm 010001 1 ..... ..... 000 ..... 1010111 @r
>>> +vmadc_vxm 010001 1 ..... ..... 100 ..... 1010111 @r
>>> +vmadc_vim 010001 1 ..... ..... 011 ..... 1010111 @r
>>> +vsbc_vvm 010010 1 ..... ..... 000 ..... 1010111 @r
>>> +vsbc_vxm 010010 1 ..... ..... 100 ..... 1010111 @r
>>> +vmsbc_vvm 010011 1 ..... ..... 000 ..... 1010111 @r
>>> +vmsbc_vxm 010011 1 ..... ..... 100 ..... 1010111 @r
>>> +vmpopc_m 010100 . ..... ----- 010 ..... 1010111 @r2_vm
>>> +vmfirst_m 010101 . ..... ----- 010 ..... 1010111 @r2_vm
>>> +vmsbf_m 010110 . ..... 00001 010 ..... 1010111 @r2_vm
>>> +vmsof_m 010110 . ..... 00010 010 ..... 1010111 @r2_vm
>>> +vmsif_m 010110 . ..... 00011 010 ..... 1010111 @r2_vm
>>> +viota_m 010110 . ..... 10000 010 ..... 1010111 @r2_vm
>>> +vid_v 010110 . 00000 10001 010 ..... 1010111 @r1_vm
>>> +vmerge_vvm 010111 . ..... ..... 000 ..... 1010111 @r_vm
>>> +vmerge_vxm 010111 . ..... ..... 100 ..... 1010111 @r_vm
>>> +vmerge_vim 010111 . ..... ..... 011 ..... 1010111 @r_vm
>>> +vcompress_vm 010111 - ..... ..... 010 ..... 1010111 @r
>>> +vfmerge_vfm 010111 . ..... ..... 101 ..... 1010111 @r_vm
>>> +vmseq_vv 011000 . ..... ..... 000 ..... 1010111 @r_vm
>>> +vmseq_vx 011000 . ..... ..... 100 ..... 1010111 @r_vm
>>> +vmseq_vi 011000 . ..... ..... 011 ..... 1010111 @r_vm
>>> +vmandnot_mm 011000 - ..... ..... 010 ..... 1010111 @r
>>> +vmfeq_vv 011000 . ..... ..... 001 ..... 1010111 @r_vm
>>> +vmfeq_vf 011000 . ..... ..... 101 ..... 1010111 @r_vm
>>> +vmsne_vv 011001 . ..... ..... 000 ..... 1010111 @r_vm
>>> +vmsne_vx 011001 . ..... ..... 100 ..... 1010111 @r_vm
>>> +vmsne_vi 011001 . ..... ..... 011 ..... 1010111 @r_vm
>>> +vmand_mm 011001 - ..... ..... 010 ..... 1010111 @r
>>> +vmfle_vv 011001 . ..... ..... 001 ..... 1010111 @r_vm
>>> +vmfle_vf 011001 . ..... ..... 101 ..... 1010111 @r_vm
>>> +vmsltu_vv 011010 . ..... ..... 000 ..... 1010111 @r_vm
>>> +vmsltu_vx 011010 . ..... ..... 100 ..... 1010111 @r_vm
>>> +vmor_mm 011010 - ..... ..... 010 ..... 1010111 @r
>>> +vmford_vv 011010 . ..... ..... 001 ..... 1010111 @r_vm
>>> +vmford_vf 011010 . ..... ..... 101 ..... 1010111 @r_vm
>>> +vmslt_vv 011011 . ..... ..... 000 ..... 1010111 @r_vm
>>> +vmslt_vx 011011 . ..... ..... 100 ..... 1010111 @r_vm
>>> +vmxor_mm 011011 - ..... ..... 010 ..... 1010111 @r
>>> +vmflt_vv 011011 . ..... ..... 001 ..... 1010111 @r_vm
>>> +vmflt_vf 011011 . ..... ..... 101 ..... 1010111 @r_vm
>>> +vmsleu_vv 011100 . ..... ..... 000 ..... 1010111 @r_vm
>>> +vmsleu_vx 011100 . ..... ..... 100 ..... 1010111 @r_vm
>>> +vmsleu_vi 011100 . ..... ..... 011 ..... 1010111 @r_vm
>>> +vmornot_mm 011100 - ..... ..... 010 ..... 1010111 @r
>>> +vmfne_vv 011100 . ..... ..... 001 ..... 1010111 @r_vm
>>> +vmfne_vf 011100 . ..... ..... 101 ..... 1010111 @r_vm
>>> +vmsle_vv 011101 . ..... ..... 000 ..... 1010111 @r_vm
>>> +vmsle_vx 011101 . ..... ..... 100 ..... 1010111 @r_vm
>>> +vmsle_vi 011101 . ..... ..... 011 ..... 1010111 @r_vm
>>> +vmnand_mm 011101 - ..... ..... 010 ..... 1010111 @r
>>> +vmfgt_vf 011101 . ..... ..... 101 ..... 1010111 @r_vm
>>> +vmsgtu_vx 011110 . ..... ..... 100 ..... 1010111 @r_vm
>>> +vmsgtu_vi 011110 . ..... ..... 011 ..... 1010111 @r_vm
>>> +vmnor_mm 011110 - ..... ..... 010 ..... 1010111 @r
>>> +vmsgt_vx 011111 . ..... ..... 100 ..... 1010111 @r_vm
>>> +vmsgt_vi 011111 . ..... ..... 011 ..... 1010111 @r_vm
>>> +vmxnor_mm 011111 - ..... ..... 010 ..... 1010111 @r
>>> +vmfge_vf 011111 . ..... ..... 101 ..... 1010111 @r_vm
>>> +vsaddu_vv 100000 . ..... ..... 000 ..... 1010111 @r_vm
>>> +vsaddu_vx 100000 . ..... ..... 100 ..... 1010111 @r_vm
>>> +vsaddu_vi 100000 . ..... ..... 011 ..... 1010111 @r_vm
>>> +vdivu_vv 100000 . ..... ..... 010 ..... 1010111 @r_vm
>>> +vdivu_vx 100000 . ..... ..... 110 ..... 1010111 @r_vm
>>> +vfdiv_vv 100000 . ..... ..... 001 ..... 1010111 @r_vm
>>> +vfdiv_vf 100000 . ..... ..... 101 ..... 1010111 @r_vm
>>> +vsadd_vv 100001 . ..... ..... 000 ..... 1010111 @r_vm
>>> +vsadd_vx 100001 . ..... ..... 100 ..... 1010111 @r_vm
>>> +vsadd_vi 100001 . ..... ..... 011 ..... 1010111 @r_vm
>>> +vdiv_vv 100001 . ..... ..... 010 ..... 1010111 @r_vm
>>> +vdiv_vx 100001 . ..... ..... 110 ..... 1010111 @r_vm
>>> +vfrdiv_vf 100001 . ..... ..... 101 ..... 1010111 @r_vm
>>> +vssubu_vv 100010 . ..... ..... 000 ..... 1010111 @r_vm
>>> +vssubu_vx 100010 . ..... ..... 100 ..... 1010111 @r_vm
>>> +vremu_vv 100010 . ..... ..... 010 ..... 1010111 @r_vm
>>> +vremu_vx 100010 . ..... ..... 110 ..... 1010111 @r_vm
>>> +vfcvt_xu_f_v 100010 . ..... 00000 001 ..... 1010111 @r2_vm
>>> +vfcvt_x_f_v 100010 . ..... 00001 001 ..... 1010111 @r2_vm
>>> +vfcvt_f_xu_v 100010 . ..... 00010 001 ..... 1010111 @r2_vm
>>> +vfcvt_f_x_v 100010 . ..... 00011 001 ..... 1010111 @r2_vm
>>> +vfwcvt_xu_f_v 100010 . ..... 01000 001 ..... 1010111 @r2_vm
>>> +vfwcvt_x_f_v 100010 . ..... 01001 001 ..... 1010111 @r2_vm
>>> +vfwcvt_f_xu_v 100010 . ..... 01010 001 ..... 1010111 @r2_vm
>>> +vfwcvt_f_x_v 100010 . ..... 01011 001 ..... 1010111 @r2_vm
>>> +vfwcvt_f_f_v 100010 . ..... 01100 001 ..... 1010111 @r2_vm
>>> +vfncvt_xu_f_v 100010 . ..... 10000 001 ..... 1010111 @r2_vm
>>> +vfncvt_x_f_v 100010 . ..... 10001 001 ..... 1010111 @r2_vm
>>> +vfncvt_f_xu_v 100010 . ..... 10010 001 ..... 1010111 @r2_vm
>>> +vfncvt_f_x_v 100010 . ..... 10011 001 ..... 1010111 @r2_vm
>>> +vfncvt_f_f_v 100010 . ..... 10100 001 ..... 1010111 @r2_vm
>>> +vssub_vv 100011 . ..... ..... 000 ..... 1010111 @r_vm
>>> +vssub_vx 100011 . ..... ..... 100 ..... 1010111 @r_vm
>>> +vrem_vv 100011 . ..... ..... 010 ..... 1010111 @r_vm
>>> +vrem_vx 100011 . ..... ..... 110 ..... 1010111 @r_vm
>>> +vfsqrt_v 100011 . ..... 00000 001 ..... 1010111 @r2_vm
>>> +vfclass_v 100011 . ..... 10000 001 ..... 1010111 @r2_vm
>>> +vaadd_vv 100100 . ..... ..... 000 ..... 1010111 @r_vm
>>> +vaadd_vx 100100 . ..... ..... 100 ..... 1010111 @r_vm
>>> +vaadd_vi 100100 . ..... ..... 011 ..... 1010111 @r_vm
>>> +vmulhu_vv 100100 . ..... ..... 010 ..... 1010111 @r_vm
>>> +vmulhu_vx 100100 . ..... ..... 110 ..... 1010111 @r_vm
>>> +vfmul_vv 100100 . ..... ..... 001 ..... 1010111 @r_vm
>>> +vfmul_vf 100100 . ..... ..... 101 ..... 1010111 @r_vm
>>> +vsll_vv 100101 . ..... ..... 000 ..... 1010111 @r_vm
>>> +vsll_vx 100101 . ..... ..... 100 ..... 1010111 @r_vm
>>> +vsll_vi 100101 . ..... ..... 011 ..... 1010111 @r_vm
>>> +vmul_vv 100101 . ..... ..... 010 ..... 1010111 @r_vm
>>> +vmul_vx 100101 . ..... ..... 110 ..... 1010111 @r_vm
>>> +vasub_vv 100110 . ..... ..... 000 ..... 1010111 @r_vm
>>> +vasub_vx 100110 . ..... ..... 100 ..... 1010111 @r_vm
>>> +vmulhsu_vv 100110 . ..... ..... 010 ..... 1010111 @r_vm
>>> +vmulhsu_vx 100110 . ..... ..... 110 ..... 1010111 @r_vm
>>> +vsmul_vv 100111 . ..... ..... 000 ..... 1010111 @r_vm
>>> +vsmul_vx 100111 . ..... ..... 100 ..... 1010111 @r_vm
>>> +vmulh_vv 100111 . ..... ..... 010 ..... 1010111 @r_vm
>>> +vmulh_vx 100111 . ..... ..... 110 ..... 1010111 @r_vm
>>> +vfrsub_vf 100111 . ..... ..... 101 ..... 1010111 @r_vm
>>> +vsrl_vv 101000 . ..... ..... 000 ..... 1010111 @r_vm
>>> +vsrl_vx 101000 . ..... ..... 100 ..... 1010111 @r_vm
>>> +vsrl_vi 101000 . ..... ..... 011 ..... 1010111 @r_vm
>>> +vfmadd_vv 101000 . ..... ..... 001 ..... 1010111 @r_vm
>>> +vfmadd_vf 101000 . ..... ..... 101 ..... 1010111 @r_vm
>>> +vsra_vv 101001 . ..... ..... 000 ..... 1010111 @r_vm
>>> +vsra_vx 101001 . ..... ..... 100 ..... 1010111 @r_vm
>>> +vsra_vi 101001 . ..... ..... 011 ..... 1010111 @r_vm
>>> +vmadd_vv 101001 . ..... ..... 010 ..... 1010111 @r_vm
>>> +vmadd_vx 101001 . ..... ..... 110 ..... 1010111 @r_vm
>>> +vfnmadd_vv 101001 . ..... ..... 001 ..... 1010111 @r_vm
>>> +vfnmadd_vf 101001 . ..... ..... 101 ..... 1010111 @r_vm
>>> +vssrl_vv 101010 . ..... ..... 000 ..... 1010111 @r_vm
>>> +vssrl_vx 101010 . ..... ..... 100 ..... 1010111 @r_vm
>>> +vssrl_vi 101010 . ..... ..... 011 ..... 1010111 @r_vm
>>> +vfmsub_vv 101010 . ..... ..... 001 ..... 1010111 @r_vm
>>> +vfmsub_vf 101010 . ..... ..... 101 ..... 1010111 @r_vm
>>> +vssra_vv 101011 . ..... ..... 000 ..... 1010111 @r_vm
>>> +vssra_vx 101011 . ..... ..... 100 ..... 1010111 @r_vm
>>> +vssra_vi 101011 . ..... ..... 011 ..... 1010111 @r_vm
>>> +vnmsub_vv 101011 . ..... ..... 010 ..... 1010111 @r_vm
>>> +vnmsub_vx 101011 . ..... ..... 110 ..... 1010111 @r_vm
>>> +vfnmsub_vv 101011 . ..... ..... 001 ..... 1010111 @r_vm
>>> +vfnmsub_vf 101011 . ..... ..... 101 ..... 1010111 @r_vm
>>> +vnsrl_vv 101100 . ..... ..... 000 ..... 1010111 @r_vm
>>> +vnsrl_vx 101100 . ..... ..... 100 ..... 1010111 @r_vm
>>> +vnsrl_vi 101100 . ..... ..... 011 ..... 1010111 @r_vm
>>> +vfmacc_vv 101100 . ..... ..... 001 ..... 1010111 @r_vm
>>> +vfmacc_vf 101100 . ..... ..... 101 ..... 1010111 @r_vm
>>> +vnsra_vv 101101 . ..... ..... 000 ..... 1010111 @r_vm
>>> +vnsra_vx 101101 . ..... ..... 100 ..... 1010111 @r_vm
>>> +vnsra_vi 101101 . ..... ..... 011 ..... 1010111 @r_vm
>>> +vmacc_vv 101101 . ..... ..... 010 ..... 1010111 @r_vm
>>> +vmacc_vx 101101 . ..... ..... 110 ..... 1010111 @r_vm
>>> +vfnmacc_vv 101101 . ..... ..... 001 ..... 1010111 @r_vm
>>> +vfnmacc_vf 101101 . ..... ..... 101 ..... 1010111 @r_vm
>>> +vnclipu_vv 101110 . ..... ..... 000 ..... 1010111 @r_vm
>>> +vnclipu_vx 101110 . ..... ..... 100 ..... 1010111 @r_vm
>>> +vnclipu_vi 101110 . ..... ..... 011 ..... 1010111 @r_vm
>>> +vfmsac_vv 101110 . ..... ..... 001 ..... 1010111 @r_vm
>>> +vfmsac_vf 101110 . ..... ..... 101 ..... 1010111 @r_vm
>>> +vnclip_vv 101111 . ..... ..... 000 ..... 1010111 @r_vm
>>> +vnclip_vx 101111 . ..... ..... 100 ..... 1010111 @r_vm
>>> +vnclip_vi 101111 . ..... ..... 011 ..... 1010111 @r_vm
>>> +vnmsac_vv 101111 . ..... ..... 010 ..... 1010111 @r_vm
>>> +vnmsac_vx 101111 . ..... ..... 110 ..... 1010111 @r_vm
>>> +vfnmsac_vv 101111 . ..... ..... 001 ..... 1010111 @r_vm
>>> +vfnmsac_vf 101111 . ..... ..... 101 ..... 1010111 @r_vm
>>> +vwredsumu_vs 110000 . ..... ..... 000 ..... 1010111 @r_vm
>>> +vwaddu_vv 110000 . ..... ..... 010 ..... 1010111 @r_vm
>>> +vwaddu_vx 110000 . ..... ..... 110 ..... 1010111 @r_vm
>>> +vfwadd_vv 110000 . ..... ..... 001 ..... 1010111 @r_vm
>>> +vfwadd_vf 110000 . ..... ..... 101 ..... 1010111 @r_vm
>>> +vwredsum_vs 110001 . ..... ..... 000 ..... 1010111 @r_vm
>>> +vwadd_vv 110001 . ..... ..... 010 ..... 1010111 @r_vm
>>> +vwadd_vx 110001 . ..... ..... 110 ..... 1010111 @r_vm
>>> +vfwredsum_vs 110001 . ..... ..... 001 ..... 1010111 @r_vm
>>> +vwsubu_vv 110010 . ..... ..... 010 ..... 1010111 @r_vm
>>> +vwsubu_vx 110010 . ..... ..... 110 ..... 1010111 @r_vm
>>> +vfwsub_vv 110010 . ..... ..... 001 ..... 1010111 @r_vm
>>> +vfwsub_vf 110010 . ..... ..... 101 ..... 1010111 @r_vm
>>> +vwsub_vv 110011 . ..... ..... 010 ..... 1010111 @r_vm
>>> +vwsub_vx 110011 . ..... ..... 110 ..... 1010111 @r_vm
>>> +vfwredosum_vs 110011 . ..... ..... 001 ..... 1010111 @r_vm
>>> +vwaddu_wv 110100 . ..... ..... 010 ..... 1010111 @r_vm
>>> +vwaddu_wx 110100 . ..... ..... 110 ..... 1010111 @r_vm
>>> +vfwadd_wv 110100 . ..... ..... 001 ..... 1010111 @r_vm
>>> +vfwadd_wf 110100 . ..... ..... 101 ..... 1010111 @r_vm
>>> +vwadd_wv 110101 . ..... ..... 010 ..... 1010111 @r_vm
>>> +vwadd_wx 110101 . ..... ..... 110 ..... 1010111 @r_vm
>>> +vwsubu_wv 110110 . ..... ..... 010 ..... 1010111 @r_vm
>>> +vwsubu_wx 110110 . ..... ..... 110 ..... 1010111 @r_vm
>>> +vfwsub_wv 110110 . ..... ..... 001 ..... 1010111 @r_vm
>>> +vfwsub_wf 110110 . ..... ..... 101 ..... 1010111 @r_vm
>>> +vwsub_wv 110111 . ..... ..... 010 ..... 1010111 @r_vm
>>> +vwsub_wx 110111 . ..... ..... 110 ..... 1010111 @r_vm
>>> +vwmulu_vv 111000 . ..... ..... 010 ..... 1010111 @r_vm
>>> +vwmulu_vx 111000 . ..... ..... 110 ..... 1010111 @r_vm
>>> +vfwmul_vv 111000 . ..... ..... 001 ..... 1010111 @r_vm
>>> +vfwmul_vf 111000 . ..... ..... 101 ..... 1010111 @r_vm
>>> +vwmulsu_vv 111010 . ..... ..... 010 ..... 1010111 @r_vm
>>> +vwmulsu_vx 111010 . ..... ..... 110 ..... 1010111 @r_vm
>>> +vwmul_vv 111011 . ..... ..... 010 ..... 1010111 @r_vm
>>> +vwmul_vx 111011 . ..... ..... 110 ..... 1010111 @r_vm
>>> +vwsmaccu_vv 111100 . ..... ..... 000 ..... 1010111 @r_vm
>>> +vwsmaccu_vx 111100 . ..... ..... 100 ..... 1010111 @r_vm
>>> +vwmaccu_vv 111100 . ..... ..... 010 ..... 1010111 @r_vm
>>> +vwmaccu_vx 111100 . ..... ..... 110 ..... 1010111 @r_vm
>>> +vfwmacc_vv 111100 . ..... ..... 001 ..... 1010111 @r_vm
>>> +vfwmacc_vf 111100 . ..... ..... 101 ..... 1010111 @r_vm
>>> +vwsmacc_vv 111101 . ..... ..... 000 ..... 1010111 @r_vm
>>> +vwsmacc_vx 111101 . ..... ..... 100 ..... 1010111 @r_vm
>>> +vwmacc_vv 111101 . ..... ..... 010 ..... 1010111 @r_vm
>>> +vwmacc_vx 111101 . ..... ..... 110 ..... 1010111 @r_vm
>>> +vfwnmacc_vv 111101 . ..... ..... 001 ..... 1010111 @r_vm
>>> +vfwnmacc_vf 111101 . ..... ..... 101 ..... 1010111 @r_vm
>>> +vwsmaccsu_vv 111110 . ..... ..... 000 ..... 1010111 @r_vm
>>> +vwsmaccsu_vx 111110 . ..... ..... 100 ..... 1010111 @r_vm
>>> +vwmaccsu_vv 111110 . ..... ..... 010 ..... 1010111 @r_vm
>>> +vwmaccsu_vx 111110 . ..... ..... 110 ..... 1010111 @r_vm
>>> +vfwmsac_vv 111110 . ..... ..... 001 ..... 1010111 @r_vm
>>> +vfwmsac_vf 111110 . ..... ..... 101 ..... 1010111 @r_vm
>>> +vwsmaccus_vx 111111 . ..... ..... 100 ..... 1010111 @r_vm
>>> +vwmaccus_vx 111111 . ..... ..... 110 ..... 1010111 @r_vm
>>> +vfwnmsac_vv 111111 . ..... ..... 001 ..... 1010111 @r_vm
>>> +vfwnmsac_vf 111111 . ..... ..... 101 ..... 1010111 @r_vm
>>> +vsetvli 0 ........... ..... 111 ..... 1010111 @r2_zimm
>>> +vsetvl 1000000 ..... ..... 111 ..... 1010111 @r
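(As a reading aid for the decode patterns above: each decodetree line spells out the fixed opcode bits of a 32-bit instruction, with '.' marking bits that the trailing format extracts as named fields. A minimal C sketch of the extraction implied by the @r_vm format follows; the arg_r_vm/extract_r_vm names are illustrative only, not the decoder that scripts/decodetree.py generates, and the bit positions are the standard RISC-V R-type ones that %rs2/%rs1/%rd denote.)

    #include <stdint.h>

    /* Fields named by the @r_vm format; illustrative sketch only. */
    typedef struct {
        unsigned rd, rs1, rs2, vm;
    } arg_r_vm;

    static void extract_r_vm(uint32_t insn, arg_r_vm *a)
    {
        a->rd  = (insn >> 7)  & 0x1f;  /* bits 11:7,  %rd  */
        a->rs1 = (insn >> 15) & 0x1f;  /* bits 19:15, %rs1 */
        a->rs2 = (insn >> 20) & 0x1f;  /* bits 24:20, %rs2 */
        a->vm  = (insn >> 25) & 0x1;   /* vm:1, the mask-enable bit */
    }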
>>> diff --git a/target/riscv/insn_trans/trans_rvv.inc.c b/target/riscv/insn_trans/trans_rvv.inc.c
>>> new file mode 100644
>>> index 0000000..dc8e6ce
>>> --- /dev/null
>>> +++ b/target/riscv/insn_trans/trans_rvv.inc.c
>>> @@ -0,0 +1,484 @@
>>> +/*
>>> + * RISC-V translation routines for the RVV Standard Extension.
>>> + *
>>> + * Copyright (c) 2011-2019 C-SKY Limited. All rights reserved.
>>> + *
>>> + * This program is free software; you can redistribute it and/or modify it
>>> + * under the terms and conditions of the GNU General Public License,
>>> + * version 2 or later, as published by the Free Software Foundation.
>>> + *
>>> + * This program is distributed in the hope it will be useful, but WITHOUT
>>> + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
>>> + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
>>> + * more details.
>>> + *
>>> + * You should have received a copy of the GNU General Public License along with
>>> + * this program. If not, see <http://www.gnu.org/licenses/>.
>>> + */
>>> +
>>> +#define GEN_VECTOR_R2_NFVM(INSN) \
>>> +static bool trans_##INSN(DisasContext *ctx, arg_##INSN * a) \
>>> +{ \
>>> + TCGv_i32 s1 = tcg_const_i32(a->rs1); \
>>> + TCGv_i32 d = tcg_const_i32(a->rd); \
>>> + TCGv_i32 nf = tcg_const_i32(a->nf); \
>>> + TCGv_i32 vm = tcg_const_i32(a->vm); \
>>> + gen_helper_vector_##INSN(cpu_env, nf, vm, s1, d); \
>>> + tcg_temp_free_i32(s1); \
>>> + tcg_temp_free_i32(d); \
>>> + tcg_temp_free_i32(nf); \
>>> + tcg_temp_free_i32(vm); \
>>> + return true; \
>>> +}
>>> +#define GEN_VECTOR_R_NFVM(INSN) \
>>> +static bool trans_##INSN(DisasContext *ctx, arg_##INSN * a) \
>>> +{ \
>>> + TCGv_i32 s1 = tcg_const_i32(a->rs1); \
>>> + TCGv_i32 s2 = tcg_const_i32(a->rs2); \
>>> + TCGv_i32 d = tcg_const_i32(a->rd); \
>>> + TCGv_i32 nf = tcg_const_i32(a->nf); \
>>> + TCGv_i32 vm = tcg_const_i32(a->vm); \
>>> + gen_helper_vector_##INSN(cpu_env, nf, vm, s1, s2, d);\
>>> + tcg_temp_free_i32(s1); \
>>> + tcg_temp_free_i32(s2); \
>>> + tcg_temp_free_i32(d); \
>>> + tcg_temp_free_i32(nf); \
>>> + tcg_temp_free_i32(vm); \
>>> + return true; \
>>> +}
>>> +
>>> +#define GEN_VECTOR_R_WDVM(INSN) \
>>> +static bool trans_##INSN(DisasContext *ctx, arg_##INSN * a) \
>>> +{ \
>>> + TCGv_i32 s1 = tcg_const_i32(a->rs1); \
>>> + TCGv_i32 s2 = tcg_const_i32(a->rs2); \
>>> + TCGv_i32 d = tcg_const_i32(a->rd); \
>>> + TCGv_i32 wd = tcg_const_i32(a->wd); \
>>> + TCGv_i32 vm = tcg_const_i32(a->vm); \
>>> + gen_helper_vector_##INSN(cpu_env, wd, vm, s1, s2, d);\
>>> + tcg_temp_free_i32(s1); \
>>> + tcg_temp_free_i32(s2); \
>>> + tcg_temp_free_i32(d); \
>>> + tcg_temp_free_i32(wd); \
>>> + tcg_temp_free_i32(vm); \
>>> + return true; \
>>> +}
>>> +#define GEN_VECTOR_R(INSN) \
>>> +static bool trans_##INSN(DisasContext *ctx, arg_##INSN * a) \
>>> +{ \
>>> + TCGv_i32 s1 = tcg_const_i32(a->rs1); \
>>> + TCGv_i32 s2 = tcg_const_i32(a->rs2); \
>>> + TCGv_i32 d = tcg_const_i32(a->rd); \
>>> + gen_helper_vector_##INSN(cpu_env, s1, s2, d); \
>>> + tcg_temp_free_i32(s1); \
>>> + tcg_temp_free_i32(s2); \
>>> + tcg_temp_free_i32(d); \
>>> + return true; \
>>> +}
>>> +#define GEN_VECTOR_R2_VM(INSN) \
>>> +static bool trans_##INSN(DisasContext *ctx, arg_##INSN * a) \
>>> +{ \
>>> + TCGv_i32 s2 = tcg_const_i32(a->rs2); \
>>> + TCGv_i32 d = tcg_const_i32(a->rd); \
>>> + TCGv_i32 vm = tcg_const_i32(a->vm); \
>>> + gen_helper_vector_##INSN(cpu_env, vm, s2, d); \
>>> + tcg_temp_free_i32(s2); \
>>> + tcg_temp_free_i32(d); \
>>> + tcg_temp_free_i32(vm); \
>>> + return true; \
>>> +}
>>> +
>>> +#define GEN_VECTOR_R1_VM(INSN) \
>>> +static bool trans_##INSN(DisasContext *ctx, arg_##INSN * a) \
>>> +{ \
>>> + TCGv_i32 d = tcg_const_i32(a->rd); \
>>> + TCGv_i32 vm = tcg_const_i32(a->vm); \
>>> + gen_helper_vector_##INSN(cpu_env, vm, d); \
>>> + tcg_temp_free_i32(d); \
>>> + tcg_temp_free_i32(vm); \
>>> + return true; \
>>> +}
>>> +#define GEN_VECTOR_R_VM(INSN) \
>>> +static bool trans_##INSN(DisasContext *ctx, arg_##INSN * a) \
>>> +{ \
>>> + TCGv_i32 s1 = tcg_const_i32(a->rs1); \
>>> + TCGv_i32 s2 = tcg_const_i32(a->rs2); \
>>> + TCGv_i32 d = tcg_const_i32(a->rd); \
>>> + TCGv_i32 vm = tcg_const_i32(a->vm); \
>>> + gen_helper_vector_##INSN(cpu_env, vm, s1, s2, d); \
>>> + tcg_temp_free_i32(s1); \
>>> + tcg_temp_free_i32(s2); \
>>> + tcg_temp_free_i32(d); \
>>> + tcg_temp_free_i32(vm); \
>>> + return true; \
>>> +}
>>> +#define GEN_VECTOR_R2_ZIMM(INSN) \
>>> +static bool trans_##INSN(DisasContext *ctx, arg_##INSN * a) \
>>> +{ \
>>> + TCGv_i32 s1 = tcg_const_i32(a->rs1); \
>>> + TCGv_i32 zimm = tcg_const_i32(a->zimm); \
>>> + TCGv_i32 d = tcg_const_i32(a->rd); \
>>> + gen_helper_vector_##INSN(cpu_env, s1, zimm, d); \
>>> + tcg_temp_free_i32(s1); \
>>> + tcg_temp_free_i32(zimm); \
>>> + tcg_temp_free_i32(d); \
>>> + return true; \
>>> +}
>>> +
>>> +GEN_VECTOR_R2_NFVM(vlb_v)
>>> +GEN_VECTOR_R2_NFVM(vlh_v)
>
> ...
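(The trans_* stubs in trans_rvv.inc.c are all produced by the GEN_VECTOR_* macros above. Hand-expanding GEN_VECTOR_R_VM for one instruction, presumably vadd_vv given the helper declared in helper.h, gives roughly the following; this is an illustrative expansion, not code that appears literally in the patch.)

    static bool trans_vadd_vv(DisasContext *ctx, arg_vadd_vv *a)
    {
        /* Box the decoded fields as 32-bit TCG constants... */
        TCGv_i32 s1 = tcg_const_i32(a->rs1);
        TCGv_i32 s2 = tcg_const_i32(a->rs2);
        TCGv_i32 d  = tcg_const_i32(a->rd);
        TCGv_i32 vm = tcg_const_i32(a->vm);
        /* ...and defer all of the work to the out-of-line helper
           declared as DEF_HELPER_5(vector_vadd_vv, ...) in helper.h. */
        gen_helper_vector_vadd_vv(cpu_env, vm, s1, s2, d);
        tcg_temp_free_i32(s1);
        tcg_temp_free_i32(s2);
        tcg_temp_free_i32(d);
        tcg_temp_free_i32(vm);
        return true;
    }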
^ permalink raw reply [flat|nested] 52+ messages in thread
* Re: [Qemu-riscv] [Qemu-devel] [PATCH] RISCV: support riscv vector extension 0.7.1
@ 2019-08-29 18:32 ` Aleksandar Markovic
0 siblings, 0 replies; 52+ messages in thread
From: Aleksandar Markovic @ 2019-08-29 18:32 UTC (permalink / raw)
To: liuzhiwei
Cc: Peter Maydell, Riku Voipio, Alistair Francis, open list:RISC-V,
Alex Bennée, Aurelien Jarno, Sagar Karandikar,
Palmer Dabbelt, Laurent Vivier, Bastian Koppelmann,
QEMU Developers
[-- Attachment #1: Type: text/plain, Size: 76055 bytes --]
On 29.08.2019 15:02, "liuzhiwei" <zhiwei_liu@c-sky.com> wrote:
>
>
> On 2019/8/29 3:20 AM, Aleksandar Markovic wrote:
>>
>>
>>
>> > On Wed, Aug 28, 2019 at 9:04 AM liuzhiwei <zhiwei_liu@c-sky.com> wrote:
>>>
>>> Change-Id: I3cf891bc400713b95f47ecca82b1bf773f3dcb25
>>> Signed-off-by: liuzhiwei <zhiwei_liu@c-sky.com>
>>> ---
>>
>>
>> Such a large patch, and "Change-Id: I3cf891bc400713b95f47ecca82b1bf773f3dcb25" is its entire commit message?? Horrible.
>
> Hi, Aleksandar
>
> I am so sorry. A patch set with a cover letter will be sent later.
>
> Best Regards,
>
> Zhiwei
OK, Zhiwei,
You'll soon get more used to participating in open source, and write much
better patches.
Try to follow guidelines described at
https://wiki.qemu.org/Contribute/SubmitAPatch
Thanks,
Aleksandar
>>
>> Aleksandar
>>
>>>
>>> fpu/softfloat.c | 119 +
>>> include/fpu/softfloat.h | 4 +
>>> linux-user/riscv/cpu_loop.c | 8 +-
>>> target/riscv/Makefile.objs | 2 +-
>>> target/riscv/cpu.h | 30 +
>>> target/riscv/cpu_bits.h | 15 +
>>> target/riscv/cpu_helper.c | 7 +
>>> target/riscv/csr.c | 65 +-
>>> target/riscv/helper.h | 354 +
>>> target/riscv/insn32.decode | 374 +-
>>> target/riscv/insn_trans/trans_rvv.inc.c | 484 +
>>> target/riscv/translate.c | 1 +
>>> target/riscv/vector_helper.c | 26563 ++++++++++++++++++++++++++++++
>>> 13 files changed, 28017 insertions(+), 9 deletions(-)
>>> create mode 100644 target/riscv/insn_trans/trans_rvv.inc.c
>>> create mode 100644 target/riscv/vector_helper.c
>>>
>>> diff --git a/fpu/softfloat.c b/fpu/softfloat.c
>>> index 2ba36ec..da155ea 100644
>>> --- a/fpu/softfloat.c
>>> +++ b/fpu/softfloat.c
>>> @@ -433,6 +433,16 @@ static inline int extractFloat16Exp(float16 a)
>>> }
>>>
>>> /*----------------------------------------------------------------------------
>>> +| Returns the sign bit of the half-precision floating-point value `a'.
>>> +*----------------------------------------------------------------------------*/
>>> +
>>> +static inline flag extractFloat16Sign(float16 a)
>>> +{
>>> + return float16_val(a) >> 0xf;
>>> +}
>>> +
>>> +
>>> +/*----------------------------------------------------------------------------
>>> | Returns the fraction bits of the single-precision floating-point value `a'.
>>> *----------------------------------------------------------------------------*/
>>>
>>> @@ -4790,6 +4800,35 @@ int float32_eq(float32 a, float32 b, float_status *status)
>>> }
>>>
>>> /*----------------------------------------------------------------------------
>>> +| Returns 1 if the half-precision floating-point value `a' is less than
>>> +| or equal to the corresponding value `b', and 0 otherwise. The invalid
>>> +| exception is raised if either operand is a NaN. The comparison is performed
>>> +| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
>>> +*----------------------------------------------------------------------------*/
>>> +
>>> +int float16_le(float16 a, float16 b, float_status *status)
>>> +{
>>> + flag aSign, bSign;
>>> + uint16_t av, bv;
>>> + a = float16_squash_input_denormal(a, status);
>>> + b = float16_squash_input_denormal(b, status);
>>> +
>>> + if ( ( ( extractFloat16Exp( a ) == 0x1F ) && extractFloat16Frac( a ) )
>>> + || ( ( extractFloat16Exp( b ) == 0x1F ) && extractFloat16Frac( b ) )
>>> + ) {
>>> + float_raise(float_flag_invalid, status);
>>> + return 0;
>>> + }
>>> + aSign = extractFloat16Sign( a );
>>> + bSign = extractFloat16Sign( b );
>>> + av = float16_val(a);
>>> + bv = float16_val(b);
>>> + if ( aSign != bSign ) return aSign || ( (uint16_t) ( ( av | bv )<<1 ) == 0 );
>>> + return ( av == bv ) || ( aSign ^ ( av < bv ) );
>>> +
>>> +}
>>> +
>>> +/*----------------------------------------------------------------------------
>>> | Returns 1 if the single-precision floating-point value `a' is less than
>>> | or equal to the corresponding value `b', and 0 otherwise. The invalid
>>> | exception is raised if either operand is a NaN. The comparison is performed
>>> @@ -4825,6 +4864,35 @@ int float32_le(float32 a, float32 b, float_status *status)
>>> | to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
>>> *----------------------------------------------------------------------------*/
>>>
>>> +int float16_lt(float16 a, float16 b, float_status *status)
>>> +{
>>> + flag aSign, bSign;
>>> + uint16_t av, bv;
>>> + a = float16_squash_input_denormal(a, status);
>>> + b = float16_squash_input_denormal(b, status);
>>> +
>>> + if ( ( ( extractFloat16Exp( a ) == 0x1F ) && extractFloat16Frac( a ) )
>>> + || ( ( extractFloat16Exp( b ) == 0x1F ) && extractFloat16Frac( b ) )
>>> + ) {
>>> + float_raise(float_flag_invalid, status);
>>> + return 0;
>>> + }
>>> + aSign = extractFloat16Sign( a );
>>> + bSign = extractFloat16Sign( b );
>>> + av = float16_val(a);
>>> + bv = float16_val(b);
>>> + if ( aSign != bSign ) return aSign && ( (uint16_t) ( ( av | bv )<<1 ) != 0 );
>>> + return ( av != bv ) && ( aSign ^ ( av < bv ) );
>>> +
>>> +}
>>> +
>>> +/*----------------------------------------------------------------------------
>>> +| Returns 1 if the single-precision floating-point value `a' is less than
>>> +| the corresponding value `b', and 0 otherwise. The invalid exception is
>>> +| raised if either operand is a NaN. The comparison is performed according
>>> +| to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
>>> +*----------------------------------------------------------------------------*/
>>> +
>>> int float32_lt(float32 a, float32 b, float_status *status)
>>> {
>>> flag aSign, bSign;
>>> @@ -4869,6 +4937,32 @@ int float32_unordered(float32 a, float32 b, float_status *status)
>>> }
>>>
>>> /*----------------------------------------------------------------------------
>>> +| Returns 1 if the half-precision floating-point value `a' is equal to
>>> +| the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an
>>> +| exception. The comparison is performed according to the IEC/IEEE Standard
>>> +| for Binary Floating-Point Arithmetic.
>>> +*----------------------------------------------------------------------------*/
>>> +
>>> +int float16_eq_quiet(float16 a, float16 b, float_status *status)
>>> +{
>>> + a = float16_squash_input_denormal(a, status);
>>> + b = float16_squash_input_denormal(b, status);
>>> +
>>> + if ( ( ( extractFloat16Exp( a ) == 0x1F ) && extractFloat16Frac( a ) )
>>> + || ( ( extractFloat16Exp( b ) == 0x1F ) && extractFloat16Frac( b ) )
>>> + ) {
>>> + if (float16_is_signaling_nan(a, status)
>>> + || float16_is_signaling_nan(b, status)) {
>>> + float_raise(float_flag_invalid, status);
>>> + }
>>> + return 0;
>>> + }
>>> + return ( float16_val(a) == float16_val(b) ) ||
>>> + ( (uint16_t) ( ( float16_val(a) | float16_val(b) )<<1 ) == 0 );
>>> +}
>>> +
>>> +
>>> +/*----------------------------------------------------------------------------
>>> | Returns 1 if the single-precision floating-point value `a' is equal to
>>> | the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an
>>> | exception. The comparison is performed according to the IEC/IEEE Standard
>>> @@ -4958,6 +5052,31 @@ int float32_lt_quiet(float32 a, float32 b, float_status *status)
>>> }
>>>
>>> /*----------------------------------------------------------------------------
>>> +| Returns 1 if the half-precision floating-point values `a' and `b' cannot
>>> +| be compared, and 0 otherwise. Quiet NaNs do not cause an exception. The
>>> +| comparison is performed according to the IEC/IEEE Standard for Binary
>>> +| Floating-Point Arithmetic.
>>> +*----------------------------------------------------------------------------*/
>>> +
>>> +int float16_unordered_quiet(float16 a, float16 b, float_status *status)
>>> +{
>>> + a = float16_squash_input_denormal(a, status);
>>> + b = float16_squash_input_denormal(b, status);
>>> +
>>> + if ( ( ( extractFloat16Exp( a ) == 0x1F ) && extractFloat16Frac( a ) )
>>> + || ( ( extractFloat16Exp( b ) == 0x1F ) && extractFloat16Frac( b ) )
>>> + ) {
>>> + if (float16_is_signaling_nan(a, status)
>>> + || float16_is_signaling_nan(b, status)) {
>>> + float_raise(float_flag_invalid, status);
>>> + }
>>> + return 1;
>>> + }
>>> + return 0;
>>> +}
>>> +
>>> +
>>> +/*----------------------------------------------------------------------------
>>> | Returns 1 if the single-precision floating-point values `a' and `b' cannot
>>> | be compared, and 0 otherwise. Quiet NaNs do not cause an exception. The
>>> | comparison is performed according to the IEC/IEEE Standard for Binary
>>> diff --git a/include/fpu/softfloat.h b/include/fpu/softfloat.h
>>> index 3ff3fa5..3b0754c 100644
>>> --- a/include/fpu/softfloat.h
>>> +++ b/include/fpu/softfloat.h
>>> @@ -293,6 +293,10 @@ float16 float16_maxnummag(float16, float16, float_status *status);
>>> float16 float16_sqrt(float16, float_status *status);
>>> int float16_compare(float16, float16, float_status *status);
>>> int float16_compare_quiet(float16, float16, float_status *status);
>>> +int float16_unordered_quiet(float16, float16, float_status *status);
>>> +int float16_le(float16, float16, float_status *status);
>>> +int float16_lt(float16, float16, float_status *status);
>>> +int float16_eq_quiet(float16, float16, float_status *status);
>>>
>>> int float16_is_quiet_nan(float16, float_status *status);
>>> int float16_is_signaling_nan(float16, float_status *status);
>>> diff --git a/linux-user/riscv/cpu_loop.c b/linux-user/riscv/cpu_loop.c
>>> index 12aa3c0..b01548a 100644
>>> --- a/linux-user/riscv/cpu_loop.c
>>> +++ b/linux-user/riscv/cpu_loop.c
>>> @@ -40,7 +40,13 @@ void cpu_loop(CPURISCVState *env)
>>> signum = 0;
>>> sigcode = 0;
>>> sigaddr = 0;
>>> -
>>> + if (env->foflag) {
>>> + if (env->vfp.vl != 0) {
>>> + env->foflag = false;
>>> + env->pc += 4;
>>> + continue;
>>> + }
>>> + }
>>> switch (trapnr) {
>>> case EXCP_INTERRUPT:
>>> /* just indicate that signals should be handled asap */
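(Context for the foflag check above: RVV "fault-only-first" loads, the vl*ff_v patterns elsewhere in the patch, may only trap on element 0; a fault on a later element instead trims vl to the number of elements that did load. The hunk implements that by consuming the trap and stepping over the 4-byte instruction whenever vl is nonzero. A rough sketch of the control flow, not the literal code:)

    if (env->foflag && env->vfp.vl != 0) {  /* a vl*ff.v partially completed */
        env->foflag = false;
        env->pc += 4;   /* retire the instruction; deliver no guest signal */
        continue;       /* back to the top of the cpu_loop() */
    }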
>>> diff --git a/target/riscv/Makefile.objs b/target/riscv/Makefile.objs
>>> index b1c79bc..d577cef 100644
>>> --- a/target/riscv/Makefile.objs
>>> +++ b/target/riscv/Makefile.objs
>>> @@ -1,4 +1,4 @@
>>> -obj-y += translate.o op_helper.o cpu_helper.o cpu.o csr.o fpu_helper.o gdbstub.o pmp.o
>>> +obj-y += translate.o op_helper.o cpu_helper.o cpu.o csr.o fpu_helper.o vector_helper.o gdbstub.o pmp.o
>>>
>>> DECODETREE = $(SRC_PATH)/scripts/decodetree.py
>>>
>>> diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h
>>> index 0adb307..5a93aa2 100644
>>> --- a/target/riscv/cpu.h
>>> +++ b/target/riscv/cpu.h
>>> @@ -67,6 +67,7 @@
>>> #define RVC RV('C')
>>> #define RVS RV('S')
>>> #define RVU RV('U')
>>> +#define RVV RV('V')
>>>
>>> /* S extension denotes that Supervisor mode exists, however it is possible
>>> to have a core that support S mode but does not have an MMU and there
>>> @@ -93,9 +94,38 @@ typedef struct CPURISCVState CPURISCVState;
>>>
>>> #include "pmp.h"
>>>
>>> +#define VLEN 128
>>> +#define VUNIT(x) (VLEN / x)
>>> +
>>> struct CPURISCVState {
>>> target_ulong gpr[32];
>>> uint64_t fpr[32]; /* assume both F and D extensions */
>>> +
>>> + /* vector coprocessor state. */
>>> + struct {
>>> + union VECTOR {
>>> + float64 f64[VUNIT(64)];
>>> + float32 f32[VUNIT(32)];
>>> + float16 f16[VUNIT(16)];
>>> + target_ulong ul[VUNIT(sizeof(target_ulong))];
>>> + uint64_t u64[VUNIT(64)];
>>> + int64_t s64[VUNIT(64)];
>>> + uint32_t u32[VUNIT(32)];
>>> + int32_t s32[VUNIT(32)];
>>> + uint16_t u16[VUNIT(16)];
>>> + int16_t s16[VUNIT(16)];
>>> + uint8_t u8[VUNIT(8)];
>>> + int8_t s8[VUNIT(8)];
>>> + } vreg[32];
>>> + target_ulong vxrm;
>>> + target_ulong vxsat;
>>> + target_ulong vl;
>>> + target_ulong vstart;
>>> + target_ulong vtype;
>>> + float_status fp_status;
>>> + } vfp;
>>> +
>>> + bool foflag;
>>> target_ulong pc;
>>> target_ulong load_res;
>>> target_ulong load_val;
>>> diff --git a/target/riscv/cpu_bits.h b/target/riscv/cpu_bits.h
>>> index 11f971a..9eb43ec 100644
>>> --- a/target/riscv/cpu_bits.h
>>> +++ b/target/riscv/cpu_bits.h
>>> @@ -29,6 +29,14 @@
>>> #define FSR_NXA (FPEXC_NX << FSR_AEXC_SHIFT)
>>> #define FSR_AEXC (FSR_NVA | FSR_OFA | FSR_UFA | FSR_DZA | FSR_NXA)
>>>
>>> +/* Vector Fixed-Point round model */
>>> +#define FSR_VXRM_SHIFT 9
>>> +#define FSR_VXRM (0x3 << FSR_VXRM_SHIFT)
>>> +
>>> +/* Vector Fixed-Point saturation flag */
>>> +#define FSR_VXSAT_SHIFT 8
>>> +#define FSR_VXSAT (0x1 << FSR_VXSAT_SHIFT)
>>> +
>>> /* Control and Status Registers */
>>>
>>> /* User Trap Setup */
>>> @@ -48,6 +56,13 @@
>>> #define CSR_FRM 0x002
>>> #define CSR_FCSR 0x003
>>>
>>> +/* User Vector CSRs */
>>> +#define CSR_VSTART 0x008
>>> +#define CSR_VXSAT 0x009
>>> +#define CSR_VXRM 0x00a
>>> +#define CSR_VL 0xc20
>>> +#define CSR_VTYPE 0xc21
>>> +
>>> /* User Timers and Counters */
>>> #define CSR_CYCLE 0xc00
>>> #define CSR_TIME 0xc01
>>> diff --git a/target/riscv/cpu_helper.c b/target/riscv/cpu_helper.c
>>> index e32b612..405caf6 100644
>>> --- a/target/riscv/cpu_helper.c
>>> +++ b/target/riscv/cpu_helper.c
>>> @@ -521,6 +521,13 @@ void riscv_cpu_do_interrupt(CPUState *cs)
>>> [PRV_H] = RISCV_EXCP_H_ECALL,
>>> [PRV_M] = RISCV_EXCP_M_ECALL
>>> };
>>> + if (env->foflag) {
>>> + if (env->vfp.vl != 0) {
>>> + env->foflag = false;
>>> + env->pc += 4;
>>> + return;
>>> + }
>>> + }
>>>
>>> if (!async) {
>>> /* set tval to badaddr for traps with address information */
>>> diff --git a/target/riscv/csr.c b/target/riscv/csr.c
>>> index e0d4586..a6131ff 100644
>>> --- a/target/riscv/csr.c
>>> +++ b/target/riscv/csr.c
>>> @@ -87,12 +87,12 @@ static int ctr(CPURISCVState *env, int csrno)
>>> return 0;
>>> }
>>>
>>> -#if !defined(CONFIG_USER_ONLY)
>>> static int any(CPURISCVState *env, int csrno)
>>> {
>>> return 0;
>>> }
>>>
>>> +#if !defined(CONFIG_USER_ONLY)
>>> static int smode(CPURISCVState *env, int csrno)
>>> {
>>> return -!riscv_has_ext(env, RVS);
>>> @@ -158,8 +158,10 @@ static int read_fcsr(CPURISCVState *env, int csrno, target_ulong *val)
>>> return -1;
>>> }
>>> #endif
>>> - *val = (riscv_cpu_get_fflags(env) << FSR_AEXC_SHIFT)
>>> - | (env->frm << FSR_RD_SHIFT);
>>> + *val = (env->vfp.vxrm << FSR_VXRM_SHIFT)
>>> + | (env->vfp.vxsat << FSR_VXSAT_SHIFT)
>>> + | (riscv_cpu_get_fflags(env) << FSR_AEXC_SHIFT)
>>> + | (env->frm << FSR_RD_SHIFT);
>>> return 0;
>>> }
>>>
>>> @@ -172,10 +174,60 @@ static int write_fcsr(CPURISCVState *env, int csrno, target_ulong val)
>>> env->mstatus |= MSTATUS_FS;
>>> #endif
>>> env->frm = (val & FSR_RD) >> FSR_RD_SHIFT;
>>> + env->vfp.vxrm = (val & FSR_VXRM) >> FSR_VXRM_SHIFT;
>>> + env->vfp.vxsat = (val & FSR_VXSAT) >> FSR_VXSAT_SHIFT;
>>> riscv_cpu_set_fflags(env, (val & FSR_AEXC) >> FSR_AEXC_SHIFT);
>>> return 0;
>>> }
>>>
>>> +static int read_vtype(CPURISCVState *env, int csrno, target_ulong *val)
>>> +{
>>> + *val = env->vfp.vtype;
>>> + return 0;
>>> +}
>>> +
>>> +static int read_vl(CPURISCVState *env, int csrno, target_ulong *val)
>>> +{
>>> + *val = env->vfp.vl;
>>> + return 0;
>>> +}
>>> +
>>> +static int read_vxrm(CPURISCVState *env, int csrno, target_ulong *val)
>>> +{
>>> + *val = env->vfp.vxrm;
>>> + return 0;
>>> +}
>>> +
>>> +static int read_vxsat(CPURISCVState *env, int csrno, target_ulong *val)
>>> +{
>>> + *val = env->vfp.vxsat;
>>> + return 0;
>>> +}
>>> +
>>> +static int read_vstart(CPURISCVState *env, int csrno, target_ulong *val)
>>> +{
>>> + *val = env->vfp.vstart;
>>> + return 0;
>>> +}
>>> +
>>> +static int write_vxrm(CPURISCVState *env, int csrno, target_ulong val)
>>> +{
>>> + env->vfp.vxrm = val;
>>> + return 0;
>>> +}
>>> +
>>> +static int write_vxsat(CPURISCVState *env, int csrno, target_ulong val)
>>> +{
>>> + env->vfp.vxsat = val;
>>> + return 0;
>>> +}
>>> +
>>> +static int write_vstart(CPURISCVState *env, int csrno, target_ulong val)
>>> +{
>>> + env->vfp.vstart = val;
>>> + return 0;
>>> +}
>>> +
>>> /* User Timers and Counters */
>>> static int read_instret(CPURISCVState *env, int csrno, target_ulong *val)
>>> {
>>> @@ -873,7 +925,12 @@ static riscv_csr_operations csr_ops[CSR_TABLE_SIZE] = {
>>> [CSR_FFLAGS] = { fs, read_fflags, write_fflags },
>>> [CSR_FRM] = { fs, read_frm, write_frm },
>>> [CSR_FCSR] = { fs, read_fcsr, write_fcsr },
>>> -
>>> + /* Vector CSRs */
>>> + [CSR_VSTART] = { any, read_vstart, write_vstart },
>>> + [CSR_VXSAT] = { any, read_vxsat, write_vxsat },
>>> + [CSR_VXRM] = { any, read_vxrm, write_vxrm },
>>> + [CSR_VL] = { any, read_vl },
>>> + [CSR_VTYPE] = { any, read_vtype },
>>> /* User Timers and Counters */
>>> [CSR_CYCLE] = { ctr, read_instret },
>>> [CSR_INSTRET] = { ctr, read_instret },
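(Side note on the fcsr packing used by read_fcsr/write_fcsr above: with the shifts from the cpu_bits.h hunk, the vector fixed-point state sits directly above the standard floating-point fields. A minimal sketch, assuming the standard fcsr layout of fflags in bits 4:0 and frm in bits 7:5; compose_fcsr is an illustrative name, not a function in the patch:)

    #include <stdint.h>

    #define FSR_VXRM_SHIFT  9   /* from the cpu_bits.h hunk above */
    #define FSR_VXSAT_SHIFT 8   /* from the cpu_bits.h hunk above */
    #define FSR_RD_SHIFT    5   /* standard frm field */
    #define FSR_AEXC_SHIFT  0   /* standard fflags field */

    /* Compose fcsr the same way read_fcsr does. */
    static uint32_t compose_fcsr(uint32_t vxrm, uint32_t vxsat,
                                 uint32_t frm, uint32_t fflags)
    {
        return (vxrm << FSR_VXRM_SHIFT) | (vxsat << FSR_VXSAT_SHIFT)
             | (fflags << FSR_AEXC_SHIFT) | (frm << FSR_RD_SHIFT);
    }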
>>> diff --git a/target/riscv/helper.h b/target/riscv/helper.h
>>> index debb22a..fee02c0 100644
>>> --- a/target/riscv/helper.h
>>> +++ b/target/riscv/helper.h
>>> @@ -76,3 +76,357 @@ DEF_HELPER_2(mret, tl, env, tl)
>>> DEF_HELPER_1(wfi, void, env)
>>> DEF_HELPER_1(tlb_flush, void, env)
>>> #endif
>>> +/* Vector functions */
>>> +DEF_HELPER_5(vector_vlb_v, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vlh_v, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vlw_v, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vle_v, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vlbu_v, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vlhu_v, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vlwu_v, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vlbff_v, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vlhff_v, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vlwff_v, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vleff_v, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vlbuff_v, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vlhuff_v, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vlwuff_v, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vsb_v, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vsh_v, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vsw_v, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vse_v, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_6(vector_vlsb_v, void, env, i32, i32, i32, i32, i32)
>>> +DEF_HELPER_6(vector_vlsh_v, void, env, i32, i32, i32, i32, i32)
>>> +DEF_HELPER_6(vector_vlsw_v, void, env, i32, i32, i32, i32, i32)
>>> +DEF_HELPER_6(vector_vlse_v, void, env, i32, i32, i32, i32, i32)
>>> +DEF_HELPER_6(vector_vlsbu_v, void, env, i32, i32, i32, i32, i32)
>>> +DEF_HELPER_6(vector_vlshu_v, void, env, i32, i32, i32, i32, i32)
>>> +DEF_HELPER_6(vector_vlswu_v, void, env, i32, i32, i32, i32, i32)
>>> +DEF_HELPER_6(vector_vssb_v, void, env, i32, i32, i32, i32, i32)
>>> +DEF_HELPER_6(vector_vssh_v, void, env, i32, i32, i32, i32, i32)
>>> +DEF_HELPER_6(vector_vssw_v, void, env, i32, i32, i32, i32, i32)
>>> +DEF_HELPER_6(vector_vsse_v, void, env, i32, i32, i32, i32, i32)
>>> +DEF_HELPER_6(vector_vlxb_v, void, env, i32, i32, i32, i32, i32)
>>> +DEF_HELPER_6(vector_vlxh_v, void, env, i32, i32, i32, i32, i32)
>>> +DEF_HELPER_6(vector_vlxw_v, void, env, i32, i32, i32, i32, i32)
>>> +DEF_HELPER_6(vector_vlxe_v, void, env, i32, i32, i32, i32, i32)
>>> +DEF_HELPER_6(vector_vlxbu_v, void, env, i32, i32, i32, i32, i32)
>>> +DEF_HELPER_6(vector_vlxhu_v, void, env, i32, i32, i32, i32, i32)
>>> +DEF_HELPER_6(vector_vlxwu_v, void, env, i32, i32, i32, i32, i32)
>>> +DEF_HELPER_6(vector_vsxb_v, void, env, i32, i32, i32, i32, i32)
>>> +DEF_HELPER_6(vector_vsxh_v, void, env, i32, i32, i32, i32, i32)
>>> +DEF_HELPER_6(vector_vsxw_v, void, env, i32, i32, i32, i32, i32)
>>> +DEF_HELPER_6(vector_vsxe_v, void, env, i32, i32, i32, i32, i32)
>>> +DEF_HELPER_6(vector_vsuxb_v, void, env, i32, i32, i32, i32, i32)
>>> +DEF_HELPER_6(vector_vsuxh_v, void, env, i32, i32, i32, i32, i32)
>>> +DEF_HELPER_6(vector_vsuxw_v, void, env, i32, i32, i32, i32, i32)
>>> +DEF_HELPER_6(vector_vsuxe_v, void, env, i32, i32, i32, i32, i32)
>>> +DEF_HELPER_6(vector_vamoswapw_v, void, env, i32, i32, i32, i32, i32)
>>> +DEF_HELPER_6(vector_vamoswapd_v, void, env, i32, i32, i32, i32, i32)
>>> +DEF_HELPER_6(vector_vamoaddw_v, void, env, i32, i32, i32, i32, i32)
>>> +DEF_HELPER_6(vector_vamoaddd_v, void, env, i32, i32, i32, i32, i32)
>>> +DEF_HELPER_6(vector_vamoxorw_v, void, env, i32, i32, i32, i32, i32)
>>> +DEF_HELPER_6(vector_vamoxord_v, void, env, i32, i32, i32, i32, i32)
>>> +DEF_HELPER_6(vector_vamoandw_v, void, env, i32, i32, i32, i32, i32)
>>> +DEF_HELPER_6(vector_vamoandd_v, void, env, i32, i32, i32, i32, i32)
>>> +DEF_HELPER_6(vector_vamoorw_v, void, env, i32, i32, i32, i32, i32)
>>> +DEF_HELPER_6(vector_vamoord_v, void, env, i32, i32, i32, i32, i32)
>>> +DEF_HELPER_6(vector_vamominw_v, void, env, i32, i32, i32, i32, i32)
>>> +DEF_HELPER_6(vector_vamomind_v, void, env, i32, i32, i32, i32, i32)
>>> +DEF_HELPER_6(vector_vamomaxw_v, void, env, i32, i32, i32, i32, i32)
>>> +DEF_HELPER_6(vector_vamomaxd_v, void, env, i32, i32, i32, i32, i32)
>>> +DEF_HELPER_6(vector_vamominuw_v, void, env, i32, i32, i32, i32, i32)
>>> +DEF_HELPER_6(vector_vamominud_v, void, env, i32, i32, i32, i32, i32)
>>> +DEF_HELPER_6(vector_vamomaxuw_v, void, env, i32, i32, i32, i32, i32)
>>> +DEF_HELPER_6(vector_vamomaxud_v, void, env, i32, i32, i32, i32, i32)
>>> +DEF_HELPER_4(vector_vext_x_v, void, env, i32, i32, i32)
>>> +DEF_HELPER_4(vector_vfmv_f_s, void, env, i32, i32, i32)
>>> +DEF_HELPER_4(vector_vmv_s_x, void, env, i32, i32, i32)
>>> +DEF_HELPER_4(vector_vfmv_s_f, void, env, i32, i32, i32)
>>> +DEF_HELPER_4(vector_vadc_vvm, void, env, i32, i32, i32)
>>> +DEF_HELPER_4(vector_vadc_vxm, void, env, i32, i32, i32)
>>> +DEF_HELPER_4(vector_vadc_vim, void, env, i32, i32, i32)
>>> +DEF_HELPER_4(vector_vmadc_vvm, void, env, i32, i32, i32)
>>> +DEF_HELPER_4(vector_vmadc_vxm, void, env, i32, i32, i32)
>>> +DEF_HELPER_4(vector_vmadc_vim, void, env, i32, i32, i32)
>>> +DEF_HELPER_4(vector_vsbc_vvm, void, env, i32, i32, i32)
>>> +DEF_HELPER_4(vector_vsbc_vxm, void, env, i32, i32, i32)
>>> +DEF_HELPER_4(vector_vmsbc_vvm, void, env, i32, i32, i32)
>>> +DEF_HELPER_4(vector_vmsbc_vxm, void, env, i32, i32, i32)
>>> +DEF_HELPER_4(vector_vmpopc_m, void, env, i32, i32, i32)
>>> +DEF_HELPER_4(vector_vmfirst_m, void, env, i32, i32, i32)
>>> +DEF_HELPER_4(vector_vcompress_vm, void, env, i32, i32, i32)
>>> +DEF_HELPER_4(vector_vmandnot_mm, void, env, i32, i32, i32)
>>> +DEF_HELPER_4(vector_vmand_mm, void, env, i32, i32, i32)
>>> +DEF_HELPER_4(vector_vmor_mm, void, env, i32, i32, i32)
>>> +DEF_HELPER_4(vector_vmxor_mm, void, env, i32, i32, i32)
>>> +DEF_HELPER_4(vector_vmornot_mm, void, env, i32, i32, i32)
>>> +DEF_HELPER_4(vector_vmnand_mm, void, env, i32, i32, i32)
>>> +DEF_HELPER_4(vector_vmnor_mm, void, env, i32, i32, i32)
>>> +DEF_HELPER_4(vector_vmxnor_mm, void, env, i32, i32, i32)
>>> +DEF_HELPER_4(vector_vmsbf_m, void, env, i32, i32, i32)
>>> +DEF_HELPER_4(vector_vmsof_m, void, env, i32, i32, i32)
>>> +DEF_HELPER_4(vector_vmsif_m, void, env, i32, i32, i32)
>>> +DEF_HELPER_4(vector_viota_m, void, env, i32, i32, i32)
>>> +DEF_HELPER_3(vector_vid_v, void, env, i32, i32)
>>> +DEF_HELPER_4(vector_vfcvt_xu_f_v, void, env, i32, i32, i32)
>>> +DEF_HELPER_4(vector_vfcvt_x_f_v, void, env, i32, i32, i32)
>>> +DEF_HELPER_4(vector_vfcvt_f_xu_v, void, env, i32, i32, i32)
>>> +DEF_HELPER_4(vector_vfcvt_f_x_v, void, env, i32, i32, i32)
>>> +DEF_HELPER_4(vector_vfwcvt_xu_f_v, void, env, i32, i32, i32)
>>> +DEF_HELPER_4(vector_vfwcvt_x_f_v, void, env, i32, i32, i32)
>>> +DEF_HELPER_4(vector_vfwcvt_f_xu_v, void, env, i32, i32, i32)
>>> +DEF_HELPER_4(vector_vfwcvt_f_x_v, void, env, i32, i32, i32)
>>> +DEF_HELPER_4(vector_vfwcvt_f_f_v, void, env, i32, i32, i32)
>>> +DEF_HELPER_4(vector_vfncvt_xu_f_v, void, env, i32, i32, i32)
>>> +DEF_HELPER_4(vector_vfncvt_x_f_v, void, env, i32, i32, i32)
>>> +DEF_HELPER_4(vector_vfncvt_f_xu_v, void, env, i32, i32, i32)
>>> +DEF_HELPER_4(vector_vfncvt_f_x_v, void, env, i32, i32, i32)
>>> +DEF_HELPER_4(vector_vfncvt_f_f_v, void, env, i32, i32, i32)
>>> +DEF_HELPER_4(vector_vfsqrt_v, void, env, i32, i32, i32)
>>> +DEF_HELPER_4(vector_vfclass_v, void, env, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vadd_vv, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vadd_vx, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vadd_vi, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vredsum_vs, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vfadd_vv, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vfadd_vf, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vredand_vs, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vfredsum_vs, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vsub_vv, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vsub_vx, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vredor_vs, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vfsub_vv, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vfsub_vf, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vrsub_vx, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vrsub_vi, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vredxor_vs, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vfredosum_vs, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vminu_vv, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vminu_vx, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vredminu_vs, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vfmin_vv, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vfmin_vf, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vmin_vv, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vmin_vx, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vredmin_vs, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vfredmin_vs, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vmaxu_vv, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vmaxu_vx, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vredmaxu_vs, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vfmax_vv, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vfmax_vf, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vmax_vv, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vmax_vx, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vredmax_vs, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vfredmax_vs, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vfsgnj_vv, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vfsgnj_vf, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vand_vv, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vand_vx, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vand_vi, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vfsgnjn_vv, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vfsgnjn_vf, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vor_vv, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vor_vx, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vor_vi, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vfsgnjx_vv, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vfsgnjx_vf, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vxor_vv, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vxor_vx, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vxor_vi, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vrgather_vv, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vrgather_vx, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vrgather_vi, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vslideup_vx, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vslideup_vi, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vslide1up_vx, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vslidedown_vx, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vslidedown_vi, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vslide1down_vx, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vmerge_vvm, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vmerge_vxm, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vmerge_vim, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vfmerge_vfm, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vmseq_vv, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vmseq_vx, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vmseq_vi, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vmfeq_vv, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vmfeq_vf, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vmsne_vv, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vmsne_vx, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vmsne_vi, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vmfle_vv, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vmfle_vf, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vmsltu_vv, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vmsltu_vx, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vmford_vv, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vmford_vf, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vmslt_vv, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vmslt_vx, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vmflt_vv, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vmflt_vf, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vmsleu_vv, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vmsleu_vx, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vmsleu_vi, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vmfne_vv, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vmfne_vf, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vmsle_vv, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vmsle_vx, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vmsle_vi, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vmfgt_vf, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vmsgtu_vx, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vmsgtu_vi, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vmsgt_vx, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vmsgt_vi, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vmfge_vf, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vsaddu_vv, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vsaddu_vx, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vsaddu_vi, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vdivu_vv, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vdivu_vx, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vfdiv_vv, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vfdiv_vf, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vsadd_vv, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vsadd_vx, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vsadd_vi, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vdiv_vv, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vdiv_vx, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vfrdiv_vf, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vssubu_vv, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vssubu_vx, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vremu_vv, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vremu_vx, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vssub_vv, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vssub_vx, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vrem_vv, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vrem_vx, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vaadd_vv, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vaadd_vx, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vaadd_vi, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vmulhu_vv, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vmulhu_vx, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vfmul_vv, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vfmul_vf, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vsll_vv, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vsll_vx, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vsll_vi, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vmul_vv, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vmul_vx, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vasub_vv, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vasub_vx, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vmulhsu_vv, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vmulhsu_vx, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vsmul_vv, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vsmul_vx, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vmulh_vv, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vmulh_vx, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vfrsub_vf, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vsrl_vv, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vsrl_vx, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vsrl_vi, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vfmadd_vv, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vfmadd_vf, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vsra_vv, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vsra_vx, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vsra_vi, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vmadd_vv, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vmadd_vx, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vfnmadd_vv, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vfnmadd_vf, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vssrl_vv, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vssrl_vx, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vssrl_vi, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vfmsub_vv, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vfmsub_vf, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vssra_vv, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vssra_vx, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vssra_vi, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vnmsub_vv, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vnmsub_vx, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vfnmsub_vv, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vfnmsub_vf, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vnsrl_vv, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vnsrl_vx, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vnsrl_vi, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vfmacc_vv, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vfmacc_vf, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vnsra_vv, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vnsra_vx, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vnsra_vi, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vmacc_vv, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vmacc_vx, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vfnmacc_vv, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vfnmacc_vf, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vnclipu_vv, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vnclipu_vx, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vnclipu_vi, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vfmsac_vv, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vfmsac_vf, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vnclip_vv, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vnclip_vx, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vnclip_vi, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vnmsac_vv, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vnmsac_vx, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vfnmsac_vv, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vfnmsac_vf, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vwredsumu_vs, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vwaddu_vv, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vwaddu_vx, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vfwadd_vv, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vfwadd_vf, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vwredsum_vs, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vwadd_vv, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vwadd_vx, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vfwredsum_vs, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vwsubu_vv, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vwsubu_vx, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vfwsub_vv, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vfwsub_vf, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vwsub_vv, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vwsub_vx, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vfwredosum_vs, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vwaddu_wv, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vwaddu_wx, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vfwadd_wv, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vfwadd_wf, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vwadd_wv, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vwadd_wx, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vwsubu_wv, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vwsubu_wx, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vfwsub_wv, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vfwsub_wf, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vwsub_wv, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vwsub_wx, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vwmulu_vv, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vwmulu_vx, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vfwmul_vv, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vfwmul_vf, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vwmulsu_vv, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vwmulsu_vx, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vwmul_vv, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vwmul_vx, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vwsmaccu_vv, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vwsmaccu_vx, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vwmaccu_vv, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vwmaccu_vx, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vfwmacc_vv, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vfwmacc_vf, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vwsmacc_vv, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vwsmacc_vx, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vwmacc_vv, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vwmacc_vx, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vfwnmacc_vv, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vfwnmacc_vf, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vwsmaccsu_vv, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vwsmaccsu_vx, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vwmaccsu_vv, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vwmaccsu_vx, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vfwmsac_vv, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vfwmsac_vf, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vwsmaccus_vx, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vwmaccus_vx, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vfwnmsac_vv, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_5(vector_vfwnmsac_vf, void, env, i32, i32, i32, i32)
>>> +DEF_HELPER_4(vector_vsetvli, void, env, i32, i32, i32)
>>> +DEF_HELPER_4(vector_vsetvl, void, env, i32, i32, i32)
>>> diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode
>>> index 77f794e..d125ff9 100644
>>> --- a/target/riscv/insn32.decode
>>> +++ b/target/riscv/insn32.decode
>>> @@ -25,7 +25,7 @@
>>> %sh10 20:10
>>> %csr 20:12
>>> %rm 12:3
>>> -
>>> +%nf 29:3
>>> # immediates:
>>> %imm_i 20:s12
>>> %imm_s 25:s7 7:5
>>> @@ -43,7 +43,6 @@
>>> &u imm rd
>>> &shift shamt rs1 rd
>>> &atomic aq rl rs2 rs1 rd
>>> -
>>> # Formats 32:
>>> @r ....... ..... ..... ... ..... ....... &r %rs2 %rs1 %rd
>>> @i ............ ..... ... ..... ....... &i imm=%imm_i %rs1 %rd
>>> @@ -62,11 +61,17 @@
>>> @r_rm ....... ..... ..... ... ..... ....... %rs2 %rs1 %rm %rd
>>> @r2_rm ....... ..... ..... ... ..... ....... %rs1 %rm %rd
>>> @r2 ....... ..... ..... ... ..... ....... %rs1 %rd
>>> +@r_vm ...... vm:1 ..... ..... ... ..... ....... %rs2 %rs1 %rd
>>> +@r_wdvm ..... wd:1 vm:1 ..... ..... ... ..... ....... %rs2 %rs1 %rd
>>> +@r_nfvm nf:3 ... vm:1 ..... ..... ... ..... ....... %rs2 %rs1 %rd
>>> +@r2_nfvm nf:3 ... vm:1 ..... ..... ... ..... ....... %rs1 %rd
>>> +@r2_vm ...... vm:1 ..... ..... ... ..... ....... %rs2 %rd
>>> +@r1_vm ...... vm:1 ..... ..... ... ..... ....... %rd
>>> +@r2_zimm . zimm:11 ..... ... ..... ....... %rs1 %rd
>>>
>>> @sfence_vma ....... ..... ..... ... ..... ....... %rs2 %rs1
>>> @sfence_vm ....... ..... ..... ... ..... ....... %rs1
>>>
>>> -
>>> # *** Privileged Instructions ***
>>> ecall 000000000000 00000 000 00000 1110011
>>> ebreak 000000000001 00000 000 00000 1110011
>>> @@ -203,3 +208,366 @@ fcvt_w_d 1100001 00000 ..... ... ..... 1010011 @r2_rm
>>> fcvt_wu_d 1100001 00001 ..... ... ..... 1010011 @r2_rm
>>> fcvt_d_w 1101001 00000 ..... ... ..... 1010011 @r2_rm
>>> fcvt_d_wu 1101001 00001 ..... ... ..... 1010011 @r2_rm
>>> +
>>> +# *** RV32V Standard Extension ***
>>> +
>>> +# *** Vector loads and stores are encoded within LOADFP/STORE-FP ***
>>> +vlb_v ... 100 . 00000 ..... 000 ..... 0000111 @r2_nfvm
>>> +vlh_v ... 100 . 00000 ..... 101 ..... 0000111 @r2_nfvm
>>> +vlw_v ... 100 . 00000 ..... 110 ..... 0000111 @r2_nfvm
>>> +vle_v ... 000 . 00000 ..... 111 ..... 0000111 @r2_nfvm
>>> +vlbu_v ... 000 . 00000 ..... 000 ..... 0000111 @r2_nfvm
>>> +vlhu_v ... 000 . 00000 ..... 101 ..... 0000111 @r2_nfvm
>>> +vlwu_v ... 000 . 00000 ..... 110 ..... 0000111 @r2_nfvm
>>> +vlbff_v ... 100 . 10000 ..... 000 ..... 0000111 @r2_nfvm
>>> +vlhff_v ... 100 . 10000 ..... 101 ..... 0000111 @r2_nfvm
>>> +vlwff_v ... 100 . 10000 ..... 110 ..... 0000111 @r2_nfvm
>>> +vleff_v ... 000 . 10000 ..... 111 ..... 0000111 @r2_nfvm
>>> +vlbuff_v ... 000 . 10000 ..... 000 ..... 0000111 @r2_nfvm
>>> +vlhuff_v ... 000 . 10000 ..... 101 ..... 0000111 @r2_nfvm
>>> +vlwuff_v ... 000 . 10000 ..... 110 ..... 0000111 @r2_nfvm
>>> +vsb_v ... 000 . 00000 ..... 000 ..... 0100111 @r2_nfvm
>>> +vsh_v ... 000 . 00000 ..... 101 ..... 0100111 @r2_nfvm
>>> +vsw_v ... 000 . 00000 ..... 110 ..... 0100111 @r2_nfvm
>>> +vse_v ... 000 . 00000 ..... 111 ..... 0100111 @r2_nfvm
>>> +
>>> +vlsb_v ... 110 . ..... ..... 000 ..... 0000111 @r_nfvm
>>> +vlsh_v ... 110 . ..... ..... 101 ..... 0000111 @r_nfvm
>>> +vlsw_v ... 110 . ..... ..... 110 ..... 0000111 @r_nfvm
>>> +vlse_v ... 010 . ..... ..... 111 ..... 0000111 @r_nfvm
>>> +vlsbu_v ... 010 . ..... ..... 000 ..... 0000111 @r_nfvm
>>> +vlshu_v ... 010 . ..... ..... 101 ..... 0000111 @r_nfvm
>>> +vlswu_v ... 010 . ..... ..... 110 ..... 0000111 @r_nfvm
>>> +vssb_v ... 010 . ..... ..... 000 ..... 0100111 @r_nfvm
>>> +vssh_v ... 010 . ..... ..... 101 ..... 0100111 @r_nfvm
>>> +vssw_v ... 010 . ..... ..... 110 ..... 0100111 @r_nfvm
>>> +vsse_v ... 010 . ..... ..... 111 ..... 0100111 @r_nfvm
>>> +
>>> +vlxb_v ... 111 . ..... ..... 000 ..... 0000111 @r_nfvm
>>> +vlxh_v ... 111 . ..... ..... 101 ..... 0000111 @r_nfvm
>>> +vlxw_v ... 111 . ..... ..... 110 ..... 0000111 @r_nfvm
>>> +vlxe_v ... 011 . ..... ..... 111 ..... 0000111 @r_nfvm
>>> +vlxbu_v ... 011 . ..... ..... 000 ..... 0000111 @r_nfvm
>>> +vlxhu_v ... 011 . ..... ..... 101 ..... 0000111 @r_nfvm
>>> +vlxwu_v ... 011 . ..... ..... 110 ..... 0000111 @r_nfvm
>>> +vsxb_v ... 011 . ..... ..... 000 ..... 0100111 @r_nfvm
>>> +vsxh_v ... 011 . ..... ..... 101 ..... 0100111 @r_nfvm
>>> +vsxw_v ... 011 . ..... ..... 110 ..... 0100111 @r_nfvm
>>> +vsxe_v ... 011 . ..... ..... 111 ..... 0100111 @r_nfvm
>>> +vsuxb_v ... 111 . ..... ..... 000 ..... 0100111 @r_nfvm
>>> +vsuxh_v ... 111 . ..... ..... 101 ..... 0100111 @r_nfvm
>>> +vsuxw_v ... 111 . ..... ..... 110 ..... 0100111 @r_nfvm
>>> +vsuxe_v ... 111 . ..... ..... 111 ..... 0100111 @r_nfvm
>>> +
>>> +#*** Vector AMO operations are encoded under the standard AMO major opcode.***
>>> +vamoswapw_v 00001 . . ..... ..... 110 ..... 0101111 @r_wdvm
>>> +vamoswapd_v 00001 . . ..... ..... 111 ..... 0101111 @r_wdvm
>>> +vamoaddw_v 00000 . . ..... ..... 110 ..... 0101111 @r_wdvm
>>> +vamoaddd_v 00000 . . ..... ..... 111 ..... 0101111 @r_wdvm
>>> +vamoxorw_v 00100 . . ..... ..... 110 ..... 0101111 @r_wdvm
>>> +vamoxord_v 00100 . . ..... ..... 111 ..... 0101111 @r_wdvm
>>> +vamoandw_v 01100 . . ..... ..... 110 ..... 0101111 @r_wdvm
>>> +vamoandd_v 01100 . . ..... ..... 111 ..... 0101111 @r_wdvm
>>> +vamoorw_v 01000 . . ..... ..... 110 ..... 0101111 @r_wdvm
>>> +vamoord_v 01000 . . ..... ..... 111 ..... 0101111 @r_wdvm
>>> +vamominw_v 10000 . . ..... ..... 110 ..... 0101111 @r_wdvm
>>> +vamomind_v 10000 . . ..... ..... 111 ..... 0101111 @r_wdvm
>>> +vamomaxw_v 10100 . . ..... ..... 110 ..... 0101111 @r_wdvm
>>> +vamomaxd_v 10100 . . ..... ..... 111 ..... 0101111 @r_wdvm
>>> +vamominuw_v 11000 . . ..... ..... 110 ..... 0101111 @r_wdvm
>>> +vamominud_v 11000 . . ..... ..... 111 ..... 0101111 @r_wdvm
>>> +vamomaxuw_v 11100 . . ..... ..... 110 ..... 0101111 @r_wdvm
>>> +vamomaxud_v 11100 . . ..... ..... 111 ..... 0101111 @r_wdvm
>>> +
>>> +#*** new major opcode OP-V ***
>>> +vadd_vv 000000 . ..... ..... 000 ..... 1010111 @r_vm
>>> +vadd_vx 000000 . ..... ..... 100 ..... 1010111 @r_vm
>>> +vadd_vi 000000 . ..... ..... 011 ..... 1010111 @r_vm
>>> +vredsum_vs 000000 . ..... ..... 010 ..... 1010111 @r_vm
>>> +vfadd_vv 000000 . ..... ..... 001 ..... 1010111 @r_vm
>>> +vfadd_vf 000000 . ..... ..... 101 ..... 1010111 @r_vm
>>> +vredand_vs 000001 . ..... ..... 010 ..... 1010111 @r_vm
>>> +vfredsum_vs 000001 . ..... ..... 001 ..... 1010111 @r_vm
>>> +vsub_vv 000010 . ..... ..... 000 ..... 1010111 @r_vm
>>> +vsub_vx 000010 . ..... ..... 100 ..... 1010111 @r_vm
>>> +vredor_vs 000010 . ..... ..... 010 ..... 1010111 @r_vm
>>> +vfsub_vv 000010 . ..... ..... 001 ..... 1010111 @r_vm
>>> +vfsub_vf 000010 . ..... ..... 101 ..... 1010111 @r_vm
>>> +vrsub_vx 000011 . ..... ..... 100 ..... 1010111 @r_vm
>>> +vrsub_vi 000011 . ..... ..... 011 ..... 1010111 @r_vm
>>> +vredxor_vs 000011 . ..... ..... 010 ..... 1010111 @r_vm
>>> +vfredosum_vs 000011 . ..... ..... 001 ..... 1010111 @r_vm
>>> +vminu_vv 000100 . ..... ..... 000 ..... 1010111 @r_vm
>>> +vminu_vx 000100 . ..... ..... 100 ..... 1010111 @r_vm
>>> +vredminu_vs 000100 . ..... ..... 010 ..... 1010111 @r_vm
>>> +vfmin_vv 000100 . ..... ..... 001 ..... 1010111 @r_vm
>>> +vfmin_vf 000100 . ..... ..... 101 ..... 1010111 @r_vm
>>> +vmin_vv 000101 . ..... ..... 000 ..... 1010111 @r_vm
>>> +vmin_vx 000101 . ..... ..... 100 ..... 1010111 @r_vm
>>> +vredmin_vs 000101 . ..... ..... 010 ..... 1010111 @r_vm
>>> +vfredmin_vs 000101 . ..... ..... 001 ..... 1010111 @r_vm
>>> +vmaxu_vv 000110 . ..... ..... 000 ..... 1010111 @r_vm
>>> +vmaxu_vx 000110 . ..... ..... 100 ..... 1010111 @r_vm
>>> +vredmaxu_vs 000110 . ..... ..... 010 ..... 1010111 @r_vm
>>> +vfmax_vv 000110 . ..... ..... 001 ..... 1010111 @r_vm
>>> +vfmax_vf 000110 . ..... ..... 101 ..... 1010111 @r_vm
>>> +vmax_vv 000111 . ..... ..... 000 ..... 1010111 @r_vm
>>> +vmax_vx 000111 . ..... ..... 100 ..... 1010111 @r_vm
>>> +vredmax_vs 000111 . ..... ..... 010 ..... 1010111 @r_vm
>>> +vfredmax_vs 000111 . ..... ..... 001 ..... 1010111 @r_vm
>>> +vfsgnj_vv 001000 . ..... ..... 001 ..... 1010111 @r_vm
>>> +vfsgnj_vf 001000 . ..... ..... 101 ..... 1010111 @r_vm
>>> +vand_vv 001001 . ..... ..... 000 ..... 1010111 @r_vm
>>> +vand_vx 001001 . ..... ..... 100 ..... 1010111 @r_vm
>>> +vand_vi 001001 . ..... ..... 011 ..... 1010111 @r_vm
>>> +vfsgnjn_vv 001001 . ..... ..... 001 ..... 1010111 @r_vm
>>> +vfsgnjn_vf 001001 . ..... ..... 101 ..... 1010111 @r_vm
>>> +vor_vv 001010 . ..... ..... 000 ..... 1010111 @r_vm
>>> +vor_vx 001010 . ..... ..... 100 ..... 1010111 @r_vm
>>> +vor_vi 001010 . ..... ..... 011 ..... 1010111 @r_vm
>>> +vfsgnjx_vv 001010 . ..... ..... 001 ..... 1010111 @r_vm
>>> +vfsgnjx_vf 001010 . ..... ..... 101 ..... 1010111 @r_vm
>>> +vxor_vv 001011 . ..... ..... 000 ..... 1010111 @r_vm
>>> +vxor_vx 001011 . ..... ..... 100 ..... 1010111 @r_vm
>>> +vxor_vi 001011 . ..... ..... 011 ..... 1010111 @r_vm
>>> +vrgather_vv 001100 . ..... ..... 000 ..... 1010111 @r_vm
>>> +vrgather_vx 001100 . ..... ..... 100 ..... 1010111 @r_vm
>>> +vrgather_vi 001100 . ..... ..... 011 ..... 1010111 @r_vm
>>> +vext_x_v 001100 1 ..... ..... 010 ..... 1010111 @r
>>> +vfmv_f_s 001100 1 ..... ..... 001 ..... 1010111 @r
>>> +vmv_s_x 001101 1 ..... ..... 110 ..... 1010111 @r
>>> +vfmv_s_f 001101 1 ..... ..... 101 ..... 1010111 @r
>>> +vslideup_vx 001110 . ..... ..... 100 ..... 1010111 @r_vm
>>> +vslideup_vi 001110 . ..... ..... 011 ..... 1010111 @r_vm
>>> +vslide1up_vx 001110 . ..... ..... 110 ..... 1010111 @r_vm
>>> +vslidedown_vx 001111 . ..... ..... 100 ..... 1010111 @r_vm
>>> +vslidedown_vi 001111 . ..... ..... 011 ..... 1010111 @r_vm
>>> +vslide1down_vx 001111 . ..... ..... 110 ..... 1010111 @r_vm
>>> +vadc_vvm 010000 1 ..... ..... 000 ..... 1010111 @r
>>> +vadc_vxm 010000 1 ..... ..... 100 ..... 1010111 @r
>>> +vadc_vim 010000 1 ..... ..... 011 ..... 1010111 @r
>>> +vmadc_vvm 010001 1 ..... ..... 000 ..... 1010111 @r
>>> +vmadc_vxm 010001 1 ..... ..... 100 ..... 1010111 @r
>>> +vmadc_vim 010001 1 ..... ..... 011 ..... 1010111 @r
>>> +vsbc_vvm 010010 1 ..... ..... 000 ..... 1010111 @r
>>> +vsbc_vxm 010010 1 ..... ..... 100 ..... 1010111 @r
>>> +vmsbc_vvm 010011 1 ..... ..... 000 ..... 1010111 @r
>>> +vmsbc_vxm 010011 1 ..... ..... 100 ..... 1010111 @r
>>> +vmpopc_m 010100 . ..... ----- 010 ..... 1010111 @r2_vm
>>> +vmfirst_m 010101 . ..... ----- 010 ..... 1010111 @r2_vm
>>> +vmsbf_m 010110 . ..... 00001 010 ..... 1010111 @r2_vm
>>> +vmsof_m 010110 . ..... 00010 010 ..... 1010111 @r2_vm
>>> +vmsif_m 010110 . ..... 00011 010 ..... 1010111 @r2_vm
>>> +viota_m 010110 . ..... 10000 010 ..... 1010111 @r2_vm
>>> +vid_v 010110 . 00000 10001 010 ..... 1010111 @r1_vm
>>> +vmerge_vvm 010111 . ..... ..... 000 ..... 1010111 @r_vm
>>> +vmerge_vxm 010111 . ..... ..... 100 ..... 1010111 @r_vm
>>> +vmerge_vim 010111 . ..... ..... 011 ..... 1010111 @r_vm
>>> +vcompress_vm 010111 - ..... ..... 010 ..... 1010111 @r
>>> +vfmerge_vfm 010111 . ..... ..... 101 ..... 1010111 @r_vm
>>> +vmseq_vv 011000 . ..... ..... 000 ..... 1010111 @r_vm
>>> +vmseq_vx 011000 . ..... ..... 100 ..... 1010111 @r_vm
>>> +vmseq_vi 011000 . ..... ..... 011 ..... 1010111 @r_vm
>>> +vmandnot_mm 011000 - ..... ..... 010 ..... 1010111 @r
>>> +vmfeq_vv 011000 . ..... ..... 001 ..... 1010111 @r_vm
>>> +vmfeq_vf 011000 . ..... ..... 101 ..... 1010111 @r_vm
>>> +vmsne_vv 011001 . ..... ..... 000 ..... 1010111 @r_vm
>>> +vmsne_vx 011001 . ..... ..... 100 ..... 1010111 @r_vm
>>> +vmsne_vi 011001 . ..... ..... 011 ..... 1010111 @r_vm
>>> +vmand_mm 011001 - ..... ..... 010 ..... 1010111 @r
>>> +vmfle_vv 011001 . ..... ..... 001 ..... 1010111 @r_vm
>>> +vmfle_vf 011001 . ..... ..... 101 ..... 1010111 @r_vm
>>> +vmsltu_vv 011010 . ..... ..... 000 ..... 1010111 @r_vm
>>> +vmsltu_vx 011010 . ..... ..... 100 ..... 1010111 @r_vm
>>> +vmor_mm 011010 - ..... ..... 010 ..... 1010111 @r
>>> +vmford_vv 011010 . ..... ..... 001 ..... 1010111 @r_vm
>>> +vmford_vf 011010 . ..... ..... 101 ..... 1010111 @r_vm
>>> +vmslt_vv 011011 . ..... ..... 000 ..... 1010111 @r_vm
>>> +vmslt_vx 011011 . ..... ..... 100 ..... 1010111 @r_vm
>>> +vmxor_mm 011011 - ..... ..... 010 ..... 1010111 @r
>>> +vmflt_vv 011011 . ..... ..... 001 ..... 1010111 @r_vm
>>> +vmflt_vf 011011 . ..... ..... 101 ..... 1010111 @r_vm
>>> +vmsleu_vv 011100 . ..... ..... 000 ..... 1010111 @r_vm
>>> +vmsleu_vx 011100 . ..... ..... 100 ..... 1010111 @r_vm
>>> +vmsleu_vi 011100 . ..... ..... 011 ..... 1010111 @r_vm
>>> +vmornot_mm 011100 - ..... ..... 010 ..... 1010111 @r
>>> +vmfne_vv 011100 . ..... ..... 001 ..... 1010111 @r_vm
>>> +vmfne_vf 011100 . ..... ..... 101 ..... 1010111 @r_vm
>>> +vmsle_vv 011101 . ..... ..... 000 ..... 1010111 @r_vm
>>> +vmsle_vx 011101 . ..... ..... 100 ..... 1010111 @r_vm
>>> +vmsle_vi 011101 . ..... ..... 011 ..... 1010111 @r_vm
>>> +vmnand_mm 011101 - ..... ..... 010 ..... 1010111 @r
>>> +vmfgt_vf 011101 . ..... ..... 101 ..... 1010111 @r_vm
>>> +vmsgtu_vx 011110 . ..... ..... 100 ..... 1010111 @r_vm
>>> +vmsgtu_vi 011110 . ..... ..... 011 ..... 1010111 @r_vm
>>> +vmnor_mm 011110 - ..... ..... 010 ..... 1010111 @r
>>> +vmsgt_vx 011111 . ..... ..... 100 ..... 1010111 @r_vm
>>> +vmsgt_vi 011111 . ..... ..... 011 ..... 1010111 @r_vm
>>> +vmxnor_mm 011111 - ..... ..... 010 ..... 1010111 @r
>>> +vmfge_vf 011111 . ..... ..... 101 ..... 1010111 @r_vm
>>> +vsaddu_vv 100000 . ..... ..... 000 ..... 1010111 @r_vm
>>> +vsaddu_vx 100000 . ..... ..... 100 ..... 1010111 @r_vm
>>> +vsaddu_vi 100000 . ..... ..... 011 ..... 1010111 @r_vm
>>> +vdivu_vv 100000 . ..... ..... 010 ..... 1010111 @r_vm
>>> +vdivu_vx 100000 . ..... ..... 110 ..... 1010111 @r_vm
>>> +vfdiv_vv 100000 . ..... ..... 001 ..... 1010111 @r_vm
>>> +vfdiv_vf 100000 . ..... ..... 101 ..... 1010111 @r_vm
>>> +vsadd_vv 100001 . ..... ..... 000 ..... 1010111 @r_vm
>>> +vsadd_vx 100001 . ..... ..... 100 ..... 1010111 @r_vm
>>> +vsadd_vi 100001 . ..... ..... 011 ..... 1010111 @r_vm
>>> +vdiv_vv 100001 . ..... ..... 010 ..... 1010111 @r_vm
>>> +vdiv_vx 100001 . ..... ..... 110 ..... 1010111 @r_vm
>>> +vfrdiv_vf 100001 . ..... ..... 101 ..... 1010111 @r_vm
>>> +vssubu_vv 100010 . ..... ..... 000 ..... 1010111 @r_vm
>>> +vssubu_vx 100010 . ..... ..... 100 ..... 1010111 @r_vm
>>> +vremu_vv 100010 . ..... ..... 010 ..... 1010111 @r_vm
>>> +vremu_vx 100010 . ..... ..... 110 ..... 1010111 @r_vm
>>> +vfcvt_xu_f_v 100010 . ..... 00000 001 ..... 1010111 @r2_vm
>>> +vfcvt_x_f_v 100010 . ..... 00001 001 ..... 1010111 @r2_vm
>>> +vfcvt_f_xu_v 100010 . ..... 00010 001 ..... 1010111 @r2_vm
>>> +vfcvt_f_x_v 100010 . ..... 00011 001 ..... 1010111 @r2_vm
>>> +vfwcvt_xu_f_v 100010 . ..... 01000 001 ..... 1010111 @r2_vm
>>> +vfwcvt_x_f_v 100010 . ..... 01001 001 ..... 1010111 @r2_vm
>>> +vfwcvt_f_xu_v 100010 . ..... 01010 001 ..... 1010111 @r2_vm
>>> +vfwcvt_f_x_v 100010 . ..... 01011 001 ..... 1010111 @r2_vm
>>> +vfwcvt_f_f_v 100010 . ..... 01100 001 ..... 1010111 @r2_vm
>>> +vfncvt_xu_f_v 100010 . ..... 10000 001 ..... 1010111 @r2_vm
>>> +vfncvt_x_f_v 100010 . ..... 10001 001 ..... 1010111 @r2_vm
>>> +vfncvt_f_xu_v 100010 . ..... 10010 001 ..... 1010111 @r2_vm
>>> +vfncvt_f_x_v 100010 . ..... 10011 001 ..... 1010111 @r2_vm
>>> +vfncvt_f_f_v 100010 . ..... 10100 001 ..... 1010111 @r2_vm
>>> +vssub_vv 100011 . ..... ..... 000 ..... 1010111 @r_vm
>>> +vssub_vx 100011 . ..... ..... 100 ..... 1010111 @r_vm
>>> +vrem_vv 100011 . ..... ..... 010 ..... 1010111 @r_vm
>>> +vrem_vx 100011 . ..... ..... 110 ..... 1010111 @r_vm
>>> +vfsqrt_v 100011 . ..... 00000 001 ..... 1010111 @r2_vm
>>> +vfclass_v 100011 . ..... 10000 001 ..... 1010111 @r2_vm
>>> +vaadd_vv 100100 . ..... ..... 000 ..... 1010111 @r_vm
>>> +vaadd_vx 100100 . ..... ..... 100 ..... 1010111 @r_vm
>>> +vaadd_vi 100100 . ..... ..... 011 ..... 1010111 @r_vm
>>> +vmulhu_vv 100100 . ..... ..... 010 ..... 1010111 @r_vm
>>> +vmulhu_vx 100100 . ..... ..... 110 ..... 1010111 @r_vm
>>> +vfmul_vv 100100 . ..... ..... 001 ..... 1010111 @r_vm
>>> +vfmul_vf 100100 . ..... ..... 101 ..... 1010111 @r_vm
>>> +vsll_vv 100101 . ..... ..... 000 ..... 1010111 @r_vm
>>> +vsll_vx 100101 . ..... ..... 100 ..... 1010111 @r_vm
>>> +vsll_vi 100101 . ..... ..... 011 ..... 1010111 @r_vm
>>> +vmul_vv 100101 . ..... ..... 010 ..... 1010111 @r_vm
>>> +vmul_vx 100101 . ..... ..... 110 ..... 1010111 @r_vm
>>> +vasub_vv 100110 . ..... ..... 000 ..... 1010111 @r_vm
>>> +vasub_vx 100110 . ..... ..... 100 ..... 1010111 @r_vm
>>> +vmulhsu_vv 100110 . ..... ..... 010 ..... 1010111 @r_vm
>>> +vmulhsu_vx 100110 . ..... ..... 110 ..... 1010111 @r_vm
>>> +vsmul_vv 100111 . ..... ..... 000 ..... 1010111 @r_vm
>>> +vsmul_vx 100111 . ..... ..... 100 ..... 1010111 @r_vm
>>> +vmulh_vv 100111 . ..... ..... 010 ..... 1010111 @r_vm
>>> +vmulh_vx 100111 . ..... ..... 110 ..... 1010111 @r_vm
>>> +vfrsub_vf 100111 . ..... ..... 101 ..... 1010111 @r_vm
>>> +vsrl_vv 101000 . ..... ..... 000 ..... 1010111 @r_vm
>>> +vsrl_vx 101000 . ..... ..... 100 ..... 1010111 @r_vm
>>> +vsrl_vi 101000 . ..... ..... 011 ..... 1010111 @r_vm
>>> +vfmadd_vv 101000 . ..... ..... 001 ..... 1010111 @r_vm
>>> +vfmadd_vf 101000 . ..... ..... 101 ..... 1010111 @r_vm
>>> +vsra_vv 101001 . ..... ..... 000 ..... 1010111 @r_vm
>>> +vsra_vx 101001 . ..... ..... 100 ..... 1010111 @r_vm
>>> +vsra_vi 101001 . ..... ..... 011 ..... 1010111 @r_vm
>>> +vmadd_vv 101001 . ..... ..... 010 ..... 1010111 @r_vm
>>> +vmadd_vx 101001 . ..... ..... 110 ..... 1010111 @r_vm
>>> +vfnmadd_vv 101001 . ..... ..... 001 ..... 1010111 @r_vm
>>> +vfnmadd_vf 101001 . ..... ..... 101 ..... 1010111 @r_vm
>>> +vssrl_vv 101010 . ..... ..... 000 ..... 1010111 @r_vm
>>> +vssrl_vx 101010 . ..... ..... 100 ..... 1010111 @r_vm
>>> +vssrl_vi 101010 . ..... ..... 011 ..... 1010111 @r_vm
>>> +vfmsub_vv 101010 . ..... ..... 001 ..... 1010111 @r_vm
>>> +vfmsub_vf 101010 . ..... ..... 101 ..... 1010111 @r_vm
>>> +vssra_vv 101011 . ..... ..... 000 ..... 1010111 @r_vm
>>> +vssra_vx 101011 . ..... ..... 100 ..... 1010111 @r_vm
>>> +vssra_vi 101011 . ..... ..... 011 ..... 1010111 @r_vm
>>> +vnmsub_vv 101011 . ..... ..... 010 ..... 1010111 @r_vm
>>> +vnmsub_vx 101011 . ..... ..... 110 ..... 1010111 @r_vm
>>> +vfnmsub_vv 101011 . ..... ..... 001 ..... 1010111 @r_vm
>>> +vfnmsub_vf 101011 . ..... ..... 101 ..... 1010111 @r_vm
>>> +vnsrl_vv 101100 . ..... ..... 000 ..... 1010111 @r_vm
>>> +vnsrl_vx 101100 . ..... ..... 100 ..... 1010111 @r_vm
>>> +vnsrl_vi 101100 . ..... ..... 011 ..... 1010111 @r_vm
>>> +vfmacc_vv 101100 . ..... ..... 001 ..... 1010111 @r_vm
>>> +vfmacc_vf 101100 . ..... ..... 101 ..... 1010111 @r_vm
>>> +vnsra_vv 101101 . ..... ..... 000 ..... 1010111 @r_vm
>>> +vnsra_vx 101101 . ..... ..... 100 ..... 1010111 @r_vm
>>> +vnsra_vi 101101 . ..... ..... 011 ..... 1010111 @r_vm
>>> +vmacc_vv 101101 . ..... ..... 010 ..... 1010111 @r_vm
>>> +vmacc_vx 101101 . ..... ..... 110 ..... 1010111 @r_vm
>>> +vfnmacc_vv 101101 . ..... ..... 001 ..... 1010111 @r_vm
>>> +vfnmacc_vf 101101 . ..... ..... 101 ..... 1010111 @r_vm
>>> +vnclipu_vv 101110 . ..... ..... 000 ..... 1010111 @r_vm
>>> +vnclipu_vx 101110 . ..... ..... 100 ..... 1010111 @r_vm
>>> +vnclipu_vi 101110 . ..... ..... 011 ..... 1010111 @r_vm
>>> +vfmsac_vv 101110 . ..... ..... 001 ..... 1010111 @r_vm
>>> +vfmsac_vf 101110 . ..... ..... 101 ..... 1010111 @r_vm
>>> +vnclip_vv 101111 . ..... ..... 000 ..... 1010111 @r_vm
>>> +vnclip_vx 101111 . ..... ..... 100 ..... 1010111 @r_vm
>>> +vnclip_vi 101111 . ..... ..... 011 ..... 1010111 @r_vm
>>> +vnmsac_vv 101111 . ..... ..... 010 ..... 1010111 @r_vm
>>> +vnmsac_vx 101111 . ..... ..... 110 ..... 1010111 @r_vm
>>> +vfnmsac_vv 101111 . ..... ..... 001 ..... 1010111 @r_vm
>>> +vfnmsac_vf 101111 . ..... ..... 101 ..... 1010111 @r_vm
>>> +vwredsumu_vs 110000 . ..... ..... 000 ..... 1010111 @r_vm
>>> +vwaddu_vv 110000 . ..... ..... 010 ..... 1010111 @r_vm
>>> +vwaddu_vx 110000 . ..... ..... 110 ..... 1010111 @r_vm
>>> +vfwadd_vv 110000 . ..... ..... 001 ..... 1010111 @r_vm
>>> +vfwadd_vf 110000 . ..... ..... 101 ..... 1010111 @r_vm
>>> +vwredsum_vs 110001 . ..... ..... 000 ..... 1010111 @r_vm
>>> +vwadd_vv 110001 . ..... ..... 010 ..... 1010111 @r_vm
>>> +vwadd_vx 110001 . ..... ..... 110 ..... 1010111 @r_vm
>>> +vfwredsum_vs 110001 . ..... ..... 001 ..... 1010111 @r_vm
>>> +vwsubu_vv 110010 . ..... ..... 010 ..... 1010111 @r_vm
>>> +vwsubu_vx 110010 . ..... ..... 110 ..... 1010111 @r_vm
>>> +vfwsub_vv 110010 . ..... ..... 001 ..... 1010111 @r_vm
>>> +vfwsub_vf 110010 . ..... ..... 101 ..... 1010111 @r_vm
>>> +vwsub_vv 110011 . ..... ..... 010 ..... 1010111 @r_vm
>>> +vwsub_vx 110011 . ..... ..... 110 ..... 1010111 @r_vm
>>> +vfwredosum_vs 110011 . ..... ..... 001 ..... 1010111 @r_vm
>>> +vwaddu_wv 110100 . ..... ..... 010 ..... 1010111 @r_vm
>>> +vwaddu_wx 110100 . ..... ..... 110 ..... 1010111 @r_vm
>>> +vfwadd_wv 110100 . ..... ..... 001 ..... 1010111 @r_vm
>>> +vfwadd_wf 110100 . ..... ..... 101 ..... 1010111 @r_vm
>>> +vwadd_wv 110101 . ..... ..... 010 ..... 1010111 @r_vm
>>> +vwadd_wx 110101 . ..... ..... 110 ..... 1010111 @r_vm
>>> +vwsubu_wv 110110 . ..... ..... 010 ..... 1010111 @r_vm
>>> +vwsubu_wx 110110 . ..... ..... 110 ..... 1010111 @r_vm
>>> +vfwsub_wv 110110 . ..... ..... 001 ..... 1010111 @r_vm
>>> +vfwsub_wf 110110 . ..... ..... 101 ..... 1010111 @r_vm
>>> +vwsub_wv 110111 . ..... ..... 010 ..... 1010111 @r_vm
>>> +vwsub_wx 110111 . ..... ..... 110 ..... 1010111 @r_vm
>>> +vwmulu_vv 111000 . ..... ..... 010 ..... 1010111 @r_vm
>>> +vwmulu_vx 111000 . ..... ..... 110 ..... 1010111 @r_vm
>>> +vfwmul_vv 111000 . ..... ..... 001 ..... 1010111 @r_vm
>>> +vfwmul_vf 111000 . ..... ..... 101 ..... 1010111 @r_vm
>>> +vwmulsu_vv 111010 . ..... ..... 010 ..... 1010111 @r_vm
>>> +vwmulsu_vx 111010 . ..... ..... 110 ..... 1010111 @r_vm
>>> +vwmul_vv 111011 . ..... ..... 010 ..... 1010111 @r_vm
>>> +vwmul_vx 111011 . ..... ..... 110 ..... 1010111 @r_vm
>>> +vwsmaccu_vv 111100 . ..... ..... 000 ..... 1010111 @r_vm
>>> +vwsmaccu_vx 111100 . ..... ..... 100 ..... 1010111 @r_vm
>>> +vwmaccu_vv 111100 . ..... ..... 010 ..... 1010111 @r_vm
>>> +vwmaccu_vx 111100 . ..... ..... 110 ..... 1010111 @r_vm
>>> +vfwmacc_vv 111100 . ..... ..... 001 ..... 1010111 @r_vm
>>> +vfwmacc_vf 111100 . ..... ..... 101 ..... 1010111 @r_vm
>>> +vwsmacc_vv 111101 . ..... ..... 000 ..... 1010111 @r_vm
>>> +vwsmacc_vx 111101 . ..... ..... 100 ..... 1010111 @r_vm
>>> +vwmacc_vv 111101 . ..... ..... 010 ..... 1010111 @r_vm
>>> +vwmacc_vx 111101 . ..... ..... 110 ..... 1010111 @r_vm
>>> +vfwnmacc_vv 111101 . ..... ..... 001 ..... 1010111 @r_vm
>>> +vfwnmacc_vf 111101 . ..... ..... 101 ..... 1010111 @r_vm
>>> +vwsmaccsu_vv 111110 . ..... ..... 000 ..... 1010111 @r_vm
>>> +vwsmaccsu_vx 111110 . ..... ..... 100 ..... 1010111 @r_vm
>>> +vwmaccsu_vv 111110 . ..... ..... 010 ..... 1010111 @r_vm
>>> +vwmaccsu_vx 111110 . ..... ..... 110 ..... 1010111 @r_vm
>>> +vfwmsac_vv 111110 . ..... ..... 001 ..... 1010111 @r_vm
>>> +vfwmsac_vf 111110 . ..... ..... 101 ..... 1010111 @r_vm
>>> +vwsmaccus_vx 111111 . ..... ..... 100 ..... 1010111 @r_vm
>>> +vwmaccus_vx 111111 . ..... ..... 110 ..... 1010111 @r_vm
>>> +vfwnmsac_vv 111111 . ..... ..... 001 ..... 1010111 @r_vm
>>> +vfwnmsac_vf 111111 . ..... ..... 101 ..... 1010111 @r_vm
>>> +vsetvli 0 ........... ..... 111 ..... 1010111 @r2_zimm
>>> +vsetvl 1000000 ..... ..... 111 ..... 1010111 @r
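(Each pattern above uses the OP-V R-type layout: funct6, the vm mask bit,
vs2, vs1, funct3, vd, then the major opcode 1010111. As a rough sketch of
how those fields line up -- illustrative only, not code from the patch:)

#include <stdint.h>
#include <stdio.h>

/* Extract "len" bits of "insn" starting at bit position "pos". */
static inline uint32_t ex_bits(uint32_t insn, int pos, int len)
{
    return (insn >> pos) & ((1u << len) - 1);
}

/* Field layout assumed for the @r_vm patterns listed above. */
static void show_r_vm_fields(uint32_t insn)
{
    printf("opcode=%x vd=%u funct3=%u vs1=%u vs2=%u vm=%u funct6=%x\n",
           ex_bits(insn, 0, 7),   /* 1010111 for OP-V */
           ex_bits(insn, 7, 5),   /* vd: destination vector register */
           ex_bits(insn, 12, 3),  /* 000 = .vv, 100 = .vx, 011 = .vi */
           ex_bits(insn, 15, 5),  /* vs1 / rs1 / simm5 */
           ex_bits(insn, 20, 5),  /* vs2 */
           ex_bits(insn, 25, 1),  /* vm: 0 means masked */
           ex_bits(insn, 26, 6)); /* funct6, e.g. 000000 = vadd */
}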
>>> diff --git a/target/riscv/insn_trans/trans_rvv.inc.c b/target/riscv/insn_trans/trans_rvv.inc.c
>>> new file mode 100644
>>> index 0000000..dc8e6ce
>>> --- /dev/null
>>> +++ b/target/riscv/insn_trans/trans_rvv.inc.c
>>> @@ -0,0 +1,484 @@
>>> +/*
>>> + * RISC-V translation routines for the RVV Standard Extension.
>>> + *
>>> + * Copyright (c) 2011-2019 C-SKY Limited. All rights reserved.
>>> + *
>>> + * This program is free software; you can redistribute it and/or modify it
>>> + * under the terms and conditions of the GNU General Public License,
>>> + * version 2 or later, as published by the Free Software Foundation.
>>> + *
>>> + * This program is distributed in the hope it will be useful, but WITHOUT
>>> + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
>>> + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
>>> + * more details.
>>> + *
>>> + * You should have received a copy of the GNU General Public License along with
>>> + * this program. If not, see <http://www.gnu.org/licenses/>.
>>> + */
>>> +
>>> +#define GEN_VECTOR_R2_NFVM(INSN) \
>>> +static bool trans_##INSN(DisasContext *ctx, arg_##INSN * a) \
>>> +{ \
>>> + TCGv_i32 s1 = tcg_const_i32(a->rs1); \
>>> + TCGv_i32 d = tcg_const_i32(a->rd); \
>>> + TCGv_i32 nf = tcg_const_i32(a->nf); \
>>> + TCGv_i32 vm = tcg_const_i32(a->vm); \
>>> + gen_helper_vector_##INSN(cpu_env, nf, vm, s1, d); \
>>> + tcg_temp_free_i32(s1); \
>>> + tcg_temp_free_i32(d); \
>>> + tcg_temp_free_i32(nf); \
>>> + tcg_temp_free_i32(vm); \
>>> + return true; \
>>> +}
>>> +#define GEN_VECTOR_R_NFVM(INSN) \
>>> +static bool trans_##INSN(DisasContext *ctx, arg_##INSN * a) \
>>> +{ \
>>> + TCGv_i32 s1 = tcg_const_i32(a->rs1); \
>>> + TCGv_i32 s2 = tcg_const_i32(a->rs2); \
>>> + TCGv_i32 d = tcg_const_i32(a->rd); \
>>> + TCGv_i32 nf = tcg_const_i32(a->nf); \
>>> + TCGv_i32 vm = tcg_const_i32(a->vm); \
>>> + gen_helper_vector_##INSN(cpu_env, nf, vm, s1, s2, d);\
>>> + tcg_temp_free_i32(s1); \
>>> + tcg_temp_free_i32(s2); \
>>> + tcg_temp_free_i32(d); \
>>> + tcg_temp_free_i32(nf); \
>>> + tcg_temp_free_i32(vm); \
>>> + return true; \
>>> +}
>>> +
>>> +#define GEN_VECTOR_R_WDVM(INSN) \
>>> +static bool trans_##INSN(DisasContext *ctx, arg_##INSN * a) \
>>> +{ \
>>> + TCGv_i32 s1 = tcg_const_i32(a->rs1); \
>>> + TCGv_i32 s2 = tcg_const_i32(a->rs2); \
>>> + TCGv_i32 d = tcg_const_i32(a->rd); \
>>> + TCGv_i32 wd = tcg_const_i32(a->wd); \
>>> + TCGv_i32 vm = tcg_const_i32(a->vm); \
>>> + gen_helper_vector_##INSN(cpu_env, wd, vm, s1, s2, d);\
>>> + tcg_temp_free_i32(s1); \
>>> + tcg_temp_free_i32(s2); \
>>> + tcg_temp_free_i32(d); \
>>> + tcg_temp_free_i32(wd); \
>>> + tcg_temp_free_i32(vm); \
>>> + return true; \
>>> +}
>>> +#define GEN_VECTOR_R(INSN) \
>>> +static bool trans_##INSN(DisasContext *ctx, arg_##INSN * a) \
>>> +{ \
>>> + TCGv_i32 s1 = tcg_const_i32(a->rs1); \
>>> + TCGv_i32 s2 = tcg_const_i32(a->rs2); \
>>> + TCGv_i32 d = tcg_const_i32(a->rd); \
>>> + gen_helper_vector_##INSN(cpu_env, s1, s2, d); \
>>> + tcg_temp_free_i32(s1); \
>>> + tcg_temp_free_i32(s2); \
>>> + tcg_temp_free_i32(d); \
>>> + return true; \
>>> +}
>>> +#define GEN_VECTOR_R2_VM(INSN) \
>>> +static bool trans_##INSN(DisasContext *ctx, arg_##INSN * a) \
>>> +{ \
>>> + TCGv_i32 s2 = tcg_const_i32(a->rs2); \
>>> + TCGv_i32 d = tcg_const_i32(a->rd); \
>>> + TCGv_i32 vm = tcg_const_i32(a->vm); \
>>> + gen_helper_vector_##INSN(cpu_env, vm, s2, d); \
>>> + tcg_temp_free_i32(s2); \
>>> + tcg_temp_free_i32(d); \
>>> + tcg_temp_free_i32(vm); \
>>> + return true; \
>>> +}
>>> +
>>> +#define GEN_VECTOR_R1_VM(INSN) \
>>> +static bool trans_##INSN(DisasContext *ctx, arg_##INSN * a) \
>>> +{ \
>>> + TCGv_i32 d = tcg_const_i32(a->rd); \
>>> + TCGv_i32 vm = tcg_const_i32(a->vm); \
>>> + gen_helper_vector_##INSN(cpu_env, vm, d); \
>>> + tcg_temp_free_i32(d); \
>>> + tcg_temp_free_i32(vm); \
>>> + return true; \
>>> +}
>>> +#define GEN_VECTOR_R_VM(INSN) \
>>> +static bool trans_##INSN(DisasContext *ctx, arg_##INSN * a) \
>>> +{ \
>>> + TCGv_i32 s1 = tcg_const_i32(a->rs1); \
>>> + TCGv_i32 s2 = tcg_const_i32(a->rs2); \
>>> + TCGv_i32 d = tcg_const_i32(a->rd); \
>>> + TCGv_i32 vm = tcg_const_i32(a->vm); \
>>> + gen_helper_vector_##INSN(cpu_env, vm, s1, s2, d); \
>>> + tcg_temp_free_i32(s1); \
>>> + tcg_temp_free_i32(s2); \
>>> + tcg_temp_free_i32(d); \
>>> + tcg_temp_free_i32(vm); \
>>> + return true; \
>>> +}
>>> +#define GEN_VECTOR_R2_ZIMM(INSN) \
>>> +static bool trans_##INSN(DisasContext *ctx, arg_##INSN * a) \
>>> +{ \
>>> + TCGv_i32 s1 = tcg_const_i32(a->rs1); \
>>> + TCGv_i32 zimm = tcg_const_i32(a->zimm); \
>>> + TCGv_i32 d = tcg_const_i32(a->rd); \
>>> + gen_helper_vector_##INSN(cpu_env, s1, zimm, d); \
>>> + tcg_temp_free_i32(s1); \
>>> + tcg_temp_free_i32(zimm); \
>>> + tcg_temp_free_i32(d); \
>>> + return true; \
>>> +}
>>> +
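(Expanding one of the invocations below makes the pattern concrete:
GEN_VECTOR_R2_NFVM(vlb_v), for instance, produces exactly this function,
reconstructed here from the macro above; it boxes the decoded fields into
TCG constants and hands them to the vlb_v helper:)

static bool trans_vlb_v(DisasContext *ctx, arg_vlb_v *a)
{
    TCGv_i32 s1 = tcg_const_i32(a->rs1);
    TCGv_i32 d = tcg_const_i32(a->rd);
    TCGv_i32 nf = tcg_const_i32(a->nf);
    TCGv_i32 vm = tcg_const_i32(a->vm);
    gen_helper_vector_vlb_v(cpu_env, nf, vm, s1, d);
    tcg_temp_free_i32(s1);
    tcg_temp_free_i32(d);
    tcg_temp_free_i32(nf);
    tcg_temp_free_i32(vm);
    return true;
}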
>>> +GEN_VECTOR_R2_NFVM(vlb_v)
>>> +GEN_VECTOR_R2_NFVM(vlh_v)
>
> ...
^ permalink raw reply [flat|nested] 52+ messages in thread
* Re: [Qemu-devel] [PATCH] RISCV: support riscv vector extension 0.7.1
2019-08-29 12:00 ` [Qemu-riscv] " liuzhiwei
@ 2019-08-29 21:50 ` Alistair Francis
-1 siblings, 0 replies; 52+ messages in thread
From: Alistair Francis @ 2019-08-29 21:50 UTC (permalink / raw)
To: liuzhiwei
Cc: Peter Maydell, Riku Voipio, open list:RISC-V, Sagar Karandikar,
Bastian Koppelmann, Palmer Dabbelt,
qemu-devel@nongnu.org Developers, Laurent Vivier,
Alistair Francis, Alex Bennée, Aurelien Jarno
On Thu, Aug 29, 2019 at 5:05 AM liuzhiwei <zhiwei_liu@c-sky.com> wrote:
>
> > On 2019/8/29 5:34 AM, Alistair Francis wrote:
> > On Wed, Aug 28, 2019 at 12:04 AM liuzhiwei <zhiwei_liu@c-sky.com> wrote:
> >> Change-Id: I3cf891bc400713b95f47ecca82b1bf773f3dcb25
> >> Signed-off-by: liuzhiwei <zhiwei_liu@c-sky.com>
> >> ---
> >> fpu/softfloat.c | 119 +
> >> include/fpu/softfloat.h | 4 +
> >> linux-user/riscv/cpu_loop.c | 8 +-
> >> target/riscv/Makefile.objs | 2 +-
> >> target/riscv/cpu.h | 30 +
> >> target/riscv/cpu_bits.h | 15 +
> >> target/riscv/cpu_helper.c | 7 +
> >> target/riscv/csr.c | 65 +-
> >> target/riscv/helper.h | 354 +
> >> target/riscv/insn32.decode | 374 +-
> >> target/riscv/insn_trans/trans_rvv.inc.c | 484 +
> >> target/riscv/translate.c | 1 +
> >> target/riscv/vector_helper.c | 26563 ++++++++++++++++++++++++++++++
> >> 13 files changed, 28017 insertions(+), 9 deletions(-)
> >> create mode 100644 target/riscv/insn_trans/trans_rvv.inc.c
> >> create mode 100644 target/riscv/vector_helper.c
> >>
> > Hello,
> >
> > Thanks for the patch!
> >
> > As others have pointed out you will need to split the patch up into
> > multiple smaller patches, otherwise it is too hard to review almost
> > 30,000 lines of code.
>
> Hi, Alistair
>
> I'm so sorry for the inconvenience. It will be a patch set with a cover
> letter in V2.
No worries.
>
> > Can you also include a cover letter with your patch series describing
> > how you are testing this? AFAIK vector extension support isn't in any
> > compiler so I'm assuming you are handwriting the assembly or have
> > toolchain patches. Either way it will help if you can share that so
> > others can test your implementation.
>
> Yes, it's handwritten assembly. The assembler in Binutils already supports
> the Vector extension. First define a function test_vadd_vv_8 in assembly;
> it can then be called from a C program.
>
> The function is something like
>
> /* vadd.vv */
> TEST_FUNC(test_vadd_vv_8)
> vsetvli t1, x0, e8, m2
> vlb.v v6, (a4)
> vsb.v v6, (a3)
> vsetvli t1, a0, e8, m2
> vlb.v v0, (a1)
> vlb.v v2, (a2)
> vadd.vv v4, v0, v2
> vsb.v v4, (a3)
> ret
> .size test_vadd_vv_8, .-test_vadd_vv_8
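(A minimal sketch of the C side of such a test; the buffer names, the
extra a4 input, and the scalar reference check are assumptions here, not
code from the thread:)

#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define N 64

/* The assembly routine above: a0 = element count, a1/a2 = sources,
 * a3 = destination, a4 = an extra input used by the first vsetvli. */
extern void test_vadd_vv_8(long n, int8_t *src1, int8_t *src2,
                           int8_t *dst, int8_t *extra);

int main(void)
{
    int8_t a[N], b[N], out[N], ref[N], extra[N];
    for (int i = 0; i < N; i++) {
        a[i] = (int8_t)i;
        b[i] = (int8_t)(3 * i);
        ref[i] = (int8_t)(a[i] + b[i]);  /* scalar reference result */
        extra[i] = 0;
    }
    test_vadd_vv_8(N, a, b, out, extra);
    if (memcmp(out, ref, N) != 0) {
        puts("vadd.vv 8-bit: FAIL");
        return 1;
    }
    puts("vadd.vv 8-bit: PASS");
    return 0;
}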
If possible it might be worth releasing the code that you are using for testing.
>
> It takes more time to test than to implement the instructions. Maybe
> there is a better test method, or some ready-made test cases in QEMU.
> Could you give me some advice on testing?
Richard's idea of risu seems like a good option.
Thinking about it a bit more, we are going to have other extensions in
the future that will need assembly testing, so setting up a test
framework seems like a good idea. I am happy to help try to get this
going as well.
Alistair
>
> Best Regards,
>
> Zhiwei
>
> > Alex and Richard have kindly started the review. Once you have
> > addressed their comments and split this patch up into smaller patches
> > you can send a v2 and we can go from there.
> >
> > Once again thanks for doing this implementation for QEMU!
> >
> > Alistair
> >
^ permalink raw reply [flat|nested] 52+ messages in thread
* Re: [Qemu-devel] [PATCH] RISCV: support riscv vector extension 0.7.1
2019-08-29 21:50 ` [Qemu-riscv] " Alistair Francis
@ 2019-08-30 9:06 ` Alex Bennée
-1 siblings, 0 replies; 52+ messages in thread
From: Alex Bennée @ 2019-08-30 9:06 UTC (permalink / raw)
To: Alistair Francis
Cc: Peter Maydell, Riku Voipio, open list:RISC-V, Sagar Karandikar,
Bastian Koppelmann, Palmer Dabbelt,
qemu-devel@nongnu.org Developers, Laurent Vivier,
Alistair Francis, liuzhiwei, Aurelien Jarno
Alistair Francis <alistair23@gmail.com> writes:
> On Thu, Aug 29, 2019 at 5:05 AM liuzhiwei <zhiwei_liu@c-sky.com> wrote:
>>
>> > On 2019/8/29 5:34 AM, Alistair Francis wrote:
>> > On Wed, Aug 28, 2019 at 12:04 AM liuzhiwei <zhiwei_liu@c-sky.com> wrote:
>> >> Change-Id: I3cf891bc400713b95f47ecca82b1bf773f3dcb25
>> >> Signed-off-by: liuzhiwei <zhiwei_liu@c-sky.com>
>> >> ---
>> >> fpu/softfloat.c | 119 +
>> >> include/fpu/softfloat.h | 4 +
>> >> linux-user/riscv/cpu_loop.c | 8 +-
>> >> target/riscv/Makefile.objs | 2 +-
>> >> target/riscv/cpu.h | 30 +
>> >> target/riscv/cpu_bits.h | 15 +
>> >> target/riscv/cpu_helper.c | 7 +
>> >> target/riscv/csr.c | 65 +-
>> >> target/riscv/helper.h | 354 +
>> >> target/riscv/insn32.decode | 374 +-
>> >> target/riscv/insn_trans/trans_rvv.inc.c | 484 +
>> >> target/riscv/translate.c | 1 +
>> >> target/riscv/vector_helper.c | 26563 ++++++++++++++++++++++++++++++
>> >> 13 files changed, 28017 insertions(+), 9 deletions(-)
>> >> create mode 100644 target/riscv/insn_trans/trans_rvv.inc.c
>> >> create mode 100644 target/riscv/vector_helper.c
>> >>
>> > Hello,
>> >
>> > Thanks for the patch!
>> >
>> > As others have pointed out you will need to split the patch up into
>> > multiple smaller patches, otherwise it is too hard to review almost
>> > 30,000 lines of code.
>>
>> Hi, Alistair
>>
>> I'm so sorry for the inconvenience. It will be a patch set with a cover
>> letter in V2.
>
> No worries.
>
>>
>> > Can you also include a cover letter with your patch series describing
>> > how you are testing this? AFAIK vector extension support isn't in any
>> > compiler so I'm assuming you are handwriting the assembly or have
>> > toolchain patches. Either way it will help if you can share that so
>> > others can test your implementation.
>>
>> Yes, it's handwritten assembly. The assembler in Binutils already supports
>> the Vector extension. First define a function test_vadd_vv_8 in assembly;
>> it can then be called from a C program.
>>
>> The function is something like
>>
>> /* vadd.vv */
>> TEST_FUNC(test_vadd_vv_8)
>> vsetvli t1, x0, e8, m2
>> vlb.v v6, (a4)
>> vsb.v v6, (a3)
>> vsetvli t1, a0, e8, m2
>> vlb.v v0, (a1)
>> vlb.v v2, (a2)
>> vadd.vv v4, v0, v2
>> vsb.v v4, (a3)
>> ret
>> .size test_vadd_vv_8, .-test_vadd_vv_8
>
> If possible it might be worth releasing the code that you are using for testing.
>
>>
>> It takes more time to test than to implement the instructions. Maybe
>> there is a better test method, or some ready-made test cases in QEMU.
>> Could you give me some advice on testing?
>
> Richard's idea of risu seems like a good option.
>
> Thinking about it a bit more we are going to have other extensions in
> the future that will need assembly testing so setting up a test
> framework seems like a good idea. I am happy to help try and get this
> going as well.
tests/tcg already has the bits you need for both linux-user and
system-based testing. The main problem is getting a version of gcc that
is new enough to emit the newer instructions. I recently updated the
images to buster, so gcc is pretty recent now (8.3).
I did start down the road of a general "op" test framework which tried
to come up with a common framework/boilerplate so that all you needed to
do was supply a new function (possibly with a hex-encoded instruction)
and a list of expected inputs and outputs:
https://github.com/stsquad/qemu/commits/testing/generic-op-tester
I suspect it was over-engineered, but perhaps it would be worth reviving
it (or something like it) to make adding a simple single-instruction
test case possible with minimal additional verbiage?
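Roughly, that amounts to a table-driven shape like the following; a
sketch of the idea only, with all names invented rather than taken from
the generic-op-tester branch:

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

/* One single-instruction test: an op wrapper plus its expected
 * result for given inputs. */
typedef struct {
    const char *name;
    uint64_t (*op)(uint64_t, uint64_t);  /* wraps one instruction */
    uint64_t in1, in2, expected;
} op_test;

static uint64_t do_add(uint64_t a, uint64_t b)
{
    /* Inline asm (or a hex-encoded instruction) would wrap the
     * instruction under test; plain C stands in for it here. */
    return a + b;
}

static const op_test tests[] = {
    { "add",      do_add, 1, 2, 3 },
    { "add-wrap", do_add, UINT64_MAX, 1, 0 },
};

int main(void)
{
    int failures = 0;
    for (size_t i = 0; i < sizeof(tests) / sizeof(tests[0]); i++) {
        uint64_t got = tests[i].op(tests[i].in1, tests[i].in2);
        if (got != tests[i].expected) {
            printf("%s: got %llx, want %llx\n", tests[i].name,
                   (unsigned long long)got,
                   (unsigned long long)tests[i].expected);
            failures++;
        }
    }
    return failures ? 1 : 0;
}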
>
> Alistair
>
>>
>> Best Regards,
>>
>> Zhiwei
>>
>> > Alex and Richard have kindly started the review. Once you have
>> > addressed their comments and split this patch up into smaller patches
>> > you can send a v2 and we can go from there.
>> >
>> > Once again thanks for doing this implementation for QEMU!
>> >
>> > Alistair
>> >
--
Alex Bennée
^ permalink raw reply [flat|nested] 52+ messages in thread
* Re: [Qemu-devel] [PATCH] RISCV: support riscv vector extension 0.7.1
2019-08-30 9:06 ` [Qemu-riscv] " Alex Bennée
@ 2019-08-30 18:39 ` Alistair Francis
-1 siblings, 0 replies; 52+ messages in thread
From: Alistair Francis @ 2019-08-30 18:39 UTC (permalink / raw)
To: Alex Bennée
Cc: Peter Maydell, Riku Voipio, open list:RISC-V, Sagar Karandikar,
Bastian Koppelmann, Palmer Dabbelt,
qemu-devel@nongnu.org Developers, Laurent Vivier,
Alistair Francis, liuzhiwei, Aurelien Jarno
On Fri, Aug 30, 2019 at 2:06 AM Alex Bennée <alex.bennee@linaro.org> wrote:
>
>
> Alistair Francis <alistair23@gmail.com> writes:
>
> > On Thu, Aug 29, 2019 at 5:05 AM liuzhiwei <zhiwei_liu@c-sky.com> wrote:
> >>
> >> On 2019/8/29 5:34 AM, Alistair Francis wrote:
> >> > On Wed, Aug 28, 2019 at 12:04 AM liuzhiwei <zhiwei_liu@c-sky.com> wrote:
> >> >> Change-Id: I3cf891bc400713b95f47ecca82b1bf773f3dcb25
> >> >> Signed-off-by: liuzhiwei <zhiwei_liu@c-sky.com>
> >> >> ---
> >> >> fpu/softfloat.c | 119 +
> >> >> include/fpu/softfloat.h | 4 +
> >> >> linux-user/riscv/cpu_loop.c | 8 +-
> >> >> target/riscv/Makefile.objs | 2 +-
> >> >> target/riscv/cpu.h | 30 +
> >> >> target/riscv/cpu_bits.h | 15 +
> >> >> target/riscv/cpu_helper.c | 7 +
> >> >> target/riscv/csr.c | 65 +-
> >> >> target/riscv/helper.h | 354 +
> >> >> target/riscv/insn32.decode | 374 +-
> >> >> target/riscv/insn_trans/trans_rvv.inc.c | 484 +
> >> >> target/riscv/translate.c | 1 +
> >> >> target/riscv/vector_helper.c | 26563 ++++++++++++++++++++++++++++++
> >> >> 13 files changed, 28017 insertions(+), 9 deletions(-)
> >> >> create mode 100644 target/riscv/insn_trans/trans_rvv.inc.c
> >> >> create mode 100644 target/riscv/vector_helper.c
> >> >>
> >> > Hello,
> >> >
> >> > Thanks for the patch!
> >> >
> >> > As others have pointed out you will need to split the patch up into
> >> > multiple smaller patches, otherwise it is too hard to review almost
> >> > 30,000 lines of code.
> >>
> >> Hi, Alistair
> >>
> >> I'm so sorry for the inconvenience. It will be a patch set with a cover
> >> letter in V2.
> >
> > No worries.
> >
> >>
> >> > Can you also include a cover letter with your patch series describing
> >> > how you are testing this? AFAIK vector extension support isn't in any
> >> > compiler so I'm assuming you are handwriting the assembly or have
> >> > toolchain patches. Either way it will help if you can share that so
> >> > others can test your implementation.
> >>
> >> Yes, it's handwritten assembly. The assembler in Binutils already supports
> >> the Vector extension. First define a function test_vadd_vv_8 in assembly;
> >> it can then be called from a C program.
> >>
> >> The function is something like
> >>
> >> /* vadd.vv */
> >> TEST_FUNC(test_vadd_vv_8)
> >> vsetvli t1, x0, e8, m2
> >> vlb.v v6, (a4)
> >> vsb.v v6, (a3)
> >> vsetvli t1, a0, e8, m2
> >> vlb.v v0, (a1)
> >> vlb.v v2, (a2)
> >> vadd.vv v4, v0, v2
> >> vsb.v v4, (a3)
> >> ret
> >> .size test_vadd_vv_8, .-test_vadd_vv_8
> >
> > If possible it might be worth releasing the code that you are using for testing.
> >
> >>
> >> It takes more time to test than to implement the instructions. Maybe
> >> there is a better test method, or some ready-made test cases in QEMU.
> >> Could you give me some advice on testing?
> >
> > Richard's idea of risu seems like a good option.
> >
> > Thinking about it a bit more we are going to have other extensions in
> > the future that will need assembly testing so setting up a test
> > framework seems like a good idea. I am happy to help try and get this
> > going as well.
Ah, I looked into this more: risu compares results against hardware
running the same binary. In this case there is no hardware, so that
doesn't work too well.
What we could do, though, is compare against Spike (which I think has the
vector instructions?), which would have the same effect.
>
> tests/tcg already has the bits you need for both linux-user and
> system-based testing. The main problem is getting a version of gcc that
> is new enough to emit the newer instructions. I recently updated the
> images to buster, so gcc is pretty recent now (8.3).
In this case there is no GCC with the new instructions.
>
> I did start down the road of a general "op" test framework which tried
> to come up with a common framework/boilerplate so that all you needed to
> do was supply a new function (possibly with a hex-encoded instruction)
> and a list of expected inputs and outputs:
>
> https://github.com/stsquad/qemu/commits/testing/generic-op-tester
>
> I suspect it was over-engineered, but perhaps it would be worth reviving
> it (or something like it) to make adding a simple single-instruction
> test case possible with minimal additional verbiage?
That would be interesting; I'll take a look.
Alistair
>
> >
> > Alistair
> >
> >>
> >> Best Regards,
> >>
> >> Zhiwei
> >>
> >> > Alex and Richard have kindly started the review. Once you have
> >> > addressed their comments and split this patch up into smaller patches
> >> > you can send a v2 and we can go from there.
> >> >
> >> > Once again thanks for doing this implementation for QEMU!
> >> >
> >> > Alistair
> >> >
>
>
> --
> Alex Bennée
^ permalink raw reply [flat|nested] 52+ messages in thread
* Re: [Qemu-devel] [PATCH] RISCV: support riscv vector extension 0.7.1
2019-08-29 21:50 ` [Qemu-riscv] " Alistair Francis
@ 2019-09-02 6:36 ` liuzhiwei
-1 siblings, 0 replies; 52+ messages in thread
From: liuzhiwei @ 2019-09-02 6:36 UTC (permalink / raw)
To: Alistair Francis
Cc: Peter Maydell, Riku Voipio, open list:RISC-V, Sagar Karandikar,
Bastian Koppelmann, Palmer Dabbelt,
qemu-devel@nongnu.org Developers, Laurent Vivier,
Alistair Francis, Alex Bennée, Aurelien Jarno
On 2019/8/30 5:50 AM, Alistair Francis wrote:
> On Thu, Aug 29, 2019 at 5:05 AM liuzhiwei <zhiwei_liu@c-sky.com> wrote:
>> On 2019/8/29 5:34 AM, Alistair Francis wrote:
>>> On Wed, Aug 28, 2019 at 12:04 AM liuzhiwei <zhiwei_liu@c-sky.com> wrote:
>>>> Change-Id: I3cf891bc400713b95f47ecca82b1bf773f3dcb25
>>>> Signed-off-by: liuzhiwei <zhiwei_liu@c-sky.com>
>>>> ---
>>>> fpu/softfloat.c | 119 +
>>>> include/fpu/softfloat.h | 4 +
>>>> linux-user/riscv/cpu_loop.c | 8 +-
>>>> target/riscv/Makefile.objs | 2 +-
>>>> target/riscv/cpu.h | 30 +
>>>> target/riscv/cpu_bits.h | 15 +
>>>> target/riscv/cpu_helper.c | 7 +
>>>> target/riscv/csr.c | 65 +-
>>>> target/riscv/helper.h | 354 +
>>>> target/riscv/insn32.decode | 374 +-
>>>> target/riscv/insn_trans/trans_rvv.inc.c | 484 +
>>>> target/riscv/translate.c | 1 +
>>>> target/riscv/vector_helper.c | 26563 ++++++++++++++++++++++++++++++
>>>> 13 files changed, 28017 insertions(+), 9 deletions(-)
>>>> create mode 100644 target/riscv/insn_trans/trans_rvv.inc.c
>>>> create mode 100644 target/riscv/vector_helper.c
>>>>
>>> Hello,
>>>
>>> Thanks for the patch!
>>>
>>> As others have pointed out you will need to split the patch up into
>>> multiple smaller patches, otherwise it is too hard to review almost
>>> 30,000 lines of code.
>> Hi, Alistair
>>
>> I'm so sorry for the inconvenience. It will be a patch set with a cover
>> letter in V2.
> No worries.
>
>>> Can you also include a cover letter with your patch series describing
>>> how you are testing this? AFAIK vector extension support isn't in any
>>> compiler so I'm assuming you are handwriting the assembly or have
>>> toolchain patches. Either way it will help if you can share that so
>>> others can test your implementation.
>> Yes, it's handwritten assembly. The assembler in Binutils already supports
>> the Vector extension. First define a function test_vadd_vv_8 in assembly;
>> it can then be called from a C program.
>>
>> The function is something like
>>
>> /* vadd.vv */
>> TEST_FUNC(test_vadd_vv_8)
>> vsetvli t1, x0, e8, m2
>> vlb.v v6, (a4)
>> vsb.v v6, (a3)
>> vsetvli t1, a0, e8, m2
>> vlb.v v0, (a1)
>> vlb.v v2, (a2)
>> vadd.vv v4, v0, v2
>> vsb.v v4, (a3)
>> ret
>> .size test_vadd_vv_8, .-test_vadd_vv_8
> If possible it might be worth releasing the code that you are using for testing.
Yes, but I haven't found a good place to release the test code yet.
>
>> It takes more time to test than to implement the instructions. Maybe
>> there is a better test method, or some ready-made test cases in QEMU.
>> Could you give me some advice on testing?
> Richard's idea of risu seems like a good option.
All the test cases will be validated against Spike, which supports the
same vector specification. But this cross-validation work may be delayed
until V3.
I will split the patch and address the comments as soon as possible, so
that the V2 patch set can be sent next week.
Would that be all right?
>
> Thinking about it a bit more we are going to have other extensions in
> the future that will need assembly testing so setting up a test
> framework seems like a good idea. I am happy to help try and get this
> going as well.
>
> Alistair
There is usually a big difference between one new ISA extension and the
next, so I doubt a fully general framework is possible. A very light
framework that covers building, input generation, result validation, and
reporting may be OK.
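One possible shape for the result-validation step, assuming each
simulator run dumps the destination buffer to a file (all of the names
below are illustrative):

#include <stdio.h>

/* Compare two result dumps byte-for-byte; return 0 on match,
 * 1 on mismatch, -1 on I/O error. */
static int compare_dumps(const char *qemu_path, const char *spike_path)
{
    FILE *q = fopen(qemu_path, "rb");
    FILE *s = fopen(spike_path, "rb");
    long off = 0;
    int rc = -1;

    if (q && s) {
        int cq, cs;
        rc = 0;
        do {
            cq = fgetc(q);
            cs = fgetc(s);
            if (cq != cs) {  /* differing byte, or differing length */
                fprintf(stderr, "mismatch at offset %ld\n", off);
                rc = 1;
                break;
            }
            off++;
        } while (cq != EOF);
    } else {
        perror("fopen");
    }
    if (q) fclose(q);
    if (s) fclose(s);
    return rc;
}

int main(int argc, char **argv)
{
    if (argc != 3) {
        fprintf(stderr, "usage: %s qemu.bin spike.bin\n", argv[0]);
        return 2;
    }
    return compare_dumps(argv[1], argv[2]) ? 1 : 0;
}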
Best Regards,
Zhiwei
>> Best Regards,
>>
>> Zhiwei
>>
>>> Alex and Richard have kindly started the review. Once you have
>>> addressed their comments and split this patch up into smaller patches
>>> you can send a v2 and we can go from there.
>>>
>>> Once again thanks for doing this implementation for QEMU!
>>>
>>> Alistair
>>>
^ permalink raw reply [flat|nested] 52+ messages in thread
* Re: [Qemu-riscv] [Qemu-devel] [PATCH] RISCV: support riscv vector extension 0.7.1
@ 2019-09-02 6:36 ` liuzhiwei
0 siblings, 0 replies; 52+ messages in thread
From: liuzhiwei @ 2019-09-02 6:36 UTC (permalink / raw)
To: Alistair Francis
Cc: qemu-devel@nongnu.org Developers, open list:RISC-V,
Peter Maydell, Palmer Dabbelt, Sagar Karandikar,
Bastian Koppelmann, Riku Voipio, Laurent Vivier,
Alistair Francis, Alex Bennée, Aurelien Jarno
On 2019/8/30 上午5:50, Alistair Francis wrote:
> On Thu, Aug 29, 2019 at 5:05 AM liuzhiwei <zhiwei_liu@c-sky.com> wrote:
>> On 2019/8/29 上午5:34, Alistair Francis wrote:
>>> On Wed, Aug 28, 2019 at 12:04 AM liuzhiwei <zhiwei_liu@c-sky.com> wrote:
>>>> Change-Id: I3cf891bc400713b95f47ecca82b1bf773f3dcb25
>>>> Signed-off-by: liuzhiwei <zhiwei_liu@c-sky.com>
>>>> ---
>>>> fpu/softfloat.c | 119 +
>>>> include/fpu/softfloat.h | 4 +
>>>> linux-user/riscv/cpu_loop.c | 8 +-
>>>> target/riscv/Makefile.objs | 2 +-
>>>> target/riscv/cpu.h | 30 +
>>>> target/riscv/cpu_bits.h | 15 +
>>>> target/riscv/cpu_helper.c | 7 +
>>>> target/riscv/csr.c | 65 +-
>>>> target/riscv/helper.h | 354 +
>>>> target/riscv/insn32.decode | 374 +-
>>>> target/riscv/insn_trans/trans_rvv.inc.c | 484 +
>>>> target/riscv/translate.c | 1 +
>>>> target/riscv/vector_helper.c | 26563 ++++++++++++++++++++++++++++++
>>>> 13 files changed, 28017 insertions(+), 9 deletions(-)
>>>> create mode 100644 target/riscv/insn_trans/trans_rvv.inc.c
>>>> create mode 100644 target/riscv/vector_helper.c
>>>>
>>> Hello,
>>>
>>> Thanks for the patch!
>>>
>>> As others have pointed out you will need to split the patch up into
>>> multiple smaller patches, otherwise it is too hard to review almost
>>> 30,000 lines of code.
>> Hi, Alistair
>>
>> I'm so sorry for the inconvenience. It will be a patch set with a cover
>> letter in V2.
> No worries.
>
>>> Can you also include a cover letter with your patch series describing
>>> how you are testing this? AFAIK vector extension support isn't in any
>>> compiler so I'm assuming you are handwriting the assembly or have
>>> toolchain patches. Either way it will help if you can share that so
>>> others can test your implementation.
>> Yes, it's handwriting assembly. The assembler in Binutils has support
>> Vector extension. First define an function test_vadd_vv_8 in assembly
>> and then it can be called from a C program.
>>
>> The function is something like
>>
>> /* vadd.vv */
>> TEST_FUNC(test_vadd_vv_8)
>> vsetvli t1, x0, e8, m2
>> vlb.v v6, (a4)
>> vsb.v v6, (a3)
>> vsetvli t1, a0, e8, m2
>> vlb.v v0, (a1)
>> vlb.v v2, (a2)
>> vadd.vv v4, v0, v2
>> vsb.v v4, (a3)
>> ret
>> .size test_vadd_vv_8, .-test_vadd_vv_8
> If possible it might be worth releasing the code that you are using for testing.
Yes, but I didn't find a good place to release these test codes currently.
>
>> It takes more time to test than to implement the instructions. Maybe
>> there is some better test method or some forced test cases in QEMU.
>> Could you give me some advice for testing?
> Richard's idea of risu seems like a good option.
All the test cases will be validated against Spike, which supports the
same vector specification. But this cross-validation work may be delayed
until V3.
I will split the patch and address the comments as soon as possible, to
ensure the V2 patch set can be sent next week.
Would that be all right?
>
> Thinking about it a bit more we are going to have other extensions in
> the future that will need assembly testing so setting up a test
> framework seems like a good idea. I am happy to help try and get this
> going as well.
>
> Alistair
There is usually a big difference between a new ISA extension and the
others, so I doubt there is a general framework. A very light framework
that covers building, input-generation aids, result validation, and
reporting may be OK.
Best Regards,
Zhiwei
>> Best Regards,
>>
>> Zhiwei
>>
>>> Alex and Richard have kindly started the review. Once you have
>>> addressed their comments and split this patch up into smaller patches
>>> you can send a v2 and we can go from there.
>>>
>>> Once again thanks for doing this implementation for QEMU!
>>>
>>> Alistair
>>>
^ permalink raw reply [flat|nested] 52+ messages in thread
* Re: [Qemu-devel] [PATCH] RISCV: support riscv vector extension 0.7.1
2019-08-29 15:14 ` [Qemu-riscv] " Richard Henderson
@ 2019-09-02 6:54 ` liuzhiwei
-1 siblings, 0 replies; 52+ messages in thread
From: liuzhiwei @ 2019-09-02 6:54 UTC (permalink / raw)
To: Richard Henderson, Alistair Francis
Cc: Peter Maydell, Riku Voipio, open list:RISC-V, Sagar Karandikar,
Bastian Koppelmann, Palmer Dabbelt,
qemu-devel@nongnu.org Developers, Laurent Vivier,
Alistair Francis, Alex Bennée, Aurelien Jarno
On 2019/8/29 11:14 PM, Richard Henderson wrote:
> On 8/29/19 5:00 AM, liuzhiwei wrote:
>> Maybe there is some better test method or some forced test cases in QEMU. Could
>> you give me some advice for testing?
> If you have hardware, or another simulator, RISU is very good
> for testing these sorts of things.
>
> See https://git.linaro.org/people/pmaydell/risu.git
>
> You'll need to write new support for RISC-V, but it's not hard
> and we can help out with that.
>
>
> r~
>
Hi, Richard
Thank you for your advice. I will run the test cases in Spike for
cross-validation first.
Best Regards,
Zhiwei
^ permalink raw reply [flat|nested] 52+ messages in thread
* Re: [Qemu-devel] [PATCH] RISCV: support riscv vector extension 0.7.1
2019-08-29 15:09 ` [Qemu-riscv] " Richard Henderson
@ 2019-09-02 7:45 ` liuzhiwei
-1 siblings, 0 replies; 52+ messages in thread
From: liuzhiwei @ 2019-09-02 7:45 UTC (permalink / raw)
To: Richard Henderson, qemu-devel, qemu-riscv
Cc: peter.maydell, palmer, sagark, kbastian, riku.voipio, laurent,
Alistair.Francis, alex.bennee, aurelien
On 2019/8/29 11:09 PM, Richard Henderson wrote:
> On 8/29/19 5:45 AM, liuzhiwei wrote:
>> Even in QEMU, there may be situations where VSTART != 0. For example, a load
>> instruction takes a page fault exception at a middle position. If VSTART ==
>> 0, elements that had been loaded before the exception will be loaded once
>> again.
> Alternately, you can validate all of the pages before performing any memory
> operations. At which point there will never be an exception in the middle.
As a vector instruction may access memory across many pages, is there
any way to validate the pages? A page table walk? Or some TLB APIs?
> As it turns out, you *must* do this in order to allow watchpoints to work
> correctly. David Hildenbrand and I are at this moment fixing this aspect of
> watchpoints for s390x.
>
> See https://lists.gnu.org/archive/html/qemu-devel/2019-08/msg05979.html
I am interested in the watchpoint implementation, and I once implemented
user-mode watchpoints in the wild.
A backtrace of a watchpoint hit looks like:
#0 cpu_watchpoint_address_matches (wp=0x555556228110, addr=536871072,
len=1) at qemu/exec.c:1094
#1 0x000055555567204f in check_watchpoint (offset=160, len=1,
attrs=..., flags=2) at qemu/exec.c:2803
#2 0x0000555555672379 in watch_mem_write (opaque=0x0, addr=536871072,
val=165, size=1, attrs=...) at qemu/exec.c:2878
#3 0x00005555556d44bb in memory_region_write_with_attrs_accessor
(mr=0x5555561292e0 <io_mem_watch>, addr=536871072, value=0x7fffedffe2c8,
size=1, shift=0, mask=255, attrs=...)
at qemu/memory.c:553
#4 0x00005555556d45de in access_with_adjusted_size (addr=536871072,
value=0x7fffedffe2c8, size=1, access_size_min=1, access_size_max=8,
access_fn=0x5555556d43cd <memory_region_write_with_attrs_accessor>,
mr=0x5555561292e0 <io_mem_watch>, attrs=...) at qemu/memory.c:594
#5 0x00005555556d7247 in memory_region_dispatch_write
(mr=0x5555561292e0 <io_mem_watch>, addr=536871072, data=165, size=1,
attrs=...) at qemu/memory.c:1480
#6 0x00005555556f0d13 in io_writex (env=0x5555561efb58,
iotlbentry=0x5555561f5398, mmu_idx=1, val=165, addr=536871072,
retaddr=0, recheck=false, size=1) at qemu/accel/tcg/cputlb.c:909
#7 0x00005555556f19a6 in io_writeb (env=0x5555561efb58, mmu_idx=1,
index=0, val=165 '\245', addr=536871072, retaddr=0, recheck=false) at
qemu/accel/tcg/softmmu_template.h:268
#8 0x00005555556f1b54 in helper_ret_stb_mmu (env=0x5555561efb58,
addr=536871072, val=165 '\245', oi=1, retaddr=0) at
qemu/accel/tcg/softmmu_template.h:304
#9 0x0000555555769f06 in cpu_stb_data_ra (env=0x5555561efb58,
ptr=536871072, v=165, retaddr=0) at
qemu/include/exec/cpu_ldst_template.h:182
#10 0x0000555555769f80 in cpu_stb_data (env=0x5555561efb58,
ptr=536871072, v=165) at /qemu/include/exec/cpu_ldst_template.h:194
#11 0x000055555576a913 in csky_cpu_stb_data (env=0x5555561efb58,
vaddr=536871072, data=165 '\245') at qemu/target/csky/csky_ldst.c:48
#12 0x000055555580ba7d in helper_vdsp2_vstru_n (env=0x5555561efb58,
insn=4167183360) at qemu/target/csky/op_vdsp2.c:1317
The path is not related to probe_write() in the patch.
Could you give more details or a test case where the watchpoint doesn't
work correctly?
>
> r~
>
^ permalink raw reply [flat|nested] 52+ messages in thread
* Re: [Qemu-devel] [Qemu-riscv] [PATCH] RISCV: support riscv vector extension 0.7.1
2019-08-29 14:06 ` [Qemu-riscv] " Chih-Min Chao
@ 2019-09-02 8:17 ` liuzhiwei
0 siblings, 0 replies; 52+ messages in thread
From: liuzhiwei @ 2019-09-02 8:17 UTC (permalink / raw)
To: Chih-Min Chao
Cc: Peter Maydell, riku.voipio, open list:RISC-V, Sagar Karandikar,
Bastian Koppelmann, Palmer Dabbelt,
qemu-devel@nongnu.org Developers, laurent, Alistair Francis,
Alex Bennée, aurelien
On 2019/8/29 10:06 PM, Chih-Min Chao wrote:
> Hi Liuzhiwei,
>
> Some comments:
> 1. The vector extension allows flexible implementations. It is better
> to describe the limitations of the current implementation (such as
> vlen/elen/slen), supported sections, and unsupported features.
Thanks! Everything mentioned will be addressed in patch V2.
> 2. there should be cfg.ext_v to turn on vector extension from
> command line
I will add the vector extension to the "any" CPU. Is that all right?
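(For illustration, I imagine the command-line switch as a CPU property,
something like the sketch below; the "x-v" name and the cfg.ext_v field
are my assumptions, not code from this patch:)

    /* in the RISCVCPU property list, next to the other extension flags */
    DEFINE_PROP_BOOL("x-v", RISCVCPU, cfg.ext_v, false),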
> 3. from license
> It should be "Copyright (c) 2019 C-SKY Limited, All
> rights reserved." but not "2011 ~ 2019"
>
> It is a huge piece of work, and thanks for your contribution.
>
> chihmin
>
> On Wed, Aug 28, 2019 at 3:06 PM liuzhiwei <zhiwei_liu@c-sky.com
> <mailto:zhiwei_liu@c-sky.com>> wrote:
>
> Change-Id: I3cf891bc400713b95f47ecca82b1bf773f3dcb25
> Signed-off-by: liuzhiwei <zhiwei_liu@c-sky.com
> <mailto:zhiwei_liu@c-sky.com>>
> ---
> fpu/softfloat.c | 119 +
> include/fpu/softfloat.h | 4 +
> linux-user/riscv/cpu_loop.c | 8 +-
> target/riscv/Makefile.objs | 2 +-
> target/riscv/cpu.h | 30 +
> target/riscv/cpu_bits.h | 15 +
> target/riscv/cpu_helper.c | 7 +
> target/riscv/csr.c | 65 +-
> target/riscv/helper.h | 354 +
> target/riscv/insn32.decode | 374 +-
> target/riscv/insn_trans/trans_rvv.inc.c | 484 +
> target/riscv/translate.c | 1 +
> target/riscv/vector_helper.c | 26563
> ++++++++++++++++++++++++++++++
> 13 files changed, 28017 insertions(+), 9 deletions(-)
> create mode 100644 target/riscv/insn_trans/trans_rvv.inc.c
> create mode 100644 target/riscv/vector_helper.c
>
>
^ permalink raw reply [flat|nested] 52+ messages in thread
* Re: [Qemu-devel] [PATCH] RISCV: support riscv vector extension 0.7.1
2019-08-28 18:54 ` [Qemu-riscv] " Richard Henderson
@ 2019-09-02 9:43 ` liuzhiwei
-1 siblings, 0 replies; 52+ messages in thread
From: liuzhiwei @ 2019-09-02 9:43 UTC (permalink / raw)
To: Richard Henderson, qemu-devel, qemu-riscv
Cc: peter.maydell, palmer, sagark, kbastian, riku.voipio, laurent,
Alistair.Francis, alex.bennee, aurelien
On 2019/8/29 2:54 AM, Richard Henderson wrote:
> On 8/27/19 7:36 PM, liuzhiwei wrote:
>> Change-Id: I3cf891bc400713b95f47ecca82b1bf773f3dcb25
>> Signed-off-by: liuzhiwei <zhiwei_liu@c-sky.com>
>> ---
>> fpu/softfloat.c | 119 +
>> include/fpu/softfloat.h | 4 +
>> linux-user/riscv/cpu_loop.c | 8 +-
>> target/riscv/Makefile.objs | 2 +-
>> target/riscv/cpu.h | 30 +
>> target/riscv/cpu_bits.h | 15 +
>> target/riscv/cpu_helper.c | 7 +
>> target/riscv/csr.c | 65 +-
>> target/riscv/helper.h | 354 +
>> target/riscv/insn32.decode | 374 +-
>> target/riscv/insn_trans/trans_rvv.inc.c | 484 +
>> target/riscv/translate.c | 1 +
>> target/riscv/vector_helper.c | 26563 ++++++++++++++++++++++++++++++
>> 13 files changed, 28017 insertions(+), 9 deletions(-)
> As Alex mentioned, this is *far* too big to be presented as a single patch.
OK, I will split it into a patch set in V2.
>
>> diff --git a/include/fpu/softfloat.h b/include/fpu/softfloat.h
>> index 3ff3fa5..3b0754c 100644
>> --- a/include/fpu/softfloat.h
>> +++ b/include/fpu/softfloat.h
>> @@ -293,6 +293,10 @@ float16 float16_maxnummag(float16, float16, float_status *status);
>> float16 float16_sqrt(float16, float_status *status);
>> int float16_compare(float16, float16, float_status *status);
>> int float16_compare_quiet(float16, float16, float_status *status);
>> +int float16_unordered_quiet(float16, float16, float_status *status);
>> +int float16_le(float16, float16, float_status *status);
>> +int float16_lt(float16, float16, float_status *status);
>> +int float16_eq_quiet(float16, float16, float_status *status);
> As Alex mentioned, none of these changes are required, as all
> functionality is provided by float16_compare{,_quiet}.
Yes, use float16_compare instead.
>> diff --git a/linux-user/riscv/cpu_loop.c b/linux-user/riscv/cpu_loop.c
>> index 12aa3c0..b01548a 100644
>> --- a/linux-user/riscv/cpu_loop.c
>> +++ b/linux-user/riscv/cpu_loop.c
>> @@ -40,7 +40,13 @@ void cpu_loop(CPURISCVState *env)
>> signum = 0;
>> sigcode = 0;
>> sigaddr = 0;
>> -
>> + if (env->foflag) {
>> + if (env->vfp.vl != 0) {
>> + env->foflag = false;
>> + env->pc += 4;
>> + continue;
>> + }
> This is most definitely not the correct way to implement first-fault.
>
> You need to have a look at target/arm/sve_helper.c, e.g. sve_ldff1_r,
> where we test pages for validity with tlb_vaddr_to_host.
Why should we test pages for validity? If there is a page fault at run
time, that is exactly the case the fault-only-first instruction exists for.
>> + /* vector coprocessor state. */
>> + struct {
>> + union VECTOR {
>> + float64 f64[VUNIT(64)];
>> + float32 f32[VUNIT(32)];
>> + float16 f16[VUNIT(16)];
>> + target_ulong ul[VUNIT(sizeof(target_ulong))];
>> + uint64_t u64[VUNIT(64)];
>> + int64_t s64[VUNIT(64)];
>> + uint32_t u32[VUNIT(32)];
>> + int32_t s32[VUNIT(32)];
>> + uint16_t u16[VUNIT(16)];
>> + int16_t s16[VUNIT(16)];
>> + uint8_t u8[VUNIT(8)];
>> + int8_t s8[VUNIT(8)];
>> + } vreg[32];
>> + target_ulong vxrm;
>> + target_ulong vxsat;
>> + target_ulong vl;
>> + target_ulong vstart;
>> + target_ulong vtype;
>> + float_status fp_status;
>> + } vfp;
> You've obviously copied "vfp" from target/arm. Drop that. It makes no sense
> in the context of risc-v.
> I'm not sure that vreg[].element[] really makes the most sense in the context
> of how risc-v rearranges its elements. It will almost certainly fail clang
> validators, if enabled, since you'll be indexing beyond the end of vreg[n] into
> vreg[n+1].
>
> It might be best to have a single array:
>
> union {
> uint64_t u64[32 * VLEN / 64];
> ...
> uint8_t u8[32 * VLEN / 8];
> } velt;
>
> This is clearer to the compiler that this is a single block of memory that we
> can index as we please.
A single array is a good idea. But vreg[] will be better for understanding, as it preserves the register concept.
> Note that float64/float32/float16 are legacy. They will always be equivalent
> to the unsigned integer types of the same size.
>
> Is there really any vector operation at all that is dependent on XLEN? If not,
> then there is no reason to confuse things by including target_ulong.
>
OK.
>> diff --git a/target/riscv/cpu_helper.c b/target/riscv/cpu_helper.c
>> index e32b612..405caf6 100644
>> --- a/target/riscv/cpu_helper.c
>> +++ b/target/riscv/cpu_helper.c
>> @@ -521,6 +521,13 @@ void riscv_cpu_do_interrupt(CPUState *cs)
>> [PRV_H] = RISCV_EXCP_H_ECALL,
>> [PRV_M] = RISCV_EXCP_M_ECALL
>> };
>> + if (env->foflag) {
>> + if (env->vfp.vl != 0) {
>> + env->foflag = false;
>> + env->pc += 4;
>> + return;
>> + }
>> + }
> Again, not the way to implement first-fault.
>
> In particular, you haven't even verified that do_interrupt has been called on
> behalf of a RISCV_EXCP_LOAD_PAGE_FAULT. This could be a timer tick.
I don't think this could be a timer tick; a timer tick cannot interrupt
a single instruction in QEMU.
According to the specification, if a RISCV_EXCP_LOAD_PAGE_FAULT occurs
in the middle of the instruction and some elements have already been
loaded or stored, those elements will not be processed again after
returning from the exception.
If no elements had been loaded or stored when the fault occurred, all of
the elements will be processed after returning from the exception.
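In code, the intended semantics are roughly the following (a hypothetical
helper skeleton, not the code from this patch):

    /* Unit-stride byte load honoring VSTART: elements below vstart
     * completed before the trap and are skipped on re-execution. */
    for (i = env->vfp.vstart; i < env->vfp.vl; i++) {
        env->vfp.vstart = i;   /* record progress before a possible fault */
        env->vfp.vreg[vd].u8[i] = cpu_ldub_data_ra(env, base + i, GETPC());
    }
    env->vfp.vstart = 0;       /* the whole instruction completed */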
>
>> +#define MAX_U8 ((uint8_t)0xff)
>> +#define MIN_U8 ((uint8_t)0x0)
>> +#define MAX_S8 ((int8_t)0x7f)
>> +#define MIN_S8 ((int8_t)0x80)
>> +#define SIGNBIT16 (1 << 15)
>> +#define MAX_U16 ((uint16_t)0xffff)
>> +#define MIN_U16 ((uint16_t)0x0)
>> +#define MAX_S16 ((int16_t)0x7fff)
>> +#define MIN_S16 ((int16_t)0x8000)
>> +#define SIGNBIT32 (1 << 31)
>> +#define MAX_U32 ((uint32_t)0xffffffff)
>> +#define MIN_U32 ((uint32_t)0x0)
>> +#define MAX_S32 ((int32_t)0x7fffffff)
>> +#define MIN_S32 ((int32_t)0x80000000)
>> +#define SIGNBIT64 ((uint64_t)1 << 63)
>> +#define MAX_U64 ((uint64_t)0xffffffffffffffff)
>> +#define MIN_U64 ((uint64_t)0x0)
>> +#define MAX_S64 ((int64_t)0x7fffffffffffffff)
>> +#define MIN_S64 ((int64_t)0x8000000000000000)
> Why are you replicating INT8_MIN et al?
Thanks, it will be removed.
>
>
>> +static target_ulong vector_get_index(CPURISCVState *env, int rs1, int rs2,
>> + int index, int mem, int width, int nf)
>> +{
>> + target_ulong abs_off, base = env->gpr[rs1];
>> + target_long offset;
>> + switch (width) {
>> + case 8:
>> + offset = sign_extend(env->vfp.vreg[rs2].s8[index], 8) + nf * mem;
>> + break;
>> + case 16:
>> + offset = sign_extend(env->vfp.vreg[rs2].s16[index], 16) + nf * mem;
>> + break;
>> + case 32:
>> + offset = sign_extend(env->vfp.vreg[rs2].s32[index], 32) + nf * mem;
>> + break;
>> + case 64:
>> + offset = env->vfp.vreg[rs2].s64[index] + nf * mem;
>> + break;
>> + default:
>> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> This is broken. You cannot use GETPC() anywhere except in the outermost
> HELPER(). Otherwise you're not computing the return address back into the
> code_gen_buffer, which is what is required to properly unwind the guest state.
Yes, I will fix it.
>
>> +static inline bool vector_vtype_ill(CPURISCVState *env)
>> +{
>> + if ((env->vfp.vtype >> (sizeof(target_ulong) - 1)) & 0x1) {
>> + return true;
>> + }
>> + return false;
>> +}
>> +
>> +static inline void vector_vtype_set_ill(CPURISCVState *env)
>> +{
>> + env->vfp.vtype = ((target_ulong)1) << (sizeof(target_ulong) - 1);
>> + return;
>> +}
>> +
>> +static inline int vector_vtype_get_sew(CPURISCVState *env)
>> +{
>> + return (env->vfp.vtype >> 2) & 0x7;
>> +}
>> +
>> +static inline int vector_get_width(CPURISCVState *env)
>> +{
>> + return 8 * (1 << vector_vtype_get_sew(env));
>> +}
>> +
>> +static inline int vector_get_lmul(CPURISCVState *env)
>> +{
>> + return 1 << (env->vfp.vtype & 0x3);
>> +}
>> +
>> +static inline int vector_get_vlmax(CPURISCVState *env)
>> +{
>> + return vector_get_lmul(env) * VLEN / vector_get_width(env);
>> +}
>> +
>> +static inline int vector_elem_mask(CPURISCVState *env, uint32_t vm, int width,
>> + int lmul, int index)
>> +{
>> + int mlen = width / lmul;
>> + int idx = (index * mlen) / 8;
>> + int pos = (index * mlen) % 8;
>> +
>> + return vm || ((env->vfp.vreg[0].u8[idx] >> pos) & 0x1);
>> +}
> I would strongly encourage you place the components of vtype within tb_flags
> via cpu_get_tb_cpu_state. This would allow you to move quite a few checks from
> run-time to translation-time.
A good idea, though somewhat difficult.
> Recall that translation happens once (per configuration), whereas execution
> happens many times. Obviously, the more configurations that we create, the
> more translation that must happen.
>
> But the vtypei argument to vsetvli is a good choice, because it is constant,
> relates directly to the compiled code, and is unrelated to the length of the
> data being processed.
A good choice for what? I don't quite understand.
> With that, you can verify at translation:
>
> (1) vill
> (2) v[n], for (n % lmul) != 0
> (3) v[n] overlapping v[0] for masked/carry operations, with lmul > 1
>
> and
>
> (4) you can arrange the helpers so that instead of 1 helper that has to
> handle all SEW, you have N helpers, each handling a different SEW.
For all vector instructions or just vsetvli?
> And with all of this done, I believe you no longer need to pass the register
> number to the helper. You can pass the address of v[n], which is much more
> like how the tcg generic vector support works.
>
> Whether or not to include VL in tb_flags is a harder choice. Certainly not the
> exact value of VL, as that would lead to different translations for every loop
> tail. But it might be reasonable to include (VSTART == 0 && VL == VLMAX) as a
> single bit. Knowing that this condition is true would allow some use of the
> tcg generic vector support.
>
> E.g. vadd.vv could be
>
> if (masked) {
> switch (SEW) {
> case MO_8:
> gen_helper_vadd8_mask(...);
> break;
> ...
> }
> } else if (vl_eq_vlmax) {
> tcg_gen_gvec_add(SEW, vreg_ofs(vd), vreg_ofs(vs2), vreg_ofs(vs1),
> VLEN * LMUL, VLEN * LMUL);
> } else {
> switch (SEW) {
> case MO_8:
> gen_helper_vadd8(...);
> break;
> ...
> }
> }
>
> Or, equivalently, pack pointers to the actual generator functions into a
> structure so that this code structure can be shared between many instructions.
>
> Bear in mind that all tcg gvec operations operate strictly upon lanes. I.e.
>
> vd[x] = vs1[x] op vs2[x]
>
> thus the actual arrangement of the elements in storage is irrelevant and SLEN
> need not be considered here.
Thank you very much. Although it is somewhat difficult for me to address
your comments, they are very helpful.
Best Regards,
Zhiwei
>
>
> r~
>
^ permalink raw reply [flat|nested] 52+ messages in thread
* Re: [Qemu-devel] [PATCH] RISCV: support riscv vector extension 0.7.1
2019-09-02 9:43 ` [Qemu-riscv] " liuzhiwei
@ 2019-09-03 14:21 ` Richard Henderson
-1 siblings, 0 replies; 52+ messages in thread
From: Richard Henderson @ 2019-09-03 14:21 UTC (permalink / raw)
To: liuzhiwei, qemu-devel, qemu-riscv
Cc: peter.maydell, palmer, sagark, kbastian, riku.voipio, laurent,
Alistair.Francis, alex.bennee, aurelien
On 9/2/19 2:43 AM, liuzhiwei wrote:
>> This is most definitely not the correct way to implement first-fault.
>>
>> You need to have a look at target/arm/sve_helper.c, e.g. sve_ldff1_r,
>> where we test pages for validity with tlb_vaddr_to_host.
> Why should we test pages for validity? If there is a page fault at run time,
> that is exactly the case the fault-only-first instruction exists for.
So that the helper does not fault for the Nth access, N > 1.
You test to see if the page has a mapping, and if it doesn't,
you end the instruction, without going through the exception
path that I have objections to.
Except for gather loads, you don't have to test for every
access, only at page boundaries. And then you may also arrange
to use direct host access to the pages that you've validated.
Again, have a look at sve_ldff1_r.
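The shape of it is roughly this (a sketch, not the actual SVE code; for
simplicity it probes per element, though as noted you only need to probe
at page boundaries, and a NULL return can also mean MMIO):

    /* First element: any fault is delivered normally. */
    dst[0] = cpu_ldub_data_ra(env, addr, retaddr);
    for (i = 1; i < vl; i++) {
        if (!tlb_vaddr_to_host(env, addr + i, MMU_DATA_LOAD, mmu_idx)) {
            vl = i;    /* truncate VL at the first inaccessible element */
            break;
        }
        dst[i] = cpu_ldub_data_ra(env, addr + i, retaddr);
    }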
> A single array is a good idea. But vreg[] will be better for understanding, as it preserves the register concept.
A function access to the registers would be just as good for that.
r~
^ permalink raw reply [flat|nested] 52+ messages in thread
* Re: [Qemu-devel] [PATCH] RISCV: support riscv vector extension 0.7.1
2019-09-02 7:45 ` [Qemu-riscv] " liuzhiwei
@ 2019-09-03 14:38 ` Richard Henderson
-1 siblings, 0 replies; 52+ messages in thread
From: Richard Henderson @ 2019-09-03 14:38 UTC (permalink / raw)
To: liuzhiwei, qemu-devel, qemu-riscv
Cc: peter.maydell, palmer, sagark, kbastian, riku.voipio, laurent,
Alistair.Francis, alex.bennee, aurelien
On 9/2/19 12:45 AM, liuzhiwei wrote:
>
> On 2019/8/29 11:09 PM, Richard Henderson wrote:
>> On 8/29/19 5:45 AM, liuzhiwei wrote:
>>> Even in qemu, it may be some situations that VSTART != 0. For example, a load
>>> instruction leads to a page fault exception in a middle position. If VSTART ==
>>> 0, some elements that had been loaded before the exception will be loaded once
>>> again.
>> Alternately, you can validate all of the pages before performing any memory
>> operations. At which point there will never be an exception in the middle.
>
> As a vector instruction may access memory across many pages, is there any way
> to validate the pages? A page table walk? Or some TLB APIs?
Yes, there are TLB APIs. Several of them, depending on what is needed.
> #0 cpu_watchpoint_address_matches (wp=0x555556228110, addr=536871072, len=1)
> at qemu/exec.c:1094
> #1 0x000055555567204f in check_watchpoint (offset=160, len=1, attrs=...,
> flags=2) at qemu/exec.c:2803
> #2 0x0000555555672379 in watch_mem_write (opaque=0x0, addr=536871072, val=165,
> size=1, attrs=...) at qemu/exec.c:2878
> #3 0x00005555556d44bb in memory_region_write_with_attrs_accessor
> (mr=0x5555561292e0 <io_mem_watch>, addr=536871072, value=0x7fffedffe2c8,
> size=1, shift=0, mask=255, attrs=...)
> at qemu/memory.c:553
> #4 0x00005555556d45de in access_with_adjusted_size (addr=536871072,
> value=0x7fffedffe2c8, size=1, access_size_min=1, access_size_max=8,
> access_fn=0x5555556d43cd <memory_region_write_with_attrs_accessor>,
> mr=0x5555561292e0 <io_mem_watch>, attrs=...) at qemu/memory.c:594
> #5 0x00005555556d7247 in memory_region_dispatch_write (mr=0x5555561292e0
> <io_mem_watch>, addr=536871072, data=165, size=1, attrs=...) at qemu/memory.c:1480
> #6 0x00005555556f0d13 in io_writex (env=0x5555561efb58,
> iotlbentry=0x5555561f5398, mmu_idx=1, val=165, addr=536871072, retaddr=0,
> recheck=false, size=1) at qemu/accel/tcg/cputlb.c:909
> #7 0x00005555556f19a6 in io_writeb (env=0x5555561efb58, mmu_idx=1, index=0,
> val=165 '\245', addr=536871072, retaddr=0, recheck=false) at
> qemu/accel/tcg/softmmu_template.h:268
> #8 0x00005555556f1b54 in helper_ret_stb_mmu (env=0x5555561efb58,
> addr=536871072, val=165 '\245', oi=1, retaddr=0) at
> qemu/accel/tcg/softmmu_template.h:304
> #9 0x0000555555769f06 in cpu_stb_data_ra (env=0x5555561efb58, ptr=536871072,
> v=165, retaddr=0) at qemu/include/exec/cpu_ldst_template.h:182
> #10 0x0000555555769f80 in cpu_stb_data (env=0x5555561efb58, ptr=536871072,
> v=165) at /qemu/include/exec/cpu_ldst_template.h:194
> #11 0x000055555576a913 in csky_cpu_stb_data (env=0x5555561efb58,
> vaddr=536871072, data=165 '\245') at qemu/target/csky/csky_ldst.c:48
> #12 0x000055555580ba7d in helper_vdsp2_vstru_n (env=0x5555561efb58,
> insn=4167183360) at qemu/target/csky/op_vdsp2.c:1317
>
> The path is not related to probe_write() in the patch.
Of course. It wasn't supposed to be.
> Could you give more details or a test case where the watchpoint doesn't work
> correctly?
If the store partially, but not completely, overlaps the watchpoint. This is
obviously much easier to do with large vector operations than with normal
integer operations.
In this case, we may have completed some of the stores before encountering the
watchpoint. Which, inside check_watchpoint(), will longjmp back to the cpu
main loop. Now we have a problem: the store is partially complete and it
should not be.
Therefore, we now have patches queued in tcg-next that adjust probe_write to
perform both access and watchpoint tests. There is still target-specific code
that must be adjusted to match, so there are not currently any examples in the
tree to show.
However, the idea is:
(1) Instructions that perform more than one host store must probe
the entire range to be stored before performing any stores.
(2) Instructions that perform more than one host load must either
probe the entire range to be loaded, or collect the data in
temporary storage. If not using probes, writeback to the
register file must be delayed until after all loads are done.
(3) Any one probe may not cross a page boundary; splitting of the
access across pages must be done by the helper.
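Concretely, rule (1) looks something like this (a sketch; probe_write's
exact signature differs between trees):

    /* Probe every page the store touches before writing anything. */
    target_ulong off = 0;
    while (off < len) {
        target_ulong left = TARGET_PAGE_SIZE
                            - ((addr + off) % TARGET_PAGE_SIZE);
        target_ulong chunk = MIN(len - off, left);
        probe_write(env, addr + off, chunk, mmu_idx, retaddr); /* may trap */
        off += chunk;
    }
    /* Only now is it safe to perform the element stores. */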
r~
^ permalink raw reply [flat|nested] 52+ messages in thread
* Re: [Qemu-devel] [PATCH] RISCV: support riscv vector extension 0.7.1
2019-08-28 18:54 ` [Qemu-riscv] " Richard Henderson
` (2 preceding siblings ...)
(?)
@ 2019-12-19 9:11 ` LIU Zhiwei
2019-12-19 20:38 ` Richard Henderson
-1 siblings, 1 reply; 52+ messages in thread
From: LIU Zhiwei @ 2019-12-19 9:11 UTC (permalink / raw)
To: Richard Henderson; +Cc: Chih-Min Chao, palmer, Alistair.Francis, qemu-devel
Hi Richard,
Sorry to reply so late.
Upstreaming is really difficult. I was really frustrated to receive so
many difficult comments.
It is hard for me to absorb them, and it will take a lot of time to fix
everything up.
Now I will move on.
On 2019/8/29 2:54, Richard Henderson wrote:
> On 8/27/19 7:36 PM, liuzhiwei wrote:
>> Change-Id: I3cf891bc400713b95f47ecca82b1bf773f3dcb25
>> Signed-off-by: liuzhiwei <zhiwei_liu@c-sky.com>
>> ---
>> fpu/softfloat.c | 119 +
>> include/fpu/softfloat.h | 4 +
>> linux-user/riscv/cpu_loop.c | 8 +-
>> target/riscv/Makefile.objs | 2 +-
>> target/riscv/cpu.h | 30 +
>> target/riscv/cpu_bits.h | 15 +
>> target/riscv/cpu_helper.c | 7 +
>> target/riscv/csr.c | 65 +-
>> target/riscv/helper.h | 354 +
>> target/riscv/insn32.decode | 374 +-
>> target/riscv/insn_trans/trans_rvv.inc.c | 484 +
>> target/riscv/translate.c | 1 +
>> target/riscv/vector_helper.c | 26563 ++++++++++++++++++++++++++++++
>> 13 files changed, 28017 insertions(+), 9 deletions(-)
>> + /* vector coprocessor state. */
>> + struct {
>> + union VECTOR {
>> + float64 f64[VUNIT(64)];
>> + float32 f32[VUNIT(32)];
>> + float16 f16[VUNIT(16)];
>> + target_ulong ul[VUNIT(sizeof(target_ulong))];
>> + uint64_t u64[VUNIT(64)];
>> + int64_t s64[VUNIT(64)];
>> + uint32_t u32[VUNIT(32)];
>> + int32_t s32[VUNIT(32)];
>> + uint16_t u16[VUNIT(16)];
>> + int16_t s16[VUNIT(16)];
>> + uint8_t u8[VUNIT(8)];
>> + int8_t s8[VUNIT(8)];
>> + } vreg[32];
>> + target_ulong vxrm;
>> + target_ulong vxsat;
>> + target_ulong vl;
>> + target_ulong vstart;
>> + target_ulong vtype;
>> + float_status fp_status;
>> + } vfp;
> You've obviously copied "vfp" from target/arm. Drop that. It makes no sense
> in the context of risc-v.
>
> I'm not sure that vreg[].element[] really makes the most sense in the context
> of how risc-v rearranges its elements.
Using vreg[].element[] was my gut feeling; it makes the code easiest to
understand.
As you said, viewing all vector registers as a single block of memory is
good for programming.
> It will almost certainly fail clang
> validators, if enabled, since you'll be indexing beyond the end of vreg[n] into
> vreg[n+1].
I'm sorry, it's really hard for me to absorb your opinion. I don't know
why clang will fail when indexing beyond the end of vreg[n] into vreg[n+1].
> It might be best to have a single array:
>
> union {
> uint64_t u64[32 * VLEN / 64];
> ...
> uint8_t u8[32 * VLEN / 8];
> } velt;
>
> This is clearer to the compiler that this is a single block of memory that we
> can index as we please.
As Chih-Min Chao said in another part of the PATCH V2 thread, VLEN will be
a property that can be specified from the command line. So the sub-struct
may be defined as
struct {
union{
uint64_t *u64 ;
int64_t *s64;
uint32_t *u32;
int32_t *s32;
uint16_t *u16;
int16_t *s16;
uint8_t *u8;
int8_t *s8;
} mem;
target_ulong vxrm;
target_ulong vxsat;
target_ulong vl;
target_ulong vstart;
target_ulong vtype;
} vext;
Will that be OK?
>> +static inline bool vector_vtype_ill(CPURISCVState *env)
>> +{
>> + if ((env->vfp.vtype >> (sizeof(target_ulong) - 1)) & 0x1) {
>> + return true;
>> + }
>> + return false;
>> +}
>> +
>> +static inline void vector_vtype_set_ill(CPURISCVState *env)
>> +{
>> + env->vfp.vtype = ((target_ulong)1) << (sizeof(target_ulong) - 1);
>> + return;
>> +}
>> +
>> +static inline int vector_vtype_get_sew(CPURISCVState *env)
>> +{
>> + return (env->vfp.vtype >> 2) & 0x7;
>> +}
>> +
>> +static inline int vector_get_width(CPURISCVState *env)
>> +{
>> + return 8 * (1 << vector_vtype_get_sew(env));
>> +}
>> +
>> +static inline int vector_get_lmul(CPURISCVState *env)
>> +{
>> + return 1 << (env->vfp.vtype & 0x3);
>> +}
>> +
>> +static inline int vector_get_vlmax(CPURISCVState *env)
>> +{
>> + return vector_get_lmul(env) * VLEN / vector_get_width(env);
>> +}
>> +
>> +static inline int vector_elem_mask(CPURISCVState *env, uint32_t vm, int width,
>> + int lmul, int index)
>> +{
>> + int mlen = width / lmul;
>> + int idx = (index * mlen) / 8;
>> + int pos = (index * mlen) % 8;
>> +
>> + return vm || ((env->vfp.vreg[0].u8[idx] >> pos) & 0x1);
>> +}
> I would strongly encourage you place the components of vtype within tb_flags
> via cpu_get_tb_cpu_state. This would allow you to move quite a few checks from
> run-time to translation-time.
>
> Recall that translation happens once (per configuration), whereas execution
> happens many times. Obviously, the more configurations that we create, the
> more translation that must happen.
All of the check code will be moved from execution time to translation time.
> But the vtypei argument to vsetvli is a good choice, because it is constant,
> relates directly to the compiled code, and is unrelated to the length of the
> data being processed.
>
> With that, you can verify at translation:
>
> (1) vill
> (2) v[n], for (n % lmul) != 0
> (3) v[n] overlapping v[0] for masked/carry operations, with lmul > 1
>
> and
>
> (4) you can arrange the helpers so that instead of 1 helper that has to
> handle all SEW, you have N helpers, each handling a different SEW.
>
> And with all of this done, I believe you no longer need to pass the register
> number to the helper. You can pass the address of v[n], which is much more
> like how the tcg generic vector support works.
>
> Whether or not to include VL in tb_flags is a harder choice. Certainly not the
> exact value of VL, as that would lead to different translations for every loop
> tail. But it might be reasonable to include (VSTART == 0 && VL == VLMAX) as a
> single bit. Knowing that this condition is true would allow some use of the
> tcg generic vector support.
The (ill, lmul, sew) fields of vtype will be placed within tb_flags, along
with the (VSTART == 0 && VL == VLMAX) bit.
So the vector extension will take at least 8 bits of tb_flags.
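(A sketch of the layout, using the FIELD() macros from
hw/registerfields.h; the exact field positions are my guess:)

    FIELD(TB_FLAGS, VILL, 0, 1)
    FIELD(TB_FLAGS, SEW, 1, 3)
    FIELD(TB_FLAGS, LMUL, 4, 2)
    FIELD(TB_FLAGS, VL_EQ_VLMAX, 6, 1)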
> E.g. vadd.vv could be
>
> if (masked) {
> switch (SEW) {
> case MO_8:
> gen_helper_vadd8_mask(...);
> break;
> ...
> }
> } else if (vl_eq_vlmax) {
> tcg_gen_gvec_add(SEW, vreg_ofs(vd), vreg_ofs(vs2), vreg_ofs(vs1),
> VLEN * LMUL, VLEN * LMUL);
> } else {
> switch (SEW) {
> case MO_8:
> gen_helper_vadd8(...);
> break;
> ...
> }
> }
>
> Or, equivalently, pack pointers to the actual generator functions into a
> structure so that this code structure can be shared between many instructions.
It's quicker to use the generic vector support of TCG.
However, I have one problem supporting both a command-line VLEN and vreg_ofs.
As in SVE, vreg_ofs is the offset from cpu_env. If the structure of the
vector extension (to support a command-line VLEN) is
struct {
union{
uint64_t *u64 ;
int64_t *s64;
uint32_t *u32;
int32_t *s32;
uint16_t *u16;
int16_t *s16;
uint8_t *u8;
int8_t *s8;
} mem;
target_ulong vxrm;
target_ulong vxsat;
target_ulong vl;
target_ulong vstart;
target_ulong vtype;
} vext
I can't find a way to get the direct offset of vreg from cpu_env.
Maybe I should specify a max VLEN, the way SVE does?
Best Regards,
LIU Zhiwei
> Bear in mind that all tcg gvec operations operate strictly upon lanes. I.e.
>
> vd[x] = vs1[x] op vs2[x]
>
> thus the actual arrangement of the elements in storage is irrelevant and SLEN
> need not be considered here.
>
>
> r~
^ permalink raw reply [flat|nested] 52+ messages in thread
* Re: [Qemu-devel] [PATCH] RISCV: support riscv vector extension 0.7.1
2019-12-19 9:11 ` LIU Zhiwei
@ 2019-12-19 20:38 ` Richard Henderson
2019-12-25 9:36 ` LIU Zhiwei
0 siblings, 1 reply; 52+ messages in thread
From: Richard Henderson @ 2019-12-19 20:38 UTC (permalink / raw)
To: LIU Zhiwei; +Cc: Chih-Min Chao, palmer, Alistair.Francis, qemu-devel
On 12/18/19 11:11 PM, LIU Zhiwei wrote:
> I'm sorry, but it's really hard for me to absorb your opinion. I don't know
> why clang will fail when an index goes beyond the end of vreg[n] into
> vreg[n+1].
I thought sure one of the address sanitizer checks would detect an array
bounds overrun. But it becomes irrelevant.
> As Chih-Min Chao said in another part of the PATCH V2 thread, VLEN will be
> a property which can be specified from the command line. So the sub-struct
> may be defined as
>
>     struct {
>         union {
>             uint64_t *u64;
>             int64_t *s64;
>             uint32_t *u32;
>             int32_t *s32;
>             uint16_t *u16;
>             int16_t *s16;
>             uint8_t *u8;
>             int8_t *s8;
>         } mem;
>         target_ulong vxrm;
>         target_ulong vxsat;
>         target_ulong vl;
>         target_ulong vstart;
>         target_ulong vtype;
>     } vext;
>
> Will that be OK?
Pointers have consequences. It can be done, but I don't think it is ideal.
> The (ill, lmul, sew) fields of vtype will be placed within tb_flags, along
> with the (VSTART == 0 && VL == VLMAX) bit.
>
> So the vector extension will take at least 8 bits of tb_flags.
Good.
> However, I have one problem with supporting both a command-line VLEN and
> vreg_ofs.
>
> As in SVE, vreg_ofs is the offset from cpu_env. If the structure of the
> vector extension (to support a command-line VLEN) is
>
>     struct {
>         union {
>             uint64_t *u64;
>             int64_t *s64;
>             uint32_t *u32;
>             int32_t *s32;
>             uint16_t *u16;
>             int16_t *s16;
>             uint8_t *u8;
>             int8_t *s8;
>         } mem;
>         target_ulong vxrm;
>         target_ulong vxsat;
>         target_ulong vl;
>         target_ulong vstart;
>         target_ulong vtype;
>     } vext;
>
> I can't find a way to get the direct offset of vreg from cpu_env.
>
> Maybe I should specify a maximum VLEN, the way SVE does?
I think a maximum vlen is best. A command-line option to adjust vlen is all
well and good, but there's no reason to have to support vlen=(1<<29).
Oh, and you probably need a minimum vlen of 16 bytes as well, otherwise you
will run afoul of the assert in tcg-op-gvec.c that requires gvec operations to
be aligned mod 16.
I think that all you need is

    uint64_t vreg[32 * MAX_VLEN / 8] QEMU_ALIGNED(16);

which gives us

    uint32_t vreg_ofs(DisasContext *ctx, int reg)
    {
        return offsetof(CPURISCVState, vreg) + reg * ctx->vlen;
    }
I don't see the point of a union for vreg. I don't think you'll find that you
actually use it at all.
You do need to document the element ordering that you're going to use for vreg.
I.e. the mapping between the architectural vector register state and the
emulation state. You have two choices:
(1) all bytes in host endianness (e.g. target/ppc)
(2) bytes within each uint64_t in host endianness,
but each uint64_t is little-endian (e.g. target/arm).
Both require some fixup when running on a big-endian host.
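As a concrete sketch of option (2), mirroring the H index macros used by
target/arm (the macro names follow sve_helper.c; treating this as an
illustration rather than the final patch):

    #ifdef HOST_WORDS_BIGENDIAN
    /* On a big-endian host, flip the low index bits so that element 0
     * lives at the least-significant end of each uint64_t. */
    #define H1(x)  ((x) ^ 7)   /* uint8_t elements */
    #define H2(x)  ((x) ^ 3)   /* uint16_t elements */
    #define H4(x)  ((x) ^ 1)   /* uint32_t elements */
    #else
    #define H1(x)  (x)
    #define H2(x)  (x)
    #define H4(x)  (x)
    #endif

    /* e.g. reading byte element i of a vector register:
     *     uint8_t b = ((uint8_t *)vd)[H1(i)];
     */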
r~
* Re: [Qemu-devel] [PATCH] RISCV: support riscv vector extension 0.7.1
2019-12-19 20:38 ` Richard Henderson
@ 2019-12-25 9:36 ` LIU Zhiwei
2019-12-28 1:14 ` Richard Henderson
0 siblings, 1 reply; 52+ messages in thread
From: LIU Zhiwei @ 2019-12-25 9:36 UTC (permalink / raw)
To: Richard Henderson; +Cc: palmer, Alistair.Francis, qemu-devel, Chih-Min Chao
On 2019/12/20 4:38, Richard Henderson wrote:
> On 12/18/19 11:11 PM, LIU Zhiwei wrote:
>> I'm sorry, but it's really hard for me to absorb your opinion. I don't know
>> why clang will fail when an index goes beyond the end of vreg[n] into
>> vreg[n+1].
> I thought sure one of the address sanitizer checks would detect an array
> bounds overrun. But it becomes irrelevant.
>
>> As Chih-Min Chao said in another part of the PATCH V2 thread, VLEN will be
>> a property which can be specified from the command line. So the sub-struct
>> may be defined as
>>
>>     struct {
>>         union {
>>             uint64_t *u64;
>>             int64_t *s64;
>>             uint32_t *u32;
>>             int32_t *s32;
>>             uint16_t *u16;
>>             int16_t *s16;
>>             uint8_t *u8;
>>             int8_t *s8;
>>         } mem;
>>         target_ulong vxrm;
>>         target_ulong vxsat;
>>         target_ulong vl;
>>         target_ulong vstart;
>>         target_ulong vtype;
>>     } vext;
>>
>> Will that be OK?
> Pointers have consequences. It can be done, but I don't think it is ideal.
>
>> The (ill, lmul, sew) fields of vtype will be placed within tb_flags, along
>> with the (VSTART == 0 && VL == VLMAX) bit.
>>
>> So the vector extension will take at least 8 bits of tb_flags.
> Good.
>> However, I have one problem with supporting both a command-line VLEN and
>> vreg_ofs.
>>
>> As in SVE, vreg_ofs is the offset from cpu_env. If the structure of the
>> vector extension (to support a command-line VLEN) is
>>
>>     struct {
>>         union {
>>             uint64_t *u64;
>>             int64_t *s64;
>>             uint32_t *u32;
>>             int32_t *s32;
>>             uint16_t *u16;
>>             int16_t *s16;
>>             uint8_t *u8;
>>             int8_t *s8;
>>         } mem;
>>         target_ulong vxrm;
>>         target_ulong vxsat;
>>         target_ulong vl;
>>         target_ulong vstart;
>>         target_ulong vtype;
>>     } vext;
>>
>> I can't find a way to get the direct offset of vreg from cpu_env.
>>
>> Maybe I should specify a maximum VLEN, the way SVE does?
> I think a maximum vlen is best. A command-line option to adjust vlen is all
> well and good, but there's no reason to have to support vlen=(1<<29).
>
> Oh, and you probably need a minimum vlen of 16 bytes as well, otherwise you
> will run afoul of the assert in tcg-op-gvec.c that requires gvec operations to
> be aligned mod 16.
>
> I think that all you need is
>
>     uint64_t vreg[32 * MAX_VLEN / 8] QEMU_ALIGNED(16);
>
> which gives us
>
>     uint32_t vreg_ofs(DisasContext *ctx, int reg)
>     {
>         return offsetof(CPURISCVState, vreg) + reg * ctx->vlen;
>     }
    struct {
        uint64_t vreg[32 * RV_VLEN_MAX / 64] QEMU_ALIGNED(16);
        target_ulong vxrm;
        target_ulong vxsat;
        target_ulong vl;
        target_ulong vstart;
        target_ulong vtype;
    } vext;

Is it OK?
> I don't see the point of a union for vreg. I don't think you'll find that you
> actually use it at all.
I think I can now move most of the execution checks to translation time,
as SVE does. However, there are still some differences from SVE.

1) cpu_env must be used as a parameter for the helper functions.

The helpers need to use env->vext.vl and env->vext.vstart, so it will be
difficult to use the out-of-line tcg_gen_gvec_2_ool:
    void tcg_gen_gvec_2_ool(uint32_t dofs, uint32_t aofs,
                            uint32_t oprsz, uint32_t maxsz, int32_t data,
                            gen_helper_gvec_2 *fn)
    {
        ......
        fn(a0, a1, desc);
        ......
    }
Maybe I will have to write something similar to tcg_gen_gvec_ool in
trans_rvv.inc.c, but that would be redundant.

2) simd_desc is not suitable.

I also need to pass some members of DisasContext to the helpers.
(Data, Vlmax, Mlen) is my current choice. Vlmax is the number of elements
for this operation, so it will be defined as ctx->lmul * ctx->vlen / ctx->sew.
Data is reserved for expansion. Mlen is the mask length for one element, so
it will be defined as ctx->sew / ctx->lmul. With Mlen, an active element
will be selected by
    static inline int vext_elem_mask(void *v0, int mlen, int index)
    {
        int idx = (index * mlen) / 8;
        int pos = (index * mlen) % 8;

        /* cast: dereferencing a void pointer is not valid C */
        return (((uint8_t *)v0)[idx] >> pos) & 0x1;
    }
So I may have to implement a vext_desc instead of using simd_desc, which
would be another redundancy. Maybe there is a better way to mask elements?
> You do need to document the element ordering that you're going to use for vreg.
> I.e. the mapping between the architectural vector register state and the
> emulation state. You have two choices:
>
> (1) all bytes in host endianness (e.g. target/ppc)
> (2) bytes within each uint64_t in host endianness,
> but each uint64_t is little-endian (e.g. target/arm).
>
> Both require some fixup when running on a big-endian host.
Yes, I will take (2).
Best Regards,
Zhiwei
>
> r~
* Re: [Qemu-devel] [PATCH] RISCV: support riscv vector extension 0.7.1
2019-12-25 9:36 ` LIU Zhiwei
@ 2019-12-28 1:14 ` Richard Henderson
2019-12-30 8:11 ` LIU Zhiwei
0 siblings, 1 reply; 52+ messages in thread
From: Richard Henderson @ 2019-12-28 1:14 UTC (permalink / raw)
To: LIU Zhiwei; +Cc: palmer, Alistair.Francis, qemu-devel, Chih-Min Chao
On 12/25/19 8:36 PM, LIU Zhiwei wrote:
>     struct {
>         uint64_t vreg[32 * RV_VLEN_MAX / 64] QEMU_ALIGNED(16);
>         target_ulong vxrm;
>         target_ulong vxsat;
>         target_ulong vl;
>         target_ulong vstart;
>         target_ulong vtype;
>     } vext;
>
> Is it OK?
I don't think there's a good reason for the vext structure -- I would drop
that. Otherwise it looks good.
> However, there are still some differences from SVE.
>
> 1) cpu_env must be used as a parameter for the helper functions.
>
> The helpers need to use env->vext.vl and env->vext.vstart, so it will be
> difficult to use the out-of-line tcg_gen_gvec_2_ool.
Sure. That's also true of any of the fp operations, which will want to
accumulate IEEE exceptions.
See tcg_gen_gvec_*_ptr(), which allows you to pass in cpu_env.
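For instance, an expansion could look like this sketch (the helper name and
the a->/ctx-> fields are assumptions for the example; the argument order is
that of tcg_gen_gvec_4_ptr in tcg-op-gvec.h):

    /* Hypothetical expansion for a masked SEW=8 op: vd, v0 (mask),
     * vs1, vs2, plus cpu_env and a descriptor. */
    uint32_t vsz = ctx->lmul * ctx->vlen;

    tcg_gen_gvec_4_ptr(vreg_ofs(ctx, a->rd), vreg_ofs(ctx, 0),
                       vreg_ofs(ctx, a->rs1), vreg_ofs(ctx, a->rs2),
                       cpu_env, vsz, vsz, 0 /* data */,
                       gen_helper_vadd8_mask);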
> 2) simd_desc is not suitable.
>
> I also need to pass some members of DisasContext to the helpers.
>
> (Data, Vlmax, Mlen) is my current choice. Vlmax is the number of elements
> for this operation, so it will be defined as ctx->lmul * ctx->vlen / ctx->sew.
The oprsz & maxsz parameters to tcg_gen_gvec_* should be given (ctx->lmul *
ctx->vlen). The sew parameter should be implied by the helper function called,
each helper function using a different type. Therefore vlmax can be trivially
computed within the helper from oprsz / sizeof(type).
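For example (a sketch; simd_oprsz() is the descriptor accessor from
tcg-gvec-desc.h, and the "8" suffix marks a hypothetical SEW=8 helper):

    /* Inside the SEW=8 helper: the translator set oprsz to
     * lmul * vlen bytes, and this helper works on int8_t lanes. */
    static inline uint32_t vext_vlmax8(uint32_t desc)
    {
        return simd_oprsz(desc) / sizeof(int8_t);
    }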
>> Data is reserved for expansion. Mlen is the mask length for one element,
>> so it will be defined as ctx->sew / ctx->lmul. With Mlen, an active
>> element will be selected by
>>
>>     static inline int vext_elem_mask(void *v0, int mlen, int index)
>>     {
>>         int idx = (index * mlen) / 8;
>>         int pos = (index * mlen) % 8;
>>
>>         return (((uint8_t *)v0)[idx] >> pos) & 0x1;
>>     }
>>
>> So I may have to implement a vext_desc instead of using simd_desc, which
>> would be another redundancy. Maybe there is a better way to mask elements?
I think you will want to define your own vext_desc, building upon simd_desc,
such that lg2(mlen) is passed in the first N bits of simd_data.
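A minimal sketch of that idea, assuming simd_desc()/simd_data() from
tcg-gvec-desc.h and a hypothetical 3-bit field for lg2(mlen):

    #include "tcg/tcg-gvec-desc.h"

    #define VEXT_LOG2_MLEN_BITS  3   /* assumed width; mlen <= 64 */

    /* Build a descriptor with lg2(mlen) packed into the low data bits. */
    static inline uint32_t vext_desc(uint32_t oprsz, uint32_t maxsz,
                                     uint32_t log2_mlen, uint32_t data)
    {
        return simd_desc(oprsz, maxsz,
                         (data << VEXT_LOG2_MLEN_BITS) | log2_mlen);
    }

    /* Recover mlen inside a helper. */
    static inline uint32_t vext_mlen(uint32_t desc)
    {
        return 1u << (simd_data(desc) & ((1 << VEXT_LOG2_MLEN_BITS) - 1));
    }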
r~
* Re: [Qemu-devel] [PATCH] RISCV: support riscv vector extension 0.7.1
2019-12-28 1:14 ` Richard Henderson
@ 2019-12-30 8:11 ` LIU Zhiwei
2020-01-05 20:19 ` Richard Henderson
0 siblings, 1 reply; 52+ messages in thread
From: LIU Zhiwei @ 2019-12-30 8:11 UTC (permalink / raw)
To: Richard Henderson; +Cc: palmer, Alistair.Francis, qemu-devel, Chih-Min Chao
On 2019/12/28 9:14, Richard Henderson wrote:
> On 12/25/19 8:36 PM, LIU Zhiwei wrote:
>>     struct {
>>         uint64_t vreg[32 * RV_VLEN_MAX / 64] QEMU_ALIGNED(16);
>>         target_ulong vxrm;
>>         target_ulong vxsat;
>>         target_ulong vl;
>>         target_ulong vstart;
>>         target_ulong vtype;
>>     } vext;
>>
>> Is it OK?
> I don't think there's a good reason for the vext structure -- I would drop
> that. Otherwise it looks good.
>
>> However, there are still some differences from SVE.
>>
>> 1) cpu_env must be used as a parameter for the helper functions.
>>
>> The helpers need to use env->vext.vl and env->vext.vstart, so it will be
>> difficult to use the out-of-line tcg_gen_gvec_2_ool.
> Sure. That's also true of any of the fp operations, which will want to
> accumulate IEEE exceptions.
>
> See tcg_gen_gvec_*_ptr(), which allows you to pass in cpu_env.
Thanks. The tcg_gen_gvec_*_ptr functions are good.
>
>> 2) simd_desc is not suitable.
>>
>> I also need to pass some members of DisasContext to the helpers.
>>
>> (Data, Vlmax, Mlen) is my current choice. Vlmax is the number of elements
>> for this operation, so it will be defined as ctx->lmul * ctx->vlen / ctx->sew.
> The oprsz & maxsz parameters to tcg_gen_gvec_* should be given (ctx->lmul *
> ctx->vlen). The sew parameter should be implied by the helper function called,
> each helper function using a different type. Therefore vlmax can be trivially
> computed within the helper from oprsz / sizeof(type).
It's clear that the oprsz & maxsz parameters should be given
(ctx->lmul * ctx->vlen) for tcg_gen_gvec_add.

However, it's not clear when using tcg_gen_gvec_*_ptr or tcg_gen_gvec_ool.
I think the meaning of oprsz is the size of the active elements. Therefore,
oprsz is 8 * env->vext.vl in RISC-V, and it can't be fetched from TB_FLAGS
as in SVE.

Probably the oprsz field will not be used in the RISC-V vector extension.
>> Data is reserved for expansion. Mlen is the mask length for one element,
>> so it will be defined as ctx->sew / ctx->lmul. With Mlen, an active
>> element will be selected by
>>
>>     static inline int vext_elem_mask(void *v0, int mlen, int index)
>>     {
>>         int idx = (index * mlen) / 8;
>>         int pos = (index * mlen) % 8;
>>
>>         return (((uint8_t *)v0)[idx] >> pos) & 0x1;
>>     }
>>
>> So I may have to implement a vext_desc instead of using simd_desc, which
>> would be another redundancy. Maybe there is a better way to mask elements?
> I think you will want to define your own vext_desc, building upon simd_desc,
> such that lg2(mlen) is passed in the first N bits of simd_data.
Good. That's a clean way to use the tcg_gen_gvec_*_ptr or tcg_gen_gvec_ool APIs.
Best Regards,
Zhiwei
>
> r~
* Re: [Qemu-devel] [PATCH] RISCV: support riscv vector extension 0.7.1
2019-12-30 8:11 ` LIU Zhiwei
@ 2020-01-05 20:19 ` Richard Henderson
0 siblings, 0 replies; 52+ messages in thread
From: Richard Henderson @ 2020-01-05 20:19 UTC (permalink / raw)
To: LIU Zhiwei; +Cc: palmer, Alistair.Francis, qemu-devel, Chih-Min Chao
On 12/30/19 6:11 PM, LIU Zhiwei wrote:
>
> However, it's not clear when using tcg_gen_gvec_*_ptr or tcg_gen_gvec_ool.
> I think the meaning of oprsz is the size of the active elements. Therefore,
> oprsz is 8 * env->vext.vl in RISC-V, and it can't be fetched from TB_FLAGS
> as in SVE.
>
> Probably the oprsz field will not be used in the RISC-V vector extension.
Correct. For those risc-v helpers that are called when VL != VLMAX, you would
ignore the oprsz field and fetch it from env.
It may still be handy to pass in vlmax as maxsz, even if you leave the oprsz
field 0. You'll find that out as you do the coding, I suppose.
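To illustrate, a minimal sketch of such a helper (assuming the vext_mlen/
vext_elem_mask routines discussed above, a per-SEW helper name of my own
invention, and that the vext sub-struct has been dropped so vl/vstart live
directly in CPURISCVState; element-ordering fixups are omitted):

    void HELPER(vadd8_mask)(void *vd, void *v0, void *vs1, void *vs2,
                            CPURISCVState *env, uint32_t desc)
    {
        uint32_t mlen = vext_mlen(desc);
        uint32_t i;

        /* Ignore simd_oprsz(desc) here; the active length comes from env. */
        for (i = env->vstart; i < env->vl; i++) {
            if (!vext_elem_mask(v0, mlen, i)) {
                continue;       /* inactive element: leave vd[i] alone */
            }
            ((int8_t *)vd)[i] = ((int8_t *)vs1)[i] + ((int8_t *)vs2)[i];
        }
        env->vstart = 0;        /* architectural: reset vstart on completion */
    }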
r~
Thread overview: 52+ messages
2019-08-28 2:36 [Qemu-riscv] [PATCH] RISCV: support riscv vector extension 0.7.1 liuzhiwei
2019-08-28 9:08 ` [Qemu-devel] " Alex Bennée
2019-08-28 9:08 ` [Qemu-riscv] " Alex Bennée
2019-08-28 16:39 ` [Qemu-devel] " Richard Henderson
2019-08-28 16:39 ` [Qemu-riscv] " Richard Henderson
2019-08-29 13:35 ` liuzhiwei
2019-08-29 13:35 ` [Qemu-riscv] " liuzhiwei
2019-08-28 18:54 ` [Qemu-devel] " Richard Henderson
2019-08-28 18:54 ` [Qemu-riscv] " Richard Henderson
2019-08-28 20:43 ` Richard Henderson
2019-08-28 20:43 ` [Qemu-riscv] " Richard Henderson
2019-08-29 12:45 ` liuzhiwei
2019-08-29 12:45 ` [Qemu-riscv] " liuzhiwei
2019-08-29 15:09 ` Richard Henderson
2019-08-29 15:09 ` [Qemu-riscv] " Richard Henderson
2019-09-02 7:45 ` liuzhiwei
2019-09-02 7:45 ` [Qemu-riscv] " liuzhiwei
2019-09-03 14:38 ` Richard Henderson
2019-09-03 14:38 ` [Qemu-riscv] " Richard Henderson
2019-09-02 9:43 ` liuzhiwei
2019-09-02 9:43 ` [Qemu-riscv] " liuzhiwei
2019-09-03 14:21 ` Richard Henderson
2019-09-03 14:21 ` [Qemu-riscv] " Richard Henderson
2019-12-19 9:11 ` LIU Zhiwei
2019-12-19 20:38 ` Richard Henderson
2019-12-25 9:36 ` LIU Zhiwei
2019-12-28 1:14 ` Richard Henderson
2019-12-30 8:11 ` LIU Zhiwei
2020-01-05 20:19 ` Richard Henderson
2019-08-28 19:20 ` [Qemu-riscv] " Aleksandar Markovic
2019-08-29 12:56 ` liuzhiwei
2019-08-29 18:32 ` Aleksandar Markovic
2019-08-29 18:32 ` [Qemu-riscv] " Aleksandar Markovic
2019-08-28 21:34 ` Alistair Francis
2019-08-28 21:34 ` [Qemu-riscv] " Alistair Francis
2019-08-29 12:00 ` liuzhiwei
2019-08-29 12:00 ` [Qemu-riscv] " liuzhiwei
2019-08-29 15:14 ` Richard Henderson
2019-08-29 15:14 ` [Qemu-riscv] " Richard Henderson
2019-09-02 6:54 ` liuzhiwei
2019-09-02 6:54 ` [Qemu-riscv] " liuzhiwei
2019-08-29 21:50 ` Alistair Francis
2019-08-29 21:50 ` [Qemu-riscv] " Alistair Francis
2019-08-30 9:06 ` Alex Bennée
2019-08-30 9:06 ` [Qemu-riscv] " Alex Bennée
2019-08-30 18:39 ` Alistair Francis
2019-08-30 18:39 ` [Qemu-riscv] " Alistair Francis
2019-09-02 6:36 ` liuzhiwei
2019-09-02 6:36 ` [Qemu-riscv] " liuzhiwei
2019-08-29 14:06 ` [Qemu-riscv] " Chih-Min Chao
2019-09-02 8:17 ` [Qemu-devel] " liuzhiwei
2019-09-02 8:17 ` liuzhiwei