From: LIU Zhiwei <zhiwei_liu@c-sky.com> To: qemu-devel@nongnu.org, qemu-riscv@nongnu.org Cc: bin.meng@windriver.com, Palmer Dabbelt <palmerdabbelt@google.com>, richard.henderson@linaro.org, palmer@dabbelt.com, Alistair Francis <alistair.francis@wdc.com>, LIU Zhiwei <zhiwei_liu@c-sky.com> Subject: [PATCH v2 06/37] target/riscv: SIMD 8-bit Shift Instructions Date: Thu, 10 Jun 2021 15:58:37 +0800 [thread overview] Message-ID: <20210610075908.3305506-7-zhiwei_liu@c-sky.com> (raw) In-Reply-To: <20210610075908.3305506-1-zhiwei_liu@c-sky.com> The instructions include arithmetic right shift, logical right shift, and left shift. The shift amount can be an immediate or a register scalar. The right shifts have rounding variants, and the left shift has saturating variants. Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com> Acked-by: Alistair Francis <alistair.francis@wdc.com> Reviewed-by: Palmer Dabbelt <palmerdabbelt@google.com> --- include/tcg/tcg-op-gvec.h | 9 +++ target/riscv/helper.h | 9 +++ target/riscv/insn32.decode | 17 ++++ target/riscv/insn_trans/trans_rvp.c.inc | 16 ++++ target/riscv/packed_helper.c | 102 ++++++++++++++++++++++++ tcg/tcg-op-gvec.c | 28 +++++++ 6 files changed, 181 insertions(+) diff --git a/include/tcg/tcg-op-gvec.h b/include/tcg/tcg-op-gvec.h index 72cf697646..91531ecb0b 100644 --- a/include/tcg/tcg-op-gvec.h +++ b/include/tcg/tcg-op-gvec.h @@ -397,12 +397,15 @@ void tcg_gen_vec_sub16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b); void tcg_gen_vec_sub32_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b); void tcg_gen_vec_shl8i_i64(TCGv_i64 d, TCGv_i64 a, int64_t); +void tcg_gen_vec_shl8i_i32(TCGv_i32 d, TCGv_i32 a, int32_t); void tcg_gen_vec_shl16i_i64(TCGv_i64 d, TCGv_i64 a, int64_t); void tcg_gen_vec_shl16i_i32(TCGv_i32 d, TCGv_i32 a, int32_t); void tcg_gen_vec_shr8i_i64(TCGv_i64 d, TCGv_i64 a, int64_t); +void tcg_gen_vec_shr8i_i32(TCGv_i32 d, TCGv_i32 a, int32_t); void tcg_gen_vec_shr16i_i64(TCGv_i64 d, TCGv_i64 a, int64_t); void tcg_gen_vec_shr16i_i32(TCGv_i32 d, TCGv_i32 a, 
int32_t); void tcg_gen_vec_sar8i_i64(TCGv_i64 d, TCGv_i64 a, int64_t); +void tcg_gen_vec_sar8i_i32(TCGv_i32 d, TCGv_i32 a, int32_t); void tcg_gen_vec_sar16i_i64(TCGv_i64 d, TCGv_i64 a, int64_t); void tcg_gen_vec_sar16i_i32(TCGv_i32 d, TCGv_i32 a, int32_t); void tcg_gen_vec_rotl8i_i64(TCGv_i64 d, TCGv_i64 a, int64_t c); @@ -416,6 +419,9 @@ void tcg_gen_vec_rotl16i_i64(TCGv_i64 d, TCGv_i64 a, int64_t c); #define tcg_gen_vec_shl16i_tl tcg_gen_vec_shl16i_i64 #define tcg_gen_vec_shr16i_tl tcg_gen_vec_shr16i_i64 #define tcg_gen_vec_sar16i_tl tcg_gen_vec_sar16i_i64 +#define tcg_gen_vec_shl8i_tl tcg_gen_vec_shl8i_i64 +#define tcg_gen_vec_shr8i_tl tcg_gen_vec_shr8i_i64 +#define tcg_gen_vec_sar8i_tl tcg_gen_vec_sar8i_i64 #else #define tcg_gen_vec_add16_tl tcg_gen_vec_add16_i32 #define tcg_gen_vec_sub16_tl tcg_gen_vec_sub16_i32 @@ -424,6 +430,9 @@ void tcg_gen_vec_rotl16i_i64(TCGv_i64 d, TCGv_i64 a, int64_t c); #define tcg_gen_vec_shl16i_tl tcg_gen_vec_shl16i_i32 #define tcg_gen_vec_shr16i_tl tcg_gen_vec_shr16i_i32 #define tcg_gen_vec_sar16i_tl tcg_gen_vec_sar16i_i32 +#define tcg_gen_vec_shl8i_tl tcg_gen_vec_shl8i_i32 +#define tcg_gen_vec_shr8i_tl tcg_gen_vec_shr8i_i32 +#define tcg_gen_vec_sar8i_tl tcg_gen_vec_sar8i_i32 #endif #endif diff --git a/target/riscv/helper.h b/target/riscv/helper.h index de7b4fc17d..1b365135ff 100644 --- a/target/riscv/helper.h +++ b/target/riscv/helper.h @@ -1197,3 +1197,12 @@ DEF_HELPER_3(sll16, tl, env, tl, tl) DEF_HELPER_3(ksll16, tl, env, tl, tl) DEF_HELPER_3(kslra16, tl, env, tl, tl) DEF_HELPER_3(kslra16_u, tl, env, tl, tl) + +DEF_HELPER_3(sra8, tl, env, tl, tl) +DEF_HELPER_3(sra8_u, tl, env, tl, tl) +DEF_HELPER_3(srl8, tl, env, tl, tl) +DEF_HELPER_3(srl8_u, tl, env, tl, tl) +DEF_HELPER_3(sll8, tl, env, tl, tl) +DEF_HELPER_3(ksll8, tl, env, tl, tl) +DEF_HELPER_3(kslra8, tl, env, tl, tl) +DEF_HELPER_3(kslra8_u, tl, env, tl, tl) diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode index 44c497f28a..8b78fb24bc 100644 --- 
a/target/riscv/insn32.decode +++ b/target/riscv/insn32.decode @@ -25,6 +25,7 @@ %sh7 20:7 %sh4 20:4 +%sh3 20:3 %csr 20:12 %rm 12:3 %nf 29:3 !function=ex_plus_1 @@ -63,6 +64,7 @@ @sh ...... ...... ..... ... ..... ....... &shift shamt=%sh7 %rs1 %rd @sh4 ...... ...... ..... ... ..... ....... &shift shamt=%sh4 %rs1 %rd +@sh3 ...... ...... ..... ... ..... ....... &shift shamt=%sh3 %rs1 %rd @csr ............ ..... ... ..... ....... %csr %rs1 %rd @atom_ld ..... aq:1 rl:1 ..... ........ ..... ....... &atomic rs2=0 %rs1 %rd @@ -792,3 +794,18 @@ ksll16 0110010 ..... ..... 000 ..... 1110111 @r kslli16 0111010 1.... ..... 000 ..... 1110111 @sh4 kslra16 0101011 ..... ..... 000 ..... 1110111 @r kslra16_u 0110011 ..... ..... 000 ..... 1110111 @r + +sra8 0101100 ..... ..... 000 ..... 1110111 @r +sra8_u 0110100 ..... ..... 000 ..... 1110111 @r +srai8 0111100 00... ..... 000 ..... 1110111 @sh3 +srai8_u 0111100 01... ..... 000 ..... 1110111 @sh3 +srl8 0101101 ..... ..... 000 ..... 1110111 @r +srl8_u 0110101 ..... ..... 000 ..... 1110111 @r +srli8 0111101 00... ..... 000 ..... 1110111 @sh3 +srli8_u 0111101 01... ..... 000 ..... 1110111 @sh3 +sll8 0101110 ..... ..... 000 ..... 1110111 @r +slli8 0111110 00... ..... 000 ..... 1110111 @sh3 +ksll8 0110110 ..... ..... 000 ..... 1110111 @r +kslli8 0111110 01... ..... 000 ..... 1110111 @sh3 +kslra8 0101111 ..... ..... 000 ..... 1110111 @r +kslra8_u 0110111 ..... ..... 000 ..... 
1110111 @r diff --git a/target/riscv/insn_trans/trans_rvp.c.inc b/target/riscv/insn_trans/trans_rvp.c.inc index afafa49824..e6c5f2ddf5 100644 --- a/target/riscv/insn_trans/trans_rvp.c.inc +++ b/target/riscv/insn_trans/trans_rvp.c.inc @@ -187,3 +187,19 @@ GEN_RVP_SHIFTI(slli16, tcg_gen_vec_shl16i_tl, gen_helper_sll16); GEN_RVP_SHIFTI(srai16_u, NULL, gen_helper_sra16_u); GEN_RVP_SHIFTI(srli16_u, NULL, gen_helper_srl16_u); GEN_RVP_SHIFTI(kslli16, NULL, gen_helper_ksll16); + +/* SIMD 8-bit Shift Instructions */ +GEN_RVP_R_OOL(sra8); +GEN_RVP_R_OOL(srl8); +GEN_RVP_R_OOL(sll8); +GEN_RVP_R_OOL(sra8_u); +GEN_RVP_R_OOL(srl8_u); +GEN_RVP_R_OOL(ksll8); +GEN_RVP_R_OOL(kslra8); +GEN_RVP_R_OOL(kslra8_u); +GEN_RVP_SHIFTI(srai8, tcg_gen_vec_sar8i_tl, gen_helper_sra8); +GEN_RVP_SHIFTI(srli8, tcg_gen_vec_shr8i_tl, gen_helper_srl8); +GEN_RVP_SHIFTI(slli8, tcg_gen_vec_shl8i_tl, gen_helper_sll8); +GEN_RVP_SHIFTI(srai8_u, NULL, gen_helper_sra8_u); +GEN_RVP_SHIFTI(srli8_u, NULL, gen_helper_srl8_u); +GEN_RVP_SHIFTI(kslli8, NULL, gen_helper_ksll8); diff --git a/target/riscv/packed_helper.c b/target/riscv/packed_helper.c index 7e31c2fe46..ab9ebc472b 100644 --- a/target/riscv/packed_helper.c +++ b/target/riscv/packed_helper.c @@ -529,3 +529,105 @@ static inline void do_kslra16_u(CPURISCVState *env, void *vd, void *va, } RVPR(kslra16_u, 1, 2); + +/* SIMD 8-bit Shift Instructions */ +static inline void do_sra8(CPURISCVState *env, void *vd, void *va, + void *vb, uint8_t i) +{ + int8_t *d = vd, *a = va; + uint8_t shift = *(uint8_t *)vb & 0x7; + d[i] = a[i] >> shift; +} + +RVPR(sra8, 1, 1); + +static inline void do_srl8(CPURISCVState *env, void *vd, void *va, + void *vb, uint8_t i) +{ + uint8_t *d = vd, *a = va; + uint8_t shift = *(uint8_t *)vb & 0x7; + d[i] = a[i] >> shift; +} + +RVPR(srl8, 1, 1); + +static inline void do_sll8(CPURISCVState *env, void *vd, void *va, + void *vb, uint8_t i) +{ + uint8_t *d = vd, *a = va; + uint8_t shift = *(uint8_t *)vb & 0x7; + d[i] = a[i] << shift; +} + 
+RVPR(sll8, 1, 1); + +static inline void do_sra8_u(CPURISCVState *env, void *vd, void *va, + void *vb, uint8_t i) +{ + int8_t *d = vd, *a = va; + uint8_t shift = *(uint8_t *)vb & 0x7; + d[i] = vssra8(env, 0, a[i], shift); +} + +RVPR(sra8_u, 1, 1); + +static inline void do_srl8_u(CPURISCVState *env, void *vd, void *va, + void *vb, uint8_t i) +{ + uint8_t *d = vd, *a = va; + uint8_t shift = *(uint8_t *)vb & 0x7; + d[i] = vssrl8(env, 0, a[i], shift); +} + +RVPR(srl8_u, 1, 1); + +static inline void do_ksll8(CPURISCVState *env, void *vd, void *va, + void *vb, uint8_t i) +{ + int8_t *d = vd, *a = va, result; + uint8_t shift = *(uint8_t *)vb & 0x7; + + result = a[i] << shift; + if (shift > (clrsb32(a[i]) - 24)) { + env->vxsat = 0x1; + d[i] = (a[i] & INT8_MIN) ? INT8_MIN : INT8_MAX; + } else { + d[i] = result; + } +} + +RVPR(ksll8, 1, 1); + +static inline void do_kslra8(CPURISCVState *env, void *vd, void *va, + void *vb, uint8_t i) +{ + int8_t *d = vd, *a = va; + int32_t shift = sextract32((*(uint32_t *)vb), 0, 4); + + if (shift >= 0) { + do_ksll8(env, vd, va, vb, i); + } else { + shift = -shift; + shift = (shift == 8) ? 7 : shift; + d[i] = a[i] >> shift; + } +} + +RVPR(kslra8, 1, 1); + +static inline void do_kslra8_u(CPURISCVState *env, void *vd, void *va, + void *vb, uint8_t i) +{ + int8_t *d = vd, *a = va; + int32_t shift = sextract32((*(uint32_t *)vb), 0, 4); + + if (shift >= 0) { + do_ksll8(env, vd, va, vb, i); + } else { + shift = -shift; + shift = (shift == 8) ? 
7 : shift; + d[i] = vssra8(env, 0, a[i], shift); + } +} + +RVPR(kslra8_u, 1, 1); diff --git a/tcg/tcg-op-gvec.c b/tcg/tcg-op-gvec.c index cf1357cee1..f8d00a7ffa 100644 --- a/tcg/tcg-op-gvec.c +++ b/tcg/tcg-op-gvec.c @@ -2680,6 +2680,13 @@ void tcg_gen_vec_shl8i_i64(TCGv_i64 d, TCGv_i64 a, int64_t c) tcg_gen_andi_i64(d, d, mask); } +void tcg_gen_vec_shl8i_i32(TCGv_i32 d, TCGv_i32 a, int32_t c) +{ + uint32_t mask = dup_const(MO_8, 0xff << c); + tcg_gen_shli_i32(d, a, c); + tcg_gen_andi_i32(d, d, mask); +} + void tcg_gen_vec_shl16i_i64(TCGv_i64 d, TCGv_i64 a, int64_t c) { uint64_t mask = dup_const(MO_16, 0xffff << c); @@ -2738,6 +2745,13 @@ void tcg_gen_vec_shr8i_i64(TCGv_i64 d, TCGv_i64 a, int64_t c) tcg_gen_andi_i64(d, d, mask); } +void tcg_gen_vec_shr8i_i32(TCGv_i32 d, TCGv_i32 a, int32_t c) +{ + uint32_t mask = dup_const(MO_8, 0xff >> c); + tcg_gen_shri_i32(d, a, c); + tcg_gen_andi_i32(d, d, mask); +} + void tcg_gen_vec_shr16i_i64(TCGv_i64 d, TCGv_i64 a, int64_t c) { uint64_t mask = dup_const(MO_16, 0xffff >> c); @@ -2803,6 +2817,20 @@ void tcg_gen_vec_sar8i_i64(TCGv_i64 d, TCGv_i64 a, int64_t c) tcg_temp_free_i64(s); } +void tcg_gen_vec_sar8i_i32(TCGv_i32 d, TCGv_i32 a, int32_t c) +{ + uint32_t s_mask = dup_const(MO_8, 0x80 >> c); + uint32_t c_mask = dup_const(MO_8, 0xff >> c); + TCGv_i32 s = tcg_temp_new_i32(); + + tcg_gen_shri_i32(d, a, c); + tcg_gen_andi_i32(s, d, s_mask); /* isolate (shifted) sign bit */ + tcg_gen_muli_i32(s, s, (2 << c) - 2); /* replicate isolated signs */ + tcg_gen_andi_i32(d, d, c_mask); /* clear out bits above sign */ + tcg_gen_or_i32(d, d, s); /* include sign extension */ + tcg_temp_free_i32(s); +} + void tcg_gen_vec_sar16i_i64(TCGv_i64 d, TCGv_i64 a, int64_t c) { uint64_t s_mask = dup_const(MO_16, 0x8000 >> c); -- 2.25.1
WARNING: multiple messages have this Message-ID (diff)
From: LIU Zhiwei <zhiwei_liu@c-sky.com> To: qemu-devel@nongnu.org, qemu-riscv@nongnu.org Cc: Alistair.Francis@wdc.com, palmer@dabbelt.com, bin.meng@windriver.com, richard.henderson@linaro.org, LIU Zhiwei <zhiwei_liu@c-sky.com>, Alistair Francis <alistair.francis@wdc.com>, Palmer Dabbelt <palmerdabbelt@google.com> Subject: [PATCH v2 06/37] target/riscv: SIMD 8-bit Shift Instructions Date: Thu, 10 Jun 2021 15:58:37 +0800 [thread overview] Message-ID: <20210610075908.3305506-7-zhiwei_liu@c-sky.com> (raw) In-Reply-To: <20210610075908.3305506-1-zhiwei_liu@c-sky.com> The instructions include arithmetic right shift, logical right shift, and left shift. The shift amount can be an immediate or a register scalar. The right shifts have rounding variants, and the left shift has saturating variants. Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com> Acked-by: Alistair Francis <alistair.francis@wdc.com> Reviewed-by: Palmer Dabbelt <palmerdabbelt@google.com> --- include/tcg/tcg-op-gvec.h | 9 +++ target/riscv/helper.h | 9 +++ target/riscv/insn32.decode | 17 ++++ target/riscv/insn_trans/trans_rvp.c.inc | 16 ++++ target/riscv/packed_helper.c | 102 ++++++++++++++++++++++++ tcg/tcg-op-gvec.c | 28 +++++++ 6 files changed, 181 insertions(+) diff --git a/include/tcg/tcg-op-gvec.h b/include/tcg/tcg-op-gvec.h index 72cf697646..91531ecb0b 100644 --- a/include/tcg/tcg-op-gvec.h +++ b/include/tcg/tcg-op-gvec.h @@ -397,12 +397,15 @@ void tcg_gen_vec_sub16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b); void tcg_gen_vec_sub32_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b); void tcg_gen_vec_shl8i_i64(TCGv_i64 d, TCGv_i64 a, int64_t); +void tcg_gen_vec_shl8i_i32(TCGv_i32 d, TCGv_i32 a, int32_t); void tcg_gen_vec_shl16i_i64(TCGv_i64 d, TCGv_i64 a, int64_t); void tcg_gen_vec_shl16i_i32(TCGv_i32 d, TCGv_i32 a, int32_t); void tcg_gen_vec_shr8i_i64(TCGv_i64 d, TCGv_i64 a, int64_t); +void tcg_gen_vec_shr8i_i32(TCGv_i32 d, TCGv_i32 a, int32_t); void tcg_gen_vec_shr16i_i64(TCGv_i64 d, TCGv_i64 a, int64_t); void 
tcg_gen_vec_shr16i_i32(TCGv_i32 d, TCGv_i32 a, int32_t); void tcg_gen_vec_sar8i_i64(TCGv_i64 d, TCGv_i64 a, int64_t); +void tcg_gen_vec_sar8i_i32(TCGv_i32 d, TCGv_i32 a, int32_t); void tcg_gen_vec_sar16i_i64(TCGv_i64 d, TCGv_i64 a, int64_t); void tcg_gen_vec_sar16i_i32(TCGv_i32 d, TCGv_i32 a, int32_t); void tcg_gen_vec_rotl8i_i64(TCGv_i64 d, TCGv_i64 a, int64_t c); @@ -416,6 +419,9 @@ void tcg_gen_vec_rotl16i_i64(TCGv_i64 d, TCGv_i64 a, int64_t c); #define tcg_gen_vec_shl16i_tl tcg_gen_vec_shl16i_i64 #define tcg_gen_vec_shr16i_tl tcg_gen_vec_shr16i_i64 #define tcg_gen_vec_sar16i_tl tcg_gen_vec_sar16i_i64 +#define tcg_gen_vec_shl8i_tl tcg_gen_vec_shl8i_i64 +#define tcg_gen_vec_shr8i_tl tcg_gen_vec_shr8i_i64 +#define tcg_gen_vec_sar8i_tl tcg_gen_vec_sar8i_i64 #else #define tcg_gen_vec_add16_tl tcg_gen_vec_add16_i32 #define tcg_gen_vec_sub16_tl tcg_gen_vec_sub16_i32 @@ -424,6 +430,9 @@ void tcg_gen_vec_rotl16i_i64(TCGv_i64 d, TCGv_i64 a, int64_t c); #define tcg_gen_vec_shl16i_tl tcg_gen_vec_shl16i_i32 #define tcg_gen_vec_shr16i_tl tcg_gen_vec_shr16i_i32 #define tcg_gen_vec_sar16i_tl tcg_gen_vec_sar16i_i32 +#define tcg_gen_vec_shl8i_tl tcg_gen_vec_shl8i_i32 +#define tcg_gen_vec_shr8i_tl tcg_gen_vec_shr8i_i32 +#define tcg_gen_vec_sar8i_tl tcg_gen_vec_sar8i_i32 #endif #endif diff --git a/target/riscv/helper.h b/target/riscv/helper.h index de7b4fc17d..1b365135ff 100644 --- a/target/riscv/helper.h +++ b/target/riscv/helper.h @@ -1197,3 +1197,12 @@ DEF_HELPER_3(sll16, tl, env, tl, tl) DEF_HELPER_3(ksll16, tl, env, tl, tl) DEF_HELPER_3(kslra16, tl, env, tl, tl) DEF_HELPER_3(kslra16_u, tl, env, tl, tl) + +DEF_HELPER_3(sra8, tl, env, tl, tl) +DEF_HELPER_3(sra8_u, tl, env, tl, tl) +DEF_HELPER_3(srl8, tl, env, tl, tl) +DEF_HELPER_3(srl8_u, tl, env, tl, tl) +DEF_HELPER_3(sll8, tl, env, tl, tl) +DEF_HELPER_3(ksll8, tl, env, tl, tl) +DEF_HELPER_3(kslra8, tl, env, tl, tl) +DEF_HELPER_3(kslra8_u, tl, env, tl, tl) diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode 
index 44c497f28a..8b78fb24bc 100644 --- a/target/riscv/insn32.decode +++ b/target/riscv/insn32.decode @@ -25,6 +25,7 @@ %sh7 20:7 %sh4 20:4 +%sh3 20:3 %csr 20:12 %rm 12:3 %nf 29:3 !function=ex_plus_1 @@ -63,6 +64,7 @@ @sh ...... ...... ..... ... ..... ....... &shift shamt=%sh7 %rs1 %rd @sh4 ...... ...... ..... ... ..... ....... &shift shamt=%sh4 %rs1 %rd +@sh3 ...... ...... ..... ... ..... ....... &shift shamt=%sh3 %rs1 %rd @csr ............ ..... ... ..... ....... %csr %rs1 %rd @atom_ld ..... aq:1 rl:1 ..... ........ ..... ....... &atomic rs2=0 %rs1 %rd @@ -792,3 +794,18 @@ ksll16 0110010 ..... ..... 000 ..... 1110111 @r kslli16 0111010 1.... ..... 000 ..... 1110111 @sh4 kslra16 0101011 ..... ..... 000 ..... 1110111 @r kslra16_u 0110011 ..... ..... 000 ..... 1110111 @r + +sra8 0101100 ..... ..... 000 ..... 1110111 @r +sra8_u 0110100 ..... ..... 000 ..... 1110111 @r +srai8 0111100 00... ..... 000 ..... 1110111 @sh3 +srai8_u 0111100 01... ..... 000 ..... 1110111 @sh3 +srl8 0101101 ..... ..... 000 ..... 1110111 @r +srl8_u 0110101 ..... ..... 000 ..... 1110111 @r +srli8 0111101 00... ..... 000 ..... 1110111 @sh3 +srli8_u 0111101 01... ..... 000 ..... 1110111 @sh3 +sll8 0101110 ..... ..... 000 ..... 1110111 @r +slli8 0111110 00... ..... 000 ..... 1110111 @sh3 +ksll8 0110110 ..... ..... 000 ..... 1110111 @r +kslli8 0111110 01... ..... 000 ..... 1110111 @sh3 +kslra8 0101111 ..... ..... 000 ..... 1110111 @r +kslra8_u 0110111 ..... ..... 000 ..... 
1110111 @r diff --git a/target/riscv/insn_trans/trans_rvp.c.inc b/target/riscv/insn_trans/trans_rvp.c.inc index afafa49824..e6c5f2ddf5 100644 --- a/target/riscv/insn_trans/trans_rvp.c.inc +++ b/target/riscv/insn_trans/trans_rvp.c.inc @@ -187,3 +187,19 @@ GEN_RVP_SHIFTI(slli16, tcg_gen_vec_shl16i_tl, gen_helper_sll16); GEN_RVP_SHIFTI(srai16_u, NULL, gen_helper_sra16_u); GEN_RVP_SHIFTI(srli16_u, NULL, gen_helper_srl16_u); GEN_RVP_SHIFTI(kslli16, NULL, gen_helper_ksll16); + +/* SIMD 8-bit Shift Instructions */ +GEN_RVP_R_OOL(sra8); +GEN_RVP_R_OOL(srl8); +GEN_RVP_R_OOL(sll8); +GEN_RVP_R_OOL(sra8_u); +GEN_RVP_R_OOL(srl8_u); +GEN_RVP_R_OOL(ksll8); +GEN_RVP_R_OOL(kslra8); +GEN_RVP_R_OOL(kslra8_u); +GEN_RVP_SHIFTI(srai8, tcg_gen_vec_sar8i_tl, gen_helper_sra8); +GEN_RVP_SHIFTI(srli8, tcg_gen_vec_shr8i_tl, gen_helper_srl8); +GEN_RVP_SHIFTI(slli8, tcg_gen_vec_shl8i_tl, gen_helper_sll8); +GEN_RVP_SHIFTI(srai8_u, NULL, gen_helper_sra8_u); +GEN_RVP_SHIFTI(srli8_u, NULL, gen_helper_srl8_u); +GEN_RVP_SHIFTI(kslli8, NULL, gen_helper_ksll8); diff --git a/target/riscv/packed_helper.c b/target/riscv/packed_helper.c index 7e31c2fe46..ab9ebc472b 100644 --- a/target/riscv/packed_helper.c +++ b/target/riscv/packed_helper.c @@ -529,3 +529,105 @@ static inline void do_kslra16_u(CPURISCVState *env, void *vd, void *va, } RVPR(kslra16_u, 1, 2); + +/* SIMD 8-bit Shift Instructions */ +static inline void do_sra8(CPURISCVState *env, void *vd, void *va, + void *vb, uint8_t i) +{ + int8_t *d = vd, *a = va; + uint8_t shift = *(uint8_t *)vb & 0x7; + d[i] = a[i] >> shift; +} + +RVPR(sra8, 1, 1); + +static inline void do_srl8(CPURISCVState *env, void *vd, void *va, + void *vb, uint8_t i) +{ + uint8_t *d = vd, *a = va; + uint8_t shift = *(uint8_t *)vb & 0x7; + d[i] = a[i] >> shift; +} + +RVPR(srl8, 1, 1); + +static inline void do_sll8(CPURISCVState *env, void *vd, void *va, + void *vb, uint8_t i) +{ + uint8_t *d = vd, *a = va; + uint8_t shift = *(uint8_t *)vb & 0x7; + d[i] = a[i] << shift; +} + 
+RVPR(sll8, 1, 1); + +static inline void do_sra8_u(CPURISCVState *env, void *vd, void *va, + void *vb, uint8_t i) +{ + int8_t *d = vd, *a = va; + uint8_t shift = *(uint8_t *)vb & 0x7; + d[i] = vssra8(env, 0, a[i], shift); +} + +RVPR(sra8_u, 1, 1); + +static inline void do_srl8_u(CPURISCVState *env, void *vd, void *va, + void *vb, uint8_t i) +{ + uint8_t *d = vd, *a = va; + uint8_t shift = *(uint8_t *)vb & 0x7; + d[i] = vssrl8(env, 0, a[i], shift); +} + +RVPR(srl8_u, 1, 1); + +static inline void do_ksll8(CPURISCVState *env, void *vd, void *va, + void *vb, uint8_t i) +{ + int8_t *d = vd, *a = va, result; + uint8_t shift = *(uint8_t *)vb & 0x7; + + result = a[i] << shift; + if (shift > (clrsb32(a[i]) - 24)) { + env->vxsat = 0x1; + d[i] = (a[i] & INT8_MIN) ? INT8_MIN : INT8_MAX; + } else { + d[i] = result; + } +} + +RVPR(ksll8, 1, 1); + +static inline void do_kslra8(CPURISCVState *env, void *vd, void *va, + void *vb, uint8_t i) +{ + int8_t *d = vd, *a = va; + int32_t shift = sextract32((*(uint32_t *)vb), 0, 4); + + if (shift >= 0) { + do_ksll8(env, vd, va, vb, i); + } else { + shift = -shift; + shift = (shift == 8) ? 7 : shift; + d[i] = a[i] >> shift; + } +} + +RVPR(kslra8, 1, 1); + +static inline void do_kslra8_u(CPURISCVState *env, void *vd, void *va, + void *vb, uint8_t i) +{ + int8_t *d = vd, *a = va; + int32_t shift = sextract32((*(uint32_t *)vb), 0, 4); + + if (shift >= 0) { + do_ksll8(env, vd, va, vb, i); + } else { + shift = -shift; + shift = (shift == 8) ? 
7 : shift; + d[i] = vssra8(env, 0, a[i], shift); + } +} + +RVPR(kslra8_u, 1, 1); diff --git a/tcg/tcg-op-gvec.c b/tcg/tcg-op-gvec.c index cf1357cee1..f8d00a7ffa 100644 --- a/tcg/tcg-op-gvec.c +++ b/tcg/tcg-op-gvec.c @@ -2680,6 +2680,13 @@ void tcg_gen_vec_shl8i_i64(TCGv_i64 d, TCGv_i64 a, int64_t c) tcg_gen_andi_i64(d, d, mask); } +void tcg_gen_vec_shl8i_i32(TCGv_i32 d, TCGv_i32 a, int32_t c) +{ + uint32_t mask = dup_const(MO_8, 0xff << c); + tcg_gen_shli_i32(d, a, c); + tcg_gen_andi_i32(d, d, mask); +} + void tcg_gen_vec_shl16i_i64(TCGv_i64 d, TCGv_i64 a, int64_t c) { uint64_t mask = dup_const(MO_16, 0xffff << c); @@ -2738,6 +2745,13 @@ void tcg_gen_vec_shr8i_i64(TCGv_i64 d, TCGv_i64 a, int64_t c) tcg_gen_andi_i64(d, d, mask); } +void tcg_gen_vec_shr8i_i32(TCGv_i32 d, TCGv_i32 a, int32_t c) +{ + uint32_t mask = dup_const(MO_8, 0xff >> c); + tcg_gen_shri_i32(d, a, c); + tcg_gen_andi_i32(d, d, mask); +} + void tcg_gen_vec_shr16i_i64(TCGv_i64 d, TCGv_i64 a, int64_t c) { uint64_t mask = dup_const(MO_16, 0xffff >> c); @@ -2803,6 +2817,20 @@ void tcg_gen_vec_sar8i_i64(TCGv_i64 d, TCGv_i64 a, int64_t c) tcg_temp_free_i64(s); } +void tcg_gen_vec_sar8i_i32(TCGv_i32 d, TCGv_i32 a, int32_t c) +{ + uint32_t s_mask = dup_const(MO_8, 0x80 >> c); + uint32_t c_mask = dup_const(MO_8, 0xff >> c); + TCGv_i32 s = tcg_temp_new_i32(); + + tcg_gen_shri_i32(d, a, c); + tcg_gen_andi_i32(s, d, s_mask); /* isolate (shifted) sign bit */ + tcg_gen_muli_i32(s, s, (2 << c) - 2); /* replicate isolated signs */ + tcg_gen_andi_i32(d, d, c_mask); /* clear out bits above sign */ + tcg_gen_or_i32(d, d, s); /* include sign extension */ + tcg_temp_free_i32(s); +} + void tcg_gen_vec_sar16i_i64(TCGv_i64 d, TCGv_i64 a, int64_t c) { uint64_t s_mask = dup_const(MO_16, 0x8000 >> c); -- 2.25.1
next prev parent reply other threads:[~2021-06-10 8:06 UTC|newest] Thread overview: 88+ messages / expand[flat|nested] mbox.gz Atom feed top 2021-06-10 7:58 [PATCH v2 00/37] target/riscv: support packed extension v0.9.4 LIU Zhiwei 2021-06-10 7:58 ` LIU Zhiwei 2021-06-10 7:58 ` [PATCH v2 01/37] target/riscv: implementation-defined constant parameters LIU Zhiwei 2021-06-10 7:58 ` LIU Zhiwei 2021-06-10 7:58 ` [PATCH v2 02/37] target/riscv: Make the vector helper functions public LIU Zhiwei 2021-06-10 7:58 ` LIU Zhiwei 2021-06-10 7:58 ` [PATCH v2 03/37] target/riscv: 16-bit Addition & Subtraction Instructions LIU Zhiwei 2021-06-10 7:58 ` LIU Zhiwei 2021-06-10 18:00 ` Richard Henderson 2021-06-10 18:00 ` Richard Henderson 2021-06-10 7:58 ` [PATCH v2 04/37] target/riscv: 8-bit Addition & Subtraction Instruction LIU Zhiwei 2021-06-10 7:58 ` LIU Zhiwei 2021-06-10 19:39 ` Richard Henderson 2021-06-10 19:39 ` Richard Henderson 2021-06-11 4:36 ` LIU Zhiwei 2021-06-11 4:36 ` LIU Zhiwei 2021-06-24 6:05 ` LIU Zhiwei 2021-06-24 6:05 ` LIU Zhiwei 2021-06-10 7:58 ` [PATCH v2 05/37] target/riscv: SIMD 16-bit Shift Instructions LIU Zhiwei 2021-06-10 7:58 ` LIU Zhiwei 2021-06-10 19:44 ` Richard Henderson 2021-06-10 19:44 ` Richard Henderson 2021-06-10 7:58 ` LIU Zhiwei [this message] 2021-06-10 7:58 ` [PATCH v2 06/37] target/riscv: SIMD 8-bit " LIU Zhiwei 2021-06-10 7:58 ` [PATCH v2 07/37] target/riscv: SIMD 16-bit Compare Instructions LIU Zhiwei 2021-06-10 7:58 ` LIU Zhiwei 2021-06-10 7:58 ` [PATCH v2 08/37] target/riscv: SIMD 8-bit " LIU Zhiwei 2021-06-10 7:58 ` LIU Zhiwei 2021-06-10 7:58 ` [PATCH v2 09/37] target/riscv: SIMD 16-bit Multiply Instructions LIU Zhiwei 2021-06-10 7:58 ` LIU Zhiwei 2021-06-10 7:58 ` [PATCH v2 10/37] target/riscv: SIMD 8-bit " LIU Zhiwei 2021-06-10 7:58 ` LIU Zhiwei 2021-06-10 7:58 ` [PATCH v2 11/37] target/riscv: SIMD 16-bit Miscellaneous Instructions LIU Zhiwei 2021-06-10 7:58 ` LIU Zhiwei 2021-06-10 7:58 ` [PATCH v2 12/37] target/riscv: SIMD 8-bit " 
LIU Zhiwei 2021-06-10 7:58 ` LIU Zhiwei 2021-06-10 7:58 ` [PATCH v2 13/37] target/riscv: 8-bit Unpacking Instructions LIU Zhiwei 2021-06-10 7:58 ` LIU Zhiwei 2021-06-10 7:58 ` [PATCH v2 14/37] target/riscv: 16-bit Packing Instructions LIU Zhiwei 2021-06-10 7:58 ` LIU Zhiwei 2021-06-10 7:58 ` [PATCH v2 15/37] target/riscv: Signed MSW 32x32 Multiply and Add Instructions LIU Zhiwei 2021-06-10 7:58 ` LIU Zhiwei 2021-06-10 7:58 ` [PATCH v2 16/37] target/riscv: Signed MSW 32x16 " LIU Zhiwei 2021-06-10 7:58 ` LIU Zhiwei 2021-06-10 7:58 ` [PATCH v2 17/37] target/riscv: Signed 16-bit Multiply 32-bit Add/Subtract Instructions LIU Zhiwei 2021-06-10 7:58 ` LIU Zhiwei 2021-06-10 7:58 ` [PATCH v2 18/37] target/riscv: Signed 16-bit Multiply 64-bit " LIU Zhiwei 2021-06-10 7:58 ` LIU Zhiwei 2021-06-10 7:58 ` [PATCH v2 19/37] target/riscv: Partial-SIMD Miscellaneous Instructions LIU Zhiwei 2021-06-10 7:58 ` LIU Zhiwei 2021-06-10 7:58 ` [PATCH v2 20/37] target/riscv: 8-bit Multiply with 32-bit Add Instructions LIU Zhiwei 2021-06-10 7:58 ` LIU Zhiwei 2021-06-10 7:58 ` [PATCH v2 21/37] target/riscv: 64-bit Add/Subtract Instructions LIU Zhiwei 2021-06-10 7:58 ` LIU Zhiwei 2021-06-10 7:58 ` [PATCH v2 22/37] target/riscv: 32-bit Multiply " LIU Zhiwei 2021-06-10 7:58 ` LIU Zhiwei 2021-06-10 7:58 ` [PATCH v2 23/37] target/riscv: Signed 16-bit Multiply with " LIU Zhiwei 2021-06-10 7:58 ` LIU Zhiwei 2021-06-10 7:58 ` [PATCH v2 24/37] target/riscv: Non-SIMD Q15 saturation ALU Instructions LIU Zhiwei 2021-06-10 7:58 ` LIU Zhiwei 2021-06-10 7:58 ` [PATCH v2 25/37] target/riscv: Non-SIMD Q31 " LIU Zhiwei 2021-06-10 7:58 ` LIU Zhiwei 2021-06-10 7:58 ` [PATCH v2 26/37] target/riscv: 32-bit Computation Instructions LIU Zhiwei 2021-06-10 7:58 ` LIU Zhiwei 2021-06-10 7:58 ` [PATCH v2 27/37] target/riscv: Non-SIMD Miscellaneous Instructions LIU Zhiwei 2021-06-10 7:58 ` LIU Zhiwei 2021-06-10 7:58 ` [PATCH v2 28/37] target/riscv: RV64 Only SIMD 32-bit Add/Subtract Instructions LIU Zhiwei 2021-06-10 7:58 
` LIU Zhiwei 2021-06-10 7:59 ` [PATCH v2 29/37] target/riscv: RV64 Only SIMD 32-bit Shift Instructions LIU Zhiwei 2021-06-10 7:59 ` LIU Zhiwei 2021-06-10 7:59 ` [PATCH v2 30/37] target/riscv: RV64 Only SIMD 32-bit Miscellaneous Instructions LIU Zhiwei 2021-06-10 7:59 ` LIU Zhiwei 2021-06-10 7:59 ` [PATCH v2 31/37] target/riscv: RV64 Only SIMD Q15 saturating Multiply Instructions LIU Zhiwei 2021-06-10 7:59 ` LIU Zhiwei 2021-06-10 7:59 ` [PATCH v2 32/37] target/riscv: RV64 Only 32-bit " LIU Zhiwei 2021-06-10 7:59 ` LIU Zhiwei 2021-06-10 7:59 ` [PATCH v2 33/37] target/riscv: RV64 Only 32-bit Multiply & Add Instructions LIU Zhiwei 2021-06-10 7:59 ` LIU Zhiwei 2021-06-10 7:59 ` [PATCH v2 34/37] target/riscv: RV64 Only 32-bit Parallel " LIU Zhiwei 2021-06-10 7:59 ` LIU Zhiwei 2021-06-10 7:59 ` [PATCH v2 35/37] target/riscv: RV64 Only Non-SIMD 32-bit Shift Instructions LIU Zhiwei 2021-06-10 7:59 ` LIU Zhiwei 2021-06-10 7:59 ` [PATCH v2 36/37] target/riscv: RV64 Only 32-bit Packing Instructions LIU Zhiwei 2021-06-10 7:59 ` LIU Zhiwei 2021-06-10 7:59 ` [PATCH v2 37/37] target/riscv: configure and turn on packed extension from command line LIU Zhiwei 2021-06-10 7:59 ` LIU Zhiwei 2021-06-14 22:55 ` [PATCH v2 00/37] target/riscv: support packed extension v0.9.4 no-reply 2021-06-14 22:55 ` no-reply
Reply instructions: You may reply publicly to this message via plain-text email using any one of the following methods: * Save the following mbox file, import it into your mail client, and reply-to-all from there: mbox Avoid top-posting and favor interleaved quoting: https://en.wikipedia.org/wiki/Posting_style#Interleaved_style * Reply using the --to, --cc, and --in-reply-to switches of git-send-email(1): git send-email \ --in-reply-to=20210610075908.3305506-7-zhiwei_liu@c-sky.com \ --to=zhiwei_liu@c-sky.com \ --cc=alistair.francis@wdc.com \ --cc=bin.meng@windriver.com \ --cc=palmer@dabbelt.com \ --cc=palmerdabbelt@google.com \ --cc=qemu-devel@nongnu.org \ --cc=qemu-riscv@nongnu.org \ --cc=richard.henderson@linaro.org \ /path/to/YOUR_REPLY https://kernel.org/pub/software/scm/git/docs/git-send-email.html * If your mail client supports setting the In-Reply-To header via mailto: links, try the mailto: link. Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes, see mirroring instructions on how to clone and mirror all data and code used by this external index.