qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
From: Peter Maydell <peter.maydell@linaro.org>
To: qemu-arm@nongnu.org, qemu-devel@nongnu.org
Cc: Richard Henderson <richard.henderson@linaro.org>
Subject: [PATCH 06/10] target/arm: Convert Neon narrowing shifts with op==8 to decodetree
Date: Fri, 15 May 2020 15:20:52 +0100	[thread overview]
Message-ID: <20200515142056.21346-7-peter.maydell@linaro.org> (raw)
In-Reply-To: <20200515142056.21346-1-peter.maydell@linaro.org>

Convert the Neon narrowing shifts where op==8 to decodetree:
 * VSHRN
 * VRSHRN
 * VQSHRUN
 * VQRSHRUN

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
---
 target/arm/neon-dp.decode       |  32 ++++++
 target/arm/translate-neon.inc.c | 168 ++++++++++++++++++++++++++++++++
 target/arm/translate.c          |   1 +
 3 files changed, 201 insertions(+)

diff --git a/target/arm/neon-dp.decode b/target/arm/neon-dp.decode
index 6456b53a690..f8d19c5819c 100644
--- a/target/arm/neon-dp.decode
+++ b/target/arm/neon-dp.decode
@@ -208,6 +208,10 @@ VMINNM_fp_3s     1111 001 1 0 . 1 . .... .... 1111 ... 1 .... @3same_fp
 
 @2reg_shift      .... ... . . . ...... .... .... . q:1 . . .... \
                  &2reg_shift vm=%vm_dp vd=%vd_dp
+@2reg_shift_q0   .... ... . . . ...... .... .... . 0 . . .... \
+                 &2reg_shift vm=%vm_dp vd=%vd_dp q=0
+@2reg_shift_q1   .... ... . . . ...... .... .... . 1 . . .... \
+                 &2reg_shift vm=%vm_dp vd=%vd_dp q=1
 
 # Right shifts are encoded as N - shift, where N is the element size in bits.
 %neon_rshift_i6  16:6 !function=rsub_64
@@ -340,3 +344,31 @@ VQSHL_U_2sh      1111 001 1 1 . 01 shift:4  .... 0111 0 . . 1 .... \
                  @2reg_shift size=1
 VQSHL_U_2sh      1111 001 1 1 . 001 shift:3 .... 0111 0 . . 1 .... \
                  @2reg_shift size=0
+
+VSHRN_64_2sh     1111 001 0 1 . 1 .....     .... 1000 0 . . 1 .... \
+                 @2reg_shift_q0 size=3 shift=%neon_rshift_i5
+VSHRN_32_2sh     1111 001 0 1 . 01 ....     .... 1000 0 . . 1 .... \
+                 @2reg_shift_q0 size=2 shift=%neon_rshift_i4
+VSHRN_16_2sh     1111 001 0 1 . 001 ...     .... 1000 0 . . 1 .... \
+                 @2reg_shift_q0 size=1 shift=%neon_rshift_i3
+
+VRSHRN_64_2sh    1111 001 0 1 . 1 .....     .... 1000 0 . . 1 .... \
+                 @2reg_shift_q1 size=3 shift=%neon_rshift_i5
+VRSHRN_32_2sh    1111 001 0 1 . 01 ....     .... 1000 0 . . 1 .... \
+                 @2reg_shift_q1 size=2 shift=%neon_rshift_i4
+VRSHRN_16_2sh    1111 001 0 1 . 001 ...     .... 1000 0 . . 1 .... \
+                 @2reg_shift_q1 size=1 shift=%neon_rshift_i3
+
+VQSHRUN_64_2sh   1111 001 1 1 . 1 .....     .... 1000 0 . . 1 .... \
+                 @2reg_shift_q0 size=3 shift=%neon_rshift_i5
+VQSHRUN_32_2sh   1111 001 1 1 . 01 ....     .... 1000 0 . . 1 .... \
+                 @2reg_shift_q0 size=2 shift=%neon_rshift_i4
+VQSHRUN_16_2sh   1111 001 1 1 . 001 ...     .... 1000 0 . . 1 .... \
+                 @2reg_shift_q0 size=1 shift=%neon_rshift_i3
+
+VQRSHRUN_64_2sh  1111 001 1 1 . 1 .....     .... 1000 0 . . 1 .... \
+                 @2reg_shift_q1 size=3 shift=%neon_rshift_i5
+VQRSHRUN_32_2sh  1111 001 1 1 . 01 ....     .... 1000 0 . . 1 .... \
+                 @2reg_shift_q1 size=2 shift=%neon_rshift_i4
+VQRSHRUN_16_2sh  1111 001 1 1 . 001 ...     .... 1000 0 . . 1 .... \
+                 @2reg_shift_q1 size=1 shift=%neon_rshift_i3
diff --git a/target/arm/translate-neon.inc.c b/target/arm/translate-neon.inc.c
index 396db55565f..18ea7255e38 100644
--- a/target/arm/translate-neon.inc.c
+++ b/target/arm/translate-neon.inc.c
@@ -1504,3 +1504,171 @@ static bool do_2shift_env_32(DisasContext *s, arg_2reg_shift *a,
 DO_2SHIFT_ENV(VQSHLU, qshlu_s)
 DO_2SHIFT_ENV(VQSHL_U, qshl_u)
 DO_2SHIFT_ENV(VQSHL_S, qshl_s)
+
+static bool do_2shift_narrow_64(DisasContext *s, arg_2reg_shift *a,
+                                NeonGenTwo64OpFn *shiftfn,
+                                NeonGenNarrowEnvFn *narrowfn)
+{
+    /* 2-reg-and-shift narrowing-shift operations, size == 3 case */
+    TCGv_i64 constimm, rm1, rm2;
+    TCGv_i32 rd;
+
+    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
+        return false;
+    }
+
+    /* UNDEF accesses to D16-D31 if they don't exist. */
+    if (!dc_isar_feature(aa32_simd_r32, s) &&
+        ((a->vd | a->vm) & 0x10)) {
+        return false;
+    }
+
+    if (a->vm & 1) {
+        return false;
+    }
+
+    if (!vfp_access_check(s)) {
+        return true;
+    }
+
+    /*
+     * This is always a right shift, and the shiftfn is always a
+     * left-shift helper, which thus needs the negated shift count.
+     */
+    constimm = tcg_const_i64(-a->shift);
+    rm1 = tcg_temp_new_i64();
+    rm2 = tcg_temp_new_i64();
+
+    /* Load both inputs first to avoid potential overwrite if rm == rd */
+    neon_load_reg64(rm1, a->vm);
+    neon_load_reg64(rm2, a->vm + 1);
+
+    shiftfn(rm1, rm1, constimm);
+    rd = tcg_temp_new_i32();
+    narrowfn(rd, cpu_env, rm1);
+    neon_store_reg(a->vd, 0, rd);
+
+    shiftfn(rm2, rm2, constimm);
+    rd = tcg_temp_new_i32();
+    narrowfn(rd, cpu_env, rm2);
+    neon_store_reg(a->vd, 1, rd);
+
+    tcg_temp_free_i64(rm1);
+    tcg_temp_free_i64(rm2);
+    tcg_temp_free_i64(constimm);
+
+    return true;
+}
+
+static bool do_2shift_narrow_32(DisasContext *s, arg_2reg_shift *a,
+                                NeonGenTwoOpFn *shiftfn,
+                                NeonGenNarrowEnvFn *narrowfn)
+{
+    /* 2-reg-and-shift narrowing-shift operations, size < 3 case */
+    TCGv_i32 constimm, rm1, rm2, rm3, rm4;
+    TCGv_i64 rtmp;
+    uint32_t imm;
+
+    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
+        return false;
+    }
+
+    /* UNDEF accesses to D16-D31 if they don't exist. */
+    if (!dc_isar_feature(aa32_simd_r32, s) &&
+        ((a->vd | a->vm) & 0x10)) {
+        return false;
+    }
+
+    if (a->vm & 1) {
+        return false;
+    }
+
+    if (!vfp_access_check(s)) {
+        return true;
+    }
+
+    /*
+     * This is always a right shift, and the shiftfn is always a
+     * left-shift helper, which thus needs the negated shift count
+     * duplicated into each lane of the immediate value.
+     */
+    if (a->size == 1) {
+        imm = (uint16_t)(-a->shift);
+        imm |= imm << 16;
+    } else {
+        /* size == 2 */
+        imm = -a->shift;
+    }
+    constimm = tcg_const_i32(imm);
+
+    /* Load all inputs first to avoid potential overwrite */
+    rm1 = neon_load_reg(a->vm, 0);
+    rm2 = neon_load_reg(a->vm, 1);
+    rm3 = neon_load_reg(a->vm + 1, 0);
+    rm4 = neon_load_reg(a->vm + 1, 1);
+    rtmp = tcg_temp_new_i64();
+
+    // todo expand out the shift-narrow and the narrow-op
+    shiftfn(rm1, rm1, constimm);
+    shiftfn(rm2, rm2, constimm);
+
+    tcg_gen_concat_i32_i64(rtmp, rm1, rm2);
+    tcg_temp_free_i32(rm2);
+
+    narrowfn(rm1, cpu_env, rtmp);
+    neon_store_reg(a->vd, 0, rm1);
+
+    shiftfn(rm3, rm3, constimm);
+    shiftfn(rm4, rm4, constimm);
+    tcg_temp_free_i32(constimm);
+
+    tcg_gen_concat_i32_i64(rtmp, rm3, rm4);
+    tcg_temp_free_i32(rm4);
+
+    narrowfn(rm3, cpu_env, rtmp);
+    tcg_temp_free_i64(rtmp);
+    neon_store_reg(a->vd, 1, rm3);
+    return true;
+}
+
+#define DO_2SN_64(INSN, FUNC, NARROWFUNC)                               \
+    static bool trans_##INSN##_2sh(DisasContext *s, arg_2reg_shift *a)  \
+    {                                                                   \
+        return do_2shift_narrow_64(s, a, FUNC, NARROWFUNC);             \
+    }
+#define DO_2SN_32(INSN, FUNC, NARROWFUNC)                               \
+    static bool trans_##INSN##_2sh(DisasContext *s, arg_2reg_shift *a)  \
+    {                                                                   \
+        return do_2shift_narrow_32(s, a, FUNC, NARROWFUNC);             \
+    }
+
+static void gen_neon_narrow_u32(TCGv_i32 dest, TCGv_ptr env, TCGv_i64 src)
+{
+    tcg_gen_extrl_i64_i32(dest, src);
+}
+
+static void gen_neon_narrow_u16(TCGv_i32 dest, TCGv_ptr env, TCGv_i64 src)
+{
+    gen_helper_neon_narrow_u16(dest, src);
+}
+
+static void gen_neon_narrow_u8(TCGv_i32 dest, TCGv_ptr env, TCGv_i64 src)
+{
+    gen_helper_neon_narrow_u8(dest, src);
+}
+
+DO_2SN_64(VSHRN_64, gen_ushl_i64, gen_neon_narrow_u32)
+DO_2SN_32(VSHRN_32, gen_ushl_i32, gen_neon_narrow_u16)
+DO_2SN_32(VSHRN_16, gen_helper_neon_shl_u16, gen_neon_narrow_u8)
+
+DO_2SN_64(VRSHRN_64, gen_helper_neon_rshl_u64, gen_neon_narrow_u32)
+DO_2SN_32(VRSHRN_32, gen_helper_neon_rshl_u32, gen_neon_narrow_u16)
+DO_2SN_32(VRSHRN_16, gen_helper_neon_rshl_u16, gen_neon_narrow_u8)
+
+DO_2SN_64(VQSHRUN_64, gen_sshl_i64, gen_helper_neon_unarrow_sat32)
+DO_2SN_32(VQSHRUN_32, gen_sshl_i32, gen_helper_neon_unarrow_sat16)
+DO_2SN_32(VQSHRUN_16, gen_helper_neon_shl_s16, gen_helper_neon_unarrow_sat8)
+
+DO_2SN_64(VQRSHRUN_64, gen_helper_neon_rshl_s64, gen_helper_neon_unarrow_sat32)
+DO_2SN_32(VQRSHRUN_32, gen_helper_neon_rshl_s32, gen_helper_neon_unarrow_sat16)
+DO_2SN_32(VQRSHRUN_16, gen_helper_neon_rshl_s16, gen_helper_neon_unarrow_sat8)
diff --git a/target/arm/translate.c b/target/arm/translate.c
index d711d39eb9d..f884db535b4 100644
--- a/target/arm/translate.c
+++ b/target/arm/translate.c
@@ -5280,6 +5280,7 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
             case 5: /* VSHL, VSLI */
             case 6: /* VQSHLU */
             case 7: /* VQSHL */
+            case 8: /* VSHRN, VRSHRN, VQSHRUN, VQRSHRUN */
                 return 1; /* handled by decodetree */
             default:
                 break;
-- 
2.20.1



  parent reply	other threads:[~2020-05-15 14:25 UTC|newest]

Thread overview: 24+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-05-15 14:20 [PATCH 00/10] target/arm: Convert 2-reg-shift and 1-reg-imm Neon insns to decodetree Peter Maydell
2020-05-15 14:20 ` [PATCH 01/10] target/arm: Remove unused GEN_NEON_INTEGER_OP macro Peter Maydell
2020-05-15 22:07   ` Richard Henderson
2020-05-15 14:20 ` [PATCH 02/10] target/arm: Convert Neon VSHL and VSLI 2-reg-shift insn to decodetree Peter Maydell
2020-05-15 22:16   ` Richard Henderson
2020-05-15 14:20 ` [PATCH 03/10] target/arm: Convert Neon VSHR 2-reg-shift insns " Peter Maydell
2020-05-15 22:33   ` Richard Henderson
2020-05-15 22:48   ` Richard Henderson
2020-05-15 14:20 ` [PATCH 04/10] target/arm: Convert Neon VSRA, VSRI, VRSHR, VRSRA " Peter Maydell
2020-05-15 22:50   ` Richard Henderson
2020-05-15 14:20 ` [PATCH 05/10] target/arm: Convert VQSHLU, VQSHL " Peter Maydell
2020-05-15 22:55   ` Richard Henderson
2020-05-15 14:20 ` Peter Maydell [this message]
2020-05-16  2:01   ` [PATCH 06/10] target/arm: Convert Neon narrowing shifts with op==8 " Richard Henderson
2020-05-15 14:20 ` [PATCH 07/10] target/arm: Convert Neon narrowing shifts with op==9 " Peter Maydell
2020-05-16  2:05   ` Richard Henderson
2020-05-15 14:20 ` [PATCH 08/10] target/arm: Convert Neon VSHLL, VMOVL " Peter Maydell
2020-05-16  2:18   ` Richard Henderson
2020-05-15 14:20 ` [PATCH 09/10] target/arm: Convert VCVT fixed-point ops " Peter Maydell
2020-05-16  2:27   ` Richard Henderson
2020-05-15 14:20 ` [PATCH 10/10] target/arm: Convert Neon one-register-and-immediate insns " Peter Maydell
2020-05-16  2:50   ` Richard Henderson
2020-05-22 14:31     ` Peter Maydell
2020-05-15 21:32 ` [PATCH 00/10] target/arm: Convert 2-reg-shift and 1-reg-imm Neon " no-reply

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20200515142056.21346-7-peter.maydell@linaro.org \
    --to=peter.maydell@linaro.org \
    --cc=qemu-arm@nongnu.org \
    --cc=qemu-devel@nongnu.org \
    --cc=richard.henderson@linaro.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).