All of lore.kernel.org
 help / color / mirror / Atom feed
From: Peter Maydell <peter.maydell@linaro.org>
To: qemu-devel@nongnu.org
Subject: [PULL 25/29] target/arm: Convert Neon narrowing shifts with op==8 to decodetree
Date: Fri,  5 Jun 2020 17:50:03 +0100	[thread overview]
Message-ID: <20200605165007.12095-26-peter.maydell@linaro.org> (raw)
In-Reply-To: <20200605165007.12095-1-peter.maydell@linaro.org>

Convert the Neon narrowing shifts where op==8 to decodetree:
 * VSHRN
 * VRSHRN
 * VQSHRUN
 * VQRSHRUN

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-id: 20200522145520.6778-6-peter.maydell@linaro.org
---
 target/arm/neon-dp.decode       |  27 ++++++
 target/arm/translate-neon.inc.c | 167 ++++++++++++++++++++++++++++++++
 target/arm/translate.c          |   1 +
 3 files changed, 195 insertions(+)

diff --git a/target/arm/neon-dp.decode b/target/arm/neon-dp.decode
index 66c41a53e59..8161995aee8 100644
--- a/target/arm/neon-dp.decode
+++ b/target/arm/neon-dp.decode
@@ -232,6 +232,17 @@ VMINNM_fp_3s     1111 001 1 0 . 1 . .... .... 1111 ... 1 .... @3same_fp
 @2reg_shl_b      .... ... . . . 001 shift:3  .... .... 0 q:1 . . .... \
                  &2reg_shift vm=%vm_dp vd=%vd_dp size=0
 
+# Narrowing right shifts: here the Q bit is part of the opcode decode
+@2reg_shrn_d     .... ... . . . 1 ..... .... .... 0 . . . .... \
+                 &2reg_shift vm=%vm_dp vd=%vd_dp size=3 q=0 \
+                 shift=%neon_rshift_i5
+@2reg_shrn_s     .... ... . . . 01 .... .... .... 0 . . . .... \
+                 &2reg_shift vm=%vm_dp vd=%vd_dp size=2 q=0 \
+                 shift=%neon_rshift_i4
+@2reg_shrn_h     .... ... . . . 001 ... .... .... 0 . . . .... \
+                 &2reg_shift vm=%vm_dp vd=%vd_dp size=1 q=0 \
+                 shift=%neon_rshift_i3
+
 VSHR_S_2sh       1111 001 0 1 . ...... .... 0000 . . . 1 .... @2reg_shr_d
 VSHR_S_2sh       1111 001 0 1 . ...... .... 0000 . . . 1 .... @2reg_shr_s
 VSHR_S_2sh       1111 001 0 1 . ...... .... 0000 . . . 1 .... @2reg_shr_h
@@ -301,3 +312,19 @@ VQSHL_U_64_2sh   1111 001 1 1 . ...... .... 0111 . . . 1 .... @2reg_shl_d
 VQSHL_U_2sh      1111 001 1 1 . ...... .... 0111 . . . 1 .... @2reg_shl_s
 VQSHL_U_2sh      1111 001 1 1 . ...... .... 0111 . . . 1 .... @2reg_shl_h
 VQSHL_U_2sh      1111 001 1 1 . ...... .... 0111 . . . 1 .... @2reg_shl_b
+
+VSHRN_64_2sh     1111 001 0 1 . ...... .... 1000 . 0 . 1 .... @2reg_shrn_d
+VSHRN_32_2sh     1111 001 0 1 . ...... .... 1000 . 0 . 1 .... @2reg_shrn_s
+VSHRN_16_2sh     1111 001 0 1 . ...... .... 1000 . 0 . 1 .... @2reg_shrn_h
+
+VRSHRN_64_2sh    1111 001 0 1 . ...... .... 1000 . 1 . 1 .... @2reg_shrn_d
+VRSHRN_32_2sh    1111 001 0 1 . ...... .... 1000 . 1 . 1 .... @2reg_shrn_s
+VRSHRN_16_2sh    1111 001 0 1 . ...... .... 1000 . 1 . 1 .... @2reg_shrn_h
+
+VQSHRUN_64_2sh   1111 001 1 1 . ...... .... 1000 . 0 . 1 .... @2reg_shrn_d
+VQSHRUN_32_2sh   1111 001 1 1 . ...... .... 1000 . 0 . 1 .... @2reg_shrn_s
+VQSHRUN_16_2sh   1111 001 1 1 . ...... .... 1000 . 0 . 1 .... @2reg_shrn_h
+
+VQRSHRUN_64_2sh  1111 001 1 1 . ...... .... 1000 . 1 . 1 .... @2reg_shrn_d
+VQRSHRUN_32_2sh  1111 001 1 1 . ...... .... 1000 . 1 . 1 .... @2reg_shrn_s
+VQRSHRUN_16_2sh  1111 001 1 1 . ...... .... 1000 . 1 . 1 .... @2reg_shrn_h
diff --git a/target/arm/translate-neon.inc.c b/target/arm/translate-neon.inc.c
index baa985b16c3..fe3fb7f62f3 100644
--- a/target/arm/translate-neon.inc.c
+++ b/target/arm/translate-neon.inc.c
@@ -1396,3 +1396,170 @@ static bool do_2shift_env_32(DisasContext *s, arg_2reg_shift *a,
 DO_2SHIFT_ENV(VQSHLU, qshlu_s)
 DO_2SHIFT_ENV(VQSHL_U, qshl_u)
 DO_2SHIFT_ENV(VQSHL_S, qshl_s)
+
+static bool do_2shift_narrow_64(DisasContext *s, arg_2reg_shift *a,
+                                NeonGenTwo64OpFn *shiftfn,
+                                NeonGenNarrowEnvFn *narrowfn)
+{
+    /* 2-reg-and-shift narrowing-shift operations, size == 3 case */
+    TCGv_i64 constimm, rm1, rm2;
+    TCGv_i32 rd;
+
+    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
+        return false;
+    }
+
+    /* UNDEF accesses to D16-D31 if they don't exist. */
+    if (!dc_isar_feature(aa32_simd_r32, s) &&
+        ((a->vd | a->vm) & 0x10)) {
+        return false;
+    }
+
+    if (a->vm & 1) {
+        return false;
+    }
+
+    if (!vfp_access_check(s)) {
+        return true;
+    }
+
+    /*
+     * This is always a right shift, and the shiftfn is always a
+     * left-shift helper, which thus needs the negated shift count.
+     */
+    constimm = tcg_const_i64(-a->shift);
+    rm1 = tcg_temp_new_i64();
+    rm2 = tcg_temp_new_i64();
+
+    /* Load both inputs first to avoid potential overwrite if rm == rd */
+    neon_load_reg64(rm1, a->vm);
+    neon_load_reg64(rm2, a->vm + 1);
+
+    shiftfn(rm1, rm1, constimm);
+    rd = tcg_temp_new_i32();
+    narrowfn(rd, cpu_env, rm1);
+    neon_store_reg(a->vd, 0, rd);
+
+    shiftfn(rm2, rm2, constimm);
+    rd = tcg_temp_new_i32();
+    narrowfn(rd, cpu_env, rm2);
+    neon_store_reg(a->vd, 1, rd);
+
+    tcg_temp_free_i64(rm1);
+    tcg_temp_free_i64(rm2);
+    tcg_temp_free_i64(constimm);
+
+    return true;
+}
+
+static bool do_2shift_narrow_32(DisasContext *s, arg_2reg_shift *a,
+                                NeonGenTwoOpFn *shiftfn,
+                                NeonGenNarrowEnvFn *narrowfn)
+{
+    /* 2-reg-and-shift narrowing-shift operations, size < 3 case */
+    TCGv_i32 constimm, rm1, rm2, rm3, rm4;
+    TCGv_i64 rtmp;
+    uint32_t imm;
+
+    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
+        return false;
+    }
+
+    /* UNDEF accesses to D16-D31 if they don't exist. */
+    if (!dc_isar_feature(aa32_simd_r32, s) &&
+        ((a->vd | a->vm) & 0x10)) {
+        return false;
+    }
+
+    if (a->vm & 1) {
+        return false;
+    }
+
+    if (!vfp_access_check(s)) {
+        return true;
+    }
+
+    /*
+     * This is always a right shift, and the shiftfn is always a
+     * left-shift helper, which thus needs the negated shift count
+     * duplicated into each lane of the immediate value.
+     */
+    if (a->size == 1) {
+        imm = (uint16_t)(-a->shift);
+        imm |= imm << 16;
+    } else {
+        /* size == 2 */
+        imm = -a->shift;
+    }
+    constimm = tcg_const_i32(imm);
+
+    /* Load all inputs first to avoid potential overwrite */
+    rm1 = neon_load_reg(a->vm, 0);
+    rm2 = neon_load_reg(a->vm, 1);
+    rm3 = neon_load_reg(a->vm + 1, 0);
+    rm4 = neon_load_reg(a->vm + 1, 1);
+    rtmp = tcg_temp_new_i64();
+
+    shiftfn(rm1, rm1, constimm);
+    shiftfn(rm2, rm2, constimm);
+
+    tcg_gen_concat_i32_i64(rtmp, rm1, rm2);
+    tcg_temp_free_i32(rm2);
+
+    narrowfn(rm1, cpu_env, rtmp);
+    neon_store_reg(a->vd, 0, rm1);
+
+    shiftfn(rm3, rm3, constimm);
+    shiftfn(rm4, rm4, constimm);
+    tcg_temp_free_i32(constimm);
+
+    tcg_gen_concat_i32_i64(rtmp, rm3, rm4);
+    tcg_temp_free_i32(rm4);
+
+    narrowfn(rm3, cpu_env, rtmp);
+    tcg_temp_free_i64(rtmp);
+    neon_store_reg(a->vd, 1, rm3);
+    return true;
+}
+
+#define DO_2SN_64(INSN, FUNC, NARROWFUNC)                               \
+    static bool trans_##INSN##_2sh(DisasContext *s, arg_2reg_shift *a)  \
+    {                                                                   \
+        return do_2shift_narrow_64(s, a, FUNC, NARROWFUNC);             \
+    }
+#define DO_2SN_32(INSN, FUNC, NARROWFUNC)                               \
+    static bool trans_##INSN##_2sh(DisasContext *s, arg_2reg_shift *a)  \
+    {                                                                   \
+        return do_2shift_narrow_32(s, a, FUNC, NARROWFUNC);             \
+    }
+
+static void gen_neon_narrow_u32(TCGv_i32 dest, TCGv_ptr env, TCGv_i64 src)
+{
+    tcg_gen_extrl_i64_i32(dest, src);
+}
+
+static void gen_neon_narrow_u16(TCGv_i32 dest, TCGv_ptr env, TCGv_i64 src)
+{
+    gen_helper_neon_narrow_u16(dest, src);
+}
+
+static void gen_neon_narrow_u8(TCGv_i32 dest, TCGv_ptr env, TCGv_i64 src)
+{
+    gen_helper_neon_narrow_u8(dest, src);
+}
+
+DO_2SN_64(VSHRN_64, gen_ushl_i64, gen_neon_narrow_u32)
+DO_2SN_32(VSHRN_32, gen_ushl_i32, gen_neon_narrow_u16)
+DO_2SN_32(VSHRN_16, gen_helper_neon_shl_u16, gen_neon_narrow_u8)
+
+DO_2SN_64(VRSHRN_64, gen_helper_neon_rshl_u64, gen_neon_narrow_u32)
+DO_2SN_32(VRSHRN_32, gen_helper_neon_rshl_u32, gen_neon_narrow_u16)
+DO_2SN_32(VRSHRN_16, gen_helper_neon_rshl_u16, gen_neon_narrow_u8)
+
+DO_2SN_64(VQSHRUN_64, gen_sshl_i64, gen_helper_neon_unarrow_sat32)
+DO_2SN_32(VQSHRUN_32, gen_sshl_i32, gen_helper_neon_unarrow_sat16)
+DO_2SN_32(VQSHRUN_16, gen_helper_neon_shl_s16, gen_helper_neon_unarrow_sat8)
+
+DO_2SN_64(VQRSHRUN_64, gen_helper_neon_rshl_s64, gen_helper_neon_unarrow_sat32)
+DO_2SN_32(VQRSHRUN_32, gen_helper_neon_rshl_s32, gen_helper_neon_unarrow_sat16)
+DO_2SN_32(VQRSHRUN_16, gen_helper_neon_rshl_s16, gen_helper_neon_unarrow_sat8)
diff --git a/target/arm/translate.c b/target/arm/translate.c
index c32a16085c7..11330b92966 100644
--- a/target/arm/translate.c
+++ b/target/arm/translate.c
@@ -5280,6 +5280,7 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
             case 5: /* VSHL, VSLI */
             case 6: /* VQSHLU */
             case 7: /* VQSHL */
+            case 8: /* VSHRN, VRSHRN, VQSHRUN, VQRSHRUN */
                 return 1; /* handled by decodetree */
             default:
                 break;
-- 
2.20.1



  parent reply	other threads:[~2020-06-05 17:01 UTC|newest]

Thread overview: 34+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-06-05 16:49 [PULL 00/29] target-arm queue Peter Maydell
2020-06-05 16:49 ` [PULL 01/29] hw/ssi/imx_spi: changed while statement to prevent underflow Peter Maydell
2020-06-05 16:49 ` [PULL 02/29] hw/ssi/imx_spi: Removed unnecessary cast of rx data received from slave Peter Maydell
2020-06-05 16:49 ` [PULL 03/29] hw/input/pxa2xx_keypad: Replace hw_error() by qemu_log_mask() Peter Maydell
2020-06-05 16:49 ` [PULL 04/29] hw/arm/pxa2xx: Replace printf() call " Peter Maydell
2020-06-05 16:49 ` [PULL 05/29] target/arm: Convert aes and sm4 to gvec helpers Peter Maydell
2020-06-05 16:49 ` [PULL 06/29] target/arm: Convert rax1 " Peter Maydell
2020-06-05 16:49 ` [PULL 07/29] target/arm: Convert sha512 and sm3 " Peter Maydell
2020-06-05 16:49 ` [PULL 08/29] target/arm: Convert sha1 and sha256 " Peter Maydell
2020-06-05 16:49 ` [PULL 09/29] target/arm: Split helper_crypto_sha1_3reg Peter Maydell
2020-06-05 16:49 ` [PULL 10/29] target/arm: Split helper_crypto_sm3tt Peter Maydell
2020-06-05 16:49 ` [PULL 11/29] hw/adc/stm32f2xx_adc: Correct memory region size and access size Peter Maydell
2020-06-05 16:49 ` [PULL 12/29] tests/acceptance: Add a boot test for the xlnx-versal-virt machine Peter Maydell
2020-06-05 16:49 ` [PULL 13/29] docs/system: Document Aspeed boards Peter Maydell
2020-06-05 16:49 ` [PULL 14/29] raspi: add BCM2835 SOC MPHI emulation Peter Maydell
2020-06-05 16:49 ` [PULL 15/29] dwc-hsotg (dwc2) USB host controller register definitions Peter Maydell
2020-07-04  9:17   ` Philippe Mathieu-Daudé
2020-07-04 10:35     ` Philippe Mathieu-Daudé
2020-06-05 16:49 ` [PULL 16/29] dwc-hsotg (dwc2) USB host controller state definitions Peter Maydell
2020-06-05 16:49 ` [PULL 17/29] dwc-hsotg (dwc2) USB host controller emulation Peter Maydell
2020-06-05 16:49 ` [PULL 18/29] usb: add short-packet handling to usb-storage driver Peter Maydell
2020-06-05 16:49 ` [PULL 19/29] wire in the dwc-hsotg (dwc2) USB host controller emulation Peter Maydell
2020-06-05 16:49 ` [PULL 20/29] raspi2 acceptance test: add test for dwc-hsotg (dwc2) USB host Peter Maydell
2020-06-05 16:49 ` [PULL 21/29] target/arm: Convert Neon VSHL and VSLI 2-reg-shift insn to decodetree Peter Maydell
2020-06-05 16:50 ` [PULL 22/29] target/arm: Convert Neon VSHR 2-reg-shift insns " Peter Maydell
2020-06-05 16:50 ` [PULL 23/29] target/arm: Convert Neon VSRA, VSRI, VRSHR, VRSRA " Peter Maydell
2020-06-05 16:50 ` [PULL 24/29] target/arm: Convert VQSHLU, VQSHL " Peter Maydell
2020-06-05 16:50 ` Peter Maydell [this message]
2020-06-05 16:50 ` [PULL 26/29] target/arm: Convert Neon narrowing shifts with op==9 " Peter Maydell
2020-06-05 16:50 ` [PULL 27/29] target/arm: Convert Neon VSHLL, VMOVL " Peter Maydell
2020-06-05 16:50 ` [PULL 28/29] target/arm: Convert VCVT fixed-point ops " Peter Maydell
2020-06-05 16:50 ` [PULL 29/29] target/arm: Convert Neon one-register-and-immediate insns " Peter Maydell
2020-06-05 20:10 ` [PULL 00/29] target-arm queue no-reply
2020-06-08 10:04 ` Peter Maydell

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20200605165007.12095-26-peter.maydell@linaro.org \
    --to=peter.maydell@linaro.org \
    --cc=qemu-devel@nongnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.