All of lore.kernel.org
 help / color / mirror / Atom feed
From: Peter Maydell <peter.maydell@linaro.org>
To: qemu-devel@nongnu.org
Subject: [PULL 27/29] target/arm: Convert Neon VSHLL, VMOVL to decodetree
Date: Fri,  5 Jun 2020 17:50:05 +0100	[thread overview]
Message-ID: <20200605165007.12095-28-peter.maydell@linaro.org> (raw)
In-Reply-To: <20200605165007.12095-1-peter.maydell@linaro.org>

Convert the VSHLL and VMOVL insns from the 2-reg-shift group
to decodetree. Since the loop always has two passes, we unroll
it to avoid the awkward reassignment of one TCGv to another.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-id: 20200522145520.6778-8-peter.maydell@linaro.org
---
 target/arm/neon-dp.decode       | 16 +++++++
 target/arm/translate-neon.inc.c | 81 +++++++++++++++++++++++++++++++++
 target/arm/translate.c          | 46 +------------------
 3 files changed, 99 insertions(+), 44 deletions(-)

diff --git a/target/arm/neon-dp.decode b/target/arm/neon-dp.decode
index 79d0bfdd70e..3dde699e97e 100644
--- a/target/arm/neon-dp.decode
+++ b/target/arm/neon-dp.decode
@@ -243,6 +243,14 @@ VMINNM_fp_3s     1111 001 1 0 . 1 . .... .... 1111 ... 1 .... @3same_fp
                  &2reg_shift vm=%vm_dp vd=%vd_dp size=1 q=0 \
                  shift=%neon_rshift_i3
 
+# Long left shifts: again Q is part of opcode decode
+@2reg_shll_s     .... ... . . . 1 shift:5    .... .... 0 . . . .... \
+                 &2reg_shift vm=%vm_dp vd=%vd_dp size=2 q=0
+@2reg_shll_h     .... ... . . . 01 shift:4   .... .... 0 . . . .... \
+                 &2reg_shift vm=%vm_dp vd=%vd_dp size=1 q=0
+@2reg_shll_b     .... ... . . . 001 shift:3  .... .... 0 . . . .... \
+                 &2reg_shift vm=%vm_dp vd=%vd_dp size=0 q=0
+
 VSHR_S_2sh       1111 001 0 1 . ...... .... 0000 . . . 1 .... @2reg_shr_d
 VSHR_S_2sh       1111 001 0 1 . ...... .... 0000 . . . 1 .... @2reg_shr_s
 VSHR_S_2sh       1111 001 0 1 . ...... .... 0000 . . . 1 .... @2reg_shr_h
@@ -348,3 +356,11 @@ VQSHRN_U16_2sh   1111 001 1 1 . ...... .... 1001 . 0 . 1 .... @2reg_shrn_h
 VQRSHRN_U64_2sh  1111 001 1 1 . ...... .... 1001 . 1 . 1 .... @2reg_shrn_d
 VQRSHRN_U32_2sh  1111 001 1 1 . ...... .... 1001 . 1 . 1 .... @2reg_shrn_s
 VQRSHRN_U16_2sh  1111 001 1 1 . ...... .... 1001 . 1 . 1 .... @2reg_shrn_h
+
+VSHLL_S_2sh      1111 001 0 1 . ...... .... 1010 . 0 . 1 .... @2reg_shll_s
+VSHLL_S_2sh      1111 001 0 1 . ...... .... 1010 . 0 . 1 .... @2reg_shll_h
+VSHLL_S_2sh      1111 001 0 1 . ...... .... 1010 . 0 . 1 .... @2reg_shll_b
+
+VSHLL_U_2sh      1111 001 1 1 . ...... .... 1010 . 0 . 1 .... @2reg_shll_s
+VSHLL_U_2sh      1111 001 1 1 . ...... .... 1010 . 0 . 1 .... @2reg_shll_h
+VSHLL_U_2sh      1111 001 1 1 . ...... .... 1010 . 0 . 1 .... @2reg_shll_b
diff --git a/target/arm/translate-neon.inc.c b/target/arm/translate-neon.inc.c
index 562470ca08c..3d566044f3d 100644
--- a/target/arm/translate-neon.inc.c
+++ b/target/arm/translate-neon.inc.c
@@ -1578,3 +1578,84 @@ DO_2SN_32(VQSHRN_U16, gen_helper_neon_shl_u16, gen_helper_neon_narrow_sat_u8)
 DO_2SN_64(VQRSHRN_U64, gen_helper_neon_rshl_u64, gen_helper_neon_narrow_sat_u32)
 DO_2SN_32(VQRSHRN_U32, gen_helper_neon_rshl_u32, gen_helper_neon_narrow_sat_u16)
 DO_2SN_32(VQRSHRN_U16, gen_helper_neon_rshl_u16, gen_helper_neon_narrow_sat_u8)
+
+static bool do_vshll_2sh(DisasContext *s, arg_2reg_shift *a,
+                         NeonGenWidenFn *widenfn, bool u)
+{
+    TCGv_i64 tmp;
+    TCGv_i32 rm0, rm1;
+    uint64_t widen_mask = 0;
+
+    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
+        return false;
+    }
+
+    /* UNDEF accesses to D16-D31 if they don't exist. */
+    if (!dc_isar_feature(aa32_simd_r32, s) &&
+        ((a->vd | a->vm) & 0x10)) {
+        return false;
+    }
+
+    if (a->vd & 1) {
+        return false;
+    }
+
+    if (!vfp_access_check(s)) {
+        return true;
+    }
+
+    /*
+     * This is a widen-and-shift operation. The shift is always less
+     * than the width of the source type, so after widening the input
+     * vector we can simply shift the whole 64-bit widened register,
+     * and then clear the potential overflow bits resulting from left
+     * bits of the narrow input appearing as right bits of the left
+     * neighbour narrow input. Calculate a mask of bits to clear.
+     */
+    if ((a->shift != 0) && (a->size < 2 || u)) {
+        int esize = 8 << a->size;
+        widen_mask = MAKE_64BIT_MASK(0, esize);
+        widen_mask >>= esize - a->shift;
+        widen_mask = dup_const(a->size + 1, widen_mask);
+    }
+
+    rm0 = neon_load_reg(a->vm, 0);
+    rm1 = neon_load_reg(a->vm, 1);
+    tmp = tcg_temp_new_i64();
+
+    widenfn(tmp, rm0);
+    if (a->shift != 0) {
+        tcg_gen_shli_i64(tmp, tmp, a->shift);
+        tcg_gen_andi_i64(tmp, tmp, ~widen_mask);
+    }
+    neon_store_reg64(tmp, a->vd);
+
+    widenfn(tmp, rm1);
+    if (a->shift != 0) {
+        tcg_gen_shli_i64(tmp, tmp, a->shift);
+        tcg_gen_andi_i64(tmp, tmp, ~widen_mask);
+    }
+    neon_store_reg64(tmp, a->vd + 1);
+    tcg_temp_free_i64(tmp);
+    return true;
+}
+
+static bool trans_VSHLL_S_2sh(DisasContext *s, arg_2reg_shift *a)
+{
+    NeonGenWidenFn *widenfn[] = {
+        gen_helper_neon_widen_s8,
+        gen_helper_neon_widen_s16,
+        tcg_gen_ext_i32_i64,
+    };
+    return do_vshll_2sh(s, a, widenfn[a->size], false);
+}
+
+static bool trans_VSHLL_U_2sh(DisasContext *s, arg_2reg_shift *a)
+{
+    NeonGenWidenFn *widenfn[] = {
+        gen_helper_neon_widen_u8,
+        gen_helper_neon_widen_u16,
+        tcg_gen_extu_i32_i64,
+    };
+    return do_vshll_2sh(s, a, widenfn[a->size], true);
+}
diff --git a/target/arm/translate.c b/target/arm/translate.c
index 883c1a29c7b..a9f52049e7c 100644
--- a/target/arm/translate.c
+++ b/target/arm/translate.c
@@ -5248,6 +5248,7 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
             case 7: /* VQSHL */
             case 8: /* VSHRN, VRSHRN, VQSHRUN, VQRSHRUN */
             case 9: /* VQSHRN, VQRSHRN */
+            case 10: /* VSHLL, including VMOVL */
                 return 1; /* handled by decodetree */
             default:
                 break;
@@ -5265,50 +5266,7 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
                     size--;
             }
             shift = (insn >> 16) & ((1 << (3 + size)) - 1);
-            if (op == 10) {
-                /* VSHLL, VMOVL */
-                if (q || (rd & 1)) {
-                    return 1;
-                }
-                tmp = neon_load_reg(rm, 0);
-                tmp2 = neon_load_reg(rm, 1);
-                for (pass = 0; pass < 2; pass++) {
-                    if (pass == 1)
-                        tmp = tmp2;
-
-                    gen_neon_widen(cpu_V0, tmp, size, u);
-
-                    if (shift != 0) {
-                        /* The shift is less than the width of the source
-                           type, so we can just shift the whole register.  */
-                        tcg_gen_shli_i64(cpu_V0, cpu_V0, shift);
-                        /* Widen the result of shift: we need to clear
-                         * the potential overflow bits resulting from
-                         * left bits of the narrow input appearing as
-                         * right bits of left the neighbour narrow
-                         * input.  */
-                        if (size < 2 || !u) {
-                            uint64_t imm64;
-                            if (size == 0) {
-                                imm = (0xffu >> (8 - shift));
-                                imm |= imm << 16;
-                            } else if (size == 1) {
-                                imm = 0xffff >> (16 - shift);
-                            } else {
-                                /* size == 2 */
-                                imm = 0xffffffff >> (32 - shift);
-                            }
-                            if (size < 2) {
-                                imm64 = imm | (((uint64_t)imm) << 32);
-                            } else {
-                                imm64 = imm;
-                            }
-                            tcg_gen_andi_i64(cpu_V0, cpu_V0, ~imm64);
-                        }
-                    }
-                    neon_store_reg64(cpu_V0, rd + pass);
-                }
-            } else if (op >= 14) {
+            if (op >= 14) {
                 /* VCVT fixed-point.  */
                 TCGv_ptr fpst;
                 TCGv_i32 shiftv;
-- 
2.20.1



  parent reply	other threads:[~2020-06-05 17:04 UTC|newest]

Thread overview: 34+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-06-05 16:49 [PULL 00/29] target-arm queue Peter Maydell
2020-06-05 16:49 ` [PULL 01/29] hw/ssi/imx_spi: changed while statement to prevent underflow Peter Maydell
2020-06-05 16:49 ` [PULL 02/29] hw/ssi/imx_spi: Removed unnecessary cast of rx data received from slave Peter Maydell
2020-06-05 16:49 ` [PULL 03/29] hw/input/pxa2xx_keypad: Replace hw_error() by qemu_log_mask() Peter Maydell
2020-06-05 16:49 ` [PULL 04/29] hw/arm/pxa2xx: Replace printf() call " Peter Maydell
2020-06-05 16:49 ` [PULL 05/29] target/arm: Convert aes and sm4 to gvec helpers Peter Maydell
2020-06-05 16:49 ` [PULL 06/29] target/arm: Convert rax1 " Peter Maydell
2020-06-05 16:49 ` [PULL 07/29] target/arm: Convert sha512 and sm3 " Peter Maydell
2020-06-05 16:49 ` [PULL 08/29] target/arm: Convert sha1 and sha256 " Peter Maydell
2020-06-05 16:49 ` [PULL 09/29] target/arm: Split helper_crypto_sha1_3reg Peter Maydell
2020-06-05 16:49 ` [PULL 10/29] target/arm: Split helper_crypto_sm3tt Peter Maydell
2020-06-05 16:49 ` [PULL 11/29] hw/adc/stm32f2xx_adc: Correct memory region size and access size Peter Maydell
2020-06-05 16:49 ` [PULL 12/29] tests/acceptance: Add a boot test for the xlnx-versal-virt machine Peter Maydell
2020-06-05 16:49 ` [PULL 13/29] docs/system: Document Aspeed boards Peter Maydell
2020-06-05 16:49 ` [PULL 14/29] raspi: add BCM2835 SOC MPHI emulation Peter Maydell
2020-06-05 16:49 ` [PULL 15/29] dwc-hsotg (dwc2) USB host controller register definitions Peter Maydell
2020-07-04  9:17   ` Philippe Mathieu-Daudé
2020-07-04 10:35     ` Philippe Mathieu-Daudé
2020-06-05 16:49 ` [PULL 16/29] dwc-hsotg (dwc2) USB host controller state definitions Peter Maydell
2020-06-05 16:49 ` [PULL 17/29] dwc-hsotg (dwc2) USB host controller emulation Peter Maydell
2020-06-05 16:49 ` [PULL 18/29] usb: add short-packet handling to usb-storage driver Peter Maydell
2020-06-05 16:49 ` [PULL 19/29] wire in the dwc-hsotg (dwc2) USB host controller emulation Peter Maydell
2020-06-05 16:49 ` [PULL 20/29] raspi2 acceptance test: add test for dwc-hsotg (dwc2) USB host Peter Maydell
2020-06-05 16:49 ` [PULL 21/29] target/arm: Convert Neon VSHL and VSLI 2-reg-shift insn to decodetree Peter Maydell
2020-06-05 16:50 ` [PULL 22/29] target/arm: Convert Neon VSHR 2-reg-shift insns " Peter Maydell
2020-06-05 16:50 ` [PULL 23/29] target/arm: Convert Neon VSRA, VSRI, VRSHR, VRSRA " Peter Maydell
2020-06-05 16:50 ` [PULL 24/29] target/arm: Convert VQSHLU, VQSHL " Peter Maydell
2020-06-05 16:50 ` [PULL 25/29] target/arm: Convert Neon narrowing shifts with op==8 " Peter Maydell
2020-06-05 16:50 ` [PULL 26/29] target/arm: Convert Neon narrowing shifts with op==9 " Peter Maydell
2020-06-05 16:50 ` Peter Maydell [this message]
2020-06-05 16:50 ` [PULL 28/29] target/arm: Convert VCVT fixed-point ops " Peter Maydell
2020-06-05 16:50 ` [PULL 29/29] target/arm: Convert Neon one-register-and-immediate insns " Peter Maydell
2020-06-05 20:10 ` [PULL 00/29] target-arm queue no-reply
2020-06-08 10:04 ` Peter Maydell

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20200605165007.12095-28-peter.maydell@linaro.org \
    --to=peter.maydell@linaro.org \
    --cc=qemu-devel@nongnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.