All of lore.kernel.org
 help / color / mirror / Atom feed
From: Peter Maydell <peter.maydell@linaro.org>
To: qemu-arm@nongnu.org, qemu-devel@nongnu.org
Cc: Richard Henderson <richard.henderson@linaro.org>
Subject: [PATCH 28/36] target/arm: Convert Neon VPMAX/VPMIN 3-reg-same insns to decodetree
Date: Thu, 30 Apr 2020 19:09:55 +0100	[thread overview]
Message-ID: <20200430181003.21682-29-peter.maydell@linaro.org> (raw)
In-Reply-To: <20200430181003.21682-1-peter.maydell@linaro.org>

Convert the Neon integer VPMAX and VPMIN 3-reg-same insns to
decodetree. These are 'pairwise' operations.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
---
 target/arm/translate-neon.inc.c | 71 +++++++++++++++++++++++++++++++++
 target/arm/translate.c          | 16 +-------
 target/arm/neon-dp.decode       |  9 +++++
 3 files changed, 82 insertions(+), 14 deletions(-)

diff --git a/target/arm/translate-neon.inc.c b/target/arm/translate-neon.inc.c
index 4692448fc5f..cd4c9dd6f28 100644
--- a/target/arm/translate-neon.inc.c
+++ b/target/arm/translate-neon.inc.c
@@ -1204,3 +1204,74 @@ static bool trans_VABA_U_3s(DisasContext *s, arg_3same *a)
     }
     return do_vaba(s, a, abd_fns[a->size], add_fns[a->size]);
 }
+
+static bool do_3same_pair(DisasContext *s, arg_3same *a, NeonGenTwoOpFn *fn)
+{
+    /* Operations handled pairwise 32 bits at a time */
+    TCGv_i32 tmp, tmp2, tmp3;
+
+    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
+        return false;
+    }
+
+    /* UNDEF accesses to D16-D31 if they don't exist. */
+    if (!dc_isar_feature(aa32_simd_r32, s) &&
+        ((a->vd | a->vn | a->vm) & 0x10)) {
+        return false;
+    }
+
+    if (a->size == 3) {
+        return false;
+    }
+
+    if (!vfp_access_check(s)) {
+        return true;
+    }
+
+    assert(a->q == 0); /* enforced by decode patterns */
+
+    /*
+     * Note that we have to be careful not to clobber the source operands
+     * in the "vm == vd" case by storing the result of the first pass too
+     * early. Since Q is 0 there are always just two passes, so instead
+     * of a complicated loop over each pass we just unroll.
+     */
+    tmp = neon_load_reg(a->vn, 0);
+    tmp2 = neon_load_reg(a->vn, 1);
+    fn(tmp, tmp, tmp2);
+    tcg_temp_free_i32(tmp2);
+
+    tmp3 = neon_load_reg(a->vm, 0);
+    tmp2 = neon_load_reg(a->vm, 1);
+    fn(tmp3, tmp3, tmp2);
+    tcg_temp_free_i32(tmp2);
+
+    neon_store_reg(a->vd, 0, tmp);
+    neon_store_reg(a->vd, 1, tmp3);
+    return true;
+}
+
+#define DO_3SAME_PAIR(INSN, func)                                       \
+    static bool trans_##INSN##_3s(DisasContext *s, arg_3same *a)        \
+    {                                                                   \
+        static NeonGenTwoOpFn * const fns[] = {                         \
+            gen_helper_neon_##func##8,                                  \
+            gen_helper_neon_##func##16,                                 \
+            gen_helper_neon_##func##32,                                 \
+        };                                                              \
+        if (a->size > 2) {                                              \
+            return false;                                               \
+        }                                                               \
+        return do_3same_pair(s, a, fns[a->size]);                       \
+    }
+
+/* 32-bit pairwise ops end up the same as the elementwise versions.  */
+#define gen_helper_neon_pmax_s32  tcg_gen_smax_i32
+#define gen_helper_neon_pmax_u32  tcg_gen_umax_i32
+#define gen_helper_neon_pmin_s32  tcg_gen_smin_i32
+#define gen_helper_neon_pmin_u32  tcg_gen_umin_i32
+
+DO_3SAME_PAIR(VPMAX_S, pmax_s)
+DO_3SAME_PAIR(VPMIN_S, pmin_s)
+DO_3SAME_PAIR(VPMAX_U, pmax_u)
+DO_3SAME_PAIR(VPMIN_U, pmin_u)
diff --git a/target/arm/translate.c b/target/arm/translate.c
index b04643cec9a..4bbdddaa30c 100644
--- a/target/arm/translate.c
+++ b/target/arm/translate.c
@@ -3011,12 +3011,6 @@ static inline void gen_neon_rsb(int size, TCGv_i32 t0, TCGv_i32 t1)
     }
 }
 
-/* 32-bit pairwise ops end up the same as the elementwise versions.  */
-#define gen_helper_neon_pmax_s32  tcg_gen_smax_i32
-#define gen_helper_neon_pmax_u32  tcg_gen_umax_i32
-#define gen_helper_neon_pmin_s32  tcg_gen_smin_i32
-#define gen_helper_neon_pmin_u32  tcg_gen_umin_i32
-
 #define GEN_NEON_INTEGER_OP_ENV(name) do { \
     switch ((size << 1) | u) { \
     case 0: \
@@ -4794,6 +4788,8 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
         case NEON_3R_VRSHL:
         case NEON_3R_VQRSHL:
         case NEON_3R_VABA:
+        case NEON_3R_VPMAX:
+        case NEON_3R_VPMIN:
             /* Already handled by decodetree */
             return 1;
         }
@@ -4805,8 +4801,6 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
         pairwise = 0;
         switch (op) {
         case NEON_3R_VPADD_VQRDMLAH:
-        case NEON_3R_VPMAX:
-        case NEON_3R_VPMIN:
             pairwise = 1;
             break;
         case NEON_3R_FLOAT_ARITH:
@@ -4863,12 +4857,6 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
             tmp2 = neon_load_reg(rm, pass);
         }
         switch (op) {
-        case NEON_3R_VPMAX:
-            GEN_NEON_INTEGER_OP(pmax);
-            break;
-        case NEON_3R_VPMIN:
-            GEN_NEON_INTEGER_OP(pmin);
-            break;
         case NEON_3R_VQDMULH_VQRDMULH: /* Multiply high.  */
             if (!u) { /* VQDMULH */
                 switch (size) {
diff --git a/target/arm/neon-dp.decode b/target/arm/neon-dp.decode
index d91f944f84a..e47998899ce 100644
--- a/target/arm/neon-dp.decode
+++ b/target/arm/neon-dp.decode
@@ -42,6 +42,9 @@
 @3same           .... ... . . . size:2 .... .... .... . q:1 . . .... \
                  &3same vm=%vm_dp vn=%vn_dp vd=%vd_dp
 
+@3same_q0        .... ... . . . size:2 .... .... .... . 0 . . .... \
+                 &3same vm=%vm_dp vn=%vn_dp vd=%vd_dp q=0
+
 VHADD_S_3s       1111 001 0 0 . .. .... .... 0000 . . . 0 .... @3same
 VHADD_U_3s       1111 001 1 0 . .. .... .... 0000 . . . 0 .... @3same
 VQADD_S_3s       1111 001 0 0 . .. .... .... 0000 . . . 1 .... @3same
@@ -128,6 +131,12 @@ VMLS_3s          1111 001 1 0 . .. .... .... 1001 . . . 0 .... @3same
 VMUL_3s          1111 001 0 0 . .. .... .... 1001 . . . 1 .... @3same
 VMUL_p_3s        1111 001 1 0 . .. .... .... 1001 . . . 1 .... @3same
 
+VPMAX_S_3s       1111 001 0 0 . .. .... .... 1010 . . . 0 .... @3same_q0
+VPMAX_U_3s       1111 001 1 0 . .. .... .... 1010 . . . 0 .... @3same_q0
+
+VPMIN_S_3s       1111 001 0 0 . .. .... .... 1010 . . . 1 .... @3same_q0
+VPMIN_U_3s       1111 001 1 0 . .. .... .... 1010 . . . 1 .... @3same_q0
+
 VQRDMLAH_3s      1111 001 1 0 . .. .... .... 1011 ... 1 .... @3same
 
 SHA1_3s          1111 001 0 0 . optype:2 .... .... 1100 . 1 . 0 .... \
-- 
2.20.1



  parent reply	other threads:[~2020-04-30 18:30 UTC|newest]

Thread overview: 85+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-04-30 18:09 [PATCH 00/36] target/arm: Convert Neon to decodetree (part 1) Peter Maydell
2020-04-30 18:09 ` [PATCH 01/36] target/arm/translate-vfp.inc.c: Remove duplicate simd_r32 check Peter Maydell
2020-04-30 18:21   ` Richard Henderson
2020-05-01 16:55   ` Philippe Mathieu-Daudé
2020-04-30 18:09 ` [PATCH 02/36] target/arm: Don't allow Thumb Neon insns without FEATURE_NEON Peter Maydell
2020-04-30 18:22   ` Richard Henderson
2020-05-01 16:56   ` Philippe Mathieu-Daudé
2020-04-30 18:09 ` [PATCH 03/36] target/arm: Add stubs for AArch32 Neon decodetree Peter Maydell
2020-04-30 18:30   ` Richard Henderson
2020-04-30 18:09 ` [PATCH 04/36] target/arm: Convert VCMLA (vector) to decodetree Peter Maydell
2020-04-30 18:34   ` Richard Henderson
2020-04-30 18:09 ` [PATCH 05/36] target/arm: Convert VCADD " Peter Maydell
2020-04-30 18:35   ` Richard Henderson
2020-04-30 18:09 ` [PATCH 06/36] target/arm: Convert V[US]DOT " Peter Maydell
2020-04-30 18:36   ` Richard Henderson
2020-04-30 18:09 ` [PATCH 07/36] target/arm: Convert VFM[AS]L " Peter Maydell
2020-04-30 18:43   ` Richard Henderson
2020-04-30 18:09 ` [PATCH 08/36] target/arm: Convert VCMLA (scalar) " Peter Maydell
2020-04-30 19:00   ` Richard Henderson
2020-04-30 18:09 ` [PATCH 09/36] target/arm: Convert V[US]DOT " Peter Maydell
2020-04-30 19:01   ` Richard Henderson
2020-04-30 18:09 ` [PATCH 10/36] target/arm: Convert VFM[AS]L " Peter Maydell
2020-04-30 19:06   ` Richard Henderson
2020-04-30 18:09 ` [PATCH 11/36] target/arm: Convert Neon load/store multiple structures " Peter Maydell
2020-04-30 19:09   ` Richard Henderson
2020-04-30 18:09 ` [PATCH 12/36] target/arm: Convert Neon 'load single structure to all lanes' " Peter Maydell
2020-04-30 19:17   ` Richard Henderson
2020-04-30 18:09 ` [PATCH 13/36] target/arm: Convert Neon 'load/store single structure' " Peter Maydell
2020-04-30 19:32   ` Richard Henderson
2020-04-30 18:09 ` [PATCH 14/36] target/arm: Convert Neon 3-reg-same VADD/VSUB " Peter Maydell
2020-04-30 19:36   ` Richard Henderson
2020-04-30 18:09 ` [PATCH 15/36] target/arm: Convert Neon 3-reg-same logic ops " Peter Maydell
2020-04-30 19:39   ` Richard Henderson
2020-04-30 18:09 ` [PATCH 16/36] target/arm: Convert Neon 3-reg-same VMAX/VMIN " Peter Maydell
2020-04-30 19:45   ` Richard Henderson
2020-04-30 18:09 ` [PATCH 17/36] target/arm: Convert Neon 3-reg-same comparisons " Peter Maydell
2020-04-30 19:48   ` Richard Henderson
2020-04-30 18:09 ` [PATCH 18/36] target/arm: Convert Neon 3-reg-same VQADD/VQSUB " Peter Maydell
2020-04-30 19:50   ` Richard Henderson
2020-04-30 18:09 ` [PATCH 19/36] target/arm: Convert Neon 3-reg-same VMUL, VMLA, VMLS, VSHL " Peter Maydell
2020-04-30 19:58   ` Richard Henderson
2020-04-30 18:09 ` [PATCH 20/36] target/arm: Convert Neon 3-reg-same VQRDMLAH/VQRDMLSH " Peter Maydell
2020-04-30 20:03   ` Richard Henderson
2020-04-30 20:28   ` Richard Henderson
2020-05-01 14:23     ` Peter Maydell
2020-04-30 18:09 ` [PATCH 21/36] target/arm: Convert Neon 3-reg-same SHA " Peter Maydell
2020-04-30 20:30   ` Richard Henderson
2020-04-30 18:09 ` [PATCH 22/36] target/arm: Move gen_ function typedefs to translate.h Peter Maydell
2020-04-30 20:32   ` Richard Henderson
2020-04-30 18:09 ` [PATCH 23/36] target/arm: Convert Neon 64-bit element 3-reg-same insns Peter Maydell
2020-04-30 20:54   ` Richard Henderson
2020-05-01 15:36     ` Peter Maydell
2020-05-01 15:50       ` Richard Henderson
2020-05-01 15:57         ` Peter Maydell
2020-05-01 16:12           ` Richard Henderson
2020-05-01 15:54     ` Peter Maydell
2020-05-01 16:13       ` Richard Henderson
2020-04-30 18:09 ` [PATCH 24/36] target/arm: Convert Neon VHADD " Peter Maydell
2020-04-30 20:59   ` Richard Henderson
2020-04-30 18:09 ` [PATCH 25/36] target/arm: Convert Neon VRHADD, VHSUB, VABD 3-reg-same insns to decodetree Peter Maydell
2020-04-30 18:09 ` [PATCH 26/36] target/arm: Convert Neon VQSHL, VRSHL, VQRSHL " Peter Maydell
2020-05-01  1:55   ` Richard Henderson
2020-05-01 18:10     ` Peter Maydell
2020-04-30 18:09 ` [PATCH 27/36] target/arm: Convert Neon VABA 3-reg-same " Peter Maydell
2020-05-01  2:29   ` Richard Henderson
2020-04-30 18:09 ` Peter Maydell [this message]
2020-05-01  3:36   ` [PATCH 28/36] target/arm: Convert Neon VPMAX/VPMIN 3-reg-same insns " Richard Henderson
2020-04-30 18:09 ` [PATCH 29/36] target/arm: Convert Neon VPADD " Peter Maydell
2020-05-01  3:39   ` Richard Henderson
2020-04-30 18:09 ` [PATCH 30/36] target/arm: Convert Neon VQDMULH/VQRDMULH 3-reg-same " Peter Maydell
2020-05-01  3:47   ` Richard Henderson
2020-04-30 18:09 ` [PATCH 31/36] target/arm: Convert Neon VADD, VSUB, VABD 3-reg-same insns " Peter Maydell
2020-05-01  3:57   ` Richard Henderson
2020-04-30 18:09 ` [PATCH 32/36] target/arm: Convert Neon VPMIN/VPMAX/VPADD float " Peter Maydell
2020-05-01  3:59   ` Richard Henderson
2020-04-30 18:10 ` [PATCH 33/36] target/arm: Convert Neon fp VMUL, VMLA, VMLS " Peter Maydell
2020-05-01  4:07   ` Richard Henderson
2020-04-30 18:10 ` [PATCH 34/36] target/arm: Convert Neon 3-reg-same compare " Peter Maydell
2020-05-01  4:09   ` Richard Henderson
2020-04-30 18:10 ` [PATCH 35/36] target/arm: Convert Neon fp VMAX/VMIN/VMAXNM/VMINNM/VRECPS/VRSQRTS " Peter Maydell
2020-05-01  4:13   ` Richard Henderson
2020-04-30 18:10 ` [PATCH 36/36] target/arm: Convert NEON VFMA, VFMS 3-reg-same insns " Peter Maydell
2020-05-01  4:14   ` Richard Henderson
2020-05-01  7:32 ` [PATCH 00/36] target/arm: Convert Neon to decodetree (part 1) no-reply
2020-05-04 12:04 ` Peter Maydell

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20200430181003.21682-29-peter.maydell@linaro.org \
    --to=peter.maydell@linaro.org \
    --cc=qemu-arm@nongnu.org \
    --cc=qemu-devel@nongnu.org \
    --cc=richard.henderson@linaro.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.

  Be sure your reply has a Subject: header at the top and a blank line
  before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.