All of lore.kernel.org
 help / color / mirror / Atom feed
From: Peter Maydell <peter.maydell@linaro.org>
To: qemu-arm@nongnu.org, qemu-devel@nongnu.org
Subject: [PATCH for-6.2 21/53] target/arm: Implement MVE integer min/max across vector
Date: Thu, 29 Jul 2021 12:14:40 +0100	[thread overview]
Message-ID: <20210729111512.16541-22-peter.maydell@linaro.org> (raw)
In-Reply-To: <20210729111512.16541-1-peter.maydell@linaro.org>

Implement the MVE integer min/max across vector insns
VMAXV, VMINV, VMAXAV and VMINAV, which find the maximum
from the vector elements and a general purpose register,
and store the maximum back into the general purpose
register.

These insns overlap with VRMLALDAVH (they use what would
be RdaHi=0b110).

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
---
Changes v1->v2: Drop the harmless but unnecessary
"take abs value of 'n'" part of do_maxa() and do_mina()
---
 target/arm/helper-mve.h    | 20 ++++++++++++
 target/arm/mve.decode      | 18 +++++++++--
 target/arm/mve_helper.c    | 66 ++++++++++++++++++++++++++++++++++++++
 target/arm/translate-mve.c | 48 +++++++++++++++++++++++++++
 4 files changed, 150 insertions(+), 2 deletions(-)

diff --git a/target/arm/helper-mve.h b/target/arm/helper-mve.h
index 0ee5ea3cabd..2c66fcba792 100644
--- a/target/arm/helper-mve.h
+++ b/target/arm/helper-mve.h
@@ -379,6 +379,26 @@ DEF_HELPER_FLAGS_3(mve_vaddvuh, TCG_CALL_NO_WG, i32, env, ptr, i32)
 DEF_HELPER_FLAGS_3(mve_vaddvsw, TCG_CALL_NO_WG, i32, env, ptr, i32)
 DEF_HELPER_FLAGS_3(mve_vaddvuw, TCG_CALL_NO_WG, i32, env, ptr, i32)
 
+DEF_HELPER_FLAGS_3(mve_vmaxvsb, TCG_CALL_NO_WG, i32, env, ptr, i32)
+DEF_HELPER_FLAGS_3(mve_vmaxvsh, TCG_CALL_NO_WG, i32, env, ptr, i32)
+DEF_HELPER_FLAGS_3(mve_vmaxvsw, TCG_CALL_NO_WG, i32, env, ptr, i32)
+DEF_HELPER_FLAGS_3(mve_vmaxvub, TCG_CALL_NO_WG, i32, env, ptr, i32)
+DEF_HELPER_FLAGS_3(mve_vmaxvuh, TCG_CALL_NO_WG, i32, env, ptr, i32)
+DEF_HELPER_FLAGS_3(mve_vmaxvuw, TCG_CALL_NO_WG, i32, env, ptr, i32)
+DEF_HELPER_FLAGS_3(mve_vmaxavb, TCG_CALL_NO_WG, i32, env, ptr, i32)
+DEF_HELPER_FLAGS_3(mve_vmaxavh, TCG_CALL_NO_WG, i32, env, ptr, i32)
+DEF_HELPER_FLAGS_3(mve_vmaxavw, TCG_CALL_NO_WG, i32, env, ptr, i32)
+
+DEF_HELPER_FLAGS_3(mve_vminvsb, TCG_CALL_NO_WG, i32, env, ptr, i32)
+DEF_HELPER_FLAGS_3(mve_vminvsh, TCG_CALL_NO_WG, i32, env, ptr, i32)
+DEF_HELPER_FLAGS_3(mve_vminvsw, TCG_CALL_NO_WG, i32, env, ptr, i32)
+DEF_HELPER_FLAGS_3(mve_vminvub, TCG_CALL_NO_WG, i32, env, ptr, i32)
+DEF_HELPER_FLAGS_3(mve_vminvuh, TCG_CALL_NO_WG, i32, env, ptr, i32)
+DEF_HELPER_FLAGS_3(mve_vminvuw, TCG_CALL_NO_WG, i32, env, ptr, i32)
+DEF_HELPER_FLAGS_3(mve_vminavb, TCG_CALL_NO_WG, i32, env, ptr, i32)
+DEF_HELPER_FLAGS_3(mve_vminavh, TCG_CALL_NO_WG, i32, env, ptr, i32)
+DEF_HELPER_FLAGS_3(mve_vminavw, TCG_CALL_NO_WG, i32, env, ptr, i32)
+
 DEF_HELPER_FLAGS_3(mve_vaddlv_s, TCG_CALL_NO_WG, i64, env, ptr, i64)
 DEF_HELPER_FLAGS_3(mve_vaddlv_u, TCG_CALL_NO_WG, i64, env, ptr, i64)
 
diff --git a/target/arm/mve.decode b/target/arm/mve.decode
index bdcd660aaf4..83dc0300d69 100644
--- a/target/arm/mve.decode
+++ b/target/arm/mve.decode
@@ -40,6 +40,7 @@
 &vcmp qm qn size mask
 &vcmp_scalar qn rm size mask
 &shl_scalar qda rm size
+&vmaxv qm rda size
 
 @vldr_vstr ....... . . . . l:1 rn:4 ... ...... imm:7 &vldr_vstr qd=%qd u=0
 # Note that both Rn and Qd are 3 bits only (no D bit)
@@ -97,6 +98,8 @@
 @vcmp_scalar .... .... .. size:2 qn:3 . .... .... .... rm:4 &vcmp_scalar \
              mask=%mask_22_13
 
+@vmaxv .... .... .... size:2 .. rda:4 .... .... .... &vmaxv qm=%qm
+
 # Vector loads and stores
 
 # Widening loads and narrowing stores:
@@ -314,8 +317,19 @@ VMLALDAV_U       1111 1110 1 ... ... . ... . 1110 . 0 . 0 ... 0 @vmlaldav
 
 VMLSLDAV         1110 1110 1 ... ... . ... . 1110 . 0 . 0 ... 1 @vmlaldav
 
-VRMLALDAVH_S     1110 1110 1 ... ... 0 ... . 1111 . 0 . 0 ... 0 @vmlaldav_nosz
-VRMLALDAVH_U     1111 1110 1 ... ... 0 ... . 1111 . 0 . 0 ... 0 @vmlaldav_nosz
+{
+  VMAXV_S        1110 1110 1110  .. 10 ....  1111 0 0 . 0 ... 0 @vmaxv
+  VMINV_S        1110 1110 1110  .. 10 ....  1111 1 0 . 0 ... 0 @vmaxv
+  VMAXAV         1110 1110 1110  .. 00 ....  1111 0 0 . 0 ... 0 @vmaxv
+  VMINAV         1110 1110 1110  .. 00 ....  1111 1 0 . 0 ... 0 @vmaxv
+  VRMLALDAVH_S   1110 1110 1 ... ... 0 ... . 1111 . 0 . 0 ... 0 @vmlaldav_nosz
+}
+
+{
+  VMAXV_U        1111 1110 1110  .. 10 ....  1111 0 0 . 0 ... 0 @vmaxv
+  VMINV_U        1111 1110 1110  .. 10 ....  1111 1 0 . 0 ... 0 @vmaxv
+  VRMLALDAVH_U   1111 1110 1 ... ... 0 ... . 1111 . 0 . 0 ... 0 @vmlaldav_nosz
+}
 
 VRMLSLDAVH       1111 1110 1 ... ... 0 ... . 1110 . 0 . 0 ... 1 @vmlaldav_nosz
 
diff --git a/target/arm/mve_helper.c b/target/arm/mve_helper.c
index ac608fc524b..924ad7f2bdc 100644
--- a/target/arm/mve_helper.c
+++ b/target/arm/mve_helper.c
@@ -1254,6 +1254,72 @@ DO_VADDV(vaddvub, 1, uint8_t)
 DO_VADDV(vaddvuh, 2, uint16_t)
 DO_VADDV(vaddvuw, 4, uint32_t)
 
+/*
+ * Vector max/min across vector. Unlike VADDV, we must
+ * read ra as the element size, not its full width.
+ * We work with int64_t internally for simplicity.
+ */
+#define DO_VMAXMINV(OP, ESIZE, TYPE, RATYPE, FN)                \
+    uint32_t HELPER(glue(mve_, OP))(CPUARMState *env, void *vm, \
+                                    uint32_t ra_in)             \
+    {                                                           \
+        uint16_t mask = mve_element_mask(env);                  \
+        unsigned e;                                             \
+        TYPE *m = vm;                                           \
+        int64_t ra = (RATYPE)ra_in;                             \
+        for (e = 0; e < 16 / ESIZE; e++, mask >>= ESIZE) {      \
+            if (mask & 1) {                                     \
+                ra = FN(ra, m[H##ESIZE(e)]);                    \
+            }                                                   \
+        }                                                       \
+        mve_advance_vpt(env);                                   \
+        return ra;                                              \
+    }                                                           \
+
+#define DO_VMAXMINV_U(INSN, FN)                         \
+    DO_VMAXMINV(INSN##b, 1, uint8_t, uint8_t, FN)       \
+    DO_VMAXMINV(INSN##h, 2, uint16_t, uint16_t, FN)     \
+    DO_VMAXMINV(INSN##w, 4, uint32_t, uint32_t, FN)
+#define DO_VMAXMINV_S(INSN, FN)                         \
+    DO_VMAXMINV(INSN##b, 1, int8_t, int8_t, FN)         \
+    DO_VMAXMINV(INSN##h, 2, int16_t, int16_t, FN)       \
+    DO_VMAXMINV(INSN##w, 4, int32_t, int32_t, FN)
+
+/*
+ * Helpers for max and min of absolute values across vector:
+ * note that we only take the absolute value of 'm', not 'n'
+ */
+static int64_t do_maxa(int64_t n, int64_t m)
+{
+    if (m < 0) {
+        m = -m;
+    }
+    return MAX(n, m);
+}
+
+static int64_t do_mina(int64_t n, int64_t m)
+{
+    if (m < 0) {
+        m = -m;
+    }
+    return MIN(n, m);
+}
+
+DO_VMAXMINV_S(vmaxvs, DO_MAX)
+DO_VMAXMINV_U(vmaxvu, DO_MAX)
+DO_VMAXMINV_S(vminvs, DO_MIN)
+DO_VMAXMINV_U(vminvu, DO_MIN)
+/*
+ * VMAXAV, VMINAV treat the general purpose input as unsigned
+ * and the vector elements as signed.
+ */
+DO_VMAXMINV(vmaxavb, 1, int8_t, uint8_t, do_maxa)
+DO_VMAXMINV(vmaxavh, 2, int16_t, uint16_t, do_maxa)
+DO_VMAXMINV(vmaxavw, 4, int32_t, uint32_t, do_maxa)
+DO_VMAXMINV(vminavb, 1, int8_t, uint8_t, do_mina)
+DO_VMAXMINV(vminavh, 2, int16_t, uint16_t, do_mina)
+DO_VMAXMINV(vminavw, 4, int32_t, uint32_t, do_mina)
+
 #define DO_VADDLV(OP, TYPE, LTYPE)                              \
     uint64_t HELPER(glue(mve_, OP))(CPUARMState *env, void *vm, \
                                     uint64_t ra)                \
diff --git a/target/arm/translate-mve.c b/target/arm/translate-mve.c
index 44731fc4eb7..2fce74f86ab 100644
--- a/target/arm/translate-mve.c
+++ b/target/arm/translate-mve.c
@@ -1321,3 +1321,51 @@ DO_VCMP(VCMPGE, vcmpge)
 DO_VCMP(VCMPLT, vcmplt)
 DO_VCMP(VCMPGT, vcmpgt)
 DO_VCMP(VCMPLE, vcmple)
+
+static bool do_vmaxv(DisasContext *s, arg_vmaxv *a, MVEGenVADDVFn fn)
+{
+    /*
+     * MIN/MAX operations across a vector: compute the min or
+     * max of the initial value in a general purpose register
+     * and all the elements in the vector, and store it back
+     * into the general purpose register.
+     */
+    TCGv_ptr qm;
+    TCGv_i32 rda;
+
+    if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qm) ||
+        !fn || a->rda == 13 || a->rda == 15) {
+        /* Rda cases are UNPREDICTABLE */
+        return false;
+    }
+    if (!mve_eci_check(s) || !vfp_access_check(s)) {
+        return true;
+    }
+
+    qm = mve_qreg_ptr(a->qm);
+    rda = load_reg(s, a->rda);
+    fn(rda, cpu_env, qm, rda);
+    store_reg(s, a->rda, rda);
+    tcg_temp_free_ptr(qm);
+    mve_update_eci(s);
+    return true;
+}
+
+#define DO_VMAXV(INSN, FN)                                      \
+    static bool trans_##INSN(DisasContext *s, arg_vmaxv *a)     \
+    {                                                           \
+        static MVEGenVADDVFn * const fns[] = {                  \
+            gen_helper_mve_##FN##b,                             \
+            gen_helper_mve_##FN##h,                             \
+            gen_helper_mve_##FN##w,                             \
+            NULL,                                               \
+        };                                                      \
+        return do_vmaxv(s, a, fns[a->size]);                    \
+    }
+
+DO_VMAXV(VMAXV_S, vmaxvs)
+DO_VMAXV(VMAXV_U, vmaxvu)
+DO_VMAXV(VMAXAV, vmaxav)
+DO_VMAXV(VMINV_S, vminvs)
+DO_VMAXV(VMINV_U, vminvu)
+DO_VMAXV(VMINAV, vminav)
-- 
2.20.1



  parent reply	other threads:[~2021-07-29 11:33 UTC|newest]

Thread overview: 80+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-07-29 11:14 [PATCH for-6.2 00/53] target/arm: MVE slices 3 and 4 Peter Maydell
2021-07-29 11:14 ` [PATCH for-6.2 01/53] target/arm: Note that we handle VMOVL as a special case of VSHLL Peter Maydell
2021-07-29 11:14 ` [PATCH for-6.2 02/53] target/arm: Print MVE VPR in CPU dumps Peter Maydell
2021-07-29 11:14 ` [PATCH for-6.2 03/53] target/arm: Fix MVE VSLI by 0 and VSRI by <dt> Peter Maydell
2021-07-30 18:56   ` Richard Henderson
2021-07-29 11:14 ` [PATCH for-6.2 04/53] target/arm: Fix signed VADDV Peter Maydell
2021-07-29 11:14 ` [PATCH for-6.2 05/53] target/arm: Fix mask handling for MVE narrowing operations Peter Maydell
2021-07-29 11:14 ` [PATCH for-6.2 06/53] target/arm: Fix 48-bit saturating shifts Peter Maydell
2021-07-29 11:14 ` [PATCH for-6.2 07/53] target/arm: Fix MVE 48-bit SQRSHRL for small right shifts Peter Maydell
2021-07-30 19:07   ` Richard Henderson
2021-08-12  9:43     ` Peter Maydell
2021-07-29 11:14 ` [PATCH for-6.2 08/53] target/arm: Fix calculation of LTP mask when LR is 0 Peter Maydell
2021-07-29 11:14 ` [PATCH for-6.2 09/53] target/arm: Factor out mve_eci_mask() Peter Maydell
2021-07-29 11:14 ` [PATCH for-6.2 10/53] target/arm: Fix VPT advance when ECI is non-zero Peter Maydell
2021-07-30 19:14   ` Richard Henderson
2021-07-29 11:14 ` [PATCH for-6.2 11/53] target/arm: Fix VLDRB/H/W for predicated elements Peter Maydell
2021-07-29 11:14 ` [PATCH for-6.2 12/53] target/arm: Implement MVE VMULL (polynomial) Peter Maydell
2021-07-29 11:14 ` [PATCH for-6.2 13/53] target/arm: Implement MVE incrementing/decrementing dup insns Peter Maydell
2021-07-29 11:14 ` [PATCH for-6.2 14/53] target/arm: Factor out gen_vpst() Peter Maydell
2021-07-29 11:14 ` [PATCH for-6.2 15/53] target/arm: Implement MVE integer vector comparisons Peter Maydell
2021-07-29 11:14 ` [PATCH for-6.2 16/53] target/arm: Implement MVE integer vector-vs-scalar comparisons Peter Maydell
2021-07-29 11:14 ` [PATCH for-6.2 17/53] target/arm: Implement MVE VPSEL Peter Maydell
2021-07-29 11:14 ` [PATCH for-6.2 18/53] target/arm: Implement MVE VMLAS Peter Maydell
2021-07-29 11:14 ` [PATCH for-6.2 19/53] target/arm: Implement MVE shift-by-scalar Peter Maydell
2021-07-29 11:14 ` [PATCH for-6.2 20/53] target/arm: Move 'x' and 'a' bit definitions into vmlaldav formats Peter Maydell
2021-07-29 11:14 ` Peter Maydell [this message]
2021-07-30 19:15   ` [PATCH for-6.2 21/53] target/arm: Implement MVE integer min/max across vector Richard Henderson
2021-07-29 11:14 ` [PATCH for-6.2 22/53] target/arm: Implement MVE VABAV Peter Maydell
2021-07-29 11:14 ` [PATCH for-6.2 23/53] target/arm: Implement MVE narrowing moves Peter Maydell
2021-07-29 11:14 ` [PATCH for-6.2 24/53] target/arm: Rename MVEGenDualAccOpFn to MVEGenLongDualAccOpFn Peter Maydell
2021-07-29 11:14 ` [PATCH for-6.2 25/53] target/arm: Implement MVE VMLADAV and VMLSLDAV Peter Maydell
2021-07-29 11:14 ` [PATCH for-6.2 26/53] target/arm: Implement MVE VMLA Peter Maydell
2021-07-30 19:18   ` Richard Henderson
2021-07-29 11:14 ` [PATCH for-6.2 27/53] target/arm: Implement MVE saturating doubling multiply accumulates Peter Maydell
2021-07-29 11:14 ` [PATCH for-6.2 28/53] target/arm: Implement MVE VQABS, VQNEG Peter Maydell
2021-07-29 11:14 ` [PATCH for-6.2 29/53] target/arm: Implement MVE VMAXA, VMINA Peter Maydell
2021-07-29 11:14 ` [PATCH for-6.2 30/53] target/arm: Implement MVE VMOV to/from 2 general-purpose registers Peter Maydell
2021-07-29 11:14 ` [PATCH for-6.2 31/53] target/arm: Implement MVE VPNOT Peter Maydell
2021-07-29 11:14 ` [PATCH for-6.2 32/53] target/arm: Implement MVE VCTP Peter Maydell
2021-07-29 11:14 ` [PATCH for-6.2 33/53] target/arm: Implement MVE scatter-gather insns Peter Maydell
2021-07-29 11:14 ` [PATCH for-6.2 34/53] target/arm: Implement MVE scatter-gather immediate forms Peter Maydell
2021-07-29 11:14 ` [PATCH for-6.2 35/53] target/arm: Implement MVE interleaving loads/stores Peter Maydell
2021-07-29 11:14 ` [PATCH for-6.2 36/53] target/arm: Implement MVE VADD (floating-point) Peter Maydell
2021-07-30 19:27   ` Richard Henderson
2021-07-30 19:37   ` Richard Henderson
2021-07-29 11:14 ` [PATCH for-6.2 37/53] target/arm: Implement MVE VSUB, VMUL, VABD, VMAXNM, VMINNM Peter Maydell
2021-07-30 19:28   ` Richard Henderson
2021-07-29 11:14 ` [PATCH for-6.2 38/53] target/arm: Implement MVE VCADD Peter Maydell
2021-07-30 19:32   ` Richard Henderson
2021-07-29 11:14 ` [PATCH for-6.2 39/53] target/arm: Implement MVE VFMA and VFMS Peter Maydell
2021-07-30 19:34   ` Richard Henderson
2021-07-30 19:41   ` Richard Henderson
2021-07-29 11:14 ` [PATCH for-6.2 40/53] target/arm: Implement MVE VCMUL and VCMLA Peter Maydell
2021-07-30 19:47   ` Richard Henderson
2021-07-29 11:15 ` [PATCH for-6.2 41/53] target/arm: Implement MVE VMAXNMA and VMINNMA Peter Maydell
2021-07-30 19:50   ` Richard Henderson
2021-07-29 11:15 ` [PATCH for-6.2 42/53] target/arm: Implement MVE scalar fp insns Peter Maydell
2021-07-30 19:55   ` Richard Henderson
2021-07-29 11:15 ` [PATCH for-6.2 43/53] target/arm: Implement MVE fp-with-scalar VFMA, VFMAS Peter Maydell
2021-07-30 19:58   ` Richard Henderson
2021-07-29 11:15 ` [PATCH for-6.2 44/53] softfloat: Remove assertion preventing silencing of NaN in default-NaN mode Peter Maydell
2021-07-30 20:00   ` Richard Henderson
2021-07-29 11:15 ` [PATCH for-6.2 45/53] target/arm: Implement MVE FP max/min across vector Peter Maydell
2021-07-30 20:12   ` Richard Henderson
2021-07-29 11:15 ` [PATCH for-6.2 46/53] target/arm: Implement MVE fp vector comparisons Peter Maydell
2021-07-30 20:21   ` Richard Henderson
2021-07-29 11:15 ` [PATCH for-6.2 47/53] target/arm: Implement MVE fp scalar comparisons Peter Maydell
2021-07-30 20:22   ` Richard Henderson
2021-07-29 11:15 ` [PATCH for-6.2 48/53] target/arm: Implement MVE VCVT between floating and fixed point Peter Maydell
2021-07-30 20:24   ` Richard Henderson
2021-07-29 11:15 ` [PATCH for-6.2 49/53] target/arm: Implement MVE VCVT between fp and integer Peter Maydell
2021-07-30 20:27   ` Richard Henderson
2021-07-29 11:15 ` [PATCH for-6.2 50/53] target/arm: Implement MVE VCVT with specified rounding mode Peter Maydell
2021-07-30 20:28   ` Richard Henderson
2021-07-29 11:15 ` [PATCH for-6.2 51/53] target/arm: Implement MVE VCVT between single and half precision Peter Maydell
2021-07-30 20:33   ` Richard Henderson
2021-07-29 11:15 ` [PATCH for-6.2 52/53] target/arm: Implement MVE VRINT insns Peter Maydell
2021-07-30 20:47   ` Richard Henderson
2021-07-29 11:15 ` [PATCH for-6.2 53/53] target/arm: Enable MVE in Cortex-M55 Peter Maydell
2021-07-30 20:48   ` Richard Henderson

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20210729111512.16541-22-peter.maydell@linaro.org \
    --to=peter.maydell@linaro.org \
    --cc=qemu-arm@nongnu.org \
    --cc=qemu-devel@nongnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.