All of lore.kernel.org
 help / color / mirror / Atom feed
From: Peter Maydell <peter.maydell@linaro.org>
To: qemu-devel@nongnu.org
Subject: [PULL 32/35] target/arm: Generalize inl_qrdmlah_* helper functions
Date: Fri, 28 Aug 2020 10:24:10 +0100	[thread overview]
Message-ID: <20200828092413.22206-33-peter.maydell@linaro.org> (raw)
In-Reply-To: <20200828092413.22206-1-peter.maydell@linaro.org>

From: Richard Henderson <richard.henderson@linaro.org>

Unify the add/sub helpers and add a parameter for rounding.
This will allow the saturating non-rounding variants to reuse this code.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
[PMM: fixed accidental use of '=' rather than '+=' in do_sqrdmlah_s]
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Message-id: 20200815013145.539409-15-richard.henderson@linaro.org
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
---
 target/arm/vec_helper.c | 80 +++++++++++++++--------------------------
 1 file changed, 29 insertions(+), 51 deletions(-)

diff --git a/target/arm/vec_helper.c b/target/arm/vec_helper.c
index 7d76412ee0d..9f10be03ed7 100644
--- a/target/arm/vec_helper.c
+++ b/target/arm/vec_helper.c
@@ -37,19 +37,24 @@
 #endif
 
 /* Signed saturating rounding doubling multiply-accumulate high half, 16-bit */
-static int16_t inl_qrdmlah_s16(int16_t src1, int16_t src2,
-                               int16_t src3, uint32_t *sat)
+static int16_t do_sqrdmlah_h(int16_t src1, int16_t src2, int16_t src3,
+                             bool neg, bool round, uint32_t *sat)
 {
-    /* Simplify:
+    /*
+     * Simplify:
      * = ((a3 << 16) + ((e1 * e2) << 1) + (1 << 15)) >> 16
      * = ((a3 << 15) + (e1 * e2) + (1 << 14)) >> 15
      */
     int32_t ret = (int32_t)src1 * src2;
-    ret = ((int32_t)src3 << 15) + ret + (1 << 14);
+    if (neg) {
+        ret = -ret;
+    }
+    ret += ((int32_t)src3 << 15) + (round << 14);
     ret >>= 15;
+
     if (ret != (int16_t)ret) {
         *sat = 1;
-        ret = (ret < 0 ? -0x8000 : 0x7fff);
+        ret = (ret < 0 ? INT16_MIN : INT16_MAX);
     }
     return ret;
 }
@@ -58,8 +63,9 @@ uint32_t HELPER(neon_qrdmlah_s16)(CPUARMState *env, uint32_t src1,
                                   uint32_t src2, uint32_t src3)
 {
     uint32_t *sat = &env->vfp.qc[0];
-    uint16_t e1 = inl_qrdmlah_s16(src1, src2, src3, sat);
-    uint16_t e2 = inl_qrdmlah_s16(src1 >> 16, src2 >> 16, src3 >> 16, sat);
+    uint16_t e1 = do_sqrdmlah_h(src1, src2, src3, false, true, sat);
+    uint16_t e2 = do_sqrdmlah_h(src1 >> 16, src2 >> 16, src3 >> 16,
+                                false, true, sat);
     return deposit32(e1, 16, 16, e2);
 }
 
@@ -73,35 +79,18 @@ void HELPER(gvec_qrdmlah_s16)(void *vd, void *vn, void *vm,
     uintptr_t i;
 
     for (i = 0; i < opr_sz / 2; ++i) {
-        d[i] = inl_qrdmlah_s16(n[i], m[i], d[i], vq);
+        d[i] = do_sqrdmlah_h(n[i], m[i], d[i], false, true, vq);
     }
     clear_tail(d, opr_sz, simd_maxsz(desc));
 }
 
-/* Signed saturating rounding doubling multiply-subtract high half, 16-bit */
-static int16_t inl_qrdmlsh_s16(int16_t src1, int16_t src2,
-                               int16_t src3, uint32_t *sat)
-{
-    /* Similarly, using subtraction:
-     * = ((a3 << 16) - ((e1 * e2) << 1) + (1 << 15)) >> 16
-     * = ((a3 << 15) - (e1 * e2) + (1 << 14)) >> 15
-     */
-    int32_t ret = (int32_t)src1 * src2;
-    ret = ((int32_t)src3 << 15) - ret + (1 << 14);
-    ret >>= 15;
-    if (ret != (int16_t)ret) {
-        *sat = 1;
-        ret = (ret < 0 ? -0x8000 : 0x7fff);
-    }
-    return ret;
-}
-
 uint32_t HELPER(neon_qrdmlsh_s16)(CPUARMState *env, uint32_t src1,
                                   uint32_t src2, uint32_t src3)
 {
     uint32_t *sat = &env->vfp.qc[0];
-    uint16_t e1 = inl_qrdmlsh_s16(src1, src2, src3, sat);
-    uint16_t e2 = inl_qrdmlsh_s16(src1 >> 16, src2 >> 16, src3 >> 16, sat);
+    uint16_t e1 = do_sqrdmlah_h(src1, src2, src3, true, true, sat);
+    uint16_t e2 = do_sqrdmlah_h(src1 >> 16, src2 >> 16, src3 >> 16,
+                                true, true, sat);
     return deposit32(e1, 16, 16, e2);
 }
 
@@ -115,19 +104,23 @@ void HELPER(gvec_qrdmlsh_s16)(void *vd, void *vn, void *vm,
     uintptr_t i;
 
     for (i = 0; i < opr_sz / 2; ++i) {
-        d[i] = inl_qrdmlsh_s16(n[i], m[i], d[i], vq);
+        d[i] = do_sqrdmlah_h(n[i], m[i], d[i], true, true, vq);
     }
     clear_tail(d, opr_sz, simd_maxsz(desc));
 }
 
 /* Signed saturating rounding doubling multiply-accumulate high half, 32-bit */
-static int32_t inl_qrdmlah_s32(int32_t src1, int32_t src2,
-                               int32_t src3, uint32_t *sat)
+static int32_t do_sqrdmlah_s(int32_t src1, int32_t src2, int32_t src3,
+                             bool neg, bool round, uint32_t *sat)
 {
     /* Simplify similarly to int_qrdmlah_s16 above.  */
     int64_t ret = (int64_t)src1 * src2;
-    ret = ((int64_t)src3 << 31) + ret + (1 << 30);
+    if (neg) {
+        ret = -ret;
+    }
+    ret += ((int64_t)src3 << 31) + (round << 30);
     ret >>= 31;
+
     if (ret != (int32_t)ret) {
         *sat = 1;
         ret = (ret < 0 ? INT32_MIN : INT32_MAX);
@@ -139,7 +132,7 @@ uint32_t HELPER(neon_qrdmlah_s32)(CPUARMState *env, int32_t src1,
                                   int32_t src2, int32_t src3)
 {
     uint32_t *sat = &env->vfp.qc[0];
-    return inl_qrdmlah_s32(src1, src2, src3, sat);
+    return do_sqrdmlah_s(src1, src2, src3, false, true, sat);
 }
 
 void HELPER(gvec_qrdmlah_s32)(void *vd, void *vn, void *vm,
@@ -152,31 +145,16 @@ void HELPER(gvec_qrdmlah_s32)(void *vd, void *vn, void *vm,
     uintptr_t i;
 
     for (i = 0; i < opr_sz / 4; ++i) {
-        d[i] = inl_qrdmlah_s32(n[i], m[i], d[i], vq);
+        d[i] = do_sqrdmlah_s(n[i], m[i], d[i], false, true, vq);
     }
     clear_tail(d, opr_sz, simd_maxsz(desc));
 }
 
-/* Signed saturating rounding doubling multiply-subtract high half, 32-bit */
-static int32_t inl_qrdmlsh_s32(int32_t src1, int32_t src2,
-                               int32_t src3, uint32_t *sat)
-{
-    /* Simplify similarly to int_qrdmlsh_s16 above.  */
-    int64_t ret = (int64_t)src1 * src2;
-    ret = ((int64_t)src3 << 31) - ret + (1 << 30);
-    ret >>= 31;
-    if (ret != (int32_t)ret) {
-        *sat = 1;
-        ret = (ret < 0 ? INT32_MIN : INT32_MAX);
-    }
-    return ret;
-}
-
 uint32_t HELPER(neon_qrdmlsh_s32)(CPUARMState *env, int32_t src1,
                                   int32_t src2, int32_t src3)
 {
     uint32_t *sat = &env->vfp.qc[0];
-    return inl_qrdmlsh_s32(src1, src2, src3, sat);
+    return do_sqrdmlah_s(src1, src2, src3, true, true, sat);
 }
 
 void HELPER(gvec_qrdmlsh_s32)(void *vd, void *vn, void *vm,
@@ -189,7 +167,7 @@ void HELPER(gvec_qrdmlsh_s32)(void *vd, void *vn, void *vm,
     uintptr_t i;
 
     for (i = 0; i < opr_sz / 4; ++i) {
-        d[i] = inl_qrdmlsh_s32(n[i], m[i], d[i], vq);
+        d[i] = do_sqrdmlah_s(n[i], m[i], d[i], true, true, vq);
     }
     clear_tail(d, opr_sz, simd_maxsz(desc));
 }
-- 
2.20.1



  parent reply	other threads:[~2020-08-28  9:40 UTC|newest]

Thread overview: 37+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-08-28  9:23 [PULL 00/35] target-arm queue Peter Maydell
2020-08-28  9:23 ` [PULL 01/35] hw/arm/sbsa-ref: fix typo breaking PCIe IRQs Peter Maydell
2020-08-28  9:23 ` [PULL 02/35] hw/clock: Remove unused clock_init*() functions Peter Maydell
2020-08-28  9:23 ` [PULL 03/35] hw/clock: Let clock_set() return boolean value Peter Maydell
2020-08-28  9:23 ` [PULL 04/35] hw/clock: Only propagate clock changes if the clock is changed Peter Maydell
2020-08-28  9:23 ` [PULL 05/35] hw/arm/musicpal: Use AddressSpace for DMA transfers Peter Maydell
2020-08-28  9:23 ` [PULL 06/35] target/arm: Clarify HCR_EL2 ARMCPRegInfo type Peter Maydell
2020-08-28  9:23 ` [PULL 07/35] target/arm: Pass the entire mte descriptor to mte_check_fail Peter Maydell
2020-08-28  9:23 ` [PULL 08/35] target/arm: Fill in the WnR syndrome bit in mte_check_fail Peter Maydell
2020-08-28  9:23 ` [PULL 09/35] hw/sd/allwinner-sdhost: Use AddressSpace for DMA transfers Peter Maydell
2020-08-28  9:23 ` [PULL 10/35] hw/net/allwinner-sun8i-emac: " Peter Maydell
2020-08-28  9:23 ` [PULL 11/35] hw/arm/xilinx_zynq: Uninline cadence_uart_create() Peter Maydell
2020-08-28  9:23 ` [PULL 12/35] hw/arm/xilinx_zynq: Call qdev_connect_clock_in() before DeviceRealize Peter Maydell
2020-08-28  9:23 ` [PULL 13/35] hw/qdev-clock: Uninline qdev_connect_clock_in() Peter Maydell
2020-08-28  9:23 ` [PULL 14/35] hw/qdev-clock: Avoid calling qdev_connect_clock_in after DeviceRealize Peter Maydell
2020-08-28  9:23 ` [PULL 15/35] hw/misc/unimp: Display value after offset Peter Maydell
2020-08-28  9:23 ` [PULL 16/35] hw/misc/unimp: Display the value with width of the access size Peter Maydell
2020-08-28  9:23 ` [PULL 17/35] hw/misc/unimp: Display the offset with width of the region size Peter Maydell
2020-08-28  9:23 ` [PULL 18/35] armsse: Define ARMSSEClass correctly Peter Maydell
2020-08-28  9:23 ` [PULL 19/35] qemu/int128: Add int128_lshift Peter Maydell
2020-08-28  9:23 ` [PULL 20/35] target/arm: Split out gen_gvec_fn_zz Peter Maydell
2020-08-28  9:23 ` [PULL 21/35] target/arm: Split out gen_gvec_fn_zzz, do_zzz_fn Peter Maydell
2020-08-28  9:24 ` [PULL 22/35] target/arm: Rearrange {sve,fp}_check_access assert Peter Maydell
2020-08-28  9:24 ` [PULL 23/35] target/arm: Merge do_vector2_p into do_mov_p Peter Maydell
2020-08-28  9:24 ` [PULL 24/35] target/arm: Clean up 4-operand predicate expansion Peter Maydell
2020-08-28  9:24 ` [PULL 25/35] target/arm: Use tcg_gen_gvec_bitsel for trans_SEL_pppp Peter Maydell
2020-08-28  9:24 ` [PULL 26/35] target/arm: Split out gen_gvec_ool_zzzp Peter Maydell
2020-08-28  9:24 ` [PULL 27/35] target/arm: Merge helper_sve_clr_* and helper_sve_movz_* Peter Maydell
2020-08-28  9:24 ` [PULL 28/35] target/arm: Split out gen_gvec_ool_zzp Peter Maydell
2020-08-28  9:24 ` [PULL 29/35] target/arm: Split out gen_gvec_ool_zzz Peter Maydell
2020-08-28  9:24 ` [PULL 30/35] target/arm: Split out gen_gvec_ool_zz Peter Maydell
2020-08-28  9:24 ` [PULL 31/35] target/arm: Tidy SVE tszimm shift formats Peter Maydell
2020-08-28  9:24 ` Peter Maydell [this message]
2020-08-28  9:24 ` [PULL 33/35] target/arm: Convert integer multiply (indexed) to gvec for aa64 advsimd Peter Maydell
2020-08-28  9:24 ` [PULL 34/35] target/arm: Convert integer multiply-add " Peter Maydell
2020-08-28  9:24 ` [PULL 35/35] target/arm: Convert sq{, r}dmulh " Peter Maydell
2020-08-28 17:37 ` [PULL 00/35] target-arm queue Peter Maydell

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20200828092413.22206-33-peter.maydell@linaro.org \
    --to=peter.maydell@linaro.org \
    --cc=qemu-devel@nongnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.

  Be sure your reply has a Subject: header at the top and a blank line
  before the message body.
This is an external index of several public inboxes;
see the mirroring instructions for how to clone and mirror
all data and code used by this external index.