All of lore.kernel.org
 help / color / mirror / Atom feed
From: Richard Henderson <richard.henderson@linaro.org>
To: qemu-devel@nongnu.org
Cc: qemu-arm@nongnu.org, peter.maydell@linaro.org
Subject: [Qemu-devel] [PATCH v6 32/35] target/arm: Implement SVE dot product (vectors)
Date: Tue, 26 Jun 2018 21:33:25 -0700	[thread overview]
Message-ID: <20180627043328.11531-33-richard.henderson@linaro.org> (raw)
In-Reply-To: <20180627043328.11531-1-richard.henderson@linaro.org>

Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 target/arm/helper.h        |  5 +++
 target/arm/translate-sve.c | 17 ++++++++++
 target/arm/vec_helper.c    | 67 ++++++++++++++++++++++++++++++++++++++
 target/arm/sve.decode      |  3 ++
 4 files changed, 92 insertions(+)

diff --git a/target/arm/helper.h b/target/arm/helper.h
index 8607077dda..e23ce7ff19 100644
--- a/target/arm/helper.h
+++ b/target/arm/helper.h
@@ -583,6 +583,11 @@ DEF_HELPER_FLAGS_5(gvec_qrdmlah_s32, TCG_CALL_NO_RWG,
 DEF_HELPER_FLAGS_5(gvec_qrdmlsh_s32, TCG_CALL_NO_RWG,
                    void, ptr, ptr, ptr, ptr, i32)
 
+DEF_HELPER_FLAGS_4(gvec_sdot_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_udot_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_sdot_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_udot_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
 DEF_HELPER_FLAGS_5(gvec_fcaddh, TCG_CALL_NO_RWG,
                    void, ptr, ptr, ptr, ptr, i32)
 DEF_HELPER_FLAGS_5(gvec_fcadds, TCG_CALL_NO_RWG,
diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c
index 4f2152fb70..8a2bd1f8c5 100644
--- a/target/arm/translate-sve.c
+++ b/target/arm/translate-sve.c
@@ -3423,6 +3423,23 @@ DO_ZZI(UMIN, umin)
 
 #undef DO_ZZI
 
+static bool trans_DOT_zzz(DisasContext *s, arg_DOT_zzz *a, uint32_t insn)
+{
+    static gen_helper_gvec_3 * const fns[2][2] = {
+        { gen_helper_gvec_sdot_b, gen_helper_gvec_sdot_h },
+        { gen_helper_gvec_udot_b, gen_helper_gvec_udot_h }
+    };
+
+    if (sve_access_check(s)) {
+        unsigned vsz = vec_full_reg_size(s);
+        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
+                           vec_full_reg_offset(s, a->rn),
+                           vec_full_reg_offset(s, a->rm),
+                           vsz, vsz, 0, fns[a->u][a->sz]);
+    }
+    return true;
+}
+
 /*
  *** SVE Floating Point Multiply-Add Indexed Group
  */
diff --git a/target/arm/vec_helper.c b/target/arm/vec_helper.c
index db5aeb9f24..c16a30c3b5 100644
--- a/target/arm/vec_helper.c
+++ b/target/arm/vec_helper.c
@@ -194,6 +194,73 @@ void HELPER(gvec_qrdmlsh_s32)(void *vd, void *vn, void *vm,
     clear_tail(d, opr_sz, simd_maxsz(desc));
 }
 
+/* Integer 8 and 16-bit dot-product.
+ *
+ * Note that for the loops herein, host endianness does not matter
+ * with respect to the ordering of data within the 64-bit lanes.
+ * All elements are treated equally, no matter where they are.
+ */
+
+void HELPER(gvec_sdot_b)(void *vd, void *vn, void *vm, uint32_t desc)
+{
+    intptr_t i, opr_sz = simd_oprsz(desc);
+    uint32_t *d = vd;
+    int8_t *n = vn, *m = vm;
+
+    for (i = 0; i < opr_sz / 4; ++i) {
+        d[i] += n[i * 4 + 0] * m[i * 4 + 0]
+              + n[i * 4 + 1] * m[i * 4 + 1]
+              + n[i * 4 + 2] * m[i * 4 + 2]
+              + n[i * 4 + 3] * m[i * 4 + 3];
+    }
+    clear_tail(d, opr_sz, simd_maxsz(desc));
+}
+
+void HELPER(gvec_udot_b)(void *vd, void *vn, void *vm, uint32_t desc)
+{
+    intptr_t i, opr_sz = simd_oprsz(desc);
+    uint32_t *d = vd;
+    uint8_t *n = vn, *m = vm;
+
+    for (i = 0; i < opr_sz / 4; ++i) {
+        d[i] += n[i * 4 + 0] * m[i * 4 + 0]
+              + n[i * 4 + 1] * m[i * 4 + 1]
+              + n[i * 4 + 2] * m[i * 4 + 2]
+              + n[i * 4 + 3] * m[i * 4 + 3];
+    }
+    clear_tail(d, opr_sz, simd_maxsz(desc));
+}
+
+void HELPER(gvec_sdot_h)(void *vd, void *vn, void *vm, uint32_t desc)
+{
+    intptr_t i, opr_sz = simd_oprsz(desc);
+    uint64_t *d = vd;
+    int16_t *n = vn, *m = vm;
+
+    for (i = 0; i < opr_sz / 8; ++i) {
+        d[i] += (int64_t)n[i * 4 + 0] * m[i * 4 + 0]
+              + (int64_t)n[i * 4 + 1] * m[i * 4 + 1]
+              + (int64_t)n[i * 4 + 2] * m[i * 4 + 2]
+              + (int64_t)n[i * 4 + 3] * m[i * 4 + 3];
+    }
+    clear_tail(d, opr_sz, simd_maxsz(desc));
+}
+
+void HELPER(gvec_udot_h)(void *vd, void *vn, void *vm, uint32_t desc)
+{
+    intptr_t i, opr_sz = simd_oprsz(desc);
+    uint64_t *d = vd;
+    uint16_t *n = vn, *m = vm;
+
+    for (i = 0; i < opr_sz / 8; ++i) {
+        d[i] += (uint64_t)n[i * 4 + 0] * m[i * 4 + 0]
+              + (uint64_t)n[i * 4 + 1] * m[i * 4 + 1]
+              + (uint64_t)n[i * 4 + 2] * m[i * 4 + 2]
+              + (uint64_t)n[i * 4 + 3] * m[i * 4 + 3];
+    }
+    clear_tail(d, opr_sz, simd_maxsz(desc));
+}
+
 void HELPER(gvec_fcaddh)(void *vd, void *vn, void *vm,
                          void *vfpst, uint32_t desc)
 {
diff --git a/target/arm/sve.decode b/target/arm/sve.decode
index 62365ed90f..35415bfb6c 100644
--- a/target/arm/sve.decode
+++ b/target/arm/sve.decode
@@ -725,6 +725,9 @@ UMIN_zzi        00100101 .. 101 011 110 ........ .....          @rdn_i8u
 # SVE integer multiply immediate (unpredicated)
 MUL_zzi         00100101 .. 110 000 110 ........ .....          @rdn_i8s
 
+# SVE integer dot product (unpredicated)
+DOT_zzz         01000100 1 sz:1 0 rm:5 00000 u:1 rn:5 rd:5
+
 # SVE floating-point complex add (predicated)
 FCADD           01100100 esz:2 00000 rot:1 100 pg:3 rm:5 rd:5 \
                 rn=%reg_movprfx
-- 
2.17.1

  parent reply	other threads:[~2018-06-27  4:34 UTC|newest]

Thread overview: 57+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-06-27  4:32 [Qemu-devel] [PATCH v6 00/35] target/arm SVE patches Richard Henderson
2018-06-27  4:32 ` [Qemu-devel] [PATCH v6 01/35] target/arm: Implement SVE Memory Contiguous Load Group Richard Henderson
2018-06-27  4:32 ` [Qemu-devel] [PATCH v6 02/35] target/arm: Implement SVE Contiguous Load, first-fault and no-fault Richard Henderson
2018-06-27 15:56   ` [Qemu-devel] [Qemu-arm] " Alex Bennée
2018-06-27  4:32 ` [Qemu-devel] [PATCH v6 03/35] target/arm: Implement SVE Memory Contiguous Store Group Richard Henderson
2018-06-27  4:32 ` [Qemu-devel] [PATCH v6 04/35] target/arm: Implement SVE load and broadcast quadword Richard Henderson
2018-06-27  4:32 ` [Qemu-devel] [PATCH v6 05/35] target/arm: Implement SVE integer convert to floating-point Richard Henderson
2018-06-28 12:49   ` Peter Maydell
2018-06-27  4:32 ` [Qemu-devel] [PATCH v6 06/35] target/arm: Implement SVE floating-point arithmetic (predicated) Richard Henderson
2018-06-27  4:33 ` [Qemu-devel] [PATCH v6 07/35] target/arm: Implement SVE FP Multiply-Add Group Richard Henderson
2018-06-28 10:29   ` Peter Maydell
2018-06-28 10:53   ` Alex Bennée
2018-06-27  4:33 ` [Qemu-devel] [PATCH v6 08/35] target/arm: Implement SVE Floating Point Accumulating Reduction Group Richard Henderson
2018-06-27  4:33 ` [Qemu-devel] [PATCH v6 09/35] target/arm: Implement SVE load and broadcast element Richard Henderson
2018-06-27  4:33 ` [Qemu-devel] [PATCH v6 10/35] target/arm: Implement SVE store vector/predicate register Richard Henderson
2018-06-27  4:33 ` [Qemu-devel] [PATCH v6 11/35] target/arm: Implement SVE scatter stores Richard Henderson
2018-06-27  4:33 ` [Qemu-devel] [PATCH v6 12/35] target/arm: Implement SVE prefetches Richard Henderson
2018-06-27  4:33 ` [Qemu-devel] [PATCH v6 13/35] target/arm: Implement SVE gather loads Richard Henderson
2018-06-28 10:37   ` Peter Maydell
2018-06-28 13:18   ` [Qemu-devel] [Qemu-arm] " Alex Bennée
2018-06-27  4:33 ` [Qemu-devel] [PATCH v6 14/35] target/arm: Implement SVE first-fault " Richard Henderson
2018-06-27  4:33 ` [Qemu-devel] [PATCH v6 15/35] target/arm: Implement SVE scatter store vector immediate Richard Henderson
2018-06-27  4:33 ` [Qemu-devel] [PATCH v6 16/35] target/arm: Implement SVE floating-point compare vectors Richard Henderson
2018-06-27  4:33 ` [Qemu-devel] [PATCH v6 17/35] target/arm: Implement SVE floating-point arithmetic with immediate Richard Henderson
2018-06-27  4:33 ` [Qemu-devel] [PATCH v6 18/35] target/arm: Implement SVE Floating Point Multiply Indexed Group Richard Henderson
2018-06-27  4:33 ` [Qemu-devel] [PATCH v6 19/35] target/arm: Implement SVE FP Fast Reduction Group Richard Henderson
2018-06-27  4:33 ` [Qemu-devel] [PATCH v6 20/35] target/arm: Implement SVE Floating Point Unary Operations - Unpredicated Group Richard Henderson
2018-06-27  4:33 ` [Qemu-devel] [PATCH v6 21/35] target/arm: Implement SVE FP Compare with Zero Group Richard Henderson
2018-06-27  4:33 ` [Qemu-devel] [PATCH v6 22/35] target/arm: Implement SVE floating-point trig multiply-add coefficient Richard Henderson
2018-06-27  4:33 ` [Qemu-devel] [PATCH v6 23/35] target/arm: Implement SVE floating-point convert precision Richard Henderson
2018-06-28 12:55   ` Peter Maydell
2018-06-27  4:33 ` [Qemu-devel] [PATCH v6 24/35] target/arm: Implement SVE floating-point convert to integer Richard Henderson
2018-06-27  4:33 ` [Qemu-devel] [PATCH v6 25/35] target/arm: Implement SVE floating-point round to integral value Richard Henderson
2018-06-27  4:33 ` [Qemu-devel] [PATCH v6 26/35] target/arm: Implement SVE floating-point unary operations Richard Henderson
2018-06-27  4:33 ` [Qemu-devel] [PATCH v6 27/35] target/arm: Implement SVE MOVPRFX Richard Henderson
2018-06-27  4:33 ` [Qemu-devel] [PATCH v6 28/35] target/arm: Implement SVE floating-point complex add Richard Henderson
2018-06-27  4:33 ` [Qemu-devel] [PATCH v6 29/35] target/arm: Implement SVE fp complex multiply add Richard Henderson
2018-06-27  4:33 ` [Qemu-devel] [PATCH v6 30/35] target/arm: Pass index to AdvSIMD FCMLA (indexed) Richard Henderson
2018-06-28 10:52   ` Peter Maydell
2018-06-28 13:25   ` [Qemu-devel] [Qemu-arm] " Alex Bennée
2018-06-27  4:33 ` [Qemu-devel] [PATCH v6 31/35] target/arm: Implement SVE fp complex multiply add (indexed) Richard Henderson
2018-06-28 12:47   ` Peter Maydell
2018-06-28 13:55   ` Alex Bennée
2018-06-27  4:33 ` Richard Henderson [this message]
2018-06-27  4:33 ` [Qemu-devel] [PATCH v6 33/35] target/arm: Implement SVE dot product (indexed) Richard Henderson
2018-06-28 13:07   ` Peter Maydell
2018-06-28 15:57     ` Richard Henderson
2018-06-28 16:07       ` Peter Maydell
2018-06-28 14:04   ` [Qemu-devel] [Qemu-arm] " Alex Bennée
2018-06-28 15:47     ` Richard Henderson
2018-06-27  4:33 ` [Qemu-devel] [PATCH v6 34/35] target/arm: Enable SVE for aarch64-linux-user Richard Henderson
2018-06-27  4:33 ` [Qemu-devel] [PATCH v6 35/35] target/arm: Implement ARMv8.2-DotProd Richard Henderson
2018-06-28 11:30 ` [Qemu-devel] [Qemu-arm] [PATCH v6 00/35] target/arm SVE patches Alex Bennée
2018-06-28 14:12   ` Peter Maydell
2018-06-28 14:55     ` Peter Maydell
2018-06-28 14:55     ` Alex Bennée
2018-06-28 14:05 ` Alex Bennée

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20180627043328.11531-33-richard.henderson@linaro.org \
    --to=richard.henderson@linaro.org \
    --cc=peter.maydell@linaro.org \
    --cc=qemu-arm@nongnu.org \
    --cc=qemu-devel@nongnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.