All of lore.kernel.org
 help / color / mirror / Atom feed
From: Peter Maydell <peter.maydell@linaro.org>
To: qemu-devel@nongnu.org
Subject: [PULL 23/45] target/arm: Implement bfloat16 matrix multiply accumulate
Date: Thu,  3 Jun 2021 16:58:42 +0100	[thread overview]
Message-ID: <20210603155904.26021-24-peter.maydell@linaro.org> (raw)
In-Reply-To: <20210603155904.26021-1-peter.maydell@linaro.org>

From: Richard Henderson <richard.henderson@linaro.org>

This is BFMMLA for both AArch64 AdvSIMD and SVE,
and VMMLA.BF16 for AArch32 NEON.

Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Message-id: 20210525225817.400336-9-richard.henderson@linaro.org
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
---
 target/arm/helper.h           |  3 +++
 target/arm/neon-shared.decode |  2 ++
 target/arm/sve.decode         |  6 +++--
 target/arm/translate-a64.c    | 10 +++++++++
 target/arm/translate-neon.c   |  9 ++++++++
 target/arm/translate-sve.c    | 12 ++++++++++
 target/arm/vec_helper.c       | 42 ++++++++++++++++++++++++++++++++++-
 7 files changed, 81 insertions(+), 3 deletions(-)

diff --git a/target/arm/helper.h b/target/arm/helper.h
index 376c1cef0f6..af75d7f25f2 100644
--- a/target/arm/helper.h
+++ b/target/arm/helper.h
@@ -1007,6 +1007,9 @@ DEF_HELPER_FLAGS_5(gvec_bfdot, TCG_CALL_NO_RWG,
 DEF_HELPER_FLAGS_5(gvec_bfdot_idx, TCG_CALL_NO_RWG,
                    void, ptr, ptr, ptr, ptr, i32)
 
+DEF_HELPER_FLAGS_5(gvec_bfmmla, TCG_CALL_NO_RWG,
+                   void, ptr, ptr, ptr, ptr, i32)
+
 #ifdef TARGET_AARCH64
 #include "helper-a64.h"
 #include "helper-sve.h"
diff --git a/target/arm/neon-shared.decode b/target/arm/neon-shared.decode
index fa3cf14e3a6..4e0a25d27c1 100644
--- a/target/arm/neon-shared.decode
+++ b/target/arm/neon-shared.decode
@@ -67,6 +67,8 @@ VUMMLA         1111 1100 0.10 .... .... 1100 .1.1 .... \
                vm=%vm_dp vn=%vn_dp vd=%vd_dp
 VUSMMLA        1111 1100 1.10 .... .... 1100 .1.0 .... \
                vm=%vm_dp vn=%vn_dp vd=%vd_dp
+VMMLA_b16      1111 1100 0.00 .... .... 1100 .1.0 .... \
+               vm=%vm_dp vn=%vn_dp vd=%vd_dp
 
 VCMLA_scalar   1111 1110 0 . rot:2 .... .... 1000 . q:1 index:1 0 vm:4 \
                vn=%vn_dp vd=%vd_dp size=1
diff --git a/target/arm/sve.decode b/target/arm/sve.decode
index 51f87e8937e..6c17898deed 100644
--- a/target/arm/sve.decode
+++ b/target/arm/sve.decode
@@ -1568,8 +1568,10 @@ SQRDCMLAH_zzzz  01000100 esz:2 0 rm:5 0011 rot:2 rn:5 rd:5  ra=%reg_movprfx
 USDOT_zzzz      01000100 .. 0 ..... 011 110 ..... .....  @rda_rn_rm
 
 ### SVE2 floating point matrix multiply accumulate
-
-FMMLA           01100100 .. 1 ..... 111001 ..... .....  @rda_rn_rm
+{
+  BFMMLA        01100100 01 1 ..... 111 001 ..... .....  @rda_rn_rm_e0
+  FMMLA         01100100 .. 1 ..... 111 001 ..... .....  @rda_rn_rm
+}
 
 ### SVE2 Memory Gather Load Group
 
diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
index 71de75e568b..9ce2f5a7d43 100644
--- a/target/arm/translate-a64.c
+++ b/target/arm/translate-a64.c
@@ -12235,6 +12235,13 @@ static void disas_simd_three_reg_same_extra(DisasContext *s, uint32_t insn)
         }
         feature = dc_isar_feature(aa64_fcma, s);
         break;
+    case 0x1d: /* BFMMLA */
+        if (size != MO_16 || !is_q) {
+            unallocated_encoding(s);
+            return;
+        }
+        feature = dc_isar_feature(aa64_bf16, s);
+        break;
     case 0x1f: /* BFDOT */
         switch (size) {
         case 1:
@@ -12328,6 +12335,9 @@ static void disas_simd_three_reg_same_extra(DisasContext *s, uint32_t insn)
         }
         return;
 
+    case 0xd: /* BFMMLA */
+        gen_gvec_op4_ool(s, is_q, rd, rn, rm, rd, 0, gen_helper_gvec_bfmmla);
+        return;
     case 0xf: /* BFDOT */
         switch (size) {
         case 1:
diff --git a/target/arm/translate-neon.c b/target/arm/translate-neon.c
index 8099767792b..9d227a1e13d 100644
--- a/target/arm/translate-neon.c
+++ b/target/arm/translate-neon.c
@@ -4126,3 +4126,12 @@ static bool trans_VUSMMLA(DisasContext *s, arg_VUSMMLA *a)
     return do_neon_ddda(s, 7, a->vd, a->vn, a->vm, 0,
                         gen_helper_gvec_usmmla_b);
 }
+
+static bool trans_VMMLA_b16(DisasContext *s, arg_VMMLA_b16 *a)
+{
+    if (!dc_isar_feature(aa32_bf16, s)) {
+        return false;
+    }
+    return do_neon_ddda(s, 7, a->vd, a->vn, a->vm, 0,
+                        gen_helper_gvec_bfmmla);
+}
diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c
index 6f020306357..4f575dc3343 100644
--- a/target/arm/translate-sve.c
+++ b/target/arm/translate-sve.c
@@ -8677,3 +8677,15 @@ static bool trans_BFDOT_zzxz(DisasContext *s, arg_rrxr_esz *a)
     }
     return true;
 }
+
+static bool trans_BFMMLA(DisasContext *s, arg_rrrr_esz *a)
+{
+    if (!dc_isar_feature(aa64_sve_bf16, s)) {
+        return false;
+    }
+    if (sve_access_check(s)) {
+        gen_gvec_ool_zzzz(s, gen_helper_gvec_bfmmla,
+                          a->rd, a->rn, a->rm, a->ra, 0);
+    }
+    return true;
+}
diff --git a/target/arm/vec_helper.c b/target/arm/vec_helper.c
index 74a497f38ca..27e9bdd3299 100644
--- a/target/arm/vec_helper.c
+++ b/target/arm/vec_helper.c
@@ -2385,7 +2385,7 @@ static void do_mmla_b(void *vd, void *vn, void *vm, void *va, uint32_t desc,
          * Process the entire segment at once, writing back the
          * results only after we've consumed all of the inputs.
          *
-         * Key to indicies by column:
+         * Key to indices by column:
          *          i   j                  i             j
          */
         sum0 = a[H4(0 + 0)];
@@ -2472,3 +2472,43 @@ void HELPER(gvec_bfdot_idx)(void *vd, void *vn, void *vm,
     }
     clear_tail(d, opr_sz, simd_maxsz(desc));
 }
+
+void HELPER(gvec_bfmmla)(void *vd, void *vn, void *vm, void *va, uint32_t desc)
+{
+    intptr_t s, opr_sz = simd_oprsz(desc);
+    float32 *d = vd, *a = va;
+    uint32_t *n = vn, *m = vm;
+
+    for (s = 0; s < opr_sz / 4; s += 4) {
+        float32 sum00, sum01, sum10, sum11;
+
+        /*
+         * Process the entire segment at once, writing back the
+         * results only after we've consumed all of the inputs.
+         *
+         * Key to indicies by column:
+         *               i   j           i   k             j   k
+         */
+        sum00 = a[s + H4(0 + 0)];
+        sum00 = bfdotadd(sum00, n[s + H4(0 + 0)], m[s + H4(0 + 0)]);
+        sum00 = bfdotadd(sum00, n[s + H4(0 + 1)], m[s + H4(0 + 1)]);
+
+        sum01 = a[s + H4(0 + 1)];
+        sum01 = bfdotadd(sum01, n[s + H4(0 + 0)], m[s + H4(2 + 0)]);
+        sum01 = bfdotadd(sum01, n[s + H4(0 + 1)], m[s + H4(2 + 1)]);
+
+        sum10 = a[s + H4(2 + 0)];
+        sum10 = bfdotadd(sum10, n[s + H4(2 + 0)], m[s + H4(0 + 0)]);
+        sum10 = bfdotadd(sum10, n[s + H4(2 + 1)], m[s + H4(0 + 1)]);
+
+        sum11 = a[s + H4(2 + 1)];
+        sum11 = bfdotadd(sum11, n[s + H4(2 + 0)], m[s + H4(2 + 0)]);
+        sum11 = bfdotadd(sum11, n[s + H4(2 + 1)], m[s + H4(2 + 1)]);
+
+        d[s + H4(0 + 0)] = sum00;
+        d[s + H4(0 + 1)] = sum01;
+        d[s + H4(2 + 0)] = sum10;
+        d[s + H4(2 + 1)] = sum11;
+    }
+    clear_tail(d, opr_sz, simd_maxsz(desc));
+}
-- 
2.20.1



  parent reply	other threads:[~2021-06-03 16:13 UTC|newest]

Thread overview: 48+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-06-03 15:58 [PULL 00/45] target-arm queue Peter Maydell
2021-06-03 15:58 ` [PULL 01/45] target/arm: Add isar feature check functions for MVE Peter Maydell
2021-06-03 15:58 ` [PULL 02/45] target/arm: Update feature checks for insns which are "MVE or FP" Peter Maydell
2021-06-03 15:58 ` [PULL 03/45] target/arm: Move fpsp/fpdp isar check into callers of do_vfp_2op_sp/dp Peter Maydell
2021-06-03 15:58 ` [PULL 04/45] target/arm: Add MVE check to VMOV_reg_sp and VMOV_reg_dp Peter Maydell
2021-06-03 15:58 ` [PULL 05/45] target/arm: Fix return values in fp_sysreg_checks() Peter Maydell
2021-06-03 15:58 ` [PULL 06/45] target/arm: Implement M-profile VPR register Peter Maydell
2021-06-03 15:58 ` [PULL 07/45] target/arm: Make FPSCR.LTPSIZE writable for MVE Peter Maydell
2021-06-03 15:58 ` [PULL 08/45] target/arm: Allow board models to specify initial NS VTOR Peter Maydell
2021-06-03 15:58 ` [PULL 09/45] arm: Consistently use "Cortex-Axx", not "Cortex Axx" Peter Maydell
2021-06-03 15:58 ` [PULL 10/45] docs: Fix installation of man pages with Sphinx 4.x Peter Maydell
2021-06-03 15:58 ` [PULL 11/45] target/arm: Mark LDS{MIN,MAX} as signed operations Peter Maydell
2021-06-03 15:58 ` [PULL 12/45] target/arm: fix missing exception class Peter Maydell
2021-06-03 15:58 ` [PULL 13/45] target/arm: fold do_raise_exception into raise_exception Peter Maydell
2021-06-03 15:58 ` [PULL 14/45] target/arm: use raise_exception_ra for MTE check failure Peter Maydell
2021-06-03 15:58 ` [PULL 15/45] target/arm: use raise_exception_ra for stack limit exception Peter Maydell
2021-06-03 15:58 ` [PULL 16/45] target/arm: Add isar_feature_{aa32, aa64, aa64_sve}_bf16 Peter Maydell
2021-06-03 15:58 ` [PULL 17/45] target/arm: Unify unallocated path in disas_fp_1src Peter Maydell
2021-06-03 15:58 ` [PULL 18/45] target/arm: Implement scalar float32 to bfloat16 conversion Peter Maydell
2021-06-03 15:58 ` [PULL 19/45] target/arm: Implement vector " Peter Maydell
2021-06-03 15:58 ` [PULL 20/45] softfpu: Add float_round_to_odd_inf Peter Maydell
2021-06-03 15:58 ` [PULL 21/45] target/arm: Implement bfloat16 dot product (vector) Peter Maydell
2021-06-03 15:58 ` [PULL 22/45] target/arm: Implement bfloat16 dot product (indexed) Peter Maydell
2021-06-03 15:58 ` Peter Maydell [this message]
2021-06-03 15:58 ` [PULL 24/45] target/arm: Implement bfloat widening fma (vector) Peter Maydell
2021-06-03 15:58 ` [PULL 25/45] target/arm: Implement bfloat widening fma (indexed) Peter Maydell
2021-06-03 15:58 ` [PULL 26/45] linux-user/aarch64: Enable hwcap bits for bfloat16 Peter Maydell
2021-06-03 15:58 ` [PULL 27/45] target/arm: Enable BFloat16 extensions Peter Maydell
2021-06-03 15:58 ` [PULL 28/45] hvf: Move assert_hvf_ok() into common directory Peter Maydell
2021-06-03 15:58 ` [PULL 29/45] hvf: Move vcpu thread functions " Peter Maydell
2021-06-03 15:58 ` [PULL 30/45] hvf: Move cpu " Peter Maydell
2021-06-03 15:58 ` [PULL 31/45] hvf: Move hvf internal definitions into common header Peter Maydell
2021-06-03 15:58 ` [PULL 32/45] hvf: Make hvf_set_phys_mem() static Peter Maydell
2021-06-03 15:58 ` [PULL 33/45] hvf: Remove use of hv_uvaddr_t and hv_gpaddr_t Peter Maydell
2021-06-03 15:58 ` [PULL 34/45] hvf: Split out common code on vcpu init and destroy Peter Maydell
2021-06-03 15:58 ` [PULL 35/45] hvf: Use cpu_synchronize_state() Peter Maydell
2021-06-03 15:58 ` [PULL 36/45] hvf: Make synchronize functions static Peter Maydell
2021-06-03 15:58 ` [PULL 37/45] hvf: Remove hvf-accel-ops.h Peter Maydell
2021-06-03 15:58 ` [PULL 38/45] hvf: Introduce hvf vcpu struct Peter Maydell
2021-06-03 15:58 ` [PULL 39/45] hvf: Simplify post reset/init/loadvm hooks Peter Maydell
2021-06-03 15:58 ` [PULL 40/45] tests/qtest/bios-tables-test: Check for dup2() failure Peter Maydell
2021-06-03 15:59 ` [PULL 41/45] tests/qtest/e1000e-test: Check qemu_recv() succeeded Peter Maydell
2021-06-03 15:59 ` [PULL 42/45] tests/qtest/hd-geo-test: Fix checks on mkstemp() return value Peter Maydell
2021-06-03 15:59 ` [PULL 43/45] tests/qtest/pflash-cfi02-test: Avoid potential integer overflow Peter Maydell
2021-06-03 15:59 ` [PULL 44/45] tests/qtest/tpm-tests: Remove unnecessary NULL checks Peter Maydell
2021-06-03 15:59 ` [PULL 45/45] tests/unit/test-vmstate: Assert that dup() and mkstemp() succeed Peter Maydell
2021-06-03 16:42 ` [PULL 00/45] target-arm queue no-reply
2021-06-03 20:25 ` Peter Maydell

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20210603155904.26021-24-peter.maydell@linaro.org \
    --to=peter.maydell@linaro.org \
    --cc=qemu-devel@nongnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.