All of lore.kernel.org
 help / color / mirror / Atom feed
From: Peter Maydell <peter.maydell@linaro.org>
To: qemu-devel@nongnu.org
Subject: [PULL 21/45] target/arm: Implement bfloat16 dot product (vector)
Date: Thu,  3 Jun 2021 16:58:40 +0100	[thread overview]
Message-ID: <20210603155904.26021-22-peter.maydell@linaro.org> (raw)
In-Reply-To: <20210603155904.26021-1-peter.maydell@linaro.org>

From: Richard Henderson <richard.henderson@linaro.org>

This is BFDOT for both AArch64 AdvSIMD and SVE,
and VDOT.BF16 for AArch32 NEON.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Message-id: 20210525225817.400336-7-richard.henderson@linaro.org
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
---
 target/arm/helper.h           |  3 +++
 target/arm/neon-shared.decode |  2 ++
 target/arm/sve.decode         |  3 +++
 target/arm/translate-a64.c    | 20 ++++++++++++++++++
 target/arm/translate-neon.c   |  9 ++++++++
 target/arm/translate-sve.c    | 12 +++++++++++
 target/arm/vec_helper.c       | 40 +++++++++++++++++++++++++++++++++++
 7 files changed, 89 insertions(+)

diff --git a/target/arm/helper.h b/target/arm/helper.h
index 8b4b7d92f36..de2f5331dcc 100644
--- a/target/arm/helper.h
+++ b/target/arm/helper.h
@@ -1002,6 +1002,9 @@ DEF_HELPER_FLAGS_5(gvec_ummla_b, TCG_CALL_NO_RWG,
 DEF_HELPER_FLAGS_5(gvec_usmmla_b, TCG_CALL_NO_RWG,
                    void, ptr, ptr, ptr, ptr, i32)
 
+DEF_HELPER_FLAGS_5(gvec_bfdot, TCG_CALL_NO_RWG,
+                   void, ptr, ptr, ptr, ptr, i32)
+
 #ifdef TARGET_AARCH64
 #include "helper-a64.h"
 #include "helper-sve.h"
diff --git a/target/arm/neon-shared.decode b/target/arm/neon-shared.decode
index cc9f4cdd85b..31a0839bbb0 100644
--- a/target/arm/neon-shared.decode
+++ b/target/arm/neon-shared.decode
@@ -52,6 +52,8 @@ VUDOT          1111 110 00 . 10 .... .... 1101 . q:1 . 1 .... \
                vm=%vm_dp vn=%vn_dp vd=%vd_dp
 VUSDOT         1111 110 01 . 10 .... .... 1101 . q:1 . 0 .... \
                vm=%vm_dp vn=%vn_dp vd=%vd_dp
+VDOT_b16       1111 110 00 . 00 .... .... 1101 . q:1 . 0 .... \
+               vm=%vm_dp vn=%vn_dp vd=%vd_dp
 
 # VFM[AS]L
 VFML           1111 110 0 s:1 . 10 .... .... 1000 . 0 . 1 .... \
diff --git a/target/arm/sve.decode b/target/arm/sve.decode
index 18d1a0eecc5..a7429b293fe 100644
--- a/target/arm/sve.decode
+++ b/target/arm/sve.decode
@@ -1625,6 +1625,9 @@ FMLALT_zzzw     01100100 10 1 ..... 10 0 00 1 ..... .....  @rda_rn_rm_e0
 FMLSLB_zzzw     01100100 10 1 ..... 10 1 00 0 ..... .....  @rda_rn_rm_e0
 FMLSLT_zzzw     01100100 10 1 ..... 10 1 00 1 ..... .....  @rda_rn_rm_e0
 
+### SVE2 floating-point bfloat16 dot-product
+BFDOT_zzzz      01100100 01 1 ..... 10 0 00 0 ..... .....  @rda_rn_rm_e0
+
 ### SVE2 floating-point multiply-add long (indexed)
 FMLALB_zzxw     01100100 10 1 ..... 0100.0 ..... .....     @rrxr_3a esz=2
 FMLALT_zzxw     01100100 10 1 ..... 0100.1 ..... .....     @rrxr_3a esz=2
diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
index 0f15fa42fc3..3c36de3df20 100644
--- a/target/arm/translate-a64.c
+++ b/target/arm/translate-a64.c
@@ -12235,6 +12235,16 @@ static void disas_simd_three_reg_same_extra(DisasContext *s, uint32_t insn)
         }
         feature = dc_isar_feature(aa64_fcma, s);
         break;
+    case 0x1f: /* BFDOT */
+        switch (size) {
+        case 1:
+            feature = dc_isar_feature(aa64_bf16, s);
+            break;
+        default:
+            unallocated_encoding(s);
+            return;
+        }
+        break;
     default:
         unallocated_encoding(s);
         return;
@@ -12318,6 +12328,16 @@ static void disas_simd_three_reg_same_extra(DisasContext *s, uint32_t insn)
         }
         return;
 
+    case 0xf: /* BFDOT */
+        switch (size) {
+        case 1:
+            gen_gvec_op4_ool(s, is_q, rd, rn, rm, rd, 0, gen_helper_gvec_bfdot);
+            break;
+        default:
+            g_assert_not_reached();
+        }
+        return;
+
     default:
         g_assert_not_reached();
     }
diff --git a/target/arm/translate-neon.c b/target/arm/translate-neon.c
index 6d94229c691..9460857b2ad 100644
--- a/target/arm/translate-neon.c
+++ b/target/arm/translate-neon.c
@@ -296,6 +296,15 @@ static bool trans_VUSDOT(DisasContext *s, arg_VUSDOT *a)
                         gen_helper_gvec_usdot_b);
 }
 
+static bool trans_VDOT_b16(DisasContext *s, arg_VDOT_b16 *a)
+{
+    if (!dc_isar_feature(aa32_bf16, s)) {
+        return false;
+    }
+    return do_neon_ddda(s, a->q * 7, a->vd, a->vn, a->vm, 0,
+                        gen_helper_gvec_bfdot);
+}
+
 static bool trans_VFML(DisasContext *s, arg_VFML *a)
 {
     int opr_sz;
diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c
index fb692a18351..ed290827ad2 100644
--- a/target/arm/translate-sve.c
+++ b/target/arm/translate-sve.c
@@ -8653,3 +8653,15 @@ static bool trans_UMMLA(DisasContext *s, arg_rrrr_esz *a)
 {
     return do_i8mm_zzzz_ool(s, a, gen_helper_gvec_ummla_b, 0);
 }
+
+static bool trans_BFDOT_zzzz(DisasContext *s, arg_rrrr_esz *a)
+{
+    if (!dc_isar_feature(aa64_sve_bf16, s)) {
+        return false;
+    }
+    if (sve_access_check(s)) {
+        gen_gvec_ool_zzzz(s, gen_helper_gvec_bfdot,
+                          a->rd, a->rn, a->rm, a->ra, 0);
+    }
+    return true;
+}
diff --git a/target/arm/vec_helper.c b/target/arm/vec_helper.c
index e84b438340e..7eefcd06eae 100644
--- a/target/arm/vec_helper.c
+++ b/target/arm/vec_helper.c
@@ -2412,3 +2412,43 @@ static void do_mmla_b(void *vd, void *vn, void *vm, void *va, uint32_t desc,
 DO_MMLA_B(gvec_smmla_b, do_smmla_b)
 DO_MMLA_B(gvec_ummla_b, do_ummla_b)
 DO_MMLA_B(gvec_usmmla_b, do_usmmla_b)
+
+/*
+ * BFloat16 Dot Product
+ */
+
+static float32 bfdotadd(float32 sum, uint32_t e1, uint32_t e2)
+{
+    /* FPCR is ignored for BFDOT and BFMMLA. */
+    float_status bf_status = {
+        .tininess_before_rounding = float_tininess_before_rounding,
+        .float_rounding_mode = float_round_to_odd_inf,
+        .flush_to_zero = true,
+        .flush_inputs_to_zero = true,
+        .default_nan_mode = true,
+    };
+    float32 t1, t2;
+
+    /*
+     * Extract each BFloat16 from the element pair, and shift
+     * them such that they become float32.
+     */
+    t1 = float32_mul(e1 << 16, e2 << 16, &bf_status);
+    t2 = float32_mul(e1 & 0xffff0000u, e2 & 0xffff0000u, &bf_status);
+    t1 = float32_add(t1, t2, &bf_status);
+    t1 = float32_add(sum, t1, &bf_status);
+
+    return t1;
+}
+
+void HELPER(gvec_bfdot)(void *vd, void *vn, void *vm, void *va, uint32_t desc)
+{
+    intptr_t i, opr_sz = simd_oprsz(desc);
+    float32 *d = vd, *a = va;
+    uint32_t *n = vn, *m = vm;
+
+    for (i = 0; i < opr_sz / 4; ++i) {
+        d[i] = bfdotadd(a[i], n[i], m[i]);
+    }
+    clear_tail(d, opr_sz, simd_maxsz(desc));
+}
-- 
2.20.1



  parent reply	other threads:[~2021-06-03 16:25 UTC|newest]

Thread overview: 48+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-06-03 15:58 [PULL 00/45] target-arm queue Peter Maydell
2021-06-03 15:58 ` [PULL 01/45] target/arm: Add isar feature check functions for MVE Peter Maydell
2021-06-03 15:58 ` [PULL 02/45] target/arm: Update feature checks for insns which are "MVE or FP" Peter Maydell
2021-06-03 15:58 ` [PULL 03/45] target/arm: Move fpsp/fpdp isar check into callers of do_vfp_2op_sp/dp Peter Maydell
2021-06-03 15:58 ` [PULL 04/45] target/arm: Add MVE check to VMOV_reg_sp and VMOV_reg_dp Peter Maydell
2021-06-03 15:58 ` [PULL 05/45] target/arm: Fix return values in fp_sysreg_checks() Peter Maydell
2021-06-03 15:58 ` [PULL 06/45] target/arm: Implement M-profile VPR register Peter Maydell
2021-06-03 15:58 ` [PULL 07/45] target/arm: Make FPSCR.LTPSIZE writable for MVE Peter Maydell
2021-06-03 15:58 ` [PULL 08/45] target/arm: Allow board models to specify initial NS VTOR Peter Maydell
2021-06-03 15:58 ` [PULL 09/45] arm: Consistently use "Cortex-Axx", not "Cortex Axx" Peter Maydell
2021-06-03 15:58 ` [PULL 10/45] docs: Fix installation of man pages with Sphinx 4.x Peter Maydell
2021-06-03 15:58 ` [PULL 11/45] target/arm: Mark LDS{MIN,MAX} as signed operations Peter Maydell
2021-06-03 15:58 ` [PULL 12/45] target/arm: fix missing exception class Peter Maydell
2021-06-03 15:58 ` [PULL 13/45] target/arm: fold do_raise_exception into raise_exception Peter Maydell
2021-06-03 15:58 ` [PULL 14/45] target/arm: use raise_exception_ra for MTE check failure Peter Maydell
2021-06-03 15:58 ` [PULL 15/45] target/arm: use raise_exception_ra for stack limit exception Peter Maydell
2021-06-03 15:58 ` [PULL 16/45] target/arm: Add isar_feature_{aa32, aa64, aa64_sve}_bf16 Peter Maydell
2021-06-03 15:58 ` [PULL 17/45] target/arm: Unify unallocated path in disas_fp_1src Peter Maydell
2021-06-03 15:58 ` [PULL 18/45] target/arm: Implement scalar float32 to bfloat16 conversion Peter Maydell
2021-06-03 15:58 ` [PULL 19/45] target/arm: Implement vector " Peter Maydell
2021-06-03 15:58 ` [PULL 20/45] softfpu: Add float_round_to_odd_inf Peter Maydell
2021-06-03 15:58 ` Peter Maydell [this message]
2021-06-03 15:58 ` [PULL 22/45] target/arm: Implement bfloat16 dot product (indexed) Peter Maydell
2021-06-03 15:58 ` [PULL 23/45] target/arm: Implement bfloat16 matrix multiply accumulate Peter Maydell
2021-06-03 15:58 ` [PULL 24/45] target/arm: Implement bfloat widening fma (vector) Peter Maydell
2021-06-03 15:58 ` [PULL 25/45] target/arm: Implement bfloat widening fma (indexed) Peter Maydell
2021-06-03 15:58 ` [PULL 26/45] linux-user/aarch64: Enable hwcap bits for bfloat16 Peter Maydell
2021-06-03 15:58 ` [PULL 27/45] target/arm: Enable BFloat16 extensions Peter Maydell
2021-06-03 15:58 ` [PULL 28/45] hvf: Move assert_hvf_ok() into common directory Peter Maydell
2021-06-03 15:58 ` [PULL 29/45] hvf: Move vcpu thread functions " Peter Maydell
2021-06-03 15:58 ` [PULL 30/45] hvf: Move cpu " Peter Maydell
2021-06-03 15:58 ` [PULL 31/45] hvf: Move hvf internal definitions into common header Peter Maydell
2021-06-03 15:58 ` [PULL 32/45] hvf: Make hvf_set_phys_mem() static Peter Maydell
2021-06-03 15:58 ` [PULL 33/45] hvf: Remove use of hv_uvaddr_t and hv_gpaddr_t Peter Maydell
2021-06-03 15:58 ` [PULL 34/45] hvf: Split out common code on vcpu init and destroy Peter Maydell
2021-06-03 15:58 ` [PULL 35/45] hvf: Use cpu_synchronize_state() Peter Maydell
2021-06-03 15:58 ` [PULL 36/45] hvf: Make synchronize functions static Peter Maydell
2021-06-03 15:58 ` [PULL 37/45] hvf: Remove hvf-accel-ops.h Peter Maydell
2021-06-03 15:58 ` [PULL 38/45] hvf: Introduce hvf vcpu struct Peter Maydell
2021-06-03 15:58 ` [PULL 39/45] hvf: Simplify post reset/init/loadvm hooks Peter Maydell
2021-06-03 15:58 ` [PULL 40/45] tests/qtest/bios-tables-test: Check for dup2() failure Peter Maydell
2021-06-03 15:59 ` [PULL 41/45] tests/qtest/e1000e-test: Check qemu_recv() succeeded Peter Maydell
2021-06-03 15:59 ` [PULL 42/45] tests/qtest/hd-geo-test: Fix checks on mkstemp() return value Peter Maydell
2021-06-03 15:59 ` [PULL 43/45] tests/qtest/pflash-cfi02-test: Avoid potential integer overflow Peter Maydell
2021-06-03 15:59 ` [PULL 44/45] tests/qtest/tpm-tests: Remove unnecessary NULL checks Peter Maydell
2021-06-03 15:59 ` [PULL 45/45] tests/unit/test-vmstate: Assert that dup() and mkstemp() succeed Peter Maydell
2021-06-03 16:42 ` [PULL 00/45] target-arm queue no-reply
2021-06-03 20:25 ` Peter Maydell

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20210603155904.26021-22-peter.maydell@linaro.org \
    --to=peter.maydell@linaro.org \
    --cc=qemu-devel@nongnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.