All of lore.kernel.org
 help / color / mirror / Atom feed
From: Peter Maydell <peter.maydell@linaro.org>
To: qemu-devel@nongnu.org
Subject: [PULL 24/45] target/arm: Implement bfloat widening fma (vector)
Date: Thu,  3 Jun 2021 16:58:43 +0100	[thread overview]
Message-ID: <20210603155904.26021-25-peter.maydell@linaro.org> (raw)
In-Reply-To: <20210603155904.26021-1-peter.maydell@linaro.org>

From: Richard Henderson <richard.henderson@linaro.org>

This is BFMLAL{B,T} for both AArch64 AdvSIMD and SVE,
and VFMA{B,T}.BF16 for AArch32 NEON.

Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Message-id: 20210525225817.400336-10-richard.henderson@linaro.org
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
---
 target/arm/helper.h           |  3 +++
 target/arm/neon-shared.decode |  3 +++
 target/arm/sve.decode         |  3 +++
 target/arm/translate-a64.c    | 13 +++++++++----
 target/arm/translate-neon.c   |  9 +++++++++
 target/arm/translate-sve.c    | 30 ++++++++++++++++++++++++++++++
 target/arm/vec_helper.c       | 16 ++++++++++++++++
 7 files changed, 73 insertions(+), 4 deletions(-)

diff --git a/target/arm/helper.h b/target/arm/helper.h
index af75d7f25f2..36b3c9dd2d0 100644
--- a/target/arm/helper.h
+++ b/target/arm/helper.h
@@ -1010,6 +1010,9 @@ DEF_HELPER_FLAGS_5(gvec_bfdot_idx, TCG_CALL_NO_RWG,
 DEF_HELPER_FLAGS_5(gvec_bfmmla, TCG_CALL_NO_RWG,
                    void, ptr, ptr, ptr, ptr, i32)
 
+DEF_HELPER_FLAGS_6(gvec_bfmlal, TCG_CALL_NO_RWG,
+                   void, ptr, ptr, ptr, ptr, ptr, i32)
+
 #ifdef TARGET_AARCH64
 #include "helper-a64.h"
 #include "helper-sve.h"
diff --git a/target/arm/neon-shared.decode b/target/arm/neon-shared.decode
index 4e0a25d27c1..b61addd98b7 100644
--- a/target/arm/neon-shared.decode
+++ b/target/arm/neon-shared.decode
@@ -70,6 +70,9 @@ VUSMMLA        1111 1100 1.10 .... .... 1100 .1.0 .... \
 VMMLA_b16      1111 1100 0.00 .... .... 1100 .1.0 .... \
                vm=%vm_dp vn=%vn_dp vd=%vd_dp
 
+VFMA_b16       1111 110 0 0.11 .... .... 1000 . q:1 . 1 .... \
+               vm=%vm_dp vn=%vn_dp vd=%vd_dp
+
 VCMLA_scalar   1111 1110 0 . rot:2 .... .... 1000 . q:1 index:1 0 vm:4 \
                vn=%vn_dp vd=%vd_dp size=1
 VCMLA_scalar   1111 1110 1 . rot:2 .... .... 1000 . q:1 . 0 .... \
diff --git a/target/arm/sve.decode b/target/arm/sve.decode
index 6c17898deed..5281164eeae 100644
--- a/target/arm/sve.decode
+++ b/target/arm/sve.decode
@@ -1627,6 +1627,9 @@ FMLALT_zzzw     01100100 10 1 ..... 10 0 00 1 ..... .....  @rda_rn_rm_e0
 FMLSLB_zzzw     01100100 10 1 ..... 10 1 00 0 ..... .....  @rda_rn_rm_e0
 FMLSLT_zzzw     01100100 10 1 ..... 10 1 00 1 ..... .....  @rda_rn_rm_e0
 
+BFMLALB_zzzw    01100100 11 1 ..... 10 0 00 0 ..... .....  @rda_rn_rm_e0
+BFMLALT_zzzw    01100100 11 1 ..... 10 0 00 1 ..... .....  @rda_rn_rm_e0
+
 ### SVE2 floating-point bfloat16 dot-product
 BFDOT_zzzz      01100100 01 1 ..... 10 0 00 0 ..... .....  @rda_rn_rm_e0
 
diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
index 9ce2f5a7d43..8dcb15ac0f7 100644
--- a/target/arm/translate-a64.c
+++ b/target/arm/translate-a64.c
@@ -12242,9 +12242,10 @@ static void disas_simd_three_reg_same_extra(DisasContext *s, uint32_t insn)
         }
         feature = dc_isar_feature(aa64_bf16, s);
         break;
-    case 0x1f: /* BFDOT */
+    case 0x1f:
         switch (size) {
-        case 1:
+        case 1: /* BFDOT */
+        case 3: /* BFMLAL{B,T} */
             feature = dc_isar_feature(aa64_bf16, s);
             break;
         default:
@@ -12338,11 +12339,15 @@ static void disas_simd_three_reg_same_extra(DisasContext *s, uint32_t insn)
     case 0xd: /* BFMMLA */
         gen_gvec_op4_ool(s, is_q, rd, rn, rm, rd, 0, gen_helper_gvec_bfmmla);
         return;
-    case 0xf: /* BFDOT */
+    case 0xf:
         switch (size) {
-        case 1:
+        case 1: /* BFDOT */
             gen_gvec_op4_ool(s, is_q, rd, rn, rm, rd, 0, gen_helper_gvec_bfdot);
             break;
+        case 3: /* BFMLAL{B,T} */
+            gen_gvec_op4_fpst(s, 1, rd, rn, rm, rd, false, is_q,
+                              gen_helper_gvec_bfmlal);
+            break;
         default:
             g_assert_not_reached();
         }
diff --git a/target/arm/translate-neon.c b/target/arm/translate-neon.c
index 9d227a1e13d..4d0c2494dc5 100644
--- a/target/arm/translate-neon.c
+++ b/target/arm/translate-neon.c
@@ -4135,3 +4135,12 @@ static bool trans_VMMLA_b16(DisasContext *s, arg_VMMLA_b16 *a)
     return do_neon_ddda(s, 7, a->vd, a->vn, a->vm, 0,
                         gen_helper_gvec_bfmmla);
 }
+
+static bool trans_VFMA_b16(DisasContext *s, arg_VFMA_b16 *a)
+{
+    if (!dc_isar_feature(aa32_bf16, s)) {
+        return false;
+    }
+    return do_neon_ddda_fpst(s, 7, a->vd, a->vn, a->vm, a->q, FPST_STD,
+                             gen_helper_gvec_bfmlal);
+}
diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c
index 4f575dc3343..ba8f5d7b7db 100644
--- a/target/arm/translate-sve.c
+++ b/target/arm/translate-sve.c
@@ -8689,3 +8689,33 @@ static bool trans_BFMMLA(DisasContext *s, arg_rrrr_esz *a)
     }
     return true;
 }
+
+static bool do_BFMLAL_zzzw(DisasContext *s, arg_rrrr_esz *a, bool sel)
+{
+    if (!dc_isar_feature(aa64_sve_bf16, s)) {
+        return false;
+    }
+    if (sve_access_check(s)) {
+        TCGv_ptr status = fpstatus_ptr(FPST_FPCR);
+        unsigned vsz = vec_full_reg_size(s);
+
+        tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
+                           vec_full_reg_offset(s, a->rn),
+                           vec_full_reg_offset(s, a->rm),
+                           vec_full_reg_offset(s, a->ra),
+                           status, vsz, vsz, sel,
+                           gen_helper_gvec_bfmlal);
+        tcg_temp_free_ptr(status);
+    }
+    return true;
+}
+
+static bool trans_BFMLALB_zzzw(DisasContext *s, arg_rrrr_esz *a)
+{
+    return do_BFMLAL_zzzw(s, a, false);
+}
+
+static bool trans_BFMLALT_zzzw(DisasContext *s, arg_rrrr_esz *a)
+{
+    return do_BFMLAL_zzzw(s, a, true);
+}
diff --git a/target/arm/vec_helper.c b/target/arm/vec_helper.c
index 27e9bdd3299..d82736b5e66 100644
--- a/target/arm/vec_helper.c
+++ b/target/arm/vec_helper.c
@@ -2512,3 +2512,19 @@ void HELPER(gvec_bfmmla)(void *vd, void *vn, void *vm, void *va, uint32_t desc)
     }
     clear_tail(d, opr_sz, simd_maxsz(desc));
 }
+
+void HELPER(gvec_bfmlal)(void *vd, void *vn, void *vm, void *va,
+                         void *stat, uint32_t desc)
+{
+    intptr_t i, opr_sz = simd_oprsz(desc);
+    intptr_t sel = simd_data(desc);
+    float32 *d = vd, *a = va;
+    bfloat16 *n = vn, *m = vm;
+
+    for (i = 0; i < opr_sz / 4; ++i) {
+        float32 nn = n[H2(i * 2 + sel)] << 16;
+        float32 mm = m[H2(i * 2 + sel)] << 16;
+        d[H4(i)] = float32_muladd(nn, mm, a[H4(i)], 0, stat);
+    }
+    clear_tail(d, opr_sz, simd_maxsz(desc));
+}
-- 
2.20.1



  parent reply	other threads:[~2021-06-03 16:27 UTC|newest]

Thread overview: 48+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-06-03 15:58 [PULL 00/45] target-arm queue Peter Maydell
2021-06-03 15:58 ` [PULL 01/45] target/arm: Add isar feature check functions for MVE Peter Maydell
2021-06-03 15:58 ` [PULL 02/45] target/arm: Update feature checks for insns which are "MVE or FP" Peter Maydell
2021-06-03 15:58 ` [PULL 03/45] target/arm: Move fpsp/fpdp isar check into callers of do_vfp_2op_sp/dp Peter Maydell
2021-06-03 15:58 ` [PULL 04/45] target/arm: Add MVE check to VMOV_reg_sp and VMOV_reg_dp Peter Maydell
2021-06-03 15:58 ` [PULL 05/45] target/arm: Fix return values in fp_sysreg_checks() Peter Maydell
2021-06-03 15:58 ` [PULL 06/45] target/arm: Implement M-profile VPR register Peter Maydell
2021-06-03 15:58 ` [PULL 07/45] target/arm: Make FPSCR.LTPSIZE writable for MVE Peter Maydell
2021-06-03 15:58 ` [PULL 08/45] target/arm: Allow board models to specify initial NS VTOR Peter Maydell
2021-06-03 15:58 ` [PULL 09/45] arm: Consistently use "Cortex-Axx", not "Cortex Axx" Peter Maydell
2021-06-03 15:58 ` [PULL 10/45] docs: Fix installation of man pages with Sphinx 4.x Peter Maydell
2021-06-03 15:58 ` [PULL 11/45] target/arm: Mark LDS{MIN,MAX} as signed operations Peter Maydell
2021-06-03 15:58 ` [PULL 12/45] target/arm: fix missing exception class Peter Maydell
2021-06-03 15:58 ` [PULL 13/45] target/arm: fold do_raise_exception into raise_exception Peter Maydell
2021-06-03 15:58 ` [PULL 14/45] target/arm: use raise_exception_ra for MTE check failure Peter Maydell
2021-06-03 15:58 ` [PULL 15/45] target/arm: use raise_exception_ra for stack limit exception Peter Maydell
2021-06-03 15:58 ` [PULL 16/45] target/arm: Add isar_feature_{aa32, aa64, aa64_sve}_bf16 Peter Maydell
2021-06-03 15:58 ` [PULL 17/45] target/arm: Unify unallocated path in disas_fp_1src Peter Maydell
2021-06-03 15:58 ` [PULL 18/45] target/arm: Implement scalar float32 to bfloat16 conversion Peter Maydell
2021-06-03 15:58 ` [PULL 19/45] target/arm: Implement vector " Peter Maydell
2021-06-03 15:58 ` [PULL 20/45] softfpu: Add float_round_to_odd_inf Peter Maydell
2021-06-03 15:58 ` [PULL 21/45] target/arm: Implement bfloat16 dot product (vector) Peter Maydell
2021-06-03 15:58 ` [PULL 22/45] target/arm: Implement bfloat16 dot product (indexed) Peter Maydell
2021-06-03 15:58 ` [PULL 23/45] target/arm: Implement bfloat16 matrix multiply accumulate Peter Maydell
2021-06-03 15:58 ` Peter Maydell [this message]
2021-06-03 15:58 ` [PULL 25/45] target/arm: Implement bfloat widening fma (indexed) Peter Maydell
2021-06-03 15:58 ` [PULL 26/45] linux-user/aarch64: Enable hwcap bits for bfloat16 Peter Maydell
2021-06-03 15:58 ` [PULL 27/45] target/arm: Enable BFloat16 extensions Peter Maydell
2021-06-03 15:58 ` [PULL 28/45] hvf: Move assert_hvf_ok() into common directory Peter Maydell
2021-06-03 15:58 ` [PULL 29/45] hvf: Move vcpu thread functions " Peter Maydell
2021-06-03 15:58 ` [PULL 30/45] hvf: Move cpu " Peter Maydell
2021-06-03 15:58 ` [PULL 31/45] hvf: Move hvf internal definitions into common header Peter Maydell
2021-06-03 15:58 ` [PULL 32/45] hvf: Make hvf_set_phys_mem() static Peter Maydell
2021-06-03 15:58 ` [PULL 33/45] hvf: Remove use of hv_uvaddr_t and hv_gpaddr_t Peter Maydell
2021-06-03 15:58 ` [PULL 34/45] hvf: Split out common code on vcpu init and destroy Peter Maydell
2021-06-03 15:58 ` [PULL 35/45] hvf: Use cpu_synchronize_state() Peter Maydell
2021-06-03 15:58 ` [PULL 36/45] hvf: Make synchronize functions static Peter Maydell
2021-06-03 15:58 ` [PULL 37/45] hvf: Remove hvf-accel-ops.h Peter Maydell
2021-06-03 15:58 ` [PULL 38/45] hvf: Introduce hvf vcpu struct Peter Maydell
2021-06-03 15:58 ` [PULL 39/45] hvf: Simplify post reset/init/loadvm hooks Peter Maydell
2021-06-03 15:58 ` [PULL 40/45] tests/qtest/bios-tables-test: Check for dup2() failure Peter Maydell
2021-06-03 15:59 ` [PULL 41/45] tests/qtest/e1000e-test: Check qemu_recv() succeeded Peter Maydell
2021-06-03 15:59 ` [PULL 42/45] tests/qtest/hd-geo-test: Fix checks on mkstemp() return value Peter Maydell
2021-06-03 15:59 ` [PULL 43/45] tests/qtest/pflash-cfi02-test: Avoid potential integer overflow Peter Maydell
2021-06-03 15:59 ` [PULL 44/45] tests/qtest/tpm-tests: Remove unnecessary NULL checks Peter Maydell
2021-06-03 15:59 ` [PULL 45/45] tests/unit/test-vmstate: Assert that dup() and mkstemp() succeed Peter Maydell
2021-06-03 16:42 ` [PULL 00/45] target-arm queue no-reply
2021-06-03 20:25 ` Peter Maydell

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20210603155904.26021-25-peter.maydell@linaro.org \
    --to=peter.maydell@linaro.org \
    --cc=qemu-devel@nongnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.