All of lore.kernel.org
 help / color / mirror / Atom feed
From: Peter Maydell <peter.maydell@linaro.org>
To: qemu-arm@nongnu.org, qemu-devel@nongnu.org
Subject: [PATCH v2 31/45] target/arm: Implement fp16 for Neon VFMA, VFMS
Date: Fri, 28 Aug 2020 19:33:40 +0100	[thread overview]
Message-ID: <20200828183354.27913-32-peter.maydell@linaro.org> (raw)
In-Reply-To: <20200828183354.27913-1-peter.maydell@linaro.org>

Convert the Neon floating-point vector operations VFMA and VFMS
to use a gvec helper, and use this to implement the fp16 case.

This is the last use of do_3same_fp() so we can now delete
that function.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
---
 target/arm/helper.h             |  6 +++
 target/arm/vec_helper.c         | 33 +++++++++++-
 target/arm/translate-neon.c.inc | 92 +--------------------------------
 3 files changed, 40 insertions(+), 91 deletions(-)

diff --git a/target/arm/helper.h b/target/arm/helper.h
index 6f6c96711b7..e6f65c74614 100644
--- a/target/arm/helper.h
+++ b/target/arm/helper.h
@@ -665,6 +665,12 @@ DEF_HELPER_FLAGS_5(gvec_fmla_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
 DEF_HELPER_FLAGS_5(gvec_fmls_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
 DEF_HELPER_FLAGS_5(gvec_fmls_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
 
+DEF_HELPER_FLAGS_5(gvec_vfma_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_vfma_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_5(gvec_vfms_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_vfms_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+
 DEF_HELPER_FLAGS_5(gvec_ftsmul_h, TCG_CALL_NO_RWG,
                    void, ptr, ptr, ptr, ptr, i32)
 DEF_HELPER_FLAGS_5(gvec_ftsmul_s, TCG_CALL_NO_RWG,
diff --git a/target/arm/vec_helper.c b/target/arm/vec_helper.c
index 5da5969c1c0..995f09fb71e 100644
--- a/target/arm/vec_helper.c
+++ b/target/arm/vec_helper.c
@@ -867,7 +867,32 @@ static float32 float32_mulsub_nf(float32 dest, float32 op1, float32 op2,
     return float32_sub(dest, float32_mul(op1, op2, stat), stat);
 }
 
-#define DO_MULADD(NAME, FUNC, TYPE) \
+/* Fused versions; these have the semantics Neon VFMA/VFMS want */
+static float16 float16_muladd_f(float16 dest, float16 op1, float16 op2,
+                                float_status *stat)
+{
+    return float16_muladd(op1, op2, dest, 0, stat);
+}
+
+static float32 float32_muladd_f(float32 dest, float32 op1, float32 op2,
+                                 float_status *stat)
+{
+    return float32_muladd(op1, op2, dest, 0, stat);
+}
+
+static float16 float16_mulsub_f(float16 dest, float16 op1, float16 op2,
+                                 float_status *stat)
+{
+    return float16_muladd(float16_chs(op1), op2, dest, 0, stat);
+}
+
+static float32 float32_mulsub_f(float32 dest, float32 op1, float32 op2,
+                                 float_status *stat)
+{
+    return float32_muladd(float32_chs(op1), op2, dest, 0, stat);
+}
+
+#define DO_MULADD(NAME, FUNC, TYPE)                                     \
 void HELPER(NAME)(void *vd, void *vn, void *vm, void *stat, uint32_t desc) \
 {                                                                          \
     intptr_t i, oprsz = simd_oprsz(desc);                                  \
@@ -884,6 +909,12 @@ DO_MULADD(gvec_fmla_s, float32_muladd_nf, float32)
 DO_MULADD(gvec_fmls_h, float16_mulsub_nf, float16)
 DO_MULADD(gvec_fmls_s, float32_mulsub_nf, float32)
 
+DO_MULADD(gvec_vfma_h, float16_muladd_f, float16)
+DO_MULADD(gvec_vfma_s, float32_muladd_f, float32)
+
+DO_MULADD(gvec_vfms_h, float16_mulsub_f, float16)
+DO_MULADD(gvec_vfms_s, float32_mulsub_f, float32)
+
 /* For the indexed ops, SVE applies the index per 128-bit vector segment.
  * For AdvSIMD, there is of course only one such vector segment.
  */
diff --git a/target/arm/translate-neon.c.inc b/target/arm/translate-neon.c.inc
index 1f2522f120a..cf5eab784bd 100644
--- a/target/arm/translate-neon.c.inc
+++ b/target/arm/translate-neon.c.inc
@@ -1033,55 +1033,6 @@ DO_3SAME_PAIR(VPADD, padd_u)
 DO_3SAME_VQDMULH(VQDMULH, qdmulh)
 DO_3SAME_VQDMULH(VQRDMULH, qrdmulh)
 
-static bool do_3same_fp(DisasContext *s, arg_3same *a, VFPGen3OpSPFn *fn,
-                        bool reads_vd)
-{
-    /*
-     * FP operations handled elementwise 32 bits at a time.
-     * If reads_vd is true then the old value of Vd will be
-     * loaded before calling the callback function. This is
-     * used for multiply-accumulate type operations.
-     */
-    TCGv_i32 tmp, tmp2;
-    int pass;
-
-    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
-        return false;
-    }
-
-    /* UNDEF accesses to D16-D31 if they don't exist. */
-    if (!dc_isar_feature(aa32_simd_r32, s) &&
-        ((a->vd | a->vn | a->vm) & 0x10)) {
-        return false;
-    }
-
-    if ((a->vn | a->vm | a->vd) & a->q) {
-        return false;
-    }
-
-    if (!vfp_access_check(s)) {
-        return true;
-    }
-
-    TCGv_ptr fpstatus = fpstatus_ptr(FPST_STD);
-    for (pass = 0; pass < (a->q ? 4 : 2); pass++) {
-        tmp = neon_load_reg(a->vn, pass);
-        tmp2 = neon_load_reg(a->vm, pass);
-        if (reads_vd) {
-            TCGv_i32 tmp_rd = neon_load_reg(a->vd, pass);
-            fn(tmp_rd, tmp, tmp2, fpstatus);
-            neon_store_reg(a->vd, pass, tmp_rd);
-            tcg_temp_free_i32(tmp);
-        } else {
-            fn(tmp, tmp, tmp2, fpstatus);
-            neon_store_reg(a->vd, pass, tmp);
-        }
-        tcg_temp_free_i32(tmp2);
-    }
-    tcg_temp_free_ptr(fpstatus);
-    return true;
-}
-
 #define WRAP_FP_GVEC(WRAPNAME, FPST, FUNC)                              \
     static void WRAPNAME(unsigned vece, uint32_t rd_ofs,                \
                          uint32_t rn_ofs, uint32_t rm_ofs,              \
@@ -1121,6 +1072,8 @@ DO_3S_FP_GVEC(VMAX, gen_helper_gvec_fmax_s, gen_helper_gvec_fmax_h)
 DO_3S_FP_GVEC(VMIN, gen_helper_gvec_fmin_s, gen_helper_gvec_fmin_h)
 DO_3S_FP_GVEC(VMLA, gen_helper_gvec_fmla_s, gen_helper_gvec_fmla_h)
 DO_3S_FP_GVEC(VMLS, gen_helper_gvec_fmls_s, gen_helper_gvec_fmls_h)
+DO_3S_FP_GVEC(VFMA, gen_helper_gvec_vfma_s, gen_helper_gvec_vfma_h)
+DO_3S_FP_GVEC(VFMS, gen_helper_gvec_vfms_s, gen_helper_gvec_vfms_h)
 
 WRAP_FP_GVEC(gen_VMAXNM_fp32_3s, FPST_STD, gen_helper_gvec_fmaxnum_s)
 WRAP_FP_GVEC(gen_VMAXNM_fp16_3s, FPST_STD_F16, gen_helper_gvec_fmaxnum_h)
@@ -1197,47 +1150,6 @@ static bool trans_VRSQRTS_fp_3s(DisasContext *s, arg_3same *a)
     return do_3same(s, a, gen_VRSQRTS_fp_3s);
 }
 
-static void gen_VFMA_fp_3s(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm,
-                            TCGv_ptr fpstatus)
-{
-    gen_helper_vfp_muladds(vd, vn, vm, vd, fpstatus);
-}
-
-static bool trans_VFMA_fp_3s(DisasContext *s, arg_3same *a)
-{
-    if (!dc_isar_feature(aa32_simdfmac, s)) {
-        return false;
-    }
-
-    if (a->size != 0) {
-        /* TODO fp16 support */
-        return false;
-    }
-
-    return do_3same_fp(s, a, gen_VFMA_fp_3s, true);
-}
-
-static void gen_VFMS_fp_3s(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm,
-                            TCGv_ptr fpstatus)
-{
-    gen_helper_vfp_negs(vn, vn);
-    gen_helper_vfp_muladds(vd, vn, vm, vd, fpstatus);
-}
-
-static bool trans_VFMS_fp_3s(DisasContext *s, arg_3same *a)
-{
-    if (!dc_isar_feature(aa32_simdfmac, s)) {
-        return false;
-    }
-
-    if (a->size != 0) {
-        /* TODO fp16 support */
-        return false;
-    }
-
-    return do_3same_fp(s, a, gen_VFMS_fp_3s, true);
-}
-
 static bool do_3same_fp_pair(DisasContext *s, arg_3same *a, VFPGen3OpSPFn *fn)
 {
     /* FP operations handled pairwise 32 bits at a time */
-- 
2.20.1



  parent reply	other threads:[~2020-08-28 18:48 UTC|newest]

Thread overview: 75+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-08-28 18:33 [PATCH v2 00/45] target/arm: Implement fp16 for AArch32 VFP and Neon Peter Maydell
2020-08-28 18:33 ` [PATCH v2 01/45] target/arm: Remove local definitions of float constants Peter Maydell
2020-08-28 18:33 ` [PATCH v2 02/45] target/arm: Use correct ID register check for aa32_fp16_arith Peter Maydell
2020-08-28 18:33 ` [PATCH v2 03/45] target/arm: Implement VFP fp16 for VFP_BINOP operations Peter Maydell
2020-08-28 18:33 ` [PATCH v2 04/45] target/arm: Implement VFP fp16 VMLA, VMLS, VNMLS, VNMLA, VNMUL Peter Maydell
2020-08-28 18:33 ` [PATCH v2 05/45] target/arm: Macroify trans functions for VFMA, VFMS, VFNMA, VFNMS Peter Maydell
2020-08-28 18:33 ` [PATCH v2 06/45] target/arm: Implement VFP fp16 for fused-multiply-add Peter Maydell
2020-08-28 18:33 ` [PATCH v2 07/45] target/arm: Macroify uses of do_vfp_2op_sp() and do_vfp_2op_dp() Peter Maydell
2020-08-28 18:33 ` [PATCH v2 08/45] target/arm: Implement VFP fp16 for VABS, VNEG, VSQRT Peter Maydell
2020-08-28 18:33 ` [PATCH v2 09/45] target/arm: Implement VFP fp16 for VMOV immediate Peter Maydell
2020-08-28 18:33 ` [PATCH v2 10/45] target/arm: Implement VFP fp16 VCMP Peter Maydell
2020-08-28 18:33 ` [PATCH v2 11/45] target/arm: Implement VFP fp16 VLDR and VSTR Peter Maydell
2020-08-28 18:33 ` [PATCH v2 12/45] target/arm: Implement VFP fp16 VCVT between float and integer Peter Maydell
2020-08-28 18:33 ` [PATCH v2 13/45] target/arm: Make VFP_CONV_FIX macros take separate float type and float size Peter Maydell
2020-08-28 18:33 ` [PATCH v2 14/45] target/arm: Use macros instead of open-coding fp16 conversion helpers Peter Maydell
2020-08-28 18:33 ` [PATCH v2 15/45] target/arm: Implement VFP fp16 VCVT between float and fixed-point Peter Maydell
2020-08-28 18:33 ` [PATCH v2 16/45] target/arm: Implement VFP fp16 VCVT-with-specified-rounding-mode Peter Maydell
2020-08-28 18:33 ` [PATCH v2 17/45] target/arm: Implement VFP fp16 VSEL Peter Maydell
2020-08-28 18:33 ` [PATCH v2 18/45] target/arm: Implement VFP fp16 VRINT* Peter Maydell
2020-08-28 18:33 ` [PATCH v2 19/45] target/arm: Implement new VFP fp16 insn VINS Peter Maydell
2020-08-28 18:33 ` [PATCH v2 20/45] target/arm: Implement new VFP fp16 insn VMOVX Peter Maydell
2020-08-28 18:33 ` [PATCH v2 21/45] target/arm: Implement VFP fp16 VMOV between gp and halfprec registers Peter Maydell
2020-08-28 18:33 ` [PATCH v2 22/45] fpu: Add float16 comparison functions Peter Maydell
2020-08-28 20:02   ` Richard Henderson
2020-08-28 18:33 ` [PATCH v2 23/45] target/arm: Implement FP16 for Neon VADD, VSUB, VABD, VMUL Peter Maydell
2020-08-28 20:06   ` Richard Henderson
2020-08-28 18:33 ` [PATCH v2 24/45] target/arm: Implement fp16 for Neon VRECPE, VRSQRTE using gvec Peter Maydell
2020-08-28 20:10   ` Richard Henderson
2020-08-28 21:40     ` Peter Maydell
2020-08-28 22:53       ` Richard Henderson
2020-08-29 13:53         ` Peter Maydell
2020-08-29 15:30           ` Richard Henderson
2020-08-28 18:33 ` [PATCH v2 25/45] target/arm: Implement fp16 for Neon VABS, VNEG of floats Peter Maydell
2020-08-28 20:33   ` Richard Henderson
2020-08-28 18:33 ` [PATCH v2 26/45] target/arm: Implement fp16 for VCEQ, VCGE, VCGT comparisons Peter Maydell
2020-08-28 20:45   ` Richard Henderson
2020-08-28 18:33 ` [PATCH v2 27/45] target/arm: Implement fp16 for VACGE, VACGT Peter Maydell
2020-08-28 20:46   ` Richard Henderson
2020-08-28 18:33 ` [PATCH v2 28/45] target/arm: Implement fp16 for Neon VMAX, VMIN Peter Maydell
2020-08-28 20:46   ` Richard Henderson
2020-08-28 18:33 ` [PATCH v2 29/45] target/arm: Implement fp16 for Neon VMAXNM, VMINNM Peter Maydell
2020-08-28 20:52   ` Richard Henderson
2020-08-28 18:33 ` [PATCH v2 30/45] target/arm: Implement fp16 for Neon VMLA, VMLS operations Peter Maydell
2020-08-28 20:54   ` Richard Henderson
2020-08-28 18:33 ` Peter Maydell [this message]
2020-08-28 22:55   ` [PATCH v2 31/45] target/arm: Implement fp16 for Neon VFMA, VFMS Richard Henderson
2020-08-28 18:33 ` [PATCH v2 32/45] target/arm: Implement fp16 for Neon fp compare-vs-0 Peter Maydell
2020-08-28 22:57   ` Richard Henderson
2020-08-28 18:33 ` [PATCH v2 33/45] target/arm: Implement fp16 for Neon VRECPS Peter Maydell
2020-08-28 23:02   ` Richard Henderson
2020-08-28 18:33 ` [PATCH v2 34/45] target/arm: Implement fp16 for Neon VRSQRTS Peter Maydell
2020-08-28 23:03   ` Richard Henderson
2020-08-28 18:33 ` [PATCH v2 35/45] target/arm: Implement fp16 for Neon pairwise fp ops Peter Maydell
2020-08-28 23:05   ` Richard Henderson
2020-08-28 18:33 ` [PATCH v2 36/45] target/arm: Implement fp16 for Neon float-integer VCVT Peter Maydell
2020-08-28 23:07   ` Richard Henderson
2020-08-28 18:33 ` [PATCH v2 37/45] target/arm: Convert Neon VCVT fixed-point to gvec Peter Maydell
2020-08-28 23:08   ` Richard Henderson
2020-08-28 18:33 ` [PATCH v2 38/45] target/arm: Implement fp16 for Neon VCVT fixed-point Peter Maydell
2020-08-28 23:10   ` Richard Henderson
2020-08-28 18:33 ` [PATCH v2 39/45] target/arm: Implement fp16 for Neon VCVT with rounding modes Peter Maydell
2020-08-28 23:13   ` Richard Henderson
2020-08-28 18:33 ` [PATCH v2 40/45] target/arm: Implement fp16 for Neon VRINT-with-specified-rounding-mode Peter Maydell
2020-08-28 23:15   ` Richard Henderson
2020-08-28 18:33 ` [PATCH v2 41/45] target/arm: Implement fp16 for Neon VRINTX Peter Maydell
2020-08-28 23:16   ` Richard Henderson
2020-08-28 18:33 ` [PATCH v2 42/45] target/arm/vec_helper: Handle oprsz less than 16 bytes in indexed operations Peter Maydell
2020-08-28 23:17   ` Richard Henderson
2020-08-28 18:33 ` [PATCH v2 43/45] target/arm/vec_helper: Add gvec fp indexed multiply-and-add operations Peter Maydell
2020-08-28 23:24   ` Richard Henderson
2020-08-29 13:51     ` Peter Maydell
2020-08-28 18:33 ` [PATCH v2 44/45] target/arm: Implement fp16 for Neon VMUL, VMLA, VMLS Peter Maydell
2020-08-28 23:38   ` Richard Henderson
2020-08-29 13:52     ` Peter Maydell
2020-08-28 18:33 ` [PATCH v2 45/45] target/arm: Enable FP16 in '-cpu max' Peter Maydell

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20200828183354.27913-32-peter.maydell@linaro.org \
    --to=peter.maydell@linaro.org \
    --cc=qemu-arm@nongnu.org \
    --cc=qemu-devel@nongnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.