All of lore.kernel.org
 help / color / mirror / Atom feed
From: Peter Maydell <peter.maydell@linaro.org>
To: qemu-arm@nongnu.org, qemu-devel@nongnu.org
Subject: [PATCH v2 04/45] target/arm: Implement VFP fp16 VMLA, VMLS, VNMLS, VNMLA, VNMUL
Date: Fri, 28 Aug 2020 19:33:13 +0100	[thread overview]
Message-ID: <20200828183354.27913-5-peter.maydell@linaro.org> (raw)
In-Reply-To: <20200828183354.27913-1-peter.maydell@linaro.org>

Implement fp16 versions of the VFP VMLA, VMLS, VNMLS, VNMLA, VNMUL
instructions. (These are all the remaining ones which we implement
via do_vfp_3op_[hsd]p().)

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
---
 target/arm/helper.h            |  1 +
 target/arm/vfp.decode          |  5 ++
 target/arm/vfp_helper.c        |  5 ++
 target/arm/translate-vfp.c.inc | 84 ++++++++++++++++++++++++++++++++++
 4 files changed, 95 insertions(+)

diff --git a/target/arm/helper.h b/target/arm/helper.h
index 61e4e938861..58f9c4e933e 100644
--- a/target/arm/helper.h
+++ b/target/arm/helper.h
@@ -125,6 +125,7 @@ DEF_HELPER_3(vfp_maxnumd, f64, f64, f64, ptr)
 DEF_HELPER_3(vfp_minnumh, f16, f16, f16, ptr)
 DEF_HELPER_3(vfp_minnums, f32, f32, f32, ptr)
 DEF_HELPER_3(vfp_minnumd, f64, f64, f64, ptr)
+DEF_HELPER_1(vfp_negh, f16, f16)
 DEF_HELPER_1(vfp_negs, f32, f32)
 DEF_HELPER_1(vfp_negd, f64, f64)
 DEF_HELPER_1(vfp_abss, f32, f32)
diff --git a/target/arm/vfp.decode b/target/arm/vfp.decode
index 1ecd5e28ca0..e5545076a51 100644
--- a/target/arm/vfp.decode
+++ b/target/arm/vfp.decode
@@ -103,15 +103,19 @@ VLDM_VSTM_dp ---- 1101 0.1 l:1 rn:4 .... 1011 imm:8 \
              vd=%vd_dp p=1 u=0 w=1
 
 # 3-register VFP data-processing; bits [23,21:20,6] identify the operation.
+VMLA_hp      ---- 1110 0.00 .... .... 1001 .0.0 ....        @vfp_dnm_s
 VMLA_sp      ---- 1110 0.00 .... .... 1010 .0.0 ....        @vfp_dnm_s
 VMLA_dp      ---- 1110 0.00 .... .... 1011 .0.0 ....        @vfp_dnm_d
 
+VMLS_hp      ---- 1110 0.00 .... .... 1001 .1.0 ....        @vfp_dnm_s
 VMLS_sp      ---- 1110 0.00 .... .... 1010 .1.0 ....        @vfp_dnm_s
 VMLS_dp      ---- 1110 0.00 .... .... 1011 .1.0 ....        @vfp_dnm_d
 
+VNMLS_hp     ---- 1110 0.01 .... .... 1001 .0.0 ....        @vfp_dnm_s
 VNMLS_sp     ---- 1110 0.01 .... .... 1010 .0.0 ....        @vfp_dnm_s
 VNMLS_dp     ---- 1110 0.01 .... .... 1011 .0.0 ....        @vfp_dnm_d
 
+VNMLA_hp     ---- 1110 0.01 .... .... 1001 .1.0 ....        @vfp_dnm_s
 VNMLA_sp     ---- 1110 0.01 .... .... 1010 .1.0 ....        @vfp_dnm_s
 VNMLA_dp     ---- 1110 0.01 .... .... 1011 .1.0 ....        @vfp_dnm_d
 
@@ -119,6 +123,7 @@ VMUL_hp      ---- 1110 0.10 .... .... 1001 .0.0 ....        @vfp_dnm_s
 VMUL_sp      ---- 1110 0.10 .... .... 1010 .0.0 ....        @vfp_dnm_s
 VMUL_dp      ---- 1110 0.10 .... .... 1011 .0.0 ....        @vfp_dnm_d
 
+VNMUL_hp     ---- 1110 0.10 .... .... 1001 .1.0 ....        @vfp_dnm_s
 VNMUL_sp     ---- 1110 0.10 .... .... 1010 .1.0 ....        @vfp_dnm_s
 VNMUL_dp     ---- 1110 0.10 .... .... 1011 .1.0 ....        @vfp_dnm_d
 
diff --git a/target/arm/vfp_helper.c b/target/arm/vfp_helper.c
index b8ca744bccc..f93ddf0b208 100644
--- a/target/arm/vfp_helper.c
+++ b/target/arm/vfp_helper.c
@@ -261,6 +261,11 @@ VFP_BINOP(minnum)
 VFP_BINOP(maxnum)
 #undef VFP_BINOP
 
+dh_ctype_f16 VFP_HELPER(neg, h)(dh_ctype_f16 a)
+{
+    return float16_chs(a);
+}
+
 float32 VFP_HELPER(neg, s)(float32 a)
 {
     return float32_chs(a);
diff --git a/target/arm/translate-vfp.c.inc b/target/arm/translate-vfp.c.inc
index 01a5fd65115..15bb23688bd 100644
--- a/target/arm/translate-vfp.c.inc
+++ b/target/arm/translate-vfp.c.inc
@@ -1547,6 +1547,21 @@ static bool do_vfp_2op_dp(DisasContext *s, VFPGen2OpDPFn *fn, int vd, int vm)
     return true;
 }
 
+static void gen_VMLA_hp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
+{
+    /* Note that order of inputs to the add matters for NaNs */
+    TCGv_i32 tmp = tcg_temp_new_i32();
+
+    gen_helper_vfp_mulh(tmp, vn, vm, fpst);
+    gen_helper_vfp_addh(vd, vd, tmp, fpst);
+    tcg_temp_free_i32(tmp);
+}
+
+static bool trans_VMLA_hp(DisasContext *s, arg_VMLA_sp *a)
+{
+    return do_vfp_3op_hp(s, gen_VMLA_hp, a->vd, a->vn, a->vm, true);
+}
+
 static void gen_VMLA_sp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
 {
     /* Note that order of inputs to the add matters for NaNs */
@@ -1577,6 +1592,25 @@ static bool trans_VMLA_dp(DisasContext *s, arg_VMLA_dp *a)
     return do_vfp_3op_dp(s, gen_VMLA_dp, a->vd, a->vn, a->vm, true);
 }
 
+static void gen_VMLS_hp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
+{
+    /*
+     * VMLS: vd = vd + -(vn * vm)
+     * Note that order of inputs to the add matters for NaNs.
+     */
+    TCGv_i32 tmp = tcg_temp_new_i32();
+
+    gen_helper_vfp_mulh(tmp, vn, vm, fpst);
+    gen_helper_vfp_negh(tmp, tmp);
+    gen_helper_vfp_addh(vd, vd, tmp, fpst);
+    tcg_temp_free_i32(tmp);
+}
+
+static bool trans_VMLS_hp(DisasContext *s, arg_VMLS_sp *a)
+{
+    return do_vfp_3op_hp(s, gen_VMLS_hp, a->vd, a->vn, a->vm, true);
+}
+
 static void gen_VMLS_sp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
 {
     /*
@@ -1615,6 +1649,27 @@ static bool trans_VMLS_dp(DisasContext *s, arg_VMLS_dp *a)
     return do_vfp_3op_dp(s, gen_VMLS_dp, a->vd, a->vn, a->vm, true);
 }
 
+static void gen_VNMLS_hp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
+{
+    /*
+     * VNMLS: -fd + (fn * fm)
+     * Note that it isn't valid to replace (-A + B) with (B - A) or similar
+     * plausible looking simplifications because this will give wrong results
+     * for NaNs.
+     */
+    TCGv_i32 tmp = tcg_temp_new_i32();
+
+    gen_helper_vfp_mulh(tmp, vn, vm, fpst);
+    gen_helper_vfp_negh(vd, vd);
+    gen_helper_vfp_addh(vd, vd, tmp, fpst);
+    tcg_temp_free_i32(tmp);
+}
+
+static bool trans_VNMLS_hp(DisasContext *s, arg_VNMLS_sp *a)
+{
+    return do_vfp_3op_hp(s, gen_VNMLS_hp, a->vd, a->vn, a->vm, true);
+}
+
 static void gen_VNMLS_sp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
 {
     /*
@@ -1657,6 +1712,23 @@ static bool trans_VNMLS_dp(DisasContext *s, arg_VNMLS_dp *a)
     return do_vfp_3op_dp(s, gen_VNMLS_dp, a->vd, a->vn, a->vm, true);
 }
 
+static void gen_VNMLA_hp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
+{
+    /* VNMLA: -fd + -(fn * fm) */
+    TCGv_i32 tmp = tcg_temp_new_i32();
+
+    gen_helper_vfp_mulh(tmp, vn, vm, fpst);
+    gen_helper_vfp_negh(tmp, tmp);
+    gen_helper_vfp_negh(vd, vd);
+    gen_helper_vfp_addh(vd, vd, tmp, fpst);
+    tcg_temp_free_i32(tmp);
+}
+
+static bool trans_VNMLA_hp(DisasContext *s, arg_VNMLA_sp *a)
+{
+    return do_vfp_3op_hp(s, gen_VNMLA_hp, a->vd, a->vn, a->vm, true);
+}
+
 static void gen_VNMLA_sp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
 {
     /* VNMLA: -fd + -(fn * fm) */
@@ -1706,6 +1778,18 @@ static bool trans_VMUL_dp(DisasContext *s, arg_VMUL_dp *a)
     return do_vfp_3op_dp(s, gen_helper_vfp_muld, a->vd, a->vn, a->vm, false);
 }
 
+static void gen_VNMUL_hp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
+{
+    /* VNMUL: -(fn * fm) */
+    gen_helper_vfp_mulh(vd, vn, vm, fpst);
+    gen_helper_vfp_negh(vd, vd);
+}
+
+static bool trans_VNMUL_hp(DisasContext *s, arg_VNMUL_sp *a)
+{
+    return do_vfp_3op_hp(s, gen_VNMUL_hp, a->vd, a->vn, a->vm, false);
+}
+
 static void gen_VNMUL_sp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
 {
     /* VNMUL: -(fn * fm) */
-- 
2.20.1



  parent reply	other threads:[~2020-08-28 18:42 UTC|newest]

Thread overview: 75+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-08-28 18:33 [PATCH v2 00/45] target/arm: Implement fp16 for AArch32 VFP and Neon Peter Maydell
2020-08-28 18:33 ` [PATCH v2 01/45] target/arm: Remove local definitions of float constants Peter Maydell
2020-08-28 18:33 ` [PATCH v2 02/45] target/arm: Use correct ID register check for aa32_fp16_arith Peter Maydell
2020-08-28 18:33 ` [PATCH v2 03/45] target/arm: Implement VFP fp16 for VFP_BINOP operations Peter Maydell
2020-08-28 18:33 ` Peter Maydell [this message]
2020-08-28 18:33 ` [PATCH v2 05/45] target/arm: Macroify trans functions for VFMA, VFMS, VFNMA, VFNMS Peter Maydell
2020-08-28 18:33 ` [PATCH v2 06/45] target/arm: Implement VFP fp16 for fused-multiply-add Peter Maydell
2020-08-28 18:33 ` [PATCH v2 07/45] target/arm: Macroify uses of do_vfp_2op_sp() and do_vfp_2op_dp() Peter Maydell
2020-08-28 18:33 ` [PATCH v2 08/45] target/arm: Implement VFP fp16 for VABS, VNEG, VSQRT Peter Maydell
2020-08-28 18:33 ` [PATCH v2 09/45] target/arm: Implement VFP fp16 for VMOV immediate Peter Maydell
2020-08-28 18:33 ` [PATCH v2 10/45] target/arm: Implement VFP fp16 VCMP Peter Maydell
2020-08-28 18:33 ` [PATCH v2 11/45] target/arm: Implement VFP fp16 VLDR and VSTR Peter Maydell
2020-08-28 18:33 ` [PATCH v2 12/45] target/arm: Implement VFP fp16 VCVT between float and integer Peter Maydell
2020-08-28 18:33 ` [PATCH v2 13/45] target/arm: Make VFP_CONV_FIX macros take separate float type and float size Peter Maydell
2020-08-28 18:33 ` [PATCH v2 14/45] target/arm: Use macros instead of open-coding fp16 conversion helpers Peter Maydell
2020-08-28 18:33 ` [PATCH v2 15/45] target/arm: Implement VFP fp16 VCVT between float and fixed-point Peter Maydell
2020-08-28 18:33 ` [PATCH v2 16/45] target/arm: Implement VFP vp16 VCVT-with-specified-rounding-mode Peter Maydell
2020-08-28 18:33 ` [PATCH v2 17/45] target/arm: Implement VFP fp16 VSEL Peter Maydell
2020-08-28 18:33 ` [PATCH v2 18/45] target/arm: Implement VFP fp16 VRINT* Peter Maydell
2020-08-28 18:33 ` [PATCH v2 19/45] target/arm: Implement new VFP fp16 insn VINS Peter Maydell
2020-08-28 18:33 ` [PATCH v2 20/45] target/arm: Implement new VFP fp16 insn VMOVX Peter Maydell
2020-08-28 18:33 ` [PATCH v2 21/45] target/arm: Implement VFP fp16 VMOV between gp and halfprec registers Peter Maydell
2020-08-28 18:33 ` [PATCH v2 22/45] fpu: Add float16 comparison functions Peter Maydell
2020-08-28 20:02   ` Richard Henderson
2020-08-28 18:33 ` [PATCH v2 23/45] target/arm: Implement FP16 for Neon VADD, VSUB, VABD, VMUL Peter Maydell
2020-08-28 20:06   ` Richard Henderson
2020-08-28 18:33 ` [PATCH v2 24/45] target/arm: Implement fp16 for Neon VRECPE, VRSQRTE using gvec Peter Maydell
2020-08-28 20:10   ` Richard Henderson
2020-08-28 21:40     ` Peter Maydell
2020-08-28 22:53       ` Richard Henderson
2020-08-29 13:53         ` Peter Maydell
2020-08-29 15:30           ` Richard Henderson
2020-08-28 18:33 ` [PATCH v2 25/45] target/arm: Implement fp16 for Neon VABS, VNEG of floats Peter Maydell
2020-08-28 20:33   ` Richard Henderson
2020-08-28 18:33 ` [PATCH v2 26/45] target/arm: Implement fp16 for VCEQ, VCGE, VCGT comparisons Peter Maydell
2020-08-28 20:45   ` Richard Henderson
2020-08-28 18:33 ` [PATCH v2 27/45] target/arm: Implement fp16 for VACGE, VACGT Peter Maydell
2020-08-28 20:46   ` Richard Henderson
2020-08-28 18:33 ` [PATCH v2 28/45] target/arm: Implement fp16 for Neon VMAX, VMIN Peter Maydell
2020-08-28 20:46   ` Richard Henderson
2020-08-28 18:33 ` [PATCH v2 29/45] target/arm: Implement fp16 for Neon VMAXNM, VMINNM Peter Maydell
2020-08-28 20:52   ` Richard Henderson
2020-08-28 18:33 ` [PATCH v2 30/45] target/arm: Implement fp16 for Neon VMLA, VMLS operations Peter Maydell
2020-08-28 20:54   ` Richard Henderson
2020-08-28 18:33 ` [PATCH v2 31/45] target/arm: Implement fp16 for Neon VFMA, VMFS Peter Maydell
2020-08-28 22:55   ` Richard Henderson
2020-08-28 18:33 ` [PATCH v2 32/45] target/arm: Implement fp16 for Neon fp compare-vs-0 Peter Maydell
2020-08-28 22:57   ` Richard Henderson
2020-08-28 18:33 ` [PATCH v2 33/45] target/arm: Implement fp16 for Neon VRECPS Peter Maydell
2020-08-28 23:02   ` Richard Henderson
2020-08-28 18:33 ` [PATCH v2 34/45] target/arm: Implement fp16 for Neon VRSQRTS Peter Maydell
2020-08-28 23:03   ` Richard Henderson
2020-08-28 18:33 ` [PATCH v2 35/45] target/arm: Implement fp16 for Neon pairwise fp ops Peter Maydell
2020-08-28 23:05   ` Richard Henderson
2020-08-28 18:33 ` [PATCH v2 36/45] target/arm: Implement fp16 for Neon float-integer VCVT Peter Maydell
2020-08-28 23:07   ` Richard Henderson
2020-08-28 18:33 ` [PATCH v2 37/45] target/arm: Convert Neon VCVT fixed-point to gvec Peter Maydell
2020-08-28 23:08   ` Richard Henderson
2020-08-28 18:33 ` [PATCH v2 38/45] target/arm: Implement fp16 for Neon VCVT fixed-point Peter Maydell
2020-08-28 23:10   ` Richard Henderson
2020-08-28 18:33 ` [PATCH v2 39/45] target/arm: Implement fp16 for Neon VCVT with rounding modes Peter Maydell
2020-08-28 23:13   ` Richard Henderson
2020-08-28 18:33 ` [PATCH v2 40/45] target/arm: Implement fp16 for Neon VRINT-with-specified-rounding-mode Peter Maydell
2020-08-28 23:15   ` Richard Henderson
2020-08-28 18:33 ` [PATCH v2 41/45] target/arm: Implement fp16 for Neon VRINTX Peter Maydell
2020-08-28 23:16   ` Richard Henderson
2020-08-28 18:33 ` [PATCH v2 42/45] target/arm/vec_helper: Handle oprsz less than 16 bytes in indexed operations Peter Maydell
2020-08-28 23:17   ` Richard Henderson
2020-08-28 18:33 ` [PATCH v2 43/45] target/arm/vec_helper: Add gvec fp indexed multiply-and-add operations Peter Maydell
2020-08-28 23:24   ` Richard Henderson
2020-08-29 13:51     ` Peter Maydell
2020-08-28 18:33 ` [PATCH v2 44/45] target/arm: Implement fp16 for Neon VMUL, VMLA, VMLS Peter Maydell
2020-08-28 23:38   ` Richard Henderson
2020-08-29 13:52     ` Peter Maydell
2020-08-28 18:33 ` [PATCH v2 45/45] target/arm: Enable FP16 in '-cpu max' Peter Maydell

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20200828183354.27913-5-peter.maydell@linaro.org \
    --to=peter.maydell@linaro.org \
    --cc=qemu-arm@nongnu.org \
    --cc=qemu-devel@nongnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.