qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
From: Peter Maydell <peter.maydell@linaro.org>
To: qemu-arm@nongnu.org, qemu-devel@nongnu.org
Subject: [PATCH 18/22] target/arm: Implement VFP fp16 VRINT*
Date: Mon, 24 Aug 2020 15:29:30 +0100	[thread overview]
Message-ID: <20200824142934.20850-19-peter.maydell@linaro.org> (raw)
In-Reply-To: <20200824142934.20850-1-peter.maydell@linaro.org>

Implement the fp16 versions of the VFP VRINT* insns: VRINTR, VRINTZ,
VRINTX, and the unconditional VRINT encoding with an explicit rounding mode.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
---
 target/arm/helper.h            |  2 +
 target/arm/vfp-uncond.decode   |  6 ++-
 target/arm/vfp.decode          |  3 ++
 target/arm/vfp_helper.c        | 21 ++++++++
 target/arm/translate-vfp.c.inc | 98 +++++++++++++++++++++++++++++++---
 5 files changed, 122 insertions(+), 8 deletions(-)

diff --git a/target/arm/helper.h b/target/arm/helper.h
index eefd1ac2a72..d1315e0ef3e 100644
--- a/target/arm/helper.h
+++ b/target/arm/helper.h
@@ -242,8 +242,10 @@ DEF_HELPER_3(shr_cc, i32, env, i32, i32)
 DEF_HELPER_3(sar_cc, i32, env, i32, i32)
 DEF_HELPER_3(ror_cc, i32, env, i32, i32)
 
+DEF_HELPER_FLAGS_2(rinth_exact, TCG_CALL_NO_RWG, f32, f32, ptr)
 DEF_HELPER_FLAGS_2(rints_exact, TCG_CALL_NO_RWG, f32, f32, ptr)
 DEF_HELPER_FLAGS_2(rintd_exact, TCG_CALL_NO_RWG, f64, f64, ptr)
+DEF_HELPER_FLAGS_2(rinth, TCG_CALL_NO_RWG, f32, f32, ptr)
 DEF_HELPER_FLAGS_2(rints, TCG_CALL_NO_RWG, f32, f32, ptr)
 DEF_HELPER_FLAGS_2(rintd, TCG_CALL_NO_RWG, f64, f64, ptr)
 
diff --git a/target/arm/vfp-uncond.decode b/target/arm/vfp-uncond.decode
index 8ba7b1703e0..9615544623a 100644
--- a/target/arm/vfp-uncond.decode
+++ b/target/arm/vfp-uncond.decode
@@ -60,10 +60,12 @@ VMINNM_sp   1111 1110 1.00 .... .... 1010 .1.0 ....         @vfp_dnm_s
 VMAXNM_dp   1111 1110 1.00 .... .... 1011 .0.0 ....         @vfp_dnm_d
 VMINNM_dp   1111 1110 1.00 .... .... 1011 .1.0 ....         @vfp_dnm_d
 
+VRINT       1111 1110 1.11 10 rm:2 .... 1001 01.0 .... \
+            vm=%vm_sp vd=%vd_sp sz=1
 VRINT       1111 1110 1.11 10 rm:2 .... 1010 01.0 .... \
-            vm=%vm_sp vd=%vd_sp dp=0
+            vm=%vm_sp vd=%vd_sp sz=2
 VRINT       1111 1110 1.11 10 rm:2 .... 1011 01.0 .... \
-            vm=%vm_dp vd=%vd_dp dp=1
+            vm=%vm_dp vd=%vd_dp sz=3
 
 # VCVT float to int with specified rounding mode; Vd is always single-precision
 VCVT        1111 1110 1.11 11 rm:2 .... 1001 op:1 1.0 .... \
diff --git a/target/arm/vfp.decode b/target/arm/vfp.decode
index a8f1137be1e..9a79e99f1b0 100644
--- a/target/arm/vfp.decode
+++ b/target/arm/vfp.decode
@@ -195,12 +195,15 @@ VCVT_f16_f32 ---- 1110 1.11 0011 .... 1010 t:1 1.0 .... \
 VCVT_f16_f64 ---- 1110 1.11 0011 .... 1011 t:1 1.0 .... \
              vd=%vd_sp vm=%vm_dp
 
+VRINTR_hp    ---- 1110 1.11 0110 .... 1001 01.0 ....        @vfp_dm_ss
 VRINTR_sp    ---- 1110 1.11 0110 .... 1010 01.0 ....        @vfp_dm_ss
 VRINTR_dp    ---- 1110 1.11 0110 .... 1011 01.0 ....        @vfp_dm_dd
 
+VRINTZ_hp    ---- 1110 1.11 0110 .... 1001 11.0 ....        @vfp_dm_ss
 VRINTZ_sp    ---- 1110 1.11 0110 .... 1010 11.0 ....        @vfp_dm_ss
 VRINTZ_dp    ---- 1110 1.11 0110 .... 1011 11.0 ....        @vfp_dm_dd
 
+VRINTX_hp    ---- 1110 1.11 0111 .... 1001 01.0 ....        @vfp_dm_ss
 VRINTX_sp    ---- 1110 1.11 0111 .... 1010 01.0 ....        @vfp_dm_ss
 VRINTX_dp    ---- 1110 1.11 0111 .... 1011 01.0 ....        @vfp_dm_dd
 
diff --git a/target/arm/vfp_helper.c b/target/arm/vfp_helper.c
index c88ace3c566..5b8b4219615 100644
--- a/target/arm/vfp_helper.c
+++ b/target/arm/vfp_helper.c
@@ -1018,6 +1018,11 @@ float64 VFP_HELPER(muladd, d)(float64 a, float64 b, float64 c, void *fpstp)
 }
 
 /* ARMv8 round to integral */
+float32 HELPER(rinth_exact)(float32 x, void *fp_status)
+{
+    return float16_round_to_int(x, fp_status);
+}
+
 float32 HELPER(rints_exact)(float32 x, void *fp_status)
 {
     return float32_round_to_int(x, fp_status);
@@ -1028,6 +1033,22 @@ float64 HELPER(rintd_exact)(float64 x, void *fp_status)
     return float64_round_to_int(x, fp_status);
 }
 
+float32 HELPER(rinth)(float32 x, void *fp_status)
+{
+    int old_flags = get_float_exception_flags(fp_status), new_flags;
+    float32 ret;
+
+    ret = float16_round_to_int(x, fp_status);
+
+    /* Suppress any inexact exceptions the rounding produced */
+    if (!(old_flags & float_flag_inexact)) {
+        new_flags = get_float_exception_flags(fp_status);
+        set_float_exception_flags(new_flags & ~float_flag_inexact, fp_status);
+    }
+
+    return ret;
+}
+
 float32 HELPER(rints)(float32 x, void *fp_status)
 {
     int old_flags = get_float_exception_flags(fp_status), new_flags;
diff --git a/target/arm/translate-vfp.c.inc b/target/arm/translate-vfp.c.inc
index 869b67b2b93..7ce044fa896 100644
--- a/target/arm/translate-vfp.c.inc
+++ b/target/arm/translate-vfp.c.inc
@@ -341,7 +341,7 @@ static const uint8_t fp_decode_rm[] = {
 static bool trans_VRINT(DisasContext *s, arg_VRINT *a)
 {
     uint32_t rd, rm;
-    bool dp = a->dp;
+    int sz = a->sz;
     TCGv_ptr fpst;
     TCGv_i32 tcg_rmode;
     int rounding = fp_decode_rm[a->rm];
@@ -350,12 +350,16 @@ static bool trans_VRINT(DisasContext *s, arg_VRINT *a)
         return false;
     }
 
-    if (dp && !dc_isar_feature(aa32_fpdp_v2, s)) {
+    if (sz == 3 && !dc_isar_feature(aa32_fpdp_v2, s)) {
+        return false;
+    }
+
+    if (sz == 1 && !dc_isar_feature(aa32_fp16_arith, s)) {
         return false;
     }
 
     /* UNDEF accesses to D16-D31 if they don't exist */
-    if (dp && !dc_isar_feature(aa32_simd_r32, s) &&
+    if (sz == 3 && !dc_isar_feature(aa32_simd_r32, s) &&
         ((a->vm | a->vd) & 0x10)) {
         return false;
     }
@@ -367,12 +371,16 @@ static bool trans_VRINT(DisasContext *s, arg_VRINT *a)
         return true;
     }
 
-    fpst = fpstatus_ptr(FPST_FPCR);
+    if (sz == 1) {
+        fpst = fpstatus_ptr(FPST_FPCR_F16);
+    } else {
+        fpst = fpstatus_ptr(FPST_FPCR);
+    }
 
     tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rounding));
     gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
 
-    if (dp) {
+    if (sz == 3) {
         TCGv_i64 tcg_op;
         TCGv_i64 tcg_res;
         tcg_op = tcg_temp_new_i64();
@@ -388,7 +396,11 @@ static bool trans_VRINT(DisasContext *s, arg_VRINT *a)
         tcg_op = tcg_temp_new_i32();
         tcg_res = tcg_temp_new_i32();
         neon_load_reg32(tcg_op, rm);
-        gen_helper_rints(tcg_res, tcg_op, fpst);
+        if (sz == 1) {
+            gen_helper_rinth(tcg_res, tcg_op, fpst);
+        } else {
+            gen_helper_rints(tcg_res, tcg_op, fpst);
+        }
         neon_store_reg32(tcg_res, rd);
         tcg_temp_free_i32(tcg_op);
         tcg_temp_free_i32(tcg_res);
@@ -2638,6 +2650,29 @@ static bool trans_VCVT_f16_f64(DisasContext *s, arg_VCVT_f16_f64 *a)
     return true;
 }
 
+static bool trans_VRINTR_hp(DisasContext *s, arg_VRINTR_sp *a)
+{
+    TCGv_ptr fpst;
+    TCGv_i32 tmp;
+
+    if (!dc_isar_feature(aa32_fp16_arith, s)) {
+        return false;
+    }
+
+    if (!vfp_access_check(s)) {
+        return true;
+    }
+
+    tmp = tcg_temp_new_i32();
+    neon_load_reg32(tmp, a->vm);
+    fpst = fpstatus_ptr(FPST_FPCR_F16);
+    gen_helper_rinth(tmp, tmp, fpst);
+    neon_store_reg32(tmp, a->vd);
+    tcg_temp_free_ptr(fpst);
+    tcg_temp_free_i32(tmp);
+    return true;
+}
+
 static bool trans_VRINTR_sp(DisasContext *s, arg_VRINTR_sp *a)
 {
     TCGv_ptr fpst;
@@ -2693,6 +2728,34 @@ static bool trans_VRINTR_dp(DisasContext *s, arg_VRINTR_dp *a)
     return true;
 }
 
+static bool trans_VRINTZ_hp(DisasContext *s, arg_VRINTZ_sp *a)
+{
+    TCGv_ptr fpst;
+    TCGv_i32 tmp;
+    TCGv_i32 tcg_rmode;
+
+    if (!dc_isar_feature(aa32_fp16_arith, s)) {
+        return false;
+    }
+
+    if (!vfp_access_check(s)) {
+        return true;
+    }
+
+    tmp = tcg_temp_new_i32();
+    neon_load_reg32(tmp, a->vm);
+    fpst = fpstatus_ptr(FPST_FPCR_F16);
+    tcg_rmode = tcg_const_i32(float_round_to_zero);
+    gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
+    gen_helper_rinth(tmp, tmp, fpst);
+    gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
+    neon_store_reg32(tmp, a->vd);
+    tcg_temp_free_ptr(fpst);
+    tcg_temp_free_i32(tcg_rmode);
+    tcg_temp_free_i32(tmp);
+    return true;
+}
+
 static bool trans_VRINTZ_sp(DisasContext *s, arg_VRINTZ_sp *a)
 {
     TCGv_ptr fpst;
@@ -2758,6 +2821,29 @@ static bool trans_VRINTZ_dp(DisasContext *s, arg_VRINTZ_dp *a)
     return true;
 }
 
+static bool trans_VRINTX_hp(DisasContext *s, arg_VRINTX_sp *a)
+{
+    TCGv_ptr fpst;
+    TCGv_i32 tmp;
+
+    if (!dc_isar_feature(aa32_fp16_arith, s)) {
+        return false;
+    }
+
+    if (!vfp_access_check(s)) {
+        return true;
+    }
+
+    tmp = tcg_temp_new_i32();
+    neon_load_reg32(tmp, a->vm);
+    fpst = fpstatus_ptr(FPST_FPCR_F16);
+    gen_helper_rinth_exact(tmp, tmp, fpst);
+    neon_store_reg32(tmp, a->vd);
+    tcg_temp_free_ptr(fpst);
+    tcg_temp_free_i32(tmp);
+    return true;
+}
+
 static bool trans_VRINTX_sp(DisasContext *s, arg_VRINTX_sp *a)
 {
     TCGv_ptr fpst;
-- 
2.20.1



  parent reply	other threads:[~2020-08-24 14:38 UTC|newest]

Thread overview: 47+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-08-24 14:29 [PATCH 00/22] target/arm: Implement fp16 for AArch32 VFP Peter Maydell
2020-08-24 14:29 ` [PATCH 01/22] target/arm: Remove local definitions of float constants Peter Maydell
2020-08-25 18:04   ` Richard Henderson
2020-08-24 14:29 ` [PATCH 02/22] target/arm: Use correct ID register check for aa32_fp16_arith Peter Maydell
2020-08-25 18:06   ` Richard Henderson
2020-08-27 13:46     ` Peter Maydell
2020-08-24 14:29 ` [PATCH 03/22] target/arm: Implement VFP fp16 for VFP_BINOP operations Peter Maydell
2020-08-25 18:14   ` Richard Henderson
2020-08-27 13:39     ` Peter Maydell
2020-08-24 14:29 ` [PATCH 04/22] target/arm: Implement VFP fp16 VMLA, VMLS, VNMLS, VNMLA, VNMUL Peter Maydell
2020-08-25 18:18   ` Richard Henderson
2020-08-24 14:29 ` [PATCH 05/22] target/arm: Macroify trans functions for VFMA, VFMS, VFNMA, VFNMS Peter Maydell
2020-08-25 18:19   ` Richard Henderson
2020-08-24 14:29 ` [PATCH 06/22] target/arm: Implement VFP fp16 for fused-multiply-add Peter Maydell
2020-08-25 18:21   ` Richard Henderson
2020-08-24 14:29 ` [PATCH 07/22] target/arm: Macroify uses of do_vfp_2op_sp() and do_vfp_2op_dp() Peter Maydell
2020-08-25 18:22   ` Richard Henderson
2020-08-24 14:29 ` [PATCH 08/22] target/arm: Implement VFP fp16 for VABS, VNEG, VSQRT Peter Maydell
2020-08-25 18:24   ` Richard Henderson
2020-08-24 14:29 ` [PATCH 09/22] target/arm: Implement VFP fp16 for VMOV immediate Peter Maydell
2020-08-25 18:25   ` Richard Henderson
2020-08-24 14:29 ` [PATCH 10/22] target/arm: Implement VFP fp16 VCMP Peter Maydell
2020-08-25 18:39   ` Richard Henderson
2020-08-24 14:29 ` [PATCH 11/22] target/arm: Implement VFP fp16 VLDR and VSTR Peter Maydell
2020-08-25 18:44   ` Richard Henderson
2020-08-24 14:29 ` [PATCH 12/22] target/arm: Implement VFP fp16 VCVT between float and integer Peter Maydell
2020-08-25 18:45   ` Richard Henderson
2020-08-24 14:29 ` [PATCH 13/22] target/arm: Make VFP_CONV_FIX macros take separate float type and float size Peter Maydell
2020-08-25 18:47   ` Richard Henderson
2020-08-24 14:29 ` [PATCH 14/22] target/arm: Use macros instead of open-coding fp16 conversion helpers Peter Maydell
2020-08-25 18:48   ` Richard Henderson
2020-08-24 14:29 ` [PATCH 15/22] target/arm: Implement VFP fp16 VCVT between float and fixed-point Peter Maydell
2020-08-25 18:49   ` Richard Henderson
2020-08-24 14:29 ` [PATCH 16/22] target/arm: Implement VFP vp16 VCVT-with-specified-rounding-mode Peter Maydell
2020-08-25 18:51   ` Richard Henderson
2020-08-24 14:29 ` [PATCH 17/22] target/arm: Implement VFP fp16 VSEL Peter Maydell
2020-08-25 19:19   ` Richard Henderson
2020-08-24 14:29 ` Peter Maydell [this message]
2020-08-25 19:21   ` [PATCH 18/22] target/arm: Implement VFP fp16 VRINT* Richard Henderson
2020-08-24 14:29 ` [PATCH 19/22] target/arm: Implement new VFP fp16 insn VINS Peter Maydell
2020-08-25 19:23   ` Richard Henderson
2020-08-24 14:29 ` [PATCH 20/22] target/arm: Implement new VFP fp16 insn VMOVX Peter Maydell
2020-08-25 19:25   ` Richard Henderson
2020-08-24 14:29 ` [PATCH 21/22] target/arm: Implement VFP fp16 VMOV between gp and halfprec registers Peter Maydell
2020-08-25 19:29   ` Richard Henderson
2020-08-24 14:29 ` [PATCH 22/22] target/arm: Enable FP16 in '-cpu max' Peter Maydell
2020-08-25 19:30   ` Richard Henderson

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20200824142934.20850-19-peter.maydell@linaro.org \
    --to=peter.maydell@linaro.org \
    --cc=qemu-arm@nongnu.org \
    --cc=qemu-devel@nongnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.

Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for NNTP newsgroup(s).