All of lore.kernel.org
 help / color / mirror / Atom feed
From: Taylor Simpson <tsimpson@quicinc.com>
To: qemu-devel@nongnu.org
Cc: ale@rev.ng, philmd@redhat.com, tsimpson@quicinc.com,
	richard.henderson@linaro.org, bcain@quicinc.com
Subject: [PATCH v3 18/26] Hexagon (target/hexagon) add F2_sfinvsqrta
Date: Wed,  7 Apr 2021 20:57:39 -0500	[thread overview]
Message-ID: <1617847067-9867-19-git-send-email-tsimpson@quicinc.com> (raw)
In-Reply-To: <1617847067-9867-1-git-send-email-tsimpson@quicinc.com>

Rd32,Pe4 = sfinvsqrta(Rs32)
    Square root approx

The helper packs the 2 32-bit results into a 64-bit value,
and the fGEN_TCG override unpacks them into the proper results.

Test cases in tests/tcg/hexagon/multi_result.c
FP exception tests added to tests/tcg/hexagon/fpstuff.c

Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Taylor Simpson <tsimpson@quicinc.com>
---
 target/hexagon/arch.c                 | 21 ++++++++++++++++++++-
 target/hexagon/arch.h                 |  2 ++
 target/hexagon/gen_tcg.h              | 16 ++++++++++++++++
 target/hexagon/helper.h               |  1 +
 target/hexagon/imported/encode_pp.def |  1 +
 target/hexagon/imported/float.idef    | 16 ++++++++++++++++
 target/hexagon/op_helper.c            | 21 +++++++++++++++++++++
 tests/tcg/hexagon/fpstuff.c           | 15 +++++++++++++++
 tests/tcg/hexagon/multi_result.c      | 29 +++++++++++++++++++++++++++++
 9 files changed, 121 insertions(+), 1 deletion(-)

diff --git a/target/hexagon/arch.c b/target/hexagon/arch.c
index 46edf45..dee852e 100644
--- a/target/hexagon/arch.c
+++ b/target/hexagon/arch.c
@@ -247,7 +247,7 @@ int arch_sf_invsqrt_common(float32 *Rs, float32 *Rd, int *adjust,
     int r_exp;
     int ret = 0;
     RsV = *Rs;
-    if (float32_is_infinity(RsV)) {
+    if (float32_is_any_nan(RsV)) {
         if (extract32(RsV, 22, 1) == 0) {
             float_raise(float_flag_invalid, fp_status);
         }
@@ -299,3 +299,22 @@ const uint8_t recip_lookup_table[128] = {
     0x011, 0x00f, 0x00e, 0x00d, 0x00c, 0x00b, 0x00a, 0x009,
     0x008, 0x007, 0x006, 0x005, 0x004, 0x003, 0x002, 0x000,
 };
+
+const uint8_t invsqrt_lookup_table[128] = {
+    0x069, 0x066, 0x063, 0x061, 0x05e, 0x05b, 0x059, 0x057,
+    0x054, 0x052, 0x050, 0x04d, 0x04b, 0x049, 0x047, 0x045,
+    0x043, 0x041, 0x03f, 0x03d, 0x03b, 0x039, 0x037, 0x036,
+    0x034, 0x032, 0x030, 0x02f, 0x02d, 0x02c, 0x02a, 0x028,
+    0x027, 0x025, 0x024, 0x022, 0x021, 0x01f, 0x01e, 0x01d,
+    0x01b, 0x01a, 0x019, 0x017, 0x016, 0x015, 0x014, 0x012,
+    0x011, 0x010, 0x00f, 0x00d, 0x00c, 0x00b, 0x00a, 0x009,
+    0x008, 0x007, 0x006, 0x005, 0x004, 0x003, 0x002, 0x001,
+    0x0fe, 0x0fa, 0x0f6, 0x0f3, 0x0ef, 0x0eb, 0x0e8, 0x0e4,
+    0x0e1, 0x0de, 0x0db, 0x0d7, 0x0d4, 0x0d1, 0x0ce, 0x0cb,
+    0x0c9, 0x0c6, 0x0c3, 0x0c0, 0x0be, 0x0bb, 0x0b8, 0x0b6,
+    0x0b3, 0x0b1, 0x0af, 0x0ac, 0x0aa, 0x0a8, 0x0a5, 0x0a3,
+    0x0a1, 0x09f, 0x09d, 0x09b, 0x099, 0x097, 0x095, 0x093,
+    0x091, 0x08f, 0x08d, 0x08b, 0x089, 0x087, 0x086, 0x084,
+    0x082, 0x080, 0x07f, 0x07d, 0x07b, 0x07a, 0x078, 0x077,
+    0x075, 0x074, 0x072, 0x071, 0x06f, 0x06e, 0x06c, 0x06b,
+};
diff --git a/target/hexagon/arch.h b/target/hexagon/arch.h
index b6634e9..3e0c334 100644
--- a/target/hexagon/arch.h
+++ b/target/hexagon/arch.h
@@ -32,4 +32,6 @@ int arch_sf_invsqrt_common(float32 *Rs, float32 *Rd, int *adjust,
 
 extern const uint8_t recip_lookup_table[128];
 
+extern const uint8_t invsqrt_lookup_table[128];
+
 #endif
diff --git a/target/hexagon/gen_tcg.h b/target/hexagon/gen_tcg.h
index 428a670..d78e7b8 100644
--- a/target/hexagon/gen_tcg.h
+++ b/target/hexagon/gen_tcg.h
@@ -216,6 +216,22 @@
         tcg_temp_free_i64(tmp); \
     } while (0)
 
+/*
+ * Approximation of the reciprocal square root
+ * r1,p0 = sfinvsqrta(r0)
+ *
+ * The helper packs the 2 32-bit results into a 64-bit value,
+ * so unpack them into the proper results.
+ */
+#define fGEN_TCG_F2_sfinvsqrta(SHORTCODE) \
+    do { \
+        TCGv_i64 tmp = tcg_temp_new_i64(); \
+        gen_helper_sfinvsqrta(tmp, cpu_env, RsV); \
+        tcg_gen_extrh_i64_i32(RdV, tmp); \
+        tcg_gen_extrl_i64_i32(PeV, tmp); \
+        tcg_temp_free_i64(tmp); \
+    } while (0)
+
 /* Floating point */
 #define fGEN_TCG_F2_conv_sf2df(SHORTCODE) \
     gen_helper_conv_sf2df(RddV, cpu_env, RsV)
diff --git a/target/hexagon/helper.h b/target/hexagon/helper.h
index b377293..cb7508f 100644
--- a/target/hexagon/helper.h
+++ b/target/hexagon/helper.h
@@ -25,6 +25,7 @@ DEF_HELPER_FLAGS_3(debug_commit_end, TCG_CALL_NO_WG, void, env, int, int)
 DEF_HELPER_2(commit_store, void, env, int)
 DEF_HELPER_FLAGS_4(fcircadd, TCG_CALL_NO_RWG_SE, s32, s32, s32, s32, s32)
 DEF_HELPER_3(sfrecipa, i64, env, f32, f32)
+DEF_HELPER_2(sfinvsqrta, i64, env, f32)
 
 /* Floating point */
 DEF_HELPER_2(conv_sf2df, f64, env, f32)
diff --git a/target/hexagon/imported/encode_pp.def b/target/hexagon/imported/encode_pp.def
index b01b4d7..18fe45d 100644
--- a/target/hexagon/imported/encode_pp.def
+++ b/target/hexagon/imported/encode_pp.def
@@ -1642,6 +1642,7 @@ SH2_RR_ENC(F2_conv_sf2w,          "1011","100","-","000","ddddd")
 SH2_RR_ENC(F2_conv_sf2uw_chop,    "1011","011","-","001","ddddd")
 SH2_RR_ENC(F2_conv_sf2w_chop,     "1011","100","-","001","ddddd")
 SH2_RR_ENC(F2_sffixupr,           "1011","101","-","000","ddddd")
+SH2_RR_ENC(F2_sfinvsqrta,         "1011","111","-","0ee","ddddd")
 
 
 DEF_FIELDROW_DESC32(ICLASS_S2op"      1100 -------- PP------ --------","[#12] Rd=(Rs,#u6)")
diff --git a/target/hexagon/imported/float.idef b/target/hexagon/imported/float.idef
index eb54158..3e75bc4 100644
--- a/target/hexagon/imported/float.idef
+++ b/target/hexagon/imported/float.idef
@@ -178,6 +178,22 @@ Q6INSN(F2_sffixupd,"Rd32=sffixupd(Rs32,Rt32)",ATTRIBS(),
     RdV = RtV;
 })
 
+Q6INSN(F2_sfinvsqrta,"Rd32,Pe4=sfinvsqrta(Rs32)",ATTRIBS(),
+"Reciprocal Square Root Approximation",
+{
+    fHIDE(int idx;)
+    fHIDE(int adjust;)
+    fHIDE(int mant;)
+    fHIDE(int exp;)
+    if (fSF_INVSQRT_COMMON(RsV,RdV,adjust)) {
+        PeV = adjust;
+        idx = (RsV >> 17) & 0x7f;
+        mant = (fSF_INVSQRT_LOOKUP(idx) << 15);
+        exp = fSF_BIAS() - ((fSF_GETEXP(RsV) - fSF_BIAS()) >> 1) - 1;
+        RdV = fMAKESF(fGETBIT(31,RsV),exp,mant);
+    }
+})
+
 Q6INSN(F2_sffixupr,"Rd32=sffixupr(Rs32)",ATTRIBS(),
 "Fix Up Radicand",
 {
diff --git a/target/hexagon/op_helper.c b/target/hexagon/op_helper.c
index 75861e2..a25fb98 100644
--- a/target/hexagon/op_helper.c
+++ b/target/hexagon/op_helper.c
@@ -326,6 +326,27 @@ uint64_t HELPER(sfrecipa)(CPUHexagonState *env, float32 RsV, float32 RtV)
     return ((uint64_t)RdV << 32) | PeV;
 }
 
+uint64_t HELPER(sfinvsqrta)(CPUHexagonState *env, float32 RsV)
+{
+    int PeV = 0;
+    float32 RdV;
+    int idx;
+    int adjust;
+    int mant;
+    int exp;
+
+    arch_fpop_start(env);
+    if (arch_sf_invsqrt_common(&RsV, &RdV, &adjust, &env->fp_status)) {
+        PeV = adjust;
+        idx = (RsV >> 17) & 0x7f;
+        mant = (invsqrt_lookup_table[idx] << 15);
+        exp = SF_BIAS - ((float32_getexp(RsV) - SF_BIAS) >> 1) - 1;
+        RdV = build_float32(extract32(RsV, 31, 1), exp, mant);
+    }
+    arch_fpop_end(env);
+    return ((uint64_t)RdV << 32) | PeV;
+}
+
 /*
  * mem_noshuf
  * Section 5.5 of the Hexagon V67 Programmer's Reference Manual
diff --git a/tests/tcg/hexagon/fpstuff.c b/tests/tcg/hexagon/fpstuff.c
index 8e3ba78..0dff429 100644
--- a/tests/tcg/hexagon/fpstuff.c
+++ b/tests/tcg/hexagon/fpstuff.c
@@ -441,6 +441,20 @@ static void check_canonical_NaN(void)
     check_fpstatus(usr, 0);
 }
 
+static void check_invsqrta(void)
+{
+    int result;
+    int predval;
+
+    asm volatile("%0,p0 = sfinvsqrta(%2)\n\t"
+                 "%1 = p0\n\t"
+                 : "+r"(result), "=r"(predval)
+                 : "r"(0x7f800000)
+                 : "p0");
+    check32(result, 0xff800000);
+    check32(predval, 0x0);
+}
+
 static void check_float2int_convs()
 {
     int res32;
@@ -590,6 +604,7 @@ int main()
     check_dfminmax();
     check_recip_exception();
     check_canonical_NaN();
+    check_invsqrta();
     check_float2int_convs();
 
     puts(err ? "FAIL" : "PASS");
diff --git a/tests/tcg/hexagon/multi_result.c b/tests/tcg/hexagon/multi_result.c
index cb7dd31..67aa462 100644
--- a/tests/tcg/hexagon/multi_result.c
+++ b/tests/tcg/hexagon/multi_result.c
@@ -31,6 +31,20 @@ static int sfrecipa(int Rs, int Rt, int *pred_result)
   return result;
 }
 
+static int sfinvsqrta(int Rs, int *pred_result)
+{
+  int result;
+  int predval;
+
+  asm volatile("%0,p0 = sfinvsqrta(%2)\n\t"
+               "%1 = p0\n\t"
+               : "+r"(result), "=r"(predval)
+               : "r"(Rs)
+               : "p0");
+  *pred_result = predval;
+  return result;
+}
+
 int err;
 
 static void check(int val, int expect)
@@ -59,9 +73,24 @@ static void test_sfrecipa()
     check_p(pred_result, 0x00);
 }
 
+static void test_sfinvsqrta()
+{
+    int res;
+    int pred_result;
+
+    res = sfinvsqrta(0x04030201, &pred_result);
+    check(res, 0x4d330000);
+    check_p(pred_result, 0xe0);
+
+    res = sfinvsqrta(0x0, &pred_result);
+    check(res, 0x3f800000);
+    check_p(pred_result, 0x0);
+}
+
 int main()
 {
     test_sfrecipa();
+    test_sfinvsqrta();
 
     puts(err ? "FAIL" : "PASS");
     return err;
-- 
2.7.4



  parent reply	other threads:[~2021-04-08  2:10 UTC|newest]

Thread overview: 34+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-04-08  1:57 [PATCH v3 00/26] Hexagon (target/hexagon) update Taylor Simpson
2021-04-08  1:57 ` [PATCH v3 01/26] Hexagon (target/hexagon) TCG generation cleanup Taylor Simpson
2021-04-08  1:57 ` [PATCH v3 02/26] Hexagon (target/hexagon) cleanup gen_log_predicated_reg_write_pair Taylor Simpson
2021-04-08 19:05   ` Richard Henderson
2021-04-08  1:57 ` [PATCH v3 03/26] Hexagon (target/hexagon) remove unnecessary inline directives Taylor Simpson
2021-04-08  1:57 ` [PATCH v3 04/26] Hexagon (target/hexagon) use env_archcpu and env_cpu Taylor Simpson
2021-04-08  1:57 ` [PATCH v3 05/26] Hexagon (target/hexagon) properly generate TB end for DISAS_NORETURN Taylor Simpson
2021-04-08  1:57 ` [PATCH v3 06/26] Hexagon (target/hexagon) decide if pred has been written at TCG gen time Taylor Simpson
2021-04-08  1:57 ` [PATCH v3 07/26] Hexagon (target/hexagon) change variables from int to bool when appropriate Taylor Simpson
2021-04-08  1:57 ` [PATCH v3 08/26] Hexagon (target/hexagon) remove unused carry_from_add64 function Taylor Simpson
2021-04-08  1:57 ` [PATCH v3 09/26] Hexagon (target/hexagon) change type of softfloat_roundingmodes Taylor Simpson
2021-04-08  1:57 ` [PATCH v3 10/26] Hexagon (target/hexagon) use softfloat default NaN and tininess Taylor Simpson
2021-04-08  1:57 ` [PATCH v3 11/26] Hexagon (target/hexagon) replace float32_mul_pow2 with float32_scalbn Taylor Simpson
2021-04-08  1:57 ` [PATCH v3 12/26] Hexagon (target/hexagon) use softfloat for float-to-int conversions Taylor Simpson
2021-04-08  1:57 ` [PATCH v3 13/26] Hexagon (target/hexagon) cleanup ternary operators in semantics Taylor Simpson
2021-04-08 19:17   ` Richard Henderson
2021-04-08  1:57 ` [PATCH v3 14/26] Hexagon (target/hexagon) cleanup reg_field_info definition Taylor Simpson
2021-04-08 19:18   ` Richard Henderson
2021-04-08  1:57 ` [PATCH v3 15/26] Hexagon (target/hexagon) move QEMU_GENERATE to only be on during macros.h Taylor Simpson
2021-04-08 19:19   ` Richard Henderson
2021-04-08  1:57 ` [PATCH v3 16/26] Hexagon (target/hexagon) compile all debug code Taylor Simpson
2021-04-08 19:23   ` Richard Henderson
2021-04-08  1:57 ` [PATCH v3 17/26] Hexagon (target/hexagon) add F2_sfrecipa instruction Taylor Simpson
2021-04-08  1:57 ` Taylor Simpson [this message]
2021-04-08  1:57 ` [PATCH v3 19/26] Hexagon (target/hexagon) add A5_ACS (vacsh) Taylor Simpson
2021-04-08 19:28   ` Richard Henderson
2021-04-08  1:57 ` [PATCH v3 20/26] Hexagon (target/hexagon) add A6_vminub_RdP Taylor Simpson
2021-04-08  1:57 ` [PATCH v3 21/26] Hexagon (target/hexagon) add A4_addp_c/A4_subp_c Taylor Simpson
2021-04-08  1:57 ` [PATCH v3 22/26] Hexagon (target/hexagon) circular addressing Taylor Simpson
2021-04-08 19:39   ` Richard Henderson
2021-04-08  1:57 ` [PATCH v3 23/26] Hexagon (target/hexagon) bit reverse (brev) addressing Taylor Simpson
2021-04-08  1:57 ` [PATCH v3 24/26] Hexagon (target/hexagon) load and unpack bytes instructions Taylor Simpson
2021-04-08  1:57 ` [PATCH v3 25/26] Hexagon (target/hexagon) load into shifted register instructions Taylor Simpson
2021-04-08  1:57 ` [PATCH v3 26/26] Hexagon (target/hexagon) CABAC decode bin Taylor Simpson

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1617847067-9867-19-git-send-email-tsimpson@quicinc.com \
    --to=tsimpson@quicinc.com \
    --cc=ale@rev.ng \
    --cc=bcain@quicinc.com \
    --cc=philmd@redhat.com \
    --cc=qemu-devel@nongnu.org \
    --cc=richard.henderson@linaro.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.