From: "Emilio G. Cota" <cota@braap.org>
To: qemu-devel@nongnu.org
Cc: "Aurelien Jarno" <aurelien@aurel32.net>,
	"Peter Maydell" <peter.maydell@linaro.org>,
	"Alex Bennée" <alex.bennee@linaro.org>,
	"Laurent Vivier" <laurent@vivier.eu>,
	"Richard Henderson" <richard.henderson@linaro.org>,
	"Paolo Bonzini" <pbonzini@redhat.com>,
	"Mark Cave-Ayland" <mark.cave-ayland@ilande.co.uk>
Subject: [Qemu-devel] [PATCH v2 08/14] hardfloat: support float32/64 addition and subtraction
Date: Tue, 27 Mar 2018 01:33:54 -0400
Message-ID: <1522128840-498-9-git-send-email-cota@braap.org>
In-Reply-To: <1522128840-498-1-git-send-email-cota@braap.org>

Note that for float32 we do most of the checks on the float32 (i.e. the
soft-float representation) rather than on the host's native type, whereas
for float64 we do the opposite. Doing it this way for each precision is
faster than using either approach uniformly for both, as shown below.

I am keeping both macro-based definitions to ease testing of
either option.
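
For reference, here is a minimal standalone sketch of the two
classification styles being compared: checking the soft-float bit
pattern directly versus converting to the host type and classifying
with fpclassify(). This is an illustration only, not the code in this
patch; the uint32_t-based f32 typedef and the helper names are
hypothetical stand-ins for QEMU's float32 and its helpers.

  #include <math.h>
  #include <stdbool.h>
  #include <stdint.h>
  #include <string.h>

  typedef uint32_t f32; /* IEEE-754 binary32 bit pattern */

  /* Option 1: classify on the soft-float bit pattern itself. */
  static bool f32_is_normal_or_zero_bits(f32 a)
  {
      uint32_t exp = (a >> 23) & 0xff;

      /* exp == 0xff is inf/NaN; exp == 0 is zero or subnormal */
      return exp != 0xff && (exp != 0 || (a & 0x7fffffff) == 0);
  }

  /* Option 2: convert to the host type and use fpclassify(). */
  static bool f32_is_normal_or_zero_host(f32 a)
  {
      float h;

      memcpy(&h, &a, sizeof(h)); /* reinterpret the bits as a host float */
      return fpclassify(h) == FP_NORMAL || fpclassify(h) == FP_ZERO;
  }

The split used in this commit (bit-pattern checks for float32,
host-type checks for float64) matches the fastest variant for each
precision in the measurements below.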

Performance results (single and double precision) for fp-bench
run under aarch64-linux-user on an Intel(R) Core(TM) i7-4790K
CPU @ 4.00GHz host:

- before:
add-single: 86.74 MFlops
add-double: 86.46 MFlops
sub-single: 83.33 MFlops
sub-double: 84.57 MFlops

- after this commit:
add-single: 188.89 MFlops
add-double: 172.27 MFlops
sub-single: 187.69 MFlops
sub-double: 171.89 MFlops

- w/ both using float32/64_is_normal etc.:
add-single: 187.63 MFlops
add-double: 143.51 MFlops
sub-single: 187.91 MFlops
sub-double: 144.23 MFlops

- w/ both using fpclassify etc.:
add-single: 166.61 MFlops
add-double: 172.32 MFlops
sub-single: 169.13 MFlops
sub-double: 173.09 MFlops

Signed-off-by: Emilio G. Cota <cota@braap.org>
---
 fpu/softfloat.c | 120 ++++++++++++++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 112 insertions(+), 8 deletions(-)

diff --git a/fpu/softfloat.c b/fpu/softfloat.c
index ffe16b2..e0ab0ca 100644
--- a/fpu/softfloat.c
+++ b/fpu/softfloat.c
@@ -829,8 +829,8 @@ float16  __attribute__((flatten)) float16_add(float16 a, float16 b,
     return float16_round_pack_canonical(pr, status);
 }
 
-float32 __attribute__((flatten)) float32_add(float32 a, float32 b,
-                                             float_status *status)
+static float32 __attribute__((flatten, noinline))
+soft_float32_add(float32 a, float32 b, float_status *status)
 {
     FloatParts pa = float32_unpack_canonical(a, status);
     FloatParts pb = float32_unpack_canonical(b, status);
@@ -839,8 +839,8 @@ float32 __attribute__((flatten)) float32_add(float32 a, float32 b,
     return float32_round_pack_canonical(pr, status);
 }
 
-float64 __attribute__((flatten)) float64_add(float64 a, float64 b,
-                                             float_status *status)
+static float64 __attribute__((flatten, noinline))
+soft_float64_add(float64 a, float64 b, float_status *status)
 {
     FloatParts pa = float64_unpack_canonical(a, status);
     FloatParts pb = float64_unpack_canonical(b, status);
@@ -859,8 +859,8 @@ float16 __attribute__((flatten)) float16_sub(float16 a, float16 b,
     return float16_round_pack_canonical(pr, status);
 }
 
-float32 __attribute__((flatten)) float32_sub(float32 a, float32 b,
-                                             float_status *status)
+static float32 __attribute__((flatten, noinline))
+soft_float32_sub(float32 a, float32 b, float_status *status)
 {
     FloatParts pa = float32_unpack_canonical(a, status);
     FloatParts pb = float32_unpack_canonical(b, status);
@@ -869,8 +869,8 @@ float32 __attribute__((flatten)) float32_sub(float32 a, float32 b,
     return float32_round_pack_canonical(pr, status);
 }
 
-float64 __attribute__((flatten)) float64_sub(float64 a, float64 b,
-                                             float_status *status)
+static float64 __attribute__((flatten, noinline))
+soft_float64_sub(float64 a, float64 b, float_status *status)
 {
     FloatParts pa = float64_unpack_canonical(a, status);
     FloatParts pb = float64_unpack_canonical(b, status);
@@ -879,6 +879,110 @@ float64 __attribute__((flatten)) float64_sub(float64 a, float64 b,
     return float64_round_pack_canonical(pr, status);
 }
 
+#define GEN_FPU_ADDSUB(add_name, sub_name, soft_t, host_t,              \
+                       host_abs_func, min_normal)                       \
+    static inline __attribute__((always_inline)) soft_t                 \
+    fpu_ ## soft_t ## _addsub(soft_t a, soft_t b, bool subtract,        \
+                              float_status *s)                          \
+    {                                                                   \
+        soft_t ## _input_flush2(&a, &b, s);                             \
+        if (likely((soft_t ## _is_normal(a) || soft_t ## _is_zero(a)) && \
+                   (soft_t ## _is_normal(b) || soft_t ## _is_zero(b)) && \
+                   s->float_exception_flags & float_flag_inexact &&     \
+                   s->float_rounding_mode == float_round_nearest_even)) { \
+            host_t ha = soft_t ## _to_ ## host_t(a);                    \
+            host_t hb = soft_t ## _to_ ## host_t(b);                    \
+            host_t hr;                                                  \
+            soft_t r;                                                   \
+                                                                        \
+            if (subtract) {                                             \
+                hb = -hb;                                               \
+            }                                                           \
+            hr = ha + hb;                                               \
+            r = host_t ## _to_ ## soft_t(hr);                           \
+            if (unlikely(soft_t ## _is_infinity(r))) {                  \
+                s->float_exception_flags |= float_flag_overflow;        \
+            } else if (unlikely(host_abs_func(hr) <= min_normal) &&     \
+                       !(soft_t ## _is_zero(a) &&                       \
+                         soft_t ## _is_zero(b))) {                      \
+                goto soft;                                              \
+            }                                                           \
+            return r;                                                   \
+        }                                                               \
+    soft:                                                               \
+        if (subtract) {                                                 \
+            return soft_ ## soft_t ## _sub(a, b, s);                    \
+        } else {                                                        \
+            return soft_ ## soft_t ## _add(a, b, s);                    \
+        }                                                               \
+    }                                                                   \
+                                                                        \
+    soft_t add_name(soft_t a, soft_t b, float_status *status)           \
+    {                                                                   \
+        return fpu_ ## soft_t ## _addsub(a, b, false, status);          \
+    }                                                                   \
+                                                                        \
+    soft_t sub_name(soft_t a, soft_t b, float_status *status)           \
+    {                                                                   \
+        return fpu_ ## soft_t ## _addsub(a, b, true, status);           \
+    }
+
+GEN_FPU_ADDSUB(float32_add, float32_sub, float32, float, fabsf, FLT_MIN)
+#undef GEN_FPU_ADDSUB
+
+#define GEN_FPU_ADDSUB(add_name, sub_name, soft_t, host_t,              \
+                       host_abs_func, min_normal)                       \
+    static inline __attribute__((always_inline)) soft_t                 \
+    fpu_ ## soft_t ## _addsub(soft_t a, soft_t b, bool subtract,        \
+                              float_status *s)                          \
+    {                                                                   \
+        double ha, hb;                                                  \
+                                                                        \
+        soft_t ## _input_flush2(&a, &b, s);                             \
+        ha = soft_t ## _to_ ## host_t(a);                               \
+        hb = soft_t ## _to_ ## host_t(b);                               \
+        if (likely((fpclassify(ha) == FP_NORMAL ||                      \
+                    fpclassify(ha) == FP_ZERO) &&                       \
+                   (fpclassify(hb) == FP_NORMAL ||                      \
+                    fpclassify(hb) == FP_ZERO) &&                       \
+                   s->float_exception_flags & float_flag_inexact &&     \
+                   s->float_rounding_mode == float_round_nearest_even)) { \
+            host_t hr;                                                  \
+                                                                        \
+            if (subtract) {                                             \
+                hb = -hb;                                               \
+            }                                                           \
+            hr = ha + hb;                                               \
+            if (unlikely(isinf(hr))) {                                  \
+                s->float_exception_flags |= float_flag_overflow;        \
+            } else if (unlikely(host_abs_func(hr) <= min_normal) &&     \
+                       !(soft_t ## _is_zero(a) &&                       \
+                         soft_t ## _is_zero(b))) {                      \
+                goto soft;                                              \
+            }                                                           \
+            return host_t ## _to_ ## soft_t(hr);                        \
+        }                                                               \
+    soft:                                                               \
+        if (subtract) {                                                 \
+            return soft_ ## soft_t ## _sub(a, b, s);                    \
+        } else {                                                        \
+            return soft_ ## soft_t ## _add(a, b, s);                    \
+        }                                                               \
+    }                                                                   \
+                                                                        \
+    soft_t add_name(soft_t a, soft_t b, float_status *status)           \
+    {                                                                   \
+        return fpu_ ## soft_t ## _addsub(a, b, false, status);          \
+    }                                                                   \
+                                                                        \
+    soft_t sub_name(soft_t a, soft_t b, float_status *status)           \
+    {                                                                   \
+        return fpu_ ## soft_t ## _addsub(a, b, true, status);           \
+    }
+
+GEN_FPU_ADDSUB(float64_add, float64_sub, float64, double, fabs, DBL_MIN)
+#undef GEN_FPU_ADDSUB
+
 /*
  * Returns the result of multiplying the floating-point values `a' and
  * `b'. The operation is performed according to the IEC/IEEE Standard
-- 
2.7.4
