* [PULL 01/46] qemu/host-utils: Use __builtin_bitreverseN
2021-05-16 12:33 [PULL 00/46] softfloat: Convert float128 to FloatParts (part 1) Richard Henderson
@ 2021-05-16 12:33 ` Richard Henderson
2021-05-16 12:33 ` [PULL 02/46] qemu/host-utils: Add wrappers for overflow builtins Richard Henderson
` (46 subsequent siblings)
47 siblings, 0 replies; 52+ messages in thread
From: Richard Henderson @ 2021-05-16 12:33 UTC (permalink / raw)
To: qemu-devel; +Cc: peter.maydell, Alex Bennée, David Hildenbrand
Clang has added some builtins for these operations;
use them if available.
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: David Hildenbrand <david@redhat.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
include/qemu/host-utils.h | 16 ++++++++++++++++
1 file changed, 16 insertions(+)
diff --git a/include/qemu/host-utils.h b/include/qemu/host-utils.h
index cdca2991d8..f1e52851e0 100644
--- a/include/qemu/host-utils.h
+++ b/include/qemu/host-utils.h
@@ -272,6 +272,9 @@ static inline int ctpop64(uint64_t val)
*/
static inline uint8_t revbit8(uint8_t x)
{
+#if __has_builtin(__builtin_bitreverse8)
+ return __builtin_bitreverse8(x);
+#else
/* Assign the correct nibble position. */
x = ((x & 0xf0) >> 4)
| ((x & 0x0f) << 4);
@@ -281,6 +284,7 @@ static inline uint8_t revbit8(uint8_t x)
| ((x & 0x22) << 1)
| ((x & 0x11) << 3);
return x;
+#endif
}
/**
@@ -289,6 +293,9 @@ static inline uint8_t revbit8(uint8_t x)
*/
static inline uint16_t revbit16(uint16_t x)
{
+#if __has_builtin(__builtin_bitreverse16)
+ return __builtin_bitreverse16(x);
+#else
/* Assign the correct byte position. */
x = bswap16(x);
/* Assign the correct nibble position. */
@@ -300,6 +307,7 @@ static inline uint16_t revbit16(uint16_t x)
| ((x & 0x2222) << 1)
| ((x & 0x1111) << 3);
return x;
+#endif
}
/**
@@ -308,6 +316,9 @@ static inline uint16_t revbit16(uint16_t x)
*/
static inline uint32_t revbit32(uint32_t x)
{
+#if __has_builtin(__builtin_bitreverse32)
+ return __builtin_bitreverse32(x);
+#else
/* Assign the correct byte position. */
x = bswap32(x);
/* Assign the correct nibble position. */
@@ -319,6 +330,7 @@ static inline uint32_t revbit32(uint32_t x)
| ((x & 0x22222222u) << 1)
| ((x & 0x11111111u) << 3);
return x;
+#endif
}
/**
@@ -327,6 +339,9 @@ static inline uint32_t revbit32(uint32_t x)
*/
static inline uint64_t revbit64(uint64_t x)
{
+#if __has_builtin(__builtin_bitreverse64)
+ return __builtin_bitreverse64(x);
+#else
/* Assign the correct byte position. */
x = bswap64(x);
/* Assign the correct nibble position. */
@@ -338,6 +353,7 @@ static inline uint64_t revbit64(uint64_t x)
| ((x & 0x2222222222222222ull) << 1)
| ((x & 0x1111111111111111ull) << 3);
return x;
+#endif
}
/* Host type specific sizes of these routines. */
--
2.25.1
^ permalink raw reply related [flat|nested] 52+ messages in thread
* [PULL 02/46] qemu/host-utils: Add wrappers for overflow builtins
2021-05-16 12:33 [PULL 00/46] softfloat: Convert float128 to FloatParts (part 1) Richard Henderson
2021-05-16 12:33 ` [PULL 01/46] qemu/host-utils: Use __builtin_bitreverseN Richard Henderson
@ 2021-05-16 12:33 ` Richard Henderson
2021-05-16 12:33 ` [PULL 03/46] qemu/host-utils: Add wrappers for carry builtins Richard Henderson
` (45 subsequent siblings)
47 siblings, 0 replies; 52+ messages in thread
From: Richard Henderson @ 2021-05-16 12:33 UTC (permalink / raw)
To: qemu-devel; +Cc: peter.maydell, Alex Bennée
These builtins came in with gcc 5 and clang 3.8, which are
slightly newer than our supported minimum compiler versions.
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
include/qemu/host-utils.h | 225 ++++++++++++++++++++++++++++++++++++++
1 file changed, 225 insertions(+)
diff --git a/include/qemu/host-utils.h b/include/qemu/host-utils.h
index f1e52851e0..cb95626c7d 100644
--- a/include/qemu/host-utils.h
+++ b/include/qemu/host-utils.h
@@ -356,6 +356,231 @@ static inline uint64_t revbit64(uint64_t x)
#endif
}
+/**
+ * sadd32_overflow - addition with overflow indication
+ * @x, @y: addends
+ * @ret: Output for sum
+ *
+ * Computes *@ret = @x + @y, and returns true if and only if that
+ * value has been truncated.
+ */
+static inline bool sadd32_overflow(int32_t x, int32_t y, int32_t *ret)
+{
+#if __has_builtin(__builtin_add_overflow) || __GNUC__ >= 5
+ return __builtin_add_overflow(x, y, ret);
+#else
+ *ret = x + y;
+ return ((*ret ^ x) & ~(x ^ y)) < 0;
+#endif
+}
+
+/**
+ * sadd64_overflow - addition with overflow indication
+ * @x, @y: addends
+ * @ret: Output for sum
+ *
+ * Computes *@ret = @x + @y, and returns true if and only if that
+ * value has been truncated.
+ */
+static inline bool sadd64_overflow(int64_t x, int64_t y, int64_t *ret)
+{
+#if __has_builtin(__builtin_add_overflow) || __GNUC__ >= 5
+ return __builtin_add_overflow(x, y, ret);
+#else
+ *ret = x + y;
+ return ((*ret ^ x) & ~(x ^ y)) < 0;
+#endif
+}
+
+/**
+ * uadd32_overflow - addition with overflow indication
+ * @x, @y: addends
+ * @ret: Output for sum
+ *
+ * Computes *@ret = @x + @y, and returns true if and only if that
+ * value has been truncated.
+ */
+static inline bool uadd32_overflow(uint32_t x, uint32_t y, uint32_t *ret)
+{
+#if __has_builtin(__builtin_add_overflow) || __GNUC__ >= 5
+ return __builtin_add_overflow(x, y, ret);
+#else
+ *ret = x + y;
+ return *ret < x;
+#endif
+}
+
+/**
+ * uadd64_overflow - addition with overflow indication
+ * @x, @y: addends
+ * @ret: Output for sum
+ *
+ * Computes *@ret = @x + @y, and returns true if and only if that
+ * value has been truncated.
+ */
+static inline bool uadd64_overflow(uint64_t x, uint64_t y, uint64_t *ret)
+{
+#if __has_builtin(__builtin_add_overflow) || __GNUC__ >= 5
+ return __builtin_add_overflow(x, y, ret);
+#else
+ *ret = x + y;
+ return *ret < x;
+#endif
+}
+
+/**
+ * ssub32_overflow - subtraction with overflow indication
+ * @x: Minuend
+ * @y: Subtrahend
+ * @ret: Output for difference
+ *
+ * Computes *@ret = @x - @y, and returns true if and only if that
+ * value has been truncated.
+ */
+static inline bool ssub32_overflow(int32_t x, int32_t y, int32_t *ret)
+{
+#if __has_builtin(__builtin_sub_overflow) || __GNUC__ >= 5
+ return __builtin_sub_overflow(x, y, ret);
+#else
+ *ret = x - y;
+ return ((*ret ^ x) & (x ^ y)) < 0;
+#endif
+}
+
+/**
+ * ssub64_overflow - subtraction with overflow indication
+ * @x: Minuend
+ * @y: Subtrahend
+ * @ret: Output for difference
+ *
+ * Computes *@ret = @x - @y, and returns true if and only if that
+ * value has been truncated.
+ */
+static inline bool ssub64_overflow(int64_t x, int64_t y, int64_t *ret)
+{
+#if __has_builtin(__builtin_sub_overflow) || __GNUC__ >= 5
+ return __builtin_sub_overflow(x, y, ret);
+#else
+ *ret = x - y;
+ return ((*ret ^ x) & (x ^ y)) < 0;
+#endif
+}
+
+/**
+ * usub32_overflow - subtraction with overflow indication
+ * @x: Minuend
+ * @y: Subtrahend
+ * @ret: Output for difference
+ *
+ * Computes *@ret = @x - @y, and returns true if and only if that
+ * value has been truncated.
+ */
+static inline bool usub32_overflow(uint32_t x, uint32_t y, uint32_t *ret)
+{
+#if __has_builtin(__builtin_sub_overflow) || __GNUC__ >= 5
+ return __builtin_sub_overflow(x, y, ret);
+#else
+ *ret = x - y;
+ return x < y;
+#endif
+}
+
+/**
+ * usub64_overflow - subtraction with overflow indication
+ * @x: Minuend
+ * @y: Subtrahend
+ * @ret: Output for difference
+ *
+ * Computes *@ret = @x - @y, and returns true if and only if that
+ * value has been truncated.
+ */
+static inline bool usub64_overflow(uint64_t x, uint64_t y, uint64_t *ret)
+{
+#if __has_builtin(__builtin_sub_overflow) || __GNUC__ >= 5
+ return __builtin_sub_overflow(x, y, ret);
+#else
+ *ret = x - y;
+ return x < y;
+#endif
+}
+
+/**
+ * smul32_overflow - multiplication with overflow indication
+ * @x, @y: Input multipliers
+ * @ret: Output for product
+ *
+ * Computes *@ret = @x * @y, and returns true if and only if that
+ * value has been truncated.
+ */
+static inline bool smul32_overflow(int32_t x, int32_t y, int32_t *ret)
+{
+#if __has_builtin(__builtin_mul_overflow) || __GNUC__ >= 5
+ return __builtin_mul_overflow(x, y, ret);
+#else
+ int64_t z = (int64_t)x * y;
+ *ret = z;
+ return *ret != z;
+#endif
+}
+
+/**
+ * smul64_overflow - multiplication with overflow indication
+ * @x, @y: Input multipliers
+ * @ret: Output for product
+ *
+ * Computes *@ret = @x * @y, and returns true if and only if that
+ * value has been truncated.
+ */
+static inline bool smul64_overflow(int64_t x, int64_t y, int64_t *ret)
+{
+#if __has_builtin(__builtin_mul_overflow) || __GNUC__ >= 5
+ return __builtin_mul_overflow(x, y, ret);
+#else
+ uint64_t hi, lo;
+ muls64(&lo, &hi, x, y);
+ *ret = lo;
+ return hi != ((int64_t)lo >> 63);
+#endif
+}
+
+/**
+ * umul32_overflow - multiplication with overflow indication
+ * @x, @y: Input multipliers
+ * @ret: Output for product
+ *
+ * Computes *@ret = @x * @y, and returns true if and only if that
+ * value has been truncated.
+ */
+static inline bool umul32_overflow(uint32_t x, uint32_t y, uint32_t *ret)
+{
+#if __has_builtin(__builtin_mul_overflow) || __GNUC__ >= 5
+ return __builtin_mul_overflow(x, y, ret);
+#else
+ uint64_t z = (uint64_t)x * y;
+ *ret = z;
+ return z > UINT32_MAX;
+#endif
+}
+
+/**
+ * umul64_overflow - multiplication with overflow indication
+ * @x, @y: Input multipliers
+ * @ret: Output for product
+ *
+ * Computes *@ret = @x * @y, and returns true if and only if that
+ * value has been truncated.
+ */
+static inline bool umul64_overflow(uint64_t x, uint64_t y, uint64_t *ret)
+{
+#if __has_builtin(__builtin_mul_overflow) || __GNUC__ >= 5
+ return __builtin_mul_overflow(x, y, ret);
+#else
+ uint64_t hi;
+ mulu64(ret, &hi, x, y);
+ return hi != 0;
+#endif
+}
+
/* Host type specific sizes of these routines. */
#if ULONG_MAX == UINT32_MAX
--
2.25.1
^ permalink raw reply related [flat|nested] 52+ messages in thread
* [PULL 03/46] qemu/host-utils: Add wrappers for carry builtins
2021-05-16 12:33 [PULL 00/46] softfloat: Convert float128 to FloatParts (part 1) Richard Henderson
2021-05-16 12:33 ` [PULL 01/46] qemu/host-utils: Use __builtin_bitreverseN Richard Henderson
2021-05-16 12:33 ` [PULL 02/46] qemu/host-utils: Add wrappers for overflow builtins Richard Henderson
@ 2021-05-16 12:33 ` Richard Henderson
2021-05-16 12:33 ` [PULL 04/46] accel/tcg: Use add/sub overflow routines in tcg-runtime-gvec.c Richard Henderson
` (44 subsequent siblings)
47 siblings, 0 replies; 52+ messages in thread
From: Richard Henderson @ 2021-05-16 12:33 UTC (permalink / raw)
To: qemu-devel; +Cc: peter.maydell, Alex Bennée
These builtins came in clang 3.8, but are not present in gcc through
version 11. Even in clang the optimization is only ideal on x86_64,
but never worse than the hand-coding that we currently do.
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
include/qemu/host-utils.h | 50 +++++++++++++++++++++++++++++++++++++++
1 file changed, 50 insertions(+)
diff --git a/include/qemu/host-utils.h b/include/qemu/host-utils.h
index cb95626c7d..711b221704 100644
--- a/include/qemu/host-utils.h
+++ b/include/qemu/host-utils.h
@@ -26,6 +26,7 @@
#ifndef HOST_UTILS_H
#define HOST_UTILS_H
+#include "qemu/compiler.h"
#include "qemu/bswap.h"
#ifdef CONFIG_INT128
@@ -581,6 +582,55 @@ static inline bool umul64_overflow(uint64_t x, uint64_t y, uint64_t *ret)
#endif
}
+/**
+ * uadd64_carry - addition with carry-in and carry-out
+ * @x, @y: addends
+ * @pcarry: in-out carry value
+ *
+ * Computes @x + @y + *@pcarry, placing the carry-out back
+ * into *@pcarry and returning the 64-bit sum.
+ */
+static inline uint64_t uadd64_carry(uint64_t x, uint64_t y, bool *pcarry)
+{
+#if __has_builtin(__builtin_addcll)
+ unsigned long long c = *pcarry;
+ x = __builtin_addcll(x, y, c, &c);
+ *pcarry = c & 1;
+ return x;
+#else
+ bool c = *pcarry;
+ /* This is clang's internal expansion of __builtin_addc. */
+ c = uadd64_overflow(x, c, &x);
+ c |= uadd64_overflow(x, y, &x);
+ *pcarry = c;
+ return x;
+#endif
+}
+
+/**
+ * usub64_borrow - subtraction with borrow-in and borrow-out
+ * @x: Minuend
+ * @y: Subtrahend
+ * @pborrow: in-out borrow value
+ *
+ * Computes @x - @y - *@pborrow, placing the borrow-out back
+ * into *@pborrow and returning the 64-bit difference.
+ */
+static inline uint64_t usub64_borrow(uint64_t x, uint64_t y, bool *pborrow)
+{
+#if __has_builtin(__builtin_subcll)
+ unsigned long long b = *pborrow;
+ x = __builtin_subcll(x, y, b, &b);
+ *pborrow = b & 1;
+ return x;
+#else
+ bool b = *pborrow;
+ b = usub64_overflow(x, b, &x);
+ b |= usub64_overflow(x, y, &x);
+ *pborrow = b;
+ return x;
+#endif
+}
+
/* Host type specific sizes of these routines. */
#if ULONG_MAX == UINT32_MAX
--
2.25.1
^ permalink raw reply related [flat|nested] 52+ messages in thread
* [PULL 04/46] accel/tcg: Use add/sub overflow routines in tcg-runtime-gvec.c
2021-05-16 12:33 [PULL 00/46] softfloat: Convert float128 to FloatParts (part 1) Richard Henderson
` (2 preceding siblings ...)
2021-05-16 12:33 ` [PULL 03/46] qemu/host-utils: Add wrappers for carry builtins Richard Henderson
@ 2021-05-16 12:33 ` Richard Henderson
2021-05-16 12:33 ` [PULL 05/46] tests/fp: add quad support to the benchmark utility Richard Henderson
` (43 subsequent siblings)
47 siblings, 0 replies; 52+ messages in thread
From: Richard Henderson @ 2021-05-16 12:33 UTC (permalink / raw)
To: qemu-devel
Cc: peter.maydell, Alex Bennée, Philippe Mathieu-Daudé,
David Hildenbrand
Obvious uses of the new functions.
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: David Hildenbrand <david@redhat.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
accel/tcg/tcg-runtime-gvec.c | 36 ++++++++++++++++--------------------
1 file changed, 16 insertions(+), 20 deletions(-)
diff --git a/accel/tcg/tcg-runtime-gvec.c b/accel/tcg/tcg-runtime-gvec.c
index 521da4a813..ac7d28c251 100644
--- a/accel/tcg/tcg-runtime-gvec.c
+++ b/accel/tcg/tcg-runtime-gvec.c
@@ -1073,9 +1073,8 @@ void HELPER(gvec_ssadd32)(void *d, void *a, void *b, uint32_t desc)
for (i = 0; i < oprsz; i += sizeof(int32_t)) {
int32_t ai = *(int32_t *)(a + i);
int32_t bi = *(int32_t *)(b + i);
- int32_t di = ai + bi;
- if (((di ^ ai) &~ (ai ^ bi)) < 0) {
- /* Signed overflow. */
+ int32_t di;
+ if (sadd32_overflow(ai, bi, &di)) {
di = (di < 0 ? INT32_MAX : INT32_MIN);
}
*(int32_t *)(d + i) = di;
@@ -1091,9 +1090,8 @@ void HELPER(gvec_ssadd64)(void *d, void *a, void *b, uint32_t desc)
for (i = 0; i < oprsz; i += sizeof(int64_t)) {
int64_t ai = *(int64_t *)(a + i);
int64_t bi = *(int64_t *)(b + i);
- int64_t di = ai + bi;
- if (((di ^ ai) &~ (ai ^ bi)) < 0) {
- /* Signed overflow. */
+ int64_t di;
+ if (sadd64_overflow(ai, bi, &di)) {
di = (di < 0 ? INT64_MAX : INT64_MIN);
}
*(int64_t *)(d + i) = di;
@@ -1143,9 +1141,8 @@ void HELPER(gvec_sssub32)(void *d, void *a, void *b, uint32_t desc)
for (i = 0; i < oprsz; i += sizeof(int32_t)) {
int32_t ai = *(int32_t *)(a + i);
int32_t bi = *(int32_t *)(b + i);
- int32_t di = ai - bi;
- if (((di ^ ai) & (ai ^ bi)) < 0) {
- /* Signed overflow. */
+ int32_t di;
+ if (ssub32_overflow(ai, bi, &di)) {
di = (di < 0 ? INT32_MAX : INT32_MIN);
}
*(int32_t *)(d + i) = di;
@@ -1161,9 +1158,8 @@ void HELPER(gvec_sssub64)(void *d, void *a, void *b, uint32_t desc)
for (i = 0; i < oprsz; i += sizeof(int64_t)) {
int64_t ai = *(int64_t *)(a + i);
int64_t bi = *(int64_t *)(b + i);
- int64_t di = ai - bi;
- if (((di ^ ai) & (ai ^ bi)) < 0) {
- /* Signed overflow. */
+ int64_t di;
+ if (ssub64_overflow(ai, bi, &di)) {
di = (di < 0 ? INT64_MAX : INT64_MIN);
}
*(int64_t *)(d + i) = di;
@@ -1209,8 +1205,8 @@ void HELPER(gvec_usadd32)(void *d, void *a, void *b, uint32_t desc)
for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
uint32_t ai = *(uint32_t *)(a + i);
uint32_t bi = *(uint32_t *)(b + i);
- uint32_t di = ai + bi;
- if (di < ai) {
+ uint32_t di;
+ if (uadd32_overflow(ai, bi, &di)) {
di = UINT32_MAX;
}
*(uint32_t *)(d + i) = di;
@@ -1226,8 +1222,8 @@ void HELPER(gvec_usadd64)(void *d, void *a, void *b, uint32_t desc)
for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
uint64_t ai = *(uint64_t *)(a + i);
uint64_t bi = *(uint64_t *)(b + i);
- uint64_t di = ai + bi;
- if (di < ai) {
+ uint64_t di;
+ if (uadd64_overflow(ai, bi, &di)) {
di = UINT64_MAX;
}
*(uint64_t *)(d + i) = di;
@@ -1273,8 +1269,8 @@ void HELPER(gvec_ussub32)(void *d, void *a, void *b, uint32_t desc)
for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
uint32_t ai = *(uint32_t *)(a + i);
uint32_t bi = *(uint32_t *)(b + i);
- uint32_t di = ai - bi;
- if (ai < bi) {
+ uint32_t di;
+ if (usub32_overflow(ai, bi, &di)) {
di = 0;
}
*(uint32_t *)(d + i) = di;
@@ -1290,8 +1286,8 @@ void HELPER(gvec_ussub64)(void *d, void *a, void *b, uint32_t desc)
for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
uint64_t ai = *(uint64_t *)(a + i);
uint64_t bi = *(uint64_t *)(b + i);
- uint64_t di = ai - bi;
- if (ai < bi) {
+ uint64_t di;
+ if (usub64_overflow(ai, bi, &di)) {
di = 0;
}
*(uint64_t *)(d + i) = di;
--
2.25.1
^ permalink raw reply related [flat|nested] 52+ messages in thread
* [PULL 05/46] tests/fp: add quad support to the benchmark utility
2021-05-16 12:33 [PULL 00/46] softfloat: Convert float128 to FloatParts (part 1) Richard Henderson
` (3 preceding siblings ...)
2021-05-16 12:33 ` [PULL 04/46] accel/tcg: Use add/sub overflow routines in tcg-runtime-gvec.c Richard Henderson
@ 2021-05-16 12:33 ` Richard Henderson
2021-05-16 12:33 ` [PULL 06/46] softfloat: Move the binary point to the msb Richard Henderson
` (42 subsequent siblings)
47 siblings, 0 replies; 52+ messages in thread
From: Richard Henderson @ 2021-05-16 12:33 UTC (permalink / raw)
To: qemu-devel; +Cc: peter.maydell, Alex Bennée, David Hildenbrand
From: Alex Bennée <alex.bennee@linaro.org>
Currently this only support softfloat calculations because working out
if the hardware supports 128 bit floats needs configure magic. The 3
op muladd operation is currently unimplemented so commented out for
now.
Reviewed-by: David Hildenbrand <david@redhat.com>
Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
Message-Id: <20201020163738.27700-8-alex.bennee@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
tests/fp/fp-bench.c | 88 ++++++++++++++++++++++++++++++++++++++++++---
1 file changed, 83 insertions(+), 5 deletions(-)
diff --git a/tests/fp/fp-bench.c b/tests/fp/fp-bench.c
index 4ba5e1d2d4..d319993280 100644
--- a/tests/fp/fp-bench.c
+++ b/tests/fp/fp-bench.c
@@ -14,6 +14,7 @@
#include <math.h>
#include <fenv.h>
#include "qemu/timer.h"
+#include "qemu/int128.h"
#include "fpu/softfloat.h"
/* amortize the computation of random inputs */
@@ -50,8 +51,10 @@ static const char * const op_names[] = {
enum precision {
PREC_SINGLE,
PREC_DOUBLE,
+ PREC_QUAD,
PREC_FLOAT32,
PREC_FLOAT64,
+ PREC_FLOAT128,
PREC_MAX_NR,
};
@@ -89,6 +92,7 @@ union fp {
double d;
float32 f32;
float64 f64;
+ float128 f128;
uint64_t u64;
};
@@ -113,6 +117,10 @@ struct op_desc {
static uint64_t random_ops[MAX_OPERANDS] = {
SEED_A, SEED_B, SEED_C,
};
+
+static float128 random_quad_ops[MAX_OPERANDS] = {
+ {SEED_A, SEED_B}, {SEED_B, SEED_C}, {SEED_C, SEED_A},
+};
static float_status soft_status;
static enum precision precision;
static enum op operation;
@@ -141,25 +149,45 @@ static void update_random_ops(int n_ops, enum precision prec)
int i;
for (i = 0; i < n_ops; i++) {
- uint64_t r = random_ops[i];
switch (prec) {
case PREC_SINGLE:
case PREC_FLOAT32:
+ {
+ uint64_t r = random_ops[i];
do {
r = xorshift64star(r);
} while (!float32_is_normal(r));
+ random_ops[i] = r;
break;
+ }
case PREC_DOUBLE:
case PREC_FLOAT64:
+ {
+ uint64_t r = random_ops[i];
do {
r = xorshift64star(r);
} while (!float64_is_normal(r));
+ random_ops[i] = r;
break;
+ }
+ case PREC_QUAD:
+ case PREC_FLOAT128:
+ {
+ float128 r = random_quad_ops[i];
+ uint64_t hi = r.high;
+ uint64_t lo = r.low;
+ do {
+ hi = xorshift64star(hi);
+ lo = xorshift64star(lo);
+ r = make_float128(hi, lo);
+ } while (!float128_is_normal(r));
+ random_quad_ops[i] = r;
+ break;
+ }
default:
g_assert_not_reached();
}
- random_ops[i] = r;
}
}
@@ -184,6 +212,13 @@ static void fill_random(union fp *ops, int n_ops, enum precision prec,
ops[i].f64 = float64_chs(ops[i].f64);
}
break;
+ case PREC_QUAD:
+ case PREC_FLOAT128:
+ ops[i].f128 = random_quad_ops[i];
+ if (no_neg && float128_is_neg(ops[i].f128)) {
+ ops[i].f128 = float128_chs(ops[i].f128);
+ }
+ break;
default:
g_assert_not_reached();
}
@@ -345,6 +380,41 @@ static void bench(enum precision prec, enum op op, int n_ops, bool no_neg)
}
}
break;
+ case PREC_FLOAT128:
+ fill_random(ops, n_ops, prec, no_neg);
+ t0 = get_clock();
+ for (i = 0; i < OPS_PER_ITER; i++) {
+ float128 a = ops[0].f128;
+ float128 b = ops[1].f128;
+ /* float128 c = ops[2].f128; */
+
+ switch (op) {
+ case OP_ADD:
+ res.f128 = float128_add(a, b, &soft_status);
+ break;
+ case OP_SUB:
+ res.f128 = float128_sub(a, b, &soft_status);
+ break;
+ case OP_MUL:
+ res.f128 = float128_mul(a, b, &soft_status);
+ break;
+ case OP_DIV:
+ res.f128 = float128_div(a, b, &soft_status);
+ break;
+ /* case OP_FMA: */
+ /* res.f128 = float128_muladd(a, b, c, 0, &soft_status); */
+ /* break; */
+ case OP_SQRT:
+ res.f128 = float128_sqrt(a, &soft_status);
+ break;
+ case OP_CMP:
+ res.u64 = float128_compare_quiet(a, b, &soft_status);
+ break;
+ default:
+ g_assert_not_reached();
+ }
+ }
+ break;
default:
g_assert_not_reached();
}
@@ -369,7 +439,8 @@ static void bench(enum precision prec, enum op op, int n_ops, bool no_neg)
GEN_BENCH(bench_ ## opname ## _float, float, PREC_SINGLE, op, n_ops) \
GEN_BENCH(bench_ ## opname ## _double, double, PREC_DOUBLE, op, n_ops) \
GEN_BENCH(bench_ ## opname ## _float32, float32, PREC_FLOAT32, op, n_ops) \
- GEN_BENCH(bench_ ## opname ## _float64, float64, PREC_FLOAT64, op, n_ops)
+ GEN_BENCH(bench_ ## opname ## _float64, float64, PREC_FLOAT64, op, n_ops) \
+ GEN_BENCH(bench_ ## opname ## _float128, float128, PREC_FLOAT128, op, n_ops)
GEN_BENCH_ALL_TYPES(add, OP_ADD, 2)
GEN_BENCH_ALL_TYPES(sub, OP_SUB, 2)
@@ -383,7 +454,8 @@ GEN_BENCH_ALL_TYPES(cmp, OP_CMP, 2)
GEN_BENCH_NO_NEG(bench_ ## name ## _float, float, PREC_SINGLE, op, n) \
GEN_BENCH_NO_NEG(bench_ ## name ## _double, double, PREC_DOUBLE, op, n) \
GEN_BENCH_NO_NEG(bench_ ## name ## _float32, float32, PREC_FLOAT32, op, n) \
- GEN_BENCH_NO_NEG(bench_ ## name ## _float64, float64, PREC_FLOAT64, op, n)
+ GEN_BENCH_NO_NEG(bench_ ## name ## _float64, float64, PREC_FLOAT64, op, n) \
+ GEN_BENCH_NO_NEG(bench_ ## name ## _float128, float128, PREC_FLOAT128, op, n)
GEN_BENCH_ALL_TYPES_NO_NEG(sqrt, OP_SQRT, 1)
#undef GEN_BENCH_ALL_TYPES_NO_NEG
@@ -397,6 +469,7 @@ GEN_BENCH_ALL_TYPES_NO_NEG(sqrt, OP_SQRT, 1)
[PREC_DOUBLE] = bench_ ## opname ## _double, \
[PREC_FLOAT32] = bench_ ## opname ## _float32, \
[PREC_FLOAT64] = bench_ ## opname ## _float64, \
+ [PREC_FLOAT128] = bench_ ## opname ## _float128, \
}
static const bench_func_t bench_funcs[OP_MAX_NR][PREC_MAX_NR] = {
@@ -445,7 +518,7 @@ static void usage_complete(int argc, char *argv[])
fprintf(stderr, " -h = show this help message.\n");
fprintf(stderr, " -o = floating point operation (%s). Default: %s\n",
op_list, op_names[0]);
- fprintf(stderr, " -p = floating point precision (single, double). "
+ fprintf(stderr, " -p = floating point precision (single, double, quad[soft only]). "
"Default: single\n");
fprintf(stderr, " -r = rounding mode (even, zero, down, up, tieaway). "
"Default: even\n");
@@ -565,6 +638,8 @@ static void parse_args(int argc, char *argv[])
precision = PREC_SINGLE;
} else if (!strcmp(optarg, "double")) {
precision = PREC_DOUBLE;
+ } else if (!strcmp(optarg, "quad")) {
+ precision = PREC_QUAD;
} else {
fprintf(stderr, "Unsupported precision '%s'\n", optarg);
exit(EXIT_FAILURE);
@@ -608,6 +683,9 @@ static void parse_args(int argc, char *argv[])
case PREC_DOUBLE:
precision = PREC_FLOAT64;
break;
+ case PREC_QUAD:
+ precision = PREC_FLOAT128;
+ break;
default:
g_assert_not_reached();
}
--
2.25.1
^ permalink raw reply related [flat|nested] 52+ messages in thread
* [PULL 06/46] softfloat: Move the binary point to the msb
2021-05-16 12:33 [PULL 00/46] softfloat: Convert float128 to FloatParts (part 1) Richard Henderson
` (4 preceding siblings ...)
2021-05-16 12:33 ` [PULL 05/46] tests/fp: add quad support to the benchmark utility Richard Henderson
@ 2021-05-16 12:33 ` Richard Henderson
2021-05-16 12:33 ` [PULL 07/46] softfloat: Inline float_raise Richard Henderson
` (41 subsequent siblings)
47 siblings, 0 replies; 52+ messages in thread
From: Richard Henderson @ 2021-05-16 12:33 UTC (permalink / raw)
To: qemu-devel; +Cc: peter.maydell, Alex Bennée
Rather than point the binary point at msb-1, put it at the msb.
Use uadd64_overflow to detect when addition overflows instead
of DECOMPOSED_OVERFLOW_BIT.
This reduces the number of special cases within the code, such
as shifting an int64_t either left or right during conversion.
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
fpu/softfloat.c | 169 +++++++++++++++++++-----------------------------
1 file changed, 66 insertions(+), 103 deletions(-)
diff --git a/fpu/softfloat.c b/fpu/softfloat.c
index 67cfa0fd82..cd777743f1 100644
--- a/fpu/softfloat.c
+++ b/fpu/softfloat.c
@@ -503,9 +503,8 @@ typedef struct {
bool sign;
} FloatParts;
-#define DECOMPOSED_BINARY_POINT (64 - 2)
+#define DECOMPOSED_BINARY_POINT 63
#define DECOMPOSED_IMPLICIT_BIT (1ull << DECOMPOSED_BINARY_POINT)
-#define DECOMPOSED_OVERFLOW_BIT (DECOMPOSED_IMPLICIT_BIT << 1)
/* Structure holding all of the relevant parameters for a format.
* exp_size: the size of the exponent field
@@ -657,7 +656,7 @@ static FloatParts sf_canonicalize(FloatParts part, const FloatFmt *parm,
part.cls = float_class_zero;
part.frac = 0;
} else {
- int shift = clz64(part.frac) - 1;
+ int shift = clz64(part.frac);
part.cls = float_class_normal;
part.exp = parm->frac_shift - parm->exp_bias - shift + 1;
part.frac <<= shift;
@@ -727,9 +726,8 @@ static FloatParts round_canonical(FloatParts p, float_status *s,
if (likely(exp > 0)) {
if (frac & round_mask) {
flags |= float_flag_inexact;
- frac += inc;
- if (frac & DECOMPOSED_OVERFLOW_BIT) {
- frac >>= 1;
+ if (uadd64_overflow(frac, inc, &frac)) {
+ frac = (frac >> 1) | DECOMPOSED_IMPLICIT_BIT;
exp++;
}
}
@@ -758,9 +756,12 @@ static FloatParts round_canonical(FloatParts p, float_status *s,
p.cls = float_class_zero;
goto do_zero;
} else {
- bool is_tiny = s->tininess_before_rounding
- || (exp < 0)
- || !((frac + inc) & DECOMPOSED_OVERFLOW_BIT);
+ bool is_tiny = s->tininess_before_rounding || (exp < 0);
+
+ if (!is_tiny) {
+ uint64_t discard;
+ is_tiny = !uadd64_overflow(frac, inc, &discard);
+ }
shift64RightJamming(frac, 1 - exp, &frac);
if (frac & round_mask) {
@@ -985,7 +986,7 @@ static FloatParts addsub_floats(FloatParts a, FloatParts b, bool subtract,
a.cls = float_class_zero;
a.sign = s->float_rounding_mode == float_round_down;
} else {
- int shift = clz64(a.frac) - 1;
+ int shift = clz64(a.frac);
a.frac = a.frac << shift;
a.exp = a.exp - shift;
a.sign = a_sign;
@@ -1022,9 +1023,10 @@ static FloatParts addsub_floats(FloatParts a, FloatParts b, bool subtract,
shift64RightJamming(a.frac, b.exp - a.exp, &a.frac);
a.exp = b.exp;
}
- a.frac += b.frac;
- if (a.frac & DECOMPOSED_OVERFLOW_BIT) {
+
+ if (uadd64_overflow(a.frac, b.frac, &a.frac)) {
shift64RightJamming(a.frac, 1, &a.frac);
+ a.frac |= DECOMPOSED_IMPLICIT_BIT;
a.exp += 1;
}
return a;
@@ -1219,16 +1221,17 @@ static FloatParts mul_floats(FloatParts a, FloatParts b, float_status *s)
int exp = a.exp + b.exp;
mul64To128(a.frac, b.frac, &hi, &lo);
- shift128RightJamming(hi, lo, DECOMPOSED_BINARY_POINT, &hi, &lo);
- if (lo & DECOMPOSED_OVERFLOW_BIT) {
- shift64RightJamming(lo, 1, &lo);
+ if (hi & DECOMPOSED_IMPLICIT_BIT) {
exp += 1;
+ } else {
+ hi <<= 1;
}
+ hi |= (lo != 0);
/* Re-use a */
a.exp = exp;
a.sign = sign;
- a.frac = lo;
+ a.frac = hi;
return a;
}
/* handle all the NaN cases */
@@ -1411,56 +1414,41 @@ static FloatParts muladd_floats(FloatParts a, FloatParts b, FloatParts c,
p_exp = a.exp + b.exp;
- /* Multiply of 2 62-bit numbers produces a (2*62) == 124-bit
- * result.
- */
mul64To128(a.frac, b.frac, &hi, &lo);
- /* binary point now at bit 124 */
- /* check for overflow */
- if (hi & (1ULL << (DECOMPOSED_BINARY_POINT * 2 + 1 - 64))) {
- shift128RightJamming(hi, lo, 1, &hi, &lo);
+ /* Renormalize to the msb. */
+ if (hi & DECOMPOSED_IMPLICIT_BIT) {
p_exp += 1;
+ } else {
+ shortShift128Left(hi, lo, 1, &hi, &lo);
}
/* + add/sub */
- if (c.cls == float_class_zero) {
- /* move binary point back to 62 */
- shift128RightJamming(hi, lo, DECOMPOSED_BINARY_POINT, &hi, &lo);
- } else {
+ if (c.cls != float_class_zero) {
int exp_diff = p_exp - c.exp;
if (p_sign == c.sign) {
/* Addition */
if (exp_diff <= 0) {
- shift128RightJamming(hi, lo,
- DECOMPOSED_BINARY_POINT - exp_diff,
- &hi, &lo);
- lo += c.frac;
+ shift64RightJamming(hi, -exp_diff, &hi);
p_exp = c.exp;
+ if (uadd64_overflow(hi, c.frac, &hi)) {
+ shift64RightJamming(hi, 1, &hi);
+ hi |= DECOMPOSED_IMPLICIT_BIT;
+ p_exp += 1;
+ }
} else {
- uint64_t c_hi, c_lo;
- /* shift c to the same binary point as the product (124) */
- c_hi = c.frac >> 2;
- c_lo = 0;
- shift128RightJamming(c_hi, c_lo,
- exp_diff,
- &c_hi, &c_lo);
- add128(hi, lo, c_hi, c_lo, &hi, &lo);
- /* move binary point back to 62 */
- shift128RightJamming(hi, lo, DECOMPOSED_BINARY_POINT, &hi, &lo);
+ uint64_t c_hi, c_lo, over;
+ shift128RightJamming(c.frac, 0, exp_diff, &c_hi, &c_lo);
+ add192(0, hi, lo, 0, c_hi, c_lo, &over, &hi, &lo);
+ if (over) {
+ shift64RightJamming(hi, 1, &hi);
+ hi |= DECOMPOSED_IMPLICIT_BIT;
+ p_exp += 1;
+ }
}
-
- if (lo & DECOMPOSED_OVERFLOW_BIT) {
- shift64RightJamming(lo, 1, &lo);
- p_exp += 1;
- }
-
} else {
/* Subtraction */
- uint64_t c_hi, c_lo;
- /* make C binary point match product at bit 124 */
- c_hi = c.frac >> 2;
- c_lo = 0;
+ uint64_t c_hi = c.frac, c_lo = 0;
if (exp_diff <= 0) {
shift128RightJamming(hi, lo, -exp_diff, &hi, &lo);
@@ -1495,20 +1483,15 @@ static FloatParts muladd_floats(FloatParts a, FloatParts b, FloatParts c,
/* Normalizing to a binary point of 124 is the
correct adjust for the exponent. However since we're
shifting, we might as well put the binary point back
- at 62 where we really want it. Therefore shift as
+ at 63 where we really want it. Therefore shift as
if we're leaving 1 bit at the top of the word, but
adjust the exponent as if we're leaving 3 bits. */
- shift -= 1;
- if (shift >= 64) {
- lo = lo << (shift - 64);
- } else {
- hi = (hi << shift) | (lo >> (64 - shift));
- lo = hi | ((lo << shift) != 0);
- }
- p_exp -= shift - 2;
+ shift128Left(hi, lo, shift, &hi, &lo);
+ p_exp -= shift;
}
}
}
+ hi |= (lo != 0);
if (flags & float_muladd_halve_result) {
p_exp -= 1;
@@ -1518,7 +1501,7 @@ static FloatParts muladd_floats(FloatParts a, FloatParts b, FloatParts c,
a.cls = float_class_normal;
a.sign = p_sign ^ sign_flip;
a.exp = p_exp;
- a.frac = lo;
+ a.frac = hi;
return a;
}
@@ -1742,25 +1725,17 @@ static FloatParts div_floats(FloatParts a, FloatParts b, float_status *s)
* exponent to match.
*
* The udiv_qrnnd algorithm that we're using requires normalization,
- * i.e. the msb of the denominator must be set. Since we know that
- * DECOMPOSED_BINARY_POINT is msb-1, the inputs must be shifted left
- * by one (more), and the remainder must be shifted right by one.
+ * i.e. the msb of the denominator must be set, which is already true.
*/
if (a.frac < b.frac) {
exp -= 1;
- shift128Left(0, a.frac, DECOMPOSED_BINARY_POINT + 2, &n1, &n0);
- } else {
shift128Left(0, a.frac, DECOMPOSED_BINARY_POINT + 1, &n1, &n0);
+ } else {
+ shift128Left(0, a.frac, DECOMPOSED_BINARY_POINT, &n1, &n0);
}
- q = udiv_qrnnd(&r, n1, n0, b.frac << 1);
+ q = udiv_qrnnd(&r, n1, n0, b.frac);
- /*
- * Set lsb if there is a remainder, to set inexact.
- * As mentioned above, to find the actual value of the remainder we
- * would need to shift right, but (1) we are only concerned about
- * non-zero-ness, and (2) the remainder will always be even because
- * both inputs to the division primitive are even.
- */
+ /* Set lsb if there is a remainder, to set inexact. */
a.frac = q | (r != 0);
a.sign = sign;
a.exp = exp;
@@ -2135,12 +2110,12 @@ static FloatParts round_to_int(FloatParts a, FloatRoundMode rmode,
if (a.frac & rnd_mask) {
s->float_exception_flags |= float_flag_inexact;
- a.frac += inc;
- a.frac &= ~rnd_mask;
- if (a.frac & DECOMPOSED_OVERFLOW_BIT) {
+ if (uadd64_overflow(a.frac, inc, &a.frac)) {
a.frac >>= 1;
+ a.frac |= DECOMPOSED_IMPLICIT_BIT;
a.exp++;
}
+ a.frac &= ~rnd_mask;
}
}
break;
@@ -2213,10 +2188,8 @@ static int64_t round_to_int_and_pack(FloatParts in, FloatRoundMode rmode,
case float_class_zero:
return 0;
case float_class_normal:
- if (p.exp < DECOMPOSED_BINARY_POINT) {
+ if (p.exp <= DECOMPOSED_BINARY_POINT) {
r = p.frac >> (DECOMPOSED_BINARY_POINT - p.exp);
- } else if (p.exp - DECOMPOSED_BINARY_POINT < 2) {
- r = p.frac << (p.exp - DECOMPOSED_BINARY_POINT);
} else {
r = UINT64_MAX;
}
@@ -2498,10 +2471,8 @@ static uint64_t round_to_uint_and_pack(FloatParts in, FloatRoundMode rmode,
return 0;
}
- if (p.exp < DECOMPOSED_BINARY_POINT) {
+ if (p.exp <= DECOMPOSED_BINARY_POINT) {
r = p.frac >> (DECOMPOSED_BINARY_POINT - p.exp);
- } else if (p.exp - DECOMPOSED_BINARY_POINT < 2) {
- r = p.frac << (p.exp - DECOMPOSED_BINARY_POINT);
} else {
s->float_exception_flags = orig_flags | float_flag_invalid;
return max;
@@ -2765,11 +2736,11 @@ static FloatParts int_to_float(int64_t a, int scale, float_status *status)
f = -f;
r.sign = true;
}
- shift = clz64(f) - 1;
+ shift = clz64(f);
scale = MIN(MAX(scale, -0x10000), 0x10000);
r.exp = DECOMPOSED_BINARY_POINT - shift + scale;
- r.frac = (shift < 0 ? DECOMPOSED_IMPLICIT_BIT : f << shift);
+ r.frac = f << shift;
}
return r;
@@ -2920,21 +2891,16 @@ bfloat16 int16_to_bfloat16(int16_t a, float_status *status)
static FloatParts uint_to_float(uint64_t a, int scale, float_status *status)
{
FloatParts r = { .sign = false };
+ int shift;
if (a == 0) {
r.cls = float_class_zero;
} else {
scale = MIN(MAX(scale, -0x10000), 0x10000);
+ shift = clz64(a);
r.cls = float_class_normal;
- if ((int64_t)a < 0) {
- r.exp = DECOMPOSED_BINARY_POINT + 1 + scale;
- shift64RightJamming(a, 1, &a);
- r.frac = a;
- } else {
- int shift = clz64(a) - 1;
- r.exp = DECOMPOSED_BINARY_POINT - shift + scale;
- r.frac = a << shift;
- }
+ r.exp = DECOMPOSED_BINARY_POINT - shift + scale;
+ r.frac = a << shift;
}
return r;
@@ -3475,12 +3441,9 @@ static FloatParts sqrt_float(FloatParts a, float_status *s, const FloatFmt *p)
/* We need two overflow bits at the top. Adding room for that is a
* right shift. If the exponent is odd, we can discard the low bit
* by multiplying the fraction by 2; that's a left shift. Combine
- * those and we shift right if the exponent is even.
+ * those and we shift right by 1 if the exponent is odd, otherwise 2.
*/
- a_frac = a.frac;
- if (!(a.exp & 1)) {
- a_frac >>= 1;
- }
+ a_frac = a.frac >> (2 - (a.exp & 1));
a.exp >>= 1;
/* Bit-by-bit computation of sqrt. */
@@ -3488,10 +3451,10 @@ static FloatParts sqrt_float(FloatParts a, float_status *s, const FloatFmt *p)
s_frac = 0;
/* Iterate from implicit bit down to the 3 extra bits to compute a
- * properly rounded result. Remember we've inserted one more bit
- * at the top, so these positions are one less.
+ * properly rounded result. Remember we've inserted two more bits
+ * at the top, so these positions are two less.
*/
- bit = DECOMPOSED_BINARY_POINT - 1;
+ bit = DECOMPOSED_BINARY_POINT - 2;
last_bit = MAX(p->frac_shift - 4, 0);
do {
uint64_t q = 1ULL << bit;
@@ -3507,7 +3470,7 @@ static FloatParts sqrt_float(FloatParts a, float_status *s, const FloatFmt *p)
/* Undo the right shift done above. If there is any remaining
* fraction, the result is inexact. Set the sticky bit.
*/
- a.frac = (r_frac << 1) + (a_frac != 0);
+ a.frac = (r_frac << 2) + (a_frac != 0);
return a;
}
--
2.25.1
^ permalink raw reply related [flat|nested] 52+ messages in thread
* [PULL 07/46] softfloat: Inline float_raise
2021-05-16 12:33 [PULL 00/46] softfloat: Convert float128 to FloatParts (part 1) Richard Henderson
` (5 preceding siblings ...)
2021-05-16 12:33 ` [PULL 06/46] softfloat: Move the binary point to the msb Richard Henderson
@ 2021-05-16 12:33 ` Richard Henderson
2021-05-16 12:33 ` [PULL 08/46] softfloat: Use float_raise in more places Richard Henderson
` (40 subsequent siblings)
47 siblings, 0 replies; 52+ messages in thread
From: Richard Henderson @ 2021-05-16 12:33 UTC (permalink / raw)
To: qemu-devel; +Cc: peter.maydell, Philippe Mathieu-Daudé, David Hildenbrand
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Reviewed-by: David Hildenbrand <david@redhat.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
include/fpu/softfloat.h | 5 ++++-
fpu/softfloat-specialize.c.inc | 12 ------------
2 files changed, 4 insertions(+), 13 deletions(-)
diff --git a/include/fpu/softfloat.h b/include/fpu/softfloat.h
index 78ad5ca738..019c2ec66d 100644
--- a/include/fpu/softfloat.h
+++ b/include/fpu/softfloat.h
@@ -100,7 +100,10 @@ typedef enum {
| Routine to raise any or all of the software IEC/IEEE floating-point
| exception flags.
*----------------------------------------------------------------------------*/
-void float_raise(uint8_t flags, float_status *status);
+static inline void float_raise(uint8_t flags, float_status *status)
+{
+ status->float_exception_flags |= flags;
+}
/*----------------------------------------------------------------------------
| If `a' is denormal and we are in flush-to-zero mode then set the
diff --git a/fpu/softfloat-specialize.c.inc b/fpu/softfloat-specialize.c.inc
index e19809c04b..96ed8c1a26 100644
--- a/fpu/softfloat-specialize.c.inc
+++ b/fpu/softfloat-specialize.c.inc
@@ -227,18 +227,6 @@ floatx80 floatx80_default_nan(float_status *status)
const floatx80 floatx80_infinity
= make_floatx80_init(floatx80_infinity_high, floatx80_infinity_low);
-/*----------------------------------------------------------------------------
-| Raises the exceptions specified by `flags'. Floating-point traps can be
-| defined here if desired. It is currently not possible for such a trap
-| to substitute a result value. If traps are not implemented, this routine
-| should be simply `float_exception_flags |= flags;'.
-*----------------------------------------------------------------------------*/
-
-void float_raise(uint8_t flags, float_status *status)
-{
- status->float_exception_flags |= flags;
-}
-
/*----------------------------------------------------------------------------
| Internal canonical NaN format.
*----------------------------------------------------------------------------*/
--
2.25.1
^ permalink raw reply related [flat|nested] 52+ messages in thread
* [PULL 08/46] softfloat: Use float_raise in more places
2021-05-16 12:33 [PULL 00/46] softfloat: Convert float128 to FloatParts (part 1) Richard Henderson
` (6 preceding siblings ...)
2021-05-16 12:33 ` [PULL 07/46] softfloat: Inline float_raise Richard Henderson
@ 2021-05-16 12:33 ` Richard Henderson
2021-05-16 12:33 ` [PULL 09/46] softfloat: Tidy a * b + inf return Richard Henderson
` (39 subsequent siblings)
47 siblings, 0 replies; 52+ messages in thread
From: Richard Henderson @ 2021-05-16 12:33 UTC (permalink / raw)
To: qemu-devel; +Cc: peter.maydell, Philippe Mathieu-Daudé, David Hildenbrand
We have been somewhat inconsistent about when to use
float_raise and when to or in the bit by hand.
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Reviewed-by: David Hildenbrand <david@redhat.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
fpu/softfloat.c | 87 +++++++++++++++++++++++++------------------------
1 file changed, 44 insertions(+), 43 deletions(-)
diff --git a/fpu/softfloat.c b/fpu/softfloat.c
index cd777743f1..93fe785809 100644
--- a/fpu/softfloat.c
+++ b/fpu/softfloat.c
@@ -132,7 +132,7 @@ this code that are retained.
if (unlikely(soft_t ## _is_denormal(*a))) { \
*a = soft_t ## _set_sign(soft_t ## _zero, \
soft_t ## _is_neg(*a)); \
- s->float_exception_flags |= float_flag_input_denormal; \
+ float_raise(float_flag_input_denormal, s); \
} \
}
@@ -360,7 +360,7 @@ float32_gen2(float32 xa, float32 xb, float_status *s,
ur.h = hard(ua.h, ub.h);
if (unlikely(f32_is_inf(ur))) {
- s->float_exception_flags |= float_flag_overflow;
+ float_raise(float_flag_overflow, s);
} else if (unlikely(fabsf(ur.h) <= FLT_MIN) && post(ua, ub)) {
goto soft;
}
@@ -391,7 +391,7 @@ float64_gen2(float64 xa, float64 xb, float_status *s,
ur.h = hard(ua.h, ub.h);
if (unlikely(f64_is_inf(ur))) {
- s->float_exception_flags |= float_flag_overflow;
+ float_raise(float_flag_overflow, s);
} else if (unlikely(fabs(ur.h) <= DBL_MIN) && post(ua, ub)) {
goto soft;
}
@@ -880,7 +880,7 @@ static FloatParts return_nan(FloatParts a, float_status *s)
{
switch (a.cls) {
case float_class_snan:
- s->float_exception_flags |= float_flag_invalid;
+ float_raise(float_flag_invalid, s);
a = parts_silence_nan(a, s);
/* fall through */
case float_class_qnan:
@@ -898,7 +898,7 @@ static FloatParts return_nan(FloatParts a, float_status *s)
static FloatParts pick_nan(FloatParts a, FloatParts b, float_status *s)
{
if (is_snan(a.cls) || is_snan(b.cls)) {
- s->float_exception_flags |= float_flag_invalid;
+ float_raise(float_flag_invalid, s);
}
if (s->default_nan_mode) {
@@ -922,7 +922,7 @@ static FloatParts pick_nan_muladd(FloatParts a, FloatParts b, FloatParts c,
int which;
if (is_snan(a.cls) || is_snan(b.cls) || is_snan(c.cls)) {
- s->float_exception_flags |= float_flag_invalid;
+ float_raise(float_flag_invalid, s);
}
which = pickNaNMulAdd(a.cls, b.cls, c.cls, inf_zero, s);
@@ -1241,7 +1241,7 @@ static FloatParts mul_floats(FloatParts a, FloatParts b, float_status *s)
/* Inf * Zero == NaN */
if ((a.cls == float_class_inf && b.cls == float_class_zero) ||
(a.cls == float_class_zero && b.cls == float_class_inf)) {
- s->float_exception_flags |= float_flag_invalid;
+ float_raise(float_flag_invalid, s);
return parts_default_nan(s);
}
/* Multiply by 0 or Inf */
@@ -1356,6 +1356,7 @@ static FloatParts muladd_floats(FloatParts a, FloatParts b, FloatParts c,
}
if (inf_zero) {
+ float_raise(float_flag_invalid, s);
s->float_exception_flags |= float_flag_invalid;
return parts_default_nan(s);
}
@@ -1380,7 +1381,7 @@ static FloatParts muladd_floats(FloatParts a, FloatParts b, FloatParts c,
if (c.cls == float_class_inf) {
if (p_class == float_class_inf && p_sign != c.sign) {
- s->float_exception_flags |= float_flag_invalid;
+ float_raise(float_flag_invalid, s);
return parts_default_nan(s);
} else {
a.cls = float_class_inf;
@@ -1598,7 +1599,7 @@ float32_muladd(float32 xa, float32 xb, float32 xc, int flags, float_status *s)
ur.h = fmaf(ua.h, ub.h, uc.h);
if (unlikely(f32_is_inf(ur))) {
- s->float_exception_flags |= float_flag_overflow;
+ float_raise(float_flag_overflow, s);
} else if (unlikely(fabsf(ur.h) <= FLT_MIN)) {
ua = ua_orig;
uc = uc_orig;
@@ -1669,7 +1670,7 @@ float64_muladd(float64 xa, float64 xb, float64 xc, int flags, float_status *s)
ur.h = fma(ua.h, ub.h, uc.h);
if (unlikely(f64_is_inf(ur))) {
- s->float_exception_flags |= float_flag_overflow;
+ float_raise(float_flag_overflow, s);
} else if (unlikely(fabs(ur.h) <= FLT_MIN)) {
ua = ua_orig;
uc = uc_orig;
@@ -1749,7 +1750,7 @@ static FloatParts div_floats(FloatParts a, FloatParts b, float_status *s)
if (a.cls == b.cls
&&
(a.cls == float_class_inf || a.cls == float_class_zero)) {
- s->float_exception_flags |= float_flag_invalid;
+ float_raise(float_flag_invalid, s);
return parts_default_nan(s);
}
/* Inf / x or 0 / x */
@@ -1759,7 +1760,7 @@ static FloatParts div_floats(FloatParts a, FloatParts b, float_status *s)
}
/* Div 0 => Inf */
if (b.cls == float_class_zero) {
- s->float_exception_flags |= float_flag_divbyzero;
+ float_raise(float_flag_divbyzero, s);
a.cls = float_class_inf;
a.sign = sign;
return a;
@@ -1895,7 +1896,7 @@ static FloatParts float_to_float(FloatParts a, const FloatFmt *dstf,
/* There is no NaN in the destination format. Raise Invalid
* and return a zero with the sign of the input NaN.
*/
- s->float_exception_flags |= float_flag_invalid;
+ float_raise(float_flag_invalid, s);
a.cls = float_class_zero;
a.frac = 0;
a.exp = 0;
@@ -1905,7 +1906,7 @@ static FloatParts float_to_float(FloatParts a, const FloatFmt *dstf,
/* There is no Inf in the destination format. Raise Invalid
* and return the maximum normal with the correct sign.
*/
- s->float_exception_flags |= float_flag_invalid;
+ float_raise(float_flag_invalid, s);
a.cls = float_class_normal;
a.exp = dstf->exp_max;
a.frac = ((1ull << dstf->frac_size) - 1) << dstf->frac_shift;
@@ -1916,7 +1917,7 @@ static FloatParts float_to_float(FloatParts a, const FloatFmt *dstf,
}
} else if (is_nan(a.cls)) {
if (is_snan(a.cls)) {
- s->float_exception_flags |= float_flag_invalid;
+ float_raise(float_flag_invalid, s);
a = parts_silence_nan(a, s);
}
if (s->default_nan_mode) {
@@ -2048,7 +2049,7 @@ static FloatParts round_to_int(FloatParts a, FloatRoundMode rmode,
if (a.exp < 0) {
bool one;
/* all fractional */
- s->float_exception_flags |= float_flag_inexact;
+ float_raise(float_flag_inexact, s);
switch (rmode) {
case float_round_nearest_even:
one = a.exp == -1 && a.frac > DECOMPOSED_IMPLICIT_BIT;
@@ -2109,7 +2110,7 @@ static FloatParts round_to_int(FloatParts a, FloatRoundMode rmode,
}
if (a.frac & rnd_mask) {
- s->float_exception_flags |= float_flag_inexact;
+ float_raise(float_flag_inexact, s);
if (uadd64_overflow(a.frac, inc, &a.frac)) {
a.frac >>= 1;
a.frac |= DECOMPOSED_IMPLICIT_BIT;
@@ -3188,7 +3189,7 @@ static FloatRelation compare_floats(FloatParts a, FloatParts b, bool is_quiet,
if (!is_quiet ||
a.cls == float_class_snan ||
b.cls == float_class_snan) {
- s->float_exception_flags |= float_flag_invalid;
+ float_raise(float_flag_invalid, s);
}
return float_relation_unordered;
}
@@ -3429,7 +3430,7 @@ static FloatParts sqrt_float(FloatParts a, float_status *s, const FloatFmt *p)
return a; /* sqrt(+-0) = +-0 */
}
if (a.sign) {
- s->float_exception_flags |= float_flag_invalid;
+ float_raise(float_flag_invalid, s);
return parts_default_nan(s);
}
if (a.cls == float_class_inf) {
@@ -3760,7 +3761,7 @@ static int32_t roundAndPackInt32(bool zSign, uint64_t absZ,
return zSign ? INT32_MIN : INT32_MAX;
}
if (roundBits) {
- status->float_exception_flags |= float_flag_inexact;
+ float_raise(float_flag_inexact, status);
}
return z;
@@ -3822,7 +3823,7 @@ static int64_t roundAndPackInt64(bool zSign, uint64_t absZ0, uint64_t absZ1,
return zSign ? INT64_MIN : INT64_MAX;
}
if (absZ1) {
- status->float_exception_flags |= float_flag_inexact;
+ float_raise(float_flag_inexact, status);
}
return z;
@@ -3883,7 +3884,7 @@ static int64_t roundAndPackUint64(bool zSign, uint64_t absZ0,
}
if (absZ1) {
- status->float_exception_flags |= float_flag_inexact;
+ float_raise(float_flag_inexact, status);
}
return absZ0;
}
@@ -3994,7 +3995,7 @@ static float32 roundAndPackFloat32(bool zSign, int zExp, uint32_t zSig,
}
}
if (roundBits) {
- status->float_exception_flags |= float_flag_inexact;
+ float_raise(float_flag_inexact, status);
}
zSig = ( zSig + roundIncrement )>>7;
if (!(roundBits ^ 0x40) && roundNearestEven) {
@@ -4150,7 +4151,7 @@ static float64 roundAndPackFloat64(bool zSign, int zExp, uint64_t zSig,
}
}
if (roundBits) {
- status->float_exception_flags |= float_flag_inexact;
+ float_raise(float_flag_inexact, status);
}
zSig = ( zSig + roundIncrement )>>10;
if (!(roundBits ^ 0x200) && roundNearestEven) {
@@ -4284,7 +4285,7 @@ floatx80 roundAndPackFloatx80(int8_t roundingPrecision, bool zSign,
float_raise(float_flag_underflow, status);
}
if (roundBits) {
- status->float_exception_flags |= float_flag_inexact;
+ float_raise(float_flag_inexact, status);
}
zSig0 += roundIncrement;
if ( (int64_t) zSig0 < 0 ) zExp = 1;
@@ -4297,7 +4298,7 @@ floatx80 roundAndPackFloatx80(int8_t roundingPrecision, bool zSign,
}
}
if (roundBits) {
- status->float_exception_flags |= float_flag_inexact;
+ float_raise(float_flag_inexact, status);
}
zSig0 += roundIncrement;
if ( zSig0 < roundIncrement ) {
@@ -4360,7 +4361,7 @@ floatx80 roundAndPackFloatx80(int8_t roundingPrecision, bool zSign,
float_raise(float_flag_underflow, status);
}
if (zSig1) {
- status->float_exception_flags |= float_flag_inexact;
+ float_raise(float_flag_inexact, status);
}
switch (roundingMode) {
case float_round_nearest_even:
@@ -4390,7 +4391,7 @@ floatx80 roundAndPackFloatx80(int8_t roundingPrecision, bool zSign,
}
}
if (zSig1) {
- status->float_exception_flags |= float_flag_inexact;
+ float_raise(float_flag_inexact, status);
}
if ( increment ) {
++zSig0;
@@ -4667,7 +4668,7 @@ static float128 roundAndPackFloat128(bool zSign, int32_t zExp,
}
}
if (zSig2) {
- status->float_exception_flags |= float_flag_inexact;
+ float_raise(float_flag_inexact, status);
}
if ( increment ) {
add128( zSig0, zSig1, 0, 1, &zSig0, &zSig1 );
@@ -5405,7 +5406,7 @@ int32_t floatx80_to_int32_round_to_zero(floatx80 a, float_status *status)
}
else if ( aExp < 0x3FFF ) {
if (aExp || aSig) {
- status->float_exception_flags |= float_flag_inexact;
+ float_raise(float_flag_inexact, status);
}
return 0;
}
@@ -5420,7 +5421,7 @@ int32_t floatx80_to_int32_round_to_zero(floatx80 a, float_status *status)
return aSign ? (int32_t) 0x80000000 : 0x7FFFFFFF;
}
if ( ( aSig<<shiftCount ) != savedASig ) {
- status->float_exception_flags |= float_flag_inexact;
+ float_raise(float_flag_inexact, status);
}
return z;
@@ -5504,13 +5505,13 @@ int64_t floatx80_to_int64_round_to_zero(floatx80 a, float_status *status)
}
else if ( aExp < 0x3FFF ) {
if (aExp | aSig) {
- status->float_exception_flags |= float_flag_inexact;
+ float_raise(float_flag_inexact, status);
}
return 0;
}
z = aSig>>( - shiftCount );
if ( (uint64_t) ( aSig<<( shiftCount & 63 ) ) ) {
- status->float_exception_flags |= float_flag_inexact;
+ float_raise(float_flag_inexact, status);
}
if ( aSign ) z = - z;
return z;
@@ -5661,7 +5662,7 @@ floatx80 floatx80_round_to_int(floatx80 a, float_status *status)
&& ( (uint64_t) ( extractFloatx80Frac( a ) ) == 0 ) ) {
return a;
}
- status->float_exception_flags |= float_flag_inexact;
+ float_raise(float_flag_inexact, status);
aSign = extractFloatx80Sign( a );
switch (status->float_rounding_mode) {
case float_round_nearest_even:
@@ -5728,7 +5729,7 @@ floatx80 floatx80_round_to_int(floatx80 a, float_status *status)
z.low = UINT64_C(0x8000000000000000);
}
if (z.low != a.low) {
- status->float_exception_flags |= float_flag_inexact;
+ float_raise(float_flag_inexact, status);
}
return z;
@@ -6364,7 +6365,7 @@ int32_t float128_to_int32_round_to_zero(float128 a, float_status *status)
}
else if ( aExp < 0x3FFF ) {
if (aExp || aSig0) {
- status->float_exception_flags |= float_flag_inexact;
+ float_raise(float_flag_inexact, status);
}
return 0;
}
@@ -6380,7 +6381,7 @@ int32_t float128_to_int32_round_to_zero(float128 a, float_status *status)
return aSign ? INT32_MIN : INT32_MAX;
}
if ( ( aSig0<<shiftCount ) != savedASig ) {
- status->float_exception_flags |= float_flag_inexact;
+ float_raise(float_flag_inexact, status);
}
return z;
@@ -6458,7 +6459,7 @@ int64_t float128_to_int64_round_to_zero(float128 a, float_status *status)
if ( ( a.high == UINT64_C(0xC03E000000000000) )
&& ( aSig1 < UINT64_C(0x0002000000000000) ) ) {
if (aSig1) {
- status->float_exception_flags |= float_flag_inexact;
+ float_raise(float_flag_inexact, status);
}
}
else {
@@ -6471,20 +6472,20 @@ int64_t float128_to_int64_round_to_zero(float128 a, float_status *status)
}
z = ( aSig0<<shiftCount ) | ( aSig1>>( ( - shiftCount ) & 63 ) );
if ( (uint64_t) ( aSig1<<shiftCount ) ) {
- status->float_exception_flags |= float_flag_inexact;
+ float_raise(float_flag_inexact, status);
}
}
else {
if ( aExp < 0x3FFF ) {
if ( aExp | aSig0 | aSig1 ) {
- status->float_exception_flags |= float_flag_inexact;
+ float_raise(float_flag_inexact, status);
}
return 0;
}
z = aSig0>>( - shiftCount );
if ( aSig1
|| ( shiftCount && (uint64_t) ( aSig0<<( shiftCount & 63 ) ) ) ) {
- status->float_exception_flags |= float_flag_inexact;
+ float_raise(float_flag_inexact, status);
}
}
if ( aSign ) z = - z;
@@ -6793,7 +6794,7 @@ float128 float128_round_to_int(float128 a, float_status *status)
else {
if ( aExp < 0x3FFF ) {
if ( ( ( (uint64_t) ( a.high<<1 ) ) | a.low ) == 0 ) return a;
- status->float_exception_flags |= float_flag_inexact;
+ float_raise(float_flag_inexact, status);
aSign = extractFloat128Sign( a );
switch (status->float_rounding_mode) {
case float_round_nearest_even:
@@ -6867,7 +6868,7 @@ float128 float128_round_to_int(float128 a, float_status *status)
z.high &= ~ roundBitsMask;
}
if ( ( z.low != a.low ) || ( z.high != a.high ) ) {
- status->float_exception_flags |= float_flag_inexact;
+ float_raise(float_flag_inexact, status);
}
return z;
--
2.25.1
^ permalink raw reply related [flat|nested] 52+ messages in thread
* [PULL 09/46] softfloat: Tidy a * b + inf return
2021-05-16 12:33 [PULL 00/46] softfloat: Convert float128 to FloatParts (part 1) Richard Henderson
` (7 preceding siblings ...)
2021-05-16 12:33 ` [PULL 08/46] softfloat: Use float_raise in more places Richard Henderson
@ 2021-05-16 12:33 ` Richard Henderson
2021-05-16 12:33 ` [PULL 10/46] softfloat: Add float_cmask and constants Richard Henderson
` (38 subsequent siblings)
47 siblings, 0 replies; 52+ messages in thread
From: Richard Henderson @ 2021-05-16 12:33 UTC (permalink / raw)
To: qemu-devel
Cc: peter.maydell, Alex Bennée, Philippe Mathieu-Daudé,
David Hildenbrand
No reason to set values in 'a', when we already
have float_class_inf in 'c', and can flip that sign.
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: David Hildenbrand <david@redhat.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
fpu/softfloat.c | 5 ++---
1 file changed, 2 insertions(+), 3 deletions(-)
diff --git a/fpu/softfloat.c b/fpu/softfloat.c
index 93fe785809..ee4b5073b6 100644
--- a/fpu/softfloat.c
+++ b/fpu/softfloat.c
@@ -1384,9 +1384,8 @@ static FloatParts muladd_floats(FloatParts a, FloatParts b, FloatParts c,
float_raise(float_flag_invalid, s);
return parts_default_nan(s);
} else {
- a.cls = float_class_inf;
- a.sign = c.sign ^ sign_flip;
- return a;
+ c.sign ^= sign_flip;
+ return c;
}
}
--
2.25.1
^ permalink raw reply related [flat|nested] 52+ messages in thread
* [PULL 10/46] softfloat: Add float_cmask and constants
2021-05-16 12:33 [PULL 00/46] softfloat: Convert float128 to FloatParts (part 1) Richard Henderson
` (8 preceding siblings ...)
2021-05-16 12:33 ` [PULL 09/46] softfloat: Tidy a * b + inf return Richard Henderson
@ 2021-05-16 12:33 ` Richard Henderson
2021-05-16 12:33 ` [PULL 11/46] softfloat: Use return_nan in float_to_float Richard Henderson
` (37 subsequent siblings)
47 siblings, 0 replies; 52+ messages in thread
From: Richard Henderson @ 2021-05-16 12:33 UTC (permalink / raw)
To: qemu-devel; +Cc: peter.maydell, Alex Bennée, David Hildenbrand
Testing more than one class at a time is better done with masks.
This reduces the static branch count.
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: David Hildenbrand <david@redhat.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
fpu/softfloat.c | 30 +++++++++++++++++++++++-------
1 file changed, 23 insertions(+), 7 deletions(-)
diff --git a/fpu/softfloat.c b/fpu/softfloat.c
index ee4b5073b6..64edb23793 100644
--- a/fpu/softfloat.c
+++ b/fpu/softfloat.c
@@ -469,6 +469,20 @@ typedef enum __attribute__ ((__packed__)) {
float_class_snan,
} FloatClass;
+#define float_cmask(bit) (1u << (bit))
+
+enum {
+ float_cmask_zero = float_cmask(float_class_zero),
+ float_cmask_normal = float_cmask(float_class_normal),
+ float_cmask_inf = float_cmask(float_class_inf),
+ float_cmask_qnan = float_cmask(float_class_qnan),
+ float_cmask_snan = float_cmask(float_class_snan),
+
+ float_cmask_infzero = float_cmask_zero | float_cmask_inf,
+ float_cmask_anynan = float_cmask_qnan | float_cmask_snan,
+};
+
+
/* Simple helpers for checking if, or what kind of, NaN we have */
static inline __attribute__((unused)) bool is_nan(FloatClass c)
{
@@ -1338,26 +1352,28 @@ bfloat16 QEMU_FLATTEN bfloat16_mul(bfloat16 a, bfloat16 b, float_status *status)
static FloatParts muladd_floats(FloatParts a, FloatParts b, FloatParts c,
int flags, float_status *s)
{
- bool inf_zero = ((1 << a.cls) | (1 << b.cls)) ==
- ((1 << float_class_inf) | (1 << float_class_zero));
- bool p_sign;
+ bool inf_zero, p_sign;
bool sign_flip = flags & float_muladd_negate_result;
FloatClass p_class;
uint64_t hi, lo;
int p_exp;
+ int ab_mask, abc_mask;
+
+ ab_mask = float_cmask(a.cls) | float_cmask(b.cls);
+ abc_mask = float_cmask(c.cls) | ab_mask;
+ inf_zero = ab_mask == float_cmask_infzero;
/* It is implementation-defined whether the cases of (0,inf,qnan)
* and (inf,0,qnan) raise InvalidOperation or not (and what QNaN
* they return if they do), so we have to hand this information
* off to the target-specific pick-a-NaN routine.
*/
- if (is_nan(a.cls) || is_nan(b.cls) || is_nan(c.cls)) {
+ if (unlikely(abc_mask & float_cmask_anynan)) {
return pick_nan_muladd(a, b, c, inf_zero, s);
}
if (inf_zero) {
float_raise(float_flag_invalid, s);
- s->float_exception_flags |= float_flag_invalid;
return parts_default_nan(s);
}
@@ -1371,9 +1387,9 @@ static FloatParts muladd_floats(FloatParts a, FloatParts b, FloatParts c,
p_sign ^= 1;
}
- if (a.cls == float_class_inf || b.cls == float_class_inf) {
+ if (ab_mask & float_cmask_inf) {
p_class = float_class_inf;
- } else if (a.cls == float_class_zero || b.cls == float_class_zero) {
+ } else if (ab_mask & float_cmask_zero) {
p_class = float_class_zero;
} else {
p_class = float_class_normal;
--
2.25.1
^ permalink raw reply related [flat|nested] 52+ messages in thread
* [PULL 11/46] softfloat: Use return_nan in float_to_float
2021-05-16 12:33 [PULL 00/46] softfloat: Convert float128 to FloatParts (part 1) Richard Henderson
` (9 preceding siblings ...)
2021-05-16 12:33 ` [PULL 10/46] softfloat: Add float_cmask and constants Richard Henderson
@ 2021-05-16 12:33 ` Richard Henderson
2021-05-16 12:33 ` [PULL 12/46] softfloat: fix return_nan vs default_nan_mode Richard Henderson
` (36 subsequent siblings)
47 siblings, 0 replies; 52+ messages in thread
From: Richard Henderson @ 2021-05-16 12:33 UTC (permalink / raw)
To: qemu-devel; +Cc: peter.maydell, Alex Bennée, David Hildenbrand
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: David Hildenbrand <david@redhat.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
fpu/softfloat.c | 8 +-------
1 file changed, 1 insertion(+), 7 deletions(-)
diff --git a/fpu/softfloat.c b/fpu/softfloat.c
index 64edb23793..b694e38522 100644
--- a/fpu/softfloat.c
+++ b/fpu/softfloat.c
@@ -1931,13 +1931,7 @@ static FloatParts float_to_float(FloatParts a, const FloatFmt *dstf,
break;
}
} else if (is_nan(a.cls)) {
- if (is_snan(a.cls)) {
- float_raise(float_flag_invalid, s);
- a = parts_silence_nan(a, s);
- }
- if (s->default_nan_mode) {
- return parts_default_nan(s);
- }
+ return return_nan(a, s);
}
return a;
}
--
2.25.1
^ permalink raw reply related [flat|nested] 52+ messages in thread
* [PULL 12/46] softfloat: fix return_nan vs default_nan_mode
2021-05-16 12:33 [PULL 00/46] softfloat: Convert float128 to FloatParts (part 1) Richard Henderson
` (10 preceding siblings ...)
2021-05-16 12:33 ` [PULL 11/46] softfloat: Use return_nan in float_to_float Richard Henderson
@ 2021-05-16 12:33 ` Richard Henderson
2021-05-16 12:33 ` [PULL 13/46] target/mips: Set set_default_nan_mode with set_snan_bit_is_one Richard Henderson
` (35 subsequent siblings)
47 siblings, 0 replies; 52+ messages in thread
From: Richard Henderson @ 2021-05-16 12:33 UTC (permalink / raw)
To: qemu-devel; +Cc: peter.maydell, Alex Bennée, David Hildenbrand
Do not call parts_silence_nan when default_nan_mode is in
effect. This will avoid an assert in a later patch.
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: David Hildenbrand <david@redhat.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
fpu/softfloat.c | 19 +++++++------------
1 file changed, 7 insertions(+), 12 deletions(-)
diff --git a/fpu/softfloat.c b/fpu/softfloat.c
index b694e38522..6589f00b23 100644
--- a/fpu/softfloat.c
+++ b/fpu/softfloat.c
@@ -892,21 +892,16 @@ static float64 float64_round_pack_canonical(FloatParts p, float_status *s)
static FloatParts return_nan(FloatParts a, float_status *s)
{
- switch (a.cls) {
- case float_class_snan:
+ g_assert(is_nan(a.cls));
+ if (is_snan(a.cls)) {
float_raise(float_flag_invalid, s);
- a = parts_silence_nan(a, s);
- /* fall through */
- case float_class_qnan:
- if (s->default_nan_mode) {
- return parts_default_nan(s);
+ if (!s->default_nan_mode) {
+ return parts_silence_nan(a, s);
}
- break;
-
- default:
- g_assert_not_reached();
+ } else if (!s->default_nan_mode) {
+ return a;
}
- return a;
+ return parts_default_nan(s);
}
static FloatParts pick_nan(FloatParts a, FloatParts b, float_status *s)
--
2.25.1
^ permalink raw reply related [flat|nested] 52+ messages in thread
* [PULL 13/46] target/mips: Set set_default_nan_mode with set_snan_bit_is_one
2021-05-16 12:33 [PULL 00/46] softfloat: Convert float128 to FloatParts (part 1) Richard Henderson
` (11 preceding siblings ...)
2021-05-16 12:33 ` [PULL 12/46] softfloat: fix return_nan vs default_nan_mode Richard Henderson
@ 2021-05-16 12:33 ` Richard Henderson
2021-05-16 12:33 ` [PULL 14/46] softfloat: Do not produce a default_nan from parts_silence_nan Richard Henderson
` (34 subsequent siblings)
47 siblings, 0 replies; 52+ messages in thread
From: Richard Henderson @ 2021-05-16 12:33 UTC (permalink / raw)
To: qemu-devel; +Cc: peter.maydell, Alex Bennée, David Hildenbrand
This behavior is currently hard-coded in parts_silence_nan,
but setting this bit properly will allow this to be cleaned up.
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: David Hildenbrand <david@redhat.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
target/mips/fpu_helper.h | 10 ++++++++--
1 file changed, 8 insertions(+), 2 deletions(-)
diff --git a/target/mips/fpu_helper.h b/target/mips/fpu_helper.h
index 1c2d6d35a7..ad1116e8c1 100644
--- a/target/mips/fpu_helper.h
+++ b/target/mips/fpu_helper.h
@@ -27,8 +27,14 @@ static inline void restore_flush_mode(CPUMIPSState *env)
static inline void restore_snan_bit_mode(CPUMIPSState *env)
{
- set_snan_bit_is_one((env->active_fpu.fcr31 & (1 << FCR31_NAN2008)) == 0,
- &env->active_fpu.fp_status);
+ bool nan2008 = env->active_fpu.fcr31 & (1 << FCR31_NAN2008);
+
+ /*
+ * With nan2008, SNaNs are silenced in the usual way.
+ * Before that, SNaNs are not silenced; default nans are produced.
+ */
+ set_snan_bit_is_one(!nan2008, &env->active_fpu.fp_status);
+ set_default_nan_mode(!nan2008, &env->active_fpu.fp_status);
}
static inline void restore_fp_status(CPUMIPSState *env)
--
2.25.1
^ permalink raw reply related [flat|nested] 52+ messages in thread
* [PULL 14/46] softfloat: Do not produce a default_nan from parts_silence_nan
2021-05-16 12:33 [PULL 00/46] softfloat: Convert float128 to FloatParts (part 1) Richard Henderson
` (12 preceding siblings ...)
2021-05-16 12:33 ` [PULL 13/46] target/mips: Set set_default_nan_mode with set_snan_bit_is_one Richard Henderson
@ 2021-05-16 12:33 ` Richard Henderson
2021-05-16 12:34 ` [PULL 15/46] softfloat: Rename FloatParts to FloatParts64 Richard Henderson
` (33 subsequent siblings)
47 siblings, 0 replies; 52+ messages in thread
From: Richard Henderson @ 2021-05-16 12:33 UTC (permalink / raw)
To: qemu-devel; +Cc: peter.maydell, Alex Bennée, David Hildenbrand
Require default_nan_mode to be set instead.
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: David Hildenbrand <david@redhat.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
fpu/softfloat-specialize.c.inc | 11 +++++------
1 file changed, 5 insertions(+), 6 deletions(-)
diff --git a/fpu/softfloat-specialize.c.inc b/fpu/softfloat-specialize.c.inc
index 96ed8c1a26..05cb2ee940 100644
--- a/fpu/softfloat-specialize.c.inc
+++ b/fpu/softfloat-specialize.c.inc
@@ -179,16 +179,15 @@ static FloatParts parts_default_nan(float_status *status)
static FloatParts parts_silence_nan(FloatParts a, float_status *status)
{
g_assert(!no_signaling_nans(status));
-#if defined(TARGET_HPPA)
- a.frac &= ~(1ULL << (DECOMPOSED_BINARY_POINT - 1));
- a.frac |= 1ULL << (DECOMPOSED_BINARY_POINT - 2);
-#else
+ g_assert(!status->default_nan_mode);
+
+ /* The only snan_bit_is_one target without default_nan_mode is HPPA. */
if (snan_bit_is_one(status)) {
- return parts_default_nan(status);
+ a.frac &= ~(1ULL << (DECOMPOSED_BINARY_POINT - 1));
+ a.frac |= 1ULL << (DECOMPOSED_BINARY_POINT - 2);
} else {
a.frac |= 1ULL << (DECOMPOSED_BINARY_POINT - 1);
}
-#endif
a.cls = float_class_qnan;
return a;
}
--
2.25.1
^ permalink raw reply related [flat|nested] 52+ messages in thread
* [PULL 15/46] softfloat: Rename FloatParts to FloatParts64
2021-05-16 12:33 [PULL 00/46] softfloat: Convert float128 to FloatParts (part 1) Richard Henderson
` (13 preceding siblings ...)
2021-05-16 12:33 ` [PULL 14/46] softfloat: Do not produce a default_nan from parts_silence_nan Richard Henderson
@ 2021-05-16 12:34 ` Richard Henderson
2021-05-16 12:34 ` [PULL 16/46] softfloat: Move type-specific pack/unpack routines Richard Henderson
` (32 subsequent siblings)
47 siblings, 0 replies; 52+ messages in thread
From: Richard Henderson @ 2021-05-16 12:34 UTC (permalink / raw)
To: qemu-devel; +Cc: peter.maydell, Philippe Mathieu-Daudé, David Hildenbrand
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Reviewed-by: David Hildenbrand <david@redhat.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
fpu/softfloat.c | 362 ++++++++++++++++-----------------
fpu/softfloat-specialize.c.inc | 6 +-
2 files changed, 184 insertions(+), 184 deletions(-)
diff --git a/fpu/softfloat.c b/fpu/softfloat.c
index 6589f00b23..27b51659c9 100644
--- a/fpu/softfloat.c
+++ b/fpu/softfloat.c
@@ -515,7 +515,7 @@ typedef struct {
int32_t exp;
FloatClass cls;
bool sign;
-} FloatParts;
+} FloatParts64;
#define DECOMPOSED_BINARY_POINT 63
#define DECOMPOSED_IMPLICIT_BIT (1ull << DECOMPOSED_BINARY_POINT)
@@ -580,11 +580,11 @@ static const FloatFmt float64_params = {
};
/* Unpack a float to parts, but do not canonicalize. */
-static inline FloatParts unpack_raw(FloatFmt fmt, uint64_t raw)
+static inline FloatParts64 unpack_raw(FloatFmt fmt, uint64_t raw)
{
const int sign_pos = fmt.frac_size + fmt.exp_size;
- return (FloatParts) {
+ return (FloatParts64) {
.cls = float_class_unclassified,
.sign = extract64(raw, sign_pos, 1),
.exp = extract64(raw, fmt.frac_size, fmt.exp_size),
@@ -592,50 +592,50 @@ static inline FloatParts unpack_raw(FloatFmt fmt, uint64_t raw)
};
}
-static inline FloatParts float16_unpack_raw(float16 f)
+static inline FloatParts64 float16_unpack_raw(float16 f)
{
return unpack_raw(float16_params, f);
}
-static inline FloatParts bfloat16_unpack_raw(bfloat16 f)
+static inline FloatParts64 bfloat16_unpack_raw(bfloat16 f)
{
return unpack_raw(bfloat16_params, f);
}
-static inline FloatParts float32_unpack_raw(float32 f)
+static inline FloatParts64 float32_unpack_raw(float32 f)
{
return unpack_raw(float32_params, f);
}
-static inline FloatParts float64_unpack_raw(float64 f)
+static inline FloatParts64 float64_unpack_raw(float64 f)
{
return unpack_raw(float64_params, f);
}
/* Pack a float from parts, but do not canonicalize. */
-static inline uint64_t pack_raw(FloatFmt fmt, FloatParts p)
+static inline uint64_t pack_raw(FloatFmt fmt, FloatParts64 p)
{
const int sign_pos = fmt.frac_size + fmt.exp_size;
uint64_t ret = deposit64(p.frac, fmt.frac_size, fmt.exp_size, p.exp);
return deposit64(ret, sign_pos, 1, p.sign);
}
-static inline float16 float16_pack_raw(FloatParts p)
+static inline float16 float16_pack_raw(FloatParts64 p)
{
return make_float16(pack_raw(float16_params, p));
}
-static inline bfloat16 bfloat16_pack_raw(FloatParts p)
+static inline bfloat16 bfloat16_pack_raw(FloatParts64 p)
{
return pack_raw(bfloat16_params, p);
}
-static inline float32 float32_pack_raw(FloatParts p)
+static inline float32 float32_pack_raw(FloatParts64 p)
{
return make_float32(pack_raw(float32_params, p));
}
-static inline float64 float64_pack_raw(FloatParts p)
+static inline float64 float64_pack_raw(FloatParts64 p)
{
return make_float64(pack_raw(float64_params, p));
}
@@ -651,7 +651,7 @@ static inline float64 float64_pack_raw(FloatParts p)
#include "softfloat-specialize.c.inc"
/* Canonicalize EXP and FRAC, setting CLS. */
-static FloatParts sf_canonicalize(FloatParts part, const FloatFmt *parm,
+static FloatParts64 sf_canonicalize(FloatParts64 part, const FloatFmt *parm,
float_status *status)
{
if (part.exp == parm->exp_max && !parm->arm_althp) {
@@ -689,7 +689,7 @@ static FloatParts sf_canonicalize(FloatParts part, const FloatFmt *parm,
* by EXP_BIAS and must be bounded by [EXP_MAX-1, 0].
*/
-static FloatParts round_canonical(FloatParts p, float_status *s,
+static FloatParts64 round_canonical(FloatParts64 p, float_status *s,
const FloatFmt *parm)
{
const uint64_t frac_lsb = parm->frac_lsb;
@@ -838,59 +838,59 @@ static FloatParts round_canonical(FloatParts p, float_status *s,
}
/* Explicit FloatFmt version */
-static FloatParts float16a_unpack_canonical(float16 f, float_status *s,
+static FloatParts64 float16a_unpack_canonical(float16 f, float_status *s,
const FloatFmt *params)
{
return sf_canonicalize(float16_unpack_raw(f), params, s);
}
-static FloatParts float16_unpack_canonical(float16 f, float_status *s)
+static FloatParts64 float16_unpack_canonical(float16 f, float_status *s)
{
return float16a_unpack_canonical(f, s, &float16_params);
}
-static FloatParts bfloat16_unpack_canonical(bfloat16 f, float_status *s)
+static FloatParts64 bfloat16_unpack_canonical(bfloat16 f, float_status *s)
{
return sf_canonicalize(bfloat16_unpack_raw(f), &bfloat16_params, s);
}
-static float16 float16a_round_pack_canonical(FloatParts p, float_status *s,
+static float16 float16a_round_pack_canonical(FloatParts64 p, float_status *s,
const FloatFmt *params)
{
return float16_pack_raw(round_canonical(p, s, params));
}
-static float16 float16_round_pack_canonical(FloatParts p, float_status *s)
+static float16 float16_round_pack_canonical(FloatParts64 p, float_status *s)
{
return float16a_round_pack_canonical(p, s, &float16_params);
}
-static bfloat16 bfloat16_round_pack_canonical(FloatParts p, float_status *s)
+static bfloat16 bfloat16_round_pack_canonical(FloatParts64 p, float_status *s)
{
return bfloat16_pack_raw(round_canonical(p, s, &bfloat16_params));
}
-static FloatParts float32_unpack_canonical(float32 f, float_status *s)
+static FloatParts64 float32_unpack_canonical(float32 f, float_status *s)
{
return sf_canonicalize(float32_unpack_raw(f), &float32_params, s);
}
-static float32 float32_round_pack_canonical(FloatParts p, float_status *s)
+static float32 float32_round_pack_canonical(FloatParts64 p, float_status *s)
{
return float32_pack_raw(round_canonical(p, s, &float32_params));
}
-static FloatParts float64_unpack_canonical(float64 f, float_status *s)
+static FloatParts64 float64_unpack_canonical(float64 f, float_status *s)
{
return sf_canonicalize(float64_unpack_raw(f), &float64_params, s);
}
-static float64 float64_round_pack_canonical(FloatParts p, float_status *s)
+static float64 float64_round_pack_canonical(FloatParts64 p, float_status *s)
{
return float64_pack_raw(round_canonical(p, s, &float64_params));
}
-static FloatParts return_nan(FloatParts a, float_status *s)
+static FloatParts64 return_nan(FloatParts64 a, float_status *s)
{
g_assert(is_nan(a.cls));
if (is_snan(a.cls)) {
@@ -904,7 +904,7 @@ static FloatParts return_nan(FloatParts a, float_status *s)
return parts_default_nan(s);
}
-static FloatParts pick_nan(FloatParts a, FloatParts b, float_status *s)
+static FloatParts64 pick_nan(FloatParts64 a, FloatParts64 b, float_status *s)
{
if (is_snan(a.cls) || is_snan(b.cls)) {
float_raise(float_flag_invalid, s);
@@ -925,7 +925,7 @@ static FloatParts pick_nan(FloatParts a, FloatParts b, float_status *s)
return a;
}
-static FloatParts pick_nan_muladd(FloatParts a, FloatParts b, FloatParts c,
+static FloatParts64 pick_nan_muladd(FloatParts64 a, FloatParts64 b, FloatParts64 c,
bool inf_zero, float_status *s)
{
int which;
@@ -971,7 +971,7 @@ static FloatParts pick_nan_muladd(FloatParts a, FloatParts b, FloatParts c,
* Arithmetic.
*/
-static FloatParts addsub_floats(FloatParts a, FloatParts b, bool subtract,
+static FloatParts64 addsub_floats(FloatParts64 a, FloatParts64 b, bool subtract,
float_status *s)
{
bool a_sign = a.sign;
@@ -1062,18 +1062,18 @@ static FloatParts addsub_floats(FloatParts a, FloatParts b, bool subtract,
float16 QEMU_FLATTEN float16_add(float16 a, float16 b, float_status *status)
{
- FloatParts pa = float16_unpack_canonical(a, status);
- FloatParts pb = float16_unpack_canonical(b, status);
- FloatParts pr = addsub_floats(pa, pb, false, status);
+ FloatParts64 pa = float16_unpack_canonical(a, status);
+ FloatParts64 pb = float16_unpack_canonical(b, status);
+ FloatParts64 pr = addsub_floats(pa, pb, false, status);
return float16_round_pack_canonical(pr, status);
}
float16 QEMU_FLATTEN float16_sub(float16 a, float16 b, float_status *status)
{
- FloatParts pa = float16_unpack_canonical(a, status);
- FloatParts pb = float16_unpack_canonical(b, status);
- FloatParts pr = addsub_floats(pa, pb, true, status);
+ FloatParts64 pa = float16_unpack_canonical(a, status);
+ FloatParts64 pb = float16_unpack_canonical(b, status);
+ FloatParts64 pr = addsub_floats(pa, pb, true, status);
return float16_round_pack_canonical(pr, status);
}
@@ -1081,9 +1081,9 @@ float16 QEMU_FLATTEN float16_sub(float16 a, float16 b, float_status *status)
static float32 QEMU_SOFTFLOAT_ATTR
soft_f32_addsub(float32 a, float32 b, bool subtract, float_status *status)
{
- FloatParts pa = float32_unpack_canonical(a, status);
- FloatParts pb = float32_unpack_canonical(b, status);
- FloatParts pr = addsub_floats(pa, pb, subtract, status);
+ FloatParts64 pa = float32_unpack_canonical(a, status);
+ FloatParts64 pb = float32_unpack_canonical(b, status);
+ FloatParts64 pr = addsub_floats(pa, pb, subtract, status);
return float32_round_pack_canonical(pr, status);
}
@@ -1101,9 +1101,9 @@ static inline float32 soft_f32_sub(float32 a, float32 b, float_status *status)
static float64 QEMU_SOFTFLOAT_ATTR
soft_f64_addsub(float64 a, float64 b, bool subtract, float_status *status)
{
- FloatParts pa = float64_unpack_canonical(a, status);
- FloatParts pb = float64_unpack_canonical(b, status);
- FloatParts pr = addsub_floats(pa, pb, subtract, status);
+ FloatParts64 pa = float64_unpack_canonical(a, status);
+ FloatParts64 pb = float64_unpack_canonical(b, status);
+ FloatParts64 pr = addsub_floats(pa, pb, subtract, status);
return float64_round_pack_canonical(pr, status);
}
@@ -1199,18 +1199,18 @@ float64_sub(float64 a, float64 b, float_status *s)
*/
bfloat16 QEMU_FLATTEN bfloat16_add(bfloat16 a, bfloat16 b, float_status *status)
{
- FloatParts pa = bfloat16_unpack_canonical(a, status);
- FloatParts pb = bfloat16_unpack_canonical(b, status);
- FloatParts pr = addsub_floats(pa, pb, false, status);
+ FloatParts64 pa = bfloat16_unpack_canonical(a, status);
+ FloatParts64 pb = bfloat16_unpack_canonical(b, status);
+ FloatParts64 pr = addsub_floats(pa, pb, false, status);
return bfloat16_round_pack_canonical(pr, status);
}
bfloat16 QEMU_FLATTEN bfloat16_sub(bfloat16 a, bfloat16 b, float_status *status)
{
- FloatParts pa = bfloat16_unpack_canonical(a, status);
- FloatParts pb = bfloat16_unpack_canonical(b, status);
- FloatParts pr = addsub_floats(pa, pb, true, status);
+ FloatParts64 pa = bfloat16_unpack_canonical(a, status);
+ FloatParts64 pb = bfloat16_unpack_canonical(b, status);
+ FloatParts64 pr = addsub_floats(pa, pb, true, status);
return bfloat16_round_pack_canonical(pr, status);
}
@@ -1221,7 +1221,7 @@ bfloat16 QEMU_FLATTEN bfloat16_sub(bfloat16 a, bfloat16 b, float_status *status)
* for Binary Floating-Point Arithmetic.
*/
-static FloatParts mul_floats(FloatParts a, FloatParts b, float_status *s)
+static FloatParts64 mul_floats(FloatParts64 a, FloatParts64 b, float_status *s)
{
bool sign = a.sign ^ b.sign;
@@ -1267,9 +1267,9 @@ static FloatParts mul_floats(FloatParts a, FloatParts b, float_status *s)
float16 QEMU_FLATTEN float16_mul(float16 a, float16 b, float_status *status)
{
- FloatParts pa = float16_unpack_canonical(a, status);
- FloatParts pb = float16_unpack_canonical(b, status);
- FloatParts pr = mul_floats(pa, pb, status);
+ FloatParts64 pa = float16_unpack_canonical(a, status);
+ FloatParts64 pb = float16_unpack_canonical(b, status);
+ FloatParts64 pr = mul_floats(pa, pb, status);
return float16_round_pack_canonical(pr, status);
}
@@ -1277,9 +1277,9 @@ float16 QEMU_FLATTEN float16_mul(float16 a, float16 b, float_status *status)
static float32 QEMU_SOFTFLOAT_ATTR
soft_f32_mul(float32 a, float32 b, float_status *status)
{
- FloatParts pa = float32_unpack_canonical(a, status);
- FloatParts pb = float32_unpack_canonical(b, status);
- FloatParts pr = mul_floats(pa, pb, status);
+ FloatParts64 pa = float32_unpack_canonical(a, status);
+ FloatParts64 pb = float32_unpack_canonical(b, status);
+ FloatParts64 pr = mul_floats(pa, pb, status);
return float32_round_pack_canonical(pr, status);
}
@@ -1287,9 +1287,9 @@ soft_f32_mul(float32 a, float32 b, float_status *status)
static float64 QEMU_SOFTFLOAT_ATTR
soft_f64_mul(float64 a, float64 b, float_status *status)
{
- FloatParts pa = float64_unpack_canonical(a, status);
- FloatParts pb = float64_unpack_canonical(b, status);
- FloatParts pr = mul_floats(pa, pb, status);
+ FloatParts64 pa = float64_unpack_canonical(a, status);
+ FloatParts64 pb = float64_unpack_canonical(b, status);
+ FloatParts64 pr = mul_floats(pa, pb, status);
return float64_round_pack_canonical(pr, status);
}
@@ -1325,9 +1325,9 @@ float64_mul(float64 a, float64 b, float_status *s)
bfloat16 QEMU_FLATTEN bfloat16_mul(bfloat16 a, bfloat16 b, float_status *status)
{
- FloatParts pa = bfloat16_unpack_canonical(a, status);
- FloatParts pb = bfloat16_unpack_canonical(b, status);
- FloatParts pr = mul_floats(pa, pb, status);
+ FloatParts64 pa = bfloat16_unpack_canonical(a, status);
+ FloatParts64 pb = bfloat16_unpack_canonical(b, status);
+ FloatParts64 pr = mul_floats(pa, pb, status);
return bfloat16_round_pack_canonical(pr, status);
}
@@ -1344,7 +1344,7 @@ bfloat16 QEMU_FLATTEN bfloat16_mul(bfloat16 a, bfloat16 b, float_status *status)
* NaNs.)
*/
-static FloatParts muladd_floats(FloatParts a, FloatParts b, FloatParts c,
+static FloatParts64 muladd_floats(FloatParts64 a, FloatParts64 b, FloatParts64 c,
int flags, float_status *s)
{
bool inf_zero, p_sign;
@@ -1520,10 +1520,10 @@ static FloatParts muladd_floats(FloatParts a, FloatParts b, FloatParts c,
float16 QEMU_FLATTEN float16_muladd(float16 a, float16 b, float16 c,
int flags, float_status *status)
{
- FloatParts pa = float16_unpack_canonical(a, status);
- FloatParts pb = float16_unpack_canonical(b, status);
- FloatParts pc = float16_unpack_canonical(c, status);
- FloatParts pr = muladd_floats(pa, pb, pc, flags, status);
+ FloatParts64 pa = float16_unpack_canonical(a, status);
+ FloatParts64 pb = float16_unpack_canonical(b, status);
+ FloatParts64 pc = float16_unpack_canonical(c, status);
+ FloatParts64 pr = muladd_floats(pa, pb, pc, flags, status);
return float16_round_pack_canonical(pr, status);
}
@@ -1532,10 +1532,10 @@ static float32 QEMU_SOFTFLOAT_ATTR
soft_f32_muladd(float32 a, float32 b, float32 c, int flags,
float_status *status)
{
- FloatParts pa = float32_unpack_canonical(a, status);
- FloatParts pb = float32_unpack_canonical(b, status);
- FloatParts pc = float32_unpack_canonical(c, status);
- FloatParts pr = muladd_floats(pa, pb, pc, flags, status);
+ FloatParts64 pa = float32_unpack_canonical(a, status);
+ FloatParts64 pb = float32_unpack_canonical(b, status);
+ FloatParts64 pc = float32_unpack_canonical(c, status);
+ FloatParts64 pr = muladd_floats(pa, pb, pc, flags, status);
return float32_round_pack_canonical(pr, status);
}
@@ -1544,10 +1544,10 @@ static float64 QEMU_SOFTFLOAT_ATTR
soft_f64_muladd(float64 a, float64 b, float64 c, int flags,
float_status *status)
{
- FloatParts pa = float64_unpack_canonical(a, status);
- FloatParts pb = float64_unpack_canonical(b, status);
- FloatParts pc = float64_unpack_canonical(c, status);
- FloatParts pr = muladd_floats(pa, pb, pc, flags, status);
+ FloatParts64 pa = float64_unpack_canonical(a, status);
+ FloatParts64 pb = float64_unpack_canonical(b, status);
+ FloatParts64 pc = float64_unpack_canonical(c, status);
+ FloatParts64 pr = muladd_floats(pa, pb, pc, flags, status);
return float64_round_pack_canonical(pr, status);
}
@@ -1705,10 +1705,10 @@ float64_muladd(float64 xa, float64 xb, float64 xc, int flags, float_status *s)
bfloat16 QEMU_FLATTEN bfloat16_muladd(bfloat16 a, bfloat16 b, bfloat16 c,
int flags, float_status *status)
{
- FloatParts pa = bfloat16_unpack_canonical(a, status);
- FloatParts pb = bfloat16_unpack_canonical(b, status);
- FloatParts pc = bfloat16_unpack_canonical(c, status);
- FloatParts pr = muladd_floats(pa, pb, pc, flags, status);
+ FloatParts64 pa = bfloat16_unpack_canonical(a, status);
+ FloatParts64 pb = bfloat16_unpack_canonical(b, status);
+ FloatParts64 pc = bfloat16_unpack_canonical(c, status);
+ FloatParts64 pr = muladd_floats(pa, pb, pc, flags, status);
return bfloat16_round_pack_canonical(pr, status);
}
@@ -1719,7 +1719,7 @@ bfloat16 QEMU_FLATTEN bfloat16_muladd(bfloat16 a, bfloat16 b, bfloat16 c,
* the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
*/
-static FloatParts div_floats(FloatParts a, FloatParts b, float_status *s)
+static FloatParts64 div_floats(FloatParts64 a, FloatParts64 b, float_status *s)
{
bool sign = a.sign ^ b.sign;
@@ -1786,9 +1786,9 @@ static FloatParts div_floats(FloatParts a, FloatParts b, float_status *s)
float16 float16_div(float16 a, float16 b, float_status *status)
{
- FloatParts pa = float16_unpack_canonical(a, status);
- FloatParts pb = float16_unpack_canonical(b, status);
- FloatParts pr = div_floats(pa, pb, status);
+ FloatParts64 pa = float16_unpack_canonical(a, status);
+ FloatParts64 pb = float16_unpack_canonical(b, status);
+ FloatParts64 pr = div_floats(pa, pb, status);
return float16_round_pack_canonical(pr, status);
}
@@ -1796,9 +1796,9 @@ float16 float16_div(float16 a, float16 b, float_status *status)
static float32 QEMU_SOFTFLOAT_ATTR
soft_f32_div(float32 a, float32 b, float_status *status)
{
- FloatParts pa = float32_unpack_canonical(a, status);
- FloatParts pb = float32_unpack_canonical(b, status);
- FloatParts pr = div_floats(pa, pb, status);
+ FloatParts64 pa = float32_unpack_canonical(a, status);
+ FloatParts64 pb = float32_unpack_canonical(b, status);
+ FloatParts64 pr = div_floats(pa, pb, status);
return float32_round_pack_canonical(pr, status);
}
@@ -1806,9 +1806,9 @@ soft_f32_div(float32 a, float32 b, float_status *status)
static float64 QEMU_SOFTFLOAT_ATTR
soft_f64_div(float64 a, float64 b, float_status *status)
{
- FloatParts pa = float64_unpack_canonical(a, status);
- FloatParts pb = float64_unpack_canonical(b, status);
- FloatParts pr = div_floats(pa, pb, status);
+ FloatParts64 pa = float64_unpack_canonical(a, status);
+ FloatParts64 pb = float64_unpack_canonical(b, status);
+ FloatParts64 pr = div_floats(pa, pb, status);
return float64_round_pack_canonical(pr, status);
}
@@ -1878,9 +1878,9 @@ float64_div(float64 a, float64 b, float_status *s)
bfloat16 bfloat16_div(bfloat16 a, bfloat16 b, float_status *status)
{
- FloatParts pa = bfloat16_unpack_canonical(a, status);
- FloatParts pb = bfloat16_unpack_canonical(b, status);
- FloatParts pr = div_floats(pa, pb, status);
+ FloatParts64 pa = bfloat16_unpack_canonical(a, status);
+ FloatParts64 pb = bfloat16_unpack_canonical(b, status);
+ FloatParts64 pr = div_floats(pa, pb, status);
return bfloat16_round_pack_canonical(pr, status);
}
@@ -1896,7 +1896,7 @@ bfloat16 bfloat16_div(bfloat16 a, bfloat16 b, float_status *status)
* invalid exceptions and handling the conversion on NaNs.
*/
-static FloatParts float_to_float(FloatParts a, const FloatFmt *dstf,
+static FloatParts64 float_to_float(FloatParts64 a, const FloatFmt *dstf,
float_status *s)
{
if (dstf->arm_althp) {
@@ -1934,32 +1934,32 @@ static FloatParts float_to_float(FloatParts a, const FloatFmt *dstf,
float32 float16_to_float32(float16 a, bool ieee, float_status *s)
{
const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
- FloatParts p = float16a_unpack_canonical(a, s, fmt16);
- FloatParts pr = float_to_float(p, &float32_params, s);
+ FloatParts64 p = float16a_unpack_canonical(a, s, fmt16);
+ FloatParts64 pr = float_to_float(p, &float32_params, s);
return float32_round_pack_canonical(pr, s);
}
float64 float16_to_float64(float16 a, bool ieee, float_status *s)
{
const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
- FloatParts p = float16a_unpack_canonical(a, s, fmt16);
- FloatParts pr = float_to_float(p, &float64_params, s);
+ FloatParts64 p = float16a_unpack_canonical(a, s, fmt16);
+ FloatParts64 pr = float_to_float(p, &float64_params, s);
return float64_round_pack_canonical(pr, s);
}
float16 float32_to_float16(float32 a, bool ieee, float_status *s)
{
const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
- FloatParts p = float32_unpack_canonical(a, s);
- FloatParts pr = float_to_float(p, fmt16, s);
+ FloatParts64 p = float32_unpack_canonical(a, s);
+ FloatParts64 pr = float_to_float(p, fmt16, s);
return float16a_round_pack_canonical(pr, s, fmt16);
}
static float64 QEMU_SOFTFLOAT_ATTR
soft_float32_to_float64(float32 a, float_status *s)
{
- FloatParts p = float32_unpack_canonical(a, s);
- FloatParts pr = float_to_float(p, &float64_params, s);
+ FloatParts64 p = float32_unpack_canonical(a, s);
+ FloatParts64 pr = float_to_float(p, &float64_params, s);
return float64_round_pack_canonical(pr, s);
}
@@ -1982,43 +1982,43 @@ float64 float32_to_float64(float32 a, float_status *s)
float16 float64_to_float16(float64 a, bool ieee, float_status *s)
{
const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
- FloatParts p = float64_unpack_canonical(a, s);
- FloatParts pr = float_to_float(p, fmt16, s);
+ FloatParts64 p = float64_unpack_canonical(a, s);
+ FloatParts64 pr = float_to_float(p, fmt16, s);
return float16a_round_pack_canonical(pr, s, fmt16);
}
float32 float64_to_float32(float64 a, float_status *s)
{
- FloatParts p = float64_unpack_canonical(a, s);
- FloatParts pr = float_to_float(p, &float32_params, s);
+ FloatParts64 p = float64_unpack_canonical(a, s);
+ FloatParts64 pr = float_to_float(p, &float32_params, s);
return float32_round_pack_canonical(pr, s);
}
float32 bfloat16_to_float32(bfloat16 a, float_status *s)
{
- FloatParts p = bfloat16_unpack_canonical(a, s);
- FloatParts pr = float_to_float(p, &float32_params, s);
+ FloatParts64 p = bfloat16_unpack_canonical(a, s);
+ FloatParts64 pr = float_to_float(p, &float32_params, s);
return float32_round_pack_canonical(pr, s);
}
float64 bfloat16_to_float64(bfloat16 a, float_status *s)
{
- FloatParts p = bfloat16_unpack_canonical(a, s);
- FloatParts pr = float_to_float(p, &float64_params, s);
+ FloatParts64 p = bfloat16_unpack_canonical(a, s);
+ FloatParts64 pr = float_to_float(p, &float64_params, s);
return float64_round_pack_canonical(pr, s);
}
bfloat16 float32_to_bfloat16(float32 a, float_status *s)
{
- FloatParts p = float32_unpack_canonical(a, s);
- FloatParts pr = float_to_float(p, &bfloat16_params, s);
+ FloatParts64 p = float32_unpack_canonical(a, s);
+ FloatParts64 pr = float_to_float(p, &bfloat16_params, s);
return bfloat16_round_pack_canonical(pr, s);
}
bfloat16 float64_to_bfloat16(float64 a, float_status *s)
{
- FloatParts p = float64_unpack_canonical(a, s);
- FloatParts pr = float_to_float(p, &bfloat16_params, s);
+ FloatParts64 p = float64_unpack_canonical(a, s);
+ FloatParts64 pr = float_to_float(p, &bfloat16_params, s);
return bfloat16_round_pack_canonical(pr, s);
}
@@ -2029,7 +2029,7 @@ bfloat16 float64_to_bfloat16(float64 a, float_status *s)
* Arithmetic.
*/
-static FloatParts round_to_int(FloatParts a, FloatRoundMode rmode,
+static FloatParts64 round_to_int(FloatParts64 a, FloatRoundMode rmode,
int scale, float_status *s)
{
switch (a.cls) {
@@ -2132,22 +2132,22 @@ static FloatParts round_to_int(FloatParts a, FloatRoundMode rmode,
float16 float16_round_to_int(float16 a, float_status *s)
{
- FloatParts pa = float16_unpack_canonical(a, s);
- FloatParts pr = round_to_int(pa, s->float_rounding_mode, 0, s);
+ FloatParts64 pa = float16_unpack_canonical(a, s);
+ FloatParts64 pr = round_to_int(pa, s->float_rounding_mode, 0, s);
return float16_round_pack_canonical(pr, s);
}
float32 float32_round_to_int(float32 a, float_status *s)
{
- FloatParts pa = float32_unpack_canonical(a, s);
- FloatParts pr = round_to_int(pa, s->float_rounding_mode, 0, s);
+ FloatParts64 pa = float32_unpack_canonical(a, s);
+ FloatParts64 pr = round_to_int(pa, s->float_rounding_mode, 0, s);
return float32_round_pack_canonical(pr, s);
}
float64 float64_round_to_int(float64 a, float_status *s)
{
- FloatParts pa = float64_unpack_canonical(a, s);
- FloatParts pr = round_to_int(pa, s->float_rounding_mode, 0, s);
+ FloatParts64 pa = float64_unpack_canonical(a, s);
+ FloatParts64 pr = round_to_int(pa, s->float_rounding_mode, 0, s);
return float64_round_pack_canonical(pr, s);
}
@@ -2158,8 +2158,8 @@ float64 float64_round_to_int(float64 a, float_status *s)
bfloat16 bfloat16_round_to_int(bfloat16 a, float_status *s)
{
- FloatParts pa = bfloat16_unpack_canonical(a, s);
- FloatParts pr = round_to_int(pa, s->float_rounding_mode, 0, s);
+ FloatParts64 pa = bfloat16_unpack_canonical(a, s);
+ FloatParts64 pr = round_to_int(pa, s->float_rounding_mode, 0, s);
return bfloat16_round_pack_canonical(pr, s);
}
@@ -2174,13 +2174,13 @@ bfloat16 bfloat16_round_to_int(bfloat16 a, float_status *s)
* is returned.
*/
-static int64_t round_to_int_and_pack(FloatParts in, FloatRoundMode rmode,
+static int64_t round_to_int_and_pack(FloatParts64 in, FloatRoundMode rmode,
int scale, int64_t min, int64_t max,
float_status *s)
{
uint64_t r;
int orig_flags = get_float_exception_flags(s);
- FloatParts p = round_to_int(in, rmode, scale, s);
+ FloatParts64 p = round_to_int(in, rmode, scale, s);
switch (p.cls) {
case float_class_snan:
@@ -2452,12 +2452,12 @@ int64_t bfloat16_to_int64_round_to_zero(bfloat16 a, float_status *s)
* flag.
*/
-static uint64_t round_to_uint_and_pack(FloatParts in, FloatRoundMode rmode,
+static uint64_t round_to_uint_and_pack(FloatParts64 in, FloatRoundMode rmode,
int scale, uint64_t max,
float_status *s)
{
int orig_flags = get_float_exception_flags(s);
- FloatParts p = round_to_int(in, rmode, scale, s);
+ FloatParts64 p = round_to_int(in, rmode, scale, s);
uint64_t r;
switch (p.cls) {
@@ -2726,9 +2726,9 @@ uint64_t bfloat16_to_uint64_round_to_zero(bfloat16 a, float_status *s)
* to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
*/
-static FloatParts int_to_float(int64_t a, int scale, float_status *status)
+static FloatParts64 int_to_float(int64_t a, int scale, float_status *status)
{
- FloatParts r = { .sign = false };
+ FloatParts64 r = { .sign = false };
if (a == 0) {
r.cls = float_class_zero;
@@ -2753,7 +2753,7 @@ static FloatParts int_to_float(int64_t a, int scale, float_status *status)
float16 int64_to_float16_scalbn(int64_t a, int scale, float_status *status)
{
- FloatParts pa = int_to_float(a, scale, status);
+ FloatParts64 pa = int_to_float(a, scale, status);
return float16_round_pack_canonical(pa, status);
}
@@ -2789,7 +2789,7 @@ float16 int8_to_float16(int8_t a, float_status *status)
float32 int64_to_float32_scalbn(int64_t a, int scale, float_status *status)
{
- FloatParts pa = int_to_float(a, scale, status);
+ FloatParts64 pa = int_to_float(a, scale, status);
return float32_round_pack_canonical(pa, status);
}
@@ -2820,7 +2820,7 @@ float32 int16_to_float32(int16_t a, float_status *status)
float64 int64_to_float64_scalbn(int64_t a, int scale, float_status *status)
{
- FloatParts pa = int_to_float(a, scale, status);
+ FloatParts64 pa = int_to_float(a, scale, status);
return float64_round_pack_canonical(pa, status);
}
@@ -2856,7 +2856,7 @@ float64 int16_to_float64(int16_t a, float_status *status)
bfloat16 int64_to_bfloat16_scalbn(int64_t a, int scale, float_status *status)
{
- FloatParts pa = int_to_float(a, scale, status);
+ FloatParts64 pa = int_to_float(a, scale, status);
return bfloat16_round_pack_canonical(pa, status);
}
@@ -2893,9 +2893,9 @@ bfloat16 int16_to_bfloat16(int16_t a, float_status *status)
* IEC/IEEE Standard for Binary Floating-Point Arithmetic.
*/
-static FloatParts uint_to_float(uint64_t a, int scale, float_status *status)
+static FloatParts64 uint_to_float(uint64_t a, int scale, float_status *status)
{
- FloatParts r = { .sign = false };
+ FloatParts64 r = { .sign = false };
int shift;
if (a == 0) {
@@ -2913,7 +2913,7 @@ static FloatParts uint_to_float(uint64_t a, int scale, float_status *status)
float16 uint64_to_float16_scalbn(uint64_t a, int scale, float_status *status)
{
- FloatParts pa = uint_to_float(a, scale, status);
+ FloatParts64 pa = uint_to_float(a, scale, status);
return float16_round_pack_canonical(pa, status);
}
@@ -2949,7 +2949,7 @@ float16 uint8_to_float16(uint8_t a, float_status *status)
float32 uint64_to_float32_scalbn(uint64_t a, int scale, float_status *status)
{
- FloatParts pa = uint_to_float(a, scale, status);
+ FloatParts64 pa = uint_to_float(a, scale, status);
return float32_round_pack_canonical(pa, status);
}
@@ -2980,7 +2980,7 @@ float32 uint16_to_float32(uint16_t a, float_status *status)
float64 uint64_to_float64_scalbn(uint64_t a, int scale, float_status *status)
{
- FloatParts pa = uint_to_float(a, scale, status);
+ FloatParts64 pa = uint_to_float(a, scale, status);
return float64_round_pack_canonical(pa, status);
}
@@ -3016,7 +3016,7 @@ float64 uint16_to_float64(uint16_t a, float_status *status)
bfloat16 uint64_to_bfloat16_scalbn(uint64_t a, int scale, float_status *status)
{
- FloatParts pa = uint_to_float(a, scale, status);
+ FloatParts64 pa = uint_to_float(a, scale, status);
return bfloat16_round_pack_canonical(pa, status);
}
@@ -3061,7 +3061,7 @@ bfloat16 uint16_to_bfloat16(uint16_t a, float_status *status)
* minnummag() and maxnummag() functions correspond to minNumMag()
* and maxNumMag() from the IEEE-754 2008.
*/
-static FloatParts minmax_floats(FloatParts a, FloatParts b, bool ismin,
+static FloatParts64 minmax_floats(FloatParts64 a, FloatParts64 b, bool ismin,
bool ieee, bool ismag, float_status *s)
{
if (unlikely(is_nan(a.cls) || is_nan(b.cls))) {
@@ -3136,9 +3136,9 @@ static FloatParts minmax_floats(FloatParts a, FloatParts b, bool ismin,
float ## sz float ## sz ## _ ## name(float ## sz a, float ## sz b, \
float_status *s) \
{ \
- FloatParts pa = float ## sz ## _unpack_canonical(a, s); \
- FloatParts pb = float ## sz ## _unpack_canonical(b, s); \
- FloatParts pr = minmax_floats(pa, pb, ismin, isiee, ismag, s); \
+ FloatParts64 pa = float ## sz ## _unpack_canonical(a, s); \
+ FloatParts64 pb = float ## sz ## _unpack_canonical(b, s); \
+ FloatParts64 pr = minmax_floats(pa, pb, ismin, isiee, ismag, s); \
\
return float ## sz ## _round_pack_canonical(pr, s); \
}
@@ -3169,9 +3169,9 @@ MINMAX(64, maxnummag, false, true, true)
#define BF16_MINMAX(name, ismin, isiee, ismag) \
bfloat16 bfloat16_ ## name(bfloat16 a, bfloat16 b, float_status *s) \
{ \
- FloatParts pa = bfloat16_unpack_canonical(a, s); \
- FloatParts pb = bfloat16_unpack_canonical(b, s); \
- FloatParts pr = minmax_floats(pa, pb, ismin, isiee, ismag, s); \
+ FloatParts64 pa = bfloat16_unpack_canonical(a, s); \
+ FloatParts64 pb = bfloat16_unpack_canonical(b, s); \
+ FloatParts64 pr = minmax_floats(pa, pb, ismin, isiee, ismag, s); \
\
return bfloat16_round_pack_canonical(pr, s); \
}
@@ -3186,7 +3186,7 @@ BF16_MINMAX(maxnummag, false, true, true)
#undef BF16_MINMAX
/* Floating point compare */
-static FloatRelation compare_floats(FloatParts a, FloatParts b, bool is_quiet,
+static FloatRelation compare_floats(FloatParts64 a, FloatParts64 b, bool is_quiet,
float_status *s)
{
if (is_nan(a.cls) || is_nan(b.cls)) {
@@ -3247,8 +3247,8 @@ static FloatRelation compare_floats(FloatParts a, FloatParts b, bool is_quiet,
static int attr \
name(float ## sz a, float ## sz b, bool is_quiet, float_status *s) \
{ \
- FloatParts pa = float ## sz ## _unpack_canonical(a, s); \
- FloatParts pb = float ## sz ## _unpack_canonical(b, s); \
+ FloatParts64 pa = float ## sz ## _unpack_canonical(a, s); \
+ FloatParts64 pb = float ## sz ## _unpack_canonical(b, s); \
return compare_floats(pa, pb, is_quiet, s); \
}
@@ -3349,8 +3349,8 @@ FloatRelation float64_compare_quiet(float64 a, float64 b, float_status *s)
static FloatRelation QEMU_FLATTEN
soft_bf16_compare(bfloat16 a, bfloat16 b, bool is_quiet, float_status *s)
{
- FloatParts pa = bfloat16_unpack_canonical(a, s);
- FloatParts pb = bfloat16_unpack_canonical(b, s);
+ FloatParts64 pa = bfloat16_unpack_canonical(a, s);
+ FloatParts64 pb = bfloat16_unpack_canonical(b, s);
return compare_floats(pa, pb, is_quiet, s);
}
@@ -3365,16 +3365,16 @@ FloatRelation bfloat16_compare_quiet(bfloat16 a, bfloat16 b, float_status *s)
}
/* Multiply A by 2 raised to the power N. */
-static FloatParts scalbn_decomposed(FloatParts a, int n, float_status *s)
+static FloatParts64 scalbn_decomposed(FloatParts64 a, int n, float_status *s)
{
if (unlikely(is_nan(a.cls))) {
return return_nan(a, s);
}
if (a.cls == float_class_normal) {
- /* The largest float type (even though not supported by FloatParts)
+ /* The largest float type (even though not supported by FloatParts64)
* is float128, which has a 15 bit exponent. Bounding N to 16 bits
* still allows rounding to infinity, without allowing overflow
- * within the int32_t that backs FloatParts.exp.
+ * within the int32_t that backs FloatParts64.exp.
*/
n = MIN(MAX(n, -0x10000), 0x10000);
a.exp += n;
@@ -3384,29 +3384,29 @@ static FloatParts scalbn_decomposed(FloatParts a, int n, float_status *s)
float16 float16_scalbn(float16 a, int n, float_status *status)
{
- FloatParts pa = float16_unpack_canonical(a, status);
- FloatParts pr = scalbn_decomposed(pa, n, status);
+ FloatParts64 pa = float16_unpack_canonical(a, status);
+ FloatParts64 pr = scalbn_decomposed(pa, n, status);
return float16_round_pack_canonical(pr, status);
}
float32 float32_scalbn(float32 a, int n, float_status *status)
{
- FloatParts pa = float32_unpack_canonical(a, status);
- FloatParts pr = scalbn_decomposed(pa, n, status);
+ FloatParts64 pa = float32_unpack_canonical(a, status);
+ FloatParts64 pr = scalbn_decomposed(pa, n, status);
return float32_round_pack_canonical(pr, status);
}
float64 float64_scalbn(float64 a, int n, float_status *status)
{
- FloatParts pa = float64_unpack_canonical(a, status);
- FloatParts pr = scalbn_decomposed(pa, n, status);
+ FloatParts64 pa = float64_unpack_canonical(a, status);
+ FloatParts64 pr = scalbn_decomposed(pa, n, status);
return float64_round_pack_canonical(pr, status);
}
bfloat16 bfloat16_scalbn(bfloat16 a, int n, float_status *status)
{
- FloatParts pa = bfloat16_unpack_canonical(a, status);
- FloatParts pr = scalbn_decomposed(pa, n, status);
+ FloatParts64 pa = bfloat16_unpack_canonical(a, status);
+ FloatParts64 pr = scalbn_decomposed(pa, n, status);
return bfloat16_round_pack_canonical(pr, status);
}
@@ -3422,7 +3422,7 @@ bfloat16 bfloat16_scalbn(bfloat16 a, int n, float_status *status)
* especially for 64 bit floats.
*/
-static FloatParts sqrt_float(FloatParts a, float_status *s, const FloatFmt *p)
+static FloatParts64 sqrt_float(FloatParts64 a, float_status *s, const FloatFmt *p)
{
uint64_t a_frac, r_frac, s_frac;
int bit, last_bit;
@@ -3482,24 +3482,24 @@ static FloatParts sqrt_float(FloatParts a, float_status *s, const FloatFmt *p)
float16 QEMU_FLATTEN float16_sqrt(float16 a, float_status *status)
{
- FloatParts pa = float16_unpack_canonical(a, status);
- FloatParts pr = sqrt_float(pa, status, &float16_params);
+ FloatParts64 pa = float16_unpack_canonical(a, status);
+ FloatParts64 pr = sqrt_float(pa, status, &float16_params);
return float16_round_pack_canonical(pr, status);
}
static float32 QEMU_SOFTFLOAT_ATTR
soft_f32_sqrt(float32 a, float_status *status)
{
- FloatParts pa = float32_unpack_canonical(a, status);
- FloatParts pr = sqrt_float(pa, status, &float32_params);
+ FloatParts64 pa = float32_unpack_canonical(a, status);
+ FloatParts64 pr = sqrt_float(pa, status, &float32_params);
return float32_round_pack_canonical(pr, status);
}
static float64 QEMU_SOFTFLOAT_ATTR
soft_f64_sqrt(float64 a, float_status *status)
{
- FloatParts pa = float64_unpack_canonical(a, status);
- FloatParts pr = sqrt_float(pa, status, &float64_params);
+ FloatParts64 pa = float64_unpack_canonical(a, status);
+ FloatParts64 pr = sqrt_float(pa, status, &float64_params);
return float64_round_pack_canonical(pr, status);
}
@@ -3559,8 +3559,8 @@ float64 QEMU_FLATTEN float64_sqrt(float64 xa, float_status *s)
bfloat16 QEMU_FLATTEN bfloat16_sqrt(bfloat16 a, float_status *status)
{
- FloatParts pa = bfloat16_unpack_canonical(a, status);
- FloatParts pr = sqrt_float(pa, status, &bfloat16_params);
+ FloatParts64 pa = bfloat16_unpack_canonical(a, status);
+ FloatParts64 pr = sqrt_float(pa, status, &bfloat16_params);
return bfloat16_round_pack_canonical(pr, status);
}
@@ -3570,28 +3570,28 @@ bfloat16 QEMU_FLATTEN bfloat16_sqrt(bfloat16 a, float_status *status)
float16 float16_default_nan(float_status *status)
{
- FloatParts p = parts_default_nan(status);
+ FloatParts64 p = parts_default_nan(status);
p.frac >>= float16_params.frac_shift;
return float16_pack_raw(p);
}
float32 float32_default_nan(float_status *status)
{
- FloatParts p = parts_default_nan(status);
+ FloatParts64 p = parts_default_nan(status);
p.frac >>= float32_params.frac_shift;
return float32_pack_raw(p);
}
float64 float64_default_nan(float_status *status)
{
- FloatParts p = parts_default_nan(status);
+ FloatParts64 p = parts_default_nan(status);
p.frac >>= float64_params.frac_shift;
return float64_pack_raw(p);
}
float128 float128_default_nan(float_status *status)
{
- FloatParts p = parts_default_nan(status);
+ FloatParts64 p = parts_default_nan(status);
float128 r;
/* Extrapolate from the choices made by parts_default_nan to fill
@@ -3608,7 +3608,7 @@ float128 float128_default_nan(float_status *status)
bfloat16 bfloat16_default_nan(float_status *status)
{
- FloatParts p = parts_default_nan(status);
+ FloatParts64 p = parts_default_nan(status);
p.frac >>= bfloat16_params.frac_shift;
return bfloat16_pack_raw(p);
}
@@ -3619,7 +3619,7 @@ bfloat16 bfloat16_default_nan(float_status *status)
float16 float16_silence_nan(float16 a, float_status *status)
{
- FloatParts p = float16_unpack_raw(a);
+ FloatParts64 p = float16_unpack_raw(a);
p.frac <<= float16_params.frac_shift;
p = parts_silence_nan(p, status);
p.frac >>= float16_params.frac_shift;
@@ -3628,7 +3628,7 @@ float16 float16_silence_nan(float16 a, float_status *status)
float32 float32_silence_nan(float32 a, float_status *status)
{
- FloatParts p = float32_unpack_raw(a);
+ FloatParts64 p = float32_unpack_raw(a);
p.frac <<= float32_params.frac_shift;
p = parts_silence_nan(p, status);
p.frac >>= float32_params.frac_shift;
@@ -3637,7 +3637,7 @@ float32 float32_silence_nan(float32 a, float_status *status)
float64 float64_silence_nan(float64 a, float_status *status)
{
- FloatParts p = float64_unpack_raw(a);
+ FloatParts64 p = float64_unpack_raw(a);
p.frac <<= float64_params.frac_shift;
p = parts_silence_nan(p, status);
p.frac >>= float64_params.frac_shift;
@@ -3646,7 +3646,7 @@ float64 float64_silence_nan(float64 a, float_status *status)
bfloat16 bfloat16_silence_nan(bfloat16 a, float_status *status)
{
- FloatParts p = bfloat16_unpack_raw(a);
+ FloatParts64 p = bfloat16_unpack_raw(a);
p.frac <<= bfloat16_params.frac_shift;
p = parts_silence_nan(p, status);
p.frac >>= bfloat16_params.frac_shift;
@@ -3658,7 +3658,7 @@ bfloat16 bfloat16_silence_nan(bfloat16 a, float_status *status)
| input-denormal exception and return zero. Otherwise just return the value.
*----------------------------------------------------------------------------*/
-static bool parts_squash_denormal(FloatParts p, float_status *status)
+static bool parts_squash_denormal(FloatParts64 p, float_status *status)
{
if (p.exp == 0 && p.frac != 0) {
float_raise(float_flag_input_denormal, status);
@@ -3671,7 +3671,7 @@ static bool parts_squash_denormal(FloatParts p, float_status *status)
float16 float16_squash_input_denormal(float16 a, float_status *status)
{
if (status->flush_inputs_to_zero) {
- FloatParts p = float16_unpack_raw(a);
+ FloatParts64 p = float16_unpack_raw(a);
if (parts_squash_denormal(p, status)) {
return float16_set_sign(float16_zero, p.sign);
}
@@ -3682,7 +3682,7 @@ float16 float16_squash_input_denormal(float16 a, float_status *status)
float32 float32_squash_input_denormal(float32 a, float_status *status)
{
if (status->flush_inputs_to_zero) {
- FloatParts p = float32_unpack_raw(a);
+ FloatParts64 p = float32_unpack_raw(a);
if (parts_squash_denormal(p, status)) {
return float32_set_sign(float32_zero, p.sign);
}
@@ -3693,7 +3693,7 @@ float32 float32_squash_input_denormal(float32 a, float_status *status)
float64 float64_squash_input_denormal(float64 a, float_status *status)
{
if (status->flush_inputs_to_zero) {
- FloatParts p = float64_unpack_raw(a);
+ FloatParts64 p = float64_unpack_raw(a);
if (parts_squash_denormal(p, status)) {
return float64_set_sign(float64_zero, p.sign);
}
@@ -3704,7 +3704,7 @@ float64 float64_squash_input_denormal(float64 a, float_status *status)
bfloat16 bfloat16_squash_input_denormal(bfloat16 a, float_status *status)
{
if (status->flush_inputs_to_zero) {
- FloatParts p = bfloat16_unpack_raw(a);
+ FloatParts64 p = bfloat16_unpack_raw(a);
if (parts_squash_denormal(p, status)) {
return bfloat16_set_sign(bfloat16_zero, p.sign);
}
diff --git a/fpu/softfloat-specialize.c.inc b/fpu/softfloat-specialize.c.inc
index 05cb2ee940..bb928b0b9f 100644
--- a/fpu/softfloat-specialize.c.inc
+++ b/fpu/softfloat-specialize.c.inc
@@ -129,7 +129,7 @@ static bool parts_is_snan_frac(uint64_t frac, float_status *status)
| The pattern for a default generated deconstructed floating-point NaN.
*----------------------------------------------------------------------------*/
-static FloatParts parts_default_nan(float_status *status)
+static FloatParts64 parts_default_nan(float_status *status)
{
bool sign = 0;
uint64_t frac;
@@ -163,7 +163,7 @@ static FloatParts parts_default_nan(float_status *status)
}
#endif
- return (FloatParts) {
+ return (FloatParts64) {
.cls = float_class_qnan,
.sign = sign,
.exp = INT_MAX,
@@ -176,7 +176,7 @@ static FloatParts parts_default_nan(float_status *status)
| floating-point parts.
*----------------------------------------------------------------------------*/
-static FloatParts parts_silence_nan(FloatParts a, float_status *status)
+static FloatParts64 parts_silence_nan(FloatParts64 a, float_status *status)
{
g_assert(!no_signaling_nans(status));
g_assert(!status->default_nan_mode);
--
2.25.1
^ permalink raw reply related [flat|nested] 52+ messages in thread
* [PULL 16/46] softfloat: Move type-specific pack/unpack routines
2021-05-16 12:33 [PULL 00/46] softfloat: Convert float128 to FloatParts (part 1) Richard Henderson
` (14 preceding siblings ...)
2021-05-16 12:34 ` [PULL 15/46] softfloat: Rename FloatParts to FloatParts64 Richard Henderson
@ 2021-05-16 12:34 ` Richard Henderson
2021-05-16 12:34 ` [PULL 17/46] softfloat: Use pointers with parts_default_nan Richard Henderson
` (31 subsequent siblings)
47 siblings, 0 replies; 52+ messages in thread
From: Richard Henderson @ 2021-05-16 12:34 UTC (permalink / raw)
To: qemu-devel; +Cc: peter.maydell, Philippe Mathieu-Daudé, David Hildenbrand
In preparation for moving sf_canonicalize.
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Reviewed-by: David Hildenbrand <david@redhat.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
fpu/softfloat.c | 109 +++++++++++++++++++++++++-----------------------
1 file changed, 56 insertions(+), 53 deletions(-)
diff --git a/fpu/softfloat.c b/fpu/softfloat.c
index 27b51659c9..398a068b58 100644
--- a/fpu/softfloat.c
+++ b/fpu/softfloat.c
@@ -837,59 +837,6 @@ static FloatParts64 round_canonical(FloatParts64 p, float_status *s,
return p;
}
-/* Explicit FloatFmt version */
-static FloatParts64 float16a_unpack_canonical(float16 f, float_status *s,
- const FloatFmt *params)
-{
- return sf_canonicalize(float16_unpack_raw(f), params, s);
-}
-
-static FloatParts64 float16_unpack_canonical(float16 f, float_status *s)
-{
- return float16a_unpack_canonical(f, s, &float16_params);
-}
-
-static FloatParts64 bfloat16_unpack_canonical(bfloat16 f, float_status *s)
-{
- return sf_canonicalize(bfloat16_unpack_raw(f), &bfloat16_params, s);
-}
-
-static float16 float16a_round_pack_canonical(FloatParts64 p, float_status *s,
- const FloatFmt *params)
-{
- return float16_pack_raw(round_canonical(p, s, params));
-}
-
-static float16 float16_round_pack_canonical(FloatParts64 p, float_status *s)
-{
- return float16a_round_pack_canonical(p, s, &float16_params);
-}
-
-static bfloat16 bfloat16_round_pack_canonical(FloatParts64 p, float_status *s)
-{
- return bfloat16_pack_raw(round_canonical(p, s, &bfloat16_params));
-}
-
-static FloatParts64 float32_unpack_canonical(float32 f, float_status *s)
-{
- return sf_canonicalize(float32_unpack_raw(f), &float32_params, s);
-}
-
-static float32 float32_round_pack_canonical(FloatParts64 p, float_status *s)
-{
- return float32_pack_raw(round_canonical(p, s, &float32_params));
-}
-
-static FloatParts64 float64_unpack_canonical(float64 f, float_status *s)
-{
- return sf_canonicalize(float64_unpack_raw(f), &float64_params, s);
-}
-
-static float64 float64_round_pack_canonical(FloatParts64 p, float_status *s)
-{
- return float64_pack_raw(round_canonical(p, s, &float64_params));
-}
-
static FloatParts64 return_nan(FloatParts64 a, float_status *s)
{
g_assert(is_nan(a.cls));
@@ -964,6 +911,62 @@ static FloatParts64 pick_nan_muladd(FloatParts64 a, FloatParts64 b, FloatParts64
return a;
}
+/*
+ * Pack/unpack routines with a specific FloatFmt.
+ */
+
+static FloatParts64 float16a_unpack_canonical(float16 f, float_status *s,
+ const FloatFmt *params)
+{
+ return sf_canonicalize(float16_unpack_raw(f), params, s);
+}
+
+static FloatParts64 float16_unpack_canonical(float16 f, float_status *s)
+{
+ return float16a_unpack_canonical(f, s, &float16_params);
+}
+
+static FloatParts64 bfloat16_unpack_canonical(bfloat16 f, float_status *s)
+{
+ return sf_canonicalize(bfloat16_unpack_raw(f), &bfloat16_params, s);
+}
+
+static float16 float16a_round_pack_canonical(FloatParts64 p, float_status *s,
+ const FloatFmt *params)
+{
+ return float16_pack_raw(round_canonical(p, s, params));
+}
+
+static float16 float16_round_pack_canonical(FloatParts64 p, float_status *s)
+{
+ return float16a_round_pack_canonical(p, s, &float16_params);
+}
+
+static bfloat16 bfloat16_round_pack_canonical(FloatParts64 p, float_status *s)
+{
+ return bfloat16_pack_raw(round_canonical(p, s, &bfloat16_params));
+}
+
+static FloatParts64 float32_unpack_canonical(float32 f, float_status *s)
+{
+ return sf_canonicalize(float32_unpack_raw(f), &float32_params, s);
+}
+
+static float32 float32_round_pack_canonical(FloatParts64 p, float_status *s)
+{
+ return float32_pack_raw(round_canonical(p, s, &float32_params));
+}
+
+static FloatParts64 float64_unpack_canonical(float64 f, float_status *s)
+{
+ return sf_canonicalize(float64_unpack_raw(f), &float64_params, s);
+}
+
+static float64 float64_round_pack_canonical(FloatParts64 p, float_status *s)
+{
+ return float64_pack_raw(round_canonical(p, s, &float64_params));
+}
+
/*
* Returns the result of adding or subtracting the values of the
* floating-point values `a' and `b'. The operation is performed
--
2.25.1
^ permalink raw reply related [flat|nested] 52+ messages in thread
* [PULL 17/46] softfloat: Use pointers with parts_default_nan
2021-05-16 12:33 [PULL 00/46] softfloat: Convert float128 to FloatParts (part 1) Richard Henderson
` (15 preceding siblings ...)
2021-05-16 12:34 ` [PULL 16/46] softfloat: Move type-specific pack/unpack routines Richard Henderson
@ 2021-05-16 12:34 ` Richard Henderson
2021-05-16 12:34 ` [PULL 18/46] softfloat: Use pointers with unpack_raw Richard Henderson
` (30 subsequent siblings)
47 siblings, 0 replies; 52+ messages in thread
From: Richard Henderson @ 2021-05-16 12:34 UTC (permalink / raw)
To: qemu-devel; +Cc: peter.maydell, David Hildenbrand
At the same time, rename to parts64_default_nan and add a
macro for parts_default_nan. This will be fleshed out once
128-bit support is added.
Reviewed-by: David Hildenbrand <david@redhat.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
fpu/softfloat.c | 47 +++++++++++++++++++++++-----------
fpu/softfloat-specialize.c.inc | 4 +--
2 files changed, 34 insertions(+), 17 deletions(-)
diff --git a/fpu/softfloat.c b/fpu/softfloat.c
index 398a068b58..c7f95961cf 100644
--- a/fpu/softfloat.c
+++ b/fpu/softfloat.c
@@ -650,6 +650,8 @@ static inline float64 float64_pack_raw(FloatParts64 p)
*----------------------------------------------------------------------------*/
#include "softfloat-specialize.c.inc"
+#define parts_default_nan parts64_default_nan
+
/* Canonicalize EXP and FRAC, setting CLS. */
static FloatParts64 sf_canonicalize(FloatParts64 part, const FloatFmt *parm,
float_status *status)
@@ -848,7 +850,8 @@ static FloatParts64 return_nan(FloatParts64 a, float_status *s)
} else if (!s->default_nan_mode) {
return a;
}
- return parts_default_nan(s);
+ parts_default_nan(&a, s);
+ return a;
}
static FloatParts64 pick_nan(FloatParts64 a, FloatParts64 b, float_status *s)
@@ -858,7 +861,7 @@ static FloatParts64 pick_nan(FloatParts64 a, FloatParts64 b, float_status *s)
}
if (s->default_nan_mode) {
- return parts_default_nan(s);
+ parts_default_nan(&a, s);
} else {
if (pickNaN(a.cls, b.cls,
a.frac > b.frac ||
@@ -900,7 +903,8 @@ static FloatParts64 pick_nan_muladd(FloatParts64 a, FloatParts64 b, FloatParts64
a = c;
break;
case 3:
- return parts_default_nan(s);
+ parts_default_nan(&a, s);
+ break;
default:
g_assert_not_reached();
}
@@ -1011,7 +1015,7 @@ static FloatParts64 addsub_floats(FloatParts64 a, FloatParts64 b, bool subtract,
if (a.cls == float_class_inf) {
if (b.cls == float_class_inf) {
float_raise(float_flag_invalid, s);
- return parts_default_nan(s);
+ parts_default_nan(&a, s);
}
return a;
}
@@ -1254,7 +1258,8 @@ static FloatParts64 mul_floats(FloatParts64 a, FloatParts64 b, float_status *s)
if ((a.cls == float_class_inf && b.cls == float_class_zero) ||
(a.cls == float_class_zero && b.cls == float_class_inf)) {
float_raise(float_flag_invalid, s);
- return parts_default_nan(s);
+ parts_default_nan(&a, s);
+ return a;
}
/* Multiply by 0 or Inf */
if (a.cls == float_class_inf || a.cls == float_class_zero) {
@@ -1372,7 +1377,8 @@ static FloatParts64 muladd_floats(FloatParts64 a, FloatParts64 b, FloatParts64 c
if (inf_zero) {
float_raise(float_flag_invalid, s);
- return parts_default_nan(s);
+ parts_default_nan(&a, s);
+ return a;
}
if (flags & float_muladd_negate_c) {
@@ -1396,11 +1402,11 @@ static FloatParts64 muladd_floats(FloatParts64 a, FloatParts64 b, FloatParts64 c
if (c.cls == float_class_inf) {
if (p_class == float_class_inf && p_sign != c.sign) {
float_raise(float_flag_invalid, s);
- return parts_default_nan(s);
+ parts_default_nan(&c, s);
} else {
c.sign ^= sign_flip;
- return c;
}
+ return c;
}
if (p_class == float_class_inf) {
@@ -1764,7 +1770,8 @@ static FloatParts64 div_floats(FloatParts64 a, FloatParts64 b, float_status *s)
&&
(a.cls == float_class_inf || a.cls == float_class_zero)) {
float_raise(float_flag_invalid, s);
- return parts_default_nan(s);
+ parts_default_nan(&a, s);
+ return a;
}
/* Inf / x or 0 / x */
if (a.cls == float_class_inf || a.cls == float_class_zero) {
@@ -3438,7 +3445,8 @@ static FloatParts64 sqrt_float(FloatParts64 a, float_status *s, const FloatFmt *
}
if (a.sign) {
float_raise(float_flag_invalid, s);
- return parts_default_nan(s);
+ parts_default_nan(&a, s);
+ return a;
}
if (a.cls == float_class_inf) {
return a; /* sqrt(+inf) = +inf */
@@ -3573,30 +3581,37 @@ bfloat16 QEMU_FLATTEN bfloat16_sqrt(bfloat16 a, float_status *status)
float16 float16_default_nan(float_status *status)
{
- FloatParts64 p = parts_default_nan(status);
+ FloatParts64 p;
+
+ parts_default_nan(&p, status);
p.frac >>= float16_params.frac_shift;
return float16_pack_raw(p);
}
float32 float32_default_nan(float_status *status)
{
- FloatParts64 p = parts_default_nan(status);
+ FloatParts64 p;
+
+ parts_default_nan(&p, status);
p.frac >>= float32_params.frac_shift;
return float32_pack_raw(p);
}
float64 float64_default_nan(float_status *status)
{
- FloatParts64 p = parts_default_nan(status);
+ FloatParts64 p;
+
+ parts_default_nan(&p, status);
p.frac >>= float64_params.frac_shift;
return float64_pack_raw(p);
}
float128 float128_default_nan(float_status *status)
{
- FloatParts64 p = parts_default_nan(status);
+ FloatParts64 p;
float128 r;
+ parts_default_nan(&p, status);
/* Extrapolate from the choices made by parts_default_nan to fill
* in the quad-floating format. If the low bit is set, assume we
* want to set all non-snan bits.
@@ -3611,7 +3626,9 @@ float128 float128_default_nan(float_status *status)
bfloat16 bfloat16_default_nan(float_status *status)
{
- FloatParts64 p = parts_default_nan(status);
+ FloatParts64 p;
+
+ parts_default_nan(&p, status);
p.frac >>= bfloat16_params.frac_shift;
return bfloat16_pack_raw(p);
}
diff --git a/fpu/softfloat-specialize.c.inc b/fpu/softfloat-specialize.c.inc
index bb928b0b9f..47c3652d63 100644
--- a/fpu/softfloat-specialize.c.inc
+++ b/fpu/softfloat-specialize.c.inc
@@ -129,7 +129,7 @@ static bool parts_is_snan_frac(uint64_t frac, float_status *status)
| The pattern for a default generated deconstructed floating-point NaN.
*----------------------------------------------------------------------------*/
-static FloatParts64 parts_default_nan(float_status *status)
+static void parts64_default_nan(FloatParts64 *p, float_status *status)
{
bool sign = 0;
uint64_t frac;
@@ -163,7 +163,7 @@ static FloatParts64 parts_default_nan(float_status *status)
}
#endif
- return (FloatParts64) {
+ *p = (FloatParts64) {
.cls = float_class_qnan,
.sign = sign,
.exp = INT_MAX,
--
2.25.1
^ permalink raw reply related [flat|nested] 52+ messages in thread
* [PULL 18/46] softfloat: Use pointers with unpack_raw
2021-05-16 12:33 [PULL 00/46] softfloat: Convert float128 to FloatParts (part 1) Richard Henderson
` (16 preceding siblings ...)
2021-05-16 12:34 ` [PULL 17/46] softfloat: Use pointers with parts_default_nan Richard Henderson
@ 2021-05-16 12:34 ` Richard Henderson
2021-05-16 12:34 ` [PULL 19/46] softfloat: Use pointers with ftype_unpack_raw Richard Henderson
` (29 subsequent siblings)
47 siblings, 0 replies; 52+ messages in thread
From: Richard Henderson @ 2021-05-16 12:34 UTC (permalink / raw)
To: qemu-devel; +Cc: peter.maydell, Alex Bennée, Philippe Mathieu-Daudé
At the same time, rename to unpack_raw64.
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
fpu/softfloat.c | 29 +++++++++++++++++++----------
1 file changed, 19 insertions(+), 10 deletions(-)
diff --git a/fpu/softfloat.c b/fpu/softfloat.c
index c7f95961cf..5ff9368012 100644
--- a/fpu/softfloat.c
+++ b/fpu/softfloat.c
@@ -580,36 +580,45 @@ static const FloatFmt float64_params = {
};
/* Unpack a float to parts, but do not canonicalize. */
-static inline FloatParts64 unpack_raw(FloatFmt fmt, uint64_t raw)
+static void unpack_raw64(FloatParts64 *r, const FloatFmt *fmt, uint64_t raw)
{
- const int sign_pos = fmt.frac_size + fmt.exp_size;
+ const int f_size = fmt->frac_size;
+ const int e_size = fmt->exp_size;
- return (FloatParts64) {
+ *r = (FloatParts64) {
.cls = float_class_unclassified,
- .sign = extract64(raw, sign_pos, 1),
- .exp = extract64(raw, fmt.frac_size, fmt.exp_size),
- .frac = extract64(raw, 0, fmt.frac_size),
+ .sign = extract64(raw, f_size + e_size, 1),
+ .exp = extract64(raw, f_size, e_size),
+ .frac = extract64(raw, 0, f_size)
};
}
static inline FloatParts64 float16_unpack_raw(float16 f)
{
- return unpack_raw(float16_params, f);
+ FloatParts64 p;
+ unpack_raw64(&p, &float16_params, f);
+ return p;
}
static inline FloatParts64 bfloat16_unpack_raw(bfloat16 f)
{
- return unpack_raw(bfloat16_params, f);
+ FloatParts64 p;
+ unpack_raw64(&p, &bfloat16_params, f);
+ return p;
}
static inline FloatParts64 float32_unpack_raw(float32 f)
{
- return unpack_raw(float32_params, f);
+ FloatParts64 p;
+ unpack_raw64(&p, &float32_params, f);
+ return p;
}
static inline FloatParts64 float64_unpack_raw(float64 f)
{
- return unpack_raw(float64_params, f);
+ FloatParts64 p;
+ unpack_raw64(&p, &float64_params, f);
+ return p;
}
/* Pack a float from parts, but do not canonicalize. */
--
2.25.1
^ permalink raw reply related [flat|nested] 52+ messages in thread
* [PULL 19/46] softfloat: Use pointers with ftype_unpack_raw
2021-05-16 12:33 [PULL 00/46] softfloat: Convert float128 to FloatParts (part 1) Richard Henderson
` (17 preceding siblings ...)
2021-05-16 12:34 ` [PULL 18/46] softfloat: Use pointers with unpack_raw Richard Henderson
@ 2021-05-16 12:34 ` Richard Henderson
2021-05-16 12:34 ` [PULL 20/46] softfloat: Use pointers with pack_raw Richard Henderson
` (28 subsequent siblings)
47 siblings, 0 replies; 52+ messages in thread
From: Richard Henderson @ 2021-05-16 12:34 UTC (permalink / raw)
To: qemu-devel; +Cc: peter.maydell, Alex Bennée
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
fpu/softfloat.c | 76 +++++++++++++++++++++++++++++++------------------
1 file changed, 48 insertions(+), 28 deletions(-)
diff --git a/fpu/softfloat.c b/fpu/softfloat.c
index 5ff9368012..5a736a46cf 100644
--- a/fpu/softfloat.c
+++ b/fpu/softfloat.c
@@ -593,32 +593,24 @@ static void unpack_raw64(FloatParts64 *r, const FloatFmt *fmt, uint64_t raw)
};
}
-static inline FloatParts64 float16_unpack_raw(float16 f)
+static inline void float16_unpack_raw(FloatParts64 *p, float16 f)
{
- FloatParts64 p;
- unpack_raw64(&p, &float16_params, f);
- return p;
+ unpack_raw64(p, &float16_params, f);
}
-static inline FloatParts64 bfloat16_unpack_raw(bfloat16 f)
+static inline void bfloat16_unpack_raw(FloatParts64 *p, bfloat16 f)
{
- FloatParts64 p;
- unpack_raw64(&p, &bfloat16_params, f);
- return p;
+ unpack_raw64(p, &bfloat16_params, f);
}
-static inline FloatParts64 float32_unpack_raw(float32 f)
+static inline void float32_unpack_raw(FloatParts64 *p, float32 f)
{
- FloatParts64 p;
- unpack_raw64(&p, &float32_params, f);
- return p;
+ unpack_raw64(p, &float32_params, f);
}
-static inline FloatParts64 float64_unpack_raw(float64 f)
+static inline void float64_unpack_raw(FloatParts64 *p, float64 f)
{
- FloatParts64 p;
- unpack_raw64(&p, &float64_params, f);
- return p;
+ unpack_raw64(p, &float64_params, f);
}
/* Pack a float from parts, but do not canonicalize. */
@@ -931,7 +923,10 @@ static FloatParts64 pick_nan_muladd(FloatParts64 a, FloatParts64 b, FloatParts64
static FloatParts64 float16a_unpack_canonical(float16 f, float_status *s,
const FloatFmt *params)
{
- return sf_canonicalize(float16_unpack_raw(f), params, s);
+ FloatParts64 p;
+
+ float16_unpack_raw(&p, f);
+ return sf_canonicalize(p, params, s);
}
static FloatParts64 float16_unpack_canonical(float16 f, float_status *s)
@@ -941,7 +936,10 @@ static FloatParts64 float16_unpack_canonical(float16 f, float_status *s)
static FloatParts64 bfloat16_unpack_canonical(bfloat16 f, float_status *s)
{
- return sf_canonicalize(bfloat16_unpack_raw(f), &bfloat16_params, s);
+ FloatParts64 p;
+
+ bfloat16_unpack_raw(&p, f);
+ return sf_canonicalize(p, &bfloat16_params, s);
}
static float16 float16a_round_pack_canonical(FloatParts64 p, float_status *s,
@@ -962,7 +960,10 @@ static bfloat16 bfloat16_round_pack_canonical(FloatParts64 p, float_status *s)
static FloatParts64 float32_unpack_canonical(float32 f, float_status *s)
{
- return sf_canonicalize(float32_unpack_raw(f), &float32_params, s);
+ FloatParts64 p;
+
+ float32_unpack_raw(&p, f);
+ return sf_canonicalize(p, &float32_params, s);
}
static float32 float32_round_pack_canonical(FloatParts64 p, float_status *s)
@@ -972,7 +973,10 @@ static float32 float32_round_pack_canonical(FloatParts64 p, float_status *s)
static FloatParts64 float64_unpack_canonical(float64 f, float_status *s)
{
- return sf_canonicalize(float64_unpack_raw(f), &float64_params, s);
+ FloatParts64 p;
+
+ float64_unpack_raw(&p, f);
+ return sf_canonicalize(p, &float64_params, s);
}
static float64 float64_round_pack_canonical(FloatParts64 p, float_status *s)
@@ -3648,7 +3652,9 @@ bfloat16 bfloat16_default_nan(float_status *status)
float16 float16_silence_nan(float16 a, float_status *status)
{
- FloatParts64 p = float16_unpack_raw(a);
+ FloatParts64 p;
+
+ float16_unpack_raw(&p, a);
p.frac <<= float16_params.frac_shift;
p = parts_silence_nan(p, status);
p.frac >>= float16_params.frac_shift;
@@ -3657,7 +3663,9 @@ float16 float16_silence_nan(float16 a, float_status *status)
float32 float32_silence_nan(float32 a, float_status *status)
{
- FloatParts64 p = float32_unpack_raw(a);
+ FloatParts64 p;
+
+ float32_unpack_raw(&p, a);
p.frac <<= float32_params.frac_shift;
p = parts_silence_nan(p, status);
p.frac >>= float32_params.frac_shift;
@@ -3666,7 +3674,9 @@ float32 float32_silence_nan(float32 a, float_status *status)
float64 float64_silence_nan(float64 a, float_status *status)
{
- FloatParts64 p = float64_unpack_raw(a);
+ FloatParts64 p;
+
+ float64_unpack_raw(&p, a);
p.frac <<= float64_params.frac_shift;
p = parts_silence_nan(p, status);
p.frac >>= float64_params.frac_shift;
@@ -3675,7 +3685,9 @@ float64 float64_silence_nan(float64 a, float_status *status)
bfloat16 bfloat16_silence_nan(bfloat16 a, float_status *status)
{
- FloatParts64 p = bfloat16_unpack_raw(a);
+ FloatParts64 p;
+
+ bfloat16_unpack_raw(&p, a);
p.frac <<= bfloat16_params.frac_shift;
p = parts_silence_nan(p, status);
p.frac >>= bfloat16_params.frac_shift;
@@ -3700,7 +3712,9 @@ static bool parts_squash_denormal(FloatParts64 p, float_status *status)
float16 float16_squash_input_denormal(float16 a, float_status *status)
{
if (status->flush_inputs_to_zero) {
- FloatParts64 p = float16_unpack_raw(a);
+ FloatParts64 p;
+
+ float16_unpack_raw(&p, a);
if (parts_squash_denormal(p, status)) {
return float16_set_sign(float16_zero, p.sign);
}
@@ -3711,7 +3725,9 @@ float16 float16_squash_input_denormal(float16 a, float_status *status)
float32 float32_squash_input_denormal(float32 a, float_status *status)
{
if (status->flush_inputs_to_zero) {
- FloatParts64 p = float32_unpack_raw(a);
+ FloatParts64 p;
+
+ float32_unpack_raw(&p, a);
if (parts_squash_denormal(p, status)) {
return float32_set_sign(float32_zero, p.sign);
}
@@ -3722,7 +3738,9 @@ float32 float32_squash_input_denormal(float32 a, float_status *status)
float64 float64_squash_input_denormal(float64 a, float_status *status)
{
if (status->flush_inputs_to_zero) {
- FloatParts64 p = float64_unpack_raw(a);
+ FloatParts64 p;
+
+ float64_unpack_raw(&p, a);
if (parts_squash_denormal(p, status)) {
return float64_set_sign(float64_zero, p.sign);
}
@@ -3733,7 +3751,9 @@ float64 float64_squash_input_denormal(float64 a, float_status *status)
bfloat16 bfloat16_squash_input_denormal(bfloat16 a, float_status *status)
{
if (status->flush_inputs_to_zero) {
- FloatParts64 p = bfloat16_unpack_raw(a);
+ FloatParts64 p;
+
+ bfloat16_unpack_raw(&p, a);
if (parts_squash_denormal(p, status)) {
return bfloat16_set_sign(bfloat16_zero, p.sign);
}
--
2.25.1
^ permalink raw reply related [flat|nested] 52+ messages in thread
* [PULL 20/46] softfloat: Use pointers with pack_raw
2021-05-16 12:33 [PULL 00/46] softfloat: Convert float128 to FloatParts (part 1) Richard Henderson
` (18 preceding siblings ...)
2021-05-16 12:34 ` [PULL 19/46] softfloat: Use pointers with ftype_unpack_raw Richard Henderson
@ 2021-05-16 12:34 ` Richard Henderson
2021-05-16 12:34 ` [PULL 21/46] softfloat: Use pointers with ftype_pack_raw Richard Henderson
` (27 subsequent siblings)
47 siblings, 0 replies; 52+ messages in thread
From: Richard Henderson @ 2021-05-16 12:34 UTC (permalink / raw)
To: qemu-devel; +Cc: peter.maydell, Alex Bennée
At the same time, rename to pack_raw64.
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
fpu/softfloat.c | 21 +++++++++++++--------
1 file changed, 13 insertions(+), 8 deletions(-)
diff --git a/fpu/softfloat.c b/fpu/softfloat.c
index 5a736a46cf..b59b777bca 100644
--- a/fpu/softfloat.c
+++ b/fpu/softfloat.c
@@ -614,31 +614,36 @@ static inline void float64_unpack_raw(FloatParts64 *p, float64 f)
}
/* Pack a float from parts, but do not canonicalize. */
-static inline uint64_t pack_raw(FloatFmt fmt, FloatParts64 p)
+static uint64_t pack_raw64(const FloatParts64 *p, const FloatFmt *fmt)
{
- const int sign_pos = fmt.frac_size + fmt.exp_size;
- uint64_t ret = deposit64(p.frac, fmt.frac_size, fmt.exp_size, p.exp);
- return deposit64(ret, sign_pos, 1, p.sign);
+ const int f_size = fmt->frac_size;
+ const int e_size = fmt->exp_size;
+ uint64_t ret;
+
+ ret = (uint64_t)p->sign << (f_size + e_size);
+ ret = deposit64(ret, f_size, e_size, p->exp);
+ ret = deposit64(ret, 0, f_size, p->frac);
+ return ret;
}
static inline float16 float16_pack_raw(FloatParts64 p)
{
- return make_float16(pack_raw(float16_params, p));
+ return make_float16(pack_raw64(&p, &float16_params));
}
static inline bfloat16 bfloat16_pack_raw(FloatParts64 p)
{
- return pack_raw(bfloat16_params, p);
+ return pack_raw64(&p, &bfloat16_params);
}
static inline float32 float32_pack_raw(FloatParts64 p)
{
- return make_float32(pack_raw(float32_params, p));
+ return make_float32(pack_raw64(&p, &float32_params));
}
static inline float64 float64_pack_raw(FloatParts64 p)
{
- return make_float64(pack_raw(float64_params, p));
+ return make_float64(pack_raw64(&p, &float64_params));
}
/*----------------------------------------------------------------------------
--
2.25.1
^ permalink raw reply related [flat|nested] 52+ messages in thread
* [PULL 21/46] softfloat: Use pointers with ftype_pack_raw
2021-05-16 12:33 [PULL 00/46] softfloat: Convert float128 to FloatParts (part 1) Richard Henderson
` (19 preceding siblings ...)
2021-05-16 12:34 ` [PULL 20/46] softfloat: Use pointers with pack_raw Richard Henderson
@ 2021-05-16 12:34 ` Richard Henderson
2021-05-16 12:34 ` [PULL 22/46] softfloat: Use pointers with ftype_unpack_canonical Richard Henderson
` (26 subsequent siblings)
47 siblings, 0 replies; 52+ messages in thread
From: Richard Henderson @ 2021-05-16 12:34 UTC (permalink / raw)
To: qemu-devel; +Cc: peter.maydell, Alex Bennée
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
fpu/softfloat.c | 44 ++++++++++++++++++++++++--------------------
1 file changed, 24 insertions(+), 20 deletions(-)
diff --git a/fpu/softfloat.c b/fpu/softfloat.c
index b59b777bca..e02cbafaf9 100644
--- a/fpu/softfloat.c
+++ b/fpu/softfloat.c
@@ -626,24 +626,24 @@ static uint64_t pack_raw64(const FloatParts64 *p, const FloatFmt *fmt)
return ret;
}
-static inline float16 float16_pack_raw(FloatParts64 p)
+static inline float16 float16_pack_raw(const FloatParts64 *p)
{
- return make_float16(pack_raw64(&p, &float16_params));
+ return make_float16(pack_raw64(p, &float16_params));
}
-static inline bfloat16 bfloat16_pack_raw(FloatParts64 p)
+static inline bfloat16 bfloat16_pack_raw(const FloatParts64 *p)
{
- return pack_raw64(&p, &bfloat16_params);
+ return pack_raw64(p, &bfloat16_params);
}
-static inline float32 float32_pack_raw(FloatParts64 p)
+static inline float32 float32_pack_raw(const FloatParts64 *p)
{
- return make_float32(pack_raw64(&p, &float32_params));
+ return make_float32(pack_raw64(p, &float32_params));
}
-static inline float64 float64_pack_raw(FloatParts64 p)
+static inline float64 float64_pack_raw(const FloatParts64 *p)
{
- return make_float64(pack_raw64(&p, &float64_params));
+ return make_float64(pack_raw64(p, &float64_params));
}
/*----------------------------------------------------------------------------
@@ -950,7 +950,8 @@ static FloatParts64 bfloat16_unpack_canonical(bfloat16 f, float_status *s)
static float16 float16a_round_pack_canonical(FloatParts64 p, float_status *s,
const FloatFmt *params)
{
- return float16_pack_raw(round_canonical(p, s, params));
+ p = round_canonical(p, s, params);
+ return float16_pack_raw(&p);
}
static float16 float16_round_pack_canonical(FloatParts64 p, float_status *s)
@@ -960,7 +961,8 @@ static float16 float16_round_pack_canonical(FloatParts64 p, float_status *s)
static bfloat16 bfloat16_round_pack_canonical(FloatParts64 p, float_status *s)
{
- return bfloat16_pack_raw(round_canonical(p, s, &bfloat16_params));
+ p = round_canonical(p, s, &bfloat16_params);
+ return bfloat16_pack_raw(&p);
}
static FloatParts64 float32_unpack_canonical(float32 f, float_status *s)
@@ -973,7 +975,8 @@ static FloatParts64 float32_unpack_canonical(float32 f, float_status *s)
static float32 float32_round_pack_canonical(FloatParts64 p, float_status *s)
{
- return float32_pack_raw(round_canonical(p, s, &float32_params));
+ p = round_canonical(p, s, &float32_params);
+ return float32_pack_raw(&p);
}
static FloatParts64 float64_unpack_canonical(float64 f, float_status *s)
@@ -986,7 +989,8 @@ static FloatParts64 float64_unpack_canonical(float64 f, float_status *s)
static float64 float64_round_pack_canonical(FloatParts64 p, float_status *s)
{
- return float64_pack_raw(round_canonical(p, s, &float64_params));
+ p = round_canonical(p, s, &float64_params);
+ return float64_pack_raw(&p);
}
/*
@@ -3603,7 +3607,7 @@ float16 float16_default_nan(float_status *status)
parts_default_nan(&p, status);
p.frac >>= float16_params.frac_shift;
- return float16_pack_raw(p);
+ return float16_pack_raw(&p);
}
float32 float32_default_nan(float_status *status)
@@ -3612,7 +3616,7 @@ float32 float32_default_nan(float_status *status)
parts_default_nan(&p, status);
p.frac >>= float32_params.frac_shift;
- return float32_pack_raw(p);
+ return float32_pack_raw(&p);
}
float64 float64_default_nan(float_status *status)
@@ -3621,7 +3625,7 @@ float64 float64_default_nan(float_status *status)
parts_default_nan(&p, status);
p.frac >>= float64_params.frac_shift;
- return float64_pack_raw(p);
+ return float64_pack_raw(&p);
}
float128 float128_default_nan(float_status *status)
@@ -3648,7 +3652,7 @@ bfloat16 bfloat16_default_nan(float_status *status)
parts_default_nan(&p, status);
p.frac >>= bfloat16_params.frac_shift;
- return bfloat16_pack_raw(p);
+ return bfloat16_pack_raw(&p);
}
/*----------------------------------------------------------------------------
@@ -3663,7 +3667,7 @@ float16 float16_silence_nan(float16 a, float_status *status)
p.frac <<= float16_params.frac_shift;
p = parts_silence_nan(p, status);
p.frac >>= float16_params.frac_shift;
- return float16_pack_raw(p);
+ return float16_pack_raw(&p);
}
float32 float32_silence_nan(float32 a, float_status *status)
@@ -3674,7 +3678,7 @@ float32 float32_silence_nan(float32 a, float_status *status)
p.frac <<= float32_params.frac_shift;
p = parts_silence_nan(p, status);
p.frac >>= float32_params.frac_shift;
- return float32_pack_raw(p);
+ return float32_pack_raw(&p);
}
float64 float64_silence_nan(float64 a, float_status *status)
@@ -3685,7 +3689,7 @@ float64 float64_silence_nan(float64 a, float_status *status)
p.frac <<= float64_params.frac_shift;
p = parts_silence_nan(p, status);
p.frac >>= float64_params.frac_shift;
- return float64_pack_raw(p);
+ return float64_pack_raw(&p);
}
bfloat16 bfloat16_silence_nan(bfloat16 a, float_status *status)
@@ -3696,7 +3700,7 @@ bfloat16 bfloat16_silence_nan(bfloat16 a, float_status *status)
p.frac <<= bfloat16_params.frac_shift;
p = parts_silence_nan(p, status);
p.frac >>= bfloat16_params.frac_shift;
- return bfloat16_pack_raw(p);
+ return bfloat16_pack_raw(&p);
}
/*----------------------------------------------------------------------------
--
2.25.1
^ permalink raw reply related [flat|nested] 52+ messages in thread
* [PULL 22/46] softfloat: Use pointers with ftype_unpack_canonical
2021-05-16 12:33 [PULL 00/46] softfloat: Convert float128 to FloatParts (part 1) Richard Henderson
` (20 preceding siblings ...)
2021-05-16 12:34 ` [PULL 21/46] softfloat: Use pointers with ftype_pack_raw Richard Henderson
@ 2021-05-16 12:34 ` Richard Henderson
2021-05-16 12:34 ` [PULL 23/46] softfloat: Use pointers with ftype_round_pack_canonical Richard Henderson
` (25 subsequent siblings)
47 siblings, 0 replies; 52+ messages in thread
From: Richard Henderson @ 2021-05-16 12:34 UTC (permalink / raw)
To: qemu-devel; +Cc: peter.maydell, Alex Bennée
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
fpu/softfloat.c | 509 ++++++++++++++++++++++++++++++------------------
1 file changed, 320 insertions(+), 189 deletions(-)
diff --git a/fpu/softfloat.c b/fpu/softfloat.c
index e02cbafaf9..e53d4a138f 100644
--- a/fpu/softfloat.c
+++ b/fpu/softfloat.c
@@ -925,26 +925,24 @@ static FloatParts64 pick_nan_muladd(FloatParts64 a, FloatParts64 b, FloatParts64
* Pack/unpack routines with a specific FloatFmt.
*/
-static FloatParts64 float16a_unpack_canonical(float16 f, float_status *s,
- const FloatFmt *params)
+static void float16a_unpack_canonical(FloatParts64 *p, float16 f,
+ float_status *s, const FloatFmt *params)
{
- FloatParts64 p;
-
- float16_unpack_raw(&p, f);
- return sf_canonicalize(p, params, s);
+ float16_unpack_raw(p, f);
+ *p = sf_canonicalize(*p, params, s);
}
-static FloatParts64 float16_unpack_canonical(float16 f, float_status *s)
+static void float16_unpack_canonical(FloatParts64 *p, float16 f,
+ float_status *s)
{
- return float16a_unpack_canonical(f, s, &float16_params);
+ float16a_unpack_canonical(p, f, s, &float16_params);
}
-static FloatParts64 bfloat16_unpack_canonical(bfloat16 f, float_status *s)
+static void bfloat16_unpack_canonical(FloatParts64 *p, bfloat16 f,
+ float_status *s)
{
- FloatParts64 p;
-
- bfloat16_unpack_raw(&p, f);
- return sf_canonicalize(p, &bfloat16_params, s);
+ bfloat16_unpack_raw(p, f);
+ *p = sf_canonicalize(*p, &bfloat16_params, s);
}
static float16 float16a_round_pack_canonical(FloatParts64 p, float_status *s,
@@ -965,12 +963,11 @@ static bfloat16 bfloat16_round_pack_canonical(FloatParts64 p, float_status *s)
return bfloat16_pack_raw(&p);
}
-static FloatParts64 float32_unpack_canonical(float32 f, float_status *s)
+static void float32_unpack_canonical(FloatParts64 *p, float32 f,
+ float_status *s)
{
- FloatParts64 p;
-
- float32_unpack_raw(&p, f);
- return sf_canonicalize(p, &float32_params, s);
+ float32_unpack_raw(p, f);
+ *p = sf_canonicalize(*p, &float32_params, s);
}
static float32 float32_round_pack_canonical(FloatParts64 p, float_status *s)
@@ -979,12 +976,11 @@ static float32 float32_round_pack_canonical(FloatParts64 p, float_status *s)
return float32_pack_raw(&p);
}
-static FloatParts64 float64_unpack_canonical(float64 f, float_status *s)
+static void float64_unpack_canonical(FloatParts64 *p, float64 f,
+ float_status *s)
{
- FloatParts64 p;
-
- float64_unpack_raw(&p, f);
- return sf_canonicalize(p, &float64_params, s);
+ float64_unpack_raw(p, f);
+ *p = sf_canonicalize(*p, &float64_params, s);
}
static float64 float64_round_pack_canonical(FloatParts64 p, float_status *s)
@@ -1091,18 +1087,22 @@ static FloatParts64 addsub_floats(FloatParts64 a, FloatParts64 b, bool subtract,
float16 QEMU_FLATTEN float16_add(float16 a, float16 b, float_status *status)
{
- FloatParts64 pa = float16_unpack_canonical(a, status);
- FloatParts64 pb = float16_unpack_canonical(b, status);
- FloatParts64 pr = addsub_floats(pa, pb, false, status);
+ FloatParts64 pa, pb, pr;
+
+ float16_unpack_canonical(&pa, a, status);
+ float16_unpack_canonical(&pb, b, status);
+ pr = addsub_floats(pa, pb, false, status);
return float16_round_pack_canonical(pr, status);
}
float16 QEMU_FLATTEN float16_sub(float16 a, float16 b, float_status *status)
{
- FloatParts64 pa = float16_unpack_canonical(a, status);
- FloatParts64 pb = float16_unpack_canonical(b, status);
- FloatParts64 pr = addsub_floats(pa, pb, true, status);
+ FloatParts64 pa, pb, pr;
+
+ float16_unpack_canonical(&pa, a, status);
+ float16_unpack_canonical(&pb, b, status);
+ pr = addsub_floats(pa, pb, true, status);
return float16_round_pack_canonical(pr, status);
}
@@ -1110,9 +1110,11 @@ float16 QEMU_FLATTEN float16_sub(float16 a, float16 b, float_status *status)
static float32 QEMU_SOFTFLOAT_ATTR
soft_f32_addsub(float32 a, float32 b, bool subtract, float_status *status)
{
- FloatParts64 pa = float32_unpack_canonical(a, status);
- FloatParts64 pb = float32_unpack_canonical(b, status);
- FloatParts64 pr = addsub_floats(pa, pb, subtract, status);
+ FloatParts64 pa, pb, pr;
+
+ float32_unpack_canonical(&pa, a, status);
+ float32_unpack_canonical(&pb, b, status);
+ pr = addsub_floats(pa, pb, subtract, status);
return float32_round_pack_canonical(pr, status);
}
@@ -1130,9 +1132,11 @@ static inline float32 soft_f32_sub(float32 a, float32 b, float_status *status)
static float64 QEMU_SOFTFLOAT_ATTR
soft_f64_addsub(float64 a, float64 b, bool subtract, float_status *status)
{
- FloatParts64 pa = float64_unpack_canonical(a, status);
- FloatParts64 pb = float64_unpack_canonical(b, status);
- FloatParts64 pr = addsub_floats(pa, pb, subtract, status);
+ FloatParts64 pa, pb, pr;
+
+ float64_unpack_canonical(&pa, a, status);
+ float64_unpack_canonical(&pb, b, status);
+ pr = addsub_floats(pa, pb, subtract, status);
return float64_round_pack_canonical(pr, status);
}
@@ -1228,18 +1232,22 @@ float64_sub(float64 a, float64 b, float_status *s)
*/
bfloat16 QEMU_FLATTEN bfloat16_add(bfloat16 a, bfloat16 b, float_status *status)
{
- FloatParts64 pa = bfloat16_unpack_canonical(a, status);
- FloatParts64 pb = bfloat16_unpack_canonical(b, status);
- FloatParts64 pr = addsub_floats(pa, pb, false, status);
+ FloatParts64 pa, pb, pr;
+
+ bfloat16_unpack_canonical(&pa, a, status);
+ bfloat16_unpack_canonical(&pb, b, status);
+ pr = addsub_floats(pa, pb, false, status);
return bfloat16_round_pack_canonical(pr, status);
}
bfloat16 QEMU_FLATTEN bfloat16_sub(bfloat16 a, bfloat16 b, float_status *status)
{
- FloatParts64 pa = bfloat16_unpack_canonical(a, status);
- FloatParts64 pb = bfloat16_unpack_canonical(b, status);
- FloatParts64 pr = addsub_floats(pa, pb, true, status);
+ FloatParts64 pa, pb, pr;
+
+ bfloat16_unpack_canonical(&pa, a, status);
+ bfloat16_unpack_canonical(&pb, b, status);
+ pr = addsub_floats(pa, pb, true, status);
return bfloat16_round_pack_canonical(pr, status);
}
@@ -1297,9 +1305,11 @@ static FloatParts64 mul_floats(FloatParts64 a, FloatParts64 b, float_status *s)
float16 QEMU_FLATTEN float16_mul(float16 a, float16 b, float_status *status)
{
- FloatParts64 pa = float16_unpack_canonical(a, status);
- FloatParts64 pb = float16_unpack_canonical(b, status);
- FloatParts64 pr = mul_floats(pa, pb, status);
+ FloatParts64 pa, pb, pr;
+
+ float16_unpack_canonical(&pa, a, status);
+ float16_unpack_canonical(&pb, b, status);
+ pr = mul_floats(pa, pb, status);
return float16_round_pack_canonical(pr, status);
}
@@ -1307,9 +1317,11 @@ float16 QEMU_FLATTEN float16_mul(float16 a, float16 b, float_status *status)
static float32 QEMU_SOFTFLOAT_ATTR
soft_f32_mul(float32 a, float32 b, float_status *status)
{
- FloatParts64 pa = float32_unpack_canonical(a, status);
- FloatParts64 pb = float32_unpack_canonical(b, status);
- FloatParts64 pr = mul_floats(pa, pb, status);
+ FloatParts64 pa, pb, pr;
+
+ float32_unpack_canonical(&pa, a, status);
+ float32_unpack_canonical(&pb, b, status);
+ pr = mul_floats(pa, pb, status);
return float32_round_pack_canonical(pr, status);
}
@@ -1317,9 +1329,11 @@ soft_f32_mul(float32 a, float32 b, float_status *status)
static float64 QEMU_SOFTFLOAT_ATTR
soft_f64_mul(float64 a, float64 b, float_status *status)
{
- FloatParts64 pa = float64_unpack_canonical(a, status);
- FloatParts64 pb = float64_unpack_canonical(b, status);
- FloatParts64 pr = mul_floats(pa, pb, status);
+ FloatParts64 pa, pb, pr;
+
+ float64_unpack_canonical(&pa, a, status);
+ float64_unpack_canonical(&pb, b, status);
+ pr = mul_floats(pa, pb, status);
return float64_round_pack_canonical(pr, status);
}
@@ -1355,9 +1369,11 @@ float64_mul(float64 a, float64 b, float_status *s)
bfloat16 QEMU_FLATTEN bfloat16_mul(bfloat16 a, bfloat16 b, float_status *status)
{
- FloatParts64 pa = bfloat16_unpack_canonical(a, status);
- FloatParts64 pb = bfloat16_unpack_canonical(b, status);
- FloatParts64 pr = mul_floats(pa, pb, status);
+ FloatParts64 pa, pb, pr;
+
+ bfloat16_unpack_canonical(&pa, a, status);
+ bfloat16_unpack_canonical(&pb, b, status);
+ pr = mul_floats(pa, pb, status);
return bfloat16_round_pack_canonical(pr, status);
}
@@ -1551,10 +1567,12 @@ static FloatParts64 muladd_floats(FloatParts64 a, FloatParts64 b, FloatParts64 c
float16 QEMU_FLATTEN float16_muladd(float16 a, float16 b, float16 c,
int flags, float_status *status)
{
- FloatParts64 pa = float16_unpack_canonical(a, status);
- FloatParts64 pb = float16_unpack_canonical(b, status);
- FloatParts64 pc = float16_unpack_canonical(c, status);
- FloatParts64 pr = muladd_floats(pa, pb, pc, flags, status);
+ FloatParts64 pa, pb, pc, pr;
+
+ float16_unpack_canonical(&pa, a, status);
+ float16_unpack_canonical(&pb, b, status);
+ float16_unpack_canonical(&pc, c, status);
+ pr = muladd_floats(pa, pb, pc, flags, status);
return float16_round_pack_canonical(pr, status);
}
@@ -1563,10 +1581,12 @@ static float32 QEMU_SOFTFLOAT_ATTR
soft_f32_muladd(float32 a, float32 b, float32 c, int flags,
float_status *status)
{
- FloatParts64 pa = float32_unpack_canonical(a, status);
- FloatParts64 pb = float32_unpack_canonical(b, status);
- FloatParts64 pc = float32_unpack_canonical(c, status);
- FloatParts64 pr = muladd_floats(pa, pb, pc, flags, status);
+ FloatParts64 pa, pb, pc, pr;
+
+ float32_unpack_canonical(&pa, a, status);
+ float32_unpack_canonical(&pb, b, status);
+ float32_unpack_canonical(&pc, c, status);
+ pr = muladd_floats(pa, pb, pc, flags, status);
return float32_round_pack_canonical(pr, status);
}
@@ -1575,10 +1595,12 @@ static float64 QEMU_SOFTFLOAT_ATTR
soft_f64_muladd(float64 a, float64 b, float64 c, int flags,
float_status *status)
{
- FloatParts64 pa = float64_unpack_canonical(a, status);
- FloatParts64 pb = float64_unpack_canonical(b, status);
- FloatParts64 pc = float64_unpack_canonical(c, status);
- FloatParts64 pr = muladd_floats(pa, pb, pc, flags, status);
+ FloatParts64 pa, pb, pc, pr;
+
+ float64_unpack_canonical(&pa, a, status);
+ float64_unpack_canonical(&pb, b, status);
+ float64_unpack_canonical(&pc, c, status);
+ pr = muladd_floats(pa, pb, pc, flags, status);
return float64_round_pack_canonical(pr, status);
}
@@ -1736,10 +1758,12 @@ float64_muladd(float64 xa, float64 xb, float64 xc, int flags, float_status *s)
bfloat16 QEMU_FLATTEN bfloat16_muladd(bfloat16 a, bfloat16 b, bfloat16 c,
int flags, float_status *status)
{
- FloatParts64 pa = bfloat16_unpack_canonical(a, status);
- FloatParts64 pb = bfloat16_unpack_canonical(b, status);
- FloatParts64 pc = bfloat16_unpack_canonical(c, status);
- FloatParts64 pr = muladd_floats(pa, pb, pc, flags, status);
+ FloatParts64 pa, pb, pc, pr;
+
+ bfloat16_unpack_canonical(&pa, a, status);
+ bfloat16_unpack_canonical(&pb, b, status);
+ bfloat16_unpack_canonical(&pc, c, status);
+ pr = muladd_floats(pa, pb, pc, flags, status);
return bfloat16_round_pack_canonical(pr, status);
}
@@ -1818,9 +1842,11 @@ static FloatParts64 div_floats(FloatParts64 a, FloatParts64 b, float_status *s)
float16 float16_div(float16 a, float16 b, float_status *status)
{
- FloatParts64 pa = float16_unpack_canonical(a, status);
- FloatParts64 pb = float16_unpack_canonical(b, status);
- FloatParts64 pr = div_floats(pa, pb, status);
+ FloatParts64 pa, pb, pr;
+
+ float16_unpack_canonical(&pa, a, status);
+ float16_unpack_canonical(&pb, b, status);
+ pr = div_floats(pa, pb, status);
return float16_round_pack_canonical(pr, status);
}
@@ -1828,9 +1854,11 @@ float16 float16_div(float16 a, float16 b, float_status *status)
static float32 QEMU_SOFTFLOAT_ATTR
soft_f32_div(float32 a, float32 b, float_status *status)
{
- FloatParts64 pa = float32_unpack_canonical(a, status);
- FloatParts64 pb = float32_unpack_canonical(b, status);
- FloatParts64 pr = div_floats(pa, pb, status);
+ FloatParts64 pa, pb, pr;
+
+ float32_unpack_canonical(&pa, a, status);
+ float32_unpack_canonical(&pb, b, status);
+ pr = div_floats(pa, pb, status);
return float32_round_pack_canonical(pr, status);
}
@@ -1838,9 +1866,11 @@ soft_f32_div(float32 a, float32 b, float_status *status)
static float64 QEMU_SOFTFLOAT_ATTR
soft_f64_div(float64 a, float64 b, float_status *status)
{
- FloatParts64 pa = float64_unpack_canonical(a, status);
- FloatParts64 pb = float64_unpack_canonical(b, status);
- FloatParts64 pr = div_floats(pa, pb, status);
+ FloatParts64 pa, pb, pr;
+
+ float64_unpack_canonical(&pa, a, status);
+ float64_unpack_canonical(&pb, b, status);
+ pr = div_floats(pa, pb, status);
return float64_round_pack_canonical(pr, status);
}
@@ -1910,9 +1940,11 @@ float64_div(float64 a, float64 b, float_status *s)
bfloat16 bfloat16_div(bfloat16 a, bfloat16 b, float_status *status)
{
- FloatParts64 pa = bfloat16_unpack_canonical(a, status);
- FloatParts64 pb = bfloat16_unpack_canonical(b, status);
- FloatParts64 pr = div_floats(pa, pb, status);
+ FloatParts64 pa, pb, pr;
+
+ bfloat16_unpack_canonical(&pa, a, status);
+ bfloat16_unpack_canonical(&pb, b, status);
+ pr = div_floats(pa, pb, status);
return bfloat16_round_pack_canonical(pr, status);
}
@@ -1966,32 +1998,40 @@ static FloatParts64 float_to_float(FloatParts64 a, const FloatFmt *dstf,
float32 float16_to_float32(float16 a, bool ieee, float_status *s)
{
const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
- FloatParts64 p = float16a_unpack_canonical(a, s, fmt16);
- FloatParts64 pr = float_to_float(p, &float32_params, s);
+ FloatParts64 pa, pr;
+
+ float16a_unpack_canonical(&pa, a, s, fmt16);
+ pr = float_to_float(pa, &float32_params, s);
return float32_round_pack_canonical(pr, s);
}
float64 float16_to_float64(float16 a, bool ieee, float_status *s)
{
const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
- FloatParts64 p = float16a_unpack_canonical(a, s, fmt16);
- FloatParts64 pr = float_to_float(p, &float64_params, s);
+ FloatParts64 pa, pr;
+
+ float16a_unpack_canonical(&pa, a, s, fmt16);
+ pr = float_to_float(pa, &float64_params, s);
return float64_round_pack_canonical(pr, s);
}
float16 float32_to_float16(float32 a, bool ieee, float_status *s)
{
const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
- FloatParts64 p = float32_unpack_canonical(a, s);
- FloatParts64 pr = float_to_float(p, fmt16, s);
+ FloatParts64 pa, pr;
+
+ float32_unpack_canonical(&pa, a, s);
+ pr = float_to_float(pa, fmt16, s);
return float16a_round_pack_canonical(pr, s, fmt16);
}
static float64 QEMU_SOFTFLOAT_ATTR
soft_float32_to_float64(float32 a, float_status *s)
{
- FloatParts64 p = float32_unpack_canonical(a, s);
- FloatParts64 pr = float_to_float(p, &float64_params, s);
+ FloatParts64 pa, pr;
+
+ float32_unpack_canonical(&pa, a, s);
+ pr = float_to_float(pa, &float64_params, s);
return float64_round_pack_canonical(pr, s);
}
@@ -2014,43 +2054,55 @@ float64 float32_to_float64(float32 a, float_status *s)
float16 float64_to_float16(float64 a, bool ieee, float_status *s)
{
const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
- FloatParts64 p = float64_unpack_canonical(a, s);
- FloatParts64 pr = float_to_float(p, fmt16, s);
+ FloatParts64 pa, pr;
+
+ float64_unpack_canonical(&pa, a, s);
+ pr = float_to_float(pa, fmt16, s);
return float16a_round_pack_canonical(pr, s, fmt16);
}
float32 float64_to_float32(float64 a, float_status *s)
{
- FloatParts64 p = float64_unpack_canonical(a, s);
- FloatParts64 pr = float_to_float(p, &float32_params, s);
+ FloatParts64 pa, pr;
+
+ float64_unpack_canonical(&pa, a, s);
+ pr = float_to_float(pa, &float32_params, s);
return float32_round_pack_canonical(pr, s);
}
float32 bfloat16_to_float32(bfloat16 a, float_status *s)
{
- FloatParts64 p = bfloat16_unpack_canonical(a, s);
- FloatParts64 pr = float_to_float(p, &float32_params, s);
+ FloatParts64 pa, pr;
+
+ bfloat16_unpack_canonical(&pa, a, s);
+ pr = float_to_float(pa, &float32_params, s);
return float32_round_pack_canonical(pr, s);
}
float64 bfloat16_to_float64(bfloat16 a, float_status *s)
{
- FloatParts64 p = bfloat16_unpack_canonical(a, s);
- FloatParts64 pr = float_to_float(p, &float64_params, s);
+ FloatParts64 pa, pr;
+
+ bfloat16_unpack_canonical(&pa, a, s);
+ pr = float_to_float(pa, &float64_params, s);
return float64_round_pack_canonical(pr, s);
}
bfloat16 float32_to_bfloat16(float32 a, float_status *s)
{
- FloatParts64 p = float32_unpack_canonical(a, s);
- FloatParts64 pr = float_to_float(p, &bfloat16_params, s);
+ FloatParts64 pa, pr;
+
+ float32_unpack_canonical(&pa, a, s);
+ pr = float_to_float(pa, &bfloat16_params, s);
return bfloat16_round_pack_canonical(pr, s);
}
bfloat16 float64_to_bfloat16(float64 a, float_status *s)
{
- FloatParts64 p = float64_unpack_canonical(a, s);
- FloatParts64 pr = float_to_float(p, &bfloat16_params, s);
+ FloatParts64 pa, pr;
+
+ float64_unpack_canonical(&pa, a, s);
+ pr = float_to_float(pa, &bfloat16_params, s);
return bfloat16_round_pack_canonical(pr, s);
}
@@ -2164,22 +2216,28 @@ static FloatParts64 round_to_int(FloatParts64 a, FloatRoundMode rmode,
float16 float16_round_to_int(float16 a, float_status *s)
{
- FloatParts64 pa = float16_unpack_canonical(a, s);
- FloatParts64 pr = round_to_int(pa, s->float_rounding_mode, 0, s);
+ FloatParts64 pa, pr;
+
+ float16_unpack_canonical(&pa, a, s);
+ pr = round_to_int(pa, s->float_rounding_mode, 0, s);
return float16_round_pack_canonical(pr, s);
}
float32 float32_round_to_int(float32 a, float_status *s)
{
- FloatParts64 pa = float32_unpack_canonical(a, s);
- FloatParts64 pr = round_to_int(pa, s->float_rounding_mode, 0, s);
+ FloatParts64 pa, pr;
+
+ float32_unpack_canonical(&pa, a, s);
+ pr = round_to_int(pa, s->float_rounding_mode, 0, s);
return float32_round_pack_canonical(pr, s);
}
float64 float64_round_to_int(float64 a, float_status *s)
{
- FloatParts64 pa = float64_unpack_canonical(a, s);
- FloatParts64 pr = round_to_int(pa, s->float_rounding_mode, 0, s);
+ FloatParts64 pa, pr;
+
+ float64_unpack_canonical(&pa, a, s);
+ pr = round_to_int(pa, s->float_rounding_mode, 0, s);
return float64_round_pack_canonical(pr, s);
}
@@ -2190,8 +2248,10 @@ float64 float64_round_to_int(float64 a, float_status *s)
bfloat16 bfloat16_round_to_int(bfloat16 a, float_status *s)
{
- FloatParts64 pa = bfloat16_unpack_canonical(a, s);
- FloatParts64 pr = round_to_int(pa, s->float_rounding_mode, 0, s);
+ FloatParts64 pa, pr;
+
+ bfloat16_unpack_canonical(&pa, a, s);
+ pr = round_to_int(pa, s->float_rounding_mode, 0, s);
return bfloat16_round_pack_canonical(pr, s);
}
@@ -2253,71 +2313,91 @@ static int64_t round_to_int_and_pack(FloatParts64 in, FloatRoundMode rmode,
int8_t float16_to_int8_scalbn(float16 a, FloatRoundMode rmode, int scale,
float_status *s)
{
- return round_to_int_and_pack(float16_unpack_canonical(a, s),
- rmode, scale, INT8_MIN, INT8_MAX, s);
+ FloatParts64 p;
+
+ float16_unpack_canonical(&p, a, s);
+ return round_to_int_and_pack(p, rmode, scale, INT8_MIN, INT8_MAX, s);
}
int16_t float16_to_int16_scalbn(float16 a, FloatRoundMode rmode, int scale,
float_status *s)
{
- return round_to_int_and_pack(float16_unpack_canonical(a, s),
- rmode, scale, INT16_MIN, INT16_MAX, s);
+ FloatParts64 p;
+
+ float16_unpack_canonical(&p, a, s);
+ return round_to_int_and_pack(p, rmode, scale, INT16_MIN, INT16_MAX, s);
}
int32_t float16_to_int32_scalbn(float16 a, FloatRoundMode rmode, int scale,
float_status *s)
{
- return round_to_int_and_pack(float16_unpack_canonical(a, s),
- rmode, scale, INT32_MIN, INT32_MAX, s);
+ FloatParts64 p;
+
+ float16_unpack_canonical(&p, a, s);
+ return round_to_int_and_pack(p, rmode, scale, INT32_MIN, INT32_MAX, s);
}
int64_t float16_to_int64_scalbn(float16 a, FloatRoundMode rmode, int scale,
float_status *s)
{
- return round_to_int_and_pack(float16_unpack_canonical(a, s),
- rmode, scale, INT64_MIN, INT64_MAX, s);
+ FloatParts64 p;
+
+ float16_unpack_canonical(&p, a, s);
+ return round_to_int_and_pack(p, rmode, scale, INT64_MIN, INT64_MAX, s);
}
int16_t float32_to_int16_scalbn(float32 a, FloatRoundMode rmode, int scale,
float_status *s)
{
- return round_to_int_and_pack(float32_unpack_canonical(a, s),
- rmode, scale, INT16_MIN, INT16_MAX, s);
+ FloatParts64 p;
+
+ float32_unpack_canonical(&p, a, s);
+ return round_to_int_and_pack(p, rmode, scale, INT16_MIN, INT16_MAX, s);
}
int32_t float32_to_int32_scalbn(float32 a, FloatRoundMode rmode, int scale,
float_status *s)
{
- return round_to_int_and_pack(float32_unpack_canonical(a, s),
- rmode, scale, INT32_MIN, INT32_MAX, s);
+ FloatParts64 p;
+
+ float32_unpack_canonical(&p, a, s);
+ return round_to_int_and_pack(p, rmode, scale, INT32_MIN, INT32_MAX, s);
}
int64_t float32_to_int64_scalbn(float32 a, FloatRoundMode rmode, int scale,
float_status *s)
{
- return round_to_int_and_pack(float32_unpack_canonical(a, s),
- rmode, scale, INT64_MIN, INT64_MAX, s);
+ FloatParts64 p;
+
+ float32_unpack_canonical(&p, a, s);
+ return round_to_int_and_pack(p, rmode, scale, INT64_MIN, INT64_MAX, s);
}
int16_t float64_to_int16_scalbn(float64 a, FloatRoundMode rmode, int scale,
float_status *s)
{
- return round_to_int_and_pack(float64_unpack_canonical(a, s),
- rmode, scale, INT16_MIN, INT16_MAX, s);
+ FloatParts64 p;
+
+ float64_unpack_canonical(&p, a, s);
+ return round_to_int_and_pack(p, rmode, scale, INT16_MIN, INT16_MAX, s);
}
int32_t float64_to_int32_scalbn(float64 a, FloatRoundMode rmode, int scale,
float_status *s)
{
- return round_to_int_and_pack(float64_unpack_canonical(a, s),
- rmode, scale, INT32_MIN, INT32_MAX, s);
+ FloatParts64 p;
+
+ float64_unpack_canonical(&p, a, s);
+ return round_to_int_and_pack(p, rmode, scale, INT32_MIN, INT32_MAX, s);
}
int64_t float64_to_int64_scalbn(float64 a, FloatRoundMode rmode, int scale,
float_status *s)
{
- return round_to_int_and_pack(float64_unpack_canonical(a, s),
- rmode, scale, INT64_MIN, INT64_MAX, s);
+ FloatParts64 p;
+
+ float64_unpack_canonical(&p, a, s);
+ return round_to_int_and_pack(p, rmode, scale, INT64_MIN, INT64_MAX, s);
}
int8_t float16_to_int8(float16 a, float_status *s)
@@ -2423,22 +2503,28 @@ int64_t float64_to_int64_round_to_zero(float64 a, float_status *s)
int16_t bfloat16_to_int16_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
float_status *s)
{
- return round_to_int_and_pack(bfloat16_unpack_canonical(a, s),
- rmode, scale, INT16_MIN, INT16_MAX, s);
+ FloatParts64 p;
+
+ bfloat16_unpack_canonical(&p, a, s);
+ return round_to_int_and_pack(p, rmode, scale, INT16_MIN, INT16_MAX, s);
}
int32_t bfloat16_to_int32_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
float_status *s)
{
- return round_to_int_and_pack(bfloat16_unpack_canonical(a, s),
- rmode, scale, INT32_MIN, INT32_MAX, s);
+ FloatParts64 p;
+
+ bfloat16_unpack_canonical(&p, a, s);
+ return round_to_int_and_pack(p, rmode, scale, INT32_MIN, INT32_MAX, s);
}
int64_t bfloat16_to_int64_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
float_status *s)
{
- return round_to_int_and_pack(bfloat16_unpack_canonical(a, s),
- rmode, scale, INT64_MIN, INT64_MAX, s);
+ FloatParts64 p;
+
+ bfloat16_unpack_canonical(&p, a, s);
+ return round_to_int_and_pack(p, rmode, scale, INT64_MIN, INT64_MAX, s);
}
int16_t bfloat16_to_int16(bfloat16 a, float_status *s)
@@ -2532,71 +2618,91 @@ static uint64_t round_to_uint_and_pack(FloatParts64 in, FloatRoundMode rmode,
uint8_t float16_to_uint8_scalbn(float16 a, FloatRoundMode rmode, int scale,
float_status *s)
{
- return round_to_uint_and_pack(float16_unpack_canonical(a, s),
- rmode, scale, UINT8_MAX, s);
+ FloatParts64 p;
+
+ float16_unpack_canonical(&p, a, s);
+ return round_to_uint_and_pack(p, rmode, scale, UINT8_MAX, s);
}
uint16_t float16_to_uint16_scalbn(float16 a, FloatRoundMode rmode, int scale,
float_status *s)
{
- return round_to_uint_and_pack(float16_unpack_canonical(a, s),
- rmode, scale, UINT16_MAX, s);
+ FloatParts64 p;
+
+ float16_unpack_canonical(&p, a, s);
+ return round_to_uint_and_pack(p, rmode, scale, UINT16_MAX, s);
}
uint32_t float16_to_uint32_scalbn(float16 a, FloatRoundMode rmode, int scale,
float_status *s)
{
- return round_to_uint_and_pack(float16_unpack_canonical(a, s),
- rmode, scale, UINT32_MAX, s);
+ FloatParts64 p;
+
+ float16_unpack_canonical(&p, a, s);
+ return round_to_uint_and_pack(p, rmode, scale, UINT32_MAX, s);
}
uint64_t float16_to_uint64_scalbn(float16 a, FloatRoundMode rmode, int scale,
float_status *s)
{
- return round_to_uint_and_pack(float16_unpack_canonical(a, s),
- rmode, scale, UINT64_MAX, s);
+ FloatParts64 p;
+
+ float16_unpack_canonical(&p, a, s);
+ return round_to_uint_and_pack(p, rmode, scale, UINT64_MAX, s);
}
uint16_t float32_to_uint16_scalbn(float32 a, FloatRoundMode rmode, int scale,
float_status *s)
{
- return round_to_uint_and_pack(float32_unpack_canonical(a, s),
- rmode, scale, UINT16_MAX, s);
+ FloatParts64 p;
+
+ float32_unpack_canonical(&p, a, s);
+ return round_to_uint_and_pack(p, rmode, scale, UINT16_MAX, s);
}
uint32_t float32_to_uint32_scalbn(float32 a, FloatRoundMode rmode, int scale,
float_status *s)
{
- return round_to_uint_and_pack(float32_unpack_canonical(a, s),
- rmode, scale, UINT32_MAX, s);
+ FloatParts64 p;
+
+ float32_unpack_canonical(&p, a, s);
+ return round_to_uint_and_pack(p, rmode, scale, UINT32_MAX, s);
}
uint64_t float32_to_uint64_scalbn(float32 a, FloatRoundMode rmode, int scale,
float_status *s)
{
- return round_to_uint_and_pack(float32_unpack_canonical(a, s),
- rmode, scale, UINT64_MAX, s);
+ FloatParts64 p;
+
+ float32_unpack_canonical(&p, a, s);
+ return round_to_uint_and_pack(p, rmode, scale, UINT64_MAX, s);
}
uint16_t float64_to_uint16_scalbn(float64 a, FloatRoundMode rmode, int scale,
float_status *s)
{
- return round_to_uint_and_pack(float64_unpack_canonical(a, s),
- rmode, scale, UINT16_MAX, s);
+ FloatParts64 p;
+
+ float64_unpack_canonical(&p, a, s);
+ return round_to_uint_and_pack(p, rmode, scale, UINT16_MAX, s);
}
uint32_t float64_to_uint32_scalbn(float64 a, FloatRoundMode rmode, int scale,
float_status *s)
{
- return round_to_uint_and_pack(float64_unpack_canonical(a, s),
- rmode, scale, UINT32_MAX, s);
+ FloatParts64 p;
+
+ float64_unpack_canonical(&p, a, s);
+ return round_to_uint_and_pack(p, rmode, scale, UINT32_MAX, s);
}
uint64_t float64_to_uint64_scalbn(float64 a, FloatRoundMode rmode, int scale,
float_status *s)
{
- return round_to_uint_and_pack(float64_unpack_canonical(a, s),
- rmode, scale, UINT64_MAX, s);
+ FloatParts64 p;
+
+ float64_unpack_canonical(&p, a, s);
+ return round_to_uint_and_pack(p, rmode, scale, UINT64_MAX, s);
}
uint8_t float16_to_uint8(float16 a, float_status *s)
@@ -2702,22 +2808,28 @@ uint64_t float64_to_uint64_round_to_zero(float64 a, float_status *s)
uint16_t bfloat16_to_uint16_scalbn(bfloat16 a, FloatRoundMode rmode,
int scale, float_status *s)
{
- return round_to_uint_and_pack(bfloat16_unpack_canonical(a, s),
- rmode, scale, UINT16_MAX, s);
+ FloatParts64 p;
+
+ bfloat16_unpack_canonical(&p, a, s);
+ return round_to_uint_and_pack(p, rmode, scale, UINT16_MAX, s);
}
uint32_t bfloat16_to_uint32_scalbn(bfloat16 a, FloatRoundMode rmode,
int scale, float_status *s)
{
- return round_to_uint_and_pack(bfloat16_unpack_canonical(a, s),
- rmode, scale, UINT32_MAX, s);
+ FloatParts64 p;
+
+ bfloat16_unpack_canonical(&p, a, s);
+ return round_to_uint_and_pack(p, rmode, scale, UINT32_MAX, s);
}
uint64_t bfloat16_to_uint64_scalbn(bfloat16 a, FloatRoundMode rmode,
int scale, float_status *s)
{
- return round_to_uint_and_pack(bfloat16_unpack_canonical(a, s),
- rmode, scale, UINT64_MAX, s);
+ FloatParts64 p;
+
+ bfloat16_unpack_canonical(&p, a, s);
+ return round_to_uint_and_pack(p, rmode, scale, UINT64_MAX, s);
}
uint16_t bfloat16_to_uint16(bfloat16 a, float_status *s)
@@ -3168,10 +3280,10 @@ static FloatParts64 minmax_floats(FloatParts64 a, FloatParts64 b, bool ismin,
float ## sz float ## sz ## _ ## name(float ## sz a, float ## sz b, \
float_status *s) \
{ \
- FloatParts64 pa = float ## sz ## _unpack_canonical(a, s); \
- FloatParts64 pb = float ## sz ## _unpack_canonical(b, s); \
- FloatParts64 pr = minmax_floats(pa, pb, ismin, isiee, ismag, s); \
- \
+ FloatParts64 pa, pb, pr; \
+ float ## sz ## _unpack_canonical(&pa, a, s); \
+ float ## sz ## _unpack_canonical(&pb, b, s); \
+ pr = minmax_floats(pa, pb, ismin, isiee, ismag, s); \
return float ## sz ## _round_pack_canonical(pr, s); \
}
@@ -3201,10 +3313,10 @@ MINMAX(64, maxnummag, false, true, true)
#define BF16_MINMAX(name, ismin, isiee, ismag) \
bfloat16 bfloat16_ ## name(bfloat16 a, bfloat16 b, float_status *s) \
{ \
- FloatParts64 pa = bfloat16_unpack_canonical(a, s); \
- FloatParts64 pb = bfloat16_unpack_canonical(b, s); \
- FloatParts64 pr = minmax_floats(pa, pb, ismin, isiee, ismag, s); \
- \
+ FloatParts64 pa, pb, pr; \
+ bfloat16_unpack_canonical(&pa, a, s); \
+ bfloat16_unpack_canonical(&pb, b, s); \
+ pr = minmax_floats(pa, pb, ismin, isiee, ismag, s); \
return bfloat16_round_pack_canonical(pr, s); \
}
@@ -3279,8 +3391,9 @@ static FloatRelation compare_floats(FloatParts64 a, FloatParts64 b, bool is_quie
static int attr \
name(float ## sz a, float ## sz b, bool is_quiet, float_status *s) \
{ \
- FloatParts64 pa = float ## sz ## _unpack_canonical(a, s); \
- FloatParts64 pb = float ## sz ## _unpack_canonical(b, s); \
+ FloatParts64 pa, pb; \
+ float ## sz ## _unpack_canonical(&pa, a, s); \
+ float ## sz ## _unpack_canonical(&pb, b, s); \
return compare_floats(pa, pb, is_quiet, s); \
}
@@ -3381,8 +3494,10 @@ FloatRelation float64_compare_quiet(float64 a, float64 b, float_status *s)
static FloatRelation QEMU_FLATTEN
soft_bf16_compare(bfloat16 a, bfloat16 b, bool is_quiet, float_status *s)
{
- FloatParts64 pa = bfloat16_unpack_canonical(a, s);
- FloatParts64 pb = bfloat16_unpack_canonical(b, s);
+ FloatParts64 pa, pb;
+
+ bfloat16_unpack_canonical(&pa, a, s);
+ bfloat16_unpack_canonical(&pb, b, s);
return compare_floats(pa, pb, is_quiet, s);
}
@@ -3416,29 +3531,37 @@ static FloatParts64 scalbn_decomposed(FloatParts64 a, int n, float_status *s)
float16 float16_scalbn(float16 a, int n, float_status *status)
{
- FloatParts64 pa = float16_unpack_canonical(a, status);
- FloatParts64 pr = scalbn_decomposed(pa, n, status);
+ FloatParts64 pa, pr;
+
+ float16_unpack_canonical(&pa, a, status);
+ pr = scalbn_decomposed(pa, n, status);
return float16_round_pack_canonical(pr, status);
}
float32 float32_scalbn(float32 a, int n, float_status *status)
{
- FloatParts64 pa = float32_unpack_canonical(a, status);
- FloatParts64 pr = scalbn_decomposed(pa, n, status);
+ FloatParts64 pa, pr;
+
+ float32_unpack_canonical(&pa, a, status);
+ pr = scalbn_decomposed(pa, n, status);
return float32_round_pack_canonical(pr, status);
}
float64 float64_scalbn(float64 a, int n, float_status *status)
{
- FloatParts64 pa = float64_unpack_canonical(a, status);
- FloatParts64 pr = scalbn_decomposed(pa, n, status);
+ FloatParts64 pa, pr;
+
+ float64_unpack_canonical(&pa, a, status);
+ pr = scalbn_decomposed(pa, n, status);
return float64_round_pack_canonical(pr, status);
}
bfloat16 bfloat16_scalbn(bfloat16 a, int n, float_status *status)
{
- FloatParts64 pa = bfloat16_unpack_canonical(a, status);
- FloatParts64 pr = scalbn_decomposed(pa, n, status);
+ FloatParts64 pa, pr;
+
+ bfloat16_unpack_canonical(&pa, a, status);
+ pr = scalbn_decomposed(pa, n, status);
return bfloat16_round_pack_canonical(pr, status);
}
@@ -3515,24 +3638,30 @@ static FloatParts64 sqrt_float(FloatParts64 a, float_status *s, const FloatFmt *
float16 QEMU_FLATTEN float16_sqrt(float16 a, float_status *status)
{
- FloatParts64 pa = float16_unpack_canonical(a, status);
- FloatParts64 pr = sqrt_float(pa, status, &float16_params);
+ FloatParts64 pa, pr;
+
+ float16_unpack_canonical(&pa, a, status);
+ pr = sqrt_float(pa, status, &float16_params);
return float16_round_pack_canonical(pr, status);
}
static float32 QEMU_SOFTFLOAT_ATTR
soft_f32_sqrt(float32 a, float_status *status)
{
- FloatParts64 pa = float32_unpack_canonical(a, status);
- FloatParts64 pr = sqrt_float(pa, status, &float32_params);
+ FloatParts64 pa, pr;
+
+ float32_unpack_canonical(&pa, a, status);
+ pr = sqrt_float(pa, status, &float32_params);
return float32_round_pack_canonical(pr, status);
}
static float64 QEMU_SOFTFLOAT_ATTR
soft_f64_sqrt(float64 a, float_status *status)
{
- FloatParts64 pa = float64_unpack_canonical(a, status);
- FloatParts64 pr = sqrt_float(pa, status, &float64_params);
+ FloatParts64 pa, pr;
+
+ float64_unpack_canonical(&pa, a, status);
+ pr = sqrt_float(pa, status, &float64_params);
return float64_round_pack_canonical(pr, status);
}
@@ -3592,8 +3721,10 @@ float64 QEMU_FLATTEN float64_sqrt(float64 xa, float_status *s)
bfloat16 QEMU_FLATTEN bfloat16_sqrt(bfloat16 a, float_status *status)
{
- FloatParts64 pa = bfloat16_unpack_canonical(a, status);
- FloatParts64 pr = sqrt_float(pa, status, &bfloat16_params);
+ FloatParts64 pa, pr;
+
+ bfloat16_unpack_canonical(&pa, a, status);
+ pr = sqrt_float(pa, status, &bfloat16_params);
return bfloat16_round_pack_canonical(pr, status);
}
--
2.25.1
^ permalink raw reply related [flat|nested] 52+ messages in thread
* [PULL 23/46] softfloat: Use pointers with ftype_round_pack_canonical
2021-05-16 12:33 [PULL 00/46] softfloat: Convert float128 to FloatParts (part 1) Richard Henderson
` (21 preceding siblings ...)
2021-05-16 12:34 ` [PULL 22/46] softfloat: Use pointers with ftype_unpack_canonical Richard Henderson
@ 2021-05-16 12:34 ` Richard Henderson
2021-05-16 12:34 ` [PULL 24/46] softfloat: Use pointers with parts_silence_nan Richard Henderson
` (24 subsequent siblings)
47 siblings, 0 replies; 52+ messages in thread
From: Richard Henderson @ 2021-05-16 12:34 UTC (permalink / raw)
To: qemu-devel; +Cc: peter.maydell, Alex Bennée
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
fpu/softfloat.c | 131 +++++++++++++++++++++++++-----------------------
1 file changed, 68 insertions(+), 63 deletions(-)
diff --git a/fpu/softfloat.c b/fpu/softfloat.c
index e53d4a138f..b0cbd5941c 100644
--- a/fpu/softfloat.c
+++ b/fpu/softfloat.c
@@ -945,22 +945,25 @@ static void bfloat16_unpack_canonical(FloatParts64 *p, bfloat16 f,
*p = sf_canonicalize(*p, &bfloat16_params, s);
}
-static float16 float16a_round_pack_canonical(FloatParts64 p, float_status *s,
+static float16 float16a_round_pack_canonical(FloatParts64 *p,
+ float_status *s,
const FloatFmt *params)
{
- p = round_canonical(p, s, params);
- return float16_pack_raw(&p);
+ *p = round_canonical(*p, s, params);
+ return float16_pack_raw(p);
}
-static float16 float16_round_pack_canonical(FloatParts64 p, float_status *s)
+static float16 float16_round_pack_canonical(FloatParts64 *p,
+ float_status *s)
{
return float16a_round_pack_canonical(p, s, &float16_params);
}
-static bfloat16 bfloat16_round_pack_canonical(FloatParts64 p, float_status *s)
+static bfloat16 bfloat16_round_pack_canonical(FloatParts64 *p,
+ float_status *s)
{
- p = round_canonical(p, s, &bfloat16_params);
- return bfloat16_pack_raw(&p);
+ *p = round_canonical(*p, s, &bfloat16_params);
+ return bfloat16_pack_raw(p);
}
static void float32_unpack_canonical(FloatParts64 *p, float32 f,
@@ -970,10 +973,11 @@ static void float32_unpack_canonical(FloatParts64 *p, float32 f,
*p = sf_canonicalize(*p, &float32_params, s);
}
-static float32 float32_round_pack_canonical(FloatParts64 p, float_status *s)
+static float32 float32_round_pack_canonical(FloatParts64 *p,
+ float_status *s)
{
- p = round_canonical(p, s, &float32_params);
- return float32_pack_raw(&p);
+ *p = round_canonical(*p, s, &float32_params);
+ return float32_pack_raw(p);
}
static void float64_unpack_canonical(FloatParts64 *p, float64 f,
@@ -983,10 +987,11 @@ static void float64_unpack_canonical(FloatParts64 *p, float64 f,
*p = sf_canonicalize(*p, &float64_params, s);
}
-static float64 float64_round_pack_canonical(FloatParts64 p, float_status *s)
+static float64 float64_round_pack_canonical(FloatParts64 *p,
+ float_status *s)
{
- p = round_canonical(p, s, &float64_params);
- return float64_pack_raw(&p);
+ *p = round_canonical(*p, s, &float64_params);
+ return float64_pack_raw(p);
}
/*
@@ -1093,7 +1098,7 @@ float16 QEMU_FLATTEN float16_add(float16 a, float16 b, float_status *status)
float16_unpack_canonical(&pb, b, status);
pr = addsub_floats(pa, pb, false, status);
- return float16_round_pack_canonical(pr, status);
+ return float16_round_pack_canonical(&pr, status);
}
float16 QEMU_FLATTEN float16_sub(float16 a, float16 b, float_status *status)
@@ -1104,7 +1109,7 @@ float16 QEMU_FLATTEN float16_sub(float16 a, float16 b, float_status *status)
float16_unpack_canonical(&pb, b, status);
pr = addsub_floats(pa, pb, true, status);
- return float16_round_pack_canonical(pr, status);
+ return float16_round_pack_canonical(&pr, status);
}
static float32 QEMU_SOFTFLOAT_ATTR
@@ -1116,7 +1121,7 @@ soft_f32_addsub(float32 a, float32 b, bool subtract, float_status *status)
float32_unpack_canonical(&pb, b, status);
pr = addsub_floats(pa, pb, subtract, status);
- return float32_round_pack_canonical(pr, status);
+ return float32_round_pack_canonical(&pr, status);
}
static inline float32 soft_f32_add(float32 a, float32 b, float_status *status)
@@ -1138,7 +1143,7 @@ soft_f64_addsub(float64 a, float64 b, bool subtract, float_status *status)
float64_unpack_canonical(&pb, b, status);
pr = addsub_floats(pa, pb, subtract, status);
- return float64_round_pack_canonical(pr, status);
+ return float64_round_pack_canonical(&pr, status);
}
static inline float64 soft_f64_add(float64 a, float64 b, float_status *status)
@@ -1238,7 +1243,7 @@ bfloat16 QEMU_FLATTEN bfloat16_add(bfloat16 a, bfloat16 b, float_status *status)
bfloat16_unpack_canonical(&pb, b, status);
pr = addsub_floats(pa, pb, false, status);
- return bfloat16_round_pack_canonical(pr, status);
+ return bfloat16_round_pack_canonical(&pr, status);
}
bfloat16 QEMU_FLATTEN bfloat16_sub(bfloat16 a, bfloat16 b, float_status *status)
@@ -1249,7 +1254,7 @@ bfloat16 QEMU_FLATTEN bfloat16_sub(bfloat16 a, bfloat16 b, float_status *status)
bfloat16_unpack_canonical(&pb, b, status);
pr = addsub_floats(pa, pb, true, status);
- return bfloat16_round_pack_canonical(pr, status);
+ return bfloat16_round_pack_canonical(&pr, status);
}
/*
@@ -1311,7 +1316,7 @@ float16 QEMU_FLATTEN float16_mul(float16 a, float16 b, float_status *status)
float16_unpack_canonical(&pb, b, status);
pr = mul_floats(pa, pb, status);
- return float16_round_pack_canonical(pr, status);
+ return float16_round_pack_canonical(&pr, status);
}
static float32 QEMU_SOFTFLOAT_ATTR
@@ -1323,7 +1328,7 @@ soft_f32_mul(float32 a, float32 b, float_status *status)
float32_unpack_canonical(&pb, b, status);
pr = mul_floats(pa, pb, status);
- return float32_round_pack_canonical(pr, status);
+ return float32_round_pack_canonical(&pr, status);
}
static float64 QEMU_SOFTFLOAT_ATTR
@@ -1335,7 +1340,7 @@ soft_f64_mul(float64 a, float64 b, float_status *status)
float64_unpack_canonical(&pb, b, status);
pr = mul_floats(pa, pb, status);
- return float64_round_pack_canonical(pr, status);
+ return float64_round_pack_canonical(&pr, status);
}
static float hard_f32_mul(float a, float b)
@@ -1375,7 +1380,7 @@ bfloat16 QEMU_FLATTEN bfloat16_mul(bfloat16 a, bfloat16 b, float_status *status)
bfloat16_unpack_canonical(&pb, b, status);
pr = mul_floats(pa, pb, status);
- return bfloat16_round_pack_canonical(pr, status);
+ return bfloat16_round_pack_canonical(&pr, status);
}
/*
@@ -1574,7 +1579,7 @@ float16 QEMU_FLATTEN float16_muladd(float16 a, float16 b, float16 c,
float16_unpack_canonical(&pc, c, status);
pr = muladd_floats(pa, pb, pc, flags, status);
- return float16_round_pack_canonical(pr, status);
+ return float16_round_pack_canonical(&pr, status);
}
static float32 QEMU_SOFTFLOAT_ATTR
@@ -1588,7 +1593,7 @@ soft_f32_muladd(float32 a, float32 b, float32 c, int flags,
float32_unpack_canonical(&pc, c, status);
pr = muladd_floats(pa, pb, pc, flags, status);
- return float32_round_pack_canonical(pr, status);
+ return float32_round_pack_canonical(&pr, status);
}
static float64 QEMU_SOFTFLOAT_ATTR
@@ -1602,7 +1607,7 @@ soft_f64_muladd(float64 a, float64 b, float64 c, int flags,
float64_unpack_canonical(&pc, c, status);
pr = muladd_floats(pa, pb, pc, flags, status);
- return float64_round_pack_canonical(pr, status);
+ return float64_round_pack_canonical(&pr, status);
}
static bool force_soft_fma;
@@ -1765,7 +1770,7 @@ bfloat16 QEMU_FLATTEN bfloat16_muladd(bfloat16 a, bfloat16 b, bfloat16 c,
bfloat16_unpack_canonical(&pc, c, status);
pr = muladd_floats(pa, pb, pc, flags, status);
- return bfloat16_round_pack_canonical(pr, status);
+ return bfloat16_round_pack_canonical(&pr, status);
}
/*
@@ -1848,7 +1853,7 @@ float16 float16_div(float16 a, float16 b, float_status *status)
float16_unpack_canonical(&pb, b, status);
pr = div_floats(pa, pb, status);
- return float16_round_pack_canonical(pr, status);
+ return float16_round_pack_canonical(&pr, status);
}
static float32 QEMU_SOFTFLOAT_ATTR
@@ -1860,7 +1865,7 @@ soft_f32_div(float32 a, float32 b, float_status *status)
float32_unpack_canonical(&pb, b, status);
pr = div_floats(pa, pb, status);
- return float32_round_pack_canonical(pr, status);
+ return float32_round_pack_canonical(&pr, status);
}
static float64 QEMU_SOFTFLOAT_ATTR
@@ -1872,7 +1877,7 @@ soft_f64_div(float64 a, float64 b, float_status *status)
float64_unpack_canonical(&pb, b, status);
pr = div_floats(pa, pb, status);
- return float64_round_pack_canonical(pr, status);
+ return float64_round_pack_canonical(&pr, status);
}
static float hard_f32_div(float a, float b)
@@ -1946,7 +1951,7 @@ bfloat16 bfloat16_div(bfloat16 a, bfloat16 b, float_status *status)
bfloat16_unpack_canonical(&pb, b, status);
pr = div_floats(pa, pb, status);
- return bfloat16_round_pack_canonical(pr, status);
+ return bfloat16_round_pack_canonical(&pr, status);
}
/*
@@ -2002,7 +2007,7 @@ float32 float16_to_float32(float16 a, bool ieee, float_status *s)
float16a_unpack_canonical(&pa, a, s, fmt16);
pr = float_to_float(pa, &float32_params, s);
- return float32_round_pack_canonical(pr, s);
+ return float32_round_pack_canonical(&pr, s);
}
float64 float16_to_float64(float16 a, bool ieee, float_status *s)
@@ -2012,7 +2017,7 @@ float64 float16_to_float64(float16 a, bool ieee, float_status *s)
float16a_unpack_canonical(&pa, a, s, fmt16);
pr = float_to_float(pa, &float64_params, s);
- return float64_round_pack_canonical(pr, s);
+ return float64_round_pack_canonical(&pr, s);
}
float16 float32_to_float16(float32 a, bool ieee, float_status *s)
@@ -2022,7 +2027,7 @@ float16 float32_to_float16(float32 a, bool ieee, float_status *s)
float32_unpack_canonical(&pa, a, s);
pr = float_to_float(pa, fmt16, s);
- return float16a_round_pack_canonical(pr, s, fmt16);
+ return float16a_round_pack_canonical(&pr, s, fmt16);
}
static float64 QEMU_SOFTFLOAT_ATTR
@@ -2032,7 +2037,7 @@ soft_float32_to_float64(float32 a, float_status *s)
float32_unpack_canonical(&pa, a, s);
pr = float_to_float(pa, &float64_params, s);
- return float64_round_pack_canonical(pr, s);
+ return float64_round_pack_canonical(&pr, s);
}
float64 float32_to_float64(float32 a, float_status *s)
@@ -2058,7 +2063,7 @@ float16 float64_to_float16(float64 a, bool ieee, float_status *s)
float64_unpack_canonical(&pa, a, s);
pr = float_to_float(pa, fmt16, s);
- return float16a_round_pack_canonical(pr, s, fmt16);
+ return float16a_round_pack_canonical(&pr, s, fmt16);
}
float32 float64_to_float32(float64 a, float_status *s)
@@ -2067,7 +2072,7 @@ float32 float64_to_float32(float64 a, float_status *s)
float64_unpack_canonical(&pa, a, s);
pr = float_to_float(pa, &float32_params, s);
- return float32_round_pack_canonical(pr, s);
+ return float32_round_pack_canonical(&pr, s);
}
float32 bfloat16_to_float32(bfloat16 a, float_status *s)
@@ -2076,7 +2081,7 @@ float32 bfloat16_to_float32(bfloat16 a, float_status *s)
bfloat16_unpack_canonical(&pa, a, s);
pr = float_to_float(pa, &float32_params, s);
- return float32_round_pack_canonical(pr, s);
+ return float32_round_pack_canonical(&pr, s);
}
float64 bfloat16_to_float64(bfloat16 a, float_status *s)
@@ -2085,7 +2090,7 @@ float64 bfloat16_to_float64(bfloat16 a, float_status *s)
bfloat16_unpack_canonical(&pa, a, s);
pr = float_to_float(pa, &float64_params, s);
- return float64_round_pack_canonical(pr, s);
+ return float64_round_pack_canonical(&pr, s);
}
bfloat16 float32_to_bfloat16(float32 a, float_status *s)
@@ -2094,7 +2099,7 @@ bfloat16 float32_to_bfloat16(float32 a, float_status *s)
float32_unpack_canonical(&pa, a, s);
pr = float_to_float(pa, &bfloat16_params, s);
- return bfloat16_round_pack_canonical(pr, s);
+ return bfloat16_round_pack_canonical(&pr, s);
}
bfloat16 float64_to_bfloat16(float64 a, float_status *s)
@@ -2103,7 +2108,7 @@ bfloat16 float64_to_bfloat16(float64 a, float_status *s)
float64_unpack_canonical(&pa, a, s);
pr = float_to_float(pa, &bfloat16_params, s);
- return bfloat16_round_pack_canonical(pr, s);
+ return bfloat16_round_pack_canonical(&pr, s);
}
/*
@@ -2220,7 +2225,7 @@ float16 float16_round_to_int(float16 a, float_status *s)
float16_unpack_canonical(&pa, a, s);
pr = round_to_int(pa, s->float_rounding_mode, 0, s);
- return float16_round_pack_canonical(pr, s);
+ return float16_round_pack_canonical(&pr, s);
}
float32 float32_round_to_int(float32 a, float_status *s)
@@ -2229,7 +2234,7 @@ float32 float32_round_to_int(float32 a, float_status *s)
float32_unpack_canonical(&pa, a, s);
pr = round_to_int(pa, s->float_rounding_mode, 0, s);
- return float32_round_pack_canonical(pr, s);
+ return float32_round_pack_canonical(&pr, s);
}
float64 float64_round_to_int(float64 a, float_status *s)
@@ -2238,7 +2243,7 @@ float64 float64_round_to_int(float64 a, float_status *s)
float64_unpack_canonical(&pa, a, s);
pr = round_to_int(pa, s->float_rounding_mode, 0, s);
- return float64_round_pack_canonical(pr, s);
+ return float64_round_pack_canonical(&pr, s);
}
/*
@@ -2252,7 +2257,7 @@ bfloat16 bfloat16_round_to_int(bfloat16 a, float_status *s)
bfloat16_unpack_canonical(&pa, a, s);
pr = round_to_int(pa, s->float_rounding_mode, 0, s);
- return bfloat16_round_pack_canonical(pr, s);
+ return bfloat16_round_pack_canonical(&pr, s);
}
/*
@@ -2898,7 +2903,7 @@ static FloatParts64 int_to_float(int64_t a, int scale, float_status *status)
float16 int64_to_float16_scalbn(int64_t a, int scale, float_status *status)
{
FloatParts64 pa = int_to_float(a, scale, status);
- return float16_round_pack_canonical(pa, status);
+ return float16_round_pack_canonical(&pa, status);
}
float16 int32_to_float16_scalbn(int32_t a, int scale, float_status *status)
@@ -2934,7 +2939,7 @@ float16 int8_to_float16(int8_t a, float_status *status)
float32 int64_to_float32_scalbn(int64_t a, int scale, float_status *status)
{
FloatParts64 pa = int_to_float(a, scale, status);
- return float32_round_pack_canonical(pa, status);
+ return float32_round_pack_canonical(&pa, status);
}
float32 int32_to_float32_scalbn(int32_t a, int scale, float_status *status)
@@ -2965,7 +2970,7 @@ float32 int16_to_float32(int16_t a, float_status *status)
float64 int64_to_float64_scalbn(int64_t a, int scale, float_status *status)
{
FloatParts64 pa = int_to_float(a, scale, status);
- return float64_round_pack_canonical(pa, status);
+ return float64_round_pack_canonical(&pa, status);
}
float64 int32_to_float64_scalbn(int32_t a, int scale, float_status *status)
@@ -3001,7 +3006,7 @@ float64 int16_to_float64(int16_t a, float_status *status)
bfloat16 int64_to_bfloat16_scalbn(int64_t a, int scale, float_status *status)
{
FloatParts64 pa = int_to_float(a, scale, status);
- return bfloat16_round_pack_canonical(pa, status);
+ return bfloat16_round_pack_canonical(&pa, status);
}
bfloat16 int32_to_bfloat16_scalbn(int32_t a, int scale, float_status *status)
@@ -3058,7 +3063,7 @@ static FloatParts64 uint_to_float(uint64_t a, int scale, float_status *status)
float16 uint64_to_float16_scalbn(uint64_t a, int scale, float_status *status)
{
FloatParts64 pa = uint_to_float(a, scale, status);
- return float16_round_pack_canonical(pa, status);
+ return float16_round_pack_canonical(&pa, status);
}
float16 uint32_to_float16_scalbn(uint32_t a, int scale, float_status *status)
@@ -3094,7 +3099,7 @@ float16 uint8_to_float16(uint8_t a, float_status *status)
float32 uint64_to_float32_scalbn(uint64_t a, int scale, float_status *status)
{
FloatParts64 pa = uint_to_float(a, scale, status);
- return float32_round_pack_canonical(pa, status);
+ return float32_round_pack_canonical(&pa, status);
}
float32 uint32_to_float32_scalbn(uint32_t a, int scale, float_status *status)
@@ -3125,7 +3130,7 @@ float32 uint16_to_float32(uint16_t a, float_status *status)
float64 uint64_to_float64_scalbn(uint64_t a, int scale, float_status *status)
{
FloatParts64 pa = uint_to_float(a, scale, status);
- return float64_round_pack_canonical(pa, status);
+ return float64_round_pack_canonical(&pa, status);
}
float64 uint32_to_float64_scalbn(uint32_t a, int scale, float_status *status)
@@ -3161,7 +3166,7 @@ float64 uint16_to_float64(uint16_t a, float_status *status)
bfloat16 uint64_to_bfloat16_scalbn(uint64_t a, int scale, float_status *status)
{
FloatParts64 pa = uint_to_float(a, scale, status);
- return bfloat16_round_pack_canonical(pa, status);
+ return bfloat16_round_pack_canonical(&pa, status);
}
bfloat16 uint32_to_bfloat16_scalbn(uint32_t a, int scale, float_status *status)
@@ -3284,7 +3289,7 @@ float ## sz float ## sz ## _ ## name(float ## sz a, float ## sz b, \
float ## sz ## _unpack_canonical(&pa, a, s); \
float ## sz ## _unpack_canonical(&pb, b, s); \
pr = minmax_floats(pa, pb, ismin, isiee, ismag, s); \
- return float ## sz ## _round_pack_canonical(pr, s); \
+ return float ## sz ## _round_pack_canonical(&pr, s); \
}
MINMAX(16, min, true, false, false)
@@ -3317,7 +3322,7 @@ bfloat16 bfloat16_ ## name(bfloat16 a, bfloat16 b, float_status *s) \
bfloat16_unpack_canonical(&pa, a, s); \
bfloat16_unpack_canonical(&pb, b, s); \
pr = minmax_floats(pa, pb, ismin, isiee, ismag, s); \
- return bfloat16_round_pack_canonical(pr, s); \
+ return bfloat16_round_pack_canonical(&pr, s); \
}
BF16_MINMAX(min, true, false, false)
@@ -3535,7 +3540,7 @@ float16 float16_scalbn(float16 a, int n, float_status *status)
float16_unpack_canonical(&pa, a, status);
pr = scalbn_decomposed(pa, n, status);
- return float16_round_pack_canonical(pr, status);
+ return float16_round_pack_canonical(&pr, status);
}
float32 float32_scalbn(float32 a, int n, float_status *status)
@@ -3544,7 +3549,7 @@ float32 float32_scalbn(float32 a, int n, float_status *status)
float32_unpack_canonical(&pa, a, status);
pr = scalbn_decomposed(pa, n, status);
- return float32_round_pack_canonical(pr, status);
+ return float32_round_pack_canonical(&pr, status);
}
float64 float64_scalbn(float64 a, int n, float_status *status)
@@ -3553,7 +3558,7 @@ float64 float64_scalbn(float64 a, int n, float_status *status)
float64_unpack_canonical(&pa, a, status);
pr = scalbn_decomposed(pa, n, status);
- return float64_round_pack_canonical(pr, status);
+ return float64_round_pack_canonical(&pr, status);
}
bfloat16 bfloat16_scalbn(bfloat16 a, int n, float_status *status)
@@ -3562,7 +3567,7 @@ bfloat16 bfloat16_scalbn(bfloat16 a, int n, float_status *status)
bfloat16_unpack_canonical(&pa, a, status);
pr = scalbn_decomposed(pa, n, status);
- return bfloat16_round_pack_canonical(pr, status);
+ return bfloat16_round_pack_canonical(&pr, status);
}
/*
@@ -3642,7 +3647,7 @@ float16 QEMU_FLATTEN float16_sqrt(float16 a, float_status *status)
float16_unpack_canonical(&pa, a, status);
pr = sqrt_float(pa, status, &float16_params);
- return float16_round_pack_canonical(pr, status);
+ return float16_round_pack_canonical(&pr, status);
}
static float32 QEMU_SOFTFLOAT_ATTR
@@ -3652,7 +3657,7 @@ soft_f32_sqrt(float32 a, float_status *status)
float32_unpack_canonical(&pa, a, status);
pr = sqrt_float(pa, status, &float32_params);
- return float32_round_pack_canonical(pr, status);
+ return float32_round_pack_canonical(&pr, status);
}
static float64 QEMU_SOFTFLOAT_ATTR
@@ -3662,7 +3667,7 @@ soft_f64_sqrt(float64 a, float_status *status)
float64_unpack_canonical(&pa, a, status);
pr = sqrt_float(pa, status, &float64_params);
- return float64_round_pack_canonical(pr, status);
+ return float64_round_pack_canonical(&pr, status);
}
float32 QEMU_FLATTEN float32_sqrt(float32 xa, float_status *s)
@@ -3725,7 +3730,7 @@ bfloat16 QEMU_FLATTEN bfloat16_sqrt(bfloat16 a, float_status *status)
bfloat16_unpack_canonical(&pa, a, status);
pr = sqrt_float(pa, status, &bfloat16_params);
- return bfloat16_round_pack_canonical(pr, status);
+ return bfloat16_round_pack_canonical(&pr, status);
}
/*----------------------------------------------------------------------------
--
2.25.1
^ permalink raw reply related [flat|nested] 52+ messages in thread
* [PULL 24/46] softfloat: Use pointers with parts_silence_nan
2021-05-16 12:33 [PULL 00/46] softfloat: Convert float128 to FloatParts (part 1) Richard Henderson
` (22 preceding siblings ...)
2021-05-16 12:34 ` [PULL 23/46] softfloat: Use pointers with ftype_round_pack_canonical Richard Henderson
@ 2021-05-16 12:34 ` Richard Henderson
2021-05-16 12:34 ` [PULL 25/46] softfloat: Rearrange FloatParts64 Richard Henderson
` (23 subsequent siblings)
47 siblings, 0 replies; 52+ messages in thread
From: Richard Henderson @ 2021-05-16 12:34 UTC (permalink / raw)
To: qemu-devel; +Cc: peter.maydell, Alex Bennée
At the same time, rename to parts64_silence_nan, split out
parts_silence_nan_frac, and define a macro for parts_silence_nan.
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
fpu/softfloat.c | 16 +++++++++-------
fpu/softfloat-specialize.c.inc | 17 +++++++++++------
2 files changed, 20 insertions(+), 13 deletions(-)
diff --git a/fpu/softfloat.c b/fpu/softfloat.c
index b0cbd5941c..2123453d40 100644
--- a/fpu/softfloat.c
+++ b/fpu/softfloat.c
@@ -657,6 +657,7 @@ static inline float64 float64_pack_raw(const FloatParts64 *p)
#include "softfloat-specialize.c.inc"
#define parts_default_nan parts64_default_nan
+#define parts_silence_nan parts64_silence_nan
/* Canonicalize EXP and FRAC, setting CLS. */
static FloatParts64 sf_canonicalize(FloatParts64 part, const FloatFmt *parm,
@@ -851,7 +852,8 @@ static FloatParts64 return_nan(FloatParts64 a, float_status *s)
if (is_snan(a.cls)) {
float_raise(float_flag_invalid, s);
if (!s->default_nan_mode) {
- return parts_silence_nan(a, s);
+ parts_silence_nan(&a, s);
+ return a;
}
} else if (!s->default_nan_mode) {
return a;
@@ -875,7 +877,7 @@ static FloatParts64 pick_nan(FloatParts64 a, FloatParts64 b, float_status *s)
a = b;
}
if (is_snan(a.cls)) {
- return parts_silence_nan(a, s);
+ parts_silence_nan(&a, s);
}
}
return a;
@@ -916,7 +918,7 @@ static FloatParts64 pick_nan_muladd(FloatParts64 a, FloatParts64 b, FloatParts64
}
if (is_snan(a.cls)) {
- return parts_silence_nan(a, s);
+ parts_silence_nan(&a, s);
}
return a;
}
@@ -3801,7 +3803,7 @@ float16 float16_silence_nan(float16 a, float_status *status)
float16_unpack_raw(&p, a);
p.frac <<= float16_params.frac_shift;
- p = parts_silence_nan(p, status);
+ parts_silence_nan(&p, status);
p.frac >>= float16_params.frac_shift;
return float16_pack_raw(&p);
}
@@ -3812,7 +3814,7 @@ float32 float32_silence_nan(float32 a, float_status *status)
float32_unpack_raw(&p, a);
p.frac <<= float32_params.frac_shift;
- p = parts_silence_nan(p, status);
+ parts_silence_nan(&p, status);
p.frac >>= float32_params.frac_shift;
return float32_pack_raw(&p);
}
@@ -3823,7 +3825,7 @@ float64 float64_silence_nan(float64 a, float_status *status)
float64_unpack_raw(&p, a);
p.frac <<= float64_params.frac_shift;
- p = parts_silence_nan(p, status);
+ parts_silence_nan(&p, status);
p.frac >>= float64_params.frac_shift;
return float64_pack_raw(&p);
}
@@ -3834,7 +3836,7 @@ bfloat16 bfloat16_silence_nan(bfloat16 a, float_status *status)
bfloat16_unpack_raw(&p, a);
p.frac <<= bfloat16_params.frac_shift;
- p = parts_silence_nan(p, status);
+ parts_silence_nan(&p, status);
p.frac >>= bfloat16_params.frac_shift;
return bfloat16_pack_raw(&p);
}
diff --git a/fpu/softfloat-specialize.c.inc b/fpu/softfloat-specialize.c.inc
index 47c3652d63..4038955379 100644
--- a/fpu/softfloat-specialize.c.inc
+++ b/fpu/softfloat-specialize.c.inc
@@ -176,20 +176,25 @@ static void parts64_default_nan(FloatParts64 *p, float_status *status)
| floating-point parts.
*----------------------------------------------------------------------------*/
-static FloatParts64 parts_silence_nan(FloatParts64 a, float_status *status)
+static uint64_t parts_silence_nan_frac(uint64_t frac, float_status *status)
{
g_assert(!no_signaling_nans(status));
g_assert(!status->default_nan_mode);
/* The only snan_bit_is_one target without default_nan_mode is HPPA. */
if (snan_bit_is_one(status)) {
- a.frac &= ~(1ULL << (DECOMPOSED_BINARY_POINT - 1));
- a.frac |= 1ULL << (DECOMPOSED_BINARY_POINT - 2);
+ frac &= ~(1ULL << (DECOMPOSED_BINARY_POINT - 1));
+ frac |= 1ULL << (DECOMPOSED_BINARY_POINT - 2);
} else {
- a.frac |= 1ULL << (DECOMPOSED_BINARY_POINT - 1);
+ frac |= 1ULL << (DECOMPOSED_BINARY_POINT - 1);
}
- a.cls = float_class_qnan;
- return a;
+ return frac;
+}
+
+static void parts64_silence_nan(FloatParts64 *p, float_status *status)
+{
+ p->frac = parts_silence_nan_frac(p->frac, status);
+ p->cls = float_class_qnan;
}
/*----------------------------------------------------------------------------
--
2.25.1
^ permalink raw reply related [flat|nested] 52+ messages in thread
* [PULL 25/46] softfloat: Rearrange FloatParts64
2021-05-16 12:33 [PULL 00/46] softfloat: Convert float128 to FloatParts (part 1) Richard Henderson
` (23 preceding siblings ...)
2021-05-16 12:34 ` [PULL 24/46] softfloat: Use pointers with parts_silence_nan Richard Henderson
@ 2021-05-16 12:34 ` Richard Henderson
2021-05-16 12:34 ` [PULL 26/46] softfloat: Convert float128_silence_nan to parts Richard Henderson
` (22 subsequent siblings)
47 siblings, 0 replies; 52+ messages in thread
From: Richard Henderson @ 2021-05-16 12:34 UTC (permalink / raw)
To: qemu-devel; +Cc: peter.maydell, Alex Bennée
Shuffle the fraction to the end, otherwise sort by size.
Add frac_hi and frac_lo members to alias frac.
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
fpu/softfloat.c | 14 ++++++++++++--
1 file changed, 12 insertions(+), 2 deletions(-)
diff --git a/fpu/softfloat.c b/fpu/softfloat.c
index 2123453d40..ee609540aa 100644
--- a/fpu/softfloat.c
+++ b/fpu/softfloat.c
@@ -511,10 +511,20 @@ static inline __attribute__((unused)) bool is_qnan(FloatClass c)
*/
typedef struct {
- uint64_t frac;
- int32_t exp;
FloatClass cls;
bool sign;
+ int32_t exp;
+ union {
+ /* Routines that know the structure may reference the singular name. */
+ uint64_t frac;
+ /*
+ * Routines expanded with multiple structures reference "hi" and "lo"
+ * depending on the operation. In FloatParts64, "hi" and "lo" are
+ * both the same word and aliased here.
+ */
+ uint64_t frac_hi;
+ uint64_t frac_lo;
+ };
} FloatParts64;
#define DECOMPOSED_BINARY_POINT 63
--
2.25.1
^ permalink raw reply related [flat|nested] 52+ messages in thread
* [PULL 26/46] softfloat: Convert float128_silence_nan to parts
2021-05-16 12:33 [PULL 00/46] softfloat: Convert float128 to FloatParts (part 1) Richard Henderson
` (24 preceding siblings ...)
2021-05-16 12:34 ` [PULL 25/46] softfloat: Rearrange FloatParts64 Richard Henderson
@ 2021-05-16 12:34 ` Richard Henderson
2021-05-16 12:34 ` [PULL 27/46] softfloat: Convert float128_default_nan " Richard Henderson
` (21 subsequent siblings)
47 siblings, 0 replies; 52+ messages in thread
From: Richard Henderson @ 2021-05-16 12:34 UTC (permalink / raw)
To: qemu-devel; +Cc: peter.maydell, Alex Bennée
This is the minimal change that also introduces float128_params,
float128_unpack_raw, and float128_pack_raw without running into
unused symbol Werrors.
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
fpu/softfloat.c | 96 +++++++++++++++++++++++++++++-----
fpu/softfloat-specialize.c.inc | 25 +++------
2 files changed, 89 insertions(+), 32 deletions(-)
diff --git a/fpu/softfloat.c b/fpu/softfloat.c
index ee609540aa..f8f4ef51e8 100644
--- a/fpu/softfloat.c
+++ b/fpu/softfloat.c
@@ -500,14 +500,12 @@ static inline __attribute__((unused)) bool is_qnan(FloatClass c)
}
/*
- * Structure holding all of the decomposed parts of a float. The
- * exponent is unbiased and the fraction is normalized. All
- * calculations are done with a 64 bit fraction and then rounded as
- * appropriate for the final format.
+ * Structure holding all of the decomposed parts of a float.
+ * The exponent is unbiased and the fraction is normalized.
*
- * Thanks to the packed FloatClass a decent compiler should be able to
- * fit the whole structure into registers and avoid using the stack
- * for parameter passing.
+ * The fraction words are stored in big-endian word ordering,
+ * so that truncation from a larger format to a smaller format
+ * can be done simply by ignoring subsequent elements.
*/
typedef struct {
@@ -527,6 +525,15 @@ typedef struct {
};
} FloatParts64;
+typedef struct {
+ FloatClass cls;
+ bool sign;
+ int32_t exp;
+ uint64_t frac_hi;
+ uint64_t frac_lo;
+} FloatParts128;
+
+/* These apply to the most significant word of each FloatPartsN. */
#define DECOMPOSED_BINARY_POINT 63
#define DECOMPOSED_IMPLICIT_BIT (1ull << DECOMPOSED_BINARY_POINT)
@@ -562,11 +569,11 @@ typedef struct {
.exp_bias = ((1 << E) - 1) >> 1, \
.exp_max = (1 << E) - 1, \
.frac_size = F, \
- .frac_shift = DECOMPOSED_BINARY_POINT - F, \
- .frac_lsb = 1ull << (DECOMPOSED_BINARY_POINT - F), \
- .frac_lsbm1 = 1ull << ((DECOMPOSED_BINARY_POINT - F) - 1), \
- .round_mask = (1ull << (DECOMPOSED_BINARY_POINT - F)) - 1, \
- .roundeven_mask = (2ull << (DECOMPOSED_BINARY_POINT - F)) - 1
+ .frac_shift = (-F - 1) & 63, \
+ .frac_lsb = 1ull << ((-F - 1) & 63), \
+ .frac_lsbm1 = 1ull << ((-F - 2) & 63), \
+ .round_mask = (1ull << ((-F - 1) & 63)) - 1, \
+ .roundeven_mask = (2ull << ((-F - 1) & 63)) - 1
static const FloatFmt float16_params = {
FLOAT_PARAMS(5, 10)
@@ -589,6 +596,10 @@ static const FloatFmt float64_params = {
FLOAT_PARAMS(11, 52)
};
+static const FloatFmt float128_params = {
+ FLOAT_PARAMS(15, 112)
+};
+
/* Unpack a float to parts, but do not canonicalize. */
static void unpack_raw64(FloatParts64 *r, const FloatFmt *fmt, uint64_t raw)
{
@@ -623,6 +634,20 @@ static inline void float64_unpack_raw(FloatParts64 *p, float64 f)
unpack_raw64(p, &float64_params, f);
}
+static void float128_unpack_raw(FloatParts128 *p, float128 f)
+{
+ const int f_size = float128_params.frac_size - 64;
+ const int e_size = float128_params.exp_size;
+
+ *p = (FloatParts128) {
+ .cls = float_class_unclassified,
+ .sign = extract64(f.high, f_size + e_size, 1),
+ .exp = extract64(f.high, f_size, e_size),
+ .frac_hi = extract64(f.high, 0, f_size),
+ .frac_lo = f.low,
+ };
+}
+
/* Pack a float from parts, but do not canonicalize. */
static uint64_t pack_raw64(const FloatParts64 *p, const FloatFmt *fmt)
{
@@ -656,6 +681,18 @@ static inline float64 float64_pack_raw(const FloatParts64 *p)
return make_float64(pack_raw64(p, &float64_params));
}
+static float128 float128_pack_raw(const FloatParts128 *p)
+{
+ const int f_size = float128_params.frac_size - 64;
+ const int e_size = float128_params.exp_size;
+ uint64_t hi;
+
+ hi = (uint64_t)p->sign << (f_size + e_size);
+ hi = deposit64(hi, f_size, e_size, p->exp);
+ hi = deposit64(hi, 0, f_size, p->frac_hi);
+ return make_float128(hi, p->frac_lo);
+}
+
/*----------------------------------------------------------------------------
| Functions and definitions to determine: (1) whether tininess for underflow
| is detected before or after rounding by default, (2) what (if anything)
@@ -666,8 +703,30 @@ static inline float64 float64_pack_raw(const FloatParts64 *p)
*----------------------------------------------------------------------------*/
#include "softfloat-specialize.c.inc"
+#define PARTS_GENERIC_64_128(NAME, P) \
+ QEMU_GENERIC(P, (FloatParts128 *, parts128_##NAME), parts64_##NAME)
+
#define parts_default_nan parts64_default_nan
-#define parts_silence_nan parts64_silence_nan
+#define parts_silence_nan(P, S) PARTS_GENERIC_64_128(silence_nan, P)(P, S)
+
+
+/*
+ * Helper functions for softfloat-parts.c.inc, per-size operations.
+ */
+
+static void frac128_shl(FloatParts128 *a, int c)
+{
+ shift128Left(a->frac_hi, a->frac_lo, c, &a->frac_hi, &a->frac_lo);
+}
+
+#define frac_shl(A, C) frac128_shl(A, C)
+
+static void frac128_shr(FloatParts128 *a, int c)
+{
+ shift128Right(a->frac_hi, a->frac_lo, c, &a->frac_hi, &a->frac_lo);
+}
+
+#define frac_shr(A, C) frac128_shr(A, C)
/* Canonicalize EXP and FRAC, setting CLS. */
static FloatParts64 sf_canonicalize(FloatParts64 part, const FloatFmt *parm,
@@ -3851,6 +3910,17 @@ bfloat16 bfloat16_silence_nan(bfloat16 a, float_status *status)
return bfloat16_pack_raw(&p);
}
+float128 float128_silence_nan(float128 a, float_status *status)
+{
+ FloatParts128 p;
+
+ float128_unpack_raw(&p, a);
+ frac_shl(&p, float128_params.frac_shift);
+ parts_silence_nan(&p, status);
+ frac_shr(&p, float128_params.frac_shift);
+ return float128_pack_raw(&p);
+}
+
/*----------------------------------------------------------------------------
| If `a' is denormal and we are in flush-to-zero mode then set the
| input-denormal exception and return zero. Otherwise just return the value.
diff --git a/fpu/softfloat-specialize.c.inc b/fpu/softfloat-specialize.c.inc
index 4038955379..5b85b843c2 100644
--- a/fpu/softfloat-specialize.c.inc
+++ b/fpu/softfloat-specialize.c.inc
@@ -197,6 +197,12 @@ static void parts64_silence_nan(FloatParts64 *p, float_status *status)
p->cls = float_class_qnan;
}
+static void parts128_silence_nan(FloatParts128 *p, float_status *status)
+{
+ p->frac_hi = parts_silence_nan_frac(p->frac_hi, status);
+ p->cls = float_class_qnan;
+}
+
/*----------------------------------------------------------------------------
| The pattern for a default generated extended double-precision NaN.
*----------------------------------------------------------------------------*/
@@ -1062,25 +1068,6 @@ bool float128_is_signaling_nan(float128 a, float_status *status)
}
}
-/*----------------------------------------------------------------------------
-| Returns a quiet NaN from a signalling NaN for the quadruple-precision
-| floating point value `a'.
-*----------------------------------------------------------------------------*/
-
-float128 float128_silence_nan(float128 a, float_status *status)
-{
- if (no_signaling_nans(status)) {
- g_assert_not_reached();
- } else {
- if (snan_bit_is_one(status)) {
- return float128_default_nan(status);
- } else {
- a.high |= UINT64_C(0x0000800000000000);
- return a;
- }
- }
-}
-
/*----------------------------------------------------------------------------
| Returns the result of converting the quadruple-precision floating-point NaN
| `a' to the canonical NaN format. If `a' is a signaling NaN, the invalid
--
2.25.1
^ permalink raw reply related [flat|nested] 52+ messages in thread
* [PULL 27/46] softfloat: Convert float128_default_nan to parts
2021-05-16 12:33 [PULL 00/46] softfloat: Convert float128 to FloatParts (part 1) Richard Henderson
` (25 preceding siblings ...)
2021-05-16 12:34 ` [PULL 26/46] softfloat: Convert float128_silence_nan to parts Richard Henderson
@ 2021-05-16 12:34 ` Richard Henderson
2021-05-16 12:34 ` [PULL 28/46] softfloat: Move return_nan to softfloat-parts.c.inc Richard Henderson
` (20 subsequent siblings)
47 siblings, 0 replies; 52+ messages in thread
From: Richard Henderson @ 2021-05-16 12:34 UTC (permalink / raw)
To: qemu-devel; +Cc: peter.maydell, Alex Bennée
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
fpu/softfloat.c | 17 ++++-------------
fpu/softfloat-specialize.c.inc | 19 +++++++++++++++++++
2 files changed, 23 insertions(+), 13 deletions(-)
diff --git a/fpu/softfloat.c b/fpu/softfloat.c
index f8f4ef51e8..08fd812ea0 100644
--- a/fpu/softfloat.c
+++ b/fpu/softfloat.c
@@ -706,7 +706,7 @@ static float128 float128_pack_raw(const FloatParts128 *p)
#define PARTS_GENERIC_64_128(NAME, P) \
QEMU_GENERIC(P, (FloatParts128 *, parts128_##NAME), parts64_##NAME)
-#define parts_default_nan parts64_default_nan
+#define parts_default_nan(P, S) PARTS_GENERIC_64_128(default_nan, P)(P, S)
#define parts_silence_nan(P, S) PARTS_GENERIC_64_128(silence_nan, P)(P, S)
@@ -3837,20 +3837,11 @@ float64 float64_default_nan(float_status *status)
float128 float128_default_nan(float_status *status)
{
- FloatParts64 p;
- float128 r;
+ FloatParts128 p;
parts_default_nan(&p, status);
- /* Extrapolate from the choices made by parts_default_nan to fill
- * in the quad-floating format. If the low bit is set, assume we
- * want to set all non-snan bits.
- */
- r.low = -(p.frac & 1);
- r.high = p.frac >> (DECOMPOSED_BINARY_POINT - 48);
- r.high |= UINT64_C(0x7FFF000000000000);
- r.high |= (uint64_t)p.sign << 63;
-
- return r;
+ frac_shr(&p, float128_params.frac_shift);
+ return float128_pack_raw(&p);
}
bfloat16 bfloat16_default_nan(float_status *status)
diff --git a/fpu/softfloat-specialize.c.inc b/fpu/softfloat-specialize.c.inc
index 5b85b843c2..c895733e79 100644
--- a/fpu/softfloat-specialize.c.inc
+++ b/fpu/softfloat-specialize.c.inc
@@ -171,6 +171,25 @@ static void parts64_default_nan(FloatParts64 *p, float_status *status)
};
}
+static void parts128_default_nan(FloatParts128 *p, float_status *status)
+{
+ /*
+ * Extrapolate from the choices made by parts64_default_nan to fill
+ * in the quad-floating format. If the low bit is set, assume we
+ * want to set all non-snan bits.
+ */
+ FloatParts64 p64;
+ parts64_default_nan(&p64, status);
+
+ *p = (FloatParts128) {
+ .cls = float_class_qnan,
+ .sign = p64.sign,
+ .exp = INT_MAX,
+ .frac_hi = p64.frac,
+ .frac_lo = -(p64.frac & 1)
+ };
+}
+
/*----------------------------------------------------------------------------
| Returns a quiet NaN from a signalling NaN for the deconstructed
| floating-point parts.
--
2.25.1
^ permalink raw reply related [flat|nested] 52+ messages in thread
* [PULL 28/46] softfloat: Move return_nan to softfloat-parts.c.inc
2021-05-16 12:33 [PULL 00/46] softfloat: Convert float128 to FloatParts (part 1) Richard Henderson
` (26 preceding siblings ...)
2021-05-16 12:34 ` [PULL 27/46] softfloat: Convert float128_default_nan " Richard Henderson
@ 2021-05-16 12:34 ` Richard Henderson
2021-05-16 12:34 ` [PULL 29/46] softfloat: Move pick_nan " Richard Henderson
` (19 subsequent siblings)
47 siblings, 0 replies; 52+ messages in thread
From: Richard Henderson @ 2021-05-16 12:34 UTC (permalink / raw)
To: qemu-devel; +Cc: peter.maydell, David Hildenbrand
At the same time, convert to pointers, rename to parts$N_return_nan
and define a macro for parts_return_nan using QEMU_GENERIC.
Reviewed-by: David Hildenbrand <david@redhat.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
fpu/softfloat.c | 45 ++++++++++++++++++++++-----------------
fpu/softfloat-parts.c.inc | 37 ++++++++++++++++++++++++++++++++
2 files changed, 62 insertions(+), 20 deletions(-)
create mode 100644 fpu/softfloat-parts.c.inc
diff --git a/fpu/softfloat.c b/fpu/softfloat.c
index 08fd812ea0..bdc3125e5c 100644
--- a/fpu/softfloat.c
+++ b/fpu/softfloat.c
@@ -709,6 +709,10 @@ static float128 float128_pack_raw(const FloatParts128 *p)
#define parts_default_nan(P, S) PARTS_GENERIC_64_128(default_nan, P)(P, S)
#define parts_silence_nan(P, S) PARTS_GENERIC_64_128(silence_nan, P)(P, S)
+static void parts64_return_nan(FloatParts64 *a, float_status *s);
+static void parts128_return_nan(FloatParts128 *a, float_status *s);
+
+#define parts_return_nan(P, S) PARTS_GENERIC_64_128(return_nan, P)(P, S)
/*
* Helper functions for softfloat-parts.c.inc, per-size operations.
@@ -915,22 +919,6 @@ static FloatParts64 round_canonical(FloatParts64 p, float_status *s,
return p;
}
-static FloatParts64 return_nan(FloatParts64 a, float_status *s)
-{
- g_assert(is_nan(a.cls));
- if (is_snan(a.cls)) {
- float_raise(float_flag_invalid, s);
- if (!s->default_nan_mode) {
- parts_silence_nan(&a, s);
- return a;
- }
- } else if (!s->default_nan_mode) {
- return a;
- }
- parts_default_nan(&a, s);
- return a;
-}
-
static FloatParts64 pick_nan(FloatParts64 a, FloatParts64 b, float_status *s)
{
if (is_snan(a.cls) || is_snan(b.cls)) {
@@ -992,6 +980,21 @@ static FloatParts64 pick_nan_muladd(FloatParts64 a, FloatParts64 b, FloatParts64
return a;
}
+#define partsN(NAME) parts64_##NAME
+#define FloatPartsN FloatParts64
+
+#include "softfloat-parts.c.inc"
+
+#undef partsN
+#undef FloatPartsN
+#define partsN(NAME) parts128_##NAME
+#define FloatPartsN FloatParts128
+
+#include "softfloat-parts.c.inc"
+
+#undef partsN
+#undef FloatPartsN
+
/*
* Pack/unpack routines with a specific FloatFmt.
*/
@@ -2066,7 +2069,7 @@ static FloatParts64 float_to_float(FloatParts64 a, const FloatFmt *dstf,
break;
}
} else if (is_nan(a.cls)) {
- return return_nan(a, s);
+ parts_return_nan(&a, s);
}
return a;
}
@@ -2195,7 +2198,8 @@ static FloatParts64 round_to_int(FloatParts64 a, FloatRoundMode rmode,
switch (a.cls) {
case float_class_qnan:
case float_class_snan:
- return return_nan(a, s);
+ parts_return_nan(&a, s);
+ break;
case float_class_zero:
case float_class_inf:
@@ -3591,7 +3595,7 @@ FloatRelation bfloat16_compare_quiet(bfloat16 a, bfloat16 b, float_status *s)
static FloatParts64 scalbn_decomposed(FloatParts64 a, int n, float_status *s)
{
if (unlikely(is_nan(a.cls))) {
- return return_nan(a, s);
+ parts_return_nan(&a, s);
}
if (a.cls == float_class_normal) {
/* The largest float type (even though not supported by FloatParts64)
@@ -3659,7 +3663,8 @@ static FloatParts64 sqrt_float(FloatParts64 a, float_status *s, const FloatFmt *
int bit, last_bit;
if (is_nan(a.cls)) {
- return return_nan(a, s);
+ parts_return_nan(&a, s);
+ return a;
}
if (a.cls == float_class_zero) {
return a; /* sqrt(+-0) = +-0 */
diff --git a/fpu/softfloat-parts.c.inc b/fpu/softfloat-parts.c.inc
new file mode 100644
index 0000000000..2a3075d6fe
--- /dev/null
+++ b/fpu/softfloat-parts.c.inc
@@ -0,0 +1,37 @@
+/*
+ * QEMU float support
+ *
+ * The code in this source file is derived from release 2a of the SoftFloat
+ * IEC/IEEE Floating-point Arithmetic Package. Those parts of the code (and
+ * some later contributions) are provided under that license, as detailed below.
+ * It has subsequently been modified by contributors to the QEMU Project,
+ * so some portions are provided under:
+ * the SoftFloat-2a license
+ * the BSD license
+ * GPL-v2-or-later
+ *
+ * Any future contributions to this file after December 1st 2014 will be
+ * taken to be licensed under the Softfloat-2a license unless specifically
+ * indicated otherwise.
+ */
+
+static void partsN(return_nan)(FloatPartsN *a, float_status *s)
+{
+ switch (a->cls) {
+ case float_class_snan:
+ float_raise(float_flag_invalid, s);
+ if (s->default_nan_mode) {
+ parts_default_nan(a, s);
+ } else {
+ parts_silence_nan(a, s);
+ }
+ break;
+ case float_class_qnan:
+ if (s->default_nan_mode) {
+ parts_default_nan(a, s);
+ }
+ break;
+ default:
+ g_assert_not_reached();
+ }
+}
--
2.25.1
^ permalink raw reply related [flat|nested] 52+ messages in thread
* [PULL 29/46] softfloat: Move pick_nan to softfloat-parts.c.inc
2021-05-16 12:33 [PULL 00/46] softfloat: Convert float128 to FloatParts (part 1) Richard Henderson
` (27 preceding siblings ...)
2021-05-16 12:34 ` [PULL 28/46] softfloat: Move return_nan to softfloat-parts.c.inc Richard Henderson
@ 2021-05-16 12:34 ` Richard Henderson
2021-05-16 12:34 ` [PULL 30/46] softfloat: Move pick_nan_muladd " Richard Henderson
` (18 subsequent siblings)
47 siblings, 0 replies; 52+ messages in thread
From: Richard Henderson @ 2021-05-16 12:34 UTC (permalink / raw)
To: qemu-devel; +Cc: peter.maydell, David Hildenbrand
At the same time, convert to pointers, rename to parts$N_pick_nan
and define a macro for parts_pick_nan using QEMU_GENERIC.
Reviewed-by: David Hildenbrand <david@redhat.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
fpu/softfloat.c | 62 ++++++++++++++++++++++-----------------
fpu/softfloat-parts.c.inc | 25 ++++++++++++++++
2 files changed, 60 insertions(+), 27 deletions(-)
diff --git a/fpu/softfloat.c b/fpu/softfloat.c
index bdc3125e5c..019b34d378 100644
--- a/fpu/softfloat.c
+++ b/fpu/softfloat.c
@@ -714,10 +714,39 @@ static void parts128_return_nan(FloatParts128 *a, float_status *s);
#define parts_return_nan(P, S) PARTS_GENERIC_64_128(return_nan, P)(P, S)
+static FloatParts64 *parts64_pick_nan(FloatParts64 *a, FloatParts64 *b,
+ float_status *s);
+static FloatParts128 *parts128_pick_nan(FloatParts128 *a, FloatParts128 *b,
+ float_status *s);
+
+#define parts_pick_nan(A, B, S) PARTS_GENERIC_64_128(pick_nan, A)(A, B, S)
+
/*
* Helper functions for softfloat-parts.c.inc, per-size operations.
*/
+#define FRAC_GENERIC_64_128(NAME, P) \
+ QEMU_GENERIC(P, (FloatParts128 *, frac128_##NAME), frac64_##NAME)
+
+static int frac64_cmp(FloatParts64 *a, FloatParts64 *b)
+{
+ return a->frac == b->frac ? 0 : a->frac < b->frac ? -1 : 1;
+}
+
+static int frac128_cmp(FloatParts128 *a, FloatParts128 *b)
+{
+ uint64_t ta = a->frac_hi, tb = b->frac_hi;
+ if (ta == tb) {
+ ta = a->frac_lo, tb = b->frac_lo;
+ if (ta == tb) {
+ return 0;
+ }
+ }
+ return ta < tb ? -1 : 1;
+}
+
+#define frac_cmp(A, B) FRAC_GENERIC_64_128(cmp, A)(A, B)
+
static void frac128_shl(FloatParts128 *a, int c)
{
shift128Left(a->frac_hi, a->frac_lo, c, &a->frac_hi, &a->frac_lo);
@@ -919,27 +948,6 @@ static FloatParts64 round_canonical(FloatParts64 p, float_status *s,
return p;
}
-static FloatParts64 pick_nan(FloatParts64 a, FloatParts64 b, float_status *s)
-{
- if (is_snan(a.cls) || is_snan(b.cls)) {
- float_raise(float_flag_invalid, s);
- }
-
- if (s->default_nan_mode) {
- parts_default_nan(&a, s);
- } else {
- if (pickNaN(a.cls, b.cls,
- a.frac > b.frac ||
- (a.frac == b.frac && a.sign < b.sign), s)) {
- a = b;
- }
- if (is_snan(a.cls)) {
- parts_silence_nan(&a, s);
- }
- }
- return a;
-}
-
static FloatParts64 pick_nan_muladd(FloatParts64 a, FloatParts64 b, FloatParts64 c,
bool inf_zero, float_status *s)
{
@@ -1107,7 +1115,7 @@ static FloatParts64 addsub_floats(FloatParts64 a, FloatParts64 b, bool subtract,
return a;
}
if (is_nan(a.cls) || is_nan(b.cls)) {
- return pick_nan(a, b, s);
+ return *parts_pick_nan(&a, &b, s);
}
if (a.cls == float_class_inf) {
if (b.cls == float_class_inf) {
@@ -1145,7 +1153,7 @@ static FloatParts64 addsub_floats(FloatParts64 a, FloatParts64 b, bool subtract,
return a;
}
if (is_nan(a.cls) || is_nan(b.cls)) {
- return pick_nan(a, b, s);
+ return *parts_pick_nan(&a, &b, s);
}
if (a.cls == float_class_inf || b.cls == float_class_zero) {
return a;
@@ -1361,7 +1369,7 @@ static FloatParts64 mul_floats(FloatParts64 a, FloatParts64 b, float_status *s)
}
/* handle all the NaN cases */
if (is_nan(a.cls) || is_nan(b.cls)) {
- return pick_nan(a, b, s);
+ return *parts_pick_nan(&a, &b, s);
}
/* Inf * Zero == NaN */
if ((a.cls == float_class_inf && b.cls == float_class_zero) ||
@@ -1888,7 +1896,7 @@ static FloatParts64 div_floats(FloatParts64 a, FloatParts64 b, float_status *s)
}
/* handle all the NaN cases */
if (is_nan(a.cls) || is_nan(b.cls)) {
- return pick_nan(a, b, s);
+ return *parts_pick_nan(&a, &b, s);
}
/* 0/0 or Inf/Inf */
if (a.cls == b.cls
@@ -3296,14 +3304,14 @@ static FloatParts64 minmax_floats(FloatParts64 a, FloatParts64 b, bool ismin,
* the invalid exception is raised.
*/
if (is_snan(a.cls) || is_snan(b.cls)) {
- return pick_nan(a, b, s);
+ return *parts_pick_nan(&a, &b, s);
} else if (is_nan(a.cls) && !is_nan(b.cls)) {
return b;
} else if (is_nan(b.cls) && !is_nan(a.cls)) {
return a;
}
}
- return pick_nan(a, b, s);
+ return *parts_pick_nan(&a, &b, s);
} else {
int a_exp, b_exp;
diff --git a/fpu/softfloat-parts.c.inc b/fpu/softfloat-parts.c.inc
index 2a3075d6fe..11a71650f7 100644
--- a/fpu/softfloat-parts.c.inc
+++ b/fpu/softfloat-parts.c.inc
@@ -35,3 +35,28 @@ static void partsN(return_nan)(FloatPartsN *a, float_status *s)
g_assert_not_reached();
}
}
+
+static FloatPartsN *partsN(pick_nan)(FloatPartsN *a, FloatPartsN *b,
+ float_status *s)
+{
+ if (is_snan(a->cls) || is_snan(b->cls)) {
+ float_raise(float_flag_invalid, s);
+ }
+
+ if (s->default_nan_mode) {
+ parts_default_nan(a, s);
+ } else {
+ int cmp = frac_cmp(a, b);
+ if (cmp == 0) {
+ cmp = a->sign < b->sign;
+ }
+
+ if (pickNaN(a->cls, b->cls, cmp > 0, s)) {
+ a = b;
+ }
+ if (is_snan(a->cls)) {
+ parts_silence_nan(a, s);
+ }
+ }
+ return a;
+}
--
2.25.1
^ permalink raw reply related [flat|nested] 52+ messages in thread
* [PULL 30/46] softfloat: Move pick_nan_muladd to softfloat-parts.c.inc
2021-05-16 12:33 [PULL 00/46] softfloat: Convert float128 to FloatParts (part 1) Richard Henderson
` (28 preceding siblings ...)
2021-05-16 12:34 ` [PULL 29/46] softfloat: Move pick_nan " Richard Henderson
@ 2021-05-16 12:34 ` Richard Henderson
2021-05-16 12:34 ` [PULL 31/46] softfloat: Move sf_canonicalize " Richard Henderson
` (17 subsequent siblings)
47 siblings, 0 replies; 52+ messages in thread
From: Richard Henderson @ 2021-05-16 12:34 UTC (permalink / raw)
To: qemu-devel; +Cc: peter.maydell, David Hildenbrand
At the same time, convert to pointers, rename to parts$N_pick_nan_muladd
and define a macro for parts_pick_nan_muladd using QEMU_GENERIC.
Reviewed-by: David Hildenbrand <david@redhat.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
fpu/softfloat.c | 53 ++++++++++-----------------------------
fpu/softfloat-parts.c.inc | 40 +++++++++++++++++++++++++++++
2 files changed, 53 insertions(+), 40 deletions(-)
diff --git a/fpu/softfloat.c b/fpu/softfloat.c
index 019b34d378..df004dbe2f 100644
--- a/fpu/softfloat.c
+++ b/fpu/softfloat.c
@@ -721,6 +721,18 @@ static FloatParts128 *parts128_pick_nan(FloatParts128 *a, FloatParts128 *b,
#define parts_pick_nan(A, B, S) PARTS_GENERIC_64_128(pick_nan, A)(A, B, S)
+static FloatParts64 *parts64_pick_nan_muladd(FloatParts64 *a, FloatParts64 *b,
+ FloatParts64 *c, float_status *s,
+ int ab_mask, int abc_mask);
+static FloatParts128 *parts128_pick_nan_muladd(FloatParts128 *a,
+ FloatParts128 *b,
+ FloatParts128 *c,
+ float_status *s,
+ int ab_mask, int abc_mask);
+
+#define parts_pick_nan_muladd(A, B, C, S, ABM, ABCM) \
+ PARTS_GENERIC_64_128(pick_nan_muladd, A)(A, B, C, S, ABM, ABCM)
+
/*
* Helper functions for softfloat-parts.c.inc, per-size operations.
*/
@@ -948,45 +960,6 @@ static FloatParts64 round_canonical(FloatParts64 p, float_status *s,
return p;
}
-static FloatParts64 pick_nan_muladd(FloatParts64 a, FloatParts64 b, FloatParts64 c,
- bool inf_zero, float_status *s)
-{
- int which;
-
- if (is_snan(a.cls) || is_snan(b.cls) || is_snan(c.cls)) {
- float_raise(float_flag_invalid, s);
- }
-
- which = pickNaNMulAdd(a.cls, b.cls, c.cls, inf_zero, s);
-
- if (s->default_nan_mode) {
- /* Note that this check is after pickNaNMulAdd so that function
- * has an opportunity to set the Invalid flag.
- */
- which = 3;
- }
-
- switch (which) {
- case 0:
- break;
- case 1:
- a = b;
- break;
- case 2:
- a = c;
- break;
- case 3:
- parts_default_nan(&a, s);
- break;
- default:
- g_assert_not_reached();
- }
-
- if (is_snan(a.cls)) {
- parts_silence_nan(&a, s);
- }
- return a;
-}
#define partsN(NAME) parts64_##NAME
#define FloatPartsN FloatParts64
@@ -1497,7 +1470,7 @@ static FloatParts64 muladd_floats(FloatParts64 a, FloatParts64 b, FloatParts64 c
* off to the target-specific pick-a-NaN routine.
*/
if (unlikely(abc_mask & float_cmask_anynan)) {
- return pick_nan_muladd(a, b, c, inf_zero, s);
+ return *parts_pick_nan_muladd(&a, &b, &c, s, ab_mask, abc_mask);
}
if (inf_zero) {
diff --git a/fpu/softfloat-parts.c.inc b/fpu/softfloat-parts.c.inc
index 11a71650f7..a78d61ea07 100644
--- a/fpu/softfloat-parts.c.inc
+++ b/fpu/softfloat-parts.c.inc
@@ -60,3 +60,43 @@ static FloatPartsN *partsN(pick_nan)(FloatPartsN *a, FloatPartsN *b,
}
return a;
}
+
+static FloatPartsN *partsN(pick_nan_muladd)(FloatPartsN *a, FloatPartsN *b,
+ FloatPartsN *c, float_status *s,
+ int ab_mask, int abc_mask)
+{
+ int which;
+
+ if (unlikely(abc_mask & float_cmask_snan)) {
+ float_raise(float_flag_invalid, s);
+ }
+
+ which = pickNaNMulAdd(a->cls, b->cls, c->cls,
+ ab_mask == float_cmask_infzero, s);
+
+ if (s->default_nan_mode || which == 3) {
+ /*
+ * Note that this check is after pickNaNMulAdd so that function
+ * has an opportunity to set the Invalid flag for infzero.
+ */
+ parts_default_nan(a, s);
+ return a;
+ }
+
+ switch (which) {
+ case 0:
+ break;
+ case 1:
+ a = b;
+ break;
+ case 2:
+ a = c;
+ break;
+ default:
+ g_assert_not_reached();
+ }
+ if (is_snan(a->cls)) {
+ parts_silence_nan(a, s);
+ }
+ return a;
+}
--
2.25.1
^ permalink raw reply related [flat|nested] 52+ messages in thread
* [PULL 31/46] softfloat: Move sf_canonicalize to softfloat-parts.c.inc
2021-05-16 12:33 [PULL 00/46] softfloat: Convert float128 to FloatParts (part 1) Richard Henderson
` (29 preceding siblings ...)
2021-05-16 12:34 ` [PULL 30/46] softfloat: Move pick_nan_muladd " Richard Henderson
@ 2021-05-16 12:34 ` Richard Henderson
2021-05-16 12:34 ` [PULL 32/46] softfloat: Move round_canonical " Richard Henderson
` (16 subsequent siblings)
47 siblings, 0 replies; 52+ messages in thread
From: Richard Henderson @ 2021-05-16 12:34 UTC (permalink / raw)
To: qemu-devel; +Cc: peter.maydell, Alex Bennée
At the same time, convert to pointers, rename to parts$N_canonicalize
and define a macro for parts_canonicalize using QEMU_GENERIC.
Rearrange the cases to recognize float_class_normal as
early as possible.
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
fpu/softfloat.c | 117 +++++++++++++++++++++++++-------------
fpu/softfloat-parts.c.inc | 33 +++++++++++
2 files changed, 112 insertions(+), 38 deletions(-)
diff --git a/fpu/softfloat.c b/fpu/softfloat.c
index df004dbe2f..535261db44 100644
--- a/fpu/softfloat.c
+++ b/fpu/softfloat.c
@@ -733,6 +733,14 @@ static FloatParts128 *parts128_pick_nan_muladd(FloatParts128 *a,
#define parts_pick_nan_muladd(A, B, C, S, ABM, ABCM) \
PARTS_GENERIC_64_128(pick_nan_muladd, A)(A, B, C, S, ABM, ABCM)
+static void parts64_canonicalize(FloatParts64 *p, float_status *status,
+ const FloatFmt *fmt);
+static void parts128_canonicalize(FloatParts128 *p, float_status *status,
+ const FloatFmt *fmt);
+
+#define parts_canonicalize(A, S, F) \
+ PARTS_GENERIC_64_128(canonicalize, A)(A, S, F)
+
/*
* Helper functions for softfloat-parts.c.inc, per-size operations.
*/
@@ -759,52 +767,85 @@ static int frac128_cmp(FloatParts128 *a, FloatParts128 *b)
#define frac_cmp(A, B) FRAC_GENERIC_64_128(cmp, A)(A, B)
+static void frac64_clear(FloatParts64 *a)
+{
+ a->frac = 0;
+}
+
+static void frac128_clear(FloatParts128 *a)
+{
+ a->frac_hi = a->frac_lo = 0;
+}
+
+#define frac_clear(A) FRAC_GENERIC_64_128(clear, A)(A)
+
+static bool frac64_eqz(FloatParts64 *a)
+{
+ return a->frac == 0;
+}
+
+static bool frac128_eqz(FloatParts128 *a)
+{
+ return (a->frac_hi | a->frac_lo) == 0;
+}
+
+#define frac_eqz(A) FRAC_GENERIC_64_128(eqz, A)(A)
+
+static int frac64_normalize(FloatParts64 *a)
+{
+ if (a->frac) {
+ int shift = clz64(a->frac);
+ a->frac <<= shift;
+ return shift;
+ }
+ return 64;
+}
+
+static int frac128_normalize(FloatParts128 *a)
+{
+ if (a->frac_hi) {
+ int shl = clz64(a->frac_hi);
+ if (shl) {
+ int shr = 64 - shl;
+ a->frac_hi = (a->frac_hi << shl) | (a->frac_lo >> shr);
+ a->frac_lo = (a->frac_lo << shl);
+ }
+ return shl;
+ } else if (a->frac_lo) {
+ int shl = clz64(a->frac_lo);
+ a->frac_hi = (a->frac_lo << shl);
+ a->frac_lo = 0;
+ return shl + 64;
+ }
+ return 128;
+}
+
+#define frac_normalize(A) FRAC_GENERIC_64_128(normalize, A)(A)
+
+static void frac64_shl(FloatParts64 *a, int c)
+{
+ a->frac <<= c;
+}
+
static void frac128_shl(FloatParts128 *a, int c)
{
shift128Left(a->frac_hi, a->frac_lo, c, &a->frac_hi, &a->frac_lo);
}
-#define frac_shl(A, C) frac128_shl(A, C)
+#define frac_shl(A, C) FRAC_GENERIC_64_128(shl, A)(A, C)
+
+static void frac64_shr(FloatParts64 *a, int c)
+{
+ a->frac >>= c;
+}
static void frac128_shr(FloatParts128 *a, int c)
{
shift128Right(a->frac_hi, a->frac_lo, c, &a->frac_hi, &a->frac_lo);
}
-#define frac_shr(A, C) frac128_shr(A, C)
+#define frac_shr(A, C) FRAC_GENERIC_64_128(shr, A)(A, C)
-/* Canonicalize EXP and FRAC, setting CLS. */
-static FloatParts64 sf_canonicalize(FloatParts64 part, const FloatFmt *parm,
- float_status *status)
-{
- if (part.exp == parm->exp_max && !parm->arm_althp) {
- if (part.frac == 0) {
- part.cls = float_class_inf;
- } else {
- part.frac <<= parm->frac_shift;
- part.cls = (parts_is_snan_frac(part.frac, status)
- ? float_class_snan : float_class_qnan);
- }
- } else if (part.exp == 0) {
- if (likely(part.frac == 0)) {
- part.cls = float_class_zero;
- } else if (status->flush_inputs_to_zero) {
- float_raise(float_flag_input_denormal, status);
- part.cls = float_class_zero;
- part.frac = 0;
- } else {
- int shift = clz64(part.frac);
- part.cls = float_class_normal;
- part.exp = parm->frac_shift - parm->exp_bias - shift + 1;
- part.frac <<= shift;
- }
- } else {
- part.cls = float_class_normal;
- part.exp -= parm->exp_bias;
- part.frac = DECOMPOSED_IMPLICIT_BIT + (part.frac << parm->frac_shift);
- }
- return part;
-}
/* Round and uncanonicalize a floating-point number by parts. There
* are FRAC_SHIFT bits that may require rounding at the bottom of the
@@ -984,7 +1025,7 @@ static void float16a_unpack_canonical(FloatParts64 *p, float16 f,
float_status *s, const FloatFmt *params)
{
float16_unpack_raw(p, f);
- *p = sf_canonicalize(*p, params, s);
+ parts_canonicalize(p, s, params);
}
static void float16_unpack_canonical(FloatParts64 *p, float16 f,
@@ -997,7 +1038,7 @@ static void bfloat16_unpack_canonical(FloatParts64 *p, bfloat16 f,
float_status *s)
{
bfloat16_unpack_raw(p, f);
- *p = sf_canonicalize(*p, &bfloat16_params, s);
+ parts_canonicalize(p, s, &bfloat16_params);
}
static float16 float16a_round_pack_canonical(FloatParts64 *p,
@@ -1025,7 +1066,7 @@ static void float32_unpack_canonical(FloatParts64 *p, float32 f,
float_status *s)
{
float32_unpack_raw(p, f);
- *p = sf_canonicalize(*p, &float32_params, s);
+ parts_canonicalize(p, s, &float32_params);
}
static float32 float32_round_pack_canonical(FloatParts64 *p,
@@ -1039,7 +1080,7 @@ static void float64_unpack_canonical(FloatParts64 *p, float64 f,
float_status *s)
{
float64_unpack_raw(p, f);
- *p = sf_canonicalize(*p, &float64_params, s);
+ parts_canonicalize(p, s, &float64_params);
}
static float64 float64_round_pack_canonical(FloatParts64 *p,
diff --git a/fpu/softfloat-parts.c.inc b/fpu/softfloat-parts.c.inc
index a78d61ea07..25bf99bd0f 100644
--- a/fpu/softfloat-parts.c.inc
+++ b/fpu/softfloat-parts.c.inc
@@ -100,3 +100,36 @@ static FloatPartsN *partsN(pick_nan_muladd)(FloatPartsN *a, FloatPartsN *b,
}
return a;
}
+
+/*
+ * Canonicalize the FloatParts structure. Determine the class,
+ * unbias the exponent, and normalize the fraction.
+ */
+static void partsN(canonicalize)(FloatPartsN *p, float_status *status,
+ const FloatFmt *fmt)
+{
+ if (unlikely(p->exp == 0)) {
+ if (likely(frac_eqz(p))) {
+ p->cls = float_class_zero;
+ } else if (status->flush_inputs_to_zero) {
+ float_raise(float_flag_input_denormal, status);
+ p->cls = float_class_zero;
+ frac_clear(p);
+ } else {
+ int shift = frac_normalize(p);
+ p->cls = float_class_normal;
+ p->exp = fmt->frac_shift - fmt->exp_bias - shift + 1;
+ }
+ } else if (likely(p->exp < fmt->exp_max) || fmt->arm_althp) {
+ p->cls = float_class_normal;
+ p->exp -= fmt->exp_bias;
+ frac_shl(p, fmt->frac_shift);
+ p->frac_hi |= DECOMPOSED_IMPLICIT_BIT;
+ } else if (likely(frac_eqz(p))) {
+ p->cls = float_class_inf;
+ } else {
+ frac_shl(p, fmt->frac_shift);
+ p->cls = (parts_is_snan_frac(p->frac_hi, status)
+ ? float_class_snan : float_class_qnan);
+ }
+}
--
2.25.1
^ permalink raw reply related [flat|nested] 52+ messages in thread
* [PULL 32/46] softfloat: Move round_canonical to softfloat-parts.c.inc
2021-05-16 12:33 [PULL 00/46] softfloat: Convert float128 to FloatParts (part 1) Richard Henderson
` (30 preceding siblings ...)
2021-05-16 12:34 ` [PULL 31/46] softfloat: Move sf_canonicalize " Richard Henderson
@ 2021-05-16 12:34 ` Richard Henderson
2021-05-16 12:34 ` [PULL 33/46] softfloat: Use uadd64_carry, usub64_borrow in softfloat-macros.h Richard Henderson
` (15 subsequent siblings)
47 siblings, 0 replies; 52+ messages in thread
From: Richard Henderson @ 2021-05-16 12:34 UTC (permalink / raw)
To: qemu-devel; +Cc: peter.maydell, Alex Bennée
At the same time, convert to pointers, renaming to parts$N_uncanon,
and define a macro for parts_uncanon using QEMU_GENERIC.
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
fpu/softfloat.c | 201 +++++++++-----------------------------
fpu/softfloat-parts.c.inc | 148 ++++++++++++++++++++++++++++
2 files changed, 193 insertions(+), 156 deletions(-)
diff --git a/fpu/softfloat.c b/fpu/softfloat.c
index 535261db44..817a91de85 100644
--- a/fpu/softfloat.c
+++ b/fpu/softfloat.c
@@ -741,6 +741,14 @@ static void parts128_canonicalize(FloatParts128 *p, float_status *status,
#define parts_canonicalize(A, S, F) \
PARTS_GENERIC_64_128(canonicalize, A)(A, S, F)
+static void parts64_uncanon(FloatParts64 *p, float_status *status,
+ const FloatFmt *fmt);
+static void parts128_uncanon(FloatParts128 *p, float_status *status,
+ const FloatFmt *fmt);
+
+#define parts_uncanon(A, S, F) \
+ PARTS_GENERIC_64_128(uncanon, A)(A, S, F)
+
/*
* Helper functions for softfloat-parts.c.inc, per-size operations.
*/
@@ -748,6 +756,31 @@ static void parts128_canonicalize(FloatParts128 *p, float_status *status,
#define FRAC_GENERIC_64_128(NAME, P) \
QEMU_GENERIC(P, (FloatParts128 *, frac128_##NAME), frac64_##NAME)
+static bool frac64_addi(FloatParts64 *r, FloatParts64 *a, uint64_t c)
+{
+ return uadd64_overflow(a->frac, c, &r->frac);
+}
+
+static bool frac128_addi(FloatParts128 *r, FloatParts128 *a, uint64_t c)
+{
+ c = uadd64_overflow(a->frac_lo, c, &r->frac_lo);
+ return uadd64_overflow(a->frac_hi, c, &r->frac_hi);
+}
+
+#define frac_addi(R, A, C) FRAC_GENERIC_64_128(addi, R)(R, A, C)
+
+static void frac64_allones(FloatParts64 *a)
+{
+ a->frac = -1;
+}
+
+static void frac128_allones(FloatParts128 *a)
+{
+ a->frac_hi = a->frac_lo = -1;
+}
+
+#define frac_allones(A) FRAC_GENERIC_64_128(allones, A)(A)
+
static int frac64_cmp(FloatParts64 *a, FloatParts64 *b)
{
return a->frac == b->frac ? 0 : a->frac < b->frac ? -1 : 1;
@@ -846,161 +879,17 @@ static void frac128_shr(FloatParts128 *a, int c)
#define frac_shr(A, C) FRAC_GENERIC_64_128(shr, A)(A, C)
-
-/* Round and uncanonicalize a floating-point number by parts. There
- * are FRAC_SHIFT bits that may require rounding at the bottom of the
- * fraction; these bits will be removed. The exponent will be biased
- * by EXP_BIAS and must be bounded by [EXP_MAX-1, 0].
- */
-
-static FloatParts64 round_canonical(FloatParts64 p, float_status *s,
- const FloatFmt *parm)
+static void frac64_shrjam(FloatParts64 *a, int c)
{
- const uint64_t frac_lsb = parm->frac_lsb;
- const uint64_t frac_lsbm1 = parm->frac_lsbm1;
- const uint64_t round_mask = parm->round_mask;
- const uint64_t roundeven_mask = parm->roundeven_mask;
- const int exp_max = parm->exp_max;
- const int frac_shift = parm->frac_shift;
- uint64_t frac, inc;
- int exp, flags = 0;
- bool overflow_norm;
-
- frac = p.frac;
- exp = p.exp;
-
- switch (p.cls) {
- case float_class_normal:
- switch (s->float_rounding_mode) {
- case float_round_nearest_even:
- overflow_norm = false;
- inc = ((frac & roundeven_mask) != frac_lsbm1 ? frac_lsbm1 : 0);
- break;
- case float_round_ties_away:
- overflow_norm = false;
- inc = frac_lsbm1;
- break;
- case float_round_to_zero:
- overflow_norm = true;
- inc = 0;
- break;
- case float_round_up:
- inc = p.sign ? 0 : round_mask;
- overflow_norm = p.sign;
- break;
- case float_round_down:
- inc = p.sign ? round_mask : 0;
- overflow_norm = !p.sign;
- break;
- case float_round_to_odd:
- overflow_norm = true;
- inc = frac & frac_lsb ? 0 : round_mask;
- break;
- default:
- g_assert_not_reached();
- }
-
- exp += parm->exp_bias;
- if (likely(exp > 0)) {
- if (frac & round_mask) {
- flags |= float_flag_inexact;
- if (uadd64_overflow(frac, inc, &frac)) {
- frac = (frac >> 1) | DECOMPOSED_IMPLICIT_BIT;
- exp++;
- }
- }
- frac >>= frac_shift;
-
- if (parm->arm_althp) {
- /* ARM Alt HP eschews Inf and NaN for a wider exponent. */
- if (unlikely(exp > exp_max)) {
- /* Overflow. Return the maximum normal. */
- flags = float_flag_invalid;
- exp = exp_max;
- frac = -1;
- }
- } else if (unlikely(exp >= exp_max)) {
- flags |= float_flag_overflow | float_flag_inexact;
- if (overflow_norm) {
- exp = exp_max - 1;
- frac = -1;
- } else {
- p.cls = float_class_inf;
- goto do_inf;
- }
- }
- } else if (s->flush_to_zero) {
- flags |= float_flag_output_denormal;
- p.cls = float_class_zero;
- goto do_zero;
- } else {
- bool is_tiny = s->tininess_before_rounding || (exp < 0);
-
- if (!is_tiny) {
- uint64_t discard;
- is_tiny = !uadd64_overflow(frac, inc, &discard);
- }
-
- shift64RightJamming(frac, 1 - exp, &frac);
- if (frac & round_mask) {
- /* Need to recompute round-to-even. */
- switch (s->float_rounding_mode) {
- case float_round_nearest_even:
- inc = ((frac & roundeven_mask) != frac_lsbm1
- ? frac_lsbm1 : 0);
- break;
- case float_round_to_odd:
- inc = frac & frac_lsb ? 0 : round_mask;
- break;
- default:
- break;
- }
- flags |= float_flag_inexact;
- frac += inc;
- }
-
- exp = (frac & DECOMPOSED_IMPLICIT_BIT ? 1 : 0);
- frac >>= frac_shift;
-
- if (is_tiny && (flags & float_flag_inexact)) {
- flags |= float_flag_underflow;
- }
- if (exp == 0 && frac == 0) {
- p.cls = float_class_zero;
- }
- }
- break;
-
- case float_class_zero:
- do_zero:
- exp = 0;
- frac = 0;
- break;
-
- case float_class_inf:
- do_inf:
- assert(!parm->arm_althp);
- exp = exp_max;
- frac = 0;
- break;
-
- case float_class_qnan:
- case float_class_snan:
- assert(!parm->arm_althp);
- exp = exp_max;
- frac >>= parm->frac_shift;
- break;
-
- default:
- g_assert_not_reached();
- }
-
- float_raise(flags, s);
- p.exp = exp;
- p.frac = frac;
- return p;
+ shift64RightJamming(a->frac, c, &a->frac);
}
+static void frac128_shrjam(FloatParts128 *a, int c)
+{
+ shift128RightJamming(a->frac_hi, a->frac_lo, c, &a->frac_hi, &a->frac_lo);
+}
+
+#define frac_shrjam(A, C) FRAC_GENERIC_64_128(shrjam, A)(A, C)
#define partsN(NAME) parts64_##NAME
#define FloatPartsN FloatParts64
@@ -1045,7 +934,7 @@ static float16 float16a_round_pack_canonical(FloatParts64 *p,
float_status *s,
const FloatFmt *params)
{
- *p = round_canonical(*p, s, params);
+ parts_uncanon(p, s, params);
return float16_pack_raw(p);
}
@@ -1058,7 +947,7 @@ static float16 float16_round_pack_canonical(FloatParts64 *p,
static bfloat16 bfloat16_round_pack_canonical(FloatParts64 *p,
float_status *s)
{
- *p = round_canonical(*p, s, &bfloat16_params);
+ parts_uncanon(p, s, &bfloat16_params);
return bfloat16_pack_raw(p);
}
@@ -1072,7 +961,7 @@ static void float32_unpack_canonical(FloatParts64 *p, float32 f,
static float32 float32_round_pack_canonical(FloatParts64 *p,
float_status *s)
{
- *p = round_canonical(*p, s, &float32_params);
+ parts_uncanon(p, s, &float32_params);
return float32_pack_raw(p);
}
@@ -1086,7 +975,7 @@ static void float64_unpack_canonical(FloatParts64 *p, float64 f,
static float64 float64_round_pack_canonical(FloatParts64 *p,
float_status *s)
{
- *p = round_canonical(*p, s, &float64_params);
+ parts_uncanon(p, s, &float64_params);
return float64_pack_raw(p);
}
diff --git a/fpu/softfloat-parts.c.inc b/fpu/softfloat-parts.c.inc
index 25bf99bd0f..efdc724770 100644
--- a/fpu/softfloat-parts.c.inc
+++ b/fpu/softfloat-parts.c.inc
@@ -133,3 +133,151 @@ static void partsN(canonicalize)(FloatPartsN *p, float_status *status,
? float_class_snan : float_class_qnan);
}
}
+
+/*
+ * Round and uncanonicalize a floating-point number by parts. There
+ * are FRAC_SHIFT bits that may require rounding at the bottom of the
+ * fraction; these bits will be removed. The exponent will be biased
+ * by EXP_BIAS and must be bounded by [EXP_MAX-1, 0].
+ */
+static void partsN(uncanon)(FloatPartsN *p, float_status *s,
+ const FloatFmt *fmt)
+{
+ const int exp_max = fmt->exp_max;
+ const int frac_shift = fmt->frac_shift;
+ const uint64_t frac_lsb = fmt->frac_lsb;
+ const uint64_t frac_lsbm1 = fmt->frac_lsbm1;
+ const uint64_t round_mask = fmt->round_mask;
+ const uint64_t roundeven_mask = fmt->roundeven_mask;
+ uint64_t inc;
+ bool overflow_norm;
+ int exp, flags = 0;
+
+ if (unlikely(p->cls != float_class_normal)) {
+ switch (p->cls) {
+ case float_class_zero:
+ p->exp = 0;
+ frac_clear(p);
+ return;
+ case float_class_inf:
+ g_assert(!fmt->arm_althp);
+ p->exp = fmt->exp_max;
+ frac_clear(p);
+ return;
+ case float_class_qnan:
+ case float_class_snan:
+ g_assert(!fmt->arm_althp);
+ p->exp = fmt->exp_max;
+ frac_shr(p, fmt->frac_shift);
+ return;
+ default:
+ break;
+ }
+ g_assert_not_reached();
+ }
+
+ switch (s->float_rounding_mode) {
+ case float_round_nearest_even:
+ overflow_norm = false;
+ inc = ((p->frac_lo & roundeven_mask) != frac_lsbm1 ? frac_lsbm1 : 0);
+ break;
+ case float_round_ties_away:
+ overflow_norm = false;
+ inc = frac_lsbm1;
+ break;
+ case float_round_to_zero:
+ overflow_norm = true;
+ inc = 0;
+ break;
+ case float_round_up:
+ inc = p->sign ? 0 : round_mask;
+ overflow_norm = p->sign;
+ break;
+ case float_round_down:
+ inc = p->sign ? round_mask : 0;
+ overflow_norm = !p->sign;
+ break;
+ case float_round_to_odd:
+ overflow_norm = true;
+ inc = p->frac_lo & frac_lsb ? 0 : round_mask;
+ break;
+ default:
+ g_assert_not_reached();
+ }
+
+ exp = p->exp + fmt->exp_bias;
+ if (likely(exp > 0)) {
+ if (p->frac_lo & round_mask) {
+ flags |= float_flag_inexact;
+ if (frac_addi(p, p, inc)) {
+ frac_shr(p, 1);
+ p->frac_hi |= DECOMPOSED_IMPLICIT_BIT;
+ exp++;
+ }
+ }
+ frac_shr(p, frac_shift);
+
+ if (fmt->arm_althp) {
+ /* ARM Alt HP eschews Inf and NaN for a wider exponent. */
+ if (unlikely(exp > exp_max)) {
+ /* Overflow. Return the maximum normal. */
+ flags = float_flag_invalid;
+ exp = exp_max;
+ frac_allones(p);
+ }
+ } else if (unlikely(exp >= exp_max)) {
+ flags |= float_flag_overflow | float_flag_inexact;
+ if (overflow_norm) {
+ exp = exp_max - 1;
+ frac_allones(p);
+ } else {
+ p->cls = float_class_inf;
+ exp = exp_max;
+ frac_clear(p);
+ }
+ }
+ } else if (s->flush_to_zero) {
+ flags |= float_flag_output_denormal;
+ p->cls = float_class_zero;
+ exp = 0;
+ frac_clear(p);
+ } else {
+ bool is_tiny = s->tininess_before_rounding || exp < 0;
+
+ if (!is_tiny) {
+ FloatPartsN discard;
+ is_tiny = !frac_addi(&discard, p, inc);
+ }
+
+ frac_shrjam(p, 1 - exp);
+
+ if (p->frac_lo & round_mask) {
+ /* Need to recompute round-to-even/round-to-odd. */
+ switch (s->float_rounding_mode) {
+ case float_round_nearest_even:
+ inc = ((p->frac_lo & roundeven_mask) != frac_lsbm1
+ ? frac_lsbm1 : 0);
+ break;
+ case float_round_to_odd:
+ inc = p->frac_lo & frac_lsb ? 0 : round_mask;
+ break;
+ default:
+ break;
+ }
+ flags |= float_flag_inexact;
+ frac_addi(p, p, inc);
+ }
+
+ exp = (p->frac_hi & DECOMPOSED_IMPLICIT_BIT) != 0;
+ frac_shr(p, frac_shift);
+
+ if (is_tiny && (flags & float_flag_inexact)) {
+ flags |= float_flag_underflow;
+ }
+ if (exp == 0 && frac_eqz(p)) {
+ p->cls = float_class_zero;
+ }
+ }
+ p->exp = exp;
+ float_raise(flags, s);
+}
--
2.25.1
^ permalink raw reply related [flat|nested] 52+ messages in thread
* [PULL 33/46] softfloat: Use uadd64_carry, usub64_borrow in softfloat-macros.h
2021-05-16 12:33 [PULL 00/46] softfloat: Convert float128 to FloatParts (part 1) Richard Henderson
` (31 preceding siblings ...)
2021-05-16 12:34 ` [PULL 32/46] softfloat: Move round_canonical " Richard Henderson
@ 2021-05-16 12:34 ` Richard Henderson
2021-05-16 12:34 ` [PULL 34/46] softfloat: Move addsub_floats to softfloat-parts.c.inc Richard Henderson
` (14 subsequent siblings)
47 siblings, 0 replies; 52+ messages in thread
From: Richard Henderson @ 2021-05-16 12:34 UTC (permalink / raw)
To: qemu-devel; +Cc: peter.maydell, Alex Bennée
Use compiler support for carry arithmetic.
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
include/fpu/softfloat-macros.h | 95 +++++++++-------------------------
1 file changed, 25 insertions(+), 70 deletions(-)
diff --git a/include/fpu/softfloat-macros.h b/include/fpu/softfloat-macros.h
index a35ec2893a..2e3760a9c1 100644
--- a/include/fpu/softfloat-macros.h
+++ b/include/fpu/softfloat-macros.h
@@ -83,6 +83,7 @@ this code that are retained.
#define FPU_SOFTFLOAT_MACROS_H
#include "fpu/softfloat-types.h"
+#include "qemu/host-utils.h"
/*----------------------------------------------------------------------------
| Shifts `a' right by the number of bits given in `count'. If any nonzero
@@ -403,16 +404,12 @@ static inline void
| are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
*----------------------------------------------------------------------------*/
-static inline void
- add128(
- uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1, uint64_t *z0Ptr, uint64_t *z1Ptr )
+static inline void add128(uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1,
+ uint64_t *z0Ptr, uint64_t *z1Ptr)
{
- uint64_t z1;
-
- z1 = a1 + b1;
- *z1Ptr = z1;
- *z0Ptr = a0 + b0 + ( z1 < a1 );
-
+ bool c = 0;
+ *z1Ptr = uadd64_carry(a1, b1, &c);
+ *z0Ptr = uadd64_carry(a0, b0, &c);
}
/*----------------------------------------------------------------------------
@@ -423,34 +420,14 @@ static inline void
| `z1Ptr', and `z2Ptr'.
*----------------------------------------------------------------------------*/
-static inline void
- add192(
- uint64_t a0,
- uint64_t a1,
- uint64_t a2,
- uint64_t b0,
- uint64_t b1,
- uint64_t b2,
- uint64_t *z0Ptr,
- uint64_t *z1Ptr,
- uint64_t *z2Ptr
- )
+static inline void add192(uint64_t a0, uint64_t a1, uint64_t a2,
+ uint64_t b0, uint64_t b1, uint64_t b2,
+ uint64_t *z0Ptr, uint64_t *z1Ptr, uint64_t *z2Ptr)
{
- uint64_t z0, z1, z2;
- int8_t carry0, carry1;
-
- z2 = a2 + b2;
- carry1 = ( z2 < a2 );
- z1 = a1 + b1;
- carry0 = ( z1 < a1 );
- z0 = a0 + b0;
- z1 += carry1;
- z0 += ( z1 < carry1 );
- z0 += carry0;
- *z2Ptr = z2;
- *z1Ptr = z1;
- *z0Ptr = z0;
-
+ bool c = 0;
+ *z2Ptr = uadd64_carry(a2, b2, &c);
+ *z1Ptr = uadd64_carry(a1, b1, &c);
+ *z0Ptr = uadd64_carry(a0, b0, &c);
}
/*----------------------------------------------------------------------------
@@ -461,14 +438,12 @@ static inline void
| `z1Ptr'.
*----------------------------------------------------------------------------*/
-static inline void
- sub128(
- uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1, uint64_t *z0Ptr, uint64_t *z1Ptr )
+static inline void sub128(uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1,
+ uint64_t *z0Ptr, uint64_t *z1Ptr)
{
-
- *z1Ptr = a1 - b1;
- *z0Ptr = a0 - b0 - ( a1 < b1 );
-
+ bool c = 0;
+ *z1Ptr = usub64_borrow(a1, b1, &c);
+ *z0Ptr = usub64_borrow(a0, b0, &c);
}
/*----------------------------------------------------------------------------
@@ -479,34 +454,14 @@ static inline void
| pointed to by `z0Ptr', `z1Ptr', and `z2Ptr'.
*----------------------------------------------------------------------------*/
-static inline void
- sub192(
- uint64_t a0,
- uint64_t a1,
- uint64_t a2,
- uint64_t b0,
- uint64_t b1,
- uint64_t b2,
- uint64_t *z0Ptr,
- uint64_t *z1Ptr,
- uint64_t *z2Ptr
- )
+static inline void sub192(uint64_t a0, uint64_t a1, uint64_t a2,
+ uint64_t b0, uint64_t b1, uint64_t b2,
+ uint64_t *z0Ptr, uint64_t *z1Ptr, uint64_t *z2Ptr)
{
- uint64_t z0, z1, z2;
- int8_t borrow0, borrow1;
-
- z2 = a2 - b2;
- borrow1 = ( a2 < b2 );
- z1 = a1 - b1;
- borrow0 = ( a1 < b1 );
- z0 = a0 - b0;
- z0 -= ( z1 < borrow1 );
- z1 -= borrow1;
- z0 -= borrow0;
- *z2Ptr = z2;
- *z1Ptr = z1;
- *z0Ptr = z0;
-
+ bool c = 0;
+ *z2Ptr = usub64_borrow(a2, b2, &c);
+ *z1Ptr = usub64_borrow(a1, b1, &c);
+ *z0Ptr = usub64_borrow(a0, b0, &c);
}
/*----------------------------------------------------------------------------
--
2.25.1
^ permalink raw reply related [flat|nested] 52+ messages in thread
* [PULL 34/46] softfloat: Move addsub_floats to softfloat-parts.c.inc
2021-05-16 12:33 [PULL 00/46] softfloat: Convert float128 to FloatParts (part 1) Richard Henderson
` (32 preceding siblings ...)
2021-05-16 12:34 ` [PULL 33/46] softfloat: Use uadd64_carry, usub64_borrow in softfloat-macros.h Richard Henderson
@ 2021-05-16 12:34 ` Richard Henderson
2021-05-16 12:34 ` [PULL 35/46] softfloat: Implement float128_add/sub via parts Richard Henderson
` (13 subsequent siblings)
47 siblings, 0 replies; 52+ messages in thread
From: Richard Henderson @ 2021-05-16 12:34 UTC (permalink / raw)
To: qemu-devel; +Cc: peter.maydell, Alex Bennée
In preparation for implementing multiple sizes. Rename to parts_addsub,
split out parts_add/sub_normal for future reuse with muladd.
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
fpu/softfloat.c | 253 ++++++++++++++-----------------
fpu/softfloat-parts-addsub.c.inc | 62 ++++++++
fpu/softfloat-parts.c.inc | 81 ++++++++++
3 files changed, 255 insertions(+), 141 deletions(-)
create mode 100644 fpu/softfloat-parts-addsub.c.inc
diff --git a/fpu/softfloat.c b/fpu/softfloat.c
index 817a91de85..afeef00097 100644
--- a/fpu/softfloat.c
+++ b/fpu/softfloat.c
@@ -749,6 +749,26 @@ static void parts128_uncanon(FloatParts128 *p, float_status *status,
#define parts_uncanon(A, S, F) \
PARTS_GENERIC_64_128(uncanon, A)(A, S, F)
+static void parts64_add_normal(FloatParts64 *a, FloatParts64 *b);
+static void parts128_add_normal(FloatParts128 *a, FloatParts128 *b);
+
+#define parts_add_normal(A, B) \
+ PARTS_GENERIC_64_128(add_normal, A)(A, B)
+
+static bool parts64_sub_normal(FloatParts64 *a, FloatParts64 *b);
+static bool parts128_sub_normal(FloatParts128 *a, FloatParts128 *b);
+
+#define parts_sub_normal(A, B) \
+ PARTS_GENERIC_64_128(sub_normal, A)(A, B)
+
+static FloatParts64 *parts64_addsub(FloatParts64 *a, FloatParts64 *b,
+ float_status *s, bool subtract);
+static FloatParts128 *parts128_addsub(FloatParts128 *a, FloatParts128 *b,
+ float_status *s, bool subtract);
+
+#define parts_addsub(A, B, S, Z) \
+ PARTS_GENERIC_64_128(addsub, A)(A, B, S, Z)
+
/*
* Helper functions for softfloat-parts.c.inc, per-size operations.
*/
@@ -756,6 +776,21 @@ static void parts128_uncanon(FloatParts128 *p, float_status *status,
#define FRAC_GENERIC_64_128(NAME, P) \
QEMU_GENERIC(P, (FloatParts128 *, frac128_##NAME), frac64_##NAME)
+static bool frac64_add(FloatParts64 *r, FloatParts64 *a, FloatParts64 *b)
+{
+ return uadd64_overflow(a->frac, b->frac, &r->frac);
+}
+
+static bool frac128_add(FloatParts128 *r, FloatParts128 *a, FloatParts128 *b)
+{
+ bool c = 0;
+ r->frac_lo = uadd64_carry(a->frac_lo, b->frac_lo, &c);
+ r->frac_hi = uadd64_carry(a->frac_hi, b->frac_hi, &c);
+ return c;
+}
+
+#define frac_add(R, A, B) FRAC_GENERIC_64_128(add, R)(R, A, B)
+
static bool frac64_addi(FloatParts64 *r, FloatParts64 *a, uint64_t c)
{
return uadd64_overflow(a->frac, c, &r->frac);
@@ -824,6 +859,20 @@ static bool frac128_eqz(FloatParts128 *a)
#define frac_eqz(A) FRAC_GENERIC_64_128(eqz, A)(A)
+static void frac64_neg(FloatParts64 *a)
+{
+ a->frac = -a->frac;
+}
+
+static void frac128_neg(FloatParts128 *a)
+{
+ bool c = 0;
+ a->frac_lo = usub64_borrow(0, a->frac_lo, &c);
+ a->frac_hi = usub64_borrow(0, a->frac_hi, &c);
+}
+
+#define frac_neg(A) FRAC_GENERIC_64_128(neg, A)(A)
+
static int frac64_normalize(FloatParts64 *a)
{
if (a->frac) {
@@ -891,18 +940,36 @@ static void frac128_shrjam(FloatParts128 *a, int c)
#define frac_shrjam(A, C) FRAC_GENERIC_64_128(shrjam, A)(A, C)
-#define partsN(NAME) parts64_##NAME
-#define FloatPartsN FloatParts64
+static bool frac64_sub(FloatParts64 *r, FloatParts64 *a, FloatParts64 *b)
+{
+ return usub64_overflow(a->frac, b->frac, &r->frac);
+}
+static bool frac128_sub(FloatParts128 *r, FloatParts128 *a, FloatParts128 *b)
+{
+ bool c = 0;
+ r->frac_lo = usub64_borrow(a->frac_lo, b->frac_lo, &c);
+ r->frac_hi = usub64_borrow(a->frac_hi, b->frac_hi, &c);
+ return c;
+}
+
+#define frac_sub(R, A, B) FRAC_GENERIC_64_128(sub, R)(R, A, B)
+
+#define partsN(NAME) glue(glue(glue(parts,N),_),NAME)
+#define FloatPartsN glue(FloatParts,N)
+
+#define N 64
+
+#include "softfloat-parts-addsub.c.inc"
#include "softfloat-parts.c.inc"
-#undef partsN
-#undef FloatPartsN
-#define partsN(NAME) parts128_##NAME
-#define FloatPartsN FloatParts128
+#undef N
+#define N 128
+#include "softfloat-parts-addsub.c.inc"
#include "softfloat-parts.c.inc"
+#undef N
#undef partsN
#undef FloatPartsN
@@ -980,165 +1047,73 @@ static float64 float64_round_pack_canonical(FloatParts64 *p,
}
/*
- * Returns the result of adding or subtracting the values of the
- * floating-point values `a' and `b'. The operation is performed
- * according to the IEC/IEEE Standard for Binary Floating-Point
- * Arithmetic.
+ * Addition and subtraction
*/
-static FloatParts64 addsub_floats(FloatParts64 a, FloatParts64 b, bool subtract,
- float_status *s)
+static float16 QEMU_FLATTEN
+float16_addsub(float16 a, float16 b, float_status *status, bool subtract)
{
- bool a_sign = a.sign;
- bool b_sign = b.sign ^ subtract;
-
- if (a_sign != b_sign) {
- /* Subtraction */
-
- if (a.cls == float_class_normal && b.cls == float_class_normal) {
- if (a.exp > b.exp || (a.exp == b.exp && a.frac >= b.frac)) {
- shift64RightJamming(b.frac, a.exp - b.exp, &b.frac);
- a.frac = a.frac - b.frac;
- } else {
- shift64RightJamming(a.frac, b.exp - a.exp, &a.frac);
- a.frac = b.frac - a.frac;
- a.exp = b.exp;
- a_sign ^= 1;
- }
-
- if (a.frac == 0) {
- a.cls = float_class_zero;
- a.sign = s->float_rounding_mode == float_round_down;
- } else {
- int shift = clz64(a.frac);
- a.frac = a.frac << shift;
- a.exp = a.exp - shift;
- a.sign = a_sign;
- }
- return a;
- }
- if (is_nan(a.cls) || is_nan(b.cls)) {
- return *parts_pick_nan(&a, &b, s);
- }
- if (a.cls == float_class_inf) {
- if (b.cls == float_class_inf) {
- float_raise(float_flag_invalid, s);
- parts_default_nan(&a, s);
- }
- return a;
- }
- if (a.cls == float_class_zero && b.cls == float_class_zero) {
- a.sign = s->float_rounding_mode == float_round_down;
- return a;
- }
- if (a.cls == float_class_zero || b.cls == float_class_inf) {
- b.sign = a_sign ^ 1;
- return b;
- }
- if (b.cls == float_class_zero) {
- return a;
- }
- } else {
- /* Addition */
- if (a.cls == float_class_normal && b.cls == float_class_normal) {
- if (a.exp > b.exp) {
- shift64RightJamming(b.frac, a.exp - b.exp, &b.frac);
- } else if (a.exp < b.exp) {
- shift64RightJamming(a.frac, b.exp - a.exp, &a.frac);
- a.exp = b.exp;
- }
-
- if (uadd64_overflow(a.frac, b.frac, &a.frac)) {
- shift64RightJamming(a.frac, 1, &a.frac);
- a.frac |= DECOMPOSED_IMPLICIT_BIT;
- a.exp += 1;
- }
- return a;
- }
- if (is_nan(a.cls) || is_nan(b.cls)) {
- return *parts_pick_nan(&a, &b, s);
- }
- if (a.cls == float_class_inf || b.cls == float_class_zero) {
- return a;
- }
- if (b.cls == float_class_inf || a.cls == float_class_zero) {
- b.sign = b_sign;
- return b;
- }
- }
- g_assert_not_reached();
-}
-
-/*
- * Returns the result of adding or subtracting the floating-point
- * values `a' and `b'. The operation is performed according to the
- * IEC/IEEE Standard for Binary Floating-Point Arithmetic.
- */
-
-float16 QEMU_FLATTEN float16_add(float16 a, float16 b, float_status *status)
-{
- FloatParts64 pa, pb, pr;
+ FloatParts64 pa, pb, *pr;
float16_unpack_canonical(&pa, a, status);
float16_unpack_canonical(&pb, b, status);
- pr = addsub_floats(pa, pb, false, status);
+ pr = parts_addsub(&pa, &pb, status, subtract);
- return float16_round_pack_canonical(&pr, status);
+ return float16_round_pack_canonical(pr, status);
}
-float16 QEMU_FLATTEN float16_sub(float16 a, float16 b, float_status *status)
+float16 float16_add(float16 a, float16 b, float_status *status)
{
- FloatParts64 pa, pb, pr;
+ return float16_addsub(a, b, status, false);
+}
- float16_unpack_canonical(&pa, a, status);
- float16_unpack_canonical(&pb, b, status);
- pr = addsub_floats(pa, pb, true, status);
-
- return float16_round_pack_canonical(&pr, status);
+float16 float16_sub(float16 a, float16 b, float_status *status)
+{
+ return float16_addsub(a, b, status, true);
}
static float32 QEMU_SOFTFLOAT_ATTR
-soft_f32_addsub(float32 a, float32 b, bool subtract, float_status *status)
+soft_f32_addsub(float32 a, float32 b, float_status *status, bool subtract)
{
- FloatParts64 pa, pb, pr;
+ FloatParts64 pa, pb, *pr;
float32_unpack_canonical(&pa, a, status);
float32_unpack_canonical(&pb, b, status);
- pr = addsub_floats(pa, pb, subtract, status);
+ pr = parts_addsub(&pa, &pb, status, subtract);
- return float32_round_pack_canonical(&pr, status);
+ return float32_round_pack_canonical(pr, status);
}
-static inline float32 soft_f32_add(float32 a, float32 b, float_status *status)
+static float32 soft_f32_add(float32 a, float32 b, float_status *status)
{
- return soft_f32_addsub(a, b, false, status);
+ return soft_f32_addsub(a, b, status, false);
}
-static inline float32 soft_f32_sub(float32 a, float32 b, float_status *status)
+static float32 soft_f32_sub(float32 a, float32 b, float_status *status)
{
- return soft_f32_addsub(a, b, true, status);
+ return soft_f32_addsub(a, b, status, true);
}
static float64 QEMU_SOFTFLOAT_ATTR
-soft_f64_addsub(float64 a, float64 b, bool subtract, float_status *status)
+soft_f64_addsub(float64 a, float64 b, float_status *status, bool subtract)
{
- FloatParts64 pa, pb, pr;
+ FloatParts64 pa, pb, *pr;
float64_unpack_canonical(&pa, a, status);
float64_unpack_canonical(&pb, b, status);
- pr = addsub_floats(pa, pb, subtract, status);
+ pr = parts_addsub(&pa, &pb, status, subtract);
- return float64_round_pack_canonical(&pr, status);
+ return float64_round_pack_canonical(pr, status);
}
-static inline float64 soft_f64_add(float64 a, float64 b, float_status *status)
+static float64 soft_f64_add(float64 a, float64 b, float_status *status)
{
- return soft_f64_addsub(a, b, false, status);
+ return soft_f64_addsub(a, b, status, false);
}
-static inline float64 soft_f64_sub(float64 a, float64 b, float_status *status)
+static float64 soft_f64_sub(float64 a, float64 b, float_status *status)
{
- return soft_f64_addsub(a, b, true, status);
+ return soft_f64_addsub(a, b, status, true);
}
static float hard_f32_add(float a, float b)
@@ -1216,30 +1191,26 @@ float64_sub(float64 a, float64 b, float_status *s)
return float64_addsub(a, b, s, hard_f64_sub, soft_f64_sub);
}
-/*
- * Returns the result of adding or subtracting the bfloat16
- * values `a' and `b'.
- */
-bfloat16 QEMU_FLATTEN bfloat16_add(bfloat16 a, bfloat16 b, float_status *status)
+static bfloat16 QEMU_FLATTEN
+bfloat16_addsub(bfloat16 a, bfloat16 b, float_status *status, bool subtract)
{
- FloatParts64 pa, pb, pr;
+ FloatParts64 pa, pb, *pr;
bfloat16_unpack_canonical(&pa, a, status);
bfloat16_unpack_canonical(&pb, b, status);
- pr = addsub_floats(pa, pb, false, status);
+ pr = parts_addsub(&pa, &pb, status, subtract);
- return bfloat16_round_pack_canonical(&pr, status);
+ return bfloat16_round_pack_canonical(pr, status);
}
-bfloat16 QEMU_FLATTEN bfloat16_sub(bfloat16 a, bfloat16 b, float_status *status)
+bfloat16 bfloat16_add(bfloat16 a, bfloat16 b, float_status *status)
{
- FloatParts64 pa, pb, pr;
+ return bfloat16_addsub(a, b, status, false);
+}
- bfloat16_unpack_canonical(&pa, a, status);
- bfloat16_unpack_canonical(&pb, b, status);
- pr = addsub_floats(pa, pb, true, status);
-
- return bfloat16_round_pack_canonical(&pr, status);
+bfloat16 bfloat16_sub(bfloat16 a, bfloat16 b, float_status *status)
+{
+ return bfloat16_addsub(a, b, status, true);
}
/*
diff --git a/fpu/softfloat-parts-addsub.c.inc b/fpu/softfloat-parts-addsub.c.inc
new file mode 100644
index 0000000000..ae5c1017c5
--- /dev/null
+++ b/fpu/softfloat-parts-addsub.c.inc
@@ -0,0 +1,62 @@
+/*
+ * Floating point arithmetic implementation
+ *
+ * The code in this source file is derived from release 2a of the SoftFloat
+ * IEC/IEEE Floating-point Arithmetic Package. Those parts of the code (and
+ * some later contributions) are provided under that license, as detailed below.
+ * It has subsequently been modified by contributors to the QEMU Project,
+ * so some portions are provided under:
+ * the SoftFloat-2a license
+ * the BSD license
+ * GPL-v2-or-later
+ *
+ * Any future contributions to this file after December 1st 2014 will be
+ * taken to be licensed under the Softfloat-2a license unless specifically
+ * indicated otherwise.
+ */
+
+static void partsN(add_normal)(FloatPartsN *a, FloatPartsN *b)
+{
+ int exp_diff = a->exp - b->exp;
+
+ if (exp_diff > 0) {
+ frac_shrjam(b, exp_diff);
+ } else if (exp_diff < 0) {
+ frac_shrjam(a, -exp_diff);
+ a->exp = b->exp;
+ }
+
+ if (frac_add(a, a, b)) {
+ frac_shrjam(a, 1);
+ a->frac_hi |= DECOMPOSED_IMPLICIT_BIT;
+ a->exp += 1;
+ }
+}
+
+static bool partsN(sub_normal)(FloatPartsN *a, FloatPartsN *b)
+{
+ int exp_diff = a->exp - b->exp;
+ int shift;
+
+ if (exp_diff > 0) {
+ frac_shrjam(b, exp_diff);
+ frac_sub(a, a, b);
+ } else if (exp_diff < 0) {
+ a->exp = b->exp;
+ a->sign ^= 1;
+ frac_shrjam(a, -exp_diff);
+ frac_sub(a, b, a);
+ } else if (frac_sub(a, a, b)) {
+ /* Overflow means that A was less than B. */
+ frac_neg(a);
+ a->sign ^= 1;
+ }
+
+ shift = frac_normalize(a);
+ if (likely(shift < N)) {
+ a->exp -= shift;
+ return true;
+ }
+ a->cls = float_class_zero;
+ return false;
+}
diff --git a/fpu/softfloat-parts.c.inc b/fpu/softfloat-parts.c.inc
index efdc724770..cfce9f6421 100644
--- a/fpu/softfloat-parts.c.inc
+++ b/fpu/softfloat-parts.c.inc
@@ -281,3 +281,84 @@ static void partsN(uncanon)(FloatPartsN *p, float_status *s,
p->exp = exp;
float_raise(flags, s);
}
+
+/*
+ * Returns the result of adding or subtracting the values of the
+ * floating-point values `a' and `b'. The operation is performed
+ * according to the IEC/IEEE Standard for Binary Floating-Point
+ * Arithmetic.
+ */
+static FloatPartsN *partsN(addsub)(FloatPartsN *a, FloatPartsN *b,
+ float_status *s, bool subtract)
+{
+ bool b_sign = b->sign ^ subtract;
+ int ab_mask = float_cmask(a->cls) | float_cmask(b->cls);
+
+ if (a->sign != b_sign) {
+ /* Subtraction */
+ if (likely(ab_mask == float_cmask_normal)) {
+ if (parts_sub_normal(a, b)) {
+ return a;
+ }
+ /* Subtract was exact, fall through to set sign. */
+ ab_mask = float_cmask_zero;
+ }
+
+ if (ab_mask == float_cmask_zero) {
+ a->sign = s->float_rounding_mode == float_round_down;
+ return a;
+ }
+
+ if (unlikely(ab_mask & float_cmask_anynan)) {
+ goto p_nan;
+ }
+
+ if (ab_mask & float_cmask_inf) {
+ if (a->cls != float_class_inf) {
+ /* N - Inf */
+ goto return_b;
+ }
+ if (b->cls != float_class_inf) {
+ /* Inf - N */
+ return a;
+ }
+ /* Inf - Inf */
+ float_raise(float_flag_invalid, s);
+ parts_default_nan(a, s);
+ return a;
+ }
+ } else {
+ /* Addition */
+ if (likely(ab_mask == float_cmask_normal)) {
+ parts_add_normal(a, b);
+ return a;
+ }
+
+ if (ab_mask == float_cmask_zero) {
+ return a;
+ }
+
+ if (unlikely(ab_mask & float_cmask_anynan)) {
+ goto p_nan;
+ }
+
+ if (ab_mask & float_cmask_inf) {
+ a->cls = float_class_inf;
+ return a;
+ }
+ }
+
+ if (b->cls == float_class_zero) {
+ g_assert(a->cls == float_class_normal);
+ return a;
+ }
+
+ g_assert(a->cls == float_class_zero);
+ g_assert(b->cls == float_class_normal);
+ return_b:
+ b->sign = b_sign;
+ return b;
+
+ p_nan:
+ return parts_pick_nan(a, b, s);
+}
--
2.25.1
^ permalink raw reply related [flat|nested] 52+ messages in thread
* [PULL 35/46] softfloat: Implement float128_add/sub via parts
2021-05-16 12:33 [PULL 00/46] softfloat: Convert float128 to FloatParts (part 1) Richard Henderson
` (33 preceding siblings ...)
2021-05-16 12:34 ` [PULL 34/46] softfloat: Move addsub_floats to softfloat-parts.c.inc Richard Henderson
@ 2021-05-16 12:34 ` Richard Henderson
2021-05-16 12:34 ` [PULL 36/46] softfloat: Move mul_floats to softfloat-parts.c.inc Richard Henderson
` (12 subsequent siblings)
47 siblings, 0 replies; 52+ messages in thread
From: Richard Henderson @ 2021-05-16 12:34 UTC (permalink / raw)
To: qemu-devel; +Cc: peter.maydell, Alex Bennée
Replace the existing Berkeley implementation with the
FloatParts implementation.
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
fpu/softfloat.c | 257 +++++++-----------------------------------------
1 file changed, 36 insertions(+), 221 deletions(-)
diff --git a/fpu/softfloat.c b/fpu/softfloat.c
index afeef00097..8f734f6020 100644
--- a/fpu/softfloat.c
+++ b/fpu/softfloat.c
@@ -1046,6 +1046,20 @@ static float64 float64_round_pack_canonical(FloatParts64 *p,
return float64_pack_raw(p);
}
+static void float128_unpack_canonical(FloatParts128 *p, float128 f,
+ float_status *s)
+{
+ float128_unpack_raw(p, f);
+ parts_canonicalize(p, s, &float128_params);
+}
+
+static float128 float128_round_pack_canonical(FloatParts128 *p,
+ float_status *s)
+{
+ parts_uncanon(p, s, &float128_params);
+ return float128_pack_raw(p);
+}
+
/*
* Addition and subtraction
*/
@@ -1213,6 +1227,28 @@ bfloat16 bfloat16_sub(bfloat16 a, bfloat16 b, float_status *status)
return bfloat16_addsub(a, b, status, true);
}
+static float128 QEMU_FLATTEN
+float128_addsub(float128 a, float128 b, float_status *status, bool subtract)
+{
+ FloatParts128 pa, pb, *pr;
+
+ float128_unpack_canonical(&pa, a, status);
+ float128_unpack_canonical(&pb, b, status);
+ pr = parts_addsub(&pa, &pb, status, subtract);
+
+ return float128_round_pack_canonical(pr, status);
+}
+
+float128 float128_add(float128 a, float128 b, float_status *status)
+{
+ return float128_addsub(a, b, status, false);
+}
+
+float128 float128_sub(float128 a, float128 b, float_status *status)
+{
+ return float128_addsub(a, b, status, true);
+}
+
/*
* Returns the result of multiplying the floating-point values `a' and
* `b'. The operation is performed according to the IEC/IEEE Standard
@@ -7032,227 +7068,6 @@ float128 float128_round_to_int(float128 a, float_status *status)
}
-/*----------------------------------------------------------------------------
-| Returns the result of adding the absolute values of the quadruple-precision
-| floating-point values `a' and `b'. If `zSign' is 1, the sum is negated
-| before being returned. `zSign' is ignored if the result is a NaN.
-| The addition is performed according to the IEC/IEEE Standard for Binary
-| Floating-Point Arithmetic.
-*----------------------------------------------------------------------------*/
-
-static float128 addFloat128Sigs(float128 a, float128 b, bool zSign,
- float_status *status)
-{
- int32_t aExp, bExp, zExp;
- uint64_t aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2;
- int32_t expDiff;
-
- aSig1 = extractFloat128Frac1( a );
- aSig0 = extractFloat128Frac0( a );
- aExp = extractFloat128Exp( a );
- bSig1 = extractFloat128Frac1( b );
- bSig0 = extractFloat128Frac0( b );
- bExp = extractFloat128Exp( b );
- expDiff = aExp - bExp;
- if ( 0 < expDiff ) {
- if ( aExp == 0x7FFF ) {
- if (aSig0 | aSig1) {
- return propagateFloat128NaN(a, b, status);
- }
- return a;
- }
- if ( bExp == 0 ) {
- --expDiff;
- }
- else {
- bSig0 |= UINT64_C(0x0001000000000000);
- }
- shift128ExtraRightJamming(
- bSig0, bSig1, 0, expDiff, &bSig0, &bSig1, &zSig2 );
- zExp = aExp;
- }
- else if ( expDiff < 0 ) {
- if ( bExp == 0x7FFF ) {
- if (bSig0 | bSig1) {
- return propagateFloat128NaN(a, b, status);
- }
- return packFloat128( zSign, 0x7FFF, 0, 0 );
- }
- if ( aExp == 0 ) {
- ++expDiff;
- }
- else {
- aSig0 |= UINT64_C(0x0001000000000000);
- }
- shift128ExtraRightJamming(
- aSig0, aSig1, 0, - expDiff, &aSig0, &aSig1, &zSig2 );
- zExp = bExp;
- }
- else {
- if ( aExp == 0x7FFF ) {
- if ( aSig0 | aSig1 | bSig0 | bSig1 ) {
- return propagateFloat128NaN(a, b, status);
- }
- return a;
- }
- add128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
- if ( aExp == 0 ) {
- if (status->flush_to_zero) {
- if (zSig0 | zSig1) {
- float_raise(float_flag_output_denormal, status);
- }
- return packFloat128(zSign, 0, 0, 0);
- }
- return packFloat128( zSign, 0, zSig0, zSig1 );
- }
- zSig2 = 0;
- zSig0 |= UINT64_C(0x0002000000000000);
- zExp = aExp;
- goto shiftRight1;
- }
- aSig0 |= UINT64_C(0x0001000000000000);
- add128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
- --zExp;
- if ( zSig0 < UINT64_C(0x0002000000000000) ) goto roundAndPack;
- ++zExp;
- shiftRight1:
- shift128ExtraRightJamming(
- zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 );
- roundAndPack:
- return roundAndPackFloat128(zSign, zExp, zSig0, zSig1, zSig2, status);
-
-}
-
-/*----------------------------------------------------------------------------
-| Returns the result of subtracting the absolute values of the quadruple-
-| precision floating-point values `a' and `b'. If `zSign' is 1, the
-| difference is negated before being returned. `zSign' is ignored if the
-| result is a NaN. The subtraction is performed according to the IEC/IEEE
-| Standard for Binary Floating-Point Arithmetic.
-*----------------------------------------------------------------------------*/
-
-static float128 subFloat128Sigs(float128 a, float128 b, bool zSign,
- float_status *status)
-{
- int32_t aExp, bExp, zExp;
- uint64_t aSig0, aSig1, bSig0, bSig1, zSig0, zSig1;
- int32_t expDiff;
-
- aSig1 = extractFloat128Frac1( a );
- aSig0 = extractFloat128Frac0( a );
- aExp = extractFloat128Exp( a );
- bSig1 = extractFloat128Frac1( b );
- bSig0 = extractFloat128Frac0( b );
- bExp = extractFloat128Exp( b );
- expDiff = aExp - bExp;
- shortShift128Left( aSig0, aSig1, 14, &aSig0, &aSig1 );
- shortShift128Left( bSig0, bSig1, 14, &bSig0, &bSig1 );
- if ( 0 < expDiff ) goto aExpBigger;
- if ( expDiff < 0 ) goto bExpBigger;
- if ( aExp == 0x7FFF ) {
- if ( aSig0 | aSig1 | bSig0 | bSig1 ) {
- return propagateFloat128NaN(a, b, status);
- }
- float_raise(float_flag_invalid, status);
- return float128_default_nan(status);
- }
- if ( aExp == 0 ) {
- aExp = 1;
- bExp = 1;
- }
- if ( bSig0 < aSig0 ) goto aBigger;
- if ( aSig0 < bSig0 ) goto bBigger;
- if ( bSig1 < aSig1 ) goto aBigger;
- if ( aSig1 < bSig1 ) goto bBigger;
- return packFloat128(status->float_rounding_mode == float_round_down,
- 0, 0, 0);
- bExpBigger:
- if ( bExp == 0x7FFF ) {
- if (bSig0 | bSig1) {
- return propagateFloat128NaN(a, b, status);
- }
- return packFloat128( zSign ^ 1, 0x7FFF, 0, 0 );
- }
- if ( aExp == 0 ) {
- ++expDiff;
- }
- else {
- aSig0 |= UINT64_C(0x4000000000000000);
- }
- shift128RightJamming( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
- bSig0 |= UINT64_C(0x4000000000000000);
- bBigger:
- sub128( bSig0, bSig1, aSig0, aSig1, &zSig0, &zSig1 );
- zExp = bExp;
- zSign ^= 1;
- goto normalizeRoundAndPack;
- aExpBigger:
- if ( aExp == 0x7FFF ) {
- if (aSig0 | aSig1) {
- return propagateFloat128NaN(a, b, status);
- }
- return a;
- }
- if ( bExp == 0 ) {
- --expDiff;
- }
- else {
- bSig0 |= UINT64_C(0x4000000000000000);
- }
- shift128RightJamming( bSig0, bSig1, expDiff, &bSig0, &bSig1 );
- aSig0 |= UINT64_C(0x4000000000000000);
- aBigger:
- sub128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
- zExp = aExp;
- normalizeRoundAndPack:
- --zExp;
- return normalizeRoundAndPackFloat128(zSign, zExp - 14, zSig0, zSig1,
- status);
-
-}
-
-/*----------------------------------------------------------------------------
-| Returns the result of adding the quadruple-precision floating-point values
-| `a' and `b'. The operation is performed according to the IEC/IEEE Standard
-| for Binary Floating-Point Arithmetic.
-*----------------------------------------------------------------------------*/
-
-float128 float128_add(float128 a, float128 b, float_status *status)
-{
- bool aSign, bSign;
-
- aSign = extractFloat128Sign( a );
- bSign = extractFloat128Sign( b );
- if ( aSign == bSign ) {
- return addFloat128Sigs(a, b, aSign, status);
- }
- else {
- return subFloat128Sigs(a, b, aSign, status);
- }
-
-}
-
-/*----------------------------------------------------------------------------
-| Returns the result of subtracting the quadruple-precision floating-point
-| values `a' and `b'. The operation is performed according to the IEC/IEEE
-| Standard for Binary Floating-Point Arithmetic.
-*----------------------------------------------------------------------------*/
-
-float128 float128_sub(float128 a, float128 b, float_status *status)
-{
- bool aSign, bSign;
-
- aSign = extractFloat128Sign( a );
- bSign = extractFloat128Sign( b );
- if ( aSign == bSign ) {
- return subFloat128Sigs(a, b, aSign, status);
- }
- else {
- return addFloat128Sigs(a, b, aSign, status);
- }
-
-}
-
/*----------------------------------------------------------------------------
| Returns the result of multiplying the quadruple-precision floating-point
| values `a' and `b'. The operation is performed according to the IEC/IEEE
--
2.25.1
^ permalink raw reply related [flat|nested] 52+ messages in thread
* [PULL 36/46] softfloat: Move mul_floats to softfloat-parts.c.inc
2021-05-16 12:33 [PULL 00/46] softfloat: Convert float128 to FloatParts (part 1) Richard Henderson
` (34 preceding siblings ...)
2021-05-16 12:34 ` [PULL 35/46] softfloat: Implement float128_add/sub via parts Richard Henderson
@ 2021-05-16 12:34 ` Richard Henderson
2021-05-16 12:34 ` [PULL 37/46] softfloat: Move muladd_floats " Richard Henderson
` (11 subsequent siblings)
47 siblings, 0 replies; 52+ messages in thread
From: Richard Henderson @ 2021-05-16 12:34 UTC (permalink / raw)
To: qemu-devel; +Cc: peter.maydell, Alex Bennée
Rename to parts$N_mul.
Reimplement float128_mul with FloatParts128.
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
fpu/softfloat.c | 206 ++++++++++++++------------------------
fpu/softfloat-parts.c.inc | 51 ++++++++++
2 files changed, 128 insertions(+), 129 deletions(-)
diff --git a/fpu/softfloat.c b/fpu/softfloat.c
index 8f734f6020..ac7959557c 100644
--- a/fpu/softfloat.c
+++ b/fpu/softfloat.c
@@ -533,6 +533,16 @@ typedef struct {
uint64_t frac_lo;
} FloatParts128;
+typedef struct {
+ FloatClass cls;
+ bool sign;
+ int32_t exp;
+ uint64_t frac_hi;
+ uint64_t frac_hm; /* high-middle */
+ uint64_t frac_lm; /* low-middle */
+ uint64_t frac_lo;
+} FloatParts256;
+
/* These apply to the most significant word of each FloatPartsN. */
#define DECOMPOSED_BINARY_POINT 63
#define DECOMPOSED_IMPLICIT_BIT (1ull << DECOMPOSED_BINARY_POINT)
@@ -769,6 +779,14 @@ static FloatParts128 *parts128_addsub(FloatParts128 *a, FloatParts128 *b,
#define parts_addsub(A, B, S, Z) \
PARTS_GENERIC_64_128(addsub, A)(A, B, S, Z)
+static FloatParts64 *parts64_mul(FloatParts64 *a, FloatParts64 *b,
+ float_status *s);
+static FloatParts128 *parts128_mul(FloatParts128 *a, FloatParts128 *b,
+ float_status *s);
+
+#define parts_mul(A, B, S) \
+ PARTS_GENERIC_64_128(mul, A)(A, B, S)
+
/*
* Helper functions for softfloat-parts.c.inc, per-size operations.
*/
@@ -859,6 +877,19 @@ static bool frac128_eqz(FloatParts128 *a)
#define frac_eqz(A) FRAC_GENERIC_64_128(eqz, A)(A)
+static void frac64_mulw(FloatParts128 *r, FloatParts64 *a, FloatParts64 *b)
+{
+ mulu64(&r->frac_lo, &r->frac_hi, a->frac, b->frac);
+}
+
+static void frac128_mulw(FloatParts256 *r, FloatParts128 *a, FloatParts128 *b)
+{
+ mul128To256(a->frac_hi, a->frac_lo, b->frac_hi, b->frac_lo,
+ &r->frac_hi, &r->frac_hm, &r->frac_lm, &r->frac_lo);
+}
+
+#define frac_mulw(R, A, B) FRAC_GENERIC_64_128(mulw, A)(R, A, B)
+
static void frac64_neg(FloatParts64 *a)
{
a->frac = -a->frac;
@@ -955,23 +986,42 @@ static bool frac128_sub(FloatParts128 *r, FloatParts128 *a, FloatParts128 *b)
#define frac_sub(R, A, B) FRAC_GENERIC_64_128(sub, R)(R, A, B)
+static void frac64_truncjam(FloatParts64 *r, FloatParts128 *a)
+{
+ r->frac = a->frac_hi | (a->frac_lo != 0);
+}
+
+static void frac128_truncjam(FloatParts128 *r, FloatParts256 *a)
+{
+ r->frac_hi = a->frac_hi;
+ r->frac_lo = a->frac_hm | ((a->frac_lm | a->frac_lo) != 0);
+}
+
+#define frac_truncjam(R, A) FRAC_GENERIC_64_128(truncjam, R)(R, A)
+
#define partsN(NAME) glue(glue(glue(parts,N),_),NAME)
#define FloatPartsN glue(FloatParts,N)
+#define FloatPartsW glue(FloatParts,W)
#define N 64
+#define W 128
#include "softfloat-parts-addsub.c.inc"
#include "softfloat-parts.c.inc"
#undef N
+#undef W
#define N 128
+#define W 256
#include "softfloat-parts-addsub.c.inc"
#include "softfloat-parts.c.inc"
#undef N
+#undef W
#undef partsN
#undef FloatPartsN
+#undef FloatPartsW
/*
* Pack/unpack routines with a specific FloatFmt.
@@ -1250,89 +1300,42 @@ float128 float128_sub(float128 a, float128 b, float_status *status)
}
/*
- * Returns the result of multiplying the floating-point values `a' and
- * `b'. The operation is performed according to the IEC/IEEE Standard
- * for Binary Floating-Point Arithmetic.
+ * Multiplication
*/
-static FloatParts64 mul_floats(FloatParts64 a, FloatParts64 b, float_status *s)
-{
- bool sign = a.sign ^ b.sign;
-
- if (a.cls == float_class_normal && b.cls == float_class_normal) {
- uint64_t hi, lo;
- int exp = a.exp + b.exp;
-
- mul64To128(a.frac, b.frac, &hi, &lo);
- if (hi & DECOMPOSED_IMPLICIT_BIT) {
- exp += 1;
- } else {
- hi <<= 1;
- }
- hi |= (lo != 0);
-
- /* Re-use a */
- a.exp = exp;
- a.sign = sign;
- a.frac = hi;
- return a;
- }
- /* handle all the NaN cases */
- if (is_nan(a.cls) || is_nan(b.cls)) {
- return *parts_pick_nan(&a, &b, s);
- }
- /* Inf * Zero == NaN */
- if ((a.cls == float_class_inf && b.cls == float_class_zero) ||
- (a.cls == float_class_zero && b.cls == float_class_inf)) {
- float_raise(float_flag_invalid, s);
- parts_default_nan(&a, s);
- return a;
- }
- /* Multiply by 0 or Inf */
- if (a.cls == float_class_inf || a.cls == float_class_zero) {
- a.sign = sign;
- return a;
- }
- if (b.cls == float_class_inf || b.cls == float_class_zero) {
- b.sign = sign;
- return b;
- }
- g_assert_not_reached();
-}
-
float16 QEMU_FLATTEN float16_mul(float16 a, float16 b, float_status *status)
{
- FloatParts64 pa, pb, pr;
+ FloatParts64 pa, pb, *pr;
float16_unpack_canonical(&pa, a, status);
float16_unpack_canonical(&pb, b, status);
- pr = mul_floats(pa, pb, status);
+ pr = parts_mul(&pa, &pb, status);
- return float16_round_pack_canonical(&pr, status);
+ return float16_round_pack_canonical(pr, status);
}
static float32 QEMU_SOFTFLOAT_ATTR
soft_f32_mul(float32 a, float32 b, float_status *status)
{
- FloatParts64 pa, pb, pr;
+ FloatParts64 pa, pb, *pr;
float32_unpack_canonical(&pa, a, status);
float32_unpack_canonical(&pb, b, status);
- pr = mul_floats(pa, pb, status);
+ pr = parts_mul(&pa, &pb, status);
- return float32_round_pack_canonical(&pr, status);
+ return float32_round_pack_canonical(pr, status);
}
static float64 QEMU_SOFTFLOAT_ATTR
soft_f64_mul(float64 a, float64 b, float_status *status)
{
- FloatParts64 pa, pb, pr;
+ FloatParts64 pa, pb, *pr;
float64_unpack_canonical(&pa, a, status);
float64_unpack_canonical(&pb, b, status);
- pr = mul_floats(pa, pb, status);
+ pr = parts_mul(&pa, &pb, status);
- return float64_round_pack_canonical(&pr, status);
+ return float64_round_pack_canonical(pr, status);
}
static float hard_f32_mul(float a, float b)
@@ -1359,20 +1362,28 @@ float64_mul(float64 a, float64 b, float_status *s)
f64_is_zon2, f64_addsubmul_post);
}
-/*
- * Returns the result of multiplying the bfloat16
- * values `a' and `b'.
- */
-
-bfloat16 QEMU_FLATTEN bfloat16_mul(bfloat16 a, bfloat16 b, float_status *status)
+bfloat16 QEMU_FLATTEN
+bfloat16_mul(bfloat16 a, bfloat16 b, float_status *status)
{
- FloatParts64 pa, pb, pr;
+ FloatParts64 pa, pb, *pr;
bfloat16_unpack_canonical(&pa, a, status);
bfloat16_unpack_canonical(&pb, b, status);
- pr = mul_floats(pa, pb, status);
+ pr = parts_mul(&pa, &pb, status);
- return bfloat16_round_pack_canonical(&pr, status);
+ return bfloat16_round_pack_canonical(pr, status);
+}
+
+float128 QEMU_FLATTEN
+float128_mul(float128 a, float128 b, float_status *status)
+{
+ FloatParts128 pa, pb, *pr;
+
+ float128_unpack_canonical(&pa, a, status);
+ float128_unpack_canonical(&pb, b, status);
+ pr = parts_mul(&pa, &pb, status);
+
+ return float128_round_pack_canonical(pr, status);
}
/*
@@ -7068,69 +7079,6 @@ float128 float128_round_to_int(float128 a, float_status *status)
}
-/*----------------------------------------------------------------------------
-| Returns the result of multiplying the quadruple-precision floating-point
-| values `a' and `b'. The operation is performed according to the IEC/IEEE
-| Standard for Binary Floating-Point Arithmetic.
-*----------------------------------------------------------------------------*/
-
-float128 float128_mul(float128 a, float128 b, float_status *status)
-{
- bool aSign, bSign, zSign;
- int32_t aExp, bExp, zExp;
- uint64_t aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2, zSig3;
-
- aSig1 = extractFloat128Frac1( a );
- aSig0 = extractFloat128Frac0( a );
- aExp = extractFloat128Exp( a );
- aSign = extractFloat128Sign( a );
- bSig1 = extractFloat128Frac1( b );
- bSig0 = extractFloat128Frac0( b );
- bExp = extractFloat128Exp( b );
- bSign = extractFloat128Sign( b );
- zSign = aSign ^ bSign;
- if ( aExp == 0x7FFF ) {
- if ( ( aSig0 | aSig1 )
- || ( ( bExp == 0x7FFF ) && ( bSig0 | bSig1 ) ) ) {
- return propagateFloat128NaN(a, b, status);
- }
- if ( ( bExp | bSig0 | bSig1 ) == 0 ) goto invalid;
- return packFloat128( zSign, 0x7FFF, 0, 0 );
- }
- if ( bExp == 0x7FFF ) {
- if (bSig0 | bSig1) {
- return propagateFloat128NaN(a, b, status);
- }
- if ( ( aExp | aSig0 | aSig1 ) == 0 ) {
- invalid:
- float_raise(float_flag_invalid, status);
- return float128_default_nan(status);
- }
- return packFloat128( zSign, 0x7FFF, 0, 0 );
- }
- if ( aExp == 0 ) {
- if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
- normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
- }
- if ( bExp == 0 ) {
- if ( ( bSig0 | bSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
- normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
- }
- zExp = aExp + bExp - 0x4000;
- aSig0 |= UINT64_C(0x0001000000000000);
- shortShift128Left( bSig0, bSig1, 16, &bSig0, &bSig1 );
- mul128To256( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1, &zSig2, &zSig3 );
- add128( zSig0, zSig1, aSig0, aSig1, &zSig0, &zSig1 );
- zSig2 |= ( zSig3 != 0 );
- if (UINT64_C( 0x0002000000000000) <= zSig0 ) {
- shift128ExtraRightJamming(
- zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 );
- ++zExp;
- }
- return roundAndPackFloat128(zSign, zExp, zSig0, zSig1, zSig2, status);
-
-}
-
/*----------------------------------------------------------------------------
| Returns the result of dividing the quadruple-precision floating-point value
| `a' by the corresponding value `b'. The operation is performed according to
diff --git a/fpu/softfloat-parts.c.inc b/fpu/softfloat-parts.c.inc
index cfce9f6421..9a67ab2bea 100644
--- a/fpu/softfloat-parts.c.inc
+++ b/fpu/softfloat-parts.c.inc
@@ -362,3 +362,54 @@ static FloatPartsN *partsN(addsub)(FloatPartsN *a, FloatPartsN *b,
p_nan:
return parts_pick_nan(a, b, s);
}
+
+/*
+ * Returns the result of multiplying the floating-point values `a' and
+ * `b'. The operation is performed according to the IEC/IEEE Standard
+ * for Binary Floating-Point Arithmetic.
+ */
+static FloatPartsN *partsN(mul)(FloatPartsN *a, FloatPartsN *b,
+ float_status *s)
+{
+ int ab_mask = float_cmask(a->cls) | float_cmask(b->cls);
+ bool sign = a->sign ^ b->sign;
+
+ if (likely(ab_mask == float_cmask_normal)) {
+ FloatPartsW tmp;
+
+ frac_mulw(&tmp, a, b);
+ frac_truncjam(a, &tmp);
+
+ a->exp += b->exp + 1;
+ if (!(a->frac_hi & DECOMPOSED_IMPLICIT_BIT)) {
+ frac_add(a, a, a);
+ a->exp -= 1;
+ }
+
+ a->sign = sign;
+ return a;
+ }
+
+ /* Inf * Zero == NaN */
+ if (unlikely(ab_mask == float_cmask_infzero)) {
+ float_raise(float_flag_invalid, s);
+ parts_default_nan(a, s);
+ return a;
+ }
+
+ if (unlikely(ab_mask & float_cmask_anynan)) {
+ return parts_pick_nan(a, b, s);
+ }
+
+ /* Multiply by 0 or Inf */
+ if (ab_mask & float_cmask_inf) {
+ a->cls = float_class_inf;
+ a->sign = sign;
+ return a;
+ }
+
+ g_assert(ab_mask & float_cmask_zero);
+ a->cls = float_class_zero;
+ a->sign = sign;
+ return a;
+}
--
2.25.1
^ permalink raw reply related [flat|nested] 52+ messages in thread
* [PULL 37/46] softfloat: Move muladd_floats to softfloat-parts.c.inc
2021-05-16 12:33 [PULL 00/46] softfloat: Convert float128 to FloatParts (part 1) Richard Henderson
` (35 preceding siblings ...)
2021-05-16 12:34 ` [PULL 36/46] softfloat: Move mul_floats to softfloat-parts.c.inc Richard Henderson
@ 2021-05-16 12:34 ` Richard Henderson
2021-05-16 12:34 ` [PULL 38/46] softfloat: Use mulu64 for mul64To128 Richard Henderson
` (10 subsequent siblings)
47 siblings, 0 replies; 52+ messages in thread
From: Richard Henderson @ 2021-05-16 12:34 UTC (permalink / raw)
To: qemu-devel; +Cc: peter.maydell, Alex Bennée
Rename to parts$N_muladd.
Implement float128_muladd with FloatParts128.
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
include/fpu/softfloat.h | 2 +
fpu/softfloat.c | 406 ++++++++++++++++++--------------------
tests/fp/fp-bench.c | 8 +-
tests/fp/fp-test.c | 2 +-
fpu/softfloat-parts.c.inc | 126 ++++++++++++
tests/fp/wrap.c.inc | 12 ++
6 files changed, 342 insertions(+), 214 deletions(-)
diff --git a/include/fpu/softfloat.h b/include/fpu/softfloat.h
index 019c2ec66d..53f2c2ea3c 100644
--- a/include/fpu/softfloat.h
+++ b/include/fpu/softfloat.h
@@ -1197,6 +1197,8 @@ float128 float128_round_to_int(float128, float_status *status);
float128 float128_add(float128, float128, float_status *status);
float128 float128_sub(float128, float128, float_status *status);
float128 float128_mul(float128, float128, float_status *status);
+float128 float128_muladd(float128, float128, float128, int,
+ float_status *status);
float128 float128_div(float128, float128, float_status *status);
float128 float128_rem(float128, float128, float_status *status);
float128 float128_sqrt(float128, float_status *status);
diff --git a/fpu/softfloat.c b/fpu/softfloat.c
index ac7959557c..571309e74f 100644
--- a/fpu/softfloat.c
+++ b/fpu/softfloat.c
@@ -716,6 +716,10 @@ static float128 float128_pack_raw(const FloatParts128 *p)
#define PARTS_GENERIC_64_128(NAME, P) \
QEMU_GENERIC(P, (FloatParts128 *, parts128_##NAME), parts64_##NAME)
+#define PARTS_GENERIC_64_128_256(NAME, P) \
+ QEMU_GENERIC(P, (FloatParts256 *, parts256_##NAME), \
+ (FloatParts128 *, parts128_##NAME), parts64_##NAME)
+
#define parts_default_nan(P, S) PARTS_GENERIC_64_128(default_nan, P)(P, S)
#define parts_silence_nan(P, S) PARTS_GENERIC_64_128(silence_nan, P)(P, S)
@@ -761,15 +765,17 @@ static void parts128_uncanon(FloatParts128 *p, float_status *status,
static void parts64_add_normal(FloatParts64 *a, FloatParts64 *b);
static void parts128_add_normal(FloatParts128 *a, FloatParts128 *b);
+static void parts256_add_normal(FloatParts256 *a, FloatParts256 *b);
#define parts_add_normal(A, B) \
- PARTS_GENERIC_64_128(add_normal, A)(A, B)
+ PARTS_GENERIC_64_128_256(add_normal, A)(A, B)
static bool parts64_sub_normal(FloatParts64 *a, FloatParts64 *b);
static bool parts128_sub_normal(FloatParts128 *a, FloatParts128 *b);
+static bool parts256_sub_normal(FloatParts256 *a, FloatParts256 *b);
#define parts_sub_normal(A, B) \
- PARTS_GENERIC_64_128(sub_normal, A)(A, B)
+ PARTS_GENERIC_64_128_256(sub_normal, A)(A, B)
static FloatParts64 *parts64_addsub(FloatParts64 *a, FloatParts64 *b,
float_status *s, bool subtract);
@@ -787,6 +793,16 @@ static FloatParts128 *parts128_mul(FloatParts128 *a, FloatParts128 *b,
#define parts_mul(A, B, S) \
PARTS_GENERIC_64_128(mul, A)(A, B, S)
+static FloatParts64 *parts64_muladd(FloatParts64 *a, FloatParts64 *b,
+ FloatParts64 *c, int flags,
+ float_status *s);
+static FloatParts128 *parts128_muladd(FloatParts128 *a, FloatParts128 *b,
+ FloatParts128 *c, int flags,
+ float_status *s);
+
+#define parts_muladd(A, B, C, Z, S) \
+ PARTS_GENERIC_64_128(muladd, A)(A, B, C, Z, S)
+
/*
* Helper functions for softfloat-parts.c.inc, per-size operations.
*/
@@ -794,6 +810,10 @@ static FloatParts128 *parts128_mul(FloatParts128 *a, FloatParts128 *b,
#define FRAC_GENERIC_64_128(NAME, P) \
QEMU_GENERIC(P, (FloatParts128 *, frac128_##NAME), frac64_##NAME)
+#define FRAC_GENERIC_64_128_256(NAME, P) \
+ QEMU_GENERIC(P, (FloatParts256 *, frac256_##NAME), \
+ (FloatParts128 *, frac128_##NAME), frac64_##NAME)
+
static bool frac64_add(FloatParts64 *r, FloatParts64 *a, FloatParts64 *b)
{
return uadd64_overflow(a->frac, b->frac, &r->frac);
@@ -807,7 +827,17 @@ static bool frac128_add(FloatParts128 *r, FloatParts128 *a, FloatParts128 *b)
return c;
}
-#define frac_add(R, A, B) FRAC_GENERIC_64_128(add, R)(R, A, B)
+static bool frac256_add(FloatParts256 *r, FloatParts256 *a, FloatParts256 *b)
+{
+ bool c = 0;
+ r->frac_lo = uadd64_carry(a->frac_lo, b->frac_lo, &c);
+ r->frac_lm = uadd64_carry(a->frac_lm, b->frac_lm, &c);
+ r->frac_hm = uadd64_carry(a->frac_hm, b->frac_hm, &c);
+ r->frac_hi = uadd64_carry(a->frac_hi, b->frac_hi, &c);
+ return c;
+}
+
+#define frac_add(R, A, B) FRAC_GENERIC_64_128_256(add, R)(R, A, B)
static bool frac64_addi(FloatParts64 *r, FloatParts64 *a, uint64_t c)
{
@@ -902,7 +932,16 @@ static void frac128_neg(FloatParts128 *a)
a->frac_hi = usub64_borrow(0, a->frac_hi, &c);
}
-#define frac_neg(A) FRAC_GENERIC_64_128(neg, A)(A)
+static void frac256_neg(FloatParts256 *a)
+{
+ bool c = 0;
+ a->frac_lo = usub64_borrow(0, a->frac_lo, &c);
+ a->frac_lm = usub64_borrow(0, a->frac_lm, &c);
+ a->frac_hm = usub64_borrow(0, a->frac_hm, &c);
+ a->frac_hi = usub64_borrow(0, a->frac_hi, &c);
+}
+
+#define frac_neg(A) FRAC_GENERIC_64_128_256(neg, A)(A)
static int frac64_normalize(FloatParts64 *a)
{
@@ -933,7 +972,55 @@ static int frac128_normalize(FloatParts128 *a)
return 128;
}
-#define frac_normalize(A) FRAC_GENERIC_64_128(normalize, A)(A)
+static int frac256_normalize(FloatParts256 *a)
+{
+ uint64_t a0 = a->frac_hi, a1 = a->frac_hm;
+ uint64_t a2 = a->frac_lm, a3 = a->frac_lo;
+ int ret, shl, shr;
+
+ if (likely(a0)) {
+ shl = clz64(a0);
+ if (shl == 0) {
+ return 0;
+ }
+ ret = shl;
+ } else {
+ if (a1) {
+ ret = 64;
+ a0 = a1, a1 = a2, a2 = a3, a3 = 0;
+ } else if (a2) {
+ ret = 128;
+ a0 = a2, a1 = a3, a2 = 0, a3 = 0;
+ } else if (a3) {
+ ret = 192;
+ a0 = a3, a1 = 0, a2 = 0, a3 = 0;
+ } else {
+ ret = 256;
+ a0 = 0, a1 = 0, a2 = 0, a3 = 0;
+ goto done;
+ }
+ shl = clz64(a0);
+ if (shl == 0) {
+ goto done;
+ }
+ ret += shl;
+ }
+
+ shr = -shl & 63;
+ a0 = (a0 << shl) | (a1 >> shr);
+ a1 = (a1 << shl) | (a2 >> shr);
+ a2 = (a2 << shl) | (a3 >> shr);
+ a3 = (a3 << shl);
+
+ done:
+ a->frac_hi = a0;
+ a->frac_hm = a1;
+ a->frac_lm = a2;
+ a->frac_lo = a3;
+ return ret;
+}
+
+#define frac_normalize(A) FRAC_GENERIC_64_128_256(normalize, A)(A)
static void frac64_shl(FloatParts64 *a, int c)
{
@@ -969,7 +1056,51 @@ static void frac128_shrjam(FloatParts128 *a, int c)
shift128RightJamming(a->frac_hi, a->frac_lo, c, &a->frac_hi, &a->frac_lo);
}
-#define frac_shrjam(A, C) FRAC_GENERIC_64_128(shrjam, A)(A, C)
+static void frac256_shrjam(FloatParts256 *a, int c)
+{
+ uint64_t a0 = a->frac_hi, a1 = a->frac_hm;
+ uint64_t a2 = a->frac_lm, a3 = a->frac_lo;
+ uint64_t sticky = 0;
+ int invc;
+
+ if (unlikely(c == 0)) {
+ return;
+ } else if (likely(c < 64)) {
+ /* nothing */
+ } else if (likely(c < 256)) {
+ if (unlikely(c & 128)) {
+ sticky |= a2 | a3;
+ a3 = a1, a2 = a0, a1 = 0, a0 = 0;
+ }
+ if (unlikely(c & 64)) {
+ sticky |= a3;
+ a3 = a2, a2 = a1, a1 = a0, a0 = 0;
+ }
+ c &= 63;
+ if (c == 0) {
+ goto done;
+ }
+ } else {
+ sticky = a0 | a1 | a2 | a3;
+ a0 = a1 = a2 = a3 = 0;
+ goto done;
+ }
+
+ invc = -c & 63;
+ sticky |= a3 << invc;
+ a3 = (a3 >> c) | (a2 << invc);
+ a2 = (a2 >> c) | (a1 << invc);
+ a1 = (a1 >> c) | (a0 << invc);
+ a0 = (a0 >> c);
+
+ done:
+ a->frac_lo = a3 | (sticky != 0);
+ a->frac_lm = a2;
+ a->frac_hm = a1;
+ a->frac_hi = a0;
+}
+
+#define frac_shrjam(A, C) FRAC_GENERIC_64_128_256(shrjam, A)(A, C)
static bool frac64_sub(FloatParts64 *r, FloatParts64 *a, FloatParts64 *b)
{
@@ -984,7 +1115,17 @@ static bool frac128_sub(FloatParts128 *r, FloatParts128 *a, FloatParts128 *b)
return c;
}
-#define frac_sub(R, A, B) FRAC_GENERIC_64_128(sub, R)(R, A, B)
+static bool frac256_sub(FloatParts256 *r, FloatParts256 *a, FloatParts256 *b)
+{
+ bool c = 0;
+ r->frac_lo = usub64_borrow(a->frac_lo, b->frac_lo, &c);
+ r->frac_lm = usub64_borrow(a->frac_lm, b->frac_lm, &c);
+ r->frac_hm = usub64_borrow(a->frac_hm, b->frac_hm, &c);
+ r->frac_hi = usub64_borrow(a->frac_hi, b->frac_hi, &c);
+ return c;
+}
+
+#define frac_sub(R, A, B) FRAC_GENERIC_64_128_256(sub, R)(R, A, B)
static void frac64_truncjam(FloatParts64 *r, FloatParts128 *a)
{
@@ -999,6 +1140,22 @@ static void frac128_truncjam(FloatParts128 *r, FloatParts256 *a)
#define frac_truncjam(R, A) FRAC_GENERIC_64_128(truncjam, R)(R, A)
+static void frac64_widen(FloatParts128 *r, FloatParts64 *a)
+{
+ r->frac_hi = a->frac;
+ r->frac_lo = 0;
+}
+
+static void frac128_widen(FloatParts256 *r, FloatParts128 *a)
+{
+ r->frac_hi = a->frac_hi;
+ r->frac_hm = a->frac_lo;
+ r->frac_lm = 0;
+ r->frac_lo = 0;
+}
+
+#define frac_widen(A, B) FRAC_GENERIC_64_128(widen, B)(A, B)
+
#define partsN(NAME) glue(glue(glue(parts,N),_),NAME)
#define FloatPartsN glue(FloatParts,N)
#define FloatPartsW glue(FloatParts,W)
@@ -1017,6 +1174,12 @@ static void frac128_truncjam(FloatParts128 *r, FloatParts256 *a)
#include "softfloat-parts-addsub.c.inc"
#include "softfloat-parts.c.inc"
+#undef N
+#undef W
+#define N 256
+
+#include "softfloat-parts-addsub.c.inc"
+
#undef N
#undef W
#undef partsN
@@ -1387,230 +1550,48 @@ float128_mul(float128 a, float128 b, float_status *status)
}
/*
- * Returns the result of multiplying the floating-point values `a' and
- * `b' then adding 'c', with no intermediate rounding step after the
- * multiplication. The operation is performed according to the
- * IEC/IEEE Standard for Binary Floating-Point Arithmetic 754-2008.
- * The flags argument allows the caller to select negation of the
- * addend, the intermediate product, or the final result. (The
- * difference between this and having the caller do a separate
- * negation is that negating externally will flip the sign bit on
- * NaNs.)
+ * Fused multiply-add
*/
-static FloatParts64 muladd_floats(FloatParts64 a, FloatParts64 b, FloatParts64 c,
- int flags, float_status *s)
-{
- bool inf_zero, p_sign;
- bool sign_flip = flags & float_muladd_negate_result;
- FloatClass p_class;
- uint64_t hi, lo;
- int p_exp;
- int ab_mask, abc_mask;
-
- ab_mask = float_cmask(a.cls) | float_cmask(b.cls);
- abc_mask = float_cmask(c.cls) | ab_mask;
- inf_zero = ab_mask == float_cmask_infzero;
-
- /* It is implementation-defined whether the cases of (0,inf,qnan)
- * and (inf,0,qnan) raise InvalidOperation or not (and what QNaN
- * they return if they do), so we have to hand this information
- * off to the target-specific pick-a-NaN routine.
- */
- if (unlikely(abc_mask & float_cmask_anynan)) {
- return *parts_pick_nan_muladd(&a, &b, &c, s, ab_mask, abc_mask);
- }
-
- if (inf_zero) {
- float_raise(float_flag_invalid, s);
- parts_default_nan(&a, s);
- return a;
- }
-
- if (flags & float_muladd_negate_c) {
- c.sign ^= 1;
- }
-
- p_sign = a.sign ^ b.sign;
-
- if (flags & float_muladd_negate_product) {
- p_sign ^= 1;
- }
-
- if (ab_mask & float_cmask_inf) {
- p_class = float_class_inf;
- } else if (ab_mask & float_cmask_zero) {
- p_class = float_class_zero;
- } else {
- p_class = float_class_normal;
- }
-
- if (c.cls == float_class_inf) {
- if (p_class == float_class_inf && p_sign != c.sign) {
- float_raise(float_flag_invalid, s);
- parts_default_nan(&c, s);
- } else {
- c.sign ^= sign_flip;
- }
- return c;
- }
-
- if (p_class == float_class_inf) {
- a.cls = float_class_inf;
- a.sign = p_sign ^ sign_flip;
- return a;
- }
-
- if (p_class == float_class_zero) {
- if (c.cls == float_class_zero) {
- if (p_sign != c.sign) {
- p_sign = s->float_rounding_mode == float_round_down;
- }
- c.sign = p_sign;
- } else if (flags & float_muladd_halve_result) {
- c.exp -= 1;
- }
- c.sign ^= sign_flip;
- return c;
- }
-
- /* a & b should be normals now... */
- assert(a.cls == float_class_normal &&
- b.cls == float_class_normal);
-
- p_exp = a.exp + b.exp;
-
- mul64To128(a.frac, b.frac, &hi, &lo);
-
- /* Renormalize to the msb. */
- if (hi & DECOMPOSED_IMPLICIT_BIT) {
- p_exp += 1;
- } else {
- shortShift128Left(hi, lo, 1, &hi, &lo);
- }
-
- /* + add/sub */
- if (c.cls != float_class_zero) {
- int exp_diff = p_exp - c.exp;
- if (p_sign == c.sign) {
- /* Addition */
- if (exp_diff <= 0) {
- shift64RightJamming(hi, -exp_diff, &hi);
- p_exp = c.exp;
- if (uadd64_overflow(hi, c.frac, &hi)) {
- shift64RightJamming(hi, 1, &hi);
- hi |= DECOMPOSED_IMPLICIT_BIT;
- p_exp += 1;
- }
- } else {
- uint64_t c_hi, c_lo, over;
- shift128RightJamming(c.frac, 0, exp_diff, &c_hi, &c_lo);
- add192(0, hi, lo, 0, c_hi, c_lo, &over, &hi, &lo);
- if (over) {
- shift64RightJamming(hi, 1, &hi);
- hi |= DECOMPOSED_IMPLICIT_BIT;
- p_exp += 1;
- }
- }
- } else {
- /* Subtraction */
- uint64_t c_hi = c.frac, c_lo = 0;
-
- if (exp_diff <= 0) {
- shift128RightJamming(hi, lo, -exp_diff, &hi, &lo);
- if (exp_diff == 0
- &&
- (hi > c_hi || (hi == c_hi && lo >= c_lo))) {
- sub128(hi, lo, c_hi, c_lo, &hi, &lo);
- } else {
- sub128(c_hi, c_lo, hi, lo, &hi, &lo);
- p_sign ^= 1;
- p_exp = c.exp;
- }
- } else {
- shift128RightJamming(c_hi, c_lo,
- exp_diff,
- &c_hi, &c_lo);
- sub128(hi, lo, c_hi, c_lo, &hi, &lo);
- }
-
- if (hi == 0 && lo == 0) {
- a.cls = float_class_zero;
- a.sign = s->float_rounding_mode == float_round_down;
- a.sign ^= sign_flip;
- return a;
- } else {
- int shift;
- if (hi != 0) {
- shift = clz64(hi);
- } else {
- shift = clz64(lo) + 64;
- }
- /* Normalizing to a binary point of 124 is the
- correct adjust for the exponent. However since we're
- shifting, we might as well put the binary point back
- at 63 where we really want it. Therefore shift as
- if we're leaving 1 bit at the top of the word, but
- adjust the exponent as if we're leaving 3 bits. */
- shift128Left(hi, lo, shift, &hi, &lo);
- p_exp -= shift;
- }
- }
- }
- hi |= (lo != 0);
-
- if (flags & float_muladd_halve_result) {
- p_exp -= 1;
- }
-
- /* finally prepare our result */
- a.cls = float_class_normal;
- a.sign = p_sign ^ sign_flip;
- a.exp = p_exp;
- a.frac = hi;
-
- return a;
-}
-
float16 QEMU_FLATTEN float16_muladd(float16 a, float16 b, float16 c,
- int flags, float_status *status)
+ int flags, float_status *status)
{
- FloatParts64 pa, pb, pc, pr;
+ FloatParts64 pa, pb, pc, *pr;
float16_unpack_canonical(&pa, a, status);
float16_unpack_canonical(&pb, b, status);
float16_unpack_canonical(&pc, c, status);
- pr = muladd_floats(pa, pb, pc, flags, status);
+ pr = parts_muladd(&pa, &pb, &pc, flags, status);
- return float16_round_pack_canonical(&pr, status);
+ return float16_round_pack_canonical(pr, status);
}
static float32 QEMU_SOFTFLOAT_ATTR
soft_f32_muladd(float32 a, float32 b, float32 c, int flags,
float_status *status)
{
- FloatParts64 pa, pb, pc, pr;
+ FloatParts64 pa, pb, pc, *pr;
float32_unpack_canonical(&pa, a, status);
float32_unpack_canonical(&pb, b, status);
float32_unpack_canonical(&pc, c, status);
- pr = muladd_floats(pa, pb, pc, flags, status);
+ pr = parts_muladd(&pa, &pb, &pc, flags, status);
- return float32_round_pack_canonical(&pr, status);
+ return float32_round_pack_canonical(pr, status);
}
static float64 QEMU_SOFTFLOAT_ATTR
soft_f64_muladd(float64 a, float64 b, float64 c, int flags,
float_status *status)
{
- FloatParts64 pa, pb, pc, pr;
+ FloatParts64 pa, pb, pc, *pr;
float64_unpack_canonical(&pa, a, status);
float64_unpack_canonical(&pb, b, status);
float64_unpack_canonical(&pc, c, status);
- pr = muladd_floats(pa, pb, pc, flags, status);
+ pr = parts_muladd(&pa, &pb, &pc, flags, status);
- return float64_round_pack_canonical(&pr, status);
+ return float64_round_pack_canonical(pr, status);
}
static bool force_soft_fma;
@@ -1757,23 +1738,30 @@ float64_muladd(float64 xa, float64 xb, float64 xc, int flags, float_status *s)
return soft_f64_muladd(ua.s, ub.s, uc.s, flags, s);
}
-/*
- * Returns the result of multiplying the bfloat16 values `a'
- * and `b' then adding 'c', with no intermediate rounding step after the
- * multiplication.
- */
-
bfloat16 QEMU_FLATTEN bfloat16_muladd(bfloat16 a, bfloat16 b, bfloat16 c,
int flags, float_status *status)
{
- FloatParts64 pa, pb, pc, pr;
+ FloatParts64 pa, pb, pc, *pr;
bfloat16_unpack_canonical(&pa, a, status);
bfloat16_unpack_canonical(&pb, b, status);
bfloat16_unpack_canonical(&pc, c, status);
- pr = muladd_floats(pa, pb, pc, flags, status);
+ pr = parts_muladd(&pa, &pb, &pc, flags, status);
- return bfloat16_round_pack_canonical(&pr, status);
+ return bfloat16_round_pack_canonical(pr, status);
+}
+
+float128 QEMU_FLATTEN float128_muladd(float128 a, float128 b, float128 c,
+ int flags, float_status *status)
+{
+ FloatParts128 pa, pb, pc, *pr;
+
+ float128_unpack_canonical(&pa, a, status);
+ float128_unpack_canonical(&pb, b, status);
+ float128_unpack_canonical(&pc, c, status);
+ pr = parts_muladd(&pa, &pb, &pc, flags, status);
+
+ return float128_round_pack_canonical(pr, status);
}
/*
diff --git a/tests/fp/fp-bench.c b/tests/fp/fp-bench.c
index d319993280..c24baf8535 100644
--- a/tests/fp/fp-bench.c
+++ b/tests/fp/fp-bench.c
@@ -386,7 +386,7 @@ static void bench(enum precision prec, enum op op, int n_ops, bool no_neg)
for (i = 0; i < OPS_PER_ITER; i++) {
float128 a = ops[0].f128;
float128 b = ops[1].f128;
- /* float128 c = ops[2].f128; */
+ float128 c = ops[2].f128;
switch (op) {
case OP_ADD:
@@ -401,9 +401,9 @@ static void bench(enum precision prec, enum op op, int n_ops, bool no_neg)
case OP_DIV:
res.f128 = float128_div(a, b, &soft_status);
break;
- /* case OP_FMA: */
- /* res.f128 = float128_muladd(a, b, c, 0, &soft_status); */
- /* break; */
+ case OP_FMA:
+ res.f128 = float128_muladd(a, b, c, 0, &soft_status);
+ break;
case OP_SQRT:
res.f128 = float128_sqrt(a, &soft_status);
break;
diff --git a/tests/fp/fp-test.c b/tests/fp/fp-test.c
index 5a4cad8c8b..ff131afbde 100644
--- a/tests/fp/fp-test.c
+++ b/tests/fp/fp-test.c
@@ -717,7 +717,7 @@ static void do_testfloat(int op, int rmode, bool exact)
test_abz_f128(true_abz_f128M, subj_abz_f128M);
break;
case F128_MULADD:
- not_implemented();
+ test_abcz_f128(slow_f128M_mulAdd, qemu_f128M_mulAdd);
break;
case F128_SQRT:
test_az_f128(slow_f128M_sqrt, qemu_f128M_sqrt);
diff --git a/fpu/softfloat-parts.c.inc b/fpu/softfloat-parts.c.inc
index 9a67ab2bea..a203811299 100644
--- a/fpu/softfloat-parts.c.inc
+++ b/fpu/softfloat-parts.c.inc
@@ -413,3 +413,129 @@ static FloatPartsN *partsN(mul)(FloatPartsN *a, FloatPartsN *b,
a->sign = sign;
return a;
}
+
+/*
+ * Returns the result of multiplying the floating-point values `a' and
+ * `b' then adding 'c', with no intermediate rounding step after the
+ * multiplication. The operation is performed according to the
+ * IEC/IEEE Standard for Binary Floating-Point Arithmetic 754-2008.
+ * The flags argument allows the caller to select negation of the
+ * addend, the intermediate product, or the final result. (The
+ * difference between this and having the caller do a separate
+ * negation is that negating externally will flip the sign bit on NaNs.)
+ *
+ * Requires A and C extracted into a double-sized structure to provide the
+ * extra space for the widening multiply.
+ */
+static FloatPartsN *partsN(muladd)(FloatPartsN *a, FloatPartsN *b,
+ FloatPartsN *c, int flags, float_status *s)
+{
+ int ab_mask, abc_mask;
+ FloatPartsW p_widen, c_widen;
+
+ ab_mask = float_cmask(a->cls) | float_cmask(b->cls);
+ abc_mask = float_cmask(c->cls) | ab_mask;
+
+ /*
+ * It is implementation-defined whether the cases of (0,inf,qnan)
+ * and (inf,0,qnan) raise InvalidOperation or not (and what QNaN
+ * they return if they do), so we have to hand this information
+ * off to the target-specific pick-a-NaN routine.
+ */
+ if (unlikely(abc_mask & float_cmask_anynan)) {
+ return parts_pick_nan_muladd(a, b, c, s, ab_mask, abc_mask);
+ }
+
+ if (flags & float_muladd_negate_c) {
+ c->sign ^= 1;
+ }
+
+ /* Compute the sign of the product into A. */
+ a->sign ^= b->sign;
+ if (flags & float_muladd_negate_product) {
+ a->sign ^= 1;
+ }
+
+ if (unlikely(ab_mask != float_cmask_normal)) {
+ if (unlikely(ab_mask == float_cmask_infzero)) {
+ goto d_nan;
+ }
+
+ if (ab_mask & float_cmask_inf) {
+ if (c->cls == float_class_inf && a->sign != c->sign) {
+ goto d_nan;
+ }
+ goto return_inf;
+ }
+
+ g_assert(ab_mask & float_cmask_zero);
+ if (c->cls == float_class_normal) {
+ *a = *c;
+ goto return_normal;
+ }
+ if (c->cls == float_class_zero) {
+ if (a->sign != c->sign) {
+ goto return_sub_zero;
+ }
+ goto return_zero;
+ }
+ g_assert(c->cls == float_class_inf);
+ }
+
+ if (unlikely(c->cls == float_class_inf)) {
+ a->sign = c->sign;
+ goto return_inf;
+ }
+
+ /* Perform the multiplication step. */
+ p_widen.sign = a->sign;
+ p_widen.exp = a->exp + b->exp + 1;
+ frac_mulw(&p_widen, a, b);
+ if (!(p_widen.frac_hi & DECOMPOSED_IMPLICIT_BIT)) {
+ frac_add(&p_widen, &p_widen, &p_widen);
+ p_widen.exp -= 1;
+ }
+
+ /* Perform the addition step. */
+ if (c->cls != float_class_zero) {
+ /* Zero-extend C to less significant bits. */
+ frac_widen(&c_widen, c);
+ c_widen.exp = c->exp;
+
+ if (a->sign == c->sign) {
+ parts_add_normal(&p_widen, &c_widen);
+ } else if (!parts_sub_normal(&p_widen, &c_widen)) {
+ goto return_sub_zero;
+ }
+ }
+
+ /* Narrow with sticky bit, for proper rounding later. */
+ frac_truncjam(a, &p_widen);
+ a->sign = p_widen.sign;
+ a->exp = p_widen.exp;
+
+ return_normal:
+ if (flags & float_muladd_halve_result) {
+ a->exp -= 1;
+ }
+ finish_sign:
+ if (flags & float_muladd_negate_result) {
+ a->sign ^= 1;
+ }
+ return a;
+
+ return_sub_zero:
+ a->sign = s->float_rounding_mode == float_round_down;
+ return_zero:
+ a->cls = float_class_zero;
+ goto finish_sign;
+
+ return_inf:
+ a->cls = float_class_inf;
+ goto finish_sign;
+
+ d_nan:
+ float_raise(float_flag_invalid, s);
+ parts_default_nan(a, s);
+ return a;
+}
diff --git a/tests/fp/wrap.c.inc b/tests/fp/wrap.c.inc
index 0cbd20013e..cb1bb77e4c 100644
--- a/tests/fp/wrap.c.inc
+++ b/tests/fp/wrap.c.inc
@@ -574,6 +574,18 @@ WRAP_MULADD(qemu_f32_mulAdd, float32_muladd, float32)
WRAP_MULADD(qemu_f64_mulAdd, float64_muladd, float64)
#undef WRAP_MULADD
+static void qemu_f128M_mulAdd(const float128_t *ap, const float128_t *bp,
+ const float128_t *cp, float128_t *res)
+{
+ float128 a, b, c, ret;
+
+ a = soft_to_qemu128(*ap);
+ b = soft_to_qemu128(*bp);
+ c = soft_to_qemu128(*cp);
+ ret = float128_muladd(a, b, c, 0, &qsf);
+ *res = qemu_to_soft128(ret);
+}
+
#define WRAP_CMP16(name, func, retcond) \
static bool name(float16_t a, float16_t b) \
{ \
--
2.25.1
^ permalink raw reply related [flat|nested] 52+ messages in thread
* [PULL 38/46] softfloat: Use mulu64 for mul64To128
2021-05-16 12:33 [PULL 00/46] softfloat: Convert float128 to FloatParts (part 1) Richard Henderson
` (36 preceding siblings ...)
2021-05-16 12:34 ` [PULL 37/46] softfloat: Move muladd_floats " Richard Henderson
@ 2021-05-16 12:34 ` Richard Henderson
2021-05-16 12:34 ` [PULL 39/46] softfloat: Use add192 in mul128To256 Richard Henderson
` (9 subsequent siblings)
47 siblings, 0 replies; 52+ messages in thread
From: Richard Henderson @ 2021-05-16 12:34 UTC (permalink / raw)
To: qemu-devel; +Cc: peter.maydell, Alex Bennée, David Hildenbrand
Via host-utils.h, we use a host widening multiply for
64-bit hosts, and a common subroutine for 32-bit hosts.
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: David Hildenbrand <david@redhat.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
include/fpu/softfloat-macros.h | 23 +++--------------------
1 file changed, 3 insertions(+), 20 deletions(-)
diff --git a/include/fpu/softfloat-macros.h b/include/fpu/softfloat-macros.h
index 2e3760a9c1..f6dfbe108d 100644
--- a/include/fpu/softfloat-macros.h
+++ b/include/fpu/softfloat-macros.h
@@ -470,27 +470,10 @@ static inline void sub192(uint64_t a0, uint64_t a1, uint64_t a2,
| `z0Ptr' and `z1Ptr'.
*----------------------------------------------------------------------------*/
-static inline void mul64To128( uint64_t a, uint64_t b, uint64_t *z0Ptr, uint64_t *z1Ptr )
+static inline void
+mul64To128(uint64_t a, uint64_t b, uint64_t *z0Ptr, uint64_t *z1Ptr)
{
- uint32_t aHigh, aLow, bHigh, bLow;
- uint64_t z0, zMiddleA, zMiddleB, z1;
-
- aLow = a;
- aHigh = a>>32;
- bLow = b;
- bHigh = b>>32;
- z1 = ( (uint64_t) aLow ) * bLow;
- zMiddleA = ( (uint64_t) aLow ) * bHigh;
- zMiddleB = ( (uint64_t) aHigh ) * bLow;
- z0 = ( (uint64_t) aHigh ) * bHigh;
- zMiddleA += zMiddleB;
- z0 += ( ( (uint64_t) ( zMiddleA < zMiddleB ) )<<32 ) + ( zMiddleA>>32 );
- zMiddleA <<= 32;
- z1 += zMiddleA;
- z0 += ( z1 < zMiddleA );
- *z1Ptr = z1;
- *z0Ptr = z0;
-
+ mulu64(z1Ptr, z0Ptr, a, b);
}
/*----------------------------------------------------------------------------
--
2.25.1
^ permalink raw reply related [flat|nested] 52+ messages in thread
* [PULL 39/46] softfloat: Use add192 in mul128To256
2021-05-16 12:33 [PULL 00/46] softfloat: Convert float128 to FloatParts (part 1) Richard Henderson
` (37 preceding siblings ...)
2021-05-16 12:34 ` [PULL 38/46] softfloat: Use mulu64 for mul64To128 Richard Henderson
@ 2021-05-16 12:34 ` Richard Henderson
2021-05-16 12:34 ` [PULL 40/46] softfloat: Tidy mul128By64To192 Richard Henderson
` (8 subsequent siblings)
47 siblings, 0 replies; 52+ messages in thread
From: Richard Henderson @ 2021-05-16 12:34 UTC (permalink / raw)
To: qemu-devel; +Cc: peter.maydell, Alex Bennée
We can perform the operation in 6 total adds instead of 8.
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Tested-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
include/fpu/softfloat-macros.h | 37 +++++++++++-----------------------
1 file changed, 12 insertions(+), 25 deletions(-)
diff --git a/include/fpu/softfloat-macros.h b/include/fpu/softfloat-macros.h
index f6dfbe108d..76327d844d 100644
--- a/include/fpu/softfloat-macros.h
+++ b/include/fpu/softfloat-macros.h
@@ -511,34 +511,21 @@ static inline void
| the locations pointed to by `z0Ptr', `z1Ptr', `z2Ptr', and `z3Ptr'.
*----------------------------------------------------------------------------*/
-static inline void
- mul128To256(
- uint64_t a0,
- uint64_t a1,
- uint64_t b0,
- uint64_t b1,
- uint64_t *z0Ptr,
- uint64_t *z1Ptr,
- uint64_t *z2Ptr,
- uint64_t *z3Ptr
- )
+static inline void mul128To256(uint64_t a0, uint64_t a1,
+ uint64_t b0, uint64_t b1,
+ uint64_t *z0Ptr, uint64_t *z1Ptr,
+ uint64_t *z2Ptr, uint64_t *z3Ptr)
{
- uint64_t z0, z1, z2, z3;
- uint64_t more1, more2;
+ uint64_t z0, z1, z2;
+ uint64_t m0, m1, m2, n1, n2;
- mul64To128( a1, b1, &z2, &z3 );
- mul64To128( a1, b0, &z1, &more2 );
- add128( z1, more2, 0, z2, &z1, &z2 );
- mul64To128( a0, b0, &z0, &more1 );
- add128( z0, more1, 0, z1, &z0, &z1 );
- mul64To128( a0, b1, &more1, &more2 );
- add128( more1, more2, 0, z2, &more1, &z2 );
- add128( z0, z1, 0, more1, &z0, &z1 );
- *z3Ptr = z3;
- *z2Ptr = z2;
- *z1Ptr = z1;
- *z0Ptr = z0;
+ mul64To128(a1, b0, &m1, &m2);
+ mul64To128(a0, b1, &n1, &n2);
+ mul64To128(a1, b1, &z2, z3Ptr);
+ mul64To128(a0, b0, &z0, &z1);
+ add192( 0, m1, m2, 0, n1, n2, &m0, &m1, &m2);
+ add192(m0, m1, m2, z0, z1, z2, z0Ptr, z1Ptr, z2Ptr);
}
/*----------------------------------------------------------------------------
--
2.25.1
^ permalink raw reply related [flat|nested] 52+ messages in thread
* [PULL 40/46] softfloat: Tidy mul128By64To192
2021-05-16 12:33 [PULL 00/46] softfloat: Convert float128 to FloatParts (part 1) Richard Henderson
` (38 preceding siblings ...)
2021-05-16 12:34 ` [PULL 39/46] softfloat: Use add192 in mul128To256 Richard Henderson
@ 2021-05-16 12:34 ` Richard Henderson
2021-05-16 12:34 ` [PULL 41/46] softfloat: Introduce sh[lr]_double primitives Richard Henderson
` (7 subsequent siblings)
47 siblings, 0 replies; 52+ messages in thread
From: Richard Henderson @ 2021-05-16 12:34 UTC (permalink / raw)
To: qemu-devel; +Cc: peter.maydell, Alex Bennée
Clean up the formatting and variables; no functional change.
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
include/fpu/softfloat-macros.h | 22 ++++++----------------
1 file changed, 6 insertions(+), 16 deletions(-)
diff --git a/include/fpu/softfloat-macros.h b/include/fpu/softfloat-macros.h
index 76327d844d..672c1db555 100644
--- a/include/fpu/softfloat-macros.h
+++ b/include/fpu/softfloat-macros.h
@@ -484,24 +484,14 @@ mul64To128(uint64_t a, uint64_t b, uint64_t *z0Ptr, uint64_t *z1Ptr)
*----------------------------------------------------------------------------*/
static inline void
- mul128By64To192(
- uint64_t a0,
- uint64_t a1,
- uint64_t b,
- uint64_t *z0Ptr,
- uint64_t *z1Ptr,
- uint64_t *z2Ptr
- )
+mul128By64To192(uint64_t a0, uint64_t a1, uint64_t b,
+ uint64_t *z0Ptr, uint64_t *z1Ptr, uint64_t *z2Ptr)
{
- uint64_t z0, z1, z2, more1;
-
- mul64To128( a1, b, &z1, &z2 );
- mul64To128( a0, b, &z0, &more1 );
- add128( z0, more1, 0, z1, &z0, &z1 );
- *z2Ptr = z2;
- *z1Ptr = z1;
- *z0Ptr = z0;
+ uint64_t z0, z1, m1;
+ mul64To128(a1, b, &m1, z2Ptr);
+ mul64To128(a0, b, &z0, &z1);
+ add128(z0, z1, 0, m1, z0Ptr, z1Ptr);
}
/*----------------------------------------------------------------------------
--
2.25.1
^ permalink raw reply related [flat|nested] 52+ messages in thread
* [PULL 41/46] softfloat: Introduce sh[lr]_double primitives
2021-05-16 12:33 [PULL 00/46] softfloat: Convert float128 to FloatParts (part 1) Richard Henderson
` (39 preceding siblings ...)
2021-05-16 12:34 ` [PULL 40/46] softfloat: Tidy mul128By64To192 Richard Henderson
@ 2021-05-16 12:34 ` Richard Henderson
2021-05-16 12:34 ` [PULL 42/46] softfloat: Move div_floats to softfloat-parts.c.inc Richard Henderson
` (6 subsequent siblings)
47 siblings, 0 replies; 52+ messages in thread
From: Richard Henderson @ 2021-05-16 12:34 UTC (permalink / raw)
To: qemu-devel; +Cc: peter.maydell, Alex Bennée
Have x86_64 assembly for them, with a fallback.
This avoids shuffling values through %cl in the x86 case.
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
include/fpu/softfloat-macros.h | 36 ++++++++++++
fpu/softfloat.c | 102 +++++++++++++++++++++++++--------
2 files changed, 115 insertions(+), 23 deletions(-)
diff --git a/include/fpu/softfloat-macros.h b/include/fpu/softfloat-macros.h
index 672c1db555..ec4e27a595 100644
--- a/include/fpu/softfloat-macros.h
+++ b/include/fpu/softfloat-macros.h
@@ -85,6 +85,42 @@ this code that are retained.
#include "fpu/softfloat-types.h"
#include "qemu/host-utils.h"
+/**
+ * shl_double: double-word merging left shift
+ * @l: left or most-significant word
+ * @r: right or least-significant word
+ * @c: shift count
+ *
+ * Shift @l left by @c bits, shifting in bits from @r.
+ */
+static inline uint64_t shl_double(uint64_t l, uint64_t r, int c)
+{
+#if defined(__x86_64__)
+ asm("shld %b2, %1, %0" : "+r"(l) : "r"(r), "ci"(c));
+ return l;
+#else
+ return c ? (l << c) | (r >> (64 - c)) : l;
+#endif
+}
+
+/**
+ * shr_double: double-word merging right shift
+ * @l: left or most-significant word
+ * @r: right or least-significant word
+ * @c: shift count
+ *
+ * Shift @r right by @c bits, shifting in bits from @l.
+ */
+static inline uint64_t shr_double(uint64_t l, uint64_t r, int c)
+{
+#if defined(__x86_64__)
+ asm("shrd %b2, %1, %0" : "+r"(r) : "r"(l), "ci"(c));
+ return r;
+#else
+ return c ? (r >> c) | (l << (64 - c)) : r;
+#endif
+}
+
/*----------------------------------------------------------------------------
| Shifts `a' right by the number of bits given in `count'. If any nonzero
| bits are shifted off, they are ``jammed'' into the least significant bit of
diff --git a/fpu/softfloat.c b/fpu/softfloat.c
index 571309e74f..34689959a9 100644
--- a/fpu/softfloat.c
+++ b/fpu/softfloat.c
@@ -957,15 +957,12 @@ static int frac128_normalize(FloatParts128 *a)
{
if (a->frac_hi) {
int shl = clz64(a->frac_hi);
- if (shl) {
- int shr = 64 - shl;
- a->frac_hi = (a->frac_hi << shl) | (a->frac_lo >> shr);
- a->frac_lo = (a->frac_lo << shl);
- }
+ a->frac_hi = shl_double(a->frac_hi, a->frac_lo, shl);
+ a->frac_lo <<= shl;
return shl;
} else if (a->frac_lo) {
int shl = clz64(a->frac_lo);
- a->frac_hi = (a->frac_lo << shl);
+ a->frac_hi = a->frac_lo << shl;
a->frac_lo = 0;
return shl + 64;
}
@@ -976,7 +973,7 @@ static int frac256_normalize(FloatParts256 *a)
{
uint64_t a0 = a->frac_hi, a1 = a->frac_hm;
uint64_t a2 = a->frac_lm, a3 = a->frac_lo;
- int ret, shl, shr;
+ int ret, shl;
if (likely(a0)) {
shl = clz64(a0);
@@ -1006,11 +1003,10 @@ static int frac256_normalize(FloatParts256 *a)
ret += shl;
}
- shr = -shl & 63;
- a0 = (a0 << shl) | (a1 >> shr);
- a1 = (a1 << shl) | (a2 >> shr);
- a2 = (a2 << shl) | (a3 >> shr);
- a3 = (a3 << shl);
+ a0 = shl_double(a0, a1, shl);
+ a1 = shl_double(a1, a2, shl);
+ a2 = shl_double(a2, a3, shl);
+ a3 <<= shl;
done:
a->frac_hi = a0;
@@ -1029,7 +1025,20 @@ static void frac64_shl(FloatParts64 *a, int c)
static void frac128_shl(FloatParts128 *a, int c)
{
- shift128Left(a->frac_hi, a->frac_lo, c, &a->frac_hi, &a->frac_lo);
+ uint64_t a0 = a->frac_hi, a1 = a->frac_lo;
+
+ if (c & 64) {
+ a0 = a1, a1 = 0;
+ }
+
+ c &= 63;
+ if (c) {
+ a0 = shl_double(a0, a1, c);
+ a1 = a1 << c;
+ }
+
+ a->frac_hi = a0;
+ a->frac_lo = a1;
}
#define frac_shl(A, C) FRAC_GENERIC_64_128(shl, A)(A, C)
@@ -1041,19 +1050,68 @@ static void frac64_shr(FloatParts64 *a, int c)
static void frac128_shr(FloatParts128 *a, int c)
{
- shift128Right(a->frac_hi, a->frac_lo, c, &a->frac_hi, &a->frac_lo);
+ uint64_t a0 = a->frac_hi, a1 = a->frac_lo;
+
+ if (c & 64) {
+ a1 = a0, a0 = 0;
+ }
+
+ c &= 63;
+ if (c) {
+ a1 = shr_double(a0, a1, c);
+ a0 = a0 >> c;
+ }
+
+ a->frac_hi = a0;
+ a->frac_lo = a1;
}
#define frac_shr(A, C) FRAC_GENERIC_64_128(shr, A)(A, C)
static void frac64_shrjam(FloatParts64 *a, int c)
{
- shift64RightJamming(a->frac, c, &a->frac);
+ uint64_t a0 = a->frac;
+
+ if (likely(c != 0)) {
+ if (likely(c < 64)) {
+ a0 = (a0 >> c) | (shr_double(a0, 0, c) != 0);
+ } else {
+ a0 = a0 != 0;
+ }
+ a->frac = a0;
+ }
}
static void frac128_shrjam(FloatParts128 *a, int c)
{
- shift128RightJamming(a->frac_hi, a->frac_lo, c, &a->frac_hi, &a->frac_lo);
+ uint64_t a0 = a->frac_hi, a1 = a->frac_lo;
+ uint64_t sticky = 0;
+
+ if (unlikely(c == 0)) {
+ return;
+ } else if (likely(c < 64)) {
+ /* nothing */
+ } else if (likely(c < 128)) {
+ sticky = a1;
+ a1 = a0;
+ a0 = 0;
+ c &= 63;
+ if (c == 0) {
+ goto done;
+ }
+ } else {
+ sticky = a0 | a1;
+ a0 = a1 = 0;
+ goto done;
+ }
+
+ sticky |= shr_double(a1, 0, c);
+ a1 = shr_double(a0, a1, c);
+ a0 = a0 >> c;
+
+ done:
+ a->frac_lo = a1 | (sticky != 0);
+ a->frac_hi = a0;
}
static void frac256_shrjam(FloatParts256 *a, int c)
@@ -1061,7 +1119,6 @@ static void frac256_shrjam(FloatParts256 *a, int c)
uint64_t a0 = a->frac_hi, a1 = a->frac_hm;
uint64_t a2 = a->frac_lm, a3 = a->frac_lo;
uint64_t sticky = 0;
- int invc;
if (unlikely(c == 0)) {
return;
@@ -1086,12 +1143,11 @@ static void frac256_shrjam(FloatParts256 *a, int c)
goto done;
}
- invc = -c & 63;
- sticky |= a3 << invc;
- a3 = (a3 >> c) | (a2 << invc);
- a2 = (a2 >> c) | (a1 << invc);
- a1 = (a1 >> c) | (a0 << invc);
- a0 = (a0 >> c);
+ sticky |= shr_double(a3, 0, c);
+ a3 = shr_double(a2, a3, c);
+ a2 = shr_double(a1, a2, c);
+ a1 = shr_double(a0, a1, c);
+ a0 = a0 >> c;
done:
a->frac_lo = a3 | (sticky != 0);
--
2.25.1
^ permalink raw reply related [flat|nested] 52+ messages in thread
* [PULL 42/46] softfloat: Move div_floats to softfloat-parts.c.inc
2021-05-16 12:33 [PULL 00/46] softfloat: Convert float128 to FloatParts (part 1) Richard Henderson
` (40 preceding siblings ...)
2021-05-16 12:34 ` [PULL 41/46] softfloat: Introduce sh[lr]_double primitives Richard Henderson
@ 2021-05-16 12:34 ` Richard Henderson
2021-05-20 13:40 ` Peter Maydell
2021-05-16 12:34 ` [PULL 43/46] softfloat: Split float_to_float Richard Henderson
` (5 subsequent siblings)
47 siblings, 1 reply; 52+ messages in thread
From: Richard Henderson @ 2021-05-16 12:34 UTC (permalink / raw)
To: qemu-devel; +Cc: peter.maydell, Alex Bennée
Rename to parts$N_div.
Implement float128_div with FloatParts128.
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
fpu/softfloat.c | 290 +++++++++++++++-----------------------
fpu/softfloat-parts.c.inc | 55 ++++++++
2 files changed, 171 insertions(+), 174 deletions(-)
diff --git a/fpu/softfloat.c b/fpu/softfloat.c
index 34689959a9..a6dbb1dabf 100644
--- a/fpu/softfloat.c
+++ b/fpu/softfloat.c
@@ -803,6 +803,14 @@ static FloatParts128 *parts128_muladd(FloatParts128 *a, FloatParts128 *b,
#define parts_muladd(A, B, C, Z, S) \
PARTS_GENERIC_64_128(muladd, A)(A, B, C, Z, S)
+static FloatParts64 *parts64_div(FloatParts64 *a, FloatParts64 *b,
+ float_status *s);
+static FloatParts128 *parts128_div(FloatParts128 *a, FloatParts128 *b,
+ float_status *s);
+
+#define parts_div(A, B, S) \
+ PARTS_GENERIC_64_128(div, A)(A, B, S)
+
/*
* Helper functions for softfloat-parts.c.inc, per-size operations.
*/
@@ -895,6 +903,87 @@ static void frac128_clear(FloatParts128 *a)
#define frac_clear(A) FRAC_GENERIC_64_128(clear, A)(A)
+static bool frac64_div(FloatParts64 *a, FloatParts64 *b)
+{
+ uint64_t n1, n0, r, q;
+ bool ret;
+
+ /*
+ * We want a 2*N / N-bit division to produce exactly an N-bit
+ * result, so that we do not lose any precision and so that we
+ * do not have to renormalize afterward. If A.frac < B.frac,
+ * then division would produce an (N-1)-bit result; shift A left
+ * by one to produce an N-bit result, and return true to
+ * decrement the exponent to match.
+ *
+ * The udiv_qrnnd algorithm that we're using requires normalization,
+ * i.e. the msb of the denominator must be set, which is already true.
+ */
+ ret = a->frac < b->frac;
+ if (ret) {
+ n0 = a->frac;
+ n1 = 0;
+ } else {
+ n0 = a->frac >> 1;
+ n1 = a->frac << 63;
+ }
+ q = udiv_qrnnd(&r, n0, n1, b->frac);
+
+ /* Set lsb if there is a remainder, to set inexact. */
+ a->frac = q | (r != 0);
+
+ return ret;
+}
+
+static bool frac128_div(FloatParts128 *a, FloatParts128 *b)
+{
+ uint64_t q0, q1, a0, a1, b0, b1;
+ uint64_t r0, r1, r2, r3, t0, t1, t2, t3;
+ bool ret = false;
+
+ a0 = a->frac_hi, a1 = a->frac_lo;
+ b0 = b->frac_hi, b1 = b->frac_lo;
+
+ ret = lt128(a0, a1, b0, b1);
+ if (!ret) {
+ a1 = shr_double(a0, a1, 1);
+ a0 = a0 >> 1;
+ }
+
+ /* Use 128/64 -> 64 division as estimate for 192/128 -> 128 division. */
+ q0 = estimateDiv128To64(a0, a1, b0);
+
+ /*
+ * Estimate is high because B1 was not included (unless B1 == 0).
+ * Reduce quotient and increase remainder until remainder is non-negative.
+ * This loop will execute 0 to 2 times.
+ */
+ mul128By64To192(b0, b1, q0, &t0, &t1, &t2);
+ sub192(a0, a1, 0, t0, t1, t2, &r0, &r1, &r2);
+ while (r0 != 0) {
+ q0--;
+ add192(r0, r1, r2, 0, b0, b1, &r0, &r1, &r2);
+ }
+
+ /* Repeat using the remainder, producing a second word of quotient. */
+ q1 = estimateDiv128To64(r1, r2, b0);
+ mul128By64To192(b0, b1, q1, &t1, &t2, &t3);
+ sub192(r1, r2, 0, t1, t2, t3, &r1, &r2, &r3);
+ while (r1 != 0) {
+ q1--;
+ add192(r1, r2, r3, 0, b0, b1, &r1, &r2, &r3);
+ }
+
+ /* Any remainder indicates inexact; set sticky bit. */
+ q1 |= (r2 | r3) != 0;
+
+ a->frac_hi = q0;
+ a->frac_lo = q1;
+ return ret;
+}
+
+#define frac_div(A, B) FRAC_GENERIC_64_128(div, A)(A, B)
+
static bool frac64_eqz(FloatParts64 *a)
{
return a->frac == 0;
@@ -1821,110 +1910,42 @@ float128 QEMU_FLATTEN float128_muladd(float128 a, float128 b, float128 c,
}
/*
- * Returns the result of dividing the floating-point value `a' by the
- * corresponding value `b'. The operation is performed according to
- * the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+ * Division
*/
-static FloatParts64 div_floats(FloatParts64 a, FloatParts64 b, float_status *s)
-{
- bool sign = a.sign ^ b.sign;
-
- if (a.cls == float_class_normal && b.cls == float_class_normal) {
- uint64_t n0, n1, q, r;
- int exp = a.exp - b.exp;
-
- /*
- * We want a 2*N / N-bit division to produce exactly an N-bit
- * result, so that we do not lose any precision and so that we
- * do not have to renormalize afterward. If A.frac < B.frac,
- * then division would produce an (N-1)-bit result; shift A left
- * by one to produce the an N-bit result, and decrement the
- * exponent to match.
- *
- * The udiv_qrnnd algorithm that we're using requires normalization,
- * i.e. the msb of the denominator must be set, which is already true.
- */
- if (a.frac < b.frac) {
- exp -= 1;
- shift128Left(0, a.frac, DECOMPOSED_BINARY_POINT + 1, &n1, &n0);
- } else {
- shift128Left(0, a.frac, DECOMPOSED_BINARY_POINT, &n1, &n0);
- }
- q = udiv_qrnnd(&r, n1, n0, b.frac);
-
- /* Set lsb if there is a remainder, to set inexact. */
- a.frac = q | (r != 0);
- a.sign = sign;
- a.exp = exp;
- return a;
- }
- /* handle all the NaN cases */
- if (is_nan(a.cls) || is_nan(b.cls)) {
- return *parts_pick_nan(&a, &b, s);
- }
- /* 0/0 or Inf/Inf */
- if (a.cls == b.cls
- &&
- (a.cls == float_class_inf || a.cls == float_class_zero)) {
- float_raise(float_flag_invalid, s);
- parts_default_nan(&a, s);
- return a;
- }
- /* Inf / x or 0 / x */
- if (a.cls == float_class_inf || a.cls == float_class_zero) {
- a.sign = sign;
- return a;
- }
- /* Div 0 => Inf */
- if (b.cls == float_class_zero) {
- float_raise(float_flag_divbyzero, s);
- a.cls = float_class_inf;
- a.sign = sign;
- return a;
- }
- /* Div by Inf */
- if (b.cls == float_class_inf) {
- a.cls = float_class_zero;
- a.sign = sign;
- return a;
- }
- g_assert_not_reached();
-}
-
float16 float16_div(float16 a, float16 b, float_status *status)
{
- FloatParts64 pa, pb, pr;
+ FloatParts64 pa, pb, *pr;
float16_unpack_canonical(&pa, a, status);
float16_unpack_canonical(&pb, b, status);
- pr = div_floats(pa, pb, status);
+ pr = parts_div(&pa, &pb, status);
- return float16_round_pack_canonical(&pr, status);
+ return float16_round_pack_canonical(pr, status);
}
static float32 QEMU_SOFTFLOAT_ATTR
soft_f32_div(float32 a, float32 b, float_status *status)
{
- FloatParts64 pa, pb, pr;
+ FloatParts64 pa, pb, *pr;
float32_unpack_canonical(&pa, a, status);
float32_unpack_canonical(&pb, b, status);
- pr = div_floats(pa, pb, status);
+ pr = parts_div(&pa, &pb, status);
- return float32_round_pack_canonical(&pr, status);
+ return float32_round_pack_canonical(pr, status);
}
static float64 QEMU_SOFTFLOAT_ATTR
soft_f64_div(float64 a, float64 b, float_status *status)
{
- FloatParts64 pa, pb, pr;
+ FloatParts64 pa, pb, *pr;
float64_unpack_canonical(&pa, a, status);
float64_unpack_canonical(&pb, b, status);
- pr = div_floats(pa, pb, status);
+ pr = parts_div(&pa, &pb, status);
- return float64_round_pack_canonical(&pr, status);
+ return float64_round_pack_canonical(pr, status);
}
static float hard_f32_div(float a, float b)
@@ -1985,20 +2006,28 @@ float64_div(float64 a, float64 b, float_status *s)
f64_div_pre, f64_div_post);
}
-/*
- * Returns the result of dividing the bfloat16
- * value `a' by the corresponding value `b'.
- */
-
-bfloat16 bfloat16_div(bfloat16 a, bfloat16 b, float_status *status)
+bfloat16 QEMU_FLATTEN
+bfloat16_div(bfloat16 a, bfloat16 b, float_status *status)
{
- FloatParts64 pa, pb, pr;
+ FloatParts64 pa, pb, *pr;
bfloat16_unpack_canonical(&pa, a, status);
bfloat16_unpack_canonical(&pb, b, status);
- pr = div_floats(pa, pb, status);
+ pr = parts_div(&pa, &pb, status);
- return bfloat16_round_pack_canonical(&pr, status);
+ return bfloat16_round_pack_canonical(pr, status);
+}
+
+float128 QEMU_FLATTEN
+float128_div(float128 a, float128 b, float_status *status)
+{
+ FloatParts128 pa, pb, *pr;
+
+ float128_unpack_canonical(&pa, a, status);
+ float128_unpack_canonical(&pb, b, status);
+ pr = parts_div(&pa, &pb, status);
+
+ return float128_round_pack_canonical(pr, status);
}
/*
@@ -7123,93 +7152,6 @@ float128 float128_round_to_int(float128 a, float_status *status)
}
-/*----------------------------------------------------------------------------
-| Returns the result of dividing the quadruple-precision floating-point value
-| `a' by the corresponding value `b'. The operation is performed according to
-| the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
-*----------------------------------------------------------------------------*/
-
-float128 float128_div(float128 a, float128 b, float_status *status)
-{
- bool aSign, bSign, zSign;
- int32_t aExp, bExp, zExp;
- uint64_t aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2;
- uint64_t rem0, rem1, rem2, rem3, term0, term1, term2, term3;
-
- aSig1 = extractFloat128Frac1( a );
- aSig0 = extractFloat128Frac0( a );
- aExp = extractFloat128Exp( a );
- aSign = extractFloat128Sign( a );
- bSig1 = extractFloat128Frac1( b );
- bSig0 = extractFloat128Frac0( b );
- bExp = extractFloat128Exp( b );
- bSign = extractFloat128Sign( b );
- zSign = aSign ^ bSign;
- if ( aExp == 0x7FFF ) {
- if (aSig0 | aSig1) {
- return propagateFloat128NaN(a, b, status);
- }
- if ( bExp == 0x7FFF ) {
- if (bSig0 | bSig1) {
- return propagateFloat128NaN(a, b, status);
- }
- goto invalid;
- }
- return packFloat128( zSign, 0x7FFF, 0, 0 );
- }
- if ( bExp == 0x7FFF ) {
- if (bSig0 | bSig1) {
- return propagateFloat128NaN(a, b, status);
- }
- return packFloat128( zSign, 0, 0, 0 );
- }
- if ( bExp == 0 ) {
- if ( ( bSig0 | bSig1 ) == 0 ) {
- if ( ( aExp | aSig0 | aSig1 ) == 0 ) {
- invalid:
- float_raise(float_flag_invalid, status);
- return float128_default_nan(status);
- }
- float_raise(float_flag_divbyzero, status);
- return packFloat128( zSign, 0x7FFF, 0, 0 );
- }
- normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
- }
- if ( aExp == 0 ) {
- if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
- normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
- }
- zExp = aExp - bExp + 0x3FFD;
- shortShift128Left(
- aSig0 | UINT64_C(0x0001000000000000), aSig1, 15, &aSig0, &aSig1 );
- shortShift128Left(
- bSig0 | UINT64_C(0x0001000000000000), bSig1, 15, &bSig0, &bSig1 );
- if ( le128( bSig0, bSig1, aSig0, aSig1 ) ) {
- shift128Right( aSig0, aSig1, 1, &aSig0, &aSig1 );
- ++zExp;
- }
- zSig0 = estimateDiv128To64( aSig0, aSig1, bSig0 );
- mul128By64To192( bSig0, bSig1, zSig0, &term0, &term1, &term2 );
- sub192( aSig0, aSig1, 0, term0, term1, term2, &rem0, &rem1, &rem2 );
- while ( (int64_t) rem0 < 0 ) {
- --zSig0;
- add192( rem0, rem1, rem2, 0, bSig0, bSig1, &rem0, &rem1, &rem2 );
- }
- zSig1 = estimateDiv128To64( rem1, rem2, bSig0 );
- if ( ( zSig1 & 0x3FFF ) <= 4 ) {
- mul128By64To192( bSig0, bSig1, zSig1, &term1, &term2, &term3 );
- sub192( rem1, rem2, 0, term1, term2, term3, &rem1, &rem2, &rem3 );
- while ( (int64_t) rem1 < 0 ) {
- --zSig1;
- add192( rem1, rem2, rem3, 0, bSig0, bSig1, &rem1, &rem2, &rem3 );
- }
- zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
- }
- shift128ExtraRightJamming( zSig0, zSig1, 0, 15, &zSig0, &zSig1, &zSig2 );
- return roundAndPackFloat128(zSign, zExp, zSig0, zSig1, zSig2, status);
-
-}
-
/*----------------------------------------------------------------------------
| Returns the remainder of the quadruple-precision floating-point value `a'
| with respect to the corresponding value `b'. The operation is performed
diff --git a/fpu/softfloat-parts.c.inc b/fpu/softfloat-parts.c.inc
index a203811299..f8165d92f9 100644
--- a/fpu/softfloat-parts.c.inc
+++ b/fpu/softfloat-parts.c.inc
@@ -539,3 +539,58 @@ static FloatPartsN *partsN(muladd)(FloatPartsN *a, FloatPartsN *b,
parts_default_nan(a, s);
return a;
}
+
+/*
+ * Returns the result of dividing the floating-point value `a' by the
+ * corresponding value `b'. The operation is performed according to
+ * the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+ */
+static FloatPartsN *partsN(div)(FloatPartsN *a, FloatPartsN *b,
+ float_status *s)
+{
+ int ab_mask = float_cmask(a->cls) | float_cmask(b->cls);
+ bool sign = a->sign ^ b->sign;
+
+ if (likely(ab_mask == float_cmask_normal)) {
+ a->sign = sign;
+ a->exp -= b->exp + frac_div(a, b);
+ return a;
+ }
+
+ /* 0/0 or Inf/Inf => NaN */
+ if (unlikely(ab_mask == float_cmask_zero) ||
+ unlikely(ab_mask == float_cmask_inf)) {
+ float_raise(float_flag_invalid, s);
+ parts_default_nan(a, s);
+ return a;
+ }
+
+ /* All the NaN cases */
+ if (unlikely(ab_mask & float_cmask_anynan)) {
+ return parts_pick_nan(a, b, s);
+ }
+
+ a->sign = sign;
+
+ /* Inf / X */
+ if (a->cls == float_class_inf) {
+ return a;
+ }
+
+ /* 0 / X */
+ if (a->cls == float_class_zero) {
+ return a;
+ }
+
+ /* X / Inf */
+ if (b->cls == float_class_inf) {
+ a->cls = float_class_zero;
+ return a;
+ }
+
+ /* X / 0 => Inf */
+ g_assert(b->cls == float_class_zero);
+ float_raise(float_flag_divbyzero, s);
+ a->cls = float_class_inf;
+ return a;
+}
--
2.25.1
^ permalink raw reply related [flat|nested] 52+ messages in thread
* Re: [PULL 42/46] softfloat: Move div_floats to softfloat-parts.c.inc
2021-05-16 12:34 ` [PULL 42/46] softfloat: Move div_floats to softfloat-parts.c.inc Richard Henderson
@ 2021-05-20 13:40 ` Peter Maydell
2021-05-20 16:15 ` Richard Henderson
0 siblings, 1 reply; 52+ messages in thread
From: Peter Maydell @ 2021-05-20 13:40 UTC (permalink / raw)
To: Richard Henderson; +Cc: Alex Bennée, QEMU Developers
On Sun, 16 May 2021 at 13:38, Richard Henderson
<richard.henderson@linaro.org> wrote:
>
> Rename to parts$N_div.
> Implement float128_div with FloatParts128.
>
> Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
> +static bool frac64_div(FloatParts64 *a, FloatParts64 *b)
> +{
> + uint64_t n1, n0, r, q;
> + bool ret;
> +
> + /*
> + * We want a 2*N / N-bit division to produce exactly an N-bit
> + * result, so that we do not lose any precision and so that we
> + * do not have to renormalize afterward. If A.frac < B.frac,
> + * then division would produce an (N-1)-bit result; shift A left
> + * by one to produce the an N-bit result, and return true to
> + * decrement the exponent to match.
> + *
> + * The udiv_qrnnd algorithm that we're using requires normalization,
> + * i.e. the msb of the denominator must be set, which is already true.
> + */
> + ret = a->frac < b->frac;
> + if (ret) {
> + n0 = a->frac;
> + n1 = 0;
> + } else {
> + n0 = a->frac >> 1;
> + n1 = a->frac << 63;
> + }
> + q = udiv_qrnnd(&r, n0, n1, b->frac);
Hi -- Coverity is suspicious about this line (CID 1453209),
because udiv_qrnnd()'s prototype is
static inline uint64_t udiv_qrnnd(uint64_t *r, uint64_t n1,
uint64_t n0, uint64_t d)
but here we pass n0, n1 rather than n1, n0...
Bug, or false positive ?
thanks
-- PMM
^ permalink raw reply [flat|nested] 52+ messages in thread
* Re: [PULL 42/46] softfloat: Move div_floats to softfloat-parts.c.inc
2021-05-20 13:40 ` Peter Maydell
@ 2021-05-20 16:15 ` Richard Henderson
2021-05-20 17:04 ` Peter Maydell
0 siblings, 1 reply; 52+ messages in thread
From: Richard Henderson @ 2021-05-20 16:15 UTC (permalink / raw)
To: Peter Maydell; +Cc: Alex Bennée, QEMU Developers
On 5/20/21 8:40 AM, Peter Maydell wrote:
> On Sun, 16 May 2021 at 13:38, Richard Henderson
> <richard.henderson@linaro.org> wrote:
>>
>> Rename to parts$N_div.
>> Implement float128_div with FloatParts128.
>>
>> Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
>> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
>> ---
>
>> +static bool frac64_div(FloatParts64 *a, FloatParts64 *b)
>> +{
>> + uint64_t n1, n0, r, q;
>> + bool ret;
>> +
>> + /*
>> + * We want a 2*N / N-bit division to produce exactly an N-bit
>> + * result, so that we do not lose any precision and so that we
>> + * do not have to renormalize afterward. If A.frac < B.frac,
>> + * then division would produce an (N-1)-bit result; shift A left
>> + * by one to produce the an N-bit result, and return true to
>> + * decrement the exponent to match.
>> + *
>> + * The udiv_qrnnd algorithm that we're using requires normalization,
>> + * i.e. the msb of the denominator must be set, which is already true.
>> + */
>> + ret = a->frac < b->frac;
>> + if (ret) {
>> + n0 = a->frac;
>> + n1 = 0;
>> + } else {
>> + n0 = a->frac >> 1;
>> + n1 = a->frac << 63;
>> + }
>> + q = udiv_qrnnd(&r, n0, n1, b->frac);
>
> Hi -- Coverity is suspicious about this line (CID 1453209),
> because udiv_qrrnd()'s prototype is
> static inline uint64_t udiv_qrnnd(uint64_t *r, uint64_t n1,
> uint64_t n0, uint64_t d)
>
> but here we pass n0, n1 rather than n1, n0...
>
> Bug, or false positive ?
I think it's a false positive.
Different naming schemes between gcc (from whence qrnnd came, 0 as least
significant element) and the Berkeley softfloat code (0 as most significant
element).
r~
^ permalink raw reply [flat|nested] 52+ messages in thread
* Re: [PULL 42/46] softfloat: Move div_floats to softfloat-parts.c.inc
2021-05-20 16:15 ` Richard Henderson
@ 2021-05-20 17:04 ` Peter Maydell
0 siblings, 0 replies; 52+ messages in thread
From: Peter Maydell @ 2021-05-20 17:04 UTC (permalink / raw)
To: Richard Henderson; +Cc: Alex Bennée, QEMU Developers
On Thu, 20 May 2021 at 17:15, Richard Henderson
<richard.henderson@linaro.org> wrote:
>
> On 5/20/21 8:40 AM, Peter Maydell wrote:
> > On Sun, 16 May 2021 at 13:38, Richard Henderson
> > Hi -- Coverity is suspicious about this line (CID 1453209),
> > because udiv_qrrnd()'s prototype is
> > static inline uint64_t udiv_qrnnd(uint64_t *r, uint64_t n1,
> > uint64_t n0, uint64_t d)
> >
> > but here we pass n0, n1 rather than n1, n0...
> >
> > Bug, or false positive ?
>
> I think it's a false positive.
>
> Different naming schemes between gcc (from whence qrrnd came, 0 as least
> significant element) and the Berkeley softfloat code (0 as most significant
> element).
OK; I've marked it as an fp in the coverity UI.
thanks
-- PMM
^ permalink raw reply [flat|nested] 52+ messages in thread
* [PULL 43/46] softfloat: Split float_to_float
2021-05-16 12:33 [PULL 00/46] softfloat: Convert float128 to FloatParts (part 1) Richard Henderson
` (41 preceding siblings ...)
2021-05-16 12:34 ` [PULL 42/46] softfloat: Move div_floats to softfloat-parts.c.inc Richard Henderson
@ 2021-05-16 12:34 ` Richard Henderson
2021-05-16 12:34 ` [PULL 44/46] softfloat: Convert float-to-float conversions with float128 Richard Henderson
` (4 subsequent siblings)
47 siblings, 0 replies; 52+ messages in thread
From: Richard Henderson @ 2021-05-16 12:34 UTC (permalink / raw)
To: qemu-devel; +Cc: peter.maydell, Alex Bennée
Split out parts_float_to_ahp and parts_float_to_float.
Convert to pointers.
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
fpu/softfloat.c | 174 ++++++++++++++++++++++++++++--------------------
1 file changed, 101 insertions(+), 73 deletions(-)
diff --git a/fpu/softfloat.c b/fpu/softfloat.c
index a6dbb1dabf..80025539ef 100644
--- a/fpu/softfloat.c
+++ b/fpu/softfloat.c
@@ -2037,83 +2037,105 @@ float128_div(float128 a, float128 b, float_status *status)
* conversion is performed according to the IEC/IEEE Standard for
* Binary Floating-Point Arithmetic.
*
- * The float_to_float helper only needs to take care of raising
- * invalid exceptions and handling the conversion on NaNs.
+ * Usually this only needs to take care of raising invalid exceptions
+ * and handling the conversion on NaNs.
*/
-static FloatParts64 float_to_float(FloatParts64 a, const FloatFmt *dstf,
- float_status *s)
+static void parts_float_to_ahp(FloatParts64 *a, float_status *s)
{
- if (dstf->arm_althp) {
- switch (a.cls) {
- case float_class_qnan:
- case float_class_snan:
- /* There is no NaN in the destination format. Raise Invalid
- * and return a zero with the sign of the input NaN.
- */
- float_raise(float_flag_invalid, s);
- a.cls = float_class_zero;
- a.frac = 0;
- a.exp = 0;
- break;
+ switch (a->cls) {
+ case float_class_qnan:
+ case float_class_snan:
+ /*
+ * There is no NaN in the destination format. Raise Invalid
+ * and return a zero with the sign of the input NaN.
+ */
+ float_raise(float_flag_invalid, s);
+ a->cls = float_class_zero;
+ break;
- case float_class_inf:
- /* There is no Inf in the destination format. Raise Invalid
- * and return the maximum normal with the correct sign.
- */
- float_raise(float_flag_invalid, s);
- a.cls = float_class_normal;
- a.exp = dstf->exp_max;
- a.frac = ((1ull << dstf->frac_size) - 1) << dstf->frac_shift;
- break;
+ case float_class_inf:
+ /*
+ * There is no Inf in the destination format. Raise Invalid
+ * and return the maximum normal with the correct sign.
+ */
+ float_raise(float_flag_invalid, s);
+ a->cls = float_class_normal;
+ a->exp = float16_params_ahp.exp_max;
+ a->frac = MAKE_64BIT_MASK(float16_params_ahp.frac_shift,
+ float16_params_ahp.frac_size + 1);
+ break;
- default:
- break;
- }
- } else if (is_nan(a.cls)) {
- parts_return_nan(&a, s);
+ case float_class_normal:
+ case float_class_zero:
+ break;
+
+ default:
+ g_assert_not_reached();
}
- return a;
}
+static void parts64_float_to_float(FloatParts64 *a, float_status *s)
+{
+ if (is_nan(a->cls)) {
+ parts_return_nan(a, s);
+ }
+}
+
+static void parts128_float_to_float(FloatParts128 *a, float_status *s)
+{
+ if (is_nan(a->cls)) {
+ parts_return_nan(a, s);
+ }
+}
+
+#define parts_float_to_float(P, S) \
+ PARTS_GENERIC_64_128(float_to_float, P)(P, S)
+
float32 float16_to_float32(float16 a, bool ieee, float_status *s)
{
const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
- FloatParts64 pa, pr;
+ FloatParts64 p;
- float16a_unpack_canonical(&pa, a, s, fmt16);
- pr = float_to_float(pa, &float32_params, s);
- return float32_round_pack_canonical(&pr, s);
+ float16a_unpack_canonical(&p, a, s, fmt16);
+ parts_float_to_float(&p, s);
+ return float32_round_pack_canonical(&p, s);
}
float64 float16_to_float64(float16 a, bool ieee, float_status *s)
{
const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
- FloatParts64 pa, pr;
+ FloatParts64 p;
- float16a_unpack_canonical(&pa, a, s, fmt16);
- pr = float_to_float(pa, &float64_params, s);
- return float64_round_pack_canonical(&pr, s);
+ float16a_unpack_canonical(&p, a, s, fmt16);
+ parts_float_to_float(&p, s);
+ return float64_round_pack_canonical(&p, s);
}
float16 float32_to_float16(float32 a, bool ieee, float_status *s)
{
- const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
- FloatParts64 pa, pr;
+ FloatParts64 p;
+ const FloatFmt *fmt;
- float32_unpack_canonical(&pa, a, s);
- pr = float_to_float(pa, fmt16, s);
- return float16a_round_pack_canonical(&pr, s, fmt16);
+ float32_unpack_canonical(&p, a, s);
+ if (ieee) {
+ parts_float_to_float(&p, s);
+ fmt = &float16_params;
+ } else {
+ parts_float_to_ahp(&p, s);
+ fmt = &float16_params_ahp;
+ }
+ return float16a_round_pack_canonical(&p, s, fmt);
}
static float64 QEMU_SOFTFLOAT_ATTR
soft_float32_to_float64(float32 a, float_status *s)
{
- FloatParts64 pa, pr;
+ FloatParts64 p;
- float32_unpack_canonical(&pa, a, s);
- pr = float_to_float(pa, &float64_params, s);
- return float64_round_pack_canonical(&pr, s);
+ float32_unpack_canonical(&p, a, s);
+ parts_float_to_float(&p, s);
+ return float64_round_pack_canonical(&p, s);
}
float64 float32_to_float64(float32 a, float_status *s)
@@ -2134,57 +2156,63 @@ float64 float32_to_float64(float32 a, float_status *s)
float16 float64_to_float16(float64 a, bool ieee, float_status *s)
{
- const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
- FloatParts64 pa, pr;
+ FloatParts64 p;
+ const FloatFmt *fmt;
- float64_unpack_canonical(&pa, a, s);
- pr = float_to_float(pa, fmt16, s);
- return float16a_round_pack_canonical(&pr, s, fmt16);
+ float64_unpack_canonical(&p, a, s);
+ if (ieee) {
+ parts_float_to_float(&p, s);
+ fmt = &float16_params;
+ } else {
+ parts_float_to_ahp(&p, s);
+ fmt = &float16_params_ahp;
+ }
+ return float16a_round_pack_canonical(&p, s, fmt);
}
float32 float64_to_float32(float64 a, float_status *s)
{
- FloatParts64 pa, pr;
+ FloatParts64 p;
- float64_unpack_canonical(&pa, a, s);
- pr = float_to_float(pa, &float32_params, s);
- return float32_round_pack_canonical(&pr, s);
+ float64_unpack_canonical(&p, a, s);
+ parts_float_to_float(&p, s);
+ return float32_round_pack_canonical(&p, s);
}
float32 bfloat16_to_float32(bfloat16 a, float_status *s)
{
- FloatParts64 pa, pr;
+ FloatParts64 p;
- bfloat16_unpack_canonical(&pa, a, s);
- pr = float_to_float(pa, &float32_params, s);
- return float32_round_pack_canonical(&pr, s);
+ bfloat16_unpack_canonical(&p, a, s);
+ parts_float_to_float(&p, s);
+ return float32_round_pack_canonical(&p, s);
}
float64 bfloat16_to_float64(bfloat16 a, float_status *s)
{
- FloatParts64 pa, pr;
+ FloatParts64 p;
- bfloat16_unpack_canonical(&pa, a, s);
- pr = float_to_float(pa, &float64_params, s);
- return float64_round_pack_canonical(&pr, s);
+ bfloat16_unpack_canonical(&p, a, s);
+ parts_float_to_float(&p, s);
+ return float64_round_pack_canonical(&p, s);
}
bfloat16 float32_to_bfloat16(float32 a, float_status *s)
{
- FloatParts64 pa, pr;
+ FloatParts64 p;
- float32_unpack_canonical(&pa, a, s);
- pr = float_to_float(pa, &bfloat16_params, s);
- return bfloat16_round_pack_canonical(&pr, s);
+ float32_unpack_canonical(&p, a, s);
+ parts_float_to_float(&p, s);
+ return bfloat16_round_pack_canonical(&p, s);
}
bfloat16 float64_to_bfloat16(float64 a, float_status *s)
{
- FloatParts64 pa, pr;
+ FloatParts64 p;
- float64_unpack_canonical(&pa, a, s);
- pr = float_to_float(pa, &bfloat16_params, s);
- return bfloat16_round_pack_canonical(&pr, s);
+ float64_unpack_canonical(&p, a, s);
+ parts_float_to_float(&p, s);
+ return bfloat16_round_pack_canonical(&p, s);
}
/*
--
2.25.1
^ permalink raw reply related [flat|nested] 52+ messages in thread
* [PULL 44/46] softfloat: Convert float-to-float conversions with float128
2021-05-16 12:33 [PULL 00/46] softfloat: Convert float128 to FloatParts (part 1) Richard Henderson
` (42 preceding siblings ...)
2021-05-16 12:34 ` [PULL 43/46] softfloat: Split float_to_float Richard Henderson
@ 2021-05-16 12:34 ` Richard Henderson
2021-05-16 12:34 ` [PULL 45/46] softfloat: Move round_to_int to softfloat-parts.c.inc Richard Henderson
` (3 subsequent siblings)
47 siblings, 0 replies; 52+ messages in thread
From: Richard Henderson @ 2021-05-16 12:34 UTC (permalink / raw)
To: qemu-devel; +Cc: peter.maydell, Alex Bennée
Introduce parts_float_to_float_widen and parts_float_to_float_narrow.
Use them for float128_to_float{32,64} and float{32,64}_to_float128.
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
fpu/softfloat.c | 203 ++++++++++++++++--------------------------------
1 file changed, 69 insertions(+), 134 deletions(-)
diff --git a/fpu/softfloat.c b/fpu/softfloat.c
index 80025539ef..d056b5730b 100644
--- a/fpu/softfloat.c
+++ b/fpu/softfloat.c
@@ -2092,6 +2092,35 @@ static void parts128_float_to_float(FloatParts128 *a, float_status *s)
#define parts_float_to_float(P, S) \
PARTS_GENERIC_64_128(float_to_float, P)(P, S)
+static void parts_float_to_float_narrow(FloatParts64 *a, FloatParts128 *b,
+ float_status *s)
+{
+ a->cls = b->cls;
+ a->sign = b->sign;
+ a->exp = b->exp;
+
+ if (a->cls == float_class_normal) {
+ frac_truncjam(a, b);
+ } else if (is_nan(a->cls)) {
+ /* Discard the low bits of the NaN. */
+ a->frac = b->frac_hi;
+ parts_return_nan(a, s);
+ }
+}
+
+static void parts_float_to_float_widen(FloatParts128 *a, FloatParts64 *b,
+ float_status *s)
+{
+ a->cls = b->cls;
+ a->sign = b->sign;
+ a->exp = b->exp;
+ frac_widen(a, b);
+
+ if (is_nan(a->cls)) {
+ parts_return_nan(a, s);
+ }
+}
+
float32 float16_to_float32(float16 a, bool ieee, float_status *s)
{
const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
@@ -2215,6 +2244,46 @@ bfloat16 float64_to_bfloat16(float64 a, float_status *s)
return bfloat16_round_pack_canonical(&p, s);
}
+float32 float128_to_float32(float128 a, float_status *s)
+{
+ FloatParts64 p64;
+ FloatParts128 p128;
+
+ float128_unpack_canonical(&p128, a, s);
+ parts_float_to_float_narrow(&p64, &p128, s);
+ return float32_round_pack_canonical(&p64, s);
+}
+
+float64 float128_to_float64(float128 a, float_status *s)
+{
+ FloatParts64 p64;
+ FloatParts128 p128;
+
+ float128_unpack_canonical(&p128, a, s);
+ parts_float_to_float_narrow(&p64, &p128, s);
+ return float64_round_pack_canonical(&p64, s);
+}
+
+float128 float32_to_float128(float32 a, float_status *s)
+{
+ FloatParts64 p64;
+ FloatParts128 p128;
+
+ float32_unpack_canonical(&p64, a, s);
+ parts_float_to_float_widen(&p128, &p64, s);
+ return float128_round_pack_canonical(&p128, s);
+}
+
+float128 float64_to_float128(float64 a, float_status *s)
+{
+ FloatParts64 p64;
+ FloatParts128 p128;
+
+ float64_unpack_canonical(&p64, a, s);
+ parts_float_to_float_widen(&p128, &p64, s);
+ return float128_round_pack_canonical(&p128, s);
+}
+
/*
* Rounds the floating-point value `a' to an integer, and returns the
* result as a floating-point value. The operation is performed
@@ -5175,38 +5244,6 @@ floatx80 float32_to_floatx80(float32 a, float_status *status)
}
-/*----------------------------------------------------------------------------
-| Returns the result of converting the single-precision floating-point value
-| `a' to the double-precision floating-point format. The conversion is
-| performed according to the IEC/IEEE Standard for Binary Floating-Point
-| Arithmetic.
-*----------------------------------------------------------------------------*/
-
-float128 float32_to_float128(float32 a, float_status *status)
-{
- bool aSign;
- int aExp;
- uint32_t aSig;
-
- a = float32_squash_input_denormal(a, status);
- aSig = extractFloat32Frac( a );
- aExp = extractFloat32Exp( a );
- aSign = extractFloat32Sign( a );
- if ( aExp == 0xFF ) {
- if (aSig) {
- return commonNaNToFloat128(float32ToCommonNaN(a, status), status);
- }
- return packFloat128( aSign, 0x7FFF, 0, 0 );
- }
- if ( aExp == 0 ) {
- if ( aSig == 0 ) return packFloat128( aSign, 0, 0, 0 );
- normalizeFloat32Subnormal( aSig, &aExp, &aSig );
- --aExp;
- }
- return packFloat128( aSign, aExp + 0x3F80, ( (uint64_t) aSig )<<25, 0 );
-
-}
-
/*----------------------------------------------------------------------------
| Returns the remainder of the single-precision floating-point value `a'
| with respect to the corresponding value `b'. The operation is performed
@@ -5480,40 +5517,6 @@ floatx80 float64_to_floatx80(float64 a, float_status *status)
}
-/*----------------------------------------------------------------------------
-| Returns the result of converting the double-precision floating-point value
-| `a' to the quadruple-precision floating-point format. The conversion is
-| performed according to the IEC/IEEE Standard for Binary Floating-Point
-| Arithmetic.
-*----------------------------------------------------------------------------*/
-
-float128 float64_to_float128(float64 a, float_status *status)
-{
- bool aSign;
- int aExp;
- uint64_t aSig, zSig0, zSig1;
-
- a = float64_squash_input_denormal(a, status);
- aSig = extractFloat64Frac( a );
- aExp = extractFloat64Exp( a );
- aSign = extractFloat64Sign( a );
- if ( aExp == 0x7FF ) {
- if (aSig) {
- return commonNaNToFloat128(float64ToCommonNaN(a, status), status);
- }
- return packFloat128( aSign, 0x7FFF, 0, 0 );
- }
- if ( aExp == 0 ) {
- if ( aSig == 0 ) return packFloat128( aSign, 0, 0, 0 );
- normalizeFloat64Subnormal( aSig, &aExp, &aSig );
- --aExp;
- }
- shift128Right( aSig, 0, 4, &zSig0, &zSig1 );
- return packFloat128( aSign, aExp + 0x3C00, zSig0, zSig1 );
-
-}
-
-
/*----------------------------------------------------------------------------
| Returns the remainder of the double-precision floating-point value `a'
| with respect to the corresponding value `b'. The operation is performed
@@ -6915,74 +6918,6 @@ uint32_t float128_to_uint32(float128 a, float_status *status)
return res;
}
-/*----------------------------------------------------------------------------
-| Returns the result of converting the quadruple-precision floating-point
-| value `a' to the single-precision floating-point format. The conversion
-| is performed according to the IEC/IEEE Standard for Binary Floating-Point
-| Arithmetic.
-*----------------------------------------------------------------------------*/
-
-float32 float128_to_float32(float128 a, float_status *status)
-{
- bool aSign;
- int32_t aExp;
- uint64_t aSig0, aSig1;
- uint32_t zSig;
-
- aSig1 = extractFloat128Frac1( a );
- aSig0 = extractFloat128Frac0( a );
- aExp = extractFloat128Exp( a );
- aSign = extractFloat128Sign( a );
- if ( aExp == 0x7FFF ) {
- if ( aSig0 | aSig1 ) {
- return commonNaNToFloat32(float128ToCommonNaN(a, status), status);
- }
- return packFloat32( aSign, 0xFF, 0 );
- }
- aSig0 |= ( aSig1 != 0 );
- shift64RightJamming( aSig0, 18, &aSig0 );
- zSig = aSig0;
- if ( aExp || zSig ) {
- zSig |= 0x40000000;
- aExp -= 0x3F81;
- }
- return roundAndPackFloat32(aSign, aExp, zSig, status);
-
-}
-
-/*----------------------------------------------------------------------------
-| Returns the result of converting the quadruple-precision floating-point
-| value `a' to the double-precision floating-point format. The conversion
-| is performed according to the IEC/IEEE Standard for Binary Floating-Point
-| Arithmetic.
-*----------------------------------------------------------------------------*/
-
-float64 float128_to_float64(float128 a, float_status *status)
-{
- bool aSign;
- int32_t aExp;
- uint64_t aSig0, aSig1;
-
- aSig1 = extractFloat128Frac1( a );
- aSig0 = extractFloat128Frac0( a );
- aExp = extractFloat128Exp( a );
- aSign = extractFloat128Sign( a );
- if ( aExp == 0x7FFF ) {
- if ( aSig0 | aSig1 ) {
- return commonNaNToFloat64(float128ToCommonNaN(a, status), status);
- }
- return packFloat64( aSign, 0x7FF, 0 );
- }
- shortShift128Left( aSig0, aSig1, 14, &aSig0, &aSig1 );
- aSig0 |= ( aSig1 != 0 );
- if ( aExp || aSig0 ) {
- aSig0 |= UINT64_C(0x4000000000000000);
- aExp -= 0x3C01;
- }
- return roundAndPackFloat64(aSign, aExp, aSig0, status);
-
-}
-
/*----------------------------------------------------------------------------
| Returns the result of converting the quadruple-precision floating-point
| value `a' to the extended double-precision floating-point format. The
--
2.25.1
^ permalink raw reply related [flat|nested] 52+ messages in thread
* [PULL 45/46] softfloat: Move round_to_int to softfloat-parts.c.inc
2021-05-16 12:33 [PULL 00/46] softfloat: Convert float128 to FloatParts (part 1) Richard Henderson
` (43 preceding siblings ...)
2021-05-16 12:34 ` [PULL 44/46] softfloat: Convert float-to-float conversions with float128 Richard Henderson
@ 2021-05-16 12:34 ` Richard Henderson
2021-05-16 12:34 ` [PULL 46/46] softfloat: Move round_to_int_and_pack " Richard Henderson
` (2 subsequent siblings)
47 siblings, 0 replies; 52+ messages in thread
From: Richard Henderson @ 2021-05-16 12:34 UTC (permalink / raw)
To: qemu-devel; +Cc: peter.maydell, Alex Bennée
At the same time, convert to pointers, split out
parts$N_round_to_int_normal, define a macro for
parts_round_to_int using QEMU_GENERIC.
This necessarily meant some rearrangement to the
round_to_{,u}int_and_pack routines, so go ahead and
convert to parts_round_to_int_normal, which in turn
allows cleaning up of the raised exception handling.
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
fpu/softfloat.c | 434 ++++++++++----------------------------
fpu/softfloat-parts.c.inc | 157 ++++++++++++++
2 files changed, 263 insertions(+), 328 deletions(-)
diff --git a/fpu/softfloat.c b/fpu/softfloat.c
index d056b5730b..5647a05d5d 100644
--- a/fpu/softfloat.c
+++ b/fpu/softfloat.c
@@ -811,6 +811,24 @@ static FloatParts128 *parts128_div(FloatParts128 *a, FloatParts128 *b,
#define parts_div(A, B, S) \
PARTS_GENERIC_64_128(div, A)(A, B, S)
+static bool parts64_round_to_int_normal(FloatParts64 *a, FloatRoundMode rm,
+ int scale, int frac_size);
+static bool parts128_round_to_int_normal(FloatParts128 *a, FloatRoundMode r,
+ int scale, int frac_size);
+
+#define parts_round_to_int_normal(A, R, C, F) \
+ PARTS_GENERIC_64_128(round_to_int_normal, A)(A, R, C, F)
+
+static void parts64_round_to_int(FloatParts64 *a, FloatRoundMode rm,
+ int scale, float_status *s,
+ const FloatFmt *fmt);
+static void parts128_round_to_int(FloatParts128 *a, FloatRoundMode r,
+ int scale, float_status *s,
+ const FloatFmt *fmt);
+
+#define parts_round_to_int(A, R, C, S, F) \
+ PARTS_GENERIC_64_128(round_to_int, A)(A, R, C, S, F)
+
/*
* Helper functions for softfloat-parts.c.inc, per-size operations.
*/
@@ -2285,153 +2303,52 @@ float128 float64_to_float128(float64 a, float_status *s)
}
/*
- * Rounds the floating-point value `a' to an integer, and returns the
- * result as a floating-point value. The operation is performed
- * according to the IEC/IEEE Standard for Binary Floating-Point
- * Arithmetic.
+ * Round to integral value
*/
-static FloatParts64 round_to_int(FloatParts64 a, FloatRoundMode rmode,
- int scale, float_status *s)
-{
- switch (a.cls) {
- case float_class_qnan:
- case float_class_snan:
- parts_return_nan(&a, s);
- break;
-
- case float_class_zero:
- case float_class_inf:
- /* already "integral" */
- break;
-
- case float_class_normal:
- scale = MIN(MAX(scale, -0x10000), 0x10000);
- a.exp += scale;
-
- if (a.exp >= DECOMPOSED_BINARY_POINT) {
- /* already integral */
- break;
- }
- if (a.exp < 0) {
- bool one;
- /* all fractional */
- float_raise(float_flag_inexact, s);
- switch (rmode) {
- case float_round_nearest_even:
- one = a.exp == -1 && a.frac > DECOMPOSED_IMPLICIT_BIT;
- break;
- case float_round_ties_away:
- one = a.exp == -1 && a.frac >= DECOMPOSED_IMPLICIT_BIT;
- break;
- case float_round_to_zero:
- one = false;
- break;
- case float_round_up:
- one = !a.sign;
- break;
- case float_round_down:
- one = a.sign;
- break;
- case float_round_to_odd:
- one = true;
- break;
- default:
- g_assert_not_reached();
- }
-
- if (one) {
- a.frac = DECOMPOSED_IMPLICIT_BIT;
- a.exp = 0;
- } else {
- a.cls = float_class_zero;
- }
- } else {
- uint64_t frac_lsb = DECOMPOSED_IMPLICIT_BIT >> a.exp;
- uint64_t frac_lsbm1 = frac_lsb >> 1;
- uint64_t rnd_even_mask = (frac_lsb - 1) | frac_lsb;
- uint64_t rnd_mask = rnd_even_mask >> 1;
- uint64_t inc;
-
- switch (rmode) {
- case float_round_nearest_even:
- inc = ((a.frac & rnd_even_mask) != frac_lsbm1 ? frac_lsbm1 : 0);
- break;
- case float_round_ties_away:
- inc = frac_lsbm1;
- break;
- case float_round_to_zero:
- inc = 0;
- break;
- case float_round_up:
- inc = a.sign ? 0 : rnd_mask;
- break;
- case float_round_down:
- inc = a.sign ? rnd_mask : 0;
- break;
- case float_round_to_odd:
- inc = a.frac & frac_lsb ? 0 : rnd_mask;
- break;
- default:
- g_assert_not_reached();
- }
-
- if (a.frac & rnd_mask) {
- float_raise(float_flag_inexact, s);
- if (uadd64_overflow(a.frac, inc, &a.frac)) {
- a.frac >>= 1;
- a.frac |= DECOMPOSED_IMPLICIT_BIT;
- a.exp++;
- }
- a.frac &= ~rnd_mask;
- }
- }
- break;
- default:
- g_assert_not_reached();
- }
- return a;
-}
-
float16 float16_round_to_int(float16 a, float_status *s)
{
- FloatParts64 pa, pr;
+ FloatParts64 p;
- float16_unpack_canonical(&pa, a, s);
- pr = round_to_int(pa, s->float_rounding_mode, 0, s);
- return float16_round_pack_canonical(&pr, s);
+ float16_unpack_canonical(&p, a, s);
+ parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float16_params);
+ return float16_round_pack_canonical(&p, s);
}
float32 float32_round_to_int(float32 a, float_status *s)
{
- FloatParts64 pa, pr;
+ FloatParts64 p;
- float32_unpack_canonical(&pa, a, s);
- pr = round_to_int(pa, s->float_rounding_mode, 0, s);
- return float32_round_pack_canonical(&pr, s);
+ float32_unpack_canonical(&p, a, s);
+ parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float32_params);
+ return float32_round_pack_canonical(&p, s);
}
float64 float64_round_to_int(float64 a, float_status *s)
{
- FloatParts64 pa, pr;
+ FloatParts64 p;
- float64_unpack_canonical(&pa, a, s);
- pr = round_to_int(pa, s->float_rounding_mode, 0, s);
- return float64_round_pack_canonical(&pr, s);
+ float64_unpack_canonical(&p, a, s);
+ parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float64_params);
+ return float64_round_pack_canonical(&p, s);
}
-/*
- * Rounds the bfloat16 value `a' to an integer, and returns the
- * result as a bfloat16 value.
- */
-
bfloat16 bfloat16_round_to_int(bfloat16 a, float_status *s)
{
- FloatParts64 pa, pr;
+ FloatParts64 p;
- bfloat16_unpack_canonical(&pa, a, s);
- pr = round_to_int(pa, s->float_rounding_mode, 0, s);
- return bfloat16_round_pack_canonical(&pr, s);
+ bfloat16_unpack_canonical(&p, a, s);
+ parts_round_to_int(&p, s->float_rounding_mode, 0, s, &bfloat16_params);
+ return bfloat16_round_pack_canonical(&p, s);
+}
+
+float128 float128_round_to_int(float128 a, float_status *s)
+{
+ FloatParts128 p;
+
+ float128_unpack_canonical(&p, a, s);
+ parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float128_params);
+ return float128_round_pack_canonical(&p, s);
}
/*
@@ -2445,48 +2362,58 @@ bfloat16 bfloat16_round_to_int(bfloat16 a, float_status *s)
* is returned.
*/
-static int64_t round_to_int_and_pack(FloatParts64 in, FloatRoundMode rmode,
+static int64_t round_to_int_and_pack(FloatParts64 p, FloatRoundMode rmode,
int scale, int64_t min, int64_t max,
float_status *s)
{
+ int flags = 0;
uint64_t r;
- int orig_flags = get_float_exception_flags(s);
- FloatParts64 p = round_to_int(in, rmode, scale, s);
switch (p.cls) {
case float_class_snan:
case float_class_qnan:
- s->float_exception_flags = orig_flags | float_flag_invalid;
- return max;
+ flags = float_flag_invalid;
+ r = max;
+ break;
+
case float_class_inf:
- s->float_exception_flags = orig_flags | float_flag_invalid;
- return p.sign ? min : max;
+ flags = float_flag_invalid;
+ r = p.sign ? min : max;
+ break;
+
case float_class_zero:
return 0;
+
case float_class_normal:
+ /* TODO: 62 = N - 2, frac_size for rounding */
+ if (parts_round_to_int_normal(&p, rmode, scale, 62)) {
+ flags = float_flag_inexact;
+ }
+
if (p.exp <= DECOMPOSED_BINARY_POINT) {
r = p.frac >> (DECOMPOSED_BINARY_POINT - p.exp);
} else {
r = UINT64_MAX;
}
if (p.sign) {
- if (r <= -(uint64_t) min) {
- return -r;
+ if (r <= -(uint64_t)min) {
+ r = -r;
} else {
- s->float_exception_flags = orig_flags | float_flag_invalid;
- return min;
- }
- } else {
- if (r <= max) {
- return r;
- } else {
- s->float_exception_flags = orig_flags | float_flag_invalid;
- return max;
+ flags = float_flag_invalid;
+ r = min;
}
+ } else if (r > max) {
+ flags = float_flag_invalid;
+ r = max;
}
+ break;
+
default:
g_assert_not_reached();
}
+
+ float_raise(flags, s);
+ return r;
}
int8_t float16_to_int8_scalbn(float16 a, FloatRoundMode rmode, int scale,
@@ -2749,49 +2676,59 @@ int64_t bfloat16_to_int64_round_to_zero(bfloat16 a, float_status *s)
* flag.
*/
-static uint64_t round_to_uint_and_pack(FloatParts64 in, FloatRoundMode rmode,
+static uint64_t round_to_uint_and_pack(FloatParts64 p, FloatRoundMode rmode,
int scale, uint64_t max,
float_status *s)
{
- int orig_flags = get_float_exception_flags(s);
- FloatParts64 p = round_to_int(in, rmode, scale, s);
+ int flags = 0;
uint64_t r;
switch (p.cls) {
case float_class_snan:
case float_class_qnan:
- s->float_exception_flags = orig_flags | float_flag_invalid;
- return max;
+ flags = float_flag_invalid;
+ r = max;
+ break;
+
case float_class_inf:
- s->float_exception_flags = orig_flags | float_flag_invalid;
- return p.sign ? 0 : max;
+ flags = float_flag_invalid;
+ r = p.sign ? 0 : max;
+ break;
+
case float_class_zero:
return 0;
+
case float_class_normal:
+ /* TODO: 62 = N - 2, frac_size for rounding */
+ if (parts_round_to_int_normal(&p, rmode, scale, 62)) {
+ flags = float_flag_inexact;
+ if (p.cls == float_class_zero) {
+ r = 0;
+ break;
+ }
+ }
+
if (p.sign) {
- s->float_exception_flags = orig_flags | float_flag_invalid;
- return 0;
- }
-
- if (p.exp <= DECOMPOSED_BINARY_POINT) {
- r = p.frac >> (DECOMPOSED_BINARY_POINT - p.exp);
+ flags = float_flag_invalid;
+ r = 0;
+ } else if (p.exp > DECOMPOSED_BINARY_POINT) {
+ flags = float_flag_invalid;
+ r = max;
} else {
- s->float_exception_flags = orig_flags | float_flag_invalid;
- return max;
+ r = p.frac >> (DECOMPOSED_BINARY_POINT - p.exp);
+ if (r > max) {
+ flags = float_flag_invalid;
+ r = max;
+ }
}
+ break;
- /* For uint64 this will never trip, but if p.exp is too large
- * to shift a decomposed fraction we shall have exited via the
- * 3rd leg above.
- */
- if (r > max) {
- s->float_exception_flags = orig_flags | float_flag_invalid;
- return max;
- }
- return r;
default:
g_assert_not_reached();
}
+
+ float_raise(flags, s);
+ return r;
}
uint8_t float16_to_uint8_scalbn(float16 a, FloatRoundMode rmode, int scale,
@@ -6956,165 +6893,6 @@ floatx80 float128_to_floatx80(float128 a, float_status *status)
}
-/*----------------------------------------------------------------------------
-| Rounds the quadruple-precision floating-point value `a' to an integer, and
-| returns the result as a quadruple-precision floating-point value. The
-| operation is performed according to the IEC/IEEE Standard for Binary
-| Floating-Point Arithmetic.
-*----------------------------------------------------------------------------*/
-
-float128 float128_round_to_int(float128 a, float_status *status)
-{
- bool aSign;
- int32_t aExp;
- uint64_t lastBitMask, roundBitsMask;
- float128 z;
-
- aExp = extractFloat128Exp( a );
- if ( 0x402F <= aExp ) {
- if ( 0x406F <= aExp ) {
- if ( ( aExp == 0x7FFF )
- && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) )
- ) {
- return propagateFloat128NaN(a, a, status);
- }
- return a;
- }
- lastBitMask = 1;
- lastBitMask = ( lastBitMask<<( 0x406E - aExp ) )<<1;
- roundBitsMask = lastBitMask - 1;
- z = a;
- switch (status->float_rounding_mode) {
- case float_round_nearest_even:
- if ( lastBitMask ) {
- add128( z.high, z.low, 0, lastBitMask>>1, &z.high, &z.low );
- if ( ( z.low & roundBitsMask ) == 0 ) z.low &= ~ lastBitMask;
- }
- else {
- if ( (int64_t) z.low < 0 ) {
- ++z.high;
- if ( (uint64_t) ( z.low<<1 ) == 0 ) z.high &= ~1;
- }
- }
- break;
- case float_round_ties_away:
- if (lastBitMask) {
- add128(z.high, z.low, 0, lastBitMask >> 1, &z.high, &z.low);
- } else {
- if ((int64_t) z.low < 0) {
- ++z.high;
- }
- }
- break;
- case float_round_to_zero:
- break;
- case float_round_up:
- if (!extractFloat128Sign(z)) {
- add128(z.high, z.low, 0, roundBitsMask, &z.high, &z.low);
- }
- break;
- case float_round_down:
- if (extractFloat128Sign(z)) {
- add128(z.high, z.low, 0, roundBitsMask, &z.high, &z.low);
- }
- break;
- case float_round_to_odd:
- /*
- * Note that if lastBitMask == 0, the last bit is the lsb
- * of high, and roundBitsMask == -1.
- */
- if ((lastBitMask ? z.low & lastBitMask : z.high & 1) == 0) {
- add128(z.high, z.low, 0, roundBitsMask, &z.high, &z.low);
- }
- break;
- default:
- abort();
- }
- z.low &= ~ roundBitsMask;
- }
- else {
- if ( aExp < 0x3FFF ) {
- if ( ( ( (uint64_t) ( a.high<<1 ) ) | a.low ) == 0 ) return a;
- float_raise(float_flag_inexact, status);
- aSign = extractFloat128Sign( a );
- switch (status->float_rounding_mode) {
- case float_round_nearest_even:
- if ( ( aExp == 0x3FFE )
- && ( extractFloat128Frac0( a )
- | extractFloat128Frac1( a ) )
- ) {
- return packFloat128( aSign, 0x3FFF, 0, 0 );
- }
- break;
- case float_round_ties_away:
- if (aExp == 0x3FFE) {
- return packFloat128(aSign, 0x3FFF, 0, 0);
- }
- break;
- case float_round_down:
- return
- aSign ? packFloat128( 1, 0x3FFF, 0, 0 )
- : packFloat128( 0, 0, 0, 0 );
- case float_round_up:
- return
- aSign ? packFloat128( 1, 0, 0, 0 )
- : packFloat128( 0, 0x3FFF, 0, 0 );
-
- case float_round_to_odd:
- return packFloat128(aSign, 0x3FFF, 0, 0);
-
- case float_round_to_zero:
- break;
- }
- return packFloat128( aSign, 0, 0, 0 );
- }
- lastBitMask = 1;
- lastBitMask <<= 0x402F - aExp;
- roundBitsMask = lastBitMask - 1;
- z.low = 0;
- z.high = a.high;
- switch (status->float_rounding_mode) {
- case float_round_nearest_even:
- z.high += lastBitMask>>1;
- if ( ( ( z.high & roundBitsMask ) | a.low ) == 0 ) {
- z.high &= ~ lastBitMask;
- }
- break;
- case float_round_ties_away:
- z.high += lastBitMask>>1;
- break;
- case float_round_to_zero:
- break;
- case float_round_up:
- if (!extractFloat128Sign(z)) {
- z.high |= ( a.low != 0 );
- z.high += roundBitsMask;
- }
- break;
- case float_round_down:
- if (extractFloat128Sign(z)) {
- z.high |= (a.low != 0);
- z.high += roundBitsMask;
- }
- break;
- case float_round_to_odd:
- if ((z.high & lastBitMask) == 0) {
- z.high |= (a.low != 0);
- z.high += roundBitsMask;
- }
- break;
- default:
- abort();
- }
- z.high &= ~ roundBitsMask;
- }
- if ( ( z.low != a.low ) || ( z.high != a.high ) ) {
- float_raise(float_flag_inexact, status);
- }
- return z;
-
-}
-
/*----------------------------------------------------------------------------
| Returns the remainder of the quadruple-precision floating-point value `a'
| with respect to the corresponding value `b'. The operation is performed
diff --git a/fpu/softfloat-parts.c.inc b/fpu/softfloat-parts.c.inc
index f8165d92f9..b2c4624d8c 100644
--- a/fpu/softfloat-parts.c.inc
+++ b/fpu/softfloat-parts.c.inc
@@ -594,3 +594,160 @@ static FloatPartsN *partsN(div)(FloatPartsN *a, FloatPartsN *b,
a->cls = float_class_inf;
return a;
}
+
+/*
+ * Rounds the floating-point value `a' to an integer, and returns the
+ * result as a floating-point value. The operation is performed
+ * according to the IEC/IEEE Standard for Binary Floating-Point
+ * Arithmetic.
+ *
+ * parts_round_to_int_normal is an internal helper function for
+ * normal numbers only, returning true for inexact but not directly
+ * raising float_flag_inexact.
+ */
+static bool partsN(round_to_int_normal)(FloatPartsN *a, FloatRoundMode rmode,
+ int scale, int frac_size)
+{
+ uint64_t frac_lsb, frac_lsbm1, rnd_even_mask, rnd_mask, inc;
+ int shift_adj;
+
+ scale = MIN(MAX(scale, -0x10000), 0x10000);
+ a->exp += scale;
+
+ if (a->exp < 0) {
+ bool one;
+
+ /* All fractional */
+ switch (rmode) {
+ case float_round_nearest_even:
+ one = false;
+ if (a->exp == -1) {
+ FloatPartsN tmp;
+ /* Shift left one, discarding DECOMPOSED_IMPLICIT_BIT */
+ frac_add(&tmp, a, a);
+ /* Anything remaining means frac > 0.5. */
+ one = !frac_eqz(&tmp);
+ }
+ break;
+ case float_round_ties_away:
+ one = a->exp == -1;
+ break;
+ case float_round_to_zero:
+ one = false;
+ break;
+ case float_round_up:
+ one = !a->sign;
+ break;
+ case float_round_down:
+ one = a->sign;
+ break;
+ case float_round_to_odd:
+ one = true;
+ break;
+ default:
+ g_assert_not_reached();
+ }
+
+ frac_clear(a);
+ a->exp = 0;
+ if (one) {
+ a->frac_hi = DECOMPOSED_IMPLICIT_BIT;
+ } else {
+ a->cls = float_class_zero;
+ }
+ return true;
+ }
+
+ if (a->exp >= frac_size) {
+ /* All integral */
+ return false;
+ }
+
+ if (N > 64 && a->exp < N - 64) {
+ /*
+ * Rounding is not in the low word -- shift lsb to bit 2,
+ * which leaves room for sticky and rounding bit.
+ */
+ shift_adj = (N - 1) - (a->exp + 2);
+ frac_shrjam(a, shift_adj);
+ frac_lsb = 1 << 2;
+ } else {
+ shift_adj = 0;
+ frac_lsb = DECOMPOSED_IMPLICIT_BIT >> (a->exp & 63);
+ }
+
+ frac_lsbm1 = frac_lsb >> 1;
+ rnd_mask = frac_lsb - 1;
+ rnd_even_mask = rnd_mask | frac_lsb;
+
+ if (!(a->frac_lo & rnd_mask)) {
+ /* Fractional bits already clear, undo the shift above. */
+ frac_shl(a, shift_adj);
+ return false;
+ }
+
+ switch (rmode) {
+ case float_round_nearest_even:
+ inc = ((a->frac_lo & rnd_even_mask) != frac_lsbm1 ? frac_lsbm1 : 0);
+ break;
+ case float_round_ties_away:
+ inc = frac_lsbm1;
+ break;
+ case float_round_to_zero:
+ inc = 0;
+ break;
+ case float_round_up:
+ inc = a->sign ? 0 : rnd_mask;
+ break;
+ case float_round_down:
+ inc = a->sign ? rnd_mask : 0;
+ break;
+ case float_round_to_odd:
+ inc = a->frac_lo & frac_lsb ? 0 : rnd_mask;
+ break;
+ default:
+ g_assert_not_reached();
+ }
+
+ if (shift_adj == 0) {
+ if (frac_addi(a, a, inc)) {
+ frac_shr(a, 1);
+ a->frac_hi |= DECOMPOSED_IMPLICIT_BIT;
+ a->exp++;
+ }
+ a->frac_lo &= ~rnd_mask;
+ } else {
+ frac_addi(a, a, inc);
+ a->frac_lo &= ~rnd_mask;
+ /* Be careful shifting back, not to overflow */
+ frac_shl(a, shift_adj - 1);
+ if (a->frac_hi & DECOMPOSED_IMPLICIT_BIT) {
+ a->exp++;
+ } else {
+ frac_add(a, a, a);
+ }
+ }
+ return true;
+}
+
+static void partsN(round_to_int)(FloatPartsN *a, FloatRoundMode rmode,
+ int scale, float_status *s,
+ const FloatFmt *fmt)
+{
+ switch (a->cls) {
+ case float_class_qnan:
+ case float_class_snan:
+ parts_return_nan(a, s);
+ break;
+ case float_class_zero:
+ case float_class_inf:
+ break;
+ case float_class_normal:
+ if (parts_round_to_int_normal(a, rmode, scale, fmt->frac_size)) {
+ float_raise(float_flag_inexact, s);
+ }
+ break;
+ default:
+ g_assert_not_reached();
+ }
+}
--
2.25.1
^ permalink raw reply related [flat|nested] 52+ messages in thread
* [PULL 46/46] softfloat: Move round_to_int_and_pack to softfloat-parts.c.inc
2021-05-16 12:33 [PULL 00/46] softfloat: Convert float128 to FloatParts (part 1) Richard Henderson
` (44 preceding siblings ...)
2021-05-16 12:34 ` [PULL 45/46] softfloat: Move round_to_int to softfloat-parts.c.inc Richard Henderson
@ 2021-05-16 12:34 ` Richard Henderson
2021-05-16 13:19 ` [PULL 00/46] softfloat: Convert float128 to FloatParts (part 1) no-reply
2021-05-18 10:11 ` Peter Maydell
47 siblings, 0 replies; 52+ messages in thread
From: Richard Henderson @ 2021-05-16 12:34 UTC (permalink / raw)
To: qemu-devel; +Cc: peter.maydell, Alex Bennée
Rename to parts$N_float_to_sint. Reimplement
float128_to_int{32,64}{_round_to_zero} with FloatParts128.
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
fpu/softfloat.c | 365 +++++++++-----------------------------
fpu/softfloat-parts.c.inc | 64 +++++++
2 files changed, 145 insertions(+), 284 deletions(-)
diff --git a/fpu/softfloat.c b/fpu/softfloat.c
index 5647a05d5d..0dc2203477 100644
--- a/fpu/softfloat.c
+++ b/fpu/softfloat.c
@@ -829,6 +829,16 @@ static void parts128_round_to_int(FloatParts128 *a, FloatRoundMode r,
#define parts_round_to_int(A, R, C, S, F) \
PARTS_GENERIC_64_128(round_to_int, A)(A, R, C, S, F)
+static int64_t parts64_float_to_sint(FloatParts64 *p, FloatRoundMode rmode,
+ int scale, int64_t min, int64_t max,
+ float_status *s);
+static int64_t parts128_float_to_sint(FloatParts128 *p, FloatRoundMode rmode,
+ int scale, int64_t min, int64_t max,
+ float_status *s);
+
+#define parts_float_to_sint(P, R, Z, MN, MX, S) \
+ PARTS_GENERIC_64_128(float_to_sint, P)(P, R, Z, MN, MX, S)
+
/*
* Helper functions for softfloat-parts.c.inc, per-size operations.
*/
@@ -2352,69 +2362,8 @@ float128 float128_round_to_int(float128 a, float_status *s)
}
/*
- * Returns the result of converting the floating-point value `a' to
- * the two's complement integer format. The conversion is performed
- * according to the IEC/IEEE Standard for Binary Floating-Point
- * Arithmetic---which means in particular that the conversion is
- * rounded according to the current rounding mode. If `a' is a NaN,
- * the largest positive integer is returned. Otherwise, if the
- * conversion overflows, the largest integer with the same sign as `a'
- * is returned.
-*/
-
-static int64_t round_to_int_and_pack(FloatParts64 p, FloatRoundMode rmode,
- int scale, int64_t min, int64_t max,
- float_status *s)
-{
- int flags = 0;
- uint64_t r;
-
- switch (p.cls) {
- case float_class_snan:
- case float_class_qnan:
- flags = float_flag_invalid;
- r = max;
- break;
-
- case float_class_inf:
- flags = float_flag_invalid;
- r = p.sign ? min : max;
- break;
-
- case float_class_zero:
- return 0;
-
- case float_class_normal:
- /* TODO: 62 = N - 2, frac_size for rounding */
- if (parts_round_to_int_normal(&p, rmode, scale, 62)) {
- flags = float_flag_inexact;
- }
-
- if (p.exp <= DECOMPOSED_BINARY_POINT) {
- r = p.frac >> (DECOMPOSED_BINARY_POINT - p.exp);
- } else {
- r = UINT64_MAX;
- }
- if (p.sign) {
- if (r <= -(uint64_t)min) {
- r = -r;
- } else {
- flags = float_flag_invalid;
- r = min;
- }
- } else if (r > max) {
- flags = float_flag_invalid;
- r = max;
- }
- break;
-
- default:
- g_assert_not_reached();
- }
-
- float_raise(flags, s);
- return r;
-}
+ * Floating-point to signed integer conversions
+ */
int8_t float16_to_int8_scalbn(float16 a, FloatRoundMode rmode, int scale,
float_status *s)
@@ -2422,7 +2371,7 @@ int8_t float16_to_int8_scalbn(float16 a, FloatRoundMode rmode, int scale,
FloatParts64 p;
float16_unpack_canonical(&p, a, s);
- return round_to_int_and_pack(p, rmode, scale, INT8_MIN, INT8_MAX, s);
+ return parts_float_to_sint(&p, rmode, scale, INT8_MIN, INT8_MAX, s);
}
int16_t float16_to_int16_scalbn(float16 a, FloatRoundMode rmode, int scale,
@@ -2431,7 +2380,7 @@ int16_t float16_to_int16_scalbn(float16 a, FloatRoundMode rmode, int scale,
FloatParts64 p;
float16_unpack_canonical(&p, a, s);
- return round_to_int_and_pack(p, rmode, scale, INT16_MIN, INT16_MAX, s);
+ return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
}
int32_t float16_to_int32_scalbn(float16 a, FloatRoundMode rmode, int scale,
@@ -2440,7 +2389,7 @@ int32_t float16_to_int32_scalbn(float16 a, FloatRoundMode rmode, int scale,
FloatParts64 p;
float16_unpack_canonical(&p, a, s);
- return round_to_int_and_pack(p, rmode, scale, INT32_MIN, INT32_MAX, s);
+ return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
}
int64_t float16_to_int64_scalbn(float16 a, FloatRoundMode rmode, int scale,
@@ -2449,7 +2398,7 @@ int64_t float16_to_int64_scalbn(float16 a, FloatRoundMode rmode, int scale,
FloatParts64 p;
float16_unpack_canonical(&p, a, s);
- return round_to_int_and_pack(p, rmode, scale, INT64_MIN, INT64_MAX, s);
+ return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
}
int16_t float32_to_int16_scalbn(float32 a, FloatRoundMode rmode, int scale,
@@ -2458,7 +2407,7 @@ int16_t float32_to_int16_scalbn(float32 a, FloatRoundMode rmode, int scale,
FloatParts64 p;
float32_unpack_canonical(&p, a, s);
- return round_to_int_and_pack(p, rmode, scale, INT16_MIN, INT16_MAX, s);
+ return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
}
int32_t float32_to_int32_scalbn(float32 a, FloatRoundMode rmode, int scale,
@@ -2467,7 +2416,7 @@ int32_t float32_to_int32_scalbn(float32 a, FloatRoundMode rmode, int scale,
FloatParts64 p;
float32_unpack_canonical(&p, a, s);
- return round_to_int_and_pack(p, rmode, scale, INT32_MIN, INT32_MAX, s);
+ return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
}
int64_t float32_to_int64_scalbn(float32 a, FloatRoundMode rmode, int scale,
@@ -2476,7 +2425,7 @@ int64_t float32_to_int64_scalbn(float32 a, FloatRoundMode rmode, int scale,
FloatParts64 p;
float32_unpack_canonical(&p, a, s);
- return round_to_int_and_pack(p, rmode, scale, INT64_MIN, INT64_MAX, s);
+ return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
}
int16_t float64_to_int16_scalbn(float64 a, FloatRoundMode rmode, int scale,
@@ -2485,7 +2434,7 @@ int16_t float64_to_int16_scalbn(float64 a, FloatRoundMode rmode, int scale,
FloatParts64 p;
float64_unpack_canonical(&p, a, s);
- return round_to_int_and_pack(p, rmode, scale, INT16_MIN, INT16_MAX, s);
+ return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
}
int32_t float64_to_int32_scalbn(float64 a, FloatRoundMode rmode, int scale,
@@ -2494,7 +2443,7 @@ int32_t float64_to_int32_scalbn(float64 a, FloatRoundMode rmode, int scale,
FloatParts64 p;
float64_unpack_canonical(&p, a, s);
- return round_to_int_and_pack(p, rmode, scale, INT32_MIN, INT32_MAX, s);
+ return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
}
int64_t float64_to_int64_scalbn(float64 a, FloatRoundMode rmode, int scale,
@@ -2503,7 +2452,52 @@ int64_t float64_to_int64_scalbn(float64 a, FloatRoundMode rmode, int scale,
FloatParts64 p;
float64_unpack_canonical(&p, a, s);
- return round_to_int_and_pack(p, rmode, scale, INT64_MIN, INT64_MAX, s);
+ return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
+}
+
+int16_t bfloat16_to_int16_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
+ float_status *s)
+{
+ FloatParts64 p;
+
+ bfloat16_unpack_canonical(&p, a, s);
+ return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
+}
+
+int32_t bfloat16_to_int32_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
+ float_status *s)
+{
+ FloatParts64 p;
+
+ bfloat16_unpack_canonical(&p, a, s);
+ return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
+}
+
+int64_t bfloat16_to_int64_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
+ float_status *s)
+{
+ FloatParts64 p;
+
+ bfloat16_unpack_canonical(&p, a, s);
+ return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
+}
+
+static int32_t float128_to_int32_scalbn(float128 a, FloatRoundMode rmode,
+ int scale, float_status *s)
+{
+ FloatParts128 p;
+
+ float128_unpack_canonical(&p, a, s);
+ return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
+}
+
+static int64_t float128_to_int64_scalbn(float128 a, FloatRoundMode rmode,
+ int scale, float_status *s)
+{
+ FloatParts128 p;
+
+ float128_unpack_canonical(&p, a, s);
+ return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
}
int8_t float16_to_int8(float16 a, float_status *s)
@@ -2556,6 +2550,16 @@ int64_t float64_to_int64(float64 a, float_status *s)
return float64_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
}
+int32_t float128_to_int32(float128 a, float_status *s)
+{
+ return float128_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
+}
+
+int64_t float128_to_int64(float128 a, float_status *s)
+{
+ return float128_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
+}
+
int16_t float16_to_int16_round_to_zero(float16 a, float_status *s)
{
return float16_to_int16_scalbn(a, float_round_to_zero, 0, s);
@@ -2601,36 +2605,14 @@ int64_t float64_to_int64_round_to_zero(float64 a, float_status *s)
return float64_to_int64_scalbn(a, float_round_to_zero, 0, s);
}
-/*
- * Returns the result of converting the floating-point value `a' to
- * the two's complement integer format.
- */
-
-int16_t bfloat16_to_int16_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
- float_status *s)
+int32_t float128_to_int32_round_to_zero(float128 a, float_status *s)
{
- FloatParts64 p;
-
- bfloat16_unpack_canonical(&p, a, s);
- return round_to_int_and_pack(p, rmode, scale, INT16_MIN, INT16_MAX, s);
+ return float128_to_int32_scalbn(a, float_round_to_zero, 0, s);
}
-int32_t bfloat16_to_int32_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
- float_status *s)
+int64_t float128_to_int64_round_to_zero(float128 a, float_status *s)
{
- FloatParts64 p;
-
- bfloat16_unpack_canonical(&p, a, s);
- return round_to_int_and_pack(p, rmode, scale, INT32_MIN, INT32_MAX, s);
-}
-
-int64_t bfloat16_to_int64_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
- float_status *s)
-{
- FloatParts64 p;
-
- bfloat16_unpack_canonical(&p, a, s);
- return round_to_int_and_pack(p, rmode, scale, INT64_MIN, INT64_MAX, s);
+ return float128_to_int64_scalbn(a, float_round_to_zero, 0, s);
}
int16_t bfloat16_to_int16(bfloat16 a, float_status *s)
@@ -6554,191 +6536,6 @@ floatx80 floatx80_sqrt(floatx80 a, float_status *status)
0, zExp, zSig0, zSig1, status);
}
-/*----------------------------------------------------------------------------
-| Returns the result of converting the quadruple-precision floating-point
-| value `a' to the 32-bit two's complement integer format. The conversion
-| is performed according to the IEC/IEEE Standard for Binary Floating-Point
-| Arithmetic---which means in particular that the conversion is rounded
-| according to the current rounding mode. If `a' is a NaN, the largest
-| positive integer is returned. Otherwise, if the conversion overflows, the
-| largest integer with the same sign as `a' is returned.
-*----------------------------------------------------------------------------*/
-
-int32_t float128_to_int32(float128 a, float_status *status)
-{
- bool aSign;
- int32_t aExp, shiftCount;
- uint64_t aSig0, aSig1;
-
- aSig1 = extractFloat128Frac1( a );
- aSig0 = extractFloat128Frac0( a );
- aExp = extractFloat128Exp( a );
- aSign = extractFloat128Sign( a );
- if ( ( aExp == 0x7FFF ) && ( aSig0 | aSig1 ) ) aSign = 0;
- if ( aExp ) aSig0 |= UINT64_C(0x0001000000000000);
- aSig0 |= ( aSig1 != 0 );
- shiftCount = 0x4028 - aExp;
- if ( 0 < shiftCount ) shift64RightJamming( aSig0, shiftCount, &aSig0 );
- return roundAndPackInt32(aSign, aSig0, status);
-
-}
-
-/*----------------------------------------------------------------------------
-| Returns the result of converting the quadruple-precision floating-point
-| value `a' to the 32-bit two's complement integer format. The conversion
-| is performed according to the IEC/IEEE Standard for Binary Floating-Point
-| Arithmetic, except that the conversion is always rounded toward zero. If
-| `a' is a NaN, the largest positive integer is returned. Otherwise, if the
-| conversion overflows, the largest integer with the same sign as `a' is
-| returned.
-*----------------------------------------------------------------------------*/
-
-int32_t float128_to_int32_round_to_zero(float128 a, float_status *status)
-{
- bool aSign;
- int32_t aExp, shiftCount;
- uint64_t aSig0, aSig1, savedASig;
- int32_t z;
-
- aSig1 = extractFloat128Frac1( a );
- aSig0 = extractFloat128Frac0( a );
- aExp = extractFloat128Exp( a );
- aSign = extractFloat128Sign( a );
- aSig0 |= ( aSig1 != 0 );
- if ( 0x401E < aExp ) {
- if ( ( aExp == 0x7FFF ) && aSig0 ) aSign = 0;
- goto invalid;
- }
- else if ( aExp < 0x3FFF ) {
- if (aExp || aSig0) {
- float_raise(float_flag_inexact, status);
- }
- return 0;
- }
- aSig0 |= UINT64_C(0x0001000000000000);
- shiftCount = 0x402F - aExp;
- savedASig = aSig0;
- aSig0 >>= shiftCount;
- z = aSig0;
- if ( aSign ) z = - z;
- if ( ( z < 0 ) ^ aSign ) {
- invalid:
- float_raise(float_flag_invalid, status);
- return aSign ? INT32_MIN : INT32_MAX;
- }
- if ( ( aSig0<<shiftCount ) != savedASig ) {
- float_raise(float_flag_inexact, status);
- }
- return z;
-
-}
-
-/*----------------------------------------------------------------------------
-| Returns the result of converting the quadruple-precision floating-point
-| value `a' to the 64-bit two's complement integer format. The conversion
-| is performed according to the IEC/IEEE Standard for Binary Floating-Point
-| Arithmetic---which means in particular that the conversion is rounded
-| according to the current rounding mode. If `a' is a NaN, the largest
-| positive integer is returned. Otherwise, if the conversion overflows, the
-| largest integer with the same sign as `a' is returned.
-*----------------------------------------------------------------------------*/
-
-int64_t float128_to_int64(float128 a, float_status *status)
-{
- bool aSign;
- int32_t aExp, shiftCount;
- uint64_t aSig0, aSig1;
-
- aSig1 = extractFloat128Frac1( a );
- aSig0 = extractFloat128Frac0( a );
- aExp = extractFloat128Exp( a );
- aSign = extractFloat128Sign( a );
- if ( aExp ) aSig0 |= UINT64_C(0x0001000000000000);
- shiftCount = 0x402F - aExp;
- if ( shiftCount <= 0 ) {
- if ( 0x403E < aExp ) {
- float_raise(float_flag_invalid, status);
- if ( ! aSign
- || ( ( aExp == 0x7FFF )
- && ( aSig1 || ( aSig0 != UINT64_C(0x0001000000000000) ) )
- )
- ) {
- return INT64_MAX;
- }
- return INT64_MIN;
- }
- shortShift128Left( aSig0, aSig1, - shiftCount, &aSig0, &aSig1 );
- }
- else {
- shift64ExtraRightJamming( aSig0, aSig1, shiftCount, &aSig0, &aSig1 );
- }
- return roundAndPackInt64(aSign, aSig0, aSig1, status);
-
-}
-
-/*----------------------------------------------------------------------------
-| Returns the result of converting the quadruple-precision floating-point
-| value `a' to the 64-bit two's complement integer format. The conversion
-| is performed according to the IEC/IEEE Standard for Binary Floating-Point
-| Arithmetic, except that the conversion is always rounded toward zero.
-| If `a' is a NaN, the largest positive integer is returned. Otherwise, if
-| the conversion overflows, the largest integer with the same sign as `a' is
-| returned.
-*----------------------------------------------------------------------------*/
-
-int64_t float128_to_int64_round_to_zero(float128 a, float_status *status)
-{
- bool aSign;
- int32_t aExp, shiftCount;
- uint64_t aSig0, aSig1;
- int64_t z;
-
- aSig1 = extractFloat128Frac1( a );
- aSig0 = extractFloat128Frac0( a );
- aExp = extractFloat128Exp( a );
- aSign = extractFloat128Sign( a );
- if ( aExp ) aSig0 |= UINT64_C(0x0001000000000000);
- shiftCount = aExp - 0x402F;
- if ( 0 < shiftCount ) {
- if ( 0x403E <= aExp ) {
- aSig0 &= UINT64_C(0x0000FFFFFFFFFFFF);
- if ( ( a.high == UINT64_C(0xC03E000000000000) )
- && ( aSig1 < UINT64_C(0x0002000000000000) ) ) {
- if (aSig1) {
- float_raise(float_flag_inexact, status);
- }
- }
- else {
- float_raise(float_flag_invalid, status);
- if ( ! aSign || ( ( aExp == 0x7FFF ) && ( aSig0 | aSig1 ) ) ) {
- return INT64_MAX;
- }
- }
- return INT64_MIN;
- }
- z = ( aSig0<<shiftCount ) | ( aSig1>>( ( - shiftCount ) & 63 ) );
- if ( (uint64_t) ( aSig1<<shiftCount ) ) {
- float_raise(float_flag_inexact, status);
- }
- }
- else {
- if ( aExp < 0x3FFF ) {
- if ( aExp | aSig0 | aSig1 ) {
- float_raise(float_flag_inexact, status);
- }
- return 0;
- }
- z = aSig0>>( - shiftCount );
- if ( aSig1
- || ( shiftCount && (uint64_t) ( aSig0<<( shiftCount & 63 ) ) ) ) {
- float_raise(float_flag_inexact, status);
- }
- }
- if ( aSign ) z = - z;
- return z;
-
-}
-
/*----------------------------------------------------------------------------
| Returns the result of converting the quadruple-precision floating-point value
| `a' to the 64-bit unsigned integer format. The conversion is
diff --git a/fpu/softfloat-parts.c.inc b/fpu/softfloat-parts.c.inc
index b2c4624d8c..a897a5a743 100644
--- a/fpu/softfloat-parts.c.inc
+++ b/fpu/softfloat-parts.c.inc
@@ -751,3 +751,67 @@ static void partsN(round_to_int)(FloatPartsN *a, FloatRoundMode rmode,
g_assert_not_reached();
}
}
+
+/*
+ * Returns the result of converting the floating-point value `a' to
+ * the two's complement integer format. The conversion is performed
+ * according to the IEC/IEEE Standard for Binary Floating-Point
+ * Arithmetic---which means in particular that the conversion is
+ * rounded according to the current rounding mode. If `a' is a NaN,
+ * the largest positive integer is returned. Otherwise, if the
+ * conversion overflows, the largest integer with the same sign as `a'
+ * is returned.
+*/
+static int64_t partsN(float_to_sint)(FloatPartsN *p, FloatRoundMode rmode,
+ int scale, int64_t min, int64_t max,
+ float_status *s)
+{
+ int flags = 0;
+ uint64_t r;
+
+ switch (p->cls) {
+ case float_class_snan:
+ case float_class_qnan:
+ flags = float_flag_invalid;
+ r = max;
+ break;
+
+ case float_class_inf:
+ flags = float_flag_invalid;
+ r = p->sign ? min : max;
+ break;
+
+ case float_class_zero:
+ return 0;
+
+ case float_class_normal:
+ /* TODO: N - 2 is frac_size for rounding; could use input fmt. */
+ if (parts_round_to_int_normal(p, rmode, scale, N - 2)) {
+ flags = float_flag_inexact;
+ }
+
+ if (p->exp <= DECOMPOSED_BINARY_POINT) {
+ r = p->frac_hi >> (DECOMPOSED_BINARY_POINT - p->exp);
+ } else {
+ r = UINT64_MAX;
+ }
+ if (p->sign) {
+ if (r <= -(uint64_t)min) {
+ r = -r;
+ } else {
+ flags = float_flag_invalid;
+ r = min;
+ }
+ } else if (r > max) {
+ flags = float_flag_invalid;
+ r = max;
+ }
+ break;
+
+ default:
+ g_assert_not_reached();
+ }
+
+ float_raise(flags, s);
+ return r;
+}
--
2.25.1
^ permalink raw reply related [flat|nested] 52+ messages in thread
* Re: [PULL 00/46] softfloat: Convert float128 to FloatParts (part 1)
2021-05-16 12:33 [PULL 00/46] softfloat: Convert float128 to FloatParts (part 1) Richard Henderson
` (45 preceding siblings ...)
2021-05-16 12:34 ` [PULL 46/46] softfloat: Move round_to_int_and_pack " Richard Henderson
@ 2021-05-16 13:19 ` no-reply
2021-05-18 10:11 ` Peter Maydell
47 siblings, 0 replies; 52+ messages in thread
From: no-reply @ 2021-05-16 13:19 UTC (permalink / raw)
To: richard.henderson; +Cc: peter.maydell, qemu-devel
Patchew URL: https://patchew.org/QEMU/20210516123431.718318-1-richard.henderson@linaro.org/
Hi,
This series seems to have some coding style problems. See output below for
more information:
Type: series
Message-id: 20210516123431.718318-1-richard.henderson@linaro.org
Subject: [PULL 00/46] softfloat: Convert float128 to FloatParts (part 1)
=== TEST SCRIPT BEGIN ===
#!/bin/bash
git rev-parse base > /dev/null || exit 0
git config --local diff.renamelimit 0
git config --local diff.renames True
git config --local diff.algorithm histogram
./scripts/checkpatch.pl --mailback base..
=== TEST SCRIPT END ===
Updating 3c8cf5a9c21ff8782164d1def7f44bd888713384
From https://github.com/patchew-project/qemu
* [new tag] patchew/20210516123431.718318-1-richard.henderson@linaro.org -> patchew/20210516123431.718318-1-richard.henderson@linaro.org
Switched to a new branch 'test'
79b3657 softfloat: Move round_to_int_and_pack to softfloat-parts.c.inc
18be64c softfloat: Move round_to_int to softfloat-parts.c.inc
d819a18 softfloat: Convert float-to-float conversions with float128
6af91f7 softfloat: Split float_to_float
1f5260c softfloat: Move div_floats to softfloat-parts.c.inc
f4eca57 softfloat: Introduce sh[lr]_double primitives
4d88a4d softfloat: Tidy mul128By64To192
885f3ae softfloat: Use add192 in mul128To256
5d10ba5 softfloat: Use mulu64 for mul64To128
cdddd66 softfloat: Move muladd_floats to softfloat-parts.c.inc
13c5b80 softfloat: Move mul_floats to softfloat-parts.c.inc
5bc8e89 softfloat: Implement float128_add/sub via parts
5ab3959 softfloat: Move addsub_floats to softfloat-parts.c.inc
460d30c softfloat: Use uadd64_carry, usub64_borrow in softfloat-macros.h
99f3265 softfloat: Move round_canonical to softfloat-parts.c.inc
c0d23dc softfloat: Move sf_canonicalize to softfloat-parts.c.inc
21a70b0 softfloat: Move pick_nan_muladd to softfloat-parts.c.inc
2f42141 softfloat: Move pick_nan to softfloat-parts.c.inc
fc6b980 softfloat: Move return_nan to softfloat-parts.c.inc
7ae2da4 softfloat: Convert float128_default_nan to parts
930550f softfloat: Convert float128_silence_nan to parts
d5c84b6 softfloat: Rearrange FloatParts64
166bea0 softfloat: Use pointers with parts_silence_nan
70472ce softfloat: Use pointers with ftype_round_pack_canonical
fa88f30 softfloat: Use pointers with ftype_unpack_canonical
b3f4218 softfloat: Use pointers with ftype_pack_raw
6efd675 softfloat: Use pointers with pack_raw
f550064 softfloat: Use pointers with ftype_unpack_raw
c91d07c softfloat: Use pointers with unpack_raw
abeae2e softfloat: Use pointers with parts_default_nan
1ebb32c softfloat: Move type-specific pack/unpack routines
5988d22 softfloat: Rename FloatParts to FloatParts64
4929b37 softfloat: Do not produce a default_nan from parts_silence_nan
1cc195f target/mips: Set set_default_nan_mode with set_snan_bit_is_one
5c7f249 softfloat: fix return_nan vs default_nan_mode
ecf52bd softfloat: Use return_nan in float_to_float
db50e0a softfloat: Add float_cmask and constants
c8a1ed9 softfloat: Tidy a * b + inf return
4e15ceb softfloat: Use float_raise in more places
4c3af8c softfloat: Inline float_raise
0c1fefd softfloat: Move the binary point to the msb
b48d53f tests/fp: add quad support to the benchmark utility
6dcc2d6 accel/tcg: Use add/sub overflow routines in tcg-runtime-gvec.c
e22fa0d qemu/host-utils: Add wrappers for carry builtins
f7e908a qemu/host-utils: Add wrappers for overflow builtins
e2a38a2 qemu/host-utils: Use __builtin_bitreverseN
=== OUTPUT BEGIN ===
1/46 Checking commit e2a38a2b2f12 (qemu/host-utils: Use __builtin_bitreverseN)
WARNING: architecture specific defines should be avoided
#26: FILE: include/qemu/host-utils.h:275:
+#if __has_builtin(__builtin_bitreverse8)
WARNING: architecture specific defines should be avoided
#44: FILE: include/qemu/host-utils.h:296:
+#if __has_builtin(__builtin_bitreverse16)
WARNING: architecture specific defines should be avoided
#62: FILE: include/qemu/host-utils.h:319:
+#if __has_builtin(__builtin_bitreverse32)
WARNING: architecture specific defines should be avoided
#80: FILE: include/qemu/host-utils.h:342:
+#if __has_builtin(__builtin_bitreverse64)
total: 0 errors, 4 warnings, 64 lines checked
Patch 1/46 has style problems, please review. If any of these errors
are false positives report them to the maintainer, see
CHECKPATCH in MAINTAINERS.
2/46 Checking commit f7e908a5a70f (qemu/host-utils: Add wrappers for overflow builtins)
WARNING: architecture specific defines should be avoided
#35: FILE: include/qemu/host-utils.h:369:
+#if __has_builtin(__builtin_add_overflow) || __GNUC__ >= 5
WARNING: architecture specific defines should be avoided
#53: FILE: include/qemu/host-utils.h:387:
+#if __has_builtin(__builtin_add_overflow) || __GNUC__ >= 5
WARNING: architecture specific defines should be avoided
#71: FILE: include/qemu/host-utils.h:405:
+#if __has_builtin(__builtin_add_overflow) || __GNUC__ >= 5
WARNING: architecture specific defines should be avoided
#89: FILE: include/qemu/host-utils.h:423:
+#if __has_builtin(__builtin_add_overflow) || __GNUC__ >= 5
WARNING: architecture specific defines should be avoided
#108: FILE: include/qemu/host-utils.h:442:
+#if __has_builtin(__builtin_sub_overflow) || __GNUC__ >= 5
WARNING: architecture specific defines should be avoided
#127: FILE: include/qemu/host-utils.h:461:
+#if __has_builtin(__builtin_sub_overflow) || __GNUC__ >= 5
WARNING: architecture specific defines should be avoided
#146: FILE: include/qemu/host-utils.h:480:
+#if __has_builtin(__builtin_sub_overflow) || __GNUC__ >= 5
WARNING: architecture specific defines should be avoided
#165: FILE: include/qemu/host-utils.h:499:
+#if __has_builtin(__builtin_sub_overflow) || __GNUC__ >= 5
WARNING: architecture specific defines should be avoided
#183: FILE: include/qemu/host-utils.h:517:
+#if __has_builtin(__builtin_mul_overflow) || __GNUC__ >= 5
WARNING: architecture specific defines should be avoided
#202: FILE: include/qemu/host-utils.h:536:
+#if __has_builtin(__builtin_mul_overflow) || __GNUC__ >= 5
WARNING: architecture specific defines should be avoided
#222: FILE: include/qemu/host-utils.h:556:
+#if __has_builtin(__builtin_mul_overflow) || __GNUC__ >= 5
WARNING: architecture specific defines should be avoided
#241: FILE: include/qemu/host-utils.h:575:
+#if __has_builtin(__builtin_mul_overflow) || __GNUC__ >= 5
total: 0 errors, 12 warnings, 231 lines checked
Patch 2/46 has style problems, please review. If any of these errors
are false positives report them to the maintainer, see
CHECKPATCH in MAINTAINERS.
3/46 Checking commit e22fa0d5b6fa (qemu/host-utils: Add wrappers for carry builtins)
WARNING: architecture specific defines should be avoided
#44: FILE: include/qemu/host-utils.h:595:
+#if __has_builtin(__builtin_addcll)
WARNING: architecture specific defines should be avoided
#69: FILE: include/qemu/host-utils.h:620:
+#if __has_builtin(__builtin_subcll)
total: 0 errors, 2 warnings, 62 lines checked
Patch 3/46 has style problems, please review. If any of these errors
are false positives report them to the maintainer, see
CHECKPATCH in MAINTAINERS.
4/46 Checking commit 6dcc2d65e93d (accel/tcg: Use add/sub overflow routines in tcg-runtime-gvec.c)
5/46 Checking commit b48d53fa620f (tests/fp: add quad support to the benchmark utility)
WARNING: line over 80 characters
#183: FILE: tests/fp/fp-bench.c:458:
+ GEN_BENCH_NO_NEG(bench_ ## name ## _float128, float128, PREC_FLOAT128, op, n)
WARNING: line over 80 characters
#200: FILE: tests/fp/fp-bench.c:521:
+ fprintf(stderr, " -p = floating point precision (single, double, quad[soft only]). "
total: 0 errors, 2 warnings, 185 lines checked
Patch 5/46 has style problems, please review. If any of these errors
are false positives report them to the maintainer, see
CHECKPATCH in MAINTAINERS.
6/46 Checking commit 0c1fefd5c3f0 (softfloat: Move the binary point to the msb)
7/46 Checking commit 4c3af8cc9fe3 (softfloat: Inline float_raise)
8/46 Checking commit 4e15cebadbae (softfloat: Use float_raise in more places)
9/46 Checking commit c8a1ed948f41 (softfloat: Tidy a * b + inf return)
10/46 Checking commit db50e0afca55 (softfloat: Add float_cmask and constants)
11/46 Checking commit ecf52bd4406e (softfloat: Use return_nan in float_to_float)
12/46 Checking commit 5c7f249cdc9d (softfloat: fix return_nan vs default_nan_mode)
13/46 Checking commit 1cc195fa4cd7 (target/mips: Set set_default_nan_mode with set_snan_bit_is_one)
14/46 Checking commit 4929b3715c49 (softfloat: Do not produce a default_nan from parts_silence_nan)
15/46 Checking commit 5988d229db64 (softfloat: Rename FloatParts to FloatParts64)
WARNING: line over 80 characters
#237: FILE: fpu/softfloat.c:928:
+static FloatParts64 pick_nan_muladd(FloatParts64 a, FloatParts64 b, FloatParts64 c,
WARNING: line over 80 characters
#392: FILE: fpu/softfloat.c:1347:
+static FloatParts64 muladd_floats(FloatParts64 a, FloatParts64 b, FloatParts64 c,
WARNING: line over 80 characters
#839: FILE: fpu/softfloat.c:3189:
+static FloatRelation compare_floats(FloatParts64 a, FloatParts64 b, bool is_quiet,
WARNING: Block comments use a leading /* on a separate line
#877: FILE: fpu/softfloat.c:3374:
+ /* The largest float type (even though not supported by FloatParts64)
WARNING: line over 80 characters
#928: FILE: fpu/softfloat.c:3425:
+static FloatParts64 sqrt_float(FloatParts64 a, float_status *s, const FloatFmt *p)
total: 0 errors, 5 warnings, 1002 lines checked
Patch 15/46 has style problems, please review. If any of these errors
are false positives report them to the maintainer, see
CHECKPATCH in MAINTAINERS.
16/46 Checking commit 1ebb32c57926 (softfloat: Move type-specific pack/unpack routines)
17/46 Checking commit abeae2e27921 (softfloat: Use pointers with parts_default_nan)
18/46 Checking commit c91d07c41cae (softfloat: Use pointers with unpack_raw)
19/46 Checking commit f550064760e5 (softfloat: Use pointers with ftype_unpack_raw)
20/46 Checking commit 6efd67588db3 (softfloat: Use pointers with pack_raw)
21/46 Checking commit b3f421862b00 (softfloat: Use pointers with ftype_pack_raw)
22/46 Checking commit fa88f30a7b7e (softfloat: Use pointers with ftype_unpack_canonical)
23/46 Checking commit 70472cef4a39 (softfloat: Use pointers with ftype_round_pack_canonical)
24/46 Checking commit 166bea0928a4 (softfloat: Use pointers with parts_silence_nan)
25/46 Checking commit d5c84b6677af (softfloat: Rearrange FloatParts64)
26/46 Checking commit 930550f4640a (softfloat: Convert float128_silence_nan to parts)
27/46 Checking commit 7ae2da49e108 (softfloat: Convert float128_default_nan to parts)
28/46 Checking commit fc6b980782ca (softfloat: Move return_nan to softfloat-parts.c.inc)
Use of uninitialized value $acpi_testexpected in string eq at ./scripts/checkpatch.pl line 1529.
WARNING: added, moved or deleted file(s), does MAINTAINERS need updating?
#19:
new file mode 100644
total: 0 errors, 1 warnings, 124 lines checked
Patch 28/46 has style problems, please review. If any of these errors
are false positives report them to the maintainer, see
CHECKPATCH in MAINTAINERS.
29/46 Checking commit 2f421417242d (softfloat: Move pick_nan to softfloat-parts.c.inc)
30/46 Checking commit 21a70b06443a (softfloat: Move pick_nan_muladd to softfloat-parts.c.inc)
31/46 Checking commit c0d23dc89648 (softfloat: Move sf_canonicalize to softfloat-parts.c.inc)
32/46 Checking commit 99f3265020ae (softfloat: Move round_canonical to softfloat-parts.c.inc)
33/46 Checking commit 460d30c52eef (softfloat: Use uadd64_carry, usub64_borrow in softfloat-macros.h)
34/46 Checking commit 5ab3959d792b (softfloat: Move addsub_floats to softfloat-parts.c.inc)
Use of uninitialized value $acpi_testexpected in string eq at ./scripts/checkpatch.pl line 1529.
WARNING: added, moved or deleted file(s), does MAINTAINERS need updating?
#20:
new file mode 100644
ERROR: space required after that ',' (ctx:VxV)
#271: FILE: fpu/softfloat.c:958:
+#define partsN(NAME) glue(glue(glue(parts,N),_),NAME)
^
ERROR: space required after that ',' (ctx:VxV)
#271: FILE: fpu/softfloat.c:958:
+#define partsN(NAME) glue(glue(glue(parts,N),_),NAME)
^
ERROR: space required after that ',' (ctx:VxV)
#271: FILE: fpu/softfloat.c:958:
+#define partsN(NAME) glue(glue(glue(parts,N),_),NAME)
^
ERROR: space required after that ',' (ctx:VxV)
#272: FILE: fpu/softfloat.c:959:
+#define FloatPartsN glue(FloatParts,N)
^
total: 4 errors, 1 warnings, 489 lines checked
Patch 34/46 has style problems, please review. If any of these errors
are false positives report them to the maintainer, see
CHECKPATCH in MAINTAINERS.
35/46 Checking commit 5bc8e893b82c (softfloat: Implement float128_add/sub via parts)
36/46 Checking commit 13c5b809ad52 (softfloat: Move mul_floats to softfloat-parts.c.inc)
ERROR: space required after that ',' (ctx:VxV)
#152: FILE: fpu/softfloat.c:1004:
+#define FloatPartsW glue(FloatParts,W)
^
total: 1 errors, 0 warnings, 350 lines checked
Patch 36/46 has style problems, please review. If any of these errors
are false positives report them to the maintainer, see
CHECKPATCH in MAINTAINERS.
37/46 Checking commit cdddd6645bf8 (softfloat: Move muladd_floats to softfloat-parts.c.inc)
38/46 Checking commit 5d10ba5c97c7 (softfloat: Use mulu64 for mul64To128)
39/46 Checking commit 885f3aee0766 (softfloat: Use add192 in mul128To256)
ERROR: space prohibited after that open parenthesis '('
#63: FILE: include/fpu/softfloat-macros.h:527:
+ add192( 0, m1, m2, 0, n1, n2, &m0, &m1, &m2);
total: 1 errors, 0 warnings, 46 lines checked
Patch 39/46 has style problems, please review. If any of these errors
are false positives report them to the maintainer, see
CHECKPATCH in MAINTAINERS.
40/46 Checking commit 4d88a4d4f75f (softfloat: Tidy mul128By64To192)
41/46 Checking commit f4eca575050b (softfloat: Introduce sh[lr]_double primitives)
WARNING: architecture specific defines should be avoided
#204: FILE: include/fpu/softfloat-macros.h:98:
+#if defined(__x86_64__)
WARNING: architecture specific defines should be avoided
#222: FILE: include/fpu/softfloat-macros.h:116:
+#if defined(__x86_64__)
total: 0 errors, 2 warnings, 199 lines checked
Patch 41/46 has style problems, please review. If any of these errors
are false positives report them to the maintainer, see
CHECKPATCH in MAINTAINERS.
42/46 Checking commit 1f5260ca2a6d (softfloat: Move div_floats to softfloat-parts.c.inc)
43/46 Checking commit 6af91f773b19 (softfloat: Split float_to_float)
44/46 Checking commit d819a1897339 (softfloat: Convert float-to-float conversions with float128)
45/46 Checking commit 18be64c3bced (softfloat: Move round_to_int to softfloat-parts.c.inc)
46/46 Checking commit 79b3657a3f44 (softfloat: Move round_to_int_and_pack to softfloat-parts.c.inc)
=== OUTPUT END ===
Test command exited with code: 1
The full log is available at
http://patchew.org/logs/20210516123431.718318-1-richard.henderson@linaro.org/testing.checkpatch/?type=message.
---
Email generated automatically by Patchew [https://patchew.org/].
Please send your feedback to patchew-devel@redhat.com
^ permalink raw reply [flat|nested] 52+ messages in thread
* Re: [PULL 00/46] softfloat: Convert float128 to FloatParts (part 1)
2021-05-16 12:33 [PULL 00/46] softfloat: Convert float128 to FloatParts (part 1) Richard Henderson
` (46 preceding siblings ...)
2021-05-16 13:19 ` [PULL 00/46] softfloat: Convert float128 to FloatParts (part 1) no-reply
@ 2021-05-18 10:11 ` Peter Maydell
47 siblings, 0 replies; 52+ messages in thread
From: Peter Maydell @ 2021-05-18 10:11 UTC (permalink / raw)
To: Richard Henderson; +Cc: QEMU Developers
On Sun, 16 May 2021 at 13:34, Richard Henderson
<richard.henderson@linaro.org> wrote:
>
> We have 46 of 72 patches reviewed, and they include ones that
> are of express interest to David, for s390x. Let's merge these
> so that the next patch set is much smaller.
>
>
> r~
>
>
> The following changes since commit 6d34aa9969ff85ca6eaeb4dc1988a4d4e13e7d79:
>
> Merge remote-tracking branch 'remotes/marcandre/tags/rtd-pull-request' into staging (2021-05-14 17:20:08 +0100)
>
> are available in the Git repository at:
>
> https://gitlab.com/rth7680/qemu.git tags/pull-fp-20210516
>
> for you to fetch changes up to 463b3f0d7fa11054daeb5ca22346f77d566795bf:
>
> softfloat: Move round_to_int_and_pack to softfloat-parts.c.inc (2021-05-16 07:13:51 -0500)
>
> ----------------------------------------------------------------
> Reorg FloatParts to use QEMU_GENERIC.
> Begin replacing the Berkeley float128 routines with FloatParts128.
> - includes a new implementation of float128_muladd
> - includes the snan silencing that was missing from
> float{32,64}_to_float128 and float128_to_float{32,64}.
> - does not include float128_min/max* (written but not yet reviewed).
Applied, thanks.
Please update the changelog at https://wiki.qemu.org/ChangeLog/6.1
for any user-visible changes.
-- PMM
^ permalink raw reply [flat|nested] 52+ messages in thread