All of lore.kernel.org
 help / color / mirror / Atom feed
From: Taylor Simpson <tsimpson@quicinc.com>
To: qemu-devel@nongnu.org
Cc: ale@rev.ng, bcain@quicinc.com, philmd@redhat.com,
	richard.henderson@linaro.org, laurent@vivier.eu,
	tsimpson@quicinc.com, alex.bennee@linaro.org
Subject: [PATCH v8 17/35] Hexagon (target/hexagon/fma_emu.[ch]) utility functions
Date: Sun,  7 Feb 2021 23:46:07 -0600	[thread overview]
Message-ID: <1612763186-18161-18-git-send-email-tsimpson@quicinc.com> (raw)
In-Reply-To: <1612763186-18161-1-git-send-email-tsimpson@quicinc.com>

Signed-off-by: Taylor Simpson <tsimpson@quicinc.com>
---
 target/hexagon/fma_emu.h |  36 +++
 target/hexagon/fma_emu.c | 702 +++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 738 insertions(+)
 create mode 100644 target/hexagon/fma_emu.h
 create mode 100644 target/hexagon/fma_emu.c

diff --git a/target/hexagon/fma_emu.h b/target/hexagon/fma_emu.h
new file mode 100644
index 0000000..e3b99a8
--- /dev/null
+++ b/target/hexagon/fma_emu.h
@@ -0,0 +1,36 @@
+/*
+ *  Copyright(c) 2019-2021 Qualcomm Innovation Center, Inc. All Rights Reserved.
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef HEXAGON_FMA_EMU_H
+#define HEXAGON_FMA_EMU_H
+
+static inline bool is_finite(float64 x)
+{
+    return !float64_is_any_nan(x) && !float64_is_infinity(x);
+}
+
+int32_t float64_getexp(float64 f64);
+int32_t float32_getexp(float32 f32);
+float32 infinite_float32(uint8_t sign);
+float32 internal_fmafx(float32 a, float32 b, float32 c,
+                       int scale, float_status *fp_status);
+float32 internal_mpyf(float32 a, float32 b, float_status *fp_status);
+float64 internal_mpyhh(float64 a, float64 b,
+                       unsigned long long int accumulated,
+                       float_status *fp_status);
+
+#endif
diff --git a/target/hexagon/fma_emu.c b/target/hexagon/fma_emu.c
new file mode 100644
index 0000000..842d903
--- /dev/null
+++ b/target/hexagon/fma_emu.c
@@ -0,0 +1,702 @@
+/*
+ *  Copyright(c) 2019-2021 Qualcomm Innovation Center, Inc. All Rights Reserved.
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/int128.h"
+#include "fpu/softfloat.h"
+#include "macros.h"
+#include "conv_emu.h"
+#include "fma_emu.h"
+
+#define DF_INF_EXP     0x7ff
+#define DF_BIAS        1023
+#define DF_MANTBITS    52
+#define DF_NAN         0xffffffffffffffffULL
+#define DF_INF         0x7ff0000000000000ULL
+#define DF_MINUS_INF   0xfff0000000000000ULL
+#define DF_MAXF        0x7fefffffffffffffULL
+#define DF_MINUS_MAXF  0xffefffffffffffffULL
+
+#define SF_INF_EXP     0xff
+#define SF_BIAS        127
+#define SF_MANTBITS    23
+#define SF_INF         0x7f800000
+#define SF_MINUS_INF   0xff800000
+#define SF_MAXF        0x7f7fffff
+#define SF_MINUS_MAXF  0xff7fffff
+
+#define HF_INF_EXP 0x1f
+#define HF_BIAS 15
+
+#define WAY_BIG_EXP 4096
+
+typedef union {
+    double f;
+    uint64_t i;
+    struct {
+        uint64_t mant:52;
+        uint64_t exp:11;
+        uint64_t sign:1;
+    };
+} Double;
+
+typedef union {
+    float f;
+    uint32_t i;
+    struct {
+        uint32_t mant:23;
+        uint32_t exp:8;
+        uint32_t sign:1;
+    };
+} Float;
+
+static inline uint64_t float64_getmant(float64 f64)
+{
+    Double a = { .i = f64 };
+    if (float64_is_normal(f64)) {
+        return a.mant | 1ULL << 52;
+    }
+    if (float64_is_zero(f64)) {
+        return 0;
+    }
+    if (float64_is_denormal(f64)) {
+        return a.mant;
+    }
+    return ~0ULL;
+}
+
+int32_t float64_getexp(float64 f64)
+{
+    Double a = { .i = f64 };
+    if (float64_is_normal(f64)) {
+        return a.exp;
+    }
+    if (float64_is_denormal(f64)) {
+        return a.exp + 1;
+    }
+    return -1;
+}
+
+static inline uint64_t float32_getmant(float32 f32)
+{
+    Float a = { .i = f32 };
+    if (float32_is_normal(f32)) {
+        return a.mant | 1ULL << 23;
+    }
+    if (float32_is_zero(f32)) {
+        return 0;
+    }
+    if (float32_is_denormal(f32)) {
+        return a.mant;
+    }
+    return ~0ULL;
+}
+
+int32_t float32_getexp(float32 f32)
+{
+    Float a = { .i = f32 };
+    if (float32_is_normal(f32)) {
+        return a.exp;
+    }
+    if (float32_is_denormal(f32)) {
+        return a.exp + 1;
+    }
+    return -1;
+}
+
+static inline uint32_t int128_getw0(Int128 x)
+{
+    return int128_getlo(x);
+}
+
+static inline uint32_t int128_getw1(Int128 x)
+{
+    return int128_getlo(x) >> 32;
+}
+
+static inline Int128 int128_mul_6464(uint64_t ai, uint64_t bi)
+{
+    Int128 a, b;
+    uint64_t pp0, pp1a, pp1b, pp1s, pp2;
+
+    a = int128_make64(ai);
+    b = int128_make64(bi);
+    pp0 = (uint64_t)int128_getw0(a) * (uint64_t)int128_getw0(b);
+    pp1a = (uint64_t)int128_getw1(a) * (uint64_t)int128_getw0(b);
+    pp1b = (uint64_t)int128_getw1(b) * (uint64_t)int128_getw0(a);
+    pp2 = (uint64_t)int128_getw1(a) * (uint64_t)int128_getw1(b);
+
+    pp1s = pp1a + pp1b;
+    if ((pp1s < pp1a) || (pp1s < pp1b)) {
+        pp2 += (1ULL << 32);
+    }
+    uint64_t ret_low = pp0 + (pp1s << 32);
+    if ((ret_low < pp0) || (ret_low < (pp1s << 32))) {
+        pp2 += 1;
+    }
+
+    return int128_make128(ret_low, pp2 + (pp1s >> 32));
+}
+
+static inline Int128 int128_sub_borrow(Int128 a, Int128 b, int borrow)
+{
+    Int128 ret = int128_sub(a, b);
+    if (borrow != 0) {
+        ret = int128_sub(ret, int128_one());
+    }
+    return ret;
+}
+
+typedef struct {
+    Int128 mant;
+    int32_t exp;
+    uint8_t sign;
+    uint8_t guard;
+    uint8_t round;
+    uint8_t sticky;
+} Accum;
+
+static inline void accum_init(Accum *p)
+{
+    p->mant = int128_zero();
+    p->exp = 0;
+    p->sign = 0;
+    p->guard = 0;
+    p->round = 0;
+    p->sticky = 0;
+}
+
+static inline Accum accum_norm_left(Accum a)
+{
+    a.exp--;
+    a.mant = int128_lshift(a.mant, 1);
+    a.mant = int128_or(a.mant, int128_make64(a.guard));
+    a.guard = a.round;
+    a.round = a.sticky;
+    return a;
+}
+
+static inline Accum accum_norm_right(Accum a, int amt)
+{
+    if (amt > 130) {
+        a.sticky |=
+            a.round | a.guard | int128_nz(a.mant);
+        a.guard = a.round = 0;
+        a.mant = int128_zero();
+        a.exp += amt;
+        return a;
+
+    }
+    while (amt >= 64) {
+        a.sticky |= a.round | a.guard | (int128_getlo(a.mant) != 0);
+        a.guard = (int128_getlo(a.mant) >> 63) & 1;
+        a.round = (int128_getlo(a.mant) >> 62) & 1;
+        a.mant = int128_make64(int128_gethi(a.mant));
+        a.exp += 64;
+        amt -= 64;
+    }
+    while (amt > 0) {
+        a.exp++;
+        a.sticky |= a.round;
+        a.round = a.guard;
+        a.guard = int128_getlo(a.mant) & 1;
+        a.mant = int128_rshift(a.mant, 1);
+        amt--;
+    }
+    return a;
+}
+
+/*
+ * On the add/sub, we need to be able to shift out lots of bits, but need a
+ * sticky bit for what was shifted out, I think.
+ */
+static Accum accum_add(Accum a, Accum b);
+
+static inline Accum accum_sub(Accum a, Accum b, int negate)
+{
+    Accum ret;
+    accum_init(&ret);
+    int borrow;
+
+    if (a.sign != b.sign) {
+        b.sign = !b.sign;
+        return accum_add(a, b);
+    }
+    if (b.exp > a.exp) {
+        /* small - big == - (big - small) */
+        return accum_sub(b, a, !negate);
+    }
+    if ((b.exp == a.exp) && (int128_gt(b.mant, a.mant))) {
+        /* small - big == - (big - small) */
+        return accum_sub(b, a, !negate);
+    }
+
+    while (a.exp > b.exp) {
+        /* Try to normalize exponents: shrink a exponent and grow mantissa */
+        if (int128_gethi(a.mant) & (1ULL << 62)) {
+            /* Can't grow a any more */
+            break;
+        } else {
+            a = accum_norm_left(a);
+        }
+    }
+
+    while (a.exp > b.exp) {
+        /* Try to normalize exponents: grow b exponent and shrink mantissa */
+        /* Keep around shifted out bits... we might need those later */
+        b = accum_norm_right(b, a.exp - b.exp);
+    }
+
+    if ((int128_gt(b.mant, a.mant))) {
+        return accum_sub(b, a, !negate);
+    }
+
+    /* OK, now things should be normalized! */
+    ret.sign = a.sign;
+    ret.exp = a.exp;
+    assert(!int128_gt(b.mant, a.mant));
+    borrow = (b.round << 2) | (b.guard << 1) | b.sticky;
+    ret.mant = int128_sub_borrow(a.mant, b.mant, (borrow != 0));
+    borrow = 0 - borrow;
+    ret.guard = (borrow >> 2) & 1;
+    ret.round = (borrow >> 1) & 1;
+    ret.sticky = (borrow >> 0) & 1;
+    if (negate) {
+        ret.sign = !ret.sign;
+    }
+    return ret;
+}
+
+static Accum accum_add(Accum a, Accum b)
+{
+    Accum ret;
+    accum_init(&ret);
+    if (a.sign != b.sign) {
+        b.sign = !b.sign;
+        return accum_sub(a, b, 0);
+    }
+    if (b.exp > a.exp) {
+        /* small + big ==  (big + small) */
+        return accum_add(b, a);
+    }
+    if ((b.exp == a.exp) && int128_gt(b.mant, a.mant)) {
+        /* small + big ==  (big + small) */
+        return accum_add(b, a);
+    }
+
+    while (a.exp > b.exp) {
+        /* Try to normalize exponents: shrink a exponent and grow mantissa */
+        if (int128_gethi(a.mant) & (1ULL << 62)) {
+            /* Can't grow a any more */
+            break;
+        } else {
+            a = accum_norm_left(a);
+        }
+    }
+
+    while (a.exp > b.exp) {
+        /* Try to normalize exponents: grow b exponent and shrink mantissa */
+        /* Keep around shifted out bits... we might need those later */
+        b = accum_norm_right(b, a.exp - b.exp);
+    }
+
+    /* OK, now things should be normalized! */
+    if (int128_gt(b.mant, a.mant)) {
+        return accum_add(b, a);
+    };
+    ret.sign = a.sign;
+    ret.exp = a.exp;
+    assert(!int128_gt(b.mant, a.mant));
+    ret.mant = int128_add(a.mant, b.mant);
+    ret.guard = b.guard;
+    ret.round = b.round;
+    ret.sticky = b.sticky;
+    return ret;
+}
+
+/* Return an infinity with requested sign */
+static inline float64 infinite_float64(uint8_t sign)
+{
+    if (sign) {
+        return make_float64(DF_MINUS_INF);
+    } else {
+        return make_float64(DF_INF);
+    }
+}
+
+/* Return a maximum finite value with requested sign */
+static inline float64 maxfinite_float64(uint8_t sign)
+{
+    if (sign) {
+        return make_float64(DF_MINUS_MAXF);
+    } else {
+        return make_float64(DF_MAXF);
+    }
+}
+
+/* Return a zero value with requested sign */
+static inline float64 zero_float64(uint8_t sign)
+{
+    if (sign) {
+        return make_float64(0x8000000000000000);
+    } else {
+        return float64_zero;
+    }
+}
+
+/* Return an infinity with the requested sign */
+float32 infinite_float32(uint8_t sign)
+{
+    if (sign) {
+        return make_float32(SF_MINUS_INF);
+    } else {
+        return make_float32(SF_INF);
+    }
+}
+
+/* Return a maximum finite value with the requested sign */
+static inline float32 maxfinite_float32(uint8_t sign)
+{
+    if (sign) {
+        return make_float32(SF_MINUS_MAXF);
+    } else {
+        return make_float32(SF_MAXF);
+    }
+}
+
+/* Return a zero value with requested sign */
+static inline float32 zero_float32(uint8_t sign)
+{
+    if (sign) {
+        return make_float32(0x80000000);
+    } else {
+        return float32_zero;
+    }
+}
+
+#define GEN_XF_ROUND(SUFFIX, MANTBITS, INF_EXP, INTERNAL_TYPE) \
+static inline SUFFIX accum_round_##SUFFIX(Accum a, float_status * fp_status) \
+{ \
+    if ((int128_gethi(a.mant) == 0) && (int128_getlo(a.mant) == 0) \
+        && ((a.guard | a.round | a.sticky) == 0)) { \
+        /* result zero */ \
+        switch (fp_status->float_rounding_mode) { \
+        case float_round_down: \
+            return zero_##SUFFIX(1); \
+        default: \
+            return zero_##SUFFIX(0); \
+        } \
+    } \
+    /* Normalize right */ \
+    /* We want MANTBITS bits of mantissa plus the leading one. */ \
+    /* That means that we want MANTBITS+1 bits, or 0x000000000000FF_FFFF */ \
+    /* So we need to normalize right while the high word is non-zero and \
+    * while the low word is nonzero when masked with 0xffe0_0000_0000_0000 */ \
+    while ((int128_gethi(a.mant) != 0) || \
+           ((int128_getlo(a.mant) >> (MANTBITS + 1)) != 0)) { \
+        a = accum_norm_right(a, 1); \
+    } \
+    /* \
+     * OK, now normalize left \
+     * We want to normalize left until we have a leading one in bit 24 \
+     * Theoretically, we only need to shift a maximum of one to the left if we \
+     * shifted out lots of bits from B, or if we had no shift / 1 shift sticky \
+     * shoudl be 0  \
+     */ \
+    while ((int128_getlo(a.mant) & (1ULL << MANTBITS)) == 0) { \
+        a = accum_norm_left(a); \
+    } \
+    /* \
+     * OK, now we might need to denormalize because of potential underflow. \
+     * We need to do this before rounding, and rounding might make us normal \
+     * again \
+     */ \
+    while (a.exp <= 0) { \
+        a = accum_norm_right(a, 1 - a.exp); \
+        /* \
+         * Do we have underflow? \
+         * That's when we get an inexact answer because we ran out of bits \
+         * in a denormal. \
+         */ \
+        if (a.guard || a.round || a.sticky) { \
+            float_raise(float_flag_underflow, fp_status); \
+        } \
+    } \
+    /* OK, we're relatively canonical... now we need to round */ \
+    if (a.guard || a.round || a.sticky) { \
+        float_raise(float_flag_inexact, fp_status); \
+        switch (fp_status->float_rounding_mode) { \
+        case float_round_to_zero: \
+            /* Chop and we're done */ \
+            break; \
+        case float_round_up: \
+            if (a.sign == 0) { \
+                a.mant = int128_add(a.mant, int128_one()); \
+            } \
+            break; \
+        case float_round_down: \
+            if (a.sign != 0) { \
+                a.mant = int128_add(a.mant, int128_one()); \
+            } \
+            break; \
+        default: \
+            if (a.round || a.sticky) { \
+                /* round up if guard is 1, down if guard is zero */ \
+                a.mant = int128_add(a.mant, int128_make64(a.guard)); \
+            } else if (a.guard) { \
+                /* exactly .5, round up if odd */ \
+                a.mant = int128_add(a.mant, int128_and(a.mant, int128_one())); \
+            } \
+            break; \
+        } \
+    } \
+    /* \
+     * OK, now we might have carried all the way up. \
+     * So we might need to shr once \
+     * at least we know that the lsb should be zero if we rounded and \
+     * got a carry out... \
+     */ \
+    if ((int128_getlo(a.mant) >> (MANTBITS + 1)) != 0) { \
+        a = accum_norm_right(a, 1); \
+    } \
+    /* Overflow? */ \
+    if (a.exp >= INF_EXP) { \
+        /* Yep, inf result */ \
+        float_raise(float_flag_overflow, fp_status); \
+        float_raise(float_flag_inexact, fp_status); \
+        switch (fp_status->float_rounding_mode) { \
+        case float_round_to_zero: \
+            return maxfinite_##SUFFIX(a.sign); \
+        case float_round_up: \
+            if (a.sign == 0) { \
+                return infinite_##SUFFIX(a.sign); \
+            } else { \
+                return maxfinite_##SUFFIX(a.sign); \
+            } \
+        case float_round_down: \
+            if (a.sign != 0) { \
+                return infinite_##SUFFIX(a.sign); \
+            } else { \
+                return maxfinite_##SUFFIX(a.sign); \
+            } \
+        default: \
+            return infinite_##SUFFIX(a.sign); \
+        } \
+    } \
+    /* Underflow? */ \
+    if (int128_getlo(a.mant) & (1ULL << MANTBITS)) { \
+        /* Leading one means: No, we're normal. So, we should be done... */ \
+        INTERNAL_TYPE ret; \
+        ret.i = 0; \
+        ret.sign = a.sign; \
+        ret.exp = a.exp; \
+        ret.mant = int128_getlo(a.mant); \
+        return ret.i; \
+    } \
+    assert(a.exp == 1); \
+    INTERNAL_TYPE ret; \
+    ret.i = 0; \
+    ret.sign = a.sign; \
+    ret.exp = 0; \
+    ret.mant = int128_getlo(a.mant); \
+    return ret.i; \
+}
+
+GEN_XF_ROUND(float64, DF_MANTBITS, DF_INF_EXP, Double)
+GEN_XF_ROUND(float32, SF_MANTBITS, SF_INF_EXP, Float)
+
+static bool is_inf_prod(float64 a, float64 b)
+{
+    return ((float64_is_infinity(a) && float64_is_infinity(b)) ||
+            (float64_is_infinity(a) && is_finite(b) && (!float64_is_zero(b))) ||
+            (float64_is_infinity(b) && is_finite(a) && (!float64_is_zero(a))));
+}
+
+static inline float64 special_fma(float64 a, float64 b, float64 c,
+                                  float_status *fp_status)
+{
+    float64 ret = make_float64(0);
+
+    /*
+     * If A multiplied by B is an exact infinity and C is also an infinity
+     * but with the opposite sign, FMA returns NaN and raises invalid.
+     */
+    uint8_t a_sign = float64_is_neg(a);
+    uint8_t b_sign = float64_is_neg(b);
+    uint8_t c_sign = float64_is_neg(c);
+    if (is_inf_prod(a, b) && float64_is_infinity(c)) {
+        if ((a_sign ^ b_sign) != c_sign) {
+            ret = make_float64(DF_NAN);
+            float_raise(float_flag_invalid, fp_status);
+            return ret;
+        }
+    }
+    if ((float64_is_infinity(a) && float64_is_zero(b)) ||
+        (float64_is_zero(a) && float64_is_infinity(b))) {
+        ret = make_float64(DF_NAN);
+        float_raise(float_flag_invalid, fp_status);
+        return ret;
+    }
+    /*
+     * If none of the above checks are true and C is a NaN,
+     * a NaN shall be returned
+     * If A or B are NaN, a NAN shall be returned.
+     */
+    if (float64_is_any_nan(a) ||
+        float64_is_any_nan(b) ||
+        float64_is_any_nan(c)) {
+        if (float64_is_any_nan(a) && (fGETBIT(51, a) == 0)) {
+            float_raise(float_flag_invalid, fp_status);
+        }
+        if (float64_is_any_nan(b) && (fGETBIT(51, b) == 0)) {
+            float_raise(float_flag_invalid, fp_status);
+        }
+        if (float64_is_any_nan(c) && (fGETBIT(51, c) == 0)) {
+            float_raise(float_flag_invalid, fp_status);
+        }
+        ret = make_float64(DF_NAN);
+        return ret;
+    }
+    /*
+     * We have checked for adding opposite-signed infinities.
+     * Other infinities return infinity with the correct sign
+     */
+    if (float64_is_infinity(c)) {
+        ret = infinite_float64(c_sign);
+        return ret;
+    }
+    if (float64_is_infinity(a) || float64_is_infinity(b)) {
+        ret = infinite_float64(a_sign ^ b_sign);
+        return ret;
+    }
+    g_assert_not_reached();
+}
+
+static inline float32 special_fmaf(float32 a, float32 b, float32 c,
+                                 float_status *fp_status)
+{
+    float64 aa, bb, cc;
+    aa = float32_to_float64(a, fp_status);
+    bb = float32_to_float64(b, fp_status);
+    cc = float32_to_float64(c, fp_status);
+    return float64_to_float32(special_fma(aa, bb, cc, fp_status), fp_status);
+}
+
+float32 internal_fmafx(float32 a, float32 b, float32 c, int scale,
+                       float_status *fp_status)
+{
+    Accum prod;
+    Accum acc;
+    Accum result;
+    accum_init(&prod);
+    accum_init(&acc);
+    accum_init(&result);
+
+    uint8_t a_sign = float32_is_neg(a);
+    uint8_t b_sign = float32_is_neg(b);
+    uint8_t c_sign = float32_is_neg(c);
+    if (float32_is_infinity(a) ||
+        float32_is_infinity(b) ||
+        float32_is_infinity(c)) {
+        return special_fmaf(a, b, c, fp_status);
+    }
+    if (float32_is_any_nan(a) ||
+        float32_is_any_nan(b) ||
+        float32_is_any_nan(c)) {
+        return special_fmaf(a, b, c, fp_status);
+    }
+    if ((scale == 0) && (float32_is_zero(a) || float32_is_zero(b))) {
+        float32 tmp = float32_mul(a, b, fp_status);
+        tmp = float32_add(tmp, c, fp_status);
+        return tmp;
+    }
+
+    /* (a * 2**b) * (c * 2**d) == a*c * 2**(b+d) */
+    prod.mant = int128_mul_6464(float32_getmant(a), float32_getmant(b));
+
+    /*
+     * Note: extracting the mantissa into an int is multiplying by
+     * 2**23, so adjust here
+     */
+    prod.exp = float32_getexp(a) + float32_getexp(b) - SF_BIAS - 23;
+    prod.sign = a_sign ^ b_sign;
+    if (float32_is_zero(a) || float32_is_zero(b)) {
+        prod.exp = -2 * WAY_BIG_EXP;
+    }
+    if ((scale > 0) && float32_is_denormal(c)) {
+        acc.mant = int128_mul_6464(0, 0);
+        acc.exp = -WAY_BIG_EXP;
+        acc.sign = c_sign;
+        acc.sticky = 1;
+        result = accum_add(prod, acc);
+    } else if (!float32_is_zero(c)) {
+        acc.mant = int128_mul_6464(float32_getmant(c), 1);
+        acc.exp = float32_getexp(c);
+        acc.sign = c_sign;
+        result = accum_add(prod, acc);
+    } else {
+        result = prod;
+    }
+    result.exp += scale;
+    return accum_round_float32(result, fp_status);
+}
+
+float32 internal_mpyf(float32 a, float32 b, float_status *fp_status)
+{
+    if (float32_is_zero(a) || float32_is_zero(b)) {
+        return float32_mul(a, b, fp_status);
+    }
+    return internal_fmafx(a, b, float32_zero, 0, fp_status);
+}
+
+float64 internal_mpyhh(float64 a, float64 b,
+                      unsigned long long int accumulated,
+                      float_status *fp_status)
+{
+    Accum x;
+    unsigned long long int prod;
+    unsigned int sticky;
+    uint8_t a_sign, b_sign;
+
+    sticky = accumulated & 1;
+    accumulated >>= 1;
+    accum_init(&x);
+    if (float64_is_zero(a) ||
+        float64_is_any_nan(a) ||
+        float64_is_infinity(a)) {
+        return float64_mul(a, b, fp_status);
+    }
+    if (float64_is_zero(b) ||
+        float64_is_any_nan(b) ||
+        float64_is_infinity(b)) {
+        return float64_mul(a, b, fp_status);
+    }
+    x.mant = int128_mul_6464(accumulated, 1);
+    x.sticky = sticky;
+    prod = fGETUWORD(1, float64_getmant(a)) * fGETUWORD(1, float64_getmant(b));
+    x.mant = int128_add(x.mant, int128_mul_6464(prod, 0x100000000ULL));
+    x.exp = float64_getexp(a) + float64_getexp(b) - DF_BIAS - 20;
+    if (!float64_is_normal(a) || !float64_is_normal(b)) {
+        /* crush to inexact zero */
+        x.sticky = 1;
+        x.exp = -4096;
+    }
+    a_sign = float64_is_neg(a);
+    b_sign = float64_is_neg(b);
+    x.sign = a_sign ^ b_sign;
+    return accum_round_float64(x, fp_status);
+}
-- 
2.7.4


  parent reply	other threads:[~2021-02-08  5:58 UTC|newest]

Thread overview: 101+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-02-08  5:45 [PATCH v8 00/35] Hexagon patch series Taylor Simpson
2021-02-08  5:45 ` [PATCH v8 01/35] Hexagon Update MAINTAINERS file Taylor Simpson
2021-02-08  5:45 ` [PATCH v8 02/35] Hexagon (target/hexagon) README Taylor Simpson
2021-02-08  5:45 ` [PATCH v8 03/35] Hexagon (include/elf.h) ELF machine definition Taylor Simpson
2021-02-08  5:45 ` [PATCH v8 04/35] Hexagon (target/hexagon) scalar core definition Taylor Simpson
2021-02-08  5:45 ` [PATCH v8 05/35] Hexagon (disas) disassembler Taylor Simpson
2021-02-08  5:45 ` [PATCH v8 06/35] Hexagon (target/hexagon) register names Taylor Simpson
2021-02-08  5:45 ` [PATCH v8 07/35] Hexagon (target/hexagon) scalar core helpers Taylor Simpson
2021-02-08  5:45 ` [PATCH v8 08/35] Hexagon (target/hexagon) GDB Stub Taylor Simpson
2021-02-08  5:45 ` [PATCH v8 09/35] Hexagon (target/hexagon) architecture types Taylor Simpson
2021-02-12  0:32   ` Philippe Mathieu-Daudé
2021-02-14 17:16   ` Richard Henderson
2021-02-08  5:46 ` [PATCH v8 10/35] Hexagon (target/hexagon) instruction and packet types Taylor Simpson
2021-02-14 17:21   ` Richard Henderson
2021-02-08  5:46 ` [PATCH v8 11/35] Hexagon (target/hexagon) register fields Taylor Simpson
2021-02-14 17:25   ` Richard Henderson
2021-02-08  5:46 ` [PATCH v8 12/35] Hexagon (target/hexagon) instruction attributes Taylor Simpson
2021-02-14 18:00   ` Richard Henderson
2021-02-08  5:46 ` [PATCH v8 13/35] Hexagon (target/hexagon) instruction/packet decode Taylor Simpson
2021-02-14 18:31   ` Richard Henderson
2021-03-14 17:23     ` Taylor Simpson
2021-02-08  5:46 ` [PATCH v8 14/35] Hexagon (target/hexagon) instruction printing Taylor Simpson
2021-02-14 18:36   ` Richard Henderson
2021-02-08  5:46 ` [PATCH v8 15/35] Hexagon (target/hexagon/arch.[ch]) utility functions Taylor Simpson
2021-02-14 20:13   ` Richard Henderson
2021-03-18  3:57     ` Taylor Simpson
2021-02-08  5:46 ` [PATCH v8 16/35] Hexagon (target/hexagon/conv_emu.[ch]) " Taylor Simpson
2021-02-14 20:57   ` Richard Henderson
2021-03-18  3:57     ` Taylor Simpson
2021-03-18 13:30       ` Richard Henderson
2021-03-18 14:11         ` Taylor Simpson
2021-03-18 15:35           ` Richard Henderson
2021-03-18 18:03             ` Taylor Simpson
2021-03-18 18:32               ` Richard Henderson
2021-02-08  5:46 ` Taylor Simpson [this message]
2021-02-12  0:31   ` [PATCH v8 17/35] Hexagon (target/hexagon/fma_emu.[ch]) " Philippe Mathieu-Daudé
2021-02-14 23:14   ` Richard Henderson
2021-03-18  3:57     ` Taylor Simpson
2021-02-08  5:46 ` [PATCH v8 18/35] Hexagon (target/hexagon/imported) arch import Taylor Simpson
2021-02-08  5:46 ` [PATCH v8 19/35] Hexagon (target/hexagon) generator phase 1 - C preprocessor for semantics Taylor Simpson
2021-02-14 23:18   ` Richard Henderson
2021-02-08  5:46 ` [PATCH v8 20/35] Hexagon (target/hexagon) generator phase 2 - generate header files Taylor Simpson
2021-02-14 23:20   ` Richard Henderson
2021-02-08  5:46 ` [PATCH v8 21/35] Hexagon (target/hexagon) generator phase 3 - C preprocessor for decode tree Taylor Simpson
2021-02-14 23:24   ` Richard Henderson
2021-02-08  5:46 ` [PATCH v8 22/35] Hexagon (target/hexagon) generater phase 4 - " Taylor Simpson
2021-02-14 23:26   ` Richard Henderson
2021-02-08  5:46 ` [PATCH v8 23/35] Hexagon (target/hexagon) opcode data structures Taylor Simpson
2021-02-12  0:26   ` Philippe Mathieu-Daudé
2021-02-14 23:27   ` Richard Henderson
2021-02-08  5:46 ` [PATCH v8 24/35] Hexagon (target/hexagon) macros Taylor Simpson
2021-02-14 23:36   ` Richard Henderson
2021-02-08  5:46 ` [PATCH v8 25/35] Hexagon (target/hexagon) instruction classes Taylor Simpson
2021-02-14 23:41   ` Richard Henderson
2021-03-14  0:39     ` Taylor Simpson
2021-02-08  5:46 ` [PATCH v8 26/35] Hexagon (target/hexagon) TCG generation Taylor Simpson
2021-02-12  0:22   ` Philippe Mathieu-Daudé
2021-04-05 23:03     ` Taylor Simpson
2021-04-06  9:20       ` Alex Bennée
2021-04-06 15:44         ` Taylor Simpson
2021-02-15  0:06   ` Richard Henderson
2021-03-14  0:39     ` Taylor Simpson
2021-03-14  1:39       ` Richard Henderson
2021-03-15  3:09         ` Taylor Simpson
2021-02-08  5:46 ` [PATCH v8 27/35] Hexagon (target/hexagon) TCG for instructions with multiple definitions Taylor Simpson
2021-02-15  0:33   ` Richard Henderson
2021-03-14  0:41     ` Taylor Simpson
2021-03-14 18:02       ` Richard Henderson
2021-03-15  2:59         ` Taylor Simpson
2021-02-08  5:46 ` [PATCH v8 28/35] Hexagon (target/hexagon) TCG for floating point instructions Taylor Simpson
2021-02-15  0:34   ` Richard Henderson
2021-02-08  5:46 ` [PATCH v8 29/35] Hexagon (target/hexagon) translation Taylor Simpson
2021-02-15  1:03   ` Richard Henderson
2021-03-14  0:40     ` Taylor Simpson
2021-03-14  1:44       ` Richard Henderson
2021-03-15  3:06         ` Taylor Simpson
2021-03-15 13:31           ` Richard Henderson
2021-03-15 22:19             ` Taylor Simpson
2021-02-08  5:46 ` [PATCH v8 30/35] Hexagon (linux-user/hexagon) Linux user emulation Taylor Simpson
2021-02-08  5:46 ` [PATCH v8 31/35] Hexagon (tests/tcg/hexagon) TCG tests - multiarch Taylor Simpson
2021-02-14 18:14   ` Philippe Mathieu-Daudé
2021-02-15  1:05   ` Richard Henderson
2021-02-08  5:46 ` [PATCH v8 32/35] Hexagon (tests/tcg/hexagon) TCG tests - atomics/load/store/misc Taylor Simpson
2021-02-15  1:09   ` Richard Henderson
2021-02-08  5:46 ` [PATCH v8 33/35] Hexagon (tests/tcg/hexagon) TCG tests - floating point Taylor Simpson
2021-02-15  1:10   ` Richard Henderson
2021-02-08  5:46 ` [PATCH v8 34/35] Hexagon build infrastructure Taylor Simpson
2021-02-15  1:11   ` Richard Henderson
2021-02-08  5:46 ` [PATCH v8 35/35] Add Dockerfile for hexagon Taylor Simpson
2021-02-14 18:50   ` Philippe Mathieu-Daudé
2021-02-17 17:23     ` Alessandro Di Federico via
2021-02-17 17:33       ` Philippe Mathieu-Daudé
2021-02-27 14:10   ` Philippe Mathieu-Daudé
2021-02-27 14:32     ` Brian Cain
2021-02-08  6:25 ` [PATCH v8 00/35] Hexagon patch series no-reply
2021-02-15  1:23 ` Richard Henderson
2021-02-16 20:59   ` Taylor Simpson
2021-02-16 21:17     ` Peter Maydell
2021-02-17 14:23       ` Philippe Mathieu-Daudé
2021-02-17 17:05         ` Richard Henderson
2021-02-17 16:58     ` Richard Henderson

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1612763186-18161-18-git-send-email-tsimpson@quicinc.com \
    --to=tsimpson@quicinc.com \
    --cc=ale@rev.ng \
    --cc=alex.bennee@linaro.org \
    --cc=bcain@quicinc.com \
    --cc=laurent@vivier.eu \
    --cc=philmd@redhat.com \
    --cc=qemu-devel@nongnu.org \
    --cc=richard.henderson@linaro.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.