All of lore.kernel.org
 help / color / mirror / Atom feed
From: Taylor Simpson <tsimpson@quicinc.com>
To: qemu-devel@nongnu.org
Cc: ale@rev.ng, riku.voipio@iki.fi, richard.henderson@linaro.org,
	laurent@vivier.eu, tsimpson@quicinc.com, philmd@redhat.com,
	aleksandar.m.mail@gmail.com
Subject: [RFC PATCH v4 15/29] Hexagon (target/hexagon) utility functions
Date: Mon, 28 Sep 2020 12:28:44 -0500	[thread overview]
Message-ID: <1601314138-9930-16-git-send-email-tsimpson@quicinc.com> (raw)
In-Reply-To: <1601314138-9930-1-git-send-email-tsimpson@quicinc.com>

Utility functions called by various instructions

Signed-off-by: Taylor Simpson <tsimpson@quicinc.com>
---
 target/hexagon/arch.h     |  42 +++
 target/hexagon/conv_emu.h |  50 +++
 target/hexagon/fma_emu.h  |  27 ++
 target/hexagon/arch.c     | 354 +++++++++++++++++++++
 target/hexagon/conv_emu.c | 369 ++++++++++++++++++++++
 target/hexagon/fma_emu.c  | 777 ++++++++++++++++++++++++++++++++++++++++++++++
 6 files changed, 1619 insertions(+)
 create mode 100644 target/hexagon/arch.h
 create mode 100644 target/hexagon/conv_emu.h
 create mode 100644 target/hexagon/fma_emu.h
 create mode 100644 target/hexagon/arch.c
 create mode 100644 target/hexagon/conv_emu.c
 create mode 100644 target/hexagon/fma_emu.c

diff --git a/target/hexagon/arch.h b/target/hexagon/arch.h
new file mode 100644
index 0000000..82644ac
--- /dev/null
+++ b/target/hexagon/arch.h
@@ -0,0 +1,42 @@
+/*
+ *  Copyright(c) 2019-2020 Qualcomm Innovation Center, Inc. All Rights Reserved.
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef HEXAGON_ARCH_H
+#define HEXAGON_ARCH_H
+
+#include "cpu.h"
+#include "hex_arch_types.h"
+
+extern uint64_t interleave(uint32_t odd, uint32_t even);
+extern uint64_t deinterleave(uint64_t src);
+extern uint32_t carry_from_add64(uint64_t a, uint64_t b, uint32_t c);
+extern int32_t conv_round(int32_t a, int n);
+extern size16s_t cast8s_to_16s(int64_t a);
+extern int64_t cast16s_to_8s(size16s_t a);
+extern size16s_t add128(size16s_t a, size16s_t b);
+extern size16s_t sub128(size16s_t a, size16s_t b);
+extern size16s_t shiftr128(size16s_t a, uint32_t n);
+extern size16s_t shiftl128(size16s_t a, uint32_t n);
+extern size16s_t and128(size16s_t a, size16s_t b);
+extern void arch_fpop_start(CPUHexagonState *env);
+extern void arch_fpop_end(CPUHexagonState *env);
+extern void arch_raise_fpflag(unsigned int flags);
+extern int arch_sf_recip_common(int32_t *Rs, int32_t *Rt, int32_t *Rd,
+                                int *adjust);
+extern int arch_sf_invsqrt_common(int32_t *Rs, int32_t *Rd, int *adjust);
+
+#endif
diff --git a/target/hexagon/conv_emu.h b/target/hexagon/conv_emu.h
new file mode 100644
index 0000000..e713a12
--- /dev/null
+++ b/target/hexagon/conv_emu.h
@@ -0,0 +1,50 @@
+/*
+ *  Copyright(c) 2019-2020 Qualcomm Innovation Center, Inc. All Rights Reserved.
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef HEXAGON_CONV_EMU_H
+#define HEXAGON_CONV_EMU_H
+
+#include "hex_arch_types.h"
+
+extern uint64_t conv_sf_to_8u(float in);
+extern uint32_t conv_sf_to_4u(float in);
+extern int64_t conv_sf_to_8s(float in);
+extern int32_t conv_sf_to_4s(float in);
+
+extern uint64_t conv_df_to_8u(double in);
+extern uint32_t conv_df_to_4u(double in);
+extern int64_t conv_df_to_8s(double in);
+extern int32_t conv_df_to_4s(double in);
+
+extern double conv_8u_to_df(uint64_t in);
+extern double conv_4u_to_df(uint32_t in);
+extern double conv_8s_to_df(int64_t in);
+extern double conv_4s_to_df(int32_t in);
+
+extern float conv_8u_to_sf(uint64_t in);
+extern float conv_4u_to_sf(uint32_t in);
+extern float conv_8s_to_sf(int64_t in);
+extern float conv_4s_to_sf(int32_t in);
+
+extern float conv_df_to_sf(double in);
+
+/* Widen float to double; every float is exactly representable as a double. */
+static inline double conv_sf_to_df(float in)
+{
+    return in;
+}
+
+#endif
diff --git a/target/hexagon/fma_emu.h b/target/hexagon/fma_emu.h
new file mode 100644
index 0000000..181b1f6
--- /dev/null
+++ b/target/hexagon/fma_emu.h
@@ -0,0 +1,27 @@
+/*
+ *  Copyright(c) 2019-2020 Qualcomm Innovation Center, Inc. All Rights Reserved.
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef HEXAGON_FMA_EMU_H
+#define HEXAGON_FMA_EMU_H
+
+extern float internal_fmafx(float a_in, float b_in, float c_in, int scale);
+extern float internal_fmaf(float a_in, float b_in, float c_in);
+extern float internal_mpyf(float a_in, float b_in);
+extern double internal_mpyhh(double a_in, double b_in,
+                             unsigned long long int accumulated);
+
+#endif
diff --git a/target/hexagon/arch.c b/target/hexagon/arch.c
new file mode 100644
index 0000000..a9f864d
--- /dev/null
+++ b/target/hexagon/arch.c
@@ -0,0 +1,354 @@
+/*
+ *  Copyright(c) 2019-2020 Qualcomm Innovation Center, Inc. All Rights Reserved.
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "qemu/osdep.h"
+#include <math.h>
+#include "fma_emu.h"
+#include "arch.h"
+#include "macros.h"
+
+/* Masks for the successive bit-spreading stages below */
+#define BITS_MASK_8 0x5555555555555555ULL
+#define PAIR_MASK_8 0x3333333333333333ULL
+#define NYBL_MASK_8 0x0f0f0f0f0f0f0f0fULL
+#define BYTE_MASK_8 0x00ff00ff00ff00ffULL
+#define HALF_MASK_8 0x0000ffff0000ffffULL
+#define WORD_MASK_8 0x00000000ffffffffULL
+
+/*
+ * Bit interleave (Morton encode): bit i of "even" becomes bit 2i of the
+ * result and bit i of "odd" becomes bit 2i+1.
+ */
+uint64_t interleave(uint32_t odd, uint32_t even)
+{
+    /* Convert to long long */
+    uint64_t myodd = odd;
+    uint64_t myeven = even;
+    /* First, spread bits out: each stage doubles the gap between bits */
+    myodd = (myodd | (myodd << 16)) & HALF_MASK_8;
+    myeven = (myeven | (myeven << 16)) & HALF_MASK_8;
+    myodd = (myodd | (myodd << 8)) & BYTE_MASK_8;
+    myeven = (myeven | (myeven << 8)) & BYTE_MASK_8;
+    myodd = (myodd | (myodd << 4)) & NYBL_MASK_8;
+    myeven = (myeven | (myeven << 4)) & NYBL_MASK_8;
+    myodd = (myodd | (myodd << 2)) & PAIR_MASK_8;
+    myeven = (myeven | (myeven << 2)) & PAIR_MASK_8;
+    myodd = (myodd | (myodd << 1)) & BITS_MASK_8;
+    myeven = (myeven | (myeven << 1)) & BITS_MASK_8;
+    /* Now OR together, odd bits shifted into the odd positions */
+    return myeven | (myodd << 1);
+}
+
+/*
+ * Inverse of interleave (Morton decode): the even-position bits of src
+ * are compacted into the low word of the result, the odd-position bits
+ * into the high word.
+ */
+uint64_t deinterleave(uint64_t src)
+{
+    /* Get odd and even bits */
+    uint64_t myodd = ((src >> 1) & BITS_MASK_8);
+    uint64_t myeven = (src & BITS_MASK_8);
+
+    /* Unspread bits: each stage halves the gap between bits */
+    myeven = (myeven | (myeven >> 1)) & PAIR_MASK_8;
+    myodd = (myodd | (myodd >> 1)) & PAIR_MASK_8;
+    myeven = (myeven | (myeven >> 2)) & NYBL_MASK_8;
+    myodd = (myodd | (myodd >> 2)) & NYBL_MASK_8;
+    myeven = (myeven | (myeven >> 4)) & BYTE_MASK_8;
+    myodd = (myodd | (myodd >> 4)) & BYTE_MASK_8;
+    myeven = (myeven | (myeven >> 8)) & HALF_MASK_8;
+    myodd = (myodd | (myodd >> 8)) & HALF_MASK_8;
+    myeven = (myeven | (myeven >> 16)) & WORD_MASK_8;
+    myodd = (myodd | (myodd >> 16)) & WORD_MASK_8;
+
+    /* Return odd bits in upper half */
+    return myeven | (myodd << 32);
+}
+
+/*
+ * Carry-out (0 or 1) of the 64-bit addition a + b + c, computed as two
+ * 32-bit half-adds so the carry never escapes the uint64_t temporaries.
+ * fGETUWORD(i, x) extracts 32-bit word i of x (project macro; word 1 is
+ * presumably the upper half -- confirm against macros.h).
+ */
+uint32_t carry_from_add64(uint64_t a, uint64_t b, uint32_t c)
+{
+    uint64_t tmpa, tmpb, tmpc;
+    tmpa = fGETUWORD(0, a);
+    tmpb = fGETUWORD(0, b);
+    tmpc = tmpa + tmpb + c;
+    /* Carry of the low half is in word 1 of tmpc; fold it into the high half */
+    tmpa = fGETUWORD(1, a);
+    tmpb = fGETUWORD(1, b);
+    tmpc = tmpa + tmpb + fGETUWORD(1, tmpc);
+    tmpc = fGETUWORD(1, tmpc);
+    return tmpc;
+}
+
+/*
+ * Convergent rounding: compute round(a / 2^n) with ties rounded to the
+ * nearest even value.  If the discarded fraction is exactly one half
+ * (all bits below n-1 are zero), half of the integer part's LSB is
+ * added so odd quotients round up and even quotients round down.
+ *
+ * Fix: the tie case used (1 << n), which is undefined behavior for
+ * n == 31 (signed left-shift overflow); use an unsigned shift instead.
+ */
+int32_t conv_round(int32_t a, int n)
+{
+    int64_t val;
+
+    if (n == 0) {
+        val = a;
+    } else if ((a & ((1 << (n - 1)) - 1)) == 0) {    /* N-1..0 all zero? */
+        /* Fraction is 0 or exactly .5: add LSB from int part (round to even) */
+        val = ((fSE32_64(a)) + (int64_t) (((uint32_t) ((1u << n) & a)) >> 1));
+    } else {
+        /* Ordinary case: add .5 and truncate */
+        val = ((fSE32_64(a)) + (1 << (n - 1)));
+    }
+
+    val = val >> n;
+    return (int32_t)val;
+}
+
+/* Sign-extend a 64-bit value into a 128-bit (hi:lo) pair. */
+size16s_t cast8s_to_16s(int64_t a)
+{
+    size16s_t result = {.hi = 0, .lo = 0};
+    result.lo = a;
+    if (a < 0) {
+        /* Negative: fill the upper half with the sign */
+        result.hi = -1;
+    }
+    return result;
+}
+
+/* Truncate a 128-bit value to its low 64 bits. */
+int64_t cast16s_to_8s(size16s_t a)
+{
+    return a.lo;
+}
+
+/*
+ * 128-bit addition of (hi:lo) pairs.  A carry out of the low half is
+ * detected by unsigned wrap-around: lo(a) + lo(b) < lo(b) iff the sum
+ * wrapped (assumes .lo is an unsigned 64-bit field -- see
+ * hex_arch_types.h).
+ */
+size16s_t add128(size16s_t a, size16s_t b)
+{
+    size16s_t result = {.hi = 0, .lo = 0};
+    result.lo = a.lo + b.lo;
+    result.hi = a.hi + b.hi;
+
+    if (result.lo < b.lo) {
+        /* Low half wrapped: propagate the carry */
+        result.hi++;
+    }
+
+    return result;
+}
+
+/*
+ * 128-bit subtraction.  A borrow out of the low half is detected by the
+ * difference being larger than the minuend (unsigned wrap).
+ */
+size16s_t sub128(size16s_t a, size16s_t b)
+{
+    size16s_t result = {.hi = 0, .lo = 0};
+    result.lo = a.lo - b.lo;
+    result.hi = a.hi - b.hi;
+    if (result.lo > a.lo) {
+        /* Low half wrapped: propagate the borrow */
+        result.hi--;
+    }
+
+    return result;
+}
+
+/*
+ * Arithmetic right shift of a 128-bit (hi:lo) pair by n bits.
+ *
+ * Fix: the previous single-expression form shifted by (64 - n), which
+ * is undefined behavior in C when n == 0, and by n when n >= 64.
+ * Handle n == 0, 64..127, and >= 128 explicitly; shifts of 64 or more
+ * sign-fill from the high word.
+ */
+size16s_t shiftr128(size16s_t a, uint32_t n)
+{
+    size16s_t result;
+    if (n == 0) {
+        result.lo = a.lo;
+        result.hi = a.hi;
+    } else if (n > 127) {
+        /* Everything shifted out: result is all sign bits */
+        result.lo = a.hi >> 63;
+        result.hi = a.hi >> 63;
+    } else if (n > 63) {
+        result.lo = a.hi >> (n - 64);
+        result.hi = a.hi >> 63;
+    } else {
+        result.lo = (a.lo >> n) | (a.hi << (64 - n));
+        result.hi = a.hi >> n;
+    }
+    return result;
+}
+
+/*
+ * Left shift of a 128-bit (hi:lo) pair by n bits.
+ *
+ * Fix: the previous single-expression form shifted by (64 - n), which
+ * is undefined behavior in C when n == 0, and by n when n >= 64.
+ * Handle n == 0, 64..127, and >= 128 explicitly.
+ */
+size16s_t shiftl128(size16s_t a, uint32_t n)
+{
+    size16s_t result;
+    if (n == 0) {
+        result.lo = a.lo;
+        result.hi = a.hi;
+    } else if (n > 127) {
+        /* Everything shifted out */
+        result.lo = 0;
+        result.hi = 0;
+    } else if (n > 63) {
+        result.lo = 0;
+        result.hi = a.lo << (n - 64);
+    } else {
+        result.lo = a.lo << n;
+        result.hi = (a.hi << n) | (a.lo >> (64 - n));
+    }
+    return result;
+}
+
+/* Bitwise AND of two 128-bit (hi:lo) pairs. */
+size16s_t and128(size16s_t a, size16s_t b)
+{
+    size16s_t result;
+    result.lo = a.lo & b.lo;
+    result.hi = a.hi & b.hi;
+    return result;
+}
+
+/* Floating Point Stuff */
+
+/*
+ * Table index is the 2-bit USR.FPRND field (see arch_fpop_start), so
+ * the order here defines the architectural encoding of rounding modes.
+ */
+static const int roundingmodes[] = {
+    FE_TONEAREST,
+    FE_TOWARDZERO,
+    FE_DOWNWARD,
+    FE_UPWARD
+};
+
+/*
+ * Bracket for a guest FP operation: save the host FP environment into
+ * env->fenv, clear the accrued exception flags, and install the guest's
+ * rounding mode from the USR.FPRND field.  Must be paired with
+ * arch_fpop_end.
+ */
+void arch_fpop_start(CPUHexagonState *env)
+{
+    fegetenv(&env->fenv);
+    feclearexcept(FE_ALL_EXCEPT);
+    fesetround(roundingmodes[fREAD_REG_FIELD(USR, USR_FPRND)]);
+}
+
+#define NOTHING             /* Don't do anything */
+
+/*
+ * If host exception LIBCF is pending, set the sticky USR flag MYF
+ * (only if not already set).  The MYE enable-bit check is a placeholder
+ * for trap delivery, which is not implemented here.
+ */
+#define TEST_FLAG(LIBCF, MYF, MYE) \
+    do { \
+        if (fetestexcept(LIBCF)) { \
+            if (GET_USR_FIELD(USR_##MYF) == 0) { \
+                SET_USR_FIELD(USR_##MYF, 1); \
+                if (GET_USR_FIELD(USR_##MYE)) { \
+                    NOTHING \
+                } \
+            } \
+        } \
+    } while (0)
+
+/*
+ * Close the bracket opened by arch_fpop_start: propagate any host FP
+ * exceptions raised by the operation into the guest USR flags, then
+ * restore the saved host FP environment.
+ */
+void arch_fpop_end(CPUHexagonState *env)
+{
+    if (fetestexcept(FE_ALL_EXCEPT)) {
+        TEST_FLAG(FE_INEXACT, FPINPF, FPINPE);
+        TEST_FLAG(FE_DIVBYZERO, FPDBZF, FPDBZE);
+        TEST_FLAG(FE_INVALID, FPINVF, FPINVE);
+        TEST_FLAG(FE_OVERFLOW, FPOVFF, FPOVFE);
+        TEST_FLAG(FE_UNDERFLOW, FPUNFF, FPUNFE);
+    }
+    fesetenv(&env->fenv);
+}
+
+#undef TEST_FLAG
+
+
+/* Raise the given host FP exception flags (between fpop start/end). */
+void arch_raise_fpflag(unsigned int flags)
+{
+    feraiseexcept(flags);
+}
+
+/*
+ * Common fixup step for the single-precision reciprocal instructions.
+ *
+ * Special cases (NaN, Inf, zero numerator/denominator) are resolved
+ * here: the fixed-up numerator, denominator, and result are written
+ * back through Rs, Rt, and Rd, and 0 is returned.  For the ordinary
+ * case the operands are pre-scaled by powers of two so the subsequent
+ * iteration cannot overflow/underflow; the scaling code is returned in
+ * *adjust (PeV) and 1 is returned to tell the caller to proceed.
+ *
+ * Bit 22 of a NaN's mantissa distinguishes quiet from signaling
+ * (signaling raises FE_INVALID).
+ */
+int arch_sf_recip_common(int32_t *Rs, int32_t *Rt, int32_t *Rd, int *adjust)
+{
+    int n_class;
+    int d_class;
+    int n_exp;
+    int d_exp;
+    int ret = 0;
+    int32_t RsV, RtV, RdV;
+    int PeV = 0;
+    RsV = *Rs;
+    RtV = *Rt;
+    n_class = fpclassify(fFLOAT(RsV));
+    d_class = fpclassify(fFLOAT(RtV));
+    if ((n_class == FP_NAN) && (d_class == FP_NAN)) {
+        if (fGETBIT(22, RsV & RtV) == 0) {
+            fRAISEFLAGS(FE_INVALID);
+        }
+        RdV = RsV = RtV = fSFNANVAL();
+    } else if (n_class == FP_NAN) {
+        if (fGETBIT(22, RsV) == 0) {
+            fRAISEFLAGS(FE_INVALID);
+        }
+        RdV = RsV = RtV = fSFNANVAL();
+    } else if (d_class == FP_NAN) {
+        /* or put NaN in num/den fixup? */
+        if (fGETBIT(22, RtV) == 0) {
+            fRAISEFLAGS(FE_INVALID);
+        }
+        RdV = RsV = RtV = fSFNANVAL();
+    } else if ((n_class == FP_INFINITE) && (d_class == FP_INFINITE)) {
+        /* Inf/Inf is invalid; or put Inf in num fixup? */
+        RdV = RsV = RtV = fSFNANVAL();
+        fRAISEFLAGS(FE_INVALID);
+    } else if ((n_class == FP_ZERO) && (d_class == FP_ZERO)) {
+        /* 0/0 is invalid; or put zero in num fixup? */
+        RdV = RsV = RtV = fSFNANVAL();
+        fRAISEFLAGS(FE_INVALID);
+    } else if (d_class == FP_ZERO) {
+        /* x/0: signed infinity; or put Inf in num fixup? */
+        RsV = fSFINFVAL(RsV ^ RtV);
+        RtV = fSFONEVAL(0);
+        RdV = fSFONEVAL(0);
+        if (n_class != FP_INFINITE) {
+            fRAISEFLAGS(FE_DIVBYZERO);
+        }
+    } else if (d_class == FP_INFINITE) {
+        /* x/Inf: signed zero */
+        RsV = 0x80000000 & (RsV ^ RtV);
+        RtV = fSFONEVAL(0);
+        RdV = fSFONEVAL(0);
+    } else if (n_class == FP_ZERO) {
+        /* Does this just work itself out? */
+        /* No, 0/Inf causes problems. */
+        RsV = 0x80000000 & (RsV ^ RtV);
+        RtV = fSFONEVAL(0);
+        RdV = fSFONEVAL(0);
+    } else if (n_class == FP_INFINITE) {
+        /* Does this just work itself out? */
+        RsV = fSFINFVAL(RsV ^ RtV);
+        RtV = fSFONEVAL(0);
+        RdV = fSFONEVAL(0);
+    } else {
+        PeV = 0x00;
+        /* Basic checks passed; pre-scale to keep the quotient in range */
+        n_exp = fSF_GETEXP(RsV);
+        d_exp = fSF_GETEXP(RtV);
+        if ((n_exp - d_exp + fSF_BIAS()) <= fSF_MANTBITS()) {
+            /* Near quotient underflow / inexact Q */
+            PeV = 0x80;
+            RtV = fSF_MUL_POW2(RtV, -64);
+            RsV = fSF_MUL_POW2(RsV, 64);
+        } else if ((n_exp - d_exp + fSF_BIAS()) > (fSF_MAXEXP() - 24)) {
+            /* Near quotient overflow */
+            PeV = 0x40;
+            RtV = fSF_MUL_POW2(RtV, 32);
+            RsV = fSF_MUL_POW2(RsV, -32);
+        } else if (n_exp <= fSF_MANTBITS() + 2) {
+            RtV = fSF_MUL_POW2(RtV, 64);
+            RsV = fSF_MUL_POW2(RsV, 64);
+        } else if (d_exp <= 1) {
+            RtV = fSF_MUL_POW2(RtV, 32);
+            RsV = fSF_MUL_POW2(RsV, 32);
+        } else if (d_exp > 252) {
+            RtV = fSF_MUL_POW2(RtV, -32);
+            RsV = fSF_MUL_POW2(RsV, -32);
+        }
+        RdV = 0;
+        ret = 1;
+    }
+    *Rs = RsV;
+    *Rt = RtV;
+    *Rd = RdV;
+    *adjust = PeV;
+    return ret;
+}
+
+/*
+ * Common fixup step for the single-precision inverse-square-root
+ * instructions.  Mirrors arch_sf_recip_common: special cases (NaN,
+ * negative, Inf, zero) are resolved here and 0 is returned; otherwise
+ * the operand is pre-scaled, the scale code is stored in *adjust, and
+ * 1 is returned so the caller performs the actual computation.
+ */
+int arch_sf_invsqrt_common(int32_t *Rs, int32_t *Rd, int *adjust)
+{
+    int r_class;
+    int32_t RsV, RdV;
+    int PeV = 0;
+    int r_exp;
+    int ret = 0;
+    RsV = *Rs;
+    r_class = fpclassify(fFLOAT(RsV));
+    if (r_class == FP_NAN) {
+        /* Signaling NaN (bit 22 clear) raises invalid */
+        if (fGETBIT(22, RsV) == 0) {
+            fRAISEFLAGS(FE_INVALID);
+        }
+        RdV = RsV = fSFNANVAL();
+    } else if (fFLOAT(RsV) < 0.0) {
+        /* Negative nonzero values are NaN */
+        fRAISEFLAGS(FE_INVALID);
+        RsV = fSFNANVAL();
+        RdV = fSFNANVAL();
+    } else if (r_class == FP_INFINITE) {
+        /* or put Inf in num fixup? */
+        RsV = fSFINFVAL(-1);
+        RdV = fSFINFVAL(-1);
+    } else if (r_class == FP_ZERO) {
+        /* or put zero in num fixup? */
+        RdV = fSFONEVAL(0);
+    } else {
+        PeV = 0x00;
+        /* Basic checks passed; scale tiny inputs up by 2^64 */
+        r_exp = fSF_GETEXP(RsV);
+        if (r_exp <= 24) {
+            RsV = fSF_MUL_POW2(RsV, 64);
+            PeV = 0xe0;
+        }
+        RdV = 0;
+        ret = 1;
+    }
+    *Rs = RsV;
+    *Rd = RdV;
+    *adjust = PeV;
+    return ret;
+}
+
diff --git a/target/hexagon/conv_emu.c b/target/hexagon/conv_emu.c
new file mode 100644
index 0000000..22fc647
--- /dev/null
+++ b/target/hexagon/conv_emu.c
@@ -0,0 +1,369 @@
+/*
+ *  Copyright(c) 2019-2020 Qualcomm Innovation Center, Inc. All Rights Reserved.
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <math.h>
+#include "qemu/osdep.h"
+#include "hex_arch_types.h"
+#include "macros.h"
+#include "conv_emu.h"
+
+/* True when X is a (positive or negative) floating-point zero */
+#define isz(X) (fpclassify(X) == FP_ZERO)
+#define DF_BIAS 1023
+#define SF_BIAS 127
+
+/* Largest positive values of int64_t / int32_t, as unsigned constants */
+#define LL_MAX_POS 0x7fffffffffffffffULL
+#define MAX_POS 0x7fffffffU
+
+#ifdef VCPP
+/*
+ * Visual C isn't GNU C and doesn't have __builtin_clzll
+ */
+
+/* Count leading zeros of a 64-bit value by binary search; 64 for input 0 */
+static int __builtin_clzll(unsigned long long int input)
+{
+    int total = 0;
+    if (input == 0) {
+        return 64;
+    }
+    total += ((input >> (total + 32)) != 0) ? 32 : 0;
+    total += ((input >> (total + 16)) != 0) ? 16 : 0;
+    total += ((input >> (total +  8)) != 0) ?  8 : 0;
+    total += ((input >> (total +  4)) != 0) ?  4 : 0;
+    total += ((input >> (total +  2)) != 0) ?  2 : 0;
+    total += ((input >> (total +  1)) != 0) ?  1 : 0;
+    return 63 - total;
+}
+#endif
+
+/*
+ * IEEE-754 double viewed as raw bits or as mant/exp/sign fields.
+ * NOTE(review): bit-field order is implementation-defined; this layout
+ * assumes a little-endian host ABI -- confirm for all supported hosts.
+ */
+typedef union {
+    double f;
+    uint64_t i;
+    struct {
+        uint64_t mant:52;
+        uint64_t exp:11;
+        uint64_t sign:1;
+    } x;
+} df_t;
+
+
+/* IEEE-754 single viewed as raw bits or as mant/exp/sign fields */
+typedef union {
+    float f;
+    uint32_t i;
+    struct {
+        uint32_t mant:23;
+        uint32_t exp:8;
+        uint32_t sign:1;
+    } x;
+} sf_t;
+
+
+/*
+ * Generate conv_8u_to_<df|sf>_n(in, negate): convert a 64-bit unsigned
+ * integer to double/float with correct rounding in the current
+ * fegetround() mode.  The "negate" flag tells the rounding logic the
+ * caller will negate the result (so FE_DOWNWARD/FE_UPWARD swap roles)
+ * and is also written to the sign bit.  The default case implements
+ * round-to-nearest-even.  Raises FE_INEXACT when bits are discarded.
+ */
+#define MAKE_CONV_8U_TO_XF_N(FLOATID, BIGFLOATID, RETTYPE) \
+static RETTYPE conv_8u_to_##FLOATID##_n(uint64_t in, int negate) \
+{ \
+    FLOATID##_t x; \
+    uint64_t tmp, truncbits, shamt; \
+    int leading_zeros; \
+    if (in == 0) { \
+        return 0.0; \
+    } \
+    leading_zeros = __builtin_clzll(in); \
+    tmp = in << (leading_zeros); \
+    tmp <<= 1; \
+    shamt = 64 - f##BIGFLOATID##_MANTBITS(); \
+    truncbits = tmp & ((1ULL << (shamt)) - 1); \
+    tmp >>= shamt; \
+    if (truncbits != 0) { \
+        feraiseexcept(FE_INEXACT); \
+        switch (fegetround()) { \
+        case FE_TOWARDZERO: \
+            break; \
+        case FE_DOWNWARD: \
+            if (negate) { \
+                tmp += 1; \
+            } \
+            break; \
+        case FE_UPWARD: \
+            if (!negate) { \
+                tmp += 1; \
+            } \
+            break; \
+        default: \
+            if ((truncbits & ((1ULL << (shamt - 1)) - 1)) == 0) { \
+                tmp += (tmp & 1); \
+            } else { \
+                tmp += ((truncbits >> (shamt - 1)) & 1); \
+            } \
+            break; \
+        } \
+    } \
+    if (((tmp << shamt) >> shamt) != tmp) { \
+        leading_zeros--; \
+    } \
+    x.x.mant = tmp; \
+    x.x.exp = BIGFLOATID##_BIAS + f##BIGFLOATID##_MANTBITS() - \
+              leading_zeros + shamt - 1; \
+    x.x.sign = negate; \
+    return x.f; \
+}
+
+MAKE_CONV_8U_TO_XF_N(df, DF, double)
+MAKE_CONV_8U_TO_XF_N(sf, SF, float)
+
+/* uint64 -> double, rounded per the current rounding mode */
+double conv_8u_to_df(uint64_t in)
+{
+    return conv_8u_to_df_n(in, 0);
+}
+
+/* int64 -> double; negative inputs go through the unsigned path negated */
+double conv_8s_to_df(int64_t in)
+{
+    /* INT64_MIN cannot be negated; -2^63 is exactly representable */
+    if (in == 0x8000000000000000) {
+        return -0x1p63;
+    }
+    if (in < 0) {
+        return conv_8u_to_df_n(-in, 1);
+    } else {
+        return conv_8u_to_df_n(in, 0);
+    }
+}
+
+/* uint32 -> double (always exact) */
+double conv_4u_to_df(uint32_t in)
+{
+    return conv_8u_to_df((uint64_t) in);
+}
+
+/* int32 -> double (always exact) */
+double conv_4s_to_df(int32_t in)
+{
+    return conv_8s_to_df(in);
+}
+
+/* uint64 -> float, rounded per the current rounding mode */
+float conv_8u_to_sf(uint64_t in)
+{
+    return conv_8u_to_sf_n(in, 0);
+}
+
+/* int64 -> float; negative inputs go through the unsigned path negated */
+float conv_8s_to_sf(int64_t in)
+{
+    /* INT64_MIN cannot be negated; -2^63 is exactly representable */
+    if (in == 0x8000000000000000) {
+        return -0x1p63;
+    }
+    if (in < 0) {
+        return conv_8u_to_sf_n(-in, 1);
+    } else {
+        return conv_8u_to_sf_n(in, 0);
+    }
+}
+
+/* uint32 -> float, rounded per the current rounding mode */
+float conv_4u_to_sf(uint32_t in)
+{
+    return conv_8u_to_sf(in);
+}
+
+/* int32 -> float, rounded per the current rounding mode */
+float conv_4s_to_sf(int32_t in)
+{
+    return conv_8s_to_sf(in);
+}
+
+
+/*
+ * double -> uint64 with Hexagon semantics: out-of-range, NaN, and
+ * negative inputs raise FE_INVALID and saturate (to ~0 or 0) rather
+ * than being undefined.  "will_negate" tells the rounding logic the
+ * caller will negate the result, so FE_DOWNWARD/FE_UPWARD swap roles.
+ * Raises FE_INEXACT whenever fraction bits are discarded.
+ */
+static uint64_t conv_df_to_8u_n(double in, int will_negate)
+{
+    df_t x;
+    int fracshift, endshift;
+    uint64_t tmp, truncbits;
+    x.f = in;
+    if (isinf(in)) {
+        feraiseexcept(FE_INVALID);
+        if (in > 0.0) {
+            return ~0ULL;
+        } else {
+            return 0ULL;
+        }
+    }
+    if (isnan(in)) {
+        feraiseexcept(FE_INVALID);
+        return ~0ULL;
+    }
+    if (isz(in)) {
+        return 0;
+    }
+    if (x.x.sign) {
+        feraiseexcept(FE_INVALID);
+        return 0;
+    }
+    if (in < 0.5) {
+        /* Near zero, captures large fracshifts, denorms, etc */
+        feraiseexcept(FE_INEXACT);
+        switch (fegetround()) {
+        case FE_DOWNWARD:
+            if (will_negate) {
+                return 1;
+            } else {
+                return 0;
+            }
+        case FE_UPWARD:
+            if (!will_negate) {
+                return 1;
+            } else {
+                return 0;
+            }
+        default:
+            return 0;    /* nearest or towards zero */
+        }
+    }
+    if ((x.x.exp - DF_BIAS) >= 64) {
+        /* way too big */
+        feraiseexcept(FE_INVALID);
+        return ~0ULL;
+    }
+    /* fracshift: bits below the binary point; endshift: bits above bit 52 */
+    fracshift = fMAX(0, (fDF_MANTBITS() - (x.x.exp - DF_BIAS)));
+    endshift = fMAX(0, ((x.x.exp - DF_BIAS - fDF_MANTBITS())));
+    /* Make the implicit integer bit explicit */
+    tmp = x.x.mant | (1ULL << fDF_MANTBITS());
+    truncbits = tmp & ((1ULL << fracshift) - 1);
+    tmp >>= fracshift;
+    if (truncbits) {
+        /* Apply Rounding */
+        feraiseexcept(FE_INEXACT);
+        switch (fegetround()) {
+        case FE_TOWARDZERO:
+            break;
+        case FE_DOWNWARD:
+            if (will_negate) {
+                tmp += 1;
+            }
+            break;
+        case FE_UPWARD:
+            if (!will_negate) {
+                tmp += 1;
+            }
+            break;
+        default:
+            /* Round to nearest, ties to even */
+            if ((truncbits & ((1ULL << (fracshift - 1)) - 1)) == 0) {
+                /* Exactly .5 */
+                tmp += (tmp & 1);
+            } else {
+                tmp += ((truncbits >> (fracshift - 1)) & 1);
+            }
+        }
+    }
+    /*
+     * If we added one and it carried all the way out,
+     * check to see if overflow
+     */
+    if ((tmp & ((1ULL << (fDF_MANTBITS() + 1)) - 1)) == 0) {
+        if ((x.x.exp - DF_BIAS) == 63) {
+            feclearexcept(FE_INEXACT);
+            feraiseexcept(FE_INVALID);
+            return ~0ULL;
+        }
+    }
+    tmp <<= endshift;
+    return tmp;
+}
+
+/*
+ * double -> uint32 via the 64-bit conversion; results that don't fit
+ * in 32 bits raise FE_INVALID (replacing any FE_INEXACT) and saturate.
+ */
+static uint32_t conv_df_to_4u_n(double in, int will_negate)
+{
+    uint64_t tmp;
+    tmp = conv_df_to_8u_n(in, will_negate);
+    if (tmp > 0x00000000ffffffffULL) {
+        feclearexcept(FE_INEXACT);
+        feraiseexcept(FE_INVALID);
+        return ~0U;
+    }
+    return (uint32_t)tmp;
+}
+
+/* double -> uint64, saturating */
+uint64_t conv_df_to_8u(double in)
+{
+    return conv_df_to_8u_n(in, 0);
+}
+
+/* double -> uint32, saturating */
+uint32_t conv_df_to_4u(double in)
+{
+    return conv_df_to_4u_n(in, 0);
+}
+
+/*
+ * double -> int64, saturating.  The magnitude is converted through the
+ * unsigned path; the bound is LL_MAX_POS for positive inputs and
+ * LL_MAX_POS + 1 (== |INT64_MIN|) for negative ones.  NaN raises
+ * FE_INVALID and returns -1.
+ */
+int64_t conv_df_to_8s(double in)
+{
+    uint64_t tmp;
+    df_t x;
+    x.f = in;
+    if (isnan(in)) {
+        feraiseexcept(FE_INVALID);
+        return -1;
+    }
+    if (x.x.sign) {
+        tmp = conv_df_to_8u_n(-in, 1);
+    } else {
+        tmp = conv_df_to_8u_n(in, 0);
+    }
+    /* sign is 0 or 1, so this allows one extra unit of negative range */
+    if (tmp > (LL_MAX_POS + x.x.sign)) {
+        feclearexcept(FE_INEXACT);
+        feraiseexcept(FE_INVALID);
+        tmp = (LL_MAX_POS + x.x.sign);
+    }
+    if (x.x.sign) {
+        return -tmp;
+    } else {
+        return tmp;
+    }
+}
+
+/*
+ * double -> int32, saturating; same scheme as conv_df_to_8s with the
+ * 32-bit bound MAX_POS (+1 for negative inputs).
+ */
+int32_t conv_df_to_4s(double in)
+{
+    uint64_t tmp;
+    df_t x;
+    x.f = in;
+    if (isnan(in)) {
+        feraiseexcept(FE_INVALID);
+        return -1;
+    }
+    if (x.x.sign) {
+        tmp = conv_df_to_8u_n(-in, 1);
+    } else {
+        tmp = conv_df_to_8u_n(in, 0);
+    }
+    if (tmp > (MAX_POS + x.x.sign)) {
+        feclearexcept(FE_INEXACT);
+        feraiseexcept(FE_INVALID);
+        tmp = (MAX_POS + x.x.sign);
+    }
+    if (x.x.sign) {
+        return -tmp;
+    } else {
+        return tmp;
+    }
+}
+
+/* float -> integer conversions: widen to double (exact) and reuse the
+ * double paths, which handle saturation and exception flags. */
+uint64_t conv_sf_to_8u(float in)
+{
+    return conv_df_to_8u(in);
+}
+
+uint32_t conv_sf_to_4u(float in)
+{
+    return conv_df_to_4u(in);
+}
+
+int64_t conv_sf_to_8s(float in)
+{
+    return conv_df_to_8s(in);
+}
+
+int32_t conv_sf_to_4s(float in)
+{
+    return conv_df_to_4s(in);
+}
+
diff --git a/target/hexagon/fma_emu.c b/target/hexagon/fma_emu.c
new file mode 100644
index 0000000..6a4d4e1
--- /dev/null
+++ b/target/hexagon/fma_emu.c
@@ -0,0 +1,777 @@
+/*
+ *  Copyright(c) 2019-2020 Qualcomm Innovation Center, Inc. All Rights Reserved.
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <math.h>
+#include "qemu/osdep.h"
+#include "macros.h"
+#include "conv_emu.h"
+#include "fma_emu.h"
+
+/* IEEE-754 exponent-field constants for double/single/half precision */
+#define DF_INF_EXP 0x7ff
+#define DF_BIAS 1023
+
+#define SF_INF_EXP 0xff
+#define SF_BIAS 127
+
+#define HF_INF_EXP 0x1f
+#define HF_BIAS 15
+
+/* Sentinel exponent far outside any representable range */
+#define WAY_BIG_EXP 4096
+
+/* True when X is a (positive or negative) floating-point zero */
+#define isz(X) (fpclassify(X) == FP_ZERO)
+
+/*
+ * IEEE-754 double viewed as raw bits or as mant/exp/sign fields.
+ * NOTE(review): bit-field order is implementation-defined; this layout
+ * assumes a little-endian host ABI -- confirm for all supported hosts.
+ */
+typedef union {
+    double f;
+    uint64_t i;
+    struct {
+        uint64_t mant:52;
+        uint64_t exp:11;
+        uint64_t sign:1;
+    } x;
+} df_t;
+
+/* IEEE-754 single viewed as raw bits or as mant/exp/sign fields */
+typedef union {
+    float f;
+    uint32_t i;
+    struct {
+        uint32_t mant:23;
+        uint32_t exp:8;
+        uint32_t sign:1;
+    } x;
+} sf_t;
+
+/*
+ * 128-bit value as two 64-bit halves, with 32-bit word views used by
+ * int128_mul_6464.  NOTE(review): in the high union w3 is declared
+ * before w2, the reverse of the w0/w1 order in the low union -- only
+ * w0/w1 are used below, but confirm the intended naming.
+ */
+typedef struct {
+    union {
+        uint64_t low;
+        struct {
+            uint32_t w0;
+            uint32_t w1;
+        };
+    };
+    union {
+        uint64_t high;
+        struct {
+            uint32_t w3;
+            uint32_t w2;
+        };
+    };
+} int128_t;
+/*
+ * Extended-precision float accumulator: 128-bit mantissa plus explicit
+ * guard/round/sticky bits for rounding.
+ */
+typedef struct {
+    int128_t mant;
+    int32_t exp;
+    uint8_t sign;
+    uint8_t guard;
+    uint8_t round;
+    uint8_t sticky;
+} xf_t;
+
+/* Reset an extended-precision accumulator to +0 with no sticky state. */
+static inline void xf_init(xf_t *p)
+{
+    p->mant.low = 0;
+    p->mant.high = 0;
+    p->exp = 0;
+    p->sign = 0;
+    p->guard = 0;
+    p->round = 0;
+    p->sticky = 0;
+}
+
+/*
+ * Significand of a double with the implicit integer bit made explicit
+ * for normal numbers; 0 for zeros, the raw fraction for denormals, and
+ * all-ones (-1) for NaN/infinity.
+ *
+ * Fix: the FP_NORMAL return statement was mis-indented (flush with the
+ * case label instead of the case body).
+ */
+static inline uint64_t df_getmant(df_t a)
+{
+    int class = fpclassify(a.f);
+    switch (class) {
+    case FP_NORMAL:
+        return a.x.mant | 1ULL << 52;
+    case FP_ZERO:
+        return 0;
+    case FP_SUBNORMAL:
+        return a.x.mant;
+    default:
+        return -1;
+    };
+}
+
+/*
+ * Biased exponent of a double; denormals report exp + 1 so their
+ * significand (without implicit bit) aligns with normals.  -1 for
+ * zero/NaN/infinity.
+ */
+static inline int32_t df_getexp(df_t a)
+{
+    int class = fpclassify(a.f);
+    switch (class) {
+    case FP_NORMAL:
+        return a.x.exp;
+    case FP_SUBNORMAL:
+        return a.x.exp + 1;
+    default:
+        return -1;
+    };
+}
+
+/* Single-precision analogue of df_getmant (implicit bit at position 23). */
+static inline uint64_t sf_getmant(sf_t a)
+{
+    int class = fpclassify(a.f);
+    switch (class) {
+    case FP_NORMAL:
+        return a.x.mant | 1ULL << 23;
+    case FP_ZERO:
+        return 0;
+    case FP_SUBNORMAL:
+        return a.x.mant | 0ULL;
+    default:
+        return -1;
+    };
+}
+
+/* Single-precision analogue of df_getexp. */
+static inline int32_t sf_getexp(sf_t a)
+{
+    int class = fpclassify(a.f);
+    switch (class) {
+    case FP_NORMAL:
+        return a.x.exp;
+    case FP_SUBNORMAL:
+        return a.x.exp + 1;
+    default:
+        return -1;
+    };
+}
+
+/*
+ * Full 64x64 -> 128-bit unsigned multiply, built from four 32x32
+ * partial products: pp0 (low x low), pp1a/pp1b (cross terms), and
+ * pp2 (high x high).  Carries out of the partial-product sums are
+ * detected by unsigned wrap-around and folded into pp2.
+ */
+static inline int128_t int128_mul_6464(uint64_t ai, uint64_t bi)
+{
+    int128_t ret;
+    int128_t a, b;
+    uint64_t pp0, pp1a, pp1b, pp1s, pp2;
+
+    a.high = b.high = 0;
+    a.low = ai;
+    b.low = bi;
+    pp0 = (uint64_t)a.w0 * (uint64_t)b.w0;
+    pp1a = (uint64_t)a.w1 * (uint64_t)b.w0;
+    pp1b = (uint64_t)b.w1 * (uint64_t)a.w0;
+    pp2 = (uint64_t)a.w1 * (uint64_t)b.w1;
+
+    pp1s = pp1a + pp1b;
+    if ((pp1s < pp1a) || (pp1s < pp1b)) {
+        /* Cross-term sum wrapped: carry lands 32 bits up in pp2 */
+        pp2 += (1ULL << 32);
+    }
+    ret.low = pp0 + (pp1s << 32);
+    if ((ret.low < pp0) || (ret.low < (pp1s << 32))) {
+        pp2 += 1;
+    }
+    /* High word: high product plus the upper half of the cross terms */
+    ret.high = pp2 + (pp1s >> 32);
+
+    return ret;
+}
+
+/*
+ * 128-bit left shift.
+ *
+ * Fixes: (1) the overflow guard was "amt > 128", so amt == 128 fell
+ * through to a shift by 64 (undefined behavior); (2) when amt was
+ * exactly 64, the word-move left amt == 0 and the cross shift
+ * "a.low >> (64 - amt)" became a shift by 64 (also UB).
+ */
+static inline int128_t int128_shl(int128_t a, uint32_t amt)
+{
+    int128_t ret;
+    if (amt == 0) {
+        return a;
+    }
+    if (amt >= 128) {
+        ret.high = 0;
+        ret.low = 0;
+        return ret;
+    }
+    if (amt >= 64) {
+        amt -= 64;
+        a.high = a.low;
+        a.low = 0;
+        if (amt == 0) {
+            return a;
+        }
+    }
+    ret.high = a.high << amt;
+    ret.high |= (a.low >> (64 - amt));
+    ret.low = a.low << amt;
+    return ret;
+}
+
+/*
+ * 128-bit logical right shift.
+ *
+ * Fixes: (1) the overflow guard was "amt > 128", so amt == 128 fell
+ * through to a shift by 64 (undefined behavior); (2) when amt was
+ * exactly 64, the word-move left amt == 0 and the cross shift
+ * "a.high << (64 - amt)" became a shift by 64 (also UB).
+ */
+static inline int128_t int128_shr(int128_t a, uint32_t amt)
+{
+    int128_t ret;
+    if (amt == 0) {
+        return a;
+    }
+    if (amt >= 128) {
+        ret.high = 0;
+        ret.low = 0;
+        return ret;
+    }
+    if (amt >= 64) {
+        amt -= 64;
+        a.low = a.high;
+        a.high = 0;
+        if (amt == 0) {
+            return a;
+        }
+    }
+    ret.low = a.low >> amt;
+    ret.low |= (a.high << (64 - amt));
+    ret.high = a.high >> amt;
+    return ret;
+}
+
+/*
+ * 128-bit addition; a carry out of the low half is detected by
+ * unsigned wrap-around and folded into the high half (via a.high,
+ * which is a local copy).
+ */
+static inline int128_t int128_add(int128_t a, int128_t b)
+{
+    int128_t ret;
+    ret.low = a.low + b.low;
+    if ((ret.low < a.low) || (ret.low < b.low)) {
+        /* carry into high part */
+        a.high += 1;
+    }
+    ret.high = a.high + b.high;
+    return ret;
+}
+
+/*
+ * 128-bit subtraction a - b, minus one more when "borrow" is nonzero
+ * (implemented by recursing once to subtract the constant 1).
+ */
+static inline int128_t int128_sub(int128_t a, int128_t b, int borrow)
+{
+    int128_t ret;
+    ret.low = a.low - b.low;
+    if (ret.low > a.low) {
+        /* borrow into high part */
+        a.high -= 1;
+    }
+    ret.high = a.high - b.high;
+    if (borrow == 0) {
+        return ret;
+    } else {
+        /* Reuse a as the constant 1 and subtract it from the result */
+        a.high = 0;
+        a.low = 1;
+        return int128_sub(ret, a, 0);
+    }
+}
+
+/* Unsigned 128-bit compare: nonzero iff a > b. */
+static inline int int128_gt(int128_t a, int128_t b)
+{
+    if (a.high == b.high) {
+        return a.low > b.low;
+    }
+    return a.high > b.high;
+}
+
+/*
+ * Shift the accumulator's mantissa left one bit (decrementing the
+ * exponent to keep the value unchanged), pulling the guard bit into
+ * the mantissa and promoting round -> guard and sticky -> round.
+ */
+static inline xf_t xf_norm_left(xf_t a)
+{
+    a.exp--;
+    a.mant = int128_shl(a.mant, 1);
+    a.mant.low |= a.guard;
+    a.guard = a.round;
+    a.round = a.sticky;
+    return a;
+}
+
+/*
+ * Shift the accumulator's mantissa right "amt" bits (incrementing the
+ * exponent to match), demoting bits through guard and round and OR-ing
+ * everything shifted past them into sticky.  Shifts larger than the
+ * whole significand (amt > 130, i.e. 128 mantissa + guard + round bits)
+ * collapse entirely into sticky.
+ */
+static inline xf_t xf_norm_right(xf_t a, int amt)
+{
+    if (amt > 130) {
+        a.sticky |=
+            a.round | a.guard | (a.mant.low != 0) | (a.mant.high != 0);
+        a.guard = a.round = a.mant.high = a.mant.low = 0;
+        a.exp += amt;
+        return a;
+
+    }
+    while (amt >= 64) {
+        /* Drop a whole word: its top two bits become guard/round */
+        a.sticky |= a.round | a.guard | (a.mant.low != 0);
+        a.guard = (a.mant.low >> 63) & 1;
+        a.round = (a.mant.low >> 62) & 1;
+        a.mant.low = a.mant.high;
+        a.mant.high = 0;
+        a.exp += 64;
+        amt -= 64;
+    }
+    while (amt > 0) {
+        /* Single-bit shift: mant LSB -> guard -> round -> sticky */
+        a.exp++;
+        a.sticky |= a.round;
+        a.round = a.guard;
+        a.guard = a.mant.low & 1;
+        a.mant = int128_shr(a.mant, 1);
+        amt--;
+    }
+    return a;
+}
+
+
+/*
+ * On the add/sub paths, aligning exponents can shift out many mantissa
+ * bits; a sticky bit records whether anything nonzero was discarded so
+ * that the final rounding still accounts for it.
+ */
+static xf_t xf_add(xf_t a, xf_t b);
+
+/*
+ * Subtract same-signed xf_t values: a - b, with the result negated when
+ * 'negate' is set (negate tracks operand swaps through the recursion).
+ * Opposite-signed operands are turned into an addition.
+ */
+static inline xf_t xf_sub(xf_t a, xf_t b, int negate)
+{
+    xf_t ret;
+    xf_init(&ret);
+    int borrow;
+
+    if (a.sign != b.sign) {
+        /* a - (-b) == a + b */
+        b.sign = !b.sign;
+        return xf_add(a, b);
+    }
+    if (b.exp > a.exp) {
+        /* small - big == - (big - small) */
+        return xf_sub(b, a, !negate);
+    }
+    if ((b.exp == a.exp) && (int128_gt(b.mant, a.mant))) {
+        /* small - big == - (big - small) */
+        return xf_sub(b, a, !negate);
+    }
+
+    while (a.exp > b.exp) {
+        /* Try to normalize exponents: shrink a exponent and grow mantissa */
+        if (a.mant.high & (1ULL << 62)) {
+            /* Can't grow a any more */
+            break;
+        } else {
+            a = xf_norm_left(a);
+        }
+    }
+
+    while (a.exp > b.exp) {
+        /* Try to normalize exponents: grow b exponent and shrink mantissa */
+        /* Keep around shifted out bits... we might need those later */
+        b = xf_norm_right(b, a.exp - b.exp);
+    }
+
+    if ((int128_gt(b.mant, a.mant))) {
+        /* Alignment can still leave b larger in magnitude: swap again */
+        return xf_sub(b, a, !negate);
+    }
+
+    /* OK, now things should be normalized! */
+    ret.sign = a.sign;
+    ret.exp = a.exp;
+    assert(!int128_gt(b.mant, a.mant));
+    /* b's guard/round/sticky sit below the mantissa and borrow into it */
+    borrow = (b.round << 2) | (b.guard << 1) | b.sticky;
+    ret.mant = int128_sub(a.mant, b.mant, (borrow != 0));
+    borrow = 0 - borrow;
+    ret.guard = (borrow >> 2) & 1;
+    ret.round = (borrow >> 1) & 1;
+    ret.sticky = (borrow >> 0) & 1;
+    if (negate) {
+        ret.sign = !ret.sign;
+    }
+    return ret;
+}
+
+/*
+ * Add two xf_t values.  Opposite-signed operands are routed to xf_sub;
+ * otherwise the operands are swapped as needed so the larger magnitude is
+ * in 'a', b is aligned to a's exponent (capturing shifted-out bits in its
+ * guard/round/sticky), and the mantissas are added.
+ *
+ * Also drops the stray empty statement (';' after the if block) that was
+ * in the original.
+ */
+static xf_t xf_add(xf_t a, xf_t b)
+{
+    xf_t ret;
+    xf_init(&ret);
+    if (a.sign != b.sign) {
+        /* a + (-b) == a - b */
+        b.sign = !b.sign;
+        return xf_sub(a, b, 0);
+    }
+    if (b.exp > a.exp) {
+        /* small + big ==  (big + small) */
+        return xf_add(b, a);
+    }
+    if ((b.exp == a.exp) && int128_gt(b.mant, a.mant)) {
+        /* small + big ==  (big + small) */
+        return xf_add(b, a);
+    }
+
+    while (a.exp > b.exp) {
+        /* Try to normalize exponents: shrink a exponent and grow mantissa */
+        if (a.mant.high & (1ULL << 62)) {
+            /* Can't grow a any more */
+            break;
+        } else {
+            a = xf_norm_left(a);
+        }
+    }
+
+    while (a.exp > b.exp) {
+        /* Try to normalize exponents: grow b exponent and shrink mantissa */
+        /* Keep around shifted out bits... we might need those later */
+        b = xf_norm_right(b, a.exp - b.exp);
+    }
+
+    /* OK, now things should be normalized! */
+    if (int128_gt(b.mant, a.mant)) {
+        return xf_add(b, a);
+    }
+    ret.sign = a.sign;
+    ret.exp = a.exp;
+    assert(!int128_gt(b.mant, a.mant));
+    ret.mant = int128_add(a.mant, b.mant);
+    ret.guard = b.guard;
+    ret.round = b.round;
+    ret.sticky = b.sticky;
+    return ret;
+}
+
+/* Build a double infinity that carries the sign of a */
+static inline df_t infinite_df_t(xf_t a)
+{
+    df_t inf;
+    inf.x.mant = 0ULL;
+    inf.x.exp = DF_INF_EXP;
+    inf.x.sign = a.sign;
+    return inf;
+}
+
+/* Build the largest finite double, carrying the sign of a */
+static inline df_t maxfinite_df_t(xf_t a)
+{
+    df_t max;
+    max.x.mant = 0x000fffffffffffffULL;
+    max.x.exp = DF_INF_EXP - 1;
+    max.x.sign = a.sign;
+    return max;
+}
+
+/* View a host double through the df_t union */
+static inline df_t f2df_t(double in)
+{
+    df_t ret = { .f = in };
+    return ret;
+}
+
+/* Build a single-precision infinity that carries the sign of a */
+static inline sf_t infinite_sf_t(xf_t a)
+{
+    sf_t inf;
+    inf.x.mant = 0ULL;
+    inf.x.exp = SF_INF_EXP;
+    inf.x.sign = a.sign;
+    return inf;
+}
+
+/* Build the largest finite float, carrying the sign of a */
+static inline sf_t maxfinite_sf_t(xf_t a)
+{
+    sf_t max;
+    max.x.mant = 0x007fffffUL;
+    max.x.exp = SF_INF_EXP - 1;
+    max.x.sign = a.sign;
+    return max;
+}
+
+/* View a host float through the sf_t union */
+static inline sf_t f2sf_t(float in)
+{
+    sf_t ret = { .f = in };
+    return ret;
+}
+
+/*
+ * Round an xf_t to the target format (TYPE with MANTBITS mantissa bits and
+ * biased-infinity exponent INF_EXP), honoring the host rounding mode from
+ * fegetround() and raising INEXACT/UNDERFLOW/OVERFLOW as needed.
+ *
+ * Changes vs. the original: the leftover debug printf before the final
+ * assert is removed, and comment typos/labels are corrected.
+ */
+#define GEN_XF_ROUND(TYPE, MANTBITS, INF_EXP) \
+static inline TYPE xf_round_##TYPE(xf_t a) \
+{ \
+    TYPE ret; \
+    ret.i = 0; \
+    ret.x.sign = a.sign; \
+    if ((a.mant.high == 0) && (a.mant.low == 0) \
+        && ((a.guard | a.round | a.sticky) == 0)) { \
+        /* result zero */ \
+        switch (fegetround()) { \
+        case FE_DOWNWARD: \
+            return f2##TYPE(-0.0); \
+        default: \
+            return f2##TYPE(0.0); \
+        } \
+    } \
+    /* Normalize right */ \
+    /* We want MANTBITS bits of mantissa plus the leading one, i.e. \
+     * MANTBITS + 1 significant bits.  So shift right while the high word \
+     * is non-zero or any bit above bit MANTBITS of the low word is set. */ \
+    while ((a.mant.high != 0) || ((a.mant.low >> (MANTBITS + 1)) != 0)) { \
+        a = xf_norm_right(a, 1); \
+    } \
+    /* \
+     * OK, now normalize left \
+     * We want to normalize left until we have a leading one in bit MANTBITS \
+     * Theoretically, we only need to shift a maximum of one to the left if we \
+     * shifted out lots of bits from B, or if we had no shift / 1 shift sticky \
+     * should be 0  \
+     */ \
+    while ((a.mant.low & (1ULL << MANTBITS)) == 0) { \
+        a = xf_norm_left(a); \
+    } \
+    /* \
+     * OK, now we might need to denormalize because of potential underflow. \
+     * We need to do this before rounding, and rounding might make us normal \
+     * again \
+     */ \
+    while (a.exp <= 0) { \
+        a = xf_norm_right(a, 1 - a.exp); \
+        /* \
+         * Do we have underflow? \
+         * That's when we get an inexact answer because we ran out of bits \
+         * in a denormal. \
+         */ \
+        if (a.guard || a.round || a.sticky) { \
+            feraiseexcept(FE_UNDERFLOW); \
+        } \
+    } \
+    /* OK, we're relatively canonical... now we need to round */ \
+    if (a.guard || a.round || a.sticky) { \
+        feraiseexcept(FE_INEXACT); \
+        switch (fegetround()) { \
+        case FE_TOWARDZERO: \
+            /* Chop and we're done */ \
+            break; \
+        case FE_UPWARD: \
+            if (a.sign == 0) { \
+                a.mant.low += 1; \
+            } \
+            break; \
+        case FE_DOWNWARD: \
+            if (a.sign != 0) { \
+                a.mant.low += 1; \
+            } \
+            break; \
+        default: \
+            if (a.round || a.sticky) { \
+                /* round up if guard is 1, down if guard is zero */ \
+                a.mant.low += a.guard; \
+            } else if (a.guard) { \
+                /* exactly .5, round up if odd */ \
+                a.mant.low += (a.mant.low & 1); \
+            } \
+            break; \
+        } \
+    } \
+    /* \
+     * OK, now we might have carried all the way up. \
+     * So we might need to shr once \
+     * at least we know that the lsb should be zero if we rounded and \
+     * got a carry out... \
+     */ \
+    if ((a.mant.low >> (MANTBITS + 1)) != 0) { \
+        a = xf_norm_right(a, 1); \
+    } \
+    /* Overflow? */ \
+    if (a.exp >= INF_EXP) { \
+        /* Yep, inf result */ \
+        feraiseexcept(FE_OVERFLOW); \
+        feraiseexcept(FE_INEXACT); \
+        switch (fegetround()) { \
+        case FE_TOWARDZERO: \
+            return maxfinite_##TYPE(a); \
+        case FE_UPWARD: \
+            if (a.sign == 0) { \
+                return infinite_##TYPE(a); \
+            } else { \
+                return maxfinite_##TYPE(a); \
+            } \
+        case FE_DOWNWARD: \
+            if (a.sign != 0) { \
+                return infinite_##TYPE(a); \
+            } else { \
+                return maxfinite_##TYPE(a); \
+            } \
+        default: \
+            return infinite_##TYPE(a); \
+        } \
+    } \
+    /* Still normal (leading one present)?  Then we're done. */ \
+    if (a.mant.low & (1ULL << MANTBITS)) { \
+        ret.x.exp = a.exp; \
+        ret.x.mant = a.mant.low; \
+        return ret; \
+    } \
+    /* Denormal result: the biased exponent must have been forced to 1 */ \
+    assert(a.exp == 1); \
+    ret.x.exp = 0; \
+    ret.x.mant = a.mant.low; \
+    return ret; \
+}
+
+GEN_XF_ROUND(df_t, fDF_MANTBITS(), DF_INF_EXP)
+GEN_XF_ROUND(sf_t, fSF_MANTBITS(), SF_INF_EXP)
+
+/*
+ * Handle the IEEE special cases for fused multiply-add a * b + c when at
+ * least one operand is an infinity or NaN: returns the mandated result and
+ * raises FE_INVALID where required.  Must not be called with all-finite
+ * operands (the trailing g_assert_not_reached() enforces this).
+ */
+static inline double special_fma(df_t a, df_t b, df_t c)
+{
+    df_t ret;
+    ret.i = 0;
+
+    /*
+     * If A multiplied by B is an exact infinity and C is also an infinity
+     * but with the opposite sign, FMA returns NaN and raises invalid.
+     */
+    if (fISINFPROD(a.f, b.f) && isinf(c.f)) {
+        if ((a.x.sign ^ b.x.sign) != c.x.sign) {
+            ret.i = fDFNANVAL();
+            feraiseexcept(FE_INVALID);
+            return ret.f;
+        }
+    }
+    /* inf * 0 (either order) is invalid and yields NaN */
+    if ((isinf(a.f) && isz(b.f)) || (isz(a.f) && isinf(b.f))) {
+        ret.i = fDFNANVAL();
+        feraiseexcept(FE_INVALID);
+        return ret.f;
+    }
+    /*
+     * If none of the above checks are true and C is a NaN,
+     * a NaN shall be returned
+     * If A or B are NaN, a NAN shall be returned.
+     */
+    if (isnan(a.f) || isnan(b.f) || (isnan(c.f))) {
+        /* Mantissa bit 51 clear marks a signaling NaN: raise invalid */
+        if (isnan(a.f) && (fGETBIT(51, a.i) == 0)) {
+            feraiseexcept(FE_INVALID);
+        }
+        if (isnan(b.f) && (fGETBIT(51, b.i) == 0)) {
+            feraiseexcept(FE_INVALID);
+        }
+        if (isnan(c.f) && (fGETBIT(51, c.i) == 0)) {
+            feraiseexcept(FE_INVALID);
+        }
+        ret.i = fDFNANVAL();
+        return ret.f;
+    }
+    /*
+     * We have checked for adding opposite-signed infinities.
+     * Other infinities return infinity with the correct sign
+     */
+    if (isinf(c.f)) {
+        ret.x.exp = DF_INF_EXP;
+        ret.x.mant = 0;
+        ret.x.sign = c.x.sign;
+        return ret.f;
+    }
+    if (isinf(a.f) || isinf(b.f)) {
+        ret.x.exp = DF_INF_EXP;
+        ret.x.mant = 0;
+        ret.x.sign = (a.x.sign ^ b.x.sign);
+        return ret.f;
+    }
+    g_assert_not_reached();
+}
+
+/* Widen the single-precision operands to double and reuse special_fma */
+static inline float special_fmaf(sf_t a, sf_t b, sf_t c)
+{
+    df_t da, db, dc;
+    da.f = a.f;
+    db.f = b.f;
+    dc.f = c.f;
+    return special_fma(da, db, dc);
+}
+
+/*
+ * Compute round(a_in * b_in * 2**scale + c_in) with a single rounding.
+ * NaN/Inf operands are delegated to special_fmaf; everything else is done
+ * in the internal xf_t accumulator format and rounded at the end.
+ */
+float internal_fmafx(float a_in, float b_in, float c_in, int scale)
+{
+    sf_t a, b, c;
+    xf_t prod;
+    xf_t acc;
+    xf_t result;
+    xf_init(&prod);
+    xf_init(&acc);
+    xf_init(&result);
+    a.f = a_in;
+    b.f = b_in;
+    c.f = c_in;
+
+    if (isinf(a.f) || isinf(b.f) || isinf(c.f)) {
+        return special_fmaf(a, b, c);
+    }
+    if (isnan(a.f) || isnan(b.f) || isnan(c.f)) {
+        return special_fmaf(a, b, c);
+    }
+    if ((scale == 0) && (isz(a.f) || isz(b.f))) {
+        /* Zero product and no scaling: the host arithmetic is exact */
+        return a.f * b.f + c.f;
+    }
+
+    /* (a * 2**b) * (c * 2**d) == a*c * 2**(b+d) */
+    prod.mant = int128_mul_6464(sf_getmant(a), sf_getmant(b));
+
+    /*
+     * Note: extracting the mantissa into an int is multiplying by
+     * 2**23, so adjust here
+     */
+    prod.exp = sf_getexp(a) + sf_getexp(b) - SF_BIAS - 23;
+    prod.sign = a.x.sign ^ b.x.sign;
+    if (isz(a.f) || isz(b.f)) {
+        /* Force the zero product far below any addend */
+        prod.exp = -2 * WAY_BIG_EXP;
+    }
+    if ((scale > 0) && (fpclassify(c.f) == FP_SUBNORMAL)) {
+        /* Replace a subnormal addend with an inexact (sticky) zero */
+        acc.mant = int128_mul_6464(0, 0);
+        acc.exp = -WAY_BIG_EXP;
+        acc.sign = c.x.sign;
+        acc.sticky = 1;
+        result = xf_add(prod, acc);
+    } else if (!isz(c.f)) {
+        acc.mant = int128_mul_6464(sf_getmant(c), 1);
+        acc.exp = sf_getexp(c);
+        acc.sign = c.x.sign;
+        result = xf_add(prod, acc);
+    } else {
+        result = prod;
+    }
+    result.exp += scale;
+    return xf_round_sf_t(result).f;
+}
+
+
+/* Fused multiply-add a_in * b_in + c_in with no extra scaling */
+float internal_fmaf(float a_in, float b_in, float c_in)
+{
+    return internal_fmafx(a_in, b_in, c_in, 0);
+}
+
+/* Multiply via the fused path; a zero operand takes the exact host product */
+float internal_mpyf(float a_in, float b_in)
+{
+    if (isz(a_in) || isz(b_in)) {
+        /* Host multiply already produces the correctly-signed zero */
+        return a_in * b_in;
+    }
+    /* Otherwise multiply as an FMA with a zero addend and no scaling */
+    return internal_fmafx(a_in, b_in, 0.0, 0);
+}
+
+/* Zero/NaN/Inf operands: the plain host multiply gives the required result */
+static inline double internal_mpyhh_special(double a, double b)
+{
+    return a * b;
+}
+
+/*
+ * Multiply the high words of the two double mantissas and fold the product
+ * into 'accumulated' (whose low bit carries the sticky state from earlier
+ * partial products), then round the sum back to a double.
+ */
+double internal_mpyhh(double a_in, double b_in,
+                      unsigned long long int accumulated)
+{
+    df_t a, b;
+    xf_t x;
+    unsigned long long int prod;
+    unsigned int sticky;
+
+    a.f = a_in;
+    b.f = b_in;
+    /* The low bit of 'accumulated' is the incoming sticky bit */
+    sticky = accumulated & 1;
+    accumulated >>= 1;
+    xf_init(&x);
+    if (isz(a_in) || isnan(a_in) || isinf(a_in)) {
+        return internal_mpyhh_special(a_in, b_in);
+    }
+    if (isz(b_in) || isnan(b_in) || isinf(b_in)) {
+        return internal_mpyhh_special(a_in, b_in);
+    }
+    x.mant = int128_mul_6464(accumulated, 1);
+    x.sticky = sticky;
+    /* High-word times high-word partial product, shifted up 32 bits */
+    prod = fGETUWORD(1, df_getmant(a)) * fGETUWORD(1, df_getmant(b));
+    x.mant = int128_add(x.mant, int128_mul_6464(prod, 0x100000000ULL));
+    x.exp = df_getexp(a) + df_getexp(b) - DF_BIAS - 20;
+    if (!isnormal(a_in) || !isnormal(b_in)) {
+        /* crush to inexact zero */
+        x.sticky = 1;
+        x.exp = -4096;
+    }
+    x.sign = a.x.sign ^ b.x.sign;
+    return xf_round_df_t(x).f;
+}
+
+/*
+ * Convert a double to a float by rebuilding the value in the internal
+ * xf_t format and rounding it to single precision with xf_round_sf_t.
+ * Zero, NaN and infinity pass straight through the host conversion.
+ */
+float conv_df_to_sf(double in_f)
+{
+    xf_t x;
+    df_t in;
+    if (isz(in_f) || isnan(in_f) || isinf(in_f)) {
+        return in_f;
+    }
+    xf_init(&x);
+    in.f = in_f;
+    x.mant = int128_mul_6464(df_getmant(in), 1);
+    /* Rebias the exponent and account for the mantissa widths (52 vs 23) */
+    x.exp = df_getexp(in) - DF_BIAS + SF_BIAS - 52 + 23;
+    x.sign = in.x.sign;
+    return xf_round_sf_t(x).f;
+}
+
-- 
2.7.4


  parent reply	other threads:[~2020-09-28 17:41 UTC|newest]

Thread overview: 56+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-09-28 17:28 [RFC PATCH v4 00/29] Hexagon patch series Taylor Simpson
2020-09-28 17:28 ` [RFC PATCH v4 01/29] Hexagon Update MAINTAINERS file Taylor Simpson
2020-09-28 17:28 ` [RFC PATCH v4 02/29] Hexagon (target/hexagon) README Taylor Simpson
2020-09-28 17:28 ` [RFC PATCH v4 03/29] Hexagon (include/elf.h) ELF machine definition Taylor Simpson
2020-09-28 17:28 ` [RFC PATCH v4 04/29] Hexagon (target/hexagon) scalar core definition Taylor Simpson
2020-09-28 17:28 ` [RFC PATCH v4 05/29] Hexagon (disas) disassembler Taylor Simpson
2020-09-28 17:28 ` [RFC PATCH v4 06/29] Hexagon (target/hexagon) register names Taylor Simpson
2020-09-28 17:28 ` [RFC PATCH v4 07/29] Hexagon (target/hexagon) scalar core helpers Taylor Simpson
2020-09-28 17:28 ` [RFC PATCH v4 08/29] Hexagon (target/hexagon) GDB Stub Taylor Simpson
2020-09-28 17:28 ` [RFC PATCH v4 09/29] Hexagon (target/hexagon) architecture types Taylor Simpson
2020-09-28 17:28 ` [RFC PATCH v4 10/29] Hexagon (target/hexagon) instruction and packet types Taylor Simpson
2020-09-28 17:28 ` [RFC PATCH v4 11/29] Hexagon (target/hexagon) register fields Taylor Simpson
2020-09-28 17:28 ` [RFC PATCH v4 12/29] Hexagon (target/hexagon) instruction attributes Taylor Simpson
2020-09-28 17:28 ` [RFC PATCH v4 13/29] Hexagon (target/hexagon) instruction/packet decode Taylor Simpson
2020-09-28 17:28 ` [RFC PATCH v4 14/29] Hexagon (target/hexagon) instruction printing Taylor Simpson
2020-09-28 17:28 ` Taylor Simpson [this message]
2020-09-29 11:25   ` [RFC PATCH v4 15/29] Hexagon (target/hexagon) utility functions Philippe Mathieu-Daudé
2020-09-29 15:48     ` Taylor Simpson
2020-09-28 17:28 ` [RFC PATCH v4 16/29] Hexagon (target/hexagon/imported) arch import Taylor Simpson
2020-09-28 17:28 ` [RFC PATCH v4 17/29] Hexagon (target/hexagon) generator phase 1 - C preprocessor for semantics Taylor Simpson
2020-09-29 11:31   ` Philippe Mathieu-Daudé
2020-09-28 17:28 ` [RFC PATCH v4 18/29] Hexagon (target/hexagon) generator phase 2 - generate header files Taylor Simpson
2020-09-28 17:28 ` [RFC PATCH v4 19/29] Hexagon (target/hexagon) generator phase 3 - C preprocessor for decode tree Taylor Simpson
2020-09-29 11:37   ` Philippe Mathieu-Daudé
2020-10-01 19:54     ` Richard Henderson
2020-10-01 23:31       ` Taylor Simpson
2020-09-28 17:28 ` [RFC PATCH v4 20/29] Hexagon (target/hexagon) generater phase 4 - " Taylor Simpson
2020-09-28 17:28 ` [RFC PATCH v4 21/29] Hexagon (target/hexagon) opcode data structures Taylor Simpson
2020-09-28 17:28 ` [RFC PATCH v4 22/29] Hexagon (target/hexagon) macros Taylor Simpson
2020-09-29 12:34   ` Philippe Mathieu-Daudé
2020-09-30 21:22     ` Taylor Simpson
2020-10-01  8:53       ` Philippe Mathieu-Daudé
2020-09-28 17:28 ` [RFC PATCH v4 23/29] Hexagon (target/hexagon) instruction classes Taylor Simpson
2020-09-28 17:28 ` [RFC PATCH v4 24/29] Hexagon (target/hexagon) TCG generation Taylor Simpson
2020-09-28 17:28 ` [RFC PATCH v4 25/29] Hexagon (target/hexagon) TCG for instructions with multiple definitions Taylor Simpson
2020-09-28 17:28 ` [RFC PATCH v4 26/29] Hexagon (target/hexagon) translation Taylor Simpson
2020-09-28 17:28 ` [RFC PATCH v4 27/29] Hexagon (linux-user/hexagon) Linux user emulation Taylor Simpson
2020-09-28 21:02   ` Laurent Vivier
2020-09-30 20:47     ` Taylor Simpson
2020-10-01  7:53       ` Laurent Vivier
2020-10-01 15:40         ` Taylor Simpson
2020-10-01 18:08           ` Laurent Vivier
2020-09-28 17:28 ` [RFC PATCH v4 28/29] Hexagon (tests/tcg/hexagon) TCG tests Taylor Simpson
2020-09-28 17:28 ` [RFC PATCH v4 29/29] Hexagon build infrastructure Taylor Simpson
2020-09-29  1:12 ` [RFC PATCH v4 00/29] Hexagon patch series no-reply
2020-09-29  1:21 ` no-reply
2020-09-29 12:22 ` Philippe Mathieu-Daudé
2020-09-29 15:53   ` Taylor Simpson
2020-09-29 17:01     ` Philippe Mathieu-Daudé
2020-09-29 20:11       ` Taylor Simpson
2020-09-29 20:41         ` Philippe Mathieu-Daudé
2020-09-29 21:28         ` Eric Blake
2020-09-29 22:16           ` Taylor Simpson
2020-09-30  4:08       ` Brad Smith
2020-10-02 17:16 ` Richard Henderson
     [not found] <1601296608-29390-1-git-send-email-tsimpson@quicinc.com>
2020-09-28 12:36 ` [RFC PATCH v4 15/29] Hexagon (target/hexagon) utility functions Taylor Simpson

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1601314138-9930-16-git-send-email-tsimpson@quicinc.com \
    --to=tsimpson@quicinc.com \
    --cc=ale@rev.ng \
    --cc=aleksandar.m.mail@gmail.com \
    --cc=laurent@vivier.eu \
    --cc=philmd@redhat.com \
    --cc=qemu-devel@nongnu.org \
    --cc=richard.henderson@linaro.org \
    --cc=riku.voipio@iki.fi \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.