* [Qemu-devel] [PATCH 06/15] Sparc: split FPU and VIS op helpers
From: Blue Swirl @ 2011-09-11 13:30 UTC (permalink / raw)
  To: qemu-devel

Move the FPU op helpers to fop_helper.c and the VIS op helpers to
vis_helper.c; the latter is compiled only for Sparc64.

Signed-off-by: Blue Swirl <blauwirbel@gmail.com>
---
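Build note (not part of the patch, stripped by git am): a minimal sketch of
the per-target conditional object pattern the Makefile.target hunk below
relies on. It assumes configure sets TARGET_SPARC64=y in the generated
per-target config for the 64-bit Sparc targets, so vis_helper.o is only
linked there, while fop_helper.o is built for every Sparc target:

  # Built for all Sparc targets (TARGET_BASE_ARCH is "sparc" for both
  # 32-bit and 64-bit variants).
  ifeq ($(TARGET_BASE_ARCH), sparc)
  libobj-y += int_helper.o fop_helper.o cpu_init.o
  endif

  # libobj-$(TARGET_SPARC64) expands to libobj-y only when the target
  # defines TARGET_SPARC64=y, so 32-bit Sparc builds skip this object.
  libobj-$(TARGET_SPARC64) += vis_helper.o
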
 Makefile.target           |    5 +-
 target-sparc/fop_helper.c |  394 ++++++++++++++++++++++++
 target-sparc/op_helper.c  |  743 ---------------------------------------------
 target-sparc/vis_helper.c |  403 ++++++++++++++++++++++++
 4 files changed, 800 insertions(+), 745 deletions(-)
 create mode 100644 target-sparc/fop_helper.c
 create mode 100644 target-sparc/vis_helper.c

diff --git a/Makefile.target b/Makefile.target
index c3074f4..8f7bdab 100644
--- a/Makefile.target
+++ b/Makefile.target
@@ -74,10 +74,11 @@ libobj-y += op_helper.o helper.o
 ifeq ($(TARGET_BASE_ARCH), i386)
 libobj-y += cpuid.o
 endif
+libobj-$(TARGET_SPARC64) += vis_helper.o
 libobj-$(CONFIG_NEED_MMU) += mmu.o
 libobj-$(TARGET_ARM) += neon_helper.o iwmmxt_helper.o
 ifeq ($(TARGET_BASE_ARCH), sparc)
-libobj-y += int_helper.o cpu_init.o
+libobj-y += int_helper.o fop_helper.o cpu_init.o
 endif

 libobj-y += disas.o
@@ -94,7 +95,7 @@ tcg/tcg.o: cpu.h

 # HELPER_CFLAGS is used for all the code compiled with static register
 # variables
-op_helper.o user-exec.o: QEMU_CFLAGS += $(HELPER_CFLAGS)
+op_helper.o fop_helper.o vis_helper.o user-exec.o: QEMU_CFLAGS += $(HELPER_CFLAGS)

 # Note: this is a workaround. The real fix is to avoid compiling
 # cpu_signal_handler() in user-exec.c.
diff --git a/target-sparc/fop_helper.c b/target-sparc/fop_helper.c
new file mode 100644
index 0000000..ddd0af9
--- /dev/null
+++ b/target-sparc/fop_helper.c
@@ -0,0 +1,394 @@
+/*
+ * FPU op helpers
+ *
+ *  Copyright (c) 2003-2005 Fabrice Bellard
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "cpu.h"
+#include "dyngen-exec.h"
+#include "helper.h"
+
+#define DT0 (env->dt0)
+#define DT1 (env->dt1)
+#define QT0 (env->qt0)
+#define QT1 (env->qt1)
+
+#define F_HELPER(name, p) void helper_f##name##p(void)
+
+#define F_BINOP(name)                                           \
+    float32 helper_f ## name ## s (float32 src1, float32 src2)  \
+    {                                                           \
+        return float32_ ## name (src1, src2, &env->fp_status);  \
+    }                                                           \
+    F_HELPER(name, d)                                           \
+    {                                                           \
+        DT0 = float64_ ## name (DT0, DT1, &env->fp_status);     \
+    }                                                           \
+    F_HELPER(name, q)                                           \
+    {                                                           \
+        QT0 = float128_ ## name (QT0, QT1, &env->fp_status);    \
+    }
+
+F_BINOP(add);
+F_BINOP(sub);
+F_BINOP(mul);
+F_BINOP(div);
+#undef F_BINOP
+
+void helper_fsmuld(float32 src1, float32 src2)
+{
+    DT0 = float64_mul(float32_to_float64(src1, &env->fp_status),
+                      float32_to_float64(src2, &env->fp_status),
+                      &env->fp_status);
+}
+
+void helper_fdmulq(void)
+{
+    QT0 = float128_mul(float64_to_float128(DT0, &env->fp_status),
+                       float64_to_float128(DT1, &env->fp_status),
+                       &env->fp_status);
+}
+
+float32 helper_fnegs(float32 src)
+{
+    return float32_chs(src);
+}
+
+#ifdef TARGET_SPARC64
+F_HELPER(neg, d)
+{
+    DT0 = float64_chs(DT1);
+}
+
+F_HELPER(neg, q)
+{
+    QT0 = float128_chs(QT1);
+}
+#endif
+
+/* Integer to float conversion.  */
+float32 helper_fitos(int32_t src)
+{
+    return int32_to_float32(src, &env->fp_status);
+}
+
+void helper_fitod(int32_t src)
+{
+    DT0 = int32_to_float64(src, &env->fp_status);
+}
+
+void helper_fitoq(int32_t src)
+{
+    QT0 = int32_to_float128(src, &env->fp_status);
+}
+
+#ifdef TARGET_SPARC64
+float32 helper_fxtos(void)
+{
+    return int64_to_float32(*((int64_t *)&DT1), &env->fp_status);
+}
+
+F_HELPER(xto, d)
+{
+    DT0 = int64_to_float64(*((int64_t *)&DT1), &env->fp_status);
+}
+
+F_HELPER(xto, q)
+{
+    QT0 = int64_to_float128(*((int64_t *)&DT1), &env->fp_status);
+}
+#endif
+#undef F_HELPER
+
+/* floating point conversion */
+float32 helper_fdtos(void)
+{
+    return float64_to_float32(DT1, &env->fp_status);
+}
+
+void helper_fstod(float32 src)
+{
+    DT0 = float32_to_float64(src, &env->fp_status);
+}
+
+float32 helper_fqtos(void)
+{
+    return float128_to_float32(QT1, &env->fp_status);
+}
+
+void helper_fstoq(float32 src)
+{
+    QT0 = float32_to_float128(src, &env->fp_status);
+}
+
+void helper_fqtod(void)
+{
+    DT0 = float128_to_float64(QT1, &env->fp_status);
+}
+
+void helper_fdtoq(void)
+{
+    QT0 = float64_to_float128(DT1, &env->fp_status);
+}
+
+/* Float to integer conversion.  */
+int32_t helper_fstoi(float32 src)
+{
+    return float32_to_int32_round_to_zero(src, &env->fp_status);
+}
+
+int32_t helper_fdtoi(void)
+{
+    return float64_to_int32_round_to_zero(DT1, &env->fp_status);
+}
+
+int32_t helper_fqtoi(void)
+{
+    return float128_to_int32_round_to_zero(QT1, &env->fp_status);
+}
+
+#ifdef TARGET_SPARC64
+void helper_fstox(float32 src)
+{
+    *((int64_t *)&DT0) = float32_to_int64_round_to_zero(src, &env->fp_status);
+}
+
+void helper_fdtox(void)
+{
+    *((int64_t *)&DT0) = float64_to_int64_round_to_zero(DT1, &env->fp_status);
+}
+
+void helper_fqtox(void)
+{
+    *((int64_t *)&DT0) = float128_to_int64_round_to_zero(QT1, &env->fp_status);
+}
+#endif
+
+float32 helper_fabss(float32 src)
+{
+    return float32_abs(src);
+}
+
+#ifdef TARGET_SPARC64
+void helper_fabsd(void)
+{
+    DT0 = float64_abs(DT1);
+}
+
+void helper_fabsq(void)
+{
+    QT0 = float128_abs(QT1);
+}
+#endif
+
+float32 helper_fsqrts(float32 src)
+{
+    return float32_sqrt(src, &env->fp_status);
+}
+
+void helper_fsqrtd(void)
+{
+    DT0 = float64_sqrt(DT1, &env->fp_status);
+}
+
+void helper_fsqrtq(void)
+{
+    QT0 = float128_sqrt(QT1, &env->fp_status);
+}
+
+#define GEN_FCMP(name, size, reg1, reg2, FS, E)                         \
+    void glue(helper_, name) (void)                                     \
+    {                                                                   \
+        env->fsr &= FSR_FTT_NMASK;                                      \
+        if (E && (glue(size, _is_any_nan)(reg1) ||                      \
+                  glue(size, _is_any_nan)(reg2)) &&                     \
+            (env->fsr & FSR_NVM)) {                                     \
+            env->fsr |= FSR_NVC;                                        \
+            env->fsr |= FSR_FTT_IEEE_EXCP;                              \
+            helper_raise_exception(env, TT_FP_EXCP);                    \
+        }                                                               \
+        switch (glue(size, _compare) (reg1, reg2, &env->fp_status)) {   \
+        case float_relation_unordered:                                  \
+            if ((env->fsr & FSR_NVM)) {                                 \
+                env->fsr |= FSR_NVC;                                    \
+                env->fsr |= FSR_FTT_IEEE_EXCP;                          \
+                helper_raise_exception(env, TT_FP_EXCP);                \
+            } else {                                                    \
+                env->fsr &= ~((FSR_FCC1 | FSR_FCC0) << FS);             \
+                env->fsr |= (FSR_FCC1 | FSR_FCC0) << FS;                \
+                env->fsr |= FSR_NVA;                                    \
+            }                                                           \
+            break;                                                      \
+        case float_relation_less:                                       \
+            env->fsr &= ~((FSR_FCC1 | FSR_FCC0) << FS);                 \
+            env->fsr |= FSR_FCC0 << FS;                                 \
+            break;                                                      \
+        case float_relation_greater:                                    \
+            env->fsr &= ~((FSR_FCC1 | FSR_FCC0) << FS);                 \
+            env->fsr |= FSR_FCC1 << FS;                                 \
+            break;                                                      \
+        default:                                                        \
+            env->fsr &= ~((FSR_FCC1 | FSR_FCC0) << FS);                 \
+            break;                                                      \
+        }                                                               \
+    }
+#define GEN_FCMPS(name, size, FS, E)                                    \
+    void glue(helper_, name)(float32 src1, float32 src2)                \
+    {                                                                   \
+        env->fsr &= FSR_FTT_NMASK;                                      \
+        if (E && (glue(size, _is_any_nan)(src1) ||                      \
+                  glue(size, _is_any_nan)(src2)) &&                     \
+            (env->fsr & FSR_NVM)) {                                     \
+            env->fsr |= FSR_NVC;                                        \
+            env->fsr |= FSR_FTT_IEEE_EXCP;                              \
+            helper_raise_exception(env, TT_FP_EXCP);                    \
+        }                                                               \
+        switch (glue(size, _compare) (src1, src2, &env->fp_status)) {   \
+        case float_relation_unordered:                                  \
+            if ((env->fsr & FSR_NVM)) {                                 \
+                env->fsr |= FSR_NVC;                                    \
+                env->fsr |= FSR_FTT_IEEE_EXCP;                          \
+                helper_raise_exception(env, TT_FP_EXCP);                \
+            } else {                                                    \
+                env->fsr &= ~((FSR_FCC1 | FSR_FCC0) << FS);             \
+                env->fsr |= (FSR_FCC1 | FSR_FCC0) << FS;                \
+                env->fsr |= FSR_NVA;                                    \
+            }                                                           \
+            break;                                                      \
+        case float_relation_less:                                       \
+            env->fsr &= ~((FSR_FCC1 | FSR_FCC0) << FS);                 \
+            env->fsr |= FSR_FCC0 << FS;                                 \
+            break;                                                      \
+        case float_relation_greater:                                    \
+            env->fsr &= ~((FSR_FCC1 | FSR_FCC0) << FS);                 \
+            env->fsr |= FSR_FCC1 << FS;                                 \
+            break;                                                      \
+        default:                                                        \
+            env->fsr &= ~((FSR_FCC1 | FSR_FCC0) << FS);                 \
+            break;                                                      \
+        }                                                               \
+    }
+
+GEN_FCMPS(fcmps, float32, 0, 0);
+GEN_FCMP(fcmpd, float64, DT0, DT1, 0, 0);
+
+GEN_FCMPS(fcmpes, float32, 0, 1);
+GEN_FCMP(fcmped, float64, DT0, DT1, 0, 1);
+
+GEN_FCMP(fcmpq, float128, QT0, QT1, 0, 0);
+GEN_FCMP(fcmpeq, float128, QT0, QT1, 0, 1);
+
+#ifdef TARGET_SPARC64
+GEN_FCMPS(fcmps_fcc1, float32, 22, 0);
+GEN_FCMP(fcmpd_fcc1, float64, DT0, DT1, 22, 0);
+GEN_FCMP(fcmpq_fcc1, float128, QT0, QT1, 22, 0);
+
+GEN_FCMPS(fcmps_fcc2, float32, 24, 0);
+GEN_FCMP(fcmpd_fcc2, float64, DT0, DT1, 24, 0);
+GEN_FCMP(fcmpq_fcc2, float128, QT0, QT1, 24, 0);
+
+GEN_FCMPS(fcmps_fcc3, float32, 26, 0);
+GEN_FCMP(fcmpd_fcc3, float64, DT0, DT1, 26, 0);
+GEN_FCMP(fcmpq_fcc3, float128, QT0, QT1, 26, 0);
+
+GEN_FCMPS(fcmpes_fcc1, float32, 22, 1);
+GEN_FCMP(fcmped_fcc1, float64, DT0, DT1, 22, 1);
+GEN_FCMP(fcmpeq_fcc1, float128, QT0, QT1, 22, 1);
+
+GEN_FCMPS(fcmpes_fcc2, float32, 24, 1);
+GEN_FCMP(fcmped_fcc2, float64, DT0, DT1, 24, 1);
+GEN_FCMP(fcmpeq_fcc2, float128, QT0, QT1, 24, 1);
+
+GEN_FCMPS(fcmpes_fcc3, float32, 26, 1);
+GEN_FCMP(fcmped_fcc3, float64, DT0, DT1, 26, 1);
+GEN_FCMP(fcmpeq_fcc3, float128, QT0, QT1, 26, 1);
+#endif
+#undef GEN_FCMPS
+
+void helper_check_ieee_exceptions(void)
+{
+    target_ulong status;
+
+    status = get_float_exception_flags(&env->fp_status);
+    if (status) {
+        /* Copy IEEE 754 flags into FSR */
+        if (status & float_flag_invalid) {
+            env->fsr |= FSR_NVC;
+        }
+        if (status & float_flag_overflow) {
+            env->fsr |= FSR_OFC;
+        }
+        if (status & float_flag_underflow) {
+            env->fsr |= FSR_UFC;
+        }
+        if (status & float_flag_divbyzero) {
+            env->fsr |= FSR_DZC;
+        }
+        if (status & float_flag_inexact) {
+            env->fsr |= FSR_NXC;
+        }
+
+        if ((env->fsr & FSR_CEXC_MASK) & ((env->fsr & FSR_TEM_MASK) >> 23)) {
+            /* Unmasked exception, generate a trap */
+            env->fsr |= FSR_FTT_IEEE_EXCP;
+            helper_raise_exception(env, TT_FP_EXCP);
+        } else {
+            /* Accumulate exceptions */
+            env->fsr |= (env->fsr & FSR_CEXC_MASK) << 5;
+        }
+    }
+}
+
+void helper_clear_float_exceptions(void)
+{
+    set_float_exception_flags(0, &env->fp_status);
+}
+
+static inline void set_fsr(void)
+{
+    int rnd_mode;
+
+    switch (env->fsr & FSR_RD_MASK) {
+    case FSR_RD_NEAREST:
+        rnd_mode = float_round_nearest_even;
+        break;
+    default:
+    case FSR_RD_ZERO:
+        rnd_mode = float_round_to_zero;
+        break;
+    case FSR_RD_POS:
+        rnd_mode = float_round_up;
+        break;
+    case FSR_RD_NEG:
+        rnd_mode = float_round_down;
+        break;
+    }
+    set_float_rounding_mode(rnd_mode, &env->fp_status);
+}
+
+void helper_ldfsr(uint32_t new_fsr)
+{
+    env->fsr = (new_fsr & FSR_LDFSR_MASK) | (env->fsr & FSR_LDFSR_OLDMASK);
+    set_fsr();
+}
+
+#ifdef TARGET_SPARC64
+void helper_ldxfsr(uint64_t new_fsr)
+{
+    env->fsr = (new_fsr & FSR_LDXFSR_MASK) | (env->fsr & FSR_LDXFSR_OLDMASK);
+    set_fsr();
+}
+#endif
diff --git a/target-sparc/op_helper.c b/target-sparc/op_helper.c
index df8e9ab..6fbef99 100644
--- a/target-sparc/op_helper.c
+++ b/target-sparc/op_helper.c
@@ -333,655 +333,6 @@ void helper_check_align(target_ulong addr, uint32_t align)
     }
 }

-#define F_HELPER(name, p) void helper_f##name##p(void)
-
-#define F_BINOP(name)                                           \
-    float32 helper_f ## name ## s (float32 src1, float32 src2)  \
-    {                                                           \
-        return float32_ ## name (src1, src2, &env->fp_status);  \
-    }                                                           \
-    F_HELPER(name, d)                                           \
-    {                                                           \
-        DT0 = float64_ ## name (DT0, DT1, &env->fp_status);     \
-    }                                                           \
-    F_HELPER(name, q)                                           \
-    {                                                           \
-        QT0 = float128_ ## name (QT0, QT1, &env->fp_status);    \
-    }
-
-F_BINOP(add);
-F_BINOP(sub);
-F_BINOP(mul);
-F_BINOP(div);
-#undef F_BINOP
-
-void helper_fsmuld(float32 src1, float32 src2)
-{
-    DT0 = float64_mul(float32_to_float64(src1, &env->fp_status),
-                      float32_to_float64(src2, &env->fp_status),
-                      &env->fp_status);
-}
-
-void helper_fdmulq(void)
-{
-    QT0 = float128_mul(float64_to_float128(DT0, &env->fp_status),
-                       float64_to_float128(DT1, &env->fp_status),
-                       &env->fp_status);
-}
-
-float32 helper_fnegs(float32 src)
-{
-    return float32_chs(src);
-}
-
-#ifdef TARGET_SPARC64
-F_HELPER(neg, d)
-{
-    DT0 = float64_chs(DT1);
-}
-
-F_HELPER(neg, q)
-{
-    QT0 = float128_chs(QT1);
-}
-#endif
-
-/* Integer to float conversion.  */
-float32 helper_fitos(int32_t src)
-{
-    return int32_to_float32(src, &env->fp_status);
-}
-
-void helper_fitod(int32_t src)
-{
-    DT0 = int32_to_float64(src, &env->fp_status);
-}
-
-void helper_fitoq(int32_t src)
-{
-    QT0 = int32_to_float128(src, &env->fp_status);
-}
-
-#ifdef TARGET_SPARC64
-float32 helper_fxtos(void)
-{
-    return int64_to_float32(*((int64_t *)&DT1), &env->fp_status);
-}
-
-F_HELPER(xto, d)
-{
-    DT0 = int64_to_float64(*((int64_t *)&DT1), &env->fp_status);
-}
-
-F_HELPER(xto, q)
-{
-    QT0 = int64_to_float128(*((int64_t *)&DT1), &env->fp_status);
-}
-#endif
-#undef F_HELPER
-
-/* floating point conversion */
-float32 helper_fdtos(void)
-{
-    return float64_to_float32(DT1, &env->fp_status);
-}
-
-void helper_fstod(float32 src)
-{
-    DT0 = float32_to_float64(src, &env->fp_status);
-}
-
-float32 helper_fqtos(void)
-{
-    return float128_to_float32(QT1, &env->fp_status);
-}
-
-void helper_fstoq(float32 src)
-{
-    QT0 = float32_to_float128(src, &env->fp_status);
-}
-
-void helper_fqtod(void)
-{
-    DT0 = float128_to_float64(QT1, &env->fp_status);
-}
-
-void helper_fdtoq(void)
-{
-    QT0 = float64_to_float128(DT1, &env->fp_status);
-}
-
-/* Float to integer conversion.  */
-int32_t helper_fstoi(float32 src)
-{
-    return float32_to_int32_round_to_zero(src, &env->fp_status);
-}
-
-int32_t helper_fdtoi(void)
-{
-    return float64_to_int32_round_to_zero(DT1, &env->fp_status);
-}
-
-int32_t helper_fqtoi(void)
-{
-    return float128_to_int32_round_to_zero(QT1, &env->fp_status);
-}
-
-#ifdef TARGET_SPARC64
-void helper_fstox(float32 src)
-{
-    *((int64_t *)&DT0) = float32_to_int64_round_to_zero(src, &env->fp_status);
-}
-
-void helper_fdtox(void)
-{
-    *((int64_t *)&DT0) = float64_to_int64_round_to_zero(DT1, &env->fp_status);
-}
-
-void helper_fqtox(void)
-{
-    *((int64_t *)&DT0) = float128_to_int64_round_to_zero(QT1, &env->fp_status);
-}
-
-void helper_faligndata(void)
-{
-    uint64_t tmp;
-
-    tmp = (*((uint64_t *)&DT0)) << ((env->gsr & 7) * 8);
-    /* on many architectures a shift of 64 does nothing */
-    if ((env->gsr & 7) != 0) {
-        tmp |= (*((uint64_t *)&DT1)) >> (64 - (env->gsr & 7) * 8);
-    }
-    *((uint64_t *)&DT0) = tmp;
-}
-
-#ifdef HOST_WORDS_BIGENDIAN
-#define VIS_B64(n) b[7 - (n)]
-#define VIS_W64(n) w[3 - (n)]
-#define VIS_SW64(n) sw[3 - (n)]
-#define VIS_L64(n) l[1 - (n)]
-#define VIS_B32(n) b[3 - (n)]
-#define VIS_W32(n) w[1 - (n)]
-#else
-#define VIS_B64(n) b[n]
-#define VIS_W64(n) w[n]
-#define VIS_SW64(n) sw[n]
-#define VIS_L64(n) l[n]
-#define VIS_B32(n) b[n]
-#define VIS_W32(n) w[n]
-#endif
-
-typedef union {
-    uint8_t b[8];
-    uint16_t w[4];
-    int16_t sw[4];
-    uint32_t l[2];
-    uint64_t ll;
-    float64 d;
-} VIS64;
-
-typedef union {
-    uint8_t b[4];
-    uint16_t w[2];
-    uint32_t l;
-    float32 f;
-} VIS32;
-
-void helper_fpmerge(void)
-{
-    VIS64 s, d;
-
-    s.d = DT0;
-    d.d = DT1;
-
-    /* Reverse calculation order to handle overlap */
-    d.VIS_B64(7) = s.VIS_B64(3);
-    d.VIS_B64(6) = d.VIS_B64(3);
-    d.VIS_B64(5) = s.VIS_B64(2);
-    d.VIS_B64(4) = d.VIS_B64(2);
-    d.VIS_B64(3) = s.VIS_B64(1);
-    d.VIS_B64(2) = d.VIS_B64(1);
-    d.VIS_B64(1) = s.VIS_B64(0);
-    /* d.VIS_B64(0) = d.VIS_B64(0); */
-
-    DT0 = d.d;
-}
-
-void helper_fmul8x16(void)
-{
-    VIS64 s, d;
-    uint32_t tmp;
-
-    s.d = DT0;
-    d.d = DT1;
-
-#define PMUL(r)                                                 \
-    tmp = (int32_t)d.VIS_SW64(r) * (int32_t)s.VIS_B64(r);       \
-    if ((tmp & 0xff) > 0x7f) {                                  \
-        tmp += 0x100;                                           \
-    }                                                           \
-    d.VIS_W64(r) = tmp >> 8;
-
-    PMUL(0);
-    PMUL(1);
-    PMUL(2);
-    PMUL(3);
-#undef PMUL
-
-    DT0 = d.d;
-}
-
-void helper_fmul8x16al(void)
-{
-    VIS64 s, d;
-    uint32_t tmp;
-
-    s.d = DT0;
-    d.d = DT1;
-
-#define PMUL(r)                                                 \
-    tmp = (int32_t)d.VIS_SW64(1) * (int32_t)s.VIS_B64(r);       \
-    if ((tmp & 0xff) > 0x7f) {                                  \
-        tmp += 0x100;                                           \
-    }                                                           \
-    d.VIS_W64(r) = tmp >> 8;
-
-    PMUL(0);
-    PMUL(1);
-    PMUL(2);
-    PMUL(3);
-#undef PMUL
-
-    DT0 = d.d;
-}
-
-void helper_fmul8x16au(void)
-{
-    VIS64 s, d;
-    uint32_t tmp;
-
-    s.d = DT0;
-    d.d = DT1;
-
-#define PMUL(r)                                                 \
-    tmp = (int32_t)d.VIS_SW64(0) * (int32_t)s.VIS_B64(r);       \
-    if ((tmp & 0xff) > 0x7f) {                                  \
-        tmp += 0x100;                                           \
-    }                                                           \
-    d.VIS_W64(r) = tmp >> 8;
-
-    PMUL(0);
-    PMUL(1);
-    PMUL(2);
-    PMUL(3);
-#undef PMUL
-
-    DT0 = d.d;
-}
-
-void helper_fmul8sux16(void)
-{
-    VIS64 s, d;
-    uint32_t tmp;
-
-    s.d = DT0;
-    d.d = DT1;
-
-#define PMUL(r)                                                         \
-    tmp = (int32_t)d.VIS_SW64(r) * ((int32_t)s.VIS_SW64(r) >> 8);       \
-    if ((tmp & 0xff) > 0x7f) {                                          \
-        tmp += 0x100;                                                   \
-    }                                                                   \
-    d.VIS_W64(r) = tmp >> 8;
-
-    PMUL(0);
-    PMUL(1);
-    PMUL(2);
-    PMUL(3);
-#undef PMUL
-
-    DT0 = d.d;
-}
-
-void helper_fmul8ulx16(void)
-{
-    VIS64 s, d;
-    uint32_t tmp;
-
-    s.d = DT0;
-    d.d = DT1;
-
-#define PMUL(r)                                                         \
-    tmp = (int32_t)d.VIS_SW64(r) * ((uint32_t)s.VIS_B64(r * 2));        \
-    if ((tmp & 0xff) > 0x7f) {                                          \
-        tmp += 0x100;                                                   \
-    }                                                                   \
-    d.VIS_W64(r) = tmp >> 8;
-
-    PMUL(0);
-    PMUL(1);
-    PMUL(2);
-    PMUL(3);
-#undef PMUL
-
-    DT0 = d.d;
-}
-
-void helper_fmuld8sux16(void)
-{
-    VIS64 s, d;
-    uint32_t tmp;
-
-    s.d = DT0;
-    d.d = DT1;
-
-#define PMUL(r)                                                         \
-    tmp = (int32_t)d.VIS_SW64(r) * ((int32_t)s.VIS_SW64(r) >> 8);       \
-    if ((tmp & 0xff) > 0x7f) {                                          \
-        tmp += 0x100;                                                   \
-    }                                                                   \
-    d.VIS_L64(r) = tmp;
-
-    /* Reverse calculation order to handle overlap */
-    PMUL(1);
-    PMUL(0);
-#undef PMUL
-
-    DT0 = d.d;
-}
-
-void helper_fmuld8ulx16(void)
-{
-    VIS64 s, d;
-    uint32_t tmp;
-
-    s.d = DT0;
-    d.d = DT1;
-
-#define PMUL(r)                                                         \
-    tmp = (int32_t)d.VIS_SW64(r) * ((uint32_t)s.VIS_B64(r * 2));        \
-    if ((tmp & 0xff) > 0x7f) {                                          \
-        tmp += 0x100;                                                   \
-    }                                                                   \
-    d.VIS_L64(r) = tmp;
-
-    /* Reverse calculation order to handle overlap */
-    PMUL(1);
-    PMUL(0);
-#undef PMUL
-
-    DT0 = d.d;
-}
-
-void helper_fexpand(void)
-{
-    VIS32 s;
-    VIS64 d;
-
-    s.l = (uint32_t)(*(uint64_t *)&DT0 & 0xffffffff);
-    d.d = DT1;
-    d.VIS_W64(0) = s.VIS_B32(0) << 4;
-    d.VIS_W64(1) = s.VIS_B32(1) << 4;
-    d.VIS_W64(2) = s.VIS_B32(2) << 4;
-    d.VIS_W64(3) = s.VIS_B32(3) << 4;
-
-    DT0 = d.d;
-}
-
-#define VIS_HELPER(name, F)                             \
-    void name##16(void)                                 \
-    {                                                   \
-        VIS64 s, d;                                     \
-                                                        \
-        s.d = DT0;                                      \
-        d.d = DT1;                                      \
-                                                        \
-        d.VIS_W64(0) = F(d.VIS_W64(0), s.VIS_W64(0));   \
-        d.VIS_W64(1) = F(d.VIS_W64(1), s.VIS_W64(1));   \
-        d.VIS_W64(2) = F(d.VIS_W64(2), s.VIS_W64(2));   \
-        d.VIS_W64(3) = F(d.VIS_W64(3), s.VIS_W64(3));   \
-                                                        \
-        DT0 = d.d;                                      \
-    }                                                   \
-                                                        \
-    uint32_t name##16s(uint32_t src1, uint32_t src2)    \
-    {                                                   \
-        VIS32 s, d;                                     \
-                                                        \
-        s.l = src1;                                     \
-        d.l = src2;                                     \
-                                                        \
-        d.VIS_W32(0) = F(d.VIS_W32(0), s.VIS_W32(0));   \
-        d.VIS_W32(1) = F(d.VIS_W32(1), s.VIS_W32(1));   \
-                                                        \
-        return d.l;                                     \
-    }                                                   \
-                                                        \
-    void name##32(void)                                 \
-    {                                                   \
-        VIS64 s, d;                                     \
-                                                        \
-        s.d = DT0;                                      \
-        d.d = DT1;                                      \
-                                                        \
-        d.VIS_L64(0) = F(d.VIS_L64(0), s.VIS_L64(0));   \
-        d.VIS_L64(1) = F(d.VIS_L64(1), s.VIS_L64(1));   \
-                                                        \
-        DT0 = d.d;                                      \
-    }                                                   \
-                                                        \
-    uint32_t name##32s(uint32_t src1, uint32_t src2)    \
-    {                                                   \
-        VIS32 s, d;                                     \
-                                                        \
-        s.l = src1;                                     \
-        d.l = src2;                                     \
-                                                        \
-        d.l = F(d.l, s.l);                              \
-                                                        \
-        return d.l;                                     \
-    }
-
-#define FADD(a, b) ((a) + (b))
-#define FSUB(a, b) ((a) - (b))
-VIS_HELPER(helper_fpadd, FADD)
-VIS_HELPER(helper_fpsub, FSUB)
-
-#define VIS_CMPHELPER(name, F)                                    \
-    uint64_t name##16(void)                                       \
-    {                                                             \
-        VIS64 s, d;                                               \
-                                                                  \
-        s.d = DT0;                                                \
-        d.d = DT1;                                                \
-                                                                  \
-        d.VIS_W64(0) = F(s.VIS_W64(0), d.VIS_W64(0)) ? 1 : 0;     \
-        d.VIS_W64(0) |= F(s.VIS_W64(1), d.VIS_W64(1)) ? 2 : 0;    \
-        d.VIS_W64(0) |= F(s.VIS_W64(2), d.VIS_W64(2)) ? 4 : 0;    \
-        d.VIS_W64(0) |= F(s.VIS_W64(3), d.VIS_W64(3)) ? 8 : 0;    \
-        d.VIS_W64(1) = d.VIS_W64(2) = d.VIS_W64(3) = 0;           \
-                                                                  \
-        return d.ll;                                              \
-    }                                                             \
-                                                                  \
-    uint64_t name##32(void)                                       \
-    {                                                             \
-        VIS64 s, d;                                               \
-                                                                  \
-        s.d = DT0;                                                \
-        d.d = DT1;                                                \
-                                                                  \
-        d.VIS_L64(0) = F(s.VIS_L64(0), d.VIS_L64(0)) ? 1 : 0;     \
-        d.VIS_L64(0) |= F(s.VIS_L64(1), d.VIS_L64(1)) ? 2 : 0;    \
-        d.VIS_L64(1) = 0;                                         \
-                                                                  \
-        return d.ll;                                              \
-    }
-
-#define FCMPGT(a, b) ((a) > (b))
-#define FCMPEQ(a, b) ((a) == (b))
-#define FCMPLE(a, b) ((a) <= (b))
-#define FCMPNE(a, b) ((a) != (b))
-
-VIS_CMPHELPER(helper_fcmpgt, FCMPGT)
-VIS_CMPHELPER(helper_fcmpeq, FCMPEQ)
-VIS_CMPHELPER(helper_fcmple, FCMPLE)
-VIS_CMPHELPER(helper_fcmpne, FCMPNE)
-#endif
-
-void helper_check_ieee_exceptions(void)
-{
-    target_ulong status;
-
-    status = get_float_exception_flags(&env->fp_status);
-    if (status) {
-        /* Copy IEEE 754 flags into FSR */
-        if (status & float_flag_invalid) {
-            env->fsr |= FSR_NVC;
-        }
-        if (status & float_flag_overflow) {
-            env->fsr |= FSR_OFC;
-        }
-        if (status & float_flag_underflow) {
-            env->fsr |= FSR_UFC;
-        }
-        if (status & float_flag_divbyzero) {
-            env->fsr |= FSR_DZC;
-        }
-        if (status & float_flag_inexact) {
-            env->fsr |= FSR_NXC;
-        }
-
-        if ((env->fsr & FSR_CEXC_MASK) & ((env->fsr & FSR_TEM_MASK) >> 23)) {
-            /* Unmasked exception, generate a trap */
-            env->fsr |= FSR_FTT_IEEE_EXCP;
-            helper_raise_exception(env, TT_FP_EXCP);
-        } else {
-            /* Accumulate exceptions */
-            env->fsr |= (env->fsr & FSR_CEXC_MASK) << 5;
-        }
-    }
-}
-
-void helper_clear_float_exceptions(void)
-{
-    set_float_exception_flags(0, &env->fp_status);
-}
-
-float32 helper_fabss(float32 src)
-{
-    return float32_abs(src);
-}
-
-#ifdef TARGET_SPARC64
-void helper_fabsd(void)
-{
-    DT0 = float64_abs(DT1);
-}
-
-void helper_fabsq(void)
-{
-    QT0 = float128_abs(QT1);
-}
-#endif
-
-float32 helper_fsqrts(float32 src)
-{
-    return float32_sqrt(src, &env->fp_status);
-}
-
-void helper_fsqrtd(void)
-{
-    DT0 = float64_sqrt(DT1, &env->fp_status);
-}
-
-void helper_fsqrtq(void)
-{
-    QT0 = float128_sqrt(QT1, &env->fp_status);
-}
-
-#define GEN_FCMP(name, size, reg1, reg2, FS, E)                         \
-    void glue(helper_, name) (void)                                     \
-    {                                                                   \
-        env->fsr &= FSR_FTT_NMASK;                                      \
-        if (E && (glue(size, _is_any_nan)(reg1) ||                      \
-                  glue(size, _is_any_nan)(reg2)) &&                     \
-            (env->fsr & FSR_NVM)) {                                     \
-            env->fsr |= FSR_NVC;                                        \
-            env->fsr |= FSR_FTT_IEEE_EXCP;                              \
-            helper_raise_exception(env, TT_FP_EXCP);                    \
-        }                                                               \
-        switch (glue(size, _compare) (reg1, reg2, &env->fp_status)) {   \
-        case float_relation_unordered:                                  \
-            if ((env->fsr & FSR_NVM)) {                                 \
-                env->fsr |= FSR_NVC;                                    \
-                env->fsr |= FSR_FTT_IEEE_EXCP;                          \
-                helper_raise_exception(env, TT_FP_EXCP);                \
-            } else {                                                    \
-                env->fsr &= ~((FSR_FCC1 | FSR_FCC0) << FS);             \
-                env->fsr |= (FSR_FCC1 | FSR_FCC0) << FS;                \
-                env->fsr |= FSR_NVA;                                    \
-            }                                                           \
-            break;                                                      \
-        case float_relation_less:                                       \
-            env->fsr &= ~((FSR_FCC1 | FSR_FCC0) << FS);                 \
-            env->fsr |= FSR_FCC0 << FS;                                 \
-            break;                                                      \
-        case float_relation_greater:                                    \
-            env->fsr &= ~((FSR_FCC1 | FSR_FCC0) << FS);                 \
-            env->fsr |= FSR_FCC1 << FS;                                 \
-            break;                                                      \
-        default:                                                        \
-            env->fsr &= ~((FSR_FCC1 | FSR_FCC0) << FS);                 \
-            break;                                                      \
-        }                                                               \
-    }
-#define GEN_FCMPS(name, size, FS, E)                                    \
-    void glue(helper_, name)(float32 src1, float32 src2)                \
-    {                                                                   \
-        env->fsr &= FSR_FTT_NMASK;                                      \
-        if (E && (glue(size, _is_any_nan)(src1) ||                      \
-                  glue(size, _is_any_nan)(src2)) &&                     \
-            (env->fsr & FSR_NVM)) {                                     \
-            env->fsr |= FSR_NVC;                                        \
-            env->fsr |= FSR_FTT_IEEE_EXCP;                              \
-            helper_raise_exception(env, TT_FP_EXCP);                    \
-        }                                                               \
-        switch (glue(size, _compare) (src1, src2, &env->fp_status)) {   \
-        case float_relation_unordered:                                  \
-            if ((env->fsr & FSR_NVM)) {                                 \
-                env->fsr |= FSR_NVC;                                    \
-                env->fsr |= FSR_FTT_IEEE_EXCP;                          \
-                helper_raise_exception(env, TT_FP_EXCP);                \
-            } else {                                                    \
-                env->fsr &= ~((FSR_FCC1 | FSR_FCC0) << FS);             \
-                env->fsr |= (FSR_FCC1 | FSR_FCC0) << FS;                \
-                env->fsr |= FSR_NVA;                                    \
-            }                                                           \
-            break;                                                      \
-        case float_relation_less:                                       \
-            env->fsr &= ~((FSR_FCC1 | FSR_FCC0) << FS);                 \
-            env->fsr |= FSR_FCC0 << FS;                                 \
-            break;                                                      \
-        case float_relation_greater:                                    \
-            env->fsr &= ~((FSR_FCC1 | FSR_FCC0) << FS);                 \
-            env->fsr |= FSR_FCC1 << FS;                                 \
-            break;                                                      \
-        default:                                                        \
-            env->fsr &= ~((FSR_FCC1 | FSR_FCC0) << FS);                 \
-            break;                                                      \
-        }                                                               \
-    }
-
-GEN_FCMPS(fcmps, float32, 0, 0);
-GEN_FCMP(fcmpd, float64, DT0, DT1, 0, 0);
-
-GEN_FCMPS(fcmpes, float32, 0, 1);
-GEN_FCMP(fcmped, float64, DT0, DT1, 0, 1);
-
-GEN_FCMP(fcmpq, float128, QT0, QT1, 0, 0);
-GEN_FCMP(fcmpeq, float128, QT0, QT1, 0, 1);
-
 static uint32_t compute_all_flags(void)
 {
     return env->psr & PSR_ICC;
@@ -1580,33 +931,6 @@ int cpu_cwp_dec(CPUState *env1, int cwp)
     return ret;
 }

-#ifdef TARGET_SPARC64
-GEN_FCMPS(fcmps_fcc1, float32, 22, 0);
-GEN_FCMP(fcmpd_fcc1, float64, DT0, DT1, 22, 0);
-GEN_FCMP(fcmpq_fcc1, float128, QT0, QT1, 22, 0);
-
-GEN_FCMPS(fcmps_fcc2, float32, 24, 0);
-GEN_FCMP(fcmpd_fcc2, float64, DT0, DT1, 24, 0);
-GEN_FCMP(fcmpq_fcc2, float128, QT0, QT1, 24, 0);
-
-GEN_FCMPS(fcmps_fcc3, float32, 26, 0);
-GEN_FCMP(fcmpd_fcc3, float64, DT0, DT1, 26, 0);
-GEN_FCMP(fcmpq_fcc3, float128, QT0, QT1, 26, 0);
-
-GEN_FCMPS(fcmpes_fcc1, float32, 22, 1);
-GEN_FCMP(fcmped_fcc1, float64, DT0, DT1, 22, 1);
-GEN_FCMP(fcmpeq_fcc1, float128, QT0, QT1, 22, 1);
-
-GEN_FCMPS(fcmpes_fcc2, float32, 24, 1);
-GEN_FCMP(fcmped_fcc2, float64, DT0, DT1, 24, 1);
-GEN_FCMP(fcmpeq_fcc2, float128, QT0, QT1, 24, 1);
-
-GEN_FCMPS(fcmpes_fcc3, float32, 26, 1);
-GEN_FCMP(fcmped_fcc3, float64, DT0, DT1, 26, 1);
-GEN_FCMP(fcmpeq_fcc3, float128, QT0, QT1, 26, 1);
-#endif
-#undef GEN_FCMPS
-
 #if !defined(TARGET_SPARC64) && !defined(CONFIG_USER_ONLY) &&   \
     defined(DEBUG_MXCC)
 static void dump_mxcc(CPUState *env)
@@ -3748,42 +3072,6 @@ void helper_stqf(target_ulong addr, int mem_idx)
 #endif
 }

-static inline void set_fsr(void)
-{
-    int rnd_mode;
-
-    switch (env->fsr & FSR_RD_MASK) {
-    case FSR_RD_NEAREST:
-        rnd_mode = float_round_nearest_even;
-        break;
-    default:
-    case FSR_RD_ZERO:
-        rnd_mode = float_round_to_zero;
-        break;
-    case FSR_RD_POS:
-        rnd_mode = float_round_up;
-        break;
-    case FSR_RD_NEG:
-        rnd_mode = float_round_down;
-        break;
-    }
-    set_float_rounding_mode(rnd_mode, &env->fp_status);
-}
-
-void helper_ldfsr(uint32_t new_fsr)
-{
-    env->fsr = (new_fsr & FSR_LDFSR_MASK) | (env->fsr & FSR_LDFSR_OLDMASK);
-    set_fsr();
-}
-
-#ifdef TARGET_SPARC64
-void helper_ldxfsr(uint64_t new_fsr)
-{
-    env->fsr = (new_fsr & FSR_LDXFSR_MASK) | (env->fsr & FSR_LDXFSR_OLDMASK);
-    set_fsr();
-}
-#endif
-
 #ifndef TARGET_SPARC64
 /* XXX: use another pointer for %iN registers to avoid slow wrapping
    handling ? */
@@ -3993,37 +3281,6 @@ void helper_wrcwp(target_ulong new_cwp)
     put_cwp64(new_cwp);
 }

-/* This function uses non-native bit order */
-#define GET_FIELD(X, FROM, TO)                                  \
-    ((X) >> (63 - (TO)) & ((1ULL << ((TO) - (FROM) + 1)) - 1))
-
-/* This function uses the order in the manuals, i.e. bit 0 is 2^0 */
-#define GET_FIELD_SP(X, FROM, TO)               \
-    GET_FIELD(X, 63 - (TO), 63 - (FROM))
-
-target_ulong helper_array8(target_ulong pixel_addr, target_ulong cubesize)
-{
-    return (GET_FIELD_SP(pixel_addr, 60, 63) << (17 + 2 * cubesize)) |
-        (GET_FIELD_SP(pixel_addr, 39, 39 + cubesize - 1) << (17 + cubesize)) |
-        (GET_FIELD_SP(pixel_addr, 17 + cubesize - 1, 17) << 17) |
-        (GET_FIELD_SP(pixel_addr, 56, 59) << 13) |
-        (GET_FIELD_SP(pixel_addr, 35, 38) << 9) |
-        (GET_FIELD_SP(pixel_addr, 13, 16) << 5) |
-        (((pixel_addr >> 55) & 1) << 4) |
-        (GET_FIELD_SP(pixel_addr, 33, 34) << 2) |
-        GET_FIELD_SP(pixel_addr, 11, 12);
-}
-
-target_ulong helper_alignaddr(target_ulong addr, target_ulong offset)
-{
-    uint64_t tmp;
-
-    tmp = addr + offset;
-    env->gsr &= ~7ULL;
-    env->gsr |= tmp & 7ULL;
-    return tmp & ~7ULL;
-}
-
 static inline uint64_t *get_gregset(uint32_t pstate)
 {
     switch (pstate) {
diff --git a/target-sparc/vis_helper.c b/target-sparc/vis_helper.c
new file mode 100644
index 0000000..87a86ef
--- /dev/null
+++ b/target-sparc/vis_helper.c
@@ -0,0 +1,403 @@
+/*
+ * VIS op helpers
+ *
+ *  Copyright (c) 2003-2005 Fabrice Bellard
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "cpu.h"
+#include "dyngen-exec.h"
+#include "helper.h"
+
+#define DT0 (env->dt0)
+#define DT1 (env->dt1)
+#define QT0 (env->qt0)
+#define QT1 (env->qt1)
+
+/* This function uses non-native bit order */
+#define GET_FIELD(X, FROM, TO)                                  \
+    ((X) >> (63 - (TO)) & ((1ULL << ((TO) - (FROM) + 1)) - 1))
+
+/* This function uses the order in the manuals, i.e. bit 0 is 2^0 */
+#define GET_FIELD_SP(X, FROM, TO)               \
+    GET_FIELD(X, 63 - (TO), 63 - (FROM))
+
+target_ulong helper_array8(target_ulong pixel_addr, target_ulong cubesize)
+{
+    return (GET_FIELD_SP(pixel_addr, 60, 63) << (17 + 2 * cubesize)) |
+        (GET_FIELD_SP(pixel_addr, 39, 39 + cubesize - 1) << (17 + cubesize)) |
+        (GET_FIELD_SP(pixel_addr, 17 + cubesize - 1, 17) << 17) |
+        (GET_FIELD_SP(pixel_addr, 56, 59) << 13) |
+        (GET_FIELD_SP(pixel_addr, 35, 38) << 9) |
+        (GET_FIELD_SP(pixel_addr, 13, 16) << 5) |
+        (((pixel_addr >> 55) & 1) << 4) |
+        (GET_FIELD_SP(pixel_addr, 33, 34) << 2) |
+        GET_FIELD_SP(pixel_addr, 11, 12);
+}
+
+target_ulong helper_alignaddr(target_ulong addr, target_ulong offset)
+{
+    uint64_t tmp;
+
+    tmp = addr + offset;
+    env->gsr &= ~7ULL;
+    env->gsr |= tmp & 7ULL;
+    return tmp & ~7ULL;
+}
+
+void helper_faligndata(void)
+{
+    uint64_t tmp;
+
+    tmp = (*((uint64_t *)&DT0)) << ((env->gsr & 7) * 8);
+    /* on many architectures a shift of 64 does nothing */
+    if ((env->gsr & 7) != 0) {
+        tmp |= (*((uint64_t *)&DT1)) >> (64 - (env->gsr & 7) * 8);
+    }
+    *((uint64_t *)&DT0) = tmp;
+}
+
+#ifdef HOST_WORDS_BIGENDIAN
+#define VIS_B64(n) b[7 - (n)]
+#define VIS_W64(n) w[3 - (n)]
+#define VIS_SW64(n) sw[3 - (n)]
+#define VIS_L64(n) l[1 - (n)]
+#define VIS_B32(n) b[3 - (n)]
+#define VIS_W32(n) w[1 - (n)]
+#else
+#define VIS_B64(n) b[n]
+#define VIS_W64(n) w[n]
+#define VIS_SW64(n) sw[n]
+#define VIS_L64(n) l[n]
+#define VIS_B32(n) b[n]
+#define VIS_W32(n) w[n]
+#endif
+
+typedef union {
+    uint8_t b[8];
+    uint16_t w[4];
+    int16_t sw[4];
+    uint32_t l[2];
+    uint64_t ll;
+    float64 d;
+} VIS64;
+
+typedef union {
+    uint8_t b[4];
+    uint16_t w[2];
+    uint32_t l;
+    float32 f;
+} VIS32;
+
+void helper_fpmerge(void)
+{
+    VIS64 s, d;
+
+    s.d = DT0;
+    d.d = DT1;
+
+    /* Reverse calculation order to handle overlap */
+    d.VIS_B64(7) = s.VIS_B64(3);
+    d.VIS_B64(6) = d.VIS_B64(3);
+    d.VIS_B64(5) = s.VIS_B64(2);
+    d.VIS_B64(4) = d.VIS_B64(2);
+    d.VIS_B64(3) = s.VIS_B64(1);
+    d.VIS_B64(2) = d.VIS_B64(1);
+    d.VIS_B64(1) = s.VIS_B64(0);
+    /* d.VIS_B64(0) = d.VIS_B64(0); */
+
+    DT0 = d.d;
+}
+
+void helper_fmul8x16(void)
+{
+    VIS64 s, d;
+    uint32_t tmp;
+
+    s.d = DT0;
+    d.d = DT1;
+
+#define PMUL(r)                                                 \
+    tmp = (int32_t)d.VIS_SW64(r) * (int32_t)s.VIS_B64(r);       \
+    if ((tmp & 0xff) > 0x7f) {                                  \
+        tmp += 0x100;                                           \
+    }                                                           \
+    d.VIS_W64(r) = tmp >> 8;
+
+    PMUL(0);
+    PMUL(1);
+    PMUL(2);
+    PMUL(3);
+#undef PMUL
+
+    DT0 = d.d;
+}
+
+void helper_fmul8x16al(void)
+{
+    VIS64 s, d;
+    uint32_t tmp;
+
+    s.d = DT0;
+    d.d = DT1;
+
+#define PMUL(r)                                                 \
+    tmp = (int32_t)d.VIS_SW64(1) * (int32_t)s.VIS_B64(r);       \
+    if ((tmp & 0xff) > 0x7f) {                                  \
+        tmp += 0x100;                                           \
+    }                                                           \
+    d.VIS_W64(r) = tmp >> 8;
+
+    PMUL(0);
+    PMUL(1);
+    PMUL(2);
+    PMUL(3);
+#undef PMUL
+
+    DT0 = d.d;
+}
+
+void helper_fmul8x16au(void)
+{
+    VIS64 s, d;
+    uint32_t tmp;
+
+    s.d = DT0;
+    d.d = DT1;
+
+#define PMUL(r)                                                 \
+    tmp = (int32_t)d.VIS_SW64(0) * (int32_t)s.VIS_B64(r);       \
+    if ((tmp & 0xff) > 0x7f) {                                  \
+        tmp += 0x100;                                           \
+    }                                                           \
+    d.VIS_W64(r) = tmp >> 8;
+
+    PMUL(0);
+    PMUL(1);
+    PMUL(2);
+    PMUL(3);
+#undef PMUL
+
+    DT0 = d.d;
+}
+
+void helper_fmul8sux16(void)
+{
+    VIS64 s, d;
+    uint32_t tmp;
+
+    s.d = DT0;
+    d.d = DT1;
+
+#define PMUL(r)                                                         \
+    tmp = (int32_t)d.VIS_SW64(r) * ((int32_t)s.VIS_SW64(r) >> 8);       \
+    if ((tmp & 0xff) > 0x7f) {                                          \
+        tmp += 0x100;                                                   \
+    }                                                                   \
+    d.VIS_W64(r) = tmp >> 8;
+
+    PMUL(0);
+    PMUL(1);
+    PMUL(2);
+    PMUL(3);
+#undef PMUL
+
+    DT0 = d.d;
+}
+
+void helper_fmul8ulx16(void)
+{
+    VIS64 s, d;
+    uint32_t tmp;
+
+    s.d = DT0;
+    d.d = DT1;
+
+#define PMUL(r)                                                         \
+    tmp = (int32_t)d.VIS_SW64(r) * ((uint32_t)s.VIS_B64(r * 2));        \
+    if ((tmp & 0xff) > 0x7f) {                                          \
+        tmp += 0x100;                                                   \
+    }                                                                   \
+    d.VIS_W64(r) = tmp >> 8;
+
+    PMUL(0);
+    PMUL(1);
+    PMUL(2);
+    PMUL(3);
+#undef PMUL
+
+    DT0 = d.d;
+}
+
+void helper_fmuld8sux16(void)
+{
+    VIS64 s, d;
+    uint32_t tmp;
+
+    s.d = DT0;
+    d.d = DT1;
+
+#define PMUL(r)                                                         \
+    tmp = (int32_t)d.VIS_SW64(r) * ((int32_t)s.VIS_SW64(r) >> 8);       \
+    if ((tmp & 0xff) > 0x7f) {                                          \
+        tmp += 0x100;                                                   \
+    }                                                                   \
+    d.VIS_L64(r) = tmp;
+
+    /* Reverse calculation order to handle overlap */
+    PMUL(1);
+    PMUL(0);
+#undef PMUL
+
+    DT0 = d.d;
+}
+
+void helper_fmuld8ulx16(void)
+{
+    VIS64 s, d;
+    uint32_t tmp;
+
+    s.d = DT0;
+    d.d = DT1;
+
+#define PMUL(r)                                                         \
+    tmp = (int32_t)d.VIS_SW64(r) * ((uint32_t)s.VIS_B64(r * 2));        \
+    if ((tmp & 0xff) > 0x7f) {                                          \
+        tmp += 0x100;                                                   \
+    }                                                                   \
+    d.VIS_L64(r) = tmp;
+
+    /* Reverse calculation order to handle overlap */
+    PMUL(1);
+    PMUL(0);
+#undef PMUL
+
+    DT0 = d.d;
+}
+
+void helper_fexpand(void)
+{
+    VIS32 s;
+    VIS64 d;
+
+    s.l = (uint32_t)(*(uint64_t *)&DT0 & 0xffffffff);
+    d.d = DT1;
+    d.VIS_W64(0) = s.VIS_B32(0) << 4;
+    d.VIS_W64(1) = s.VIS_B32(1) << 4;
+    d.VIS_W64(2) = s.VIS_B32(2) << 4;
+    d.VIS_W64(3) = s.VIS_B32(3) << 4;
+
+    DT0 = d.d;
+}
+
+#define VIS_HELPER(name, F)                             \
+    void name##16(void)                                 \
+    {                                                   \
+        VIS64 s, d;                                     \
+                                                        \
+        s.d = DT0;                                      \
+        d.d = DT1;                                      \
+                                                        \
+        d.VIS_W64(0) = F(d.VIS_W64(0), s.VIS_W64(0));   \
+        d.VIS_W64(1) = F(d.VIS_W64(1), s.VIS_W64(1));   \
+        d.VIS_W64(2) = F(d.VIS_W64(2), s.VIS_W64(2));   \
+        d.VIS_W64(3) = F(d.VIS_W64(3), s.VIS_W64(3));   \
+                                                        \
+        DT0 = d.d;                                      \
+    }                                                   \
+                                                        \
+    uint32_t name##16s(uint32_t src1, uint32_t src2)    \
+    {                                                   \
+        VIS32 s, d;                                     \
+                                                        \
+        s.l = src1;                                     \
+        d.l = src2;                                     \
+                                                        \
+        d.VIS_W32(0) = F(d.VIS_W32(0), s.VIS_W32(0));   \
+        d.VIS_W32(1) = F(d.VIS_W32(1), s.VIS_W32(1));   \
+                                                        \
+        return d.l;                                     \
+    }                                                   \
+                                                        \
+    void name##32(void)                                 \
+    {                                                   \
+        VIS64 s, d;                                     \
+                                                        \
+        s.d = DT0;                                      \
+        d.d = DT1;                                      \
+                                                        \
+        d.VIS_L64(0) = F(d.VIS_L64(0), s.VIS_L64(0));   \
+        d.VIS_L64(1) = F(d.VIS_L64(1), s.VIS_L64(1));   \
+                                                        \
+        DT0 = d.d;                                      \
+    }                                                   \
+                                                        \
+    uint32_t name##32s(uint32_t src1, uint32_t src2)    \
+    {                                                   \
+        VIS32 s, d;                                     \
+                                                        \
+        s.l = src1;                                     \
+        d.l = src2;                                     \
+                                                        \
+        d.l = F(d.l, s.l);                              \
+                                                        \
+        return d.l;                                     \
+    }
+
+#define FADD(a, b) ((a) + (b))
+#define FSUB(a, b) ((a) - (b))
+VIS_HELPER(helper_fpadd, FADD)
+VIS_HELPER(helper_fpsub, FSUB)
+
+#define VIS_CMPHELPER(name, F)                                    \
+    uint64_t name##16(void)                                       \
+    {                                                             \
+        VIS64 s, d;                                               \
+                                                                  \
+        s.d = DT0;                                                \
+        d.d = DT1;                                                \
+                                                                  \
+        d.VIS_W64(0) = F(s.VIS_W64(0), d.VIS_W64(0)) ? 1 : 0;     \
+        d.VIS_W64(0) |= F(s.VIS_W64(1), d.VIS_W64(1)) ? 2 : 0;    \
+        d.VIS_W64(0) |= F(s.VIS_W64(2), d.VIS_W64(2)) ? 4 : 0;    \
+        d.VIS_W64(0) |= F(s.VIS_W64(3), d.VIS_W64(3)) ? 8 : 0;    \
+        d.VIS_W64(1) = d.VIS_W64(2) = d.VIS_W64(3) = 0;           \
+                                                                  \
+        return d.ll;                                              \
+    }                                                             \
+                                                                  \
+    uint64_t name##32(void)                                       \
+    {                                                             \
+        VIS64 s, d;                                               \
+                                                                  \
+        s.d = DT0;                                                \
+        d.d = DT1;                                                \
+                                                                  \
+        d.VIS_L64(0) = F(s.VIS_L64(0), d.VIS_L64(0)) ? 1 : 0;     \
+        d.VIS_L64(0) |= F(s.VIS_L64(1), d.VIS_L64(1)) ? 2 : 0;    \
+        d.VIS_L64(1) = 0;                                         \
+                                                                  \
+        return d.ll;                                              \
+    }
+
+#define FCMPGT(a, b) ((a) > (b))
+#define FCMPEQ(a, b) ((a) == (b))
+#define FCMPLE(a, b) ((a) <= (b))
+#define FCMPNE(a, b) ((a) != (b))
+
+VIS_CMPHELPER(helper_fcmpgt, FCMPGT)
+VIS_CMPHELPER(helper_fcmpeq, FCMPEQ)
+VIS_CMPHELPER(helper_fcmple, FCMPLE)
+VIS_CMPHELPER(helper_fcmpne, FCMPNE)
-- 
1.6.2.4
