qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
From: Jan Bobek <jan.bobek@gmail.com>
To: qemu-devel@nongnu.org
Cc: "Jan Bobek" <jan.bobek@gmail.com>,
	"Alex Bennée" <alex.bennee@linaro.org>,
	"Richard Henderson" <richard.henderson@linaro.org>
Subject: [Qemu-devel] [RFC PATCH v4 68/75] target/i386: convert ps((l, r)l(w, d, q), ra(w, d)) helpers to gvec style
Date: Wed, 21 Aug 2019 13:29:44 -0400	[thread overview]
Message-ID: <20190821172951.15333-69-jan.bobek@gmail.com> (raw)
In-Reply-To: <20190821172951.15333-1-jan.bobek@gmail.com>

Make these helpers suitable for use with tcg_gen_gvec_* functions.

Signed-off-by: Jan Bobek <jan.bobek@gmail.com>
---
 target/i386/ops_sse.h        | 357 +++++++++++++++++++++--------------
 target/i386/ops_sse_header.h |  30 ++-
 target/i386/translate.c      | 259 +++++++------------------
 3 files changed, 306 insertions(+), 340 deletions(-)

diff --git a/target/i386/ops_sse.h b/target/i386/ops_sse.h
index aca6b50f23..168e581c0c 100644
--- a/target/i386/ops_sse.h
+++ b/target/i386/ops_sse.h
@@ -19,6 +19,7 @@
  */
 
 #include "crypto/aes.h"
+#include "tcg-gvec-desc.h"
 
 #if SHIFT == 0
 #define Reg MMXReg
@@ -38,199 +39,273 @@
 #define SUFFIX _xmm
 #endif
 
-void glue(helper_psrlw, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
+static inline void glue(clear_high, SUFFIX)(Reg *d, intptr_t oprsz,
+                                            intptr_t maxsz)
 {
-    int shift;
+    intptr_t i;
 
-    if (s->Q(0) > 15) {
-        d->Q(0) = 0;
-#if SHIFT == 1
-        d->Q(1) = 0;
-#endif
-    } else {
-        shift = s->B(0);
-        d->W(0) >>= shift;
-        d->W(1) >>= shift;
-        d->W(2) >>= shift;
-        d->W(3) >>= shift;
-#if SHIFT == 1
-        d->W(4) >>= shift;
-        d->W(5) >>= shift;
-        d->W(6) >>= shift;
-        d->W(7) >>= shift;
-#endif
+    assert(oprsz % sizeof(uint64_t) == 0);
+    assert(maxsz % sizeof(uint64_t) == 0);
+
+    if (oprsz < maxsz) {
+        i = oprsz / sizeof(uint64_t);
+        for (; i * sizeof(uint64_t) < maxsz; ++i) {
+            d->Q(i) = 0;
+        }
     }
 }
 
-void glue(helper_psraw, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
+void glue(helper_psllw, SUFFIX)(Reg *d, Reg *a, Reg *b, uint32_t desc)
 {
-    int shift;
+    const uint64_t count = b->Q(0);
+    const intptr_t oprsz = count > 15 ? 0 : simd_oprsz(desc);
+    const intptr_t maxsz = simd_maxsz(desc);
 
-    if (s->Q(0) > 15) {
-        shift = 15;
-    } else {
-        shift = s->B(0);
+    for (intptr_t i = 0; i * sizeof(uint16_t) < oprsz; ++i) {
+        d->W(i) = a->W(i) << count;
     }
-    d->W(0) = (int16_t)d->W(0) >> shift;
-    d->W(1) = (int16_t)d->W(1) >> shift;
-    d->W(2) = (int16_t)d->W(2) >> shift;
-    d->W(3) = (int16_t)d->W(3) >> shift;
-#if SHIFT == 1
-    d->W(4) = (int16_t)d->W(4) >> shift;
-    d->W(5) = (int16_t)d->W(5) >> shift;
-    d->W(6) = (int16_t)d->W(6) >> shift;
-    d->W(7) = (int16_t)d->W(7) >> shift;
-#endif
+    glue(clear_high, SUFFIX)(d, oprsz, maxsz);
 }
 
-void glue(helper_psllw, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
+void glue(helper_pslld, SUFFIX)(Reg *d, Reg *a, Reg *b, uint32_t desc)
 {
-    int shift;
+    const uint64_t count = b->Q(0);
+    const intptr_t oprsz = count > 31 ? 0 : simd_oprsz(desc);
+    const intptr_t maxsz = simd_maxsz(desc);
 
-    if (s->Q(0) > 15) {
-        d->Q(0) = 0;
-#if SHIFT == 1
-        d->Q(1) = 0;
-#endif
-    } else {
-        shift = s->B(0);
-        d->W(0) <<= shift;
-        d->W(1) <<= shift;
-        d->W(2) <<= shift;
-        d->W(3) <<= shift;
-#if SHIFT == 1
-        d->W(4) <<= shift;
-        d->W(5) <<= shift;
-        d->W(6) <<= shift;
-        d->W(7) <<= shift;
-#endif
+    for (intptr_t i = 0; i * sizeof(uint32_t) < oprsz; ++i) {
+        d->L(i) = a->L(i) << count;
     }
+    glue(clear_high, SUFFIX)(d, oprsz, maxsz);
 }
 
-void glue(helper_psrld, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
+void glue(helper_psllq, SUFFIX)(Reg *d, Reg *a, Reg *b, uint32_t desc)
 {
-    int shift;
+    const uint64_t count = b->Q(0);
+    const intptr_t oprsz = count > 63 ? 0 : simd_oprsz(desc);
+    const intptr_t maxsz = simd_maxsz(desc);
 
-    if (s->Q(0) > 31) {
-        d->Q(0) = 0;
-#if SHIFT == 1
-        d->Q(1) = 0;
-#endif
-    } else {
-        shift = s->B(0);
-        d->L(0) >>= shift;
-        d->L(1) >>= shift;
-#if SHIFT == 1
-        d->L(2) >>= shift;
-        d->L(3) >>= shift;
-#endif
+    for (intptr_t i = 0; i * sizeof(uint64_t) < oprsz; ++i) {
+        d->Q(i) = a->Q(i) << count;
     }
+    glue(clear_high, SUFFIX)(d, oprsz, maxsz);
 }
 
-void glue(helper_psrad, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
+void glue(helper_psllwi, SUFFIX)(Reg *d, Reg *a, uint32_t desc)
 {
-    int shift;
+    const uint64_t count = simd_data(desc);
+    const intptr_t oprsz = count > 15 ? 0 : simd_oprsz(desc);
+    const intptr_t maxsz = simd_maxsz(desc);
 
-    if (s->Q(0) > 31) {
-        shift = 31;
-    } else {
-        shift = s->B(0);
+    for (intptr_t i = 0; i * sizeof(uint16_t) < oprsz; ++i) {
+        d->W(i) = a->W(i) << count;
     }
-    d->L(0) = (int32_t)d->L(0) >> shift;
-    d->L(1) = (int32_t)d->L(1) >> shift;
-#if SHIFT == 1
-    d->L(2) = (int32_t)d->L(2) >> shift;
-    d->L(3) = (int32_t)d->L(3) >> shift;
-#endif
+    glue(clear_high, SUFFIX)(d, oprsz, maxsz);
 }
 
-void glue(helper_pslld, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
+void glue(helper_pslldi, SUFFIX)(Reg *d, Reg *a, uint32_t desc)
 {
-    int shift;
+    const uint64_t count = simd_data(desc);
+    const intptr_t oprsz = count > 31 ? 0 : simd_oprsz(desc);
+    const intptr_t maxsz = simd_maxsz(desc);
 
-    if (s->Q(0) > 31) {
-        d->Q(0) = 0;
-#if SHIFT == 1
-        d->Q(1) = 0;
-#endif
-    } else {
-        shift = s->B(0);
-        d->L(0) <<= shift;
-        d->L(1) <<= shift;
-#if SHIFT == 1
-        d->L(2) <<= shift;
-        d->L(3) <<= shift;
-#endif
+    for (intptr_t i = 0; i * sizeof(uint32_t) < oprsz; ++i) {
+        d->L(i) = a->L(i) << count;
     }
+    glue(clear_high, SUFFIX)(d, oprsz, maxsz);
 }
 
-void glue(helper_psrlq, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
+void glue(helper_psllqi, SUFFIX)(Reg *d, Reg *a, uint32_t desc)
 {
-    int shift;
+    const uint64_t count = simd_data(desc);
+    const intptr_t oprsz = count > 63 ? 0 : simd_oprsz(desc);
+    const intptr_t maxsz = simd_maxsz(desc);
 
-    if (s->Q(0) > 63) {
-        d->Q(0) = 0;
-#if SHIFT == 1
-        d->Q(1) = 0;
-#endif
-    } else {
-        shift = s->B(0);
-        d->Q(0) >>= shift;
-#if SHIFT == 1
-        d->Q(1) >>= shift;
-#endif
+    for (intptr_t i = 0; i * sizeof(uint64_t) < oprsz; ++i) {
+        d->Q(i) = a->Q(i) << count;
     }
+    glue(clear_high, SUFFIX)(d, oprsz, maxsz);
 }
 
-void glue(helper_psllq, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
+void glue(helper_psrlw, SUFFIX)(Reg *d, Reg *a, Reg *b, uint32_t desc)
 {
-    int shift;
+    const uint64_t count = b->Q(0);
+    const intptr_t oprsz = count > 15 ? 0 : simd_oprsz(desc);
+    const intptr_t maxsz = simd_maxsz(desc);
 
-    if (s->Q(0) > 63) {
-        d->Q(0) = 0;
-#if SHIFT == 1
-        d->Q(1) = 0;
-#endif
-    } else {
-        shift = s->B(0);
-        d->Q(0) <<= shift;
-#if SHIFT == 1
-        d->Q(1) <<= shift;
-#endif
+    for (intptr_t i = 0; i * sizeof(uint16_t) < oprsz; ++i) {
+        d->W(i) = a->W(i) >> count;
     }
+    glue(clear_high, SUFFIX)(d, oprsz, maxsz);
 }
 
-#if SHIFT == 1
-void glue(helper_psrldq, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
+void glue(helper_psrld, SUFFIX)(Reg *d, Reg *a, Reg *b, uint32_t desc)
+{
+    const uint64_t count = b->Q(0);
+    const intptr_t oprsz = count > 31 ? 0 : simd_oprsz(desc);
+    const intptr_t maxsz = simd_maxsz(desc);
+
+    for (intptr_t i = 0; i * sizeof(uint32_t) < oprsz; ++i) {
+        d->L(i) = a->L(i) >> count;
+    }
+    glue(clear_high, SUFFIX)(d, oprsz, maxsz);
+}
+
+void glue(helper_psrlq, SUFFIX)(Reg *d, Reg *a, Reg *b, uint32_t desc)
+{
+    const uint64_t count = b->Q(0);
+    const intptr_t oprsz = count > 63 ? 0 : simd_oprsz(desc);
+    const intptr_t maxsz = simd_maxsz(desc);
+
+    for (intptr_t i = 0; i * sizeof(uint64_t) < oprsz; ++i) {
+        d->Q(i) = a->Q(i) >> count;
+    }
+    glue(clear_high, SUFFIX)(d, oprsz, maxsz);
+}
+
+void glue(helper_psrlwi, SUFFIX)(Reg *d, Reg *a, uint32_t desc)
+{
+    const uint64_t count = simd_data(desc);
+    const intptr_t oprsz = count > 15 ? 0 : simd_oprsz(desc);
+    const intptr_t maxsz = simd_maxsz(desc);
+
+    for (intptr_t i = 0; i * sizeof(uint16_t) < oprsz; ++i) {
+        d->W(i) = a->W(i) >> count;
+    }
+    glue(clear_high, SUFFIX)(d, oprsz, maxsz);
+}
+
+void glue(helper_psrldi, SUFFIX)(Reg *d, Reg *a, uint32_t desc)
+{
+    const uint64_t count = simd_data(desc);
+    const intptr_t oprsz = count > 31 ? 0 : simd_oprsz(desc);
+    const intptr_t maxsz = simd_maxsz(desc);
+
+    for (intptr_t i = 0; i * sizeof(uint32_t) < oprsz; ++i) {
+        d->L(i) = a->L(i) >> count;
+    }
+    glue(clear_high, SUFFIX)(d, oprsz, maxsz);
+}
+
+void glue(helper_psrlqi, SUFFIX)(Reg *d, Reg *a, uint32_t desc)
 {
-    int shift, i;
+    const uint64_t count = simd_data(desc);
+    const intptr_t oprsz = count > 63 ? 0 : simd_oprsz(desc);
+    const intptr_t maxsz = simd_maxsz(desc);
 
-    shift = s->L(0);
-    if (shift > 16) {
-        shift = 16;
+    for (intptr_t i = 0; i * sizeof(uint64_t) < oprsz; ++i) {
+        d->Q(i) = a->Q(i) >> count;
     }
-    for (i = 0; i < 16 - shift; i++) {
-        d->B(i) = d->B(i + shift);
+    glue(clear_high, SUFFIX)(d, oprsz, maxsz);
+}
+
+void glue(helper_psraw, SUFFIX)(Reg *d, Reg *a, Reg *b, uint32_t desc)
+{
+    const intptr_t oprsz = simd_oprsz(desc);
+    const intptr_t maxsz = simd_maxsz(desc);
+
+    uint64_t count = b->Q(0);
+    if (count > 15) {
+        count = 15;
+    }
+
+    for (intptr_t i = 0; i * sizeof(uint16_t) < oprsz; ++i) {
+        d->W(i) = (int16_t)a->W(i) >> count;
+    }
+    glue(clear_high, SUFFIX)(d, oprsz, maxsz);
+}
+
+void glue(helper_psrad, SUFFIX)(Reg *d, Reg *a, Reg *b, uint32_t desc)
+{
+    const intptr_t oprsz = simd_oprsz(desc);
+    const intptr_t maxsz = simd_maxsz(desc);
+
+    uint64_t count = b->Q(0);
+    if (count > 31) {
+        count = 31;
     }
-    for (i = 16 - shift; i < 16; i++) {
-        d->B(i) = 0;
+
+    for (intptr_t i = 0; i * sizeof(uint32_t) < oprsz; ++i) {
+        d->L(i) = (int32_t)a->L(i) >> count;
     }
+    glue(clear_high, SUFFIX)(d, oprsz, maxsz);
 }
 
-void glue(helper_pslldq, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
+void glue(helper_psrawi, SUFFIX)(Reg *d, Reg *a, uint32_t desc)
 {
-    int shift, i;
+    const intptr_t oprsz = simd_oprsz(desc);
+    const intptr_t maxsz = simd_maxsz(desc);
 
-    shift = s->L(0);
-    if (shift > 16) {
-        shift = 16;
+    uint64_t count = simd_data(desc);
+    if (count > 15) {
+        count = 15;
     }
-    for (i = 15; i >= shift; i--) {
-        d->B(i) = d->B(i - shift);
+
+    for (intptr_t i = 0; i * sizeof(uint16_t) < oprsz; ++i) {
+        d->W(i) = (int16_t)a->W(i) >> count;
     }
-    for (i = 0; i < shift; i++) {
-        d->B(i) = 0;
+    glue(clear_high, SUFFIX)(d, oprsz, maxsz);
+}
+
+void glue(helper_psradi, SUFFIX)(Reg *d, Reg *a, uint32_t desc)
+{
+    const intptr_t oprsz = simd_oprsz(desc);
+    const intptr_t maxsz = simd_maxsz(desc);
+
+    uint64_t count = simd_data(desc);
+    if (count > 31) {
+        count = 31;
+    }
+
+    for (intptr_t i = 0; i * sizeof(uint32_t) < oprsz; ++i) {
+        d->L(i) = (int32_t)a->L(i) >> count;
+    }
+    glue(clear_high, SUFFIX)(d, oprsz, maxsz);
+}
+
+#if SHIFT == 1
+void glue(helper_pslldqi, SUFFIX)(Reg *d, Reg *a, uint32_t desc)
+{
+    const intptr_t oprsz = simd_oprsz(desc);
+    const intptr_t maxsz = simd_maxsz(desc);
+
+    unsigned int count = simd_data(desc);
+    if (count > 16) {
+        count = 16;
+    }
+
+    for (intptr_t i = 0; i < oprsz; i += 16) {
+        intptr_t j = 15;
+        for (; count <= j; --j) {
+            d->B(i + j) = a->B(i + j - count);
+        }
+        for (; 0 <= j; --j) {
+            d->B(i + j) = 0;
+        }
+    }
+    glue(clear_high, SUFFIX)(d, oprsz, maxsz);
+}
+
+void glue(helper_psrldqi, SUFFIX)(Reg *d, Reg *a, uint32_t desc)
+{
+    const intptr_t oprsz = simd_oprsz(desc);
+    const intptr_t maxsz = simd_maxsz(desc);
+
+    unsigned int count = simd_data(desc);
+    if (count > 16) {
+        count = 16;
+    }
+
+    for (intptr_t i = 0; i < oprsz; i += 16) {
+        intptr_t j = 0;
+        for (; j + count < 16; ++j) {
+            d->B(i + j) = a->B(i + j + count);
+        }
+        for (; j < 16; ++j) {
+            d->B(i + j) = 0;
+        }
     }
+    glue(clear_high, SUFFIX)(d, oprsz, maxsz);
 }
 #endif
 
diff --git a/target/i386/ops_sse_header.h b/target/i386/ops_sse_header.h
index afa0ad0938..724692a689 100644
--- a/target/i386/ops_sse_header.h
+++ b/target/i386/ops_sse_header.h
@@ -34,18 +34,28 @@
 #define dh_is_signed_ZMMReg dh_is_signed_ptr
 #define dh_is_signed_MMXReg dh_is_signed_ptr
 
-DEF_HELPER_3(glue(psrlw, SUFFIX), void, env, Reg, Reg)
-DEF_HELPER_3(glue(psraw, SUFFIX), void, env, Reg, Reg)
-DEF_HELPER_3(glue(psllw, SUFFIX), void, env, Reg, Reg)
-DEF_HELPER_3(glue(psrld, SUFFIX), void, env, Reg, Reg)
-DEF_HELPER_3(glue(psrad, SUFFIX), void, env, Reg, Reg)
-DEF_HELPER_3(glue(pslld, SUFFIX), void, env, Reg, Reg)
-DEF_HELPER_3(glue(psrlq, SUFFIX), void, env, Reg, Reg)
-DEF_HELPER_3(glue(psllq, SUFFIX), void, env, Reg, Reg)
+DEF_HELPER_4(glue(psllw, SUFFIX), void, Reg, Reg, Reg, i32)
+DEF_HELPER_4(glue(pslld, SUFFIX), void, Reg, Reg, Reg, i32)
+DEF_HELPER_4(glue(psllq, SUFFIX), void, Reg, Reg, Reg, i32)
+DEF_HELPER_3(glue(psllwi, SUFFIX), void, Reg, Reg, i32)
+DEF_HELPER_3(glue(pslldi, SUFFIX), void, Reg, Reg, i32)
+DEF_HELPER_3(glue(psllqi, SUFFIX), void, Reg, Reg, i32)
+
+DEF_HELPER_4(glue(psrlw, SUFFIX), void, Reg, Reg, Reg, i32)
+DEF_HELPER_4(glue(psrld, SUFFIX), void, Reg, Reg, Reg, i32)
+DEF_HELPER_4(glue(psrlq, SUFFIX), void, Reg, Reg, Reg, i32)
+DEF_HELPER_3(glue(psrlwi, SUFFIX), void, Reg, Reg, i32)
+DEF_HELPER_3(glue(psrldi, SUFFIX), void, Reg, Reg, i32)
+DEF_HELPER_3(glue(psrlqi, SUFFIX), void, Reg, Reg, i32)
+
+DEF_HELPER_4(glue(psraw, SUFFIX), void, Reg, Reg, Reg, i32)
+DEF_HELPER_4(glue(psrad, SUFFIX), void, Reg, Reg, Reg, i32)
+DEF_HELPER_3(glue(psrawi, SUFFIX), void, Reg, Reg, i32)
+DEF_HELPER_3(glue(psradi, SUFFIX), void, Reg, Reg, i32)
 
 #if SHIFT == 1
-DEF_HELPER_3(glue(psrldq, SUFFIX), void, env, Reg, Reg)
-DEF_HELPER_3(glue(pslldq, SUFFIX), void, env, Reg, Reg)
+DEF_HELPER_3(glue(pslldqi, SUFFIX), void, Reg, Reg, i32)
+DEF_HELPER_3(glue(psrldqi, SUFFIX), void, Reg, Reg, i32)
 #endif
 
 DEF_HELPER_3(glue(pmullw, SUFFIX), void, env, Reg, Reg)
diff --git a/target/i386/translate.c b/target/i386/translate.c
index c7e664e798..03f7c6e450 100644
--- a/target/i386/translate.c
+++ b/target/i386/translate.c
@@ -2801,24 +2801,16 @@ static const SSEFunc_0_epp sse_op_table1[256][4] = {
     [0xc4] = { SSE_SPECIAL, SSE_SPECIAL }, /* pinsrw */
     [0xc5] = { SSE_SPECIAL, SSE_SPECIAL }, /* pextrw */
     [0xd0] = { NULL, gen_helper_addsubpd, NULL, gen_helper_addsubps },
-    [0xd1] = MMX_OP2(psrlw),
-    [0xd2] = MMX_OP2(psrld),
-    [0xd3] = MMX_OP2(psrlq),
     [0xd5] = MMX_OP2(pmullw),
     [0xd6] = { NULL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL },
     [0xd7] = { SSE_SPECIAL, SSE_SPECIAL }, /* pmovmskb */
     [0xe0] = MMX_OP2(pavgb),
-    [0xe1] = MMX_OP2(psraw),
-    [0xe2] = MMX_OP2(psrad),
     [0xe3] = MMX_OP2(pavgw),
     [0xe4] = MMX_OP2(pmulhuw),
     [0xe5] = MMX_OP2(pmulhw),
     [0xe6] = { NULL, gen_helper_cvttpd2dq, gen_helper_cvtdq2pd, gen_helper_cvtpd2dq },
     [0xe7] = { SSE_SPECIAL , SSE_SPECIAL },  /* movntq, movntq */
     [0xf0] = { NULL, NULL, NULL, SSE_SPECIAL }, /* lddqu */
-    [0xf1] = MMX_OP2(psllw),
-    [0xf2] = MMX_OP2(pslld),
-    [0xf3] = MMX_OP2(psllq),
     [0xf4] = MMX_OP2(pmuludq),
     [0xf5] = MMX_OP2(pmaddwd),
     [0xf6] = MMX_OP2(psadbw),
@@ -2826,19 +2818,6 @@ static const SSEFunc_0_epp sse_op_table1[256][4] = {
                (SSEFunc_0_epp)gen_helper_maskmov_xmm }, /* XXX: casts */
 };
 
-static const SSEFunc_0_epp sse_op_table2[3 * 8][2] = {
-    [0 + 2] = MMX_OP2(psrlw),
-    [0 + 4] = MMX_OP2(psraw),
-    [0 + 6] = MMX_OP2(psllw),
-    [8 + 2] = MMX_OP2(psrld),
-    [8 + 4] = MMX_OP2(psrad),
-    [8 + 6] = MMX_OP2(pslld),
-    [16 + 2] = MMX_OP2(psrlq),
-    [16 + 3] = { NULL, gen_helper_psrldq_xmm },
-    [16 + 6] = MMX_OP2(psllq),
-    [16 + 7] = { NULL, gen_helper_pslldq_xmm },
-};
-
 static const SSEFunc_0_epi sse_op_table3ai[] = {
     gen_helper_cvtsi2ss,
     gen_helper_cvtsi2sd
@@ -3403,49 +3382,6 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b)
                 goto illegal_op;
             }
             break;
-        case 0x71: /* shift mm, im */
-        case 0x72:
-        case 0x73:
-        case 0x171: /* shift xmm, im */
-        case 0x172:
-        case 0x173:
-            if (b1 >= 2) {
-                goto unknown_op;
-            }
-            val = x86_ldub_code(env, s);
-            if (is_xmm) {
-                tcg_gen_movi_tl(s->T0, val);
-                tcg_gen_st32_tl(s->T0, cpu_env,
-                                offsetof(CPUX86State, xmm_t0.ZMM_L(0)));
-                tcg_gen_movi_tl(s->T0, 0);
-                tcg_gen_st32_tl(s->T0, cpu_env,
-                                offsetof(CPUX86State, xmm_t0.ZMM_L(1)));
-                op1_offset = offsetof(CPUX86State,xmm_t0);
-            } else {
-                tcg_gen_movi_tl(s->T0, val);
-                tcg_gen_st32_tl(s->T0, cpu_env,
-                                offsetof(CPUX86State, mmx_t0.MMX_L(0)));
-                tcg_gen_movi_tl(s->T0, 0);
-                tcg_gen_st32_tl(s->T0, cpu_env,
-                                offsetof(CPUX86State, mmx_t0.MMX_L(1)));
-                op1_offset = offsetof(CPUX86State,mmx_t0);
-            }
-            sse_fn_epp = sse_op_table2[((b - 1) & 3) * 8 +
-                                       (((modrm >> 3)) & 7)][b1];
-            if (!sse_fn_epp) {
-                goto unknown_op;
-            }
-            if (is_xmm) {
-                rm = (modrm & 7) | REX_B(s);
-                op2_offset = offsetof(CPUX86State,xmm_regs[rm]);
-            } else {
-                rm = (modrm & 7);
-                op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
-            }
-            tcg_gen_addi_ptr(s->ptr0, cpu_env, op2_offset);
-            tcg_gen_addi_ptr(s->ptr1, cpu_env, op1_offset);
-            sse_fn_epp(cpu_env, s->ptr0, s->ptr1);
-            break;
         case 0x050: /* movmskps */
             rm = (modrm & 7) | REX_B(s);
             tcg_gen_addi_ptr(s->ptr0, cpu_env,
@@ -6889,18 +6825,18 @@ DEF_GEN_INSN3_GVEC(xorpd, Vdq, Vdq, Wdq, xor, XMM_OPRSZ, XMM_MAXSZ, MO_64)
 DEF_GEN_INSN3_GVEC(vxorpd, Vdq, Hdq, Wdq, xor, XMM_OPRSZ, XMM_MAXSZ, MO_64)
 DEF_GEN_INSN3_GVEC(vxorpd, Vqq, Hqq, Wqq, xor, XMM_OPRSZ, XMM_MAXSZ, MO_64)
 
-DEF_GEN_INSN3_HELPER_EPP(psllw, psllw_mmx, Pq, Pq, Qq)
-DEF_GEN_INSN3_HELPER_EPP(psllw, psllw_xmm, Vdq, Vdq, Wdq)
-DEF_GEN_INSN3_HELPER_EPP(vpsllw, psllw_xmm, Vdq, Hdq, Wdq)
-DEF_GEN_INSN3_HELPER_EPP(vpsllw, psllw_xmm, Vqq, Hqq, Wdq)
-DEF_GEN_INSN3_HELPER_EPP(pslld, pslld_mmx, Pq, Pq, Qq)
-DEF_GEN_INSN3_HELPER_EPP(pslld, pslld_xmm, Vdq, Vdq, Wdq)
-DEF_GEN_INSN3_HELPER_EPP(vpslld, pslld_xmm, Vdq, Hdq, Wdq)
-DEF_GEN_INSN3_HELPER_EPP(vpslld, pslld_xmm, Vqq, Hqq, Wdq)
-DEF_GEN_INSN3_HELPER_EPP(psllq, psllq_mmx, Pq, Pq, Qq)
-DEF_GEN_INSN3_HELPER_EPP(psllq, psllq_xmm, Vdq, Vdq, Wdq)
-DEF_GEN_INSN3_HELPER_EPP(vpsllq, psllq_xmm, Vdq, Hdq, Wdq)
-DEF_GEN_INSN3_HELPER_EPP(vpsllq, psllq_xmm, Vqq, Hqq, Wdq)
+DEF_GEN_INSN3_GVEC(psllw, Pq, Pq, Qq, 3_ool, MM_OPRSZ, MM_MAXSZ, psllw_mmx)
+DEF_GEN_INSN3_GVEC(psllw, Vdq, Vdq, Wdq, 3_ool, XMM_OPRSZ, XMM_MAXSZ, psllw_xmm)
+DEF_GEN_INSN3_GVEC(vpsllw, Vdq, Hdq, Wdq, 3_ool, XMM_OPRSZ, XMM_MAXSZ, psllw_xmm)
+DEF_GEN_INSN3_GVEC(vpsllw, Vqq, Hqq, Wdq, 3_ool, XMM_OPRSZ, XMM_MAXSZ, psllw_xmm)
+DEF_GEN_INSN3_GVEC(pslld, Pq, Pq, Qq, 3_ool, MM_OPRSZ, MM_MAXSZ, pslld_mmx)
+DEF_GEN_INSN3_GVEC(pslld, Vdq, Vdq, Wdq, 3_ool, XMM_OPRSZ, XMM_MAXSZ, pslld_xmm)
+DEF_GEN_INSN3_GVEC(vpslld, Vdq, Hdq, Wdq, 3_ool, XMM_OPRSZ, XMM_MAXSZ, pslld_xmm)
+DEF_GEN_INSN3_GVEC(vpslld, Vqq, Hqq, Wdq, 3_ool, XMM_OPRSZ, XMM_MAXSZ, pslld_xmm)
+DEF_GEN_INSN3_GVEC(psllq, Pq, Pq, Qq, 3_ool, MM_OPRSZ, MM_MAXSZ, psllq_mmx)
+DEF_GEN_INSN3_GVEC(psllq, Vdq, Vdq, Wdq, 3_ool, XMM_OPRSZ, XMM_MAXSZ, psllq_xmm)
+DEF_GEN_INSN3_GVEC(vpsllq, Vdq, Hdq, Wdq, 3_ool, XMM_OPRSZ, XMM_MAXSZ, psllq_xmm)
+DEF_GEN_INSN3_GVEC(vpsllq, Vqq, Hqq, Wdq, 3_ool, XMM_OPRSZ, XMM_MAXSZ, psllq_xmm)
 
 GEN_INSN3(vpsllvd, Vdq, Hdq, Wdq)
 {
@@ -6920,21 +6856,18 @@ GEN_INSN3(vpsllvq, Vqq, Hqq, Wqq)
     /* XXX TODO implement this */
 }
 
-DEF_GEN_INSN3_HELPER_EPP(pslldq, pslldq_xmm, Vdq, Vdq, Wdq)
-DEF_GEN_INSN3_HELPER_EPP(vpslldq, pslldq_xmm, Vdq, Hdq, Wdq)
-DEF_GEN_INSN3_HELPER_EPP(vpslldq, pslldq_xmm, Vqq, Hqq, Wdq)
-DEF_GEN_INSN3_HELPER_EPP(psrlw, psrlw_mmx, Pq, Pq, Qq)
-DEF_GEN_INSN3_HELPER_EPP(psrlw, psrlw_xmm, Vdq, Vdq, Wdq)
-DEF_GEN_INSN3_HELPER_EPP(vpsrlw, psrlw_xmm, Vdq, Hdq, Wdq)
-DEF_GEN_INSN3_HELPER_EPP(vpsrlw, psrlw_xmm, Vqq, Hqq, Wdq)
-DEF_GEN_INSN3_HELPER_EPP(psrld, psrld_mmx, Pq, Pq, Qq)
-DEF_GEN_INSN3_HELPER_EPP(psrld, psrld_xmm, Vdq, Vdq, Wdq)
-DEF_GEN_INSN3_HELPER_EPP(vpsrld, psrld_xmm, Vdq, Hdq, Wdq)
-DEF_GEN_INSN3_HELPER_EPP(vpsrld, psrld_xmm, Vqq, Hqq, Wdq)
-DEF_GEN_INSN3_HELPER_EPP(psrlq, psrlq_mmx, Pq, Pq, Qq)
-DEF_GEN_INSN3_HELPER_EPP(psrlq, psrlq_xmm, Vdq, Vdq, Wdq)
-DEF_GEN_INSN3_HELPER_EPP(vpsrlq, psrlq_xmm, Vdq, Hdq, Wdq)
-DEF_GEN_INSN3_HELPER_EPP(vpsrlq, psrlq_xmm, Vqq, Hqq, Wdq)
+DEF_GEN_INSN3_GVEC(psrlw, Pq, Pq, Qq, 3_ool, MM_OPRSZ, MM_MAXSZ, psrlw_mmx)
+DEF_GEN_INSN3_GVEC(psrlw, Vdq, Vdq, Wdq, 3_ool, XMM_OPRSZ, XMM_MAXSZ, psrlw_xmm)
+DEF_GEN_INSN3_GVEC(vpsrlw, Vdq, Hdq, Wdq, 3_ool, XMM_OPRSZ, XMM_MAXSZ, psrlw_xmm)
+DEF_GEN_INSN3_GVEC(vpsrlw, Vqq, Hqq, Wdq, 3_ool, XMM_OPRSZ, XMM_MAXSZ, psrlw_xmm)
+DEF_GEN_INSN3_GVEC(psrld, Pq, Pq, Qq, 3_ool, MM_OPRSZ, MM_MAXSZ, psrld_mmx)
+DEF_GEN_INSN3_GVEC(psrld, Vdq, Vdq, Wdq, 3_ool, XMM_OPRSZ, XMM_MAXSZ, psrld_xmm)
+DEF_GEN_INSN3_GVEC(vpsrld, Vdq, Hdq, Wdq, 3_ool, XMM_OPRSZ, XMM_MAXSZ, psrld_xmm)
+DEF_GEN_INSN3_GVEC(vpsrld, Vqq, Hqq, Wdq, 3_ool, XMM_OPRSZ, XMM_MAXSZ, psrld_xmm)
+DEF_GEN_INSN3_GVEC(psrlq, Pq, Pq, Qq, 3_ool, MM_OPRSZ, MM_MAXSZ, psrlq_mmx)
+DEF_GEN_INSN3_GVEC(psrlq, Vdq, Vdq, Wdq, 3_ool, XMM_OPRSZ, XMM_MAXSZ, psrlq_xmm)
+DEF_GEN_INSN3_GVEC(vpsrlq, Vdq, Hdq, Wdq, 3_ool, XMM_OPRSZ, XMM_MAXSZ, psrlq_xmm)
+DEF_GEN_INSN3_GVEC(vpsrlq, Vqq, Hqq, Wdq, 3_ool, XMM_OPRSZ, XMM_MAXSZ, psrlq_xmm)
 
 GEN_INSN3(vpsrlvd, Vdq, Hdq, Wdq)
 {
@@ -6954,17 +6887,14 @@ GEN_INSN3(vpsrlvq, Vqq, Hqq, Wqq)
     /* XXX TODO implement this */
 }
 
-DEF_GEN_INSN3_HELPER_EPP(psrldq, psrldq_xmm, Vdq, Vdq, Wdq)
-DEF_GEN_INSN3_HELPER_EPP(vpsrldq, psrldq_xmm, Vdq, Hdq, Wdq)
-DEF_GEN_INSN3_HELPER_EPP(vpsrldq, psrldq_xmm, Vqq, Hqq, Wdq)
-DEF_GEN_INSN3_HELPER_EPP(psraw, psraw_mmx, Pq, Pq, Qq)
-DEF_GEN_INSN3_HELPER_EPP(psraw, psraw_xmm, Vdq, Vdq, Wdq)
-DEF_GEN_INSN3_HELPER_EPP(vpsraw, psraw_xmm, Vdq, Hdq, Wdq)
-DEF_GEN_INSN3_HELPER_EPP(vpsraw, psraw_xmm, Vqq, Hqq, Wdq)
-DEF_GEN_INSN3_HELPER_EPP(psrad, psrad_mmx, Pq, Pq, Qq)
-DEF_GEN_INSN3_HELPER_EPP(psrad, psrad_xmm, Vdq, Vdq, Wdq)
-DEF_GEN_INSN3_HELPER_EPP(vpsrad, psrad_xmm, Vdq, Hdq, Wdq)
-DEF_GEN_INSN3_HELPER_EPP(vpsrad, psrad_xmm, Vqq, Hqq, Wdq)
+DEF_GEN_INSN3_GVEC(psraw, Pq, Pq, Qq, 3_ool, MM_OPRSZ, MM_MAXSZ, psraw_mmx)
+DEF_GEN_INSN3_GVEC(psraw, Vdq, Vdq, Wdq, 3_ool, XMM_OPRSZ, XMM_MAXSZ, psraw_xmm)
+DEF_GEN_INSN3_GVEC(vpsraw, Vdq, Hdq, Wdq, 3_ool, XMM_OPRSZ, XMM_MAXSZ, psraw_xmm)
+DEF_GEN_INSN3_GVEC(vpsraw, Vqq, Hqq, Wdq, 3_ool, XMM_OPRSZ, XMM_MAXSZ, psraw_xmm)
+DEF_GEN_INSN3_GVEC(psrad, Pq, Pq, Qq, 3_ool, MM_OPRSZ, MM_MAXSZ, psrad_mmx)
+DEF_GEN_INSN3_GVEC(psrad, Vdq, Vdq, Wdq, 3_ool, XMM_OPRSZ, XMM_MAXSZ, psrad_xmm)
+DEF_GEN_INSN3_GVEC(vpsrad, Vdq, Hdq, Wdq, 3_ool, XMM_OPRSZ, XMM_MAXSZ, psrad_xmm)
+DEF_GEN_INSN3_GVEC(vpsrad, Vqq, Hqq, Wdq, 3_ool, XMM_OPRSZ, XMM_MAXSZ, psrad_xmm)
 
 GEN_INSN3(vpsravd, Vdq, Hdq, Wdq)
 {
@@ -6975,93 +6905,44 @@ GEN_INSN3(vpsravd, Vqq, Hqq, Wqq)
     /* XXX TODO implement this */
 }
 
-#define DEF_GEN_PSHIFT_IMM_MM(mnem, opT1, opT2)                         \
-    GEN_INSN3(mnem, opT1, opT2, Ib)                                     \
-    {                                                                   \
-        const uint64_t arg3_ui64 = (uint8_t)arg3;                       \
-        const insnop_arg_t(Eq) arg3_r64 = s->tmp1_i64;                  \
-        const insnop_arg_t(Qq) arg3_mm =                                \
-            offsetof(CPUX86State, mmx_t0.MMX_Q(0));                     \
-                                                                        \
-        tcg_gen_movi_i64(arg3_r64, arg3_ui64);                          \
-        gen_insn2(movq, Pq, Eq)(env, s, arg3_mm, arg3_r64);             \
-        gen_insn3(mnem, Pq, Pq, Qq)(env, s, arg1, arg2, arg3_mm);       \
-    }
-#define DEF_GEN_PSHIFT_IMM_XMM(mnem, opT1, opT2)                        \
-    GEN_INSN3(mnem, opT1, opT2, Ib)                                     \
-    {                                                                   \
-        const uint64_t arg3_ui64 = (uint8_t)arg3;                       \
-        const insnop_arg_t(Eq) arg3_r64 = s->tmp1_i64;                  \
-        const insnop_arg_t(Wdq) arg3_xmm =                              \
-            offsetof(CPUX86State, xmm_t0.ZMM_Q(0));                     \
-                                                                        \
-        tcg_gen_movi_i64(arg3_r64, arg3_ui64);                          \
-        gen_insn2(movq, Vdq, Eq)(env, s, arg3_xmm, arg3_r64);           \
-        gen_insn3(mnem, Vdq, Vdq, Wdq)(env, s, arg1, arg2, arg3_xmm);   \
-    }
-#define DEF_GEN_VPSHIFT_IMM_XMM(mnem, opT1, opT2)                       \
-    GEN_INSN3(mnem, opT1, opT2, Ib)                                     \
-    {                                                                   \
-        const uint64_t arg3_ui64 = (uint8_t)arg3;                       \
-        const insnop_arg_t(Eq) arg3_r64 = s->tmp1_i64;                  \
-        const insnop_arg_t(Wdq) arg3_xmm =                              \
-            offsetof(CPUX86State, xmm_t0.ZMM_Q(0));                     \
-                                                                        \
-        tcg_gen_movi_i64(arg3_r64, arg3_ui64);                          \
-        gen_insn2(movq, Vdq, Eq)(env, s, arg3_xmm, arg3_r64);           \
-        gen_insn3(mnem, Vdq, Hdq, Wdq)(env, s, arg2, arg2, arg3_xmm);   \
-    }
-#define DEF_GEN_VPSHIFT_IMM_YMM(mnem, opT1, opT2)                       \
-    GEN_INSN3(mnem, opT1, opT2, Ib)                                     \
-    {                                                                   \
-        const uint64_t arg3_ui64 = (uint8_t)arg3;                       \
-        const insnop_arg_t(Eq) arg3_r64 = s->tmp1_i64;                  \
-        const insnop_arg_t(Wdq) arg3_xmm =                              \
-            offsetof(CPUX86State, xmm_t0.ZMM_Q(0));                     \
-                                                                        \
-        tcg_gen_movi_i64(arg3_r64, arg3_ui64);                          \
-        gen_insn2(movq, Vdq, Eq)(env, s, arg3_xmm, arg3_r64);           \
-        gen_insn3(mnem, Vqq, Hqq, Wdq)(env, s, arg2, arg2, arg3_xmm);   \
-    }
-
-DEF_GEN_PSHIFT_IMM_MM(psllw, Nq, Nq)
-DEF_GEN_PSHIFT_IMM_XMM(psllw, Udq, Udq)
-DEF_GEN_VPSHIFT_IMM_XMM(vpsllw, Hdq, Udq)
-DEF_GEN_VPSHIFT_IMM_YMM(vpsllw, Hqq, Uqq)
-DEF_GEN_PSHIFT_IMM_MM(pslld, Nq, Nq)
-DEF_GEN_PSHIFT_IMM_XMM(pslld, Udq, Udq)
-DEF_GEN_VPSHIFT_IMM_XMM(vpslld, Hdq, Udq)
-DEF_GEN_VPSHIFT_IMM_YMM(vpslld, Hqq, Uqq)
-DEF_GEN_PSHIFT_IMM_MM(psllq, Nq, Nq)
-DEF_GEN_PSHIFT_IMM_XMM(psllq, Udq, Udq)
-DEF_GEN_VPSHIFT_IMM_XMM(vpsllq, Hdq, Udq)
-DEF_GEN_VPSHIFT_IMM_YMM(vpsllq, Hqq, Uqq)
-DEF_GEN_PSHIFT_IMM_XMM(pslldq, Udq, Udq)
-DEF_GEN_VPSHIFT_IMM_XMM(vpslldq, Hdq, Udq)
-DEF_GEN_VPSHIFT_IMM_YMM(vpslldq, Hqq, Uqq)
-DEF_GEN_PSHIFT_IMM_MM(psrlw, Nq, Nq)
-DEF_GEN_PSHIFT_IMM_XMM(psrlw, Udq, Udq)
-DEF_GEN_VPSHIFT_IMM_XMM(vpsrlw, Hdq, Udq)
-DEF_GEN_VPSHIFT_IMM_YMM(vpsrlw, Hqq, Uqq)
-DEF_GEN_PSHIFT_IMM_MM(psrld, Nq, Nq)
-DEF_GEN_PSHIFT_IMM_XMM(psrld, Udq, Udq)
-DEF_GEN_VPSHIFT_IMM_XMM(vpsrld, Hdq, Udq)
-DEF_GEN_VPSHIFT_IMM_YMM(vpsrld, Hqq, Uqq)
-DEF_GEN_PSHIFT_IMM_MM(psrlq, Nq, Nq)
-DEF_GEN_PSHIFT_IMM_XMM(psrlq, Udq, Udq)
-DEF_GEN_VPSHIFT_IMM_XMM(vpsrlq, Hdq, Udq)
-DEF_GEN_VPSHIFT_IMM_YMM(vpsrlq, Hqq, Uqq)
-DEF_GEN_PSHIFT_IMM_XMM(psrldq, Udq, Udq)
-DEF_GEN_VPSHIFT_IMM_XMM(vpsrldq, Hdq, Udq)
-DEF_GEN_VPSHIFT_IMM_YMM(vpsrldq, Hqq, Uqq)
-DEF_GEN_PSHIFT_IMM_MM(psraw, Nq, Nq)
-DEF_GEN_PSHIFT_IMM_XMM(psraw, Udq, Udq)
-DEF_GEN_VPSHIFT_IMM_XMM(vpsraw, Hdq, Udq)
-DEF_GEN_VPSHIFT_IMM_XMM(vpsraw, Hqq, Uqq)
-DEF_GEN_PSHIFT_IMM_MM(psrad, Nq, Nq)
-DEF_GEN_PSHIFT_IMM_XMM(psrad, Udq, Udq)
-DEF_GEN_VPSHIFT_IMM_XMM(vpsrad, Hdq, Udq)
-DEF_GEN_VPSHIFT_IMM_XMM(vpsrad, Hqq, Uqq)
+DEF_GEN_INSN3_GVEC(psllw, Nq, Nq, Ib, 2i_ool, MM_OPRSZ, MM_MAXSZ, psllwi_xmm)
+DEF_GEN_INSN3_GVEC(psllw, Udq, Udq, Ib, 2i_ool, XMM_OPRSZ, XMM_MAXSZ, psllwi_xmm)
+DEF_GEN_INSN3_GVEC(vpsllw, Hdq, Udq, Ib, 2i_ool, XMM_OPRSZ, XMM_MAXSZ, psllwi_xmm)
+DEF_GEN_INSN3_GVEC(vpsllw, Hqq, Uqq, Ib, 2i_ool, XMM_OPRSZ, XMM_MAXSZ, psllwi_xmm)
+DEF_GEN_INSN3_GVEC(pslld, Nq, Nq, Ib, 2i_ool, MM_OPRSZ, MM_MAXSZ, pslldi_xmm)
+DEF_GEN_INSN3_GVEC(pslld, Udq, Udq, Ib, 2i_ool, XMM_OPRSZ, XMM_MAXSZ, pslldi_xmm)
+DEF_GEN_INSN3_GVEC(vpslld, Hdq, Udq, Ib, 2i_ool, XMM_OPRSZ, XMM_MAXSZ, pslldi_xmm)
+DEF_GEN_INSN3_GVEC(vpslld, Hqq, Uqq, Ib, 2i_ool, XMM_OPRSZ, XMM_MAXSZ, pslldi_xmm)
+DEF_GEN_INSN3_GVEC(psllq, Nq, Nq, Ib, 2i_ool, MM_OPRSZ, MM_MAXSZ, psllqi_xmm)
+DEF_GEN_INSN3_GVEC(psllq, Udq, Udq, Ib, 2i_ool, XMM_OPRSZ, XMM_MAXSZ, psllqi_xmm)
+DEF_GEN_INSN3_GVEC(vpsllq, Hdq, Udq, Ib, 2i_ool, XMM_OPRSZ, XMM_MAXSZ, psllqi_xmm)
+DEF_GEN_INSN3_GVEC(vpsllq, Hqq, Uqq, Ib, 2i_ool, XMM_OPRSZ, XMM_MAXSZ, psllqi_xmm)
+DEF_GEN_INSN3_GVEC(pslldq, Udq, Udq, Ib, 2i_ool, XMM_OPRSZ, XMM_MAXSZ, pslldqi_xmm)
+DEF_GEN_INSN3_GVEC(vpslldq, Hdq, Udq, Ib, 2i_ool, XMM_OPRSZ, XMM_MAXSZ, pslldqi_xmm)
+DEF_GEN_INSN3_GVEC(vpslldq, Hqq, Uqq, Ib, 2i_ool, XMM_OPRSZ, XMM_MAXSZ, pslldqi_xmm)
+DEF_GEN_INSN3_GVEC(psrlw, Nq, Nq, Ib, 2i_ool, MM_OPRSZ, MM_MAXSZ, psrlwi_xmm)
+DEF_GEN_INSN3_GVEC(psrlw, Udq, Udq, Ib, 2i_ool, XMM_OPRSZ, XMM_MAXSZ, psrlwi_xmm)
+DEF_GEN_INSN3_GVEC(vpsrlw, Hdq, Udq, Ib, 2i_ool, XMM_OPRSZ, XMM_MAXSZ, psrlwi_xmm)
+DEF_GEN_INSN3_GVEC(vpsrlw, Hqq, Uqq, Ib, 2i_ool, XMM_OPRSZ, XMM_MAXSZ, psrlwi_xmm)
+DEF_GEN_INSN3_GVEC(psrld, Nq, Nq, Ib, 2i_ool, MM_OPRSZ, MM_MAXSZ, psrldi_xmm)
+DEF_GEN_INSN3_GVEC(psrld, Udq, Udq, Ib, 2i_ool, XMM_OPRSZ, XMM_MAXSZ, psrldi_xmm)
+DEF_GEN_INSN3_GVEC(vpsrld, Hdq, Udq, Ib, 2i_ool, XMM_OPRSZ, XMM_MAXSZ, psrldi_xmm)
+DEF_GEN_INSN3_GVEC(vpsrld, Hqq, Uqq, Ib, 2i_ool, XMM_OPRSZ, XMM_MAXSZ, psrldi_xmm)
+DEF_GEN_INSN3_GVEC(psrlq, Nq, Nq, Ib, 2i_ool, MM_OPRSZ, MM_MAXSZ, psrlqi_xmm)
+DEF_GEN_INSN3_GVEC(psrlq, Udq, Udq, Ib, 2i_ool, XMM_OPRSZ, XMM_MAXSZ, psrlqi_xmm)
+DEF_GEN_INSN3_GVEC(vpsrlq, Hdq, Udq, Ib, 2i_ool, XMM_OPRSZ, XMM_MAXSZ, psrlqi_xmm)
+DEF_GEN_INSN3_GVEC(vpsrlq, Hqq, Uqq, Ib, 2i_ool, XMM_OPRSZ, XMM_MAXSZ, psrlqi_xmm)
+DEF_GEN_INSN3_GVEC(psrldq, Udq, Udq, Ib, 2i_ool, XMM_OPRSZ, XMM_MAXSZ, psrldqi_xmm)
+DEF_GEN_INSN3_GVEC(vpsrldq, Hdq, Udq, Ib, 2i_ool, XMM_OPRSZ, XMM_MAXSZ, psrldqi_xmm)
+DEF_GEN_INSN3_GVEC(vpsrldq, Hqq, Uqq, Ib, 2i_ool, XMM_OPRSZ, XMM_MAXSZ, psrldqi_xmm)
+DEF_GEN_INSN3_GVEC(psraw, Nq, Nq, Ib, 2i_ool, MM_OPRSZ, MM_MAXSZ, psrawi_xmm)
+DEF_GEN_INSN3_GVEC(psraw, Udq, Udq, Ib, 2i_ool, XMM_OPRSZ, XMM_MAXSZ, psrawi_xmm)
+DEF_GEN_INSN3_GVEC(vpsraw, Hdq, Udq, Ib, 2i_ool, XMM_OPRSZ, XMM_MAXSZ, psrawi_xmm)
+DEF_GEN_INSN3_GVEC(vpsraw, Hqq, Uqq, Ib, 2i_ool, XMM_OPRSZ, XMM_MAXSZ, psrawi_xmm)
+DEF_GEN_INSN3_GVEC(psrad, Nq, Nq, Ib, 2i_ool, MM_OPRSZ, MM_MAXSZ, psradi_mmx)
+DEF_GEN_INSN3_GVEC(psrad, Udq, Udq, Ib, 2i_ool, XMM_OPRSZ, XMM_MAXSZ, psradi_xmm)
+DEF_GEN_INSN3_GVEC(vpsrad, Hdq, Udq, Ib, 2i_ool, XMM_OPRSZ, XMM_MAXSZ, psradi_xmm)
+DEF_GEN_INSN3_GVEC(vpsrad, Hqq, Uqq, Ib, 2i_ool, XMM_OPRSZ, XMM_MAXSZ, psradi_xmm)
 
 DEF_GEN_INSN4_HELPER_EPPI(palignr, palignr_mmx, Pq, Pq, Qq, Ib)
 DEF_GEN_INSN4_HELPER_EPPI(palignr, palignr_xmm, Vdq, Vdq, Wdq, Ib)
-- 
2.20.1



  parent reply	other threads:[~2019-08-21 18:38 UTC|newest]

Thread overview: 80+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-08-21 17:28 [Qemu-devel] [RFC PATCH v4 00/75] rewrite MMX/SSE*/AVX/AVX2 vector instruction translation Jan Bobek
2019-08-21 17:28 ` [Qemu-devel] [RFC PATCH v4 01/75] target/i386: Push rex_r into DisasContext Jan Bobek
2019-08-21 17:28 ` [Qemu-devel] [RFC PATCH v4 02/75] target/i386: Push rex_w " Jan Bobek
2019-08-22  4:07   ` Aleksandar Markovic
2019-08-21 17:28 ` [Qemu-devel] [RFC PATCH v4 03/75] target/i386: use dflag from DisasContext Jan Bobek
2019-08-21 17:28 ` [Qemu-devel] [RFC PATCH v4 04/75] target/i386: use prefix " Jan Bobek
2019-08-21 17:28 ` [Qemu-devel] [RFC PATCH v4 05/75] target/i386: introduce disas_insn_prefix Jan Bobek
2019-08-21 17:28 ` [Qemu-devel] [RFC PATCH v4 06/75] target/i386: Simplify gen_exception arguments Jan Bobek
2019-08-21 17:28 ` [Qemu-devel] [RFC PATCH v4 07/75] target/i386: use pc_start from DisasContext Jan Bobek
2019-08-21 17:28 ` [Qemu-devel] [RFC PATCH v4 08/75] target/i386: make variable b1 const Jan Bobek
2019-08-21 17:28 ` [Qemu-devel] [RFC PATCH v4 09/75] target/i386: make variable is_xmm const Jan Bobek
2019-08-21 17:28 ` [Qemu-devel] [RFC PATCH v4 10/75] target/i386: add vector register file alignment constraints Jan Bobek
2019-08-21 17:28 ` [Qemu-devel] [RFC PATCH v4 11/75] target/i386: introduce gen_sse_ng Jan Bobek
2019-08-21 17:28 ` [Qemu-devel] [RFC PATCH v4 12/75] target/i386: introduce CASES_* macros in gen_sse_ng Jan Bobek
2019-08-21 17:28 ` [Qemu-devel] [RFC PATCH v4 13/75] target/i386: decode the 0F38/0F3A prefix " Jan Bobek
2019-08-21 17:28 ` [Qemu-devel] [RFC PATCH v4 14/75] target/i386: introduce aliases for some tcg_gvec operations Jan Bobek
2019-08-21 17:28 ` [Qemu-devel] [RFC PATCH v4 15/75] target/i386: introduce function check_cpuid Jan Bobek
2019-08-21 17:28 ` [Qemu-devel] [RFC PATCH v4 16/75] target/i386: disable AVX/AVX2 cpuid bitchecks Jan Bobek
2019-08-21 17:28 ` [Qemu-devel] [RFC PATCH v4 17/75] target/i386: introduce instruction operand infrastructure Jan Bobek
2019-08-21 17:28 ` [Qemu-devel] [RFC PATCH v4 18/75] target/i386: introduce generic operand alias Jan Bobek
2019-08-21 17:28 ` [Qemu-devel] [RFC PATCH v4 19/75] target/i386: introduce generic either-or operand Jan Bobek
2019-08-21 17:28 ` [Qemu-devel] [RFC PATCH v4 20/75] target/i386: introduce generic load-store operand Jan Bobek
2019-08-21 17:28 ` [Qemu-devel] [RFC PATCH v4 21/75] target/i386: introduce tcg register operands Jan Bobek
2019-08-21 17:28 ` [Qemu-devel] [RFC PATCH v4 22/75] target/i386: introduce modrm operand Jan Bobek
2019-08-21 17:28 ` [Qemu-devel] [RFC PATCH v4 23/75] target/i386: introduce operands for decoding modrm fields Jan Bobek
2019-08-21 17:29 ` [Qemu-devel] [RFC PATCH v4 24/75] target/i386: introduce operand for direct-only r/m field Jan Bobek
2019-08-21 17:29 ` [Qemu-devel] [RFC PATCH v4 25/75] target/i386: introduce Ib (immediate) operand Jan Bobek
2019-08-21 17:29 ` [Qemu-devel] [RFC PATCH v4 26/75] target/i386: introduce M* (memptr) operands Jan Bobek
2019-08-21 17:29 ` [Qemu-devel] [RFC PATCH v4 27/75] target/i386: introduce G*, R*, E* (general register) operands Jan Bobek
2019-08-21 17:29 ` [Qemu-devel] [RFC PATCH v4 28/75] target/i386: introduce P*, N*, Q* (MMX) operands Jan Bobek
2019-08-21 17:29 ` [Qemu-devel] [RFC PATCH v4 29/75] target/i386: introduce H*, L*, V*, U*, W* (SSE/AVX) operands Jan Bobek
2019-08-21 17:29 ` [Qemu-devel] [RFC PATCH v4 30/75] target/i386: alias H* operands with the V* operands Jan Bobek
2019-08-21 17:29 ` [Qemu-devel] [RFC PATCH v4 31/75] target/i386: introduce code generators Jan Bobek
2019-08-22  4:33   ` Aleksandar Markovic
2019-08-21 17:29 ` [Qemu-devel] [RFC PATCH v4 32/75] target/i386: introduce helper-based code generator macros Jan Bobek
2019-08-21 17:29 ` [Qemu-devel] [RFC PATCH v4 33/75] target/i386: introduce gvec-based " Jan Bobek
2019-08-21 17:29 ` [Qemu-devel] [RFC PATCH v4 34/75] target/i386: introduce sse-opcode.inc.h Jan Bobek
2019-08-21 17:29 ` [Qemu-devel] [RFC PATCH v4 35/75] target/i386: introduce instruction translator macros Jan Bobek
2019-08-21 17:29 ` [Qemu-devel] [RFC PATCH v4 36/75] target/i386: introduce MMX translators Jan Bobek
2019-08-21 17:29 ` [Qemu-devel] [RFC PATCH v4 37/75] target/i386: introduce MMX code generators Jan Bobek
2019-08-21 17:29 ` [Qemu-devel] [RFC PATCH v4 38/75] target/i386: introduce MMX vector instructions to sse-opcode.inc.h Jan Bobek
2019-08-21 17:29 ` [Qemu-devel] [RFC PATCH v4 39/75] target/i386: introduce SSE translators Jan Bobek
2019-08-21 17:29 ` [Qemu-devel] [RFC PATCH v4 40/75] target/i386: introduce SSE code generators Jan Bobek
2019-08-21 17:29 ` [Qemu-devel] [RFC PATCH v4 41/75] target/i386: introduce SSE vector instructions to sse-opcode.inc.h Jan Bobek
2019-08-21 17:29 ` [Qemu-devel] [RFC PATCH v4 42/75] target/i386: introduce SSE2 translators Jan Bobek
2019-08-21 17:29 ` [Qemu-devel] [RFC PATCH v4 43/75] target/i386: introduce SSE2 code generators Jan Bobek
2019-08-21 17:29 ` [Qemu-devel] [RFC PATCH v4 44/75] target/i386: introduce SSE2 vector instructions to sse-opcode.inc.h Jan Bobek
2019-08-21 17:29 ` [Qemu-devel] [RFC PATCH v4 45/75] target/i386: introduce SSE3 translators Jan Bobek
2019-08-21 17:29 ` [Qemu-devel] [RFC PATCH v4 46/75] target/i386: introduce SSE3 code generators Jan Bobek
2019-08-21 17:29 ` [Qemu-devel] [RFC PATCH v4 47/75] target/i386: introduce SSE3 vector instructions to sse-opcode.inc.h Jan Bobek
2019-08-21 17:29 ` [Qemu-devel] [RFC PATCH v4 48/75] target/i386: introduce SSSE3 translators Jan Bobek
2019-08-21 17:29 ` [Qemu-devel] [RFC PATCH v4 49/75] target/i386: introduce SSSE3 code generators Jan Bobek
2019-08-21 17:29 ` [Qemu-devel] [RFC PATCH v4 50/75] target/i386: introduce SSSE3 vector instructions to sse-opcode.inc.h Jan Bobek
2019-08-21 17:29 ` [Qemu-devel] [RFC PATCH v4 51/75] target/i386: introduce SSE4.1 translators Jan Bobek
2019-08-21 17:29 ` [Qemu-devel] [RFC PATCH v4 52/75] target/i386: introduce SSE4.1 code generators Jan Bobek
2019-08-21 17:29 ` [Qemu-devel] [RFC PATCH v4 53/75] target/i386: introduce SSE4.1 vector instructions to sse-opcode.inc.h Jan Bobek
2019-08-21 17:29 ` [Qemu-devel] [RFC PATCH v4 54/75] target/i386: introduce SSE4.2 code generators Jan Bobek
2019-08-21 17:29 ` [Qemu-devel] [RFC PATCH v4 55/75] target/i386: introduce SSE4.2 vector instructions to sse-opcode.inc.h Jan Bobek
2019-08-21 17:29 ` [Qemu-devel] [RFC PATCH v4 57/75] target/i386: introduce AES and PCLMULQDQ code generators Jan Bobek
2019-08-21 17:29 ` [Qemu-devel] [RFC PATCH v4 58/75] target/i386: introduce AES and PCLMULQDQ vector instructions to sse-opcode.inc.h Jan Bobek
2019-08-22  4:02   ` Aleksandar Markovic
2019-08-21 17:29 ` [Qemu-devel] [RFC PATCH v4 59/75] target/i386: introduce AVX translators Jan Bobek
2019-08-21 17:29 ` [Qemu-devel] [RFC PATCH v4 60/75] target/i386: introduce AVX code generators Jan Bobek
2019-08-21 17:29 ` [Qemu-devel] [RFC PATCH v4 61/75] target/i386: introduce AVX vector instructions to sse-opcode.inc.h Jan Bobek
2019-08-21 17:29 ` [Qemu-devel] [RFC PATCH v4 62/75] target/i386: introduce AVX2 translators Jan Bobek
2019-08-21 17:29 ` [Qemu-devel] [RFC PATCH v4 63/75] target/i386: introduce AVX2 code generators Jan Bobek
2019-08-21 17:29 ` [Qemu-devel] [RFC PATCH v4 64/75] target/i386: introduce AVX2 vector instructions to sse-opcode.inc.h Jan Bobek
2019-08-22  3:54   ` Aleksandar Markovic
2019-08-21 17:29 ` [Qemu-devel] [RFC PATCH v4 65/75] target/i386: remove obsoleted helpers Jan Bobek
2019-08-21 17:29 ` [Qemu-devel] [RFC PATCH v4 66/75] target/i386: cleanup leftovers in ops_sse_header.h Jan Bobek
2019-08-21 17:29 ` [Qemu-devel] [RFC PATCH v4 67/75] target/i386: introduce aliases for helper-based tcg_gen_gvec_* functions Jan Bobek
2019-08-21 17:29 ` Jan Bobek [this message]
2019-08-21 17:29 ` [Qemu-devel] [RFC PATCH v4 69/75] target/i386: convert pmullw/pmulhw/pmulhuw helpers to gvec style Jan Bobek
2019-08-21 17:29 ` [Qemu-devel] [RFC PATCH v4 70/75] target/i386: convert pavgb/pavgw " Jan Bobek
2019-08-21 17:29 ` [Qemu-devel] [RFC PATCH v4 71/75] target/i386: convert pmuludq/pmaddwd " Jan Bobek
2019-08-21 17:29 ` [Qemu-devel] [RFC PATCH v4 72/75] target/i386: convert psadbw helper " Jan Bobek
2019-08-21 17:29 ` [Qemu-devel] [RFC PATCH v4 73/75] target/i386: remove obsoleted helper_mov(l, q)_mm_T0 Jan Bobek
2019-08-21 17:29 ` [Qemu-devel] [RFC PATCH v4 74/75] target/i386: convert pshuf(w, lw, hw, d), shuf(pd, ps) helpers to gvec style Jan Bobek
2019-08-21 17:29 ` [Qemu-devel] [RFC PATCH v4 75/75] target/i386: convert pmovmskb/movmskps/movmskpd " Jan Bobek
2019-08-21 23:53   ` Richard Henderson

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20190821172951.15333-69-jan.bobek@gmail.com \
    --to=jan.bobek@gmail.com \
    --cc=alex.bennee@linaro.org \
    --cc=qemu-devel@nongnu.org \
    --cc=richard.henderson@linaro.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).