All of lore.kernel.org
 help / color / mirror / Atom feed
From: Paul Brook <paul@nowt.org>
To: Paolo Bonzini <pbonzini@redhat.com>,
	Richard Henderson <richard.henderson@linaro.org>,
	Eduardo Habkost <eduardo@habkost.net>
Cc: "open list:All patches CC here" <qemu-devel@nongnu.org>,
	Paul Brook <paul@nowt.org>
Subject: [PATCH v2 35/42] i386: Implement VPERM
Date: Sun, 24 Apr 2022 23:01:57 +0100	[thread overview]
Message-ID: <20220424220204.2493824-36-paul@nowt.org> (raw)
In-Reply-To: <20220418173904.3746036-1-paul@nowt.org>

A set of shuffle operations that operate on complete 256 bit registers.
The integer and floating point variants have identical semantics.

Signed-off-by: Paul Brook <paul@nowt.org>
---
 target/i386/ops_sse.h        | 73 ++++++++++++++++++++++++++++++++++++
 target/i386/ops_sse_header.h |  3 ++
 target/i386/tcg/translate.c  |  9 +++++
 3 files changed, 85 insertions(+)

diff --git a/target/i386/ops_sse.h b/target/i386/ops_sse.h
index 14a2d1bf78..04d2006cd8 100644
--- a/target/i386/ops_sse.h
+++ b/target/i386/ops_sse.h
@@ -3407,6 +3407,79 @@ void helper_vzeroupper_hi8(CPUX86State *env)
     }
 }
 #endif
+
+void helper_vpermdq_ymm(CPUX86State *env,
+                        Reg *d, Reg *v, Reg *s, uint32_t order)
+{
+    uint64_t r0, r1, r2, r3;
+
+    switch (order & 3) {
+    case 0:
+        r0 = v->Q(0);
+        r1 = v->Q(1);
+        break;
+    case 1:
+        r0 = v->Q(2);
+        r1 = v->Q(3);
+        break;
+    case 2:
+        r0 = s->Q(0);
+        r1 = s->Q(1);
+        break;
+    case 3:
+        r0 = s->Q(2);
+        r1 = s->Q(3);
+        break;
+    }
+    switch ((order >> 4) & 3) {
+    case 0:
+        r2 = v->Q(0);
+        r3 = v->Q(1);
+        break;
+    case 1:
+        r2 = v->Q(2);
+        r3 = v->Q(3);
+        break;
+    case 2:
+        r2 = s->Q(0);
+        r3 = s->Q(1);
+        break;
+    case 3:
+        r2 = s->Q(2);
+        r3 = s->Q(3);
+        break;
+    }
+    d->Q(0) = r0;
+    d->Q(1) = r1;
+    d->Q(2) = r2;
+    d->Q(3) = r3;
+}
+
+void helper_vpermq_ymm(CPUX86State *env, Reg *d, Reg *s, uint32_t order)
+{
+    uint64_t r0, r1, r2, r3;
+    r0 = s->Q(order & 3);
+    r1 = s->Q((order >> 2) & 3);
+    r2 = s->Q((order >> 4) & 3);
+    r3 = s->Q((order >> 6) & 3);
+    d->Q(0) = r0;
+    d->Q(1) = r1;
+    d->Q(2) = r2;
+    d->Q(3) = r3;
+}
+
+void helper_vpermd_ymm(CPUX86State *env, Reg *d, Reg *v, Reg *s)
+{
+    uint32_t r[8];
+    int i;
+
+    for (i = 0; i < 8; i++) {
+        r[i] = s->L(v->L(i) & 7);
+    }
+    for (i = 0; i < 8; i++) {
+        d->L(i) = r[i];
+    }
+}
 #endif
 #endif
 
diff --git a/target/i386/ops_sse_header.h b/target/i386/ops_sse_header.h
index e5d8ea9bb7..099e6e8ffc 100644
--- a/target/i386/ops_sse_header.h
+++ b/target/i386/ops_sse_header.h
@@ -457,6 +457,9 @@ DEF_HELPER_1(vzeroupper, void, env)
 DEF_HELPER_1(vzeroall_hi8, void, env)
 DEF_HELPER_1(vzeroupper_hi8, void, env)
 #endif
+DEF_HELPER_5(vpermdq_ymm, void, env, Reg, Reg, Reg, i32)
+DEF_HELPER_4(vpermq_ymm, void, env, Reg, Reg, i32)
+DEF_HELPER_4(vpermd_ymm, void, env, Reg, Reg, Reg)
 #endif
 #endif
 
diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c
index fe1ab58d07..5a11d3c083 100644
--- a/target/i386/tcg/translate.c
+++ b/target/i386/tcg/translate.c
@@ -3258,6 +3258,8 @@ static const struct SSEOpHelper_table6 sse_op_table6[256] = {
     [0x10] = BLENDV_OP(pblendvb, SSE41, SSE_OPF_MMX),
     [0x14] = BLENDV_OP(blendvps, SSE41, 0),
     [0x15] = BLENDV_OP(blendvpd, SSE41, 0),
+#define gen_helper_vpermd_xmm NULL
+    [0x16] = BINARY_OP(vpermd, AVX, SSE_OPF_AVX2), /* vpermps */
     [0x17] = CMP_OP(ptest, SSE41),
     /* TODO:Some vbroadcast variants require AVX2 */
     [0x18] = UNARY_OP(vbroadcastl, AVX, SSE_OPF_SCALAR), /* vbroadcastss */
@@ -3287,6 +3289,7 @@ static const struct SSEOpHelper_table6 sse_op_table6[256] = {
     [0x33] = UNARY_OP(pmovzxwd, SSE41, SSE_OPF_MMX),
     [0x34] = UNARY_OP(pmovzxwq, SSE41, SSE_OPF_MMX),
     [0x35] = UNARY_OP(pmovzxdq, SSE41, SSE_OPF_MMX),
+    [0x36] = BINARY_OP(vpermd, AVX, SSE_OPF_AVX2), /* vpermd */
     [0x37] = BINARY_OP(pcmpgtq, SSE41, SSE_OPF_MMX),
     [0x38] = BINARY_OP(pminsb, SSE41, SSE_OPF_MMX),
     [0x39] = BINARY_OP(pminsd, SSE41, SSE_OPF_MMX),
@@ -3329,8 +3332,13 @@ static const struct SSEOpHelper_table6 sse_op_table6[256] = {
 
 /* prefix [66] 0f 3a */
 static const struct SSEOpHelper_table7 sse_op_table7[256] = {
+#define gen_helper_vpermq_xmm NULL
+    [0x00] = UNARY_OP(vpermq, AVX, SSE_OPF_AVX2),
+    [0x01] = UNARY_OP(vpermq, AVX, SSE_OPF_AVX2), /* vpermpd */
     [0x04] = UNARY_OP(vpermilps_imm, AVX, 0),
     [0x05] = UNARY_OP(vpermilpd_imm, AVX, 0),
+#define gen_helper_vpermdq_xmm NULL
+    [0x06] = BINARY_OP(vpermdq, AVX, 0), /* vperm2f128 */
     [0x08] = UNARY_OP(roundps, SSE41, 0),
     [0x09] = UNARY_OP(roundpd, SSE41, 0),
 #define gen_helper_roundss_ymm NULL
@@ -3353,6 +3361,7 @@ static const struct SSEOpHelper_table7 sse_op_table7[256] = {
     [0x41] = BINARY_OP(dppd, SSE41, 0),
     [0x42] = BINARY_OP(mpsadbw, SSE41, SSE_OPF_MMX),
     [0x44] = BINARY_OP(pclmulqdq, PCLMULQDQ, 0),
+    [0x46] = BINARY_OP(vpermdq, AVX, SSE_OPF_AVX2), /* vperm2i128 */
 #define gen_helper_pcmpestrm_ymm NULL
     [0x60] = CMP_OP(pcmpestrm, SSE42),
 #define gen_helper_pcmpestri_ymm NULL
-- 
2.36.0



  parent reply	other threads:[~2022-04-24 22:30 UTC|newest]

Thread overview: 67+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-04-18 17:39 [PATCH 0/3] AVX guest implementation Paul Brook
2022-04-18 17:39 ` [PATCH 1/4] Add AVX_EN hflag Paul Brook
2022-04-18 17:39 ` [PATCH 2/4] TCG support for AVX Paul Brook
2022-04-18 19:33   ` Peter Maydell
2022-04-18 19:45     ` Paul Brook
2022-04-18 19:50       ` Peter Maydell
2022-04-18 23:14       ` Richard Henderson
2022-04-20 14:19       ` Paolo Bonzini
2022-04-20 18:59         ` Paul Brook
2022-04-18 17:39 ` [PATCH 3/4] Enable all x86-64 cpu features in user mode Paul Brook
2022-04-18 17:39 ` [PATCH 4/4] AVX tests Paul Brook
2022-04-19 10:34   ` Alex Bennée
2022-04-24 22:01 ` [PATCH v2 01/42] i386: pcmpestr 64-bit sign extension bug Paul Brook
2022-04-25 15:50   ` Richard Henderson
2022-04-27  7:00   ` Paolo Bonzini
2022-04-24 22:01 ` [PATCH v2 02/42] i386: DPPS rounding fix Paul Brook
2022-04-25 16:09   ` Richard Henderson
2022-04-24 22:01 ` [PATCH v2 03/42] Add AVX_EN hflag Paul Brook
2022-04-25 17:27   ` Richard Henderson
2022-04-24 22:01 ` [PATCH v2 04/42] i386: Rework sse_op_table1 Paul Brook
2022-04-24 22:01 ` [PATCH v2 05/42] i386: Rework sse_op_table6/7 Paul Brook
2022-04-24 22:01 ` [PATCH v2 06/42] i386: Add CHECK_NO_VEX Paul Brook
2022-04-25 20:39   ` Richard Henderson
2022-04-25 20:41   ` Richard Henderson
2022-04-24 22:01 ` [PATCH v2 07/42] Enforce VEX encoding restrictions Paul Brook
2022-04-25 20:42   ` Richard Henderson
2022-04-25 21:00   ` Richard Henderson
2022-04-27  9:08   ` Paolo Bonzini
2022-04-24 22:01 ` [PATCH v2 08/42] i386: Add ZMM_OFFSET macro Paul Brook
2022-04-25 21:03   ` Richard Henderson
2022-04-24 22:01 ` [PATCH v2 09/42] i386: Helper macro for 256 bit AVX helpers Paul Brook
2022-04-24 22:01 ` [PATCH v2 10/42] i386: Rewrite vector shift helper Paul Brook
2022-04-25 21:33   ` Richard Henderson
2022-04-27  6:51     ` Paolo Bonzini
2022-04-24 22:01 ` [PATCH v2 11/42] i386: Rewrite simple integer vector helpers Paul Brook
2022-04-24 22:01 ` [PATCH v2 12/42] i386: Misc integer AVX helper prep Paul Brook
2022-04-24 22:01 ` [PATCH v2 13/42] i386: Destructive vector helpers for AVX Paul Brook
2022-04-27  6:53   ` Paolo Bonzini
2022-04-24 22:01 ` [PATCH v2 14/42] i386: Add size suffix to vector FP helpers Paul Brook
2022-04-24 22:01 ` [PATCH v2 15/42] i386: Floating point atithmetic helper AVX prep Paul Brook
2022-04-24 22:01 ` [PATCH v2 16/42] i386: Dot product AVX helper prep Paul Brook
2022-04-24 22:01 ` [PATCH v2 17/42] i386: Destructive FP helpers for AVX Paul Brook
2022-04-24 22:01 ` [PATCH v2 18/42] i386: Misc AVX helper prep Paul Brook
2022-04-24 22:01 ` [PATCH v2 19/42] i386: Rewrite blendv helpers Paul Brook
2022-04-24 22:01 ` [PATCH v2 20/42] i386: AVX pclmulqdq Paul Brook
2022-04-24 22:01 ` [PATCH v2 21/42] i386: AVX+AES helpers Paul Brook
2022-04-24 22:01 ` [PATCH v2 22/42] i386: Update ops_sse_helper.h ready for 256 bit AVX Paul Brook
2022-04-24 22:01 ` [PATCH v2 23/42] i386: AVX comparison helpers Paul Brook
2022-04-24 22:01 ` [PATCH v2 24/42] i386: Move 3DNOW decoder Paul Brook
2022-04-24 22:01 ` [PATCH v2 25/42] i386: VEX.V encodings (3 operand) Paul Brook
2022-04-24 22:01 ` [PATCH v2 26/42] i386: Utility function for 128 bit AVX Paul Brook
2022-04-24 22:01 ` [PATCH v2 27/42] i386: Translate 256 bit AVX instructions Paul Brook
2022-04-24 22:01 ` [PATCH v2 28/42] i386: Implement VZEROALL and VZEROUPPER Paul Brook
2022-04-24 22:01 ` [PATCH v2 29/42] i386: Implement VBROADCAST Paul Brook
2022-04-24 22:01 ` [PATCH v2 30/42] i386: Implement VPERMIL Paul Brook
2022-04-24 22:01 ` [PATCH v2 31/42] i386: Implement AVX variable shifts Paul Brook
2022-04-24 22:01 ` [PATCH v2 32/42] i386: Implement VTEST Paul Brook
2022-04-24 22:01 ` [PATCH v2 33/42] i386: Implement VMASKMOV Paul Brook
2022-04-24 22:01 ` [PATCH v2 34/42] i386: Implement VGATHER Paul Brook
2022-04-24 22:01 ` Paul Brook [this message]
2022-04-24 22:01 ` [PATCH v2 36/42] i386: Implement VINSERT128/VEXTRACT128 Paul Brook
2022-04-24 22:01 ` [PATCH v2 37/42] i386: Implement VBLENDV Paul Brook
2022-04-24 22:02 ` [PATCH v2 38/42] i386: Implement VPBLENDD Paul Brook
2022-04-24 22:02 ` [PATCH v2 39/42] i386: Enable AVX cpuid bits when using TCG Paul Brook
2022-04-24 22:02 ` [PATCH v2 40/42] Enable all x86-64 cpu features in user mode Paul Brook
2022-04-24 22:02 ` [PATCH v2 41/42] AVX tests Paul Brook
2022-04-24 22:02 ` [PATCH v2 42/42] i386: Add sha512-avx test Paul Brook

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20220424220204.2493824-36-paul@nowt.org \
    --to=paul@nowt.org \
    --cc=eduardo@habkost.net \
    --cc=pbonzini@redhat.com \
    --cc=qemu-devel@nongnu.org \
    --cc=richard.henderson@linaro.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.