All of lore.kernel.org
 help / color / mirror / Atom feed
From: Paul Brook <paul@nowt.org>
To: Paolo Bonzini <pbonzini@redhat.com>,
	Richard Henderson <richard.henderson@linaro.org>,
	Eduardo Habkost <eduardo@habkost.net>
Cc: "open list:All patches CC here" <qemu-devel@nongnu.org>,
	Paul Brook <paul@nowt.org>
Subject: [PATCH v2 29/42] i386: Implement VBROADCAST
Date: Sun, 24 Apr 2022 23:01:51 +0100	[thread overview]
Message-ID: <20220424220204.2493824-30-paul@nowt.org> (raw)
In-Reply-To: <20220418173904.3746036-1-paul@nowt.org>

The catch here is that these are whole vector operations (not independent 128
bit lanes). We abuse the SSE_OPF_SCALAR flag to select the memory operand
width appropriately.

Signed-off-by: Paul Brook <paul@nowt.org>
---
 target/i386/ops_sse.h        | 51 ++++++++++++++++++++++++++++++++++++
 target/i386/ops_sse_header.h |  8 ++++++
 target/i386/tcg/translate.c  | 42 ++++++++++++++++++++++++++++-
 3 files changed, 100 insertions(+), 1 deletion(-)

diff --git a/target/i386/ops_sse.h b/target/i386/ops_sse.h
index a1f50f0c8b..4115c9a257 100644
--- a/target/i386/ops_sse.h
+++ b/target/i386/ops_sse.h
@@ -3071,7 +3071,57 @@ void glue(helper_aeskeygenassist, SUFFIX)(CPUX86State *env, Reg *d, Reg *s,
 #endif
 #endif
 
+#if SHIFT >= 1
+void glue(helper_vbroadcastb, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
+{
+    /* Copy byte 0 of s into the first 16 * SHIFT bytes of d. */
+    const uint8_t fill = s->B(0);
+    int n = 16 * SHIFT;
+    while (n-- > 0) {
+        d->B(n) = fill;
+    }
+}
+
+void glue(helper_vbroadcastw, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
+{
+    /* Copy word 0 of s into the first 8 * SHIFT 16-bit elements of d. */
+    const uint16_t fill = s->W(0);
+    int n = 8 * SHIFT;
+    while (n-- > 0) {
+        d->W(n) = fill;
+    }
+}
+
+void glue(helper_vbroadcastl, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
+{
+    uint32_t val = s->L(0);
+    int i;
+    /* 16 * SHIFT bytes hold 4 * SHIFT dwords; replicate dword 0 of s. */
+    for (i = 0; i < 4 * SHIFT; i++) {
+        d->L(i) = val;
+    }
+}
+
+void glue(helper_vbroadcastq, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
+{
+    uint64_t val = s->Q(0); /* quadword 0 of s, read before d is written */
+    d->Q(0) = val;
+    d->Q(1) = val;
 #if SHIFT == 2
+    d->Q(2) = val; /* quadwords 2 and 3 are filled only when SHIFT == 2 */
+    d->Q(3) = val;
+#endif
+}
+
+#if SHIFT == 2
+void glue(helper_vbroadcastdq, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
+{
+    const uint64_t lo = s->Q(0), hi = s->Q(1); /* quadwords 0 and 1 of s */
+    d->Q(0) = lo;
+    d->Q(1) = hi;
+    d->Q(2) = lo;
+    d->Q(3) = hi;
+}
+
 void helper_vzeroall(CPUX86State *env)
 {
     int i;
@@ -3118,6 +3168,7 @@ void helper_vzeroupper_hi8(CPUX86State *env)
 }
 #endif
 #endif
+#endif
 
 #undef SSE_HELPER_S
 
diff --git a/target/i386/ops_sse_header.h b/target/i386/ops_sse_header.h
index 48f0945917..51e02cd4fa 100644
--- a/target/i386/ops_sse_header.h
+++ b/target/i386/ops_sse_header.h
@@ -411,7 +411,14 @@ DEF_HELPER_4(glue(aeskeygenassist, SUFFIX), void, env, Reg, Reg, i32)
 DEF_HELPER_5(glue(pclmulqdq, SUFFIX), void, env, Reg, Reg, Reg, i32)
 #endif
 
+/* AVX helpers */
+#if SHIFT >= 1
+DEF_HELPER_3(glue(vbroadcastb, SUFFIX), void, env, Reg, Reg)
+DEF_HELPER_3(glue(vbroadcastw, SUFFIX), void, env, Reg, Reg)
+DEF_HELPER_3(glue(vbroadcastl, SUFFIX), void, env, Reg, Reg)
+DEF_HELPER_3(glue(vbroadcastq, SUFFIX), void, env, Reg, Reg)
 #if SHIFT == 2
+DEF_HELPER_3(glue(vbroadcastdq, SUFFIX), void, env, Reg, Reg)
 DEF_HELPER_1(vzeroall, void, env)
 DEF_HELPER_1(vzeroupper, void, env)
 #ifdef TARGET_X86_64
@@ -419,6 +426,7 @@ DEF_HELPER_1(vzeroall_hi8, void, env)
 DEF_HELPER_1(vzeroupper_hi8, void, env)
 #endif
 #endif
+#endif
 
 #undef SHIFT
 #undef Reg
diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c
index ba70aeb039..59ab1dc562 100644
--- a/target/i386/tcg/translate.c
+++ b/target/i386/tcg/translate.c
@@ -3255,6 +3255,11 @@ static const struct SSEOpHelper_table6 sse_op_table6[256] = {
     [0x14] = BLENDV_OP(blendvps, SSE41, 0),
     [0x15] = BLENDV_OP(blendvpd, SSE41, 0),
     [0x17] = CMP_OP(ptest, SSE41),
+    /* TODO: Some vbroadcast variants require AVX2 */
+    [0x18] = UNARY_OP(vbroadcastl, AVX, SSE_OPF_SCALAR), /* vbroadcastss */
+    [0x19] = UNARY_OP(vbroadcastq, AVX, SSE_OPF_SCALAR), /* vbroadcastsd */
+#define gen_helper_vbroadcastdq_xmm NULL
+    [0x1a] = UNARY_OP(vbroadcastdq, AVX, SSE_OPF_SCALAR), /* vbroadcastf128 */
     [0x1c] = UNARY_OP_MMX(pabsb, SSSE3),
     [0x1d] = UNARY_OP_MMX(pabsw, SSSE3),
     [0x1e] = UNARY_OP_MMX(pabsd, SSSE3),
@@ -3286,6 +3291,16 @@ static const struct SSEOpHelper_table6 sse_op_table6[256] = {
     [0x40] = BINARY_OP(pmulld, SSE41, SSE_OPF_MMX),
 #define gen_helper_phminposuw_ymm NULL
     [0x41] = UNARY_OP(phminposuw, SSE41, 0),
+    /* vpbroadcastd */
+    [0x58] = UNARY_OP(vbroadcastl, AVX, SSE_OPF_SCALAR | SSE_OPF_MMX),
+    /* vpbroadcastq */
+    [0x59] = UNARY_OP(vbroadcastq, AVX, SSE_OPF_SCALAR | SSE_OPF_MMX),
+    /* vbroadcasti128 */
+    [0x5a] = UNARY_OP(vbroadcastdq, AVX, SSE_OPF_SCALAR | SSE_OPF_MMX),
+    /* vpbroadcastb */
+    [0x78] = UNARY_OP(vbroadcastb, AVX, SSE_OPF_SCALAR | SSE_OPF_MMX),
+    /* vpbroadcastw */
+    [0x79] = UNARY_OP(vbroadcastw, AVX, SSE_OPF_SCALAR | SSE_OPF_MMX),
 #define gen_helper_aesimc_ymm NULL
     [0xdb] = UNARY_OP(aesimc, AES, 0),
     [0xdc] = BINARY_OP(aesenc, AES, 0),
@@ -4323,6 +4338,24 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                     op2_offset = offsetof(CPUX86State, xmm_t0);
                     gen_lea_modrm(env, s, modrm);
                     switch (b) {
+                    case 0x78: /* vpbroadcastb */
+                        size = 8;
+                        break;
+                    case 0x79: /* vpbroadcastw */
+                        size = 16;
+                        break;
+                    case 0x18: /* vbroadcastss */
+                    case 0x58: /* vpbroadcastd */
+                        size = 32;
+                        break;
+                    case 0x19: /* vbroadcastsd */
+                    case 0x59: /* vpbroadcastq */
+                        size = 64;
+                        break;
+                    case 0x1a: /* vbroadcastf128 */
+                    case 0x5a: /* vbroadcasti128 */
+                        size = 128;
+                        break;
                     case 0x20: case 0x30: /* pmovsxbw, pmovzxbw */
                     case 0x23: case 0x33: /* pmovsxwd, pmovzxwd */
                     case 0x25: case 0x35: /* pmovsxdq, pmovzxdq */
@@ -4346,10 +4379,17 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                     default:
                         size = 128;
                     }
-                    if (s->vex_l) {
+                    /* 256-bit vbroadcasts only load a single element.  */
+                    if ((op6.flags & SSE_OPF_SCALAR) == 0 && s->vex_l) {
                         size *= 2;
                     }
                     switch (size) {
+                    case 8: /* byte operand: load 8 bits, store 8 bits */
+                        tcg_gen_qemu_ld_tl(s->tmp0, s->A0,
+                                           s->mem_index, MO_UB);
+                        tcg_gen_st8_tl(s->tmp0, cpu_env, op2_offset +
+                                       offsetof(ZMMReg, ZMM_B(0)));
+                        break;
                     case 16:
                         tcg_gen_qemu_ld_tl(s->tmp0, s->A0,
                                            s->mem_index, MO_LEUW);
-- 
2.36.0



  parent reply	other threads:[~2022-04-24 22:21 UTC|newest]

Thread overview: 67+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-04-18 17:39 [PATCH 0/3] AVX guest implementation Paul Brook
2022-04-18 17:39 ` [PATCH 1/4] Add AVX_EN hflag Paul Brook
2022-04-18 17:39 ` [PATCH 2/4] TCG support for AVX Paul Brook
2022-04-18 19:33   ` Peter Maydell
2022-04-18 19:45     ` Paul Brook
2022-04-18 19:50       ` Peter Maydell
2022-04-18 23:14       ` Richard Henderson
2022-04-20 14:19       ` Paolo Bonzini
2022-04-20 18:59         ` Paul Brook
2022-04-18 17:39 ` [PATCH 3/4] Enable all x86-64 cpu features in user mode Paul Brook
2022-04-18 17:39 ` [PATCH 4/4] AVX tests Paul Brook
2022-04-19 10:34   ` Alex Bennée
2022-04-24 22:01 ` [PATCH v2 01/42] i386: pcmpestr 64-bit sign extension bug Paul Brook
2022-04-25 15:50   ` Richard Henderson
2022-04-27  7:00   ` Paolo Bonzini
2022-04-24 22:01 ` [PATCH v2 02/42] i386: DPPS rounding fix Paul Brook
2022-04-25 16:09   ` Richard Henderson
2022-04-24 22:01 ` [PATCH v2 03/42] Add AVX_EN hflag Paul Brook
2022-04-25 17:27   ` Richard Henderson
2022-04-24 22:01 ` [PATCH v2 04/42] i386: Rework sse_op_table1 Paul Brook
2022-04-24 22:01 ` [PATCH v2 05/42] i386: Rework sse_op_table6/7 Paul Brook
2022-04-24 22:01 ` [PATCH v2 06/42] i386: Add CHECK_NO_VEX Paul Brook
2022-04-25 20:39   ` Richard Henderson
2022-04-25 20:41   ` Richard Henderson
2022-04-24 22:01 ` [PATCH v2 07/42] Enforce VEX encoding restrictions Paul Brook
2022-04-25 20:42   ` Richard Henderson
2022-04-25 21:00   ` Richard Henderson
2022-04-27  9:08   ` Paolo Bonzini
2022-04-24 22:01 ` [PATCH v2 08/42] i386: Add ZMM_OFFSET macro Paul Brook
2022-04-25 21:03   ` Richard Henderson
2022-04-24 22:01 ` [PATCH v2 09/42] i386: Helper macro for 256 bit AVX helpers Paul Brook
2022-04-24 22:01 ` [PATCH v2 10/42] i386: Rewrite vector shift helper Paul Brook
2022-04-25 21:33   ` Richard Henderson
2022-04-27  6:51     ` Paolo Bonzini
2022-04-24 22:01 ` [PATCH v2 11/42] i386: Rewrite simple integer vector helpers Paul Brook
2022-04-24 22:01 ` [PATCH v2 12/42] i386: Misc integer AVX helper prep Paul Brook
2022-04-24 22:01 ` [PATCH v2 13/42] i386: Destructive vector helpers for AVX Paul Brook
2022-04-27  6:53   ` Paolo Bonzini
2022-04-24 22:01 ` [PATCH v2 14/42] i386: Add size suffix to vector FP helpers Paul Brook
2022-04-24 22:01 ` [PATCH v2 15/42] i386: Floating point atithmetic helper AVX prep Paul Brook
2022-04-24 22:01 ` [PATCH v2 16/42] i386: Dot product AVX helper prep Paul Brook
2022-04-24 22:01 ` [PATCH v2 17/42] i386: Destructive FP helpers for AVX Paul Brook
2022-04-24 22:01 ` [PATCH v2 18/42] i386: Misc AVX helper prep Paul Brook
2022-04-24 22:01 ` [PATCH v2 19/42] i386: Rewrite blendv helpers Paul Brook
2022-04-24 22:01 ` [PATCH v2 20/42] i386: AVX pclmulqdq Paul Brook
2022-04-24 22:01 ` [PATCH v2 21/42] i386: AVX+AES helpers Paul Brook
2022-04-24 22:01 ` [PATCH v2 22/42] i386: Update ops_sse_helper.h ready for 256 bit AVX Paul Brook
2022-04-24 22:01 ` [PATCH v2 23/42] i386: AVX comparison helpers Paul Brook
2022-04-24 22:01 ` [PATCH v2 24/42] i386: Move 3DNOW decoder Paul Brook
2022-04-24 22:01 ` [PATCH v2 25/42] i386: VEX.V encodings (3 operand) Paul Brook
2022-04-24 22:01 ` [PATCH v2 26/42] i386: Utility function for 128 bit AVX Paul Brook
2022-04-24 22:01 ` [PATCH v2 27/42] i386: Translate 256 bit AVX instructions Paul Brook
2022-04-24 22:01 ` [PATCH v2 28/42] i386: Implement VZEROALL and VZEROUPPER Paul Brook
2022-04-24 22:01 ` Paul Brook [this message]
2022-04-24 22:01 ` [PATCH v2 30/42] i386: Implement VPERMIL Paul Brook
2022-04-24 22:01 ` [PATCH v2 31/42] i386: Implement AVX variable shifts Paul Brook
2022-04-24 22:01 ` [PATCH v2 32/42] i386: Implement VTEST Paul Brook
2022-04-24 22:01 ` [PATCH v2 33/42] i386: Implement VMASKMOV Paul Brook
2022-04-24 22:01 ` [PATCH v2 34/42] i386: Implement VGATHER Paul Brook
2022-04-24 22:01 ` [PATCH v2 35/42] i386: Implement VPERM Paul Brook
2022-04-24 22:01 ` [PATCH v2 36/42] i386: Implement VINSERT128/VEXTRACT128 Paul Brook
2022-04-24 22:01 ` [PATCH v2 37/42] i386: Implement VBLENDV Paul Brook
2022-04-24 22:02 ` [PATCH v2 38/42] i386: Implement VPBLENDD Paul Brook
2022-04-24 22:02 ` [PATCH v2 39/42] i386: Enable AVX cpuid bits when using TCG Paul Brook
2022-04-24 22:02 ` [PATCH v2 40/42] Enable all x86-64 cpu features in user mode Paul Brook
2022-04-24 22:02 ` [PATCH v2 41/42] AVX tests Paul Brook
2022-04-24 22:02 ` [PATCH v2 42/42] i386: Add sha512-avx test Paul Brook

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20220424220204.2493824-30-paul@nowt.org \
    --to=paul@nowt.org \
    --cc=eduardo@habkost.net \
    --cc=pbonzini@redhat.com \
    --cc=qemu-devel@nongnu.org \
    --cc=richard.henderson@linaro.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.