All of lore.kernel.org
 help / color / mirror / Atom feed
From: Richard Henderson <richard.henderson@linaro.org>
To: qemu-devel@nongnu.org
Subject: [PATCH 13/20] tcg/i386: Support avx512vbmi2 vector shift-double instructions
Date: Sat, 18 Dec 2021 11:42:43 -0800	[thread overview]
Message-ID: <20211218194250.247633-14-richard.henderson@linaro.org> (raw)
In-Reply-To: <20211218194250.247633-1-richard.henderson@linaro.org>

We will use VPSHLD, VPSHLDV and VPSHRDV for 16-bit rotates.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/i386/tcg-target-con-set.h |  1 +
 tcg/i386/tcg-target.opc.h     |  3 +++
 tcg/i386/tcg-target.c.inc     | 38 +++++++++++++++++++++++++++++++++++
 3 files changed, 42 insertions(+)

diff --git a/tcg/i386/tcg-target-con-set.h b/tcg/i386/tcg-target-con-set.h
index 78774d1005..91ceb0e1da 100644
--- a/tcg/i386/tcg-target-con-set.h
+++ b/tcg/i386/tcg-target-con-set.h
@@ -45,6 +45,7 @@ C_O1_I2(r, r, rI)
 C_O1_I2(x, x, x)
 C_N1_I2(r, r, r)
 C_N1_I2(r, r, rW)
+C_O1_I3(x, 0, x, x)
 C_O1_I3(x, x, x, x)
 C_O1_I4(r, r, re, r, 0)
 C_O1_I4(r, r, r, ri, ri)
diff --git a/tcg/i386/tcg-target.opc.h b/tcg/i386/tcg-target.opc.h
index 1312941800..b5f403e35e 100644
--- a/tcg/i386/tcg-target.opc.h
+++ b/tcg/i386/tcg-target.opc.h
@@ -33,3 +33,6 @@ DEF(x86_psrldq_vec, 1, 1, 1, IMPLVEC)
 DEF(x86_vperm2i128_vec, 1, 2, 1, IMPLVEC)
 DEF(x86_punpckl_vec, 1, 2, 0, IMPLVEC)
 DEF(x86_punpckh_vec, 1, 2, 0, IMPLVEC)
+DEF(x86_vpshldi_vec, 1, 2, 1, IMPLVEC)
+DEF(x86_vpshldv_vec, 1, 3, 0, IMPLVEC)
+DEF(x86_vpshrdv_vec, 1, 3, 0, IMPLVEC)
diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc
index 7fd6edb887..30b9afc1d3 100644
--- a/tcg/i386/tcg-target.c.inc
+++ b/tcg/i386/tcg-target.c.inc
@@ -423,6 +423,15 @@ static bool tcg_target_const_match(int64_t val, TCGType type, int ct)
 #define OPC_VPROLVQ     (0x15 | P_EXT38 | P_DATA16 | P_VEXW | P_EVEX)
 #define OPC_VPRORVD     (0x14 | P_EXT38 | P_DATA16 | P_EVEX)
 #define OPC_VPRORVQ     (0x14 | P_EXT38 | P_DATA16 | P_VEXW | P_EVEX)
+#define OPC_VPSHLDW     (0x70 | P_EXT3A | P_DATA16 | P_VEXW | P_EVEX)
+#define OPC_VPSHLDD     (0x71 | P_EXT3A | P_DATA16 | P_EVEX)
+#define OPC_VPSHLDQ     (0x71 | P_EXT3A | P_DATA16 | P_VEXW | P_EVEX)
+#define OPC_VPSHLDVW    (0x70 | P_EXT38 | P_DATA16 | P_VEXW | P_EVEX)
+#define OPC_VPSHLDVD    (0x71 | P_EXT38 | P_DATA16 | P_EVEX)
+#define OPC_VPSHLDVQ    (0x71 | P_EXT38 | P_DATA16 | P_VEXW | P_EVEX)
+#define OPC_VPSHRDVW    (0x72 | P_EXT38 | P_DATA16 | P_VEXW | P_EVEX)
+#define OPC_VPSHRDVD    (0x73 | P_EXT38 | P_DATA16 | P_EVEX)
+#define OPC_VPSHRDVQ    (0x73 | P_EXT38 | P_DATA16 | P_VEXW | P_EVEX)
 #define OPC_VPSLLVW     (0x12 | P_EXT38 | P_DATA16 | P_VEXW | P_EVEX)
 #define OPC_VPSLLVD     (0x47 | P_EXT38 | P_DATA16)
 #define OPC_VPSLLVQ     (0x47 | P_EXT38 | P_DATA16 | P_VEXW)
@@ -2774,6 +2783,15 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
     static int const sars_insn[4] = {
         OPC_UD2, OPC_PSRAW, OPC_PSRAD, OPC_VPSRAQ
     };
+    static int const vpshldi_insn[4] = {
+        OPC_UD2, OPC_VPSHLDW, OPC_VPSHLDD, OPC_VPSHLDQ
+    };
+    static int const vpshldv_insn[4] = {
+        OPC_UD2, OPC_VPSHLDVW, OPC_VPSHLDVD, OPC_VPSHLDVQ
+    };
+    static int const vpshrdv_insn[4] = {
+        OPC_UD2, OPC_VPSHRDVW, OPC_VPSHRDVD, OPC_VPSHRDVQ
+    };
     static int const abs_insn[4] = {
         /* TODO: AVX512 adds support for MO_64.  */
         OPC_PABSB, OPC_PABSW, OPC_PABSD, OPC_UD2
@@ -2866,6 +2884,16 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
     case INDEX_op_x86_packus_vec:
         insn = packus_insn[vece];
         goto gen_simd;
+    case INDEX_op_x86_vpshldv_vec:
+        insn = vpshldv_insn[vece];
+        a1 = a2;
+        a2 = args[3];
+        goto gen_simd;
+    case INDEX_op_x86_vpshrdv_vec:
+        insn = vpshrdv_insn[vece];
+        a1 = a2;
+        a2 = args[3];
+        goto gen_simd;
 #if TCG_TARGET_REG_BITS == 32
     case INDEX_op_dup2_vec:
         /* First merge the two 32-bit inputs to a single 64-bit element. */
@@ -2967,7 +2995,12 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
         insn = OPC_VPERM2I128;
         sub = args[3];
         goto gen_simd_imm8;
+    case INDEX_op_x86_vpshldi_vec:
+        insn = vpshldi_insn[vece];
+        sub = args[3];
+        goto gen_simd_imm8;
     gen_simd_imm8:
+        tcg_debug_assert(insn != OPC_UD2);
         if (type == TCG_TYPE_V256) {
             insn |= P_VEXL;
         }
@@ -3211,6 +3244,7 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
     case INDEX_op_x86_vperm2i128_vec:
     case INDEX_op_x86_punpckl_vec:
     case INDEX_op_x86_punpckh_vec:
+    case INDEX_op_x86_vpshldi_vec:
 #if TCG_TARGET_REG_BITS == 32
     case INDEX_op_dup2_vec:
 #endif
@@ -3225,6 +3259,10 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
     case INDEX_op_x86_psrldq_vec:
         return C_O1_I1(x, x);
 
+    case INDEX_op_x86_vpshldv_vec:
+    case INDEX_op_x86_vpshrdv_vec:
+        return C_O1_I3(x, 0, x, x);
+
     case INDEX_op_x86_vpblendvb_vec:
         return C_O1_I3(x, x, x, x);
 
-- 
2.25.1



  parent reply	other threads:[~2021-12-18 19:50 UTC|newest]

Thread overview: 51+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-12-18 19:42 [PATCH 00/20] tcg: vector improvements Richard Henderson
2021-12-18 19:42 ` [PATCH 01/20] tcg/optimize: Fix folding of vector ops Richard Henderson
2021-12-19 11:37   ` Philippe Mathieu-Daudé
2021-12-18 19:42 ` [PATCH 02/20] tcg: Add opcodes for vector nand, nor, eqv Richard Henderson
2021-12-19 11:28   ` Philippe Mathieu-Daudé
2022-02-01 18:28   ` Alex Bennée
2021-12-18 19:42 ` [PATCH 03/20] tcg/ppc: Implement vector NAND, NOR, EQV Richard Henderson
2021-12-19  0:15   ` Philippe Mathieu-Daudé
2022-02-01 18:29   ` Alex Bennée
2021-12-18 19:42 ` [PATCH 04/20] tcg/s390x: " Richard Henderson
2021-12-19  0:17   ` Philippe Mathieu-Daudé
2022-02-01 18:29   ` Alex Bennée
2022-02-01 18:31   ` Alex Bennée
2024-01-03 13:21   ` Philippe Mathieu-Daudé
2024-01-03 21:58     ` Richard Henderson
2021-12-18 19:42 ` [PATCH 05/20] tcg/i386: Detect AVX512 Richard Henderson
2022-02-01 18:41   ` Alex Bennée
2021-12-18 19:42 ` [PATCH 06/20] tcg/i386: Add tcg_out_evex_opc Richard Henderson
2022-02-01 19:20   ` Alex Bennée
2021-12-18 19:42 ` [PATCH 07/20] tcg/i386: Use tcg_can_emit_vec_op in expand_vec_cmp_noinv Richard Henderson
2022-02-01 19:21   ` Alex Bennée
2021-12-18 19:42 ` [PATCH 08/20] tcg/i386: Implement avx512 variable shifts Richard Henderson
2022-02-01 20:33   ` Alex Bennée
2021-12-18 19:42 ` [PATCH 09/20] tcg/i386: Implement avx512 scalar shift Richard Henderson
2022-02-02 13:48   ` Alex Bennée
2021-12-18 19:42 ` [PATCH 10/20] tcg/i386: Implement avx512 immediate sari shift Richard Henderson
2022-02-02 14:02   ` Alex Bennée
2021-12-18 19:42 ` [PATCH 11/20] tcg/i386: Implement avx512 immediate rotate Richard Henderson
2022-02-02 14:05   ` Alex Bennée
2022-02-03  1:26     ` Richard Henderson
2021-12-18 19:42 ` [PATCH 12/20] tcg/i386: Implement avx512 variable rotate Richard Henderson
2022-02-02 14:14   ` Alex Bennée
2021-12-18 19:42 ` Richard Henderson [this message]
2022-02-02 14:28   ` [PATCH 13/20] tcg/i386: Support avx512vbmi2 vector shift-double instructions Alex Bennée
2021-12-18 19:42 ` [PATCH 14/20] tcg/i386: Expand vector word rotate as avx512vbmi2 shift-double Richard Henderson
2022-02-03 10:32   ` Alex Bennée
2021-12-18 19:42 ` [PATCH 15/20] tcg/i386: Remove rotls_vec from tcg_target_op_def Richard Henderson
2022-02-03 10:34   ` Alex Bennée
2021-12-18 19:42 ` [PATCH 16/20] tcg/i386: Expand scalar rotate with avx512 insns Richard Henderson
2022-02-03 10:38   ` Alex Bennée
2021-12-18 19:42 ` [PATCH 17/20] tcg/i386: Implement avx512 min/max/abs Richard Henderson
2022-02-03 10:44   ` Alex Bennée
2021-12-18 19:42 ` [PATCH 18/20] tcg/i386: Implement avx512 multiply Richard Henderson
2022-02-03 10:45   ` Alex Bennée
2021-12-18 19:42 ` [PATCH 19/20] tcg/i386: Implement more logical operations for avx512 Richard Henderson
2022-02-03 10:46   ` Alex Bennée
2022-02-03 21:54     ` Richard Henderson
2021-12-18 19:42 ` [PATCH 20/20] tcg/i386: Implement bitsel " Richard Henderson
2022-02-03 10:51   ` Alex Bennée
2022-01-29  9:28 ` [PATCH 00/20] tcg: vector improvements Richard Henderson
2022-02-03 10:25 ` Alex Bennée

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20211218194250.247633-14-richard.henderson@linaro.org \
    --to=richard.henderson@linaro.org \
    --cc=qemu-devel@nongnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.