All of lore.kernel.org
 help / color / mirror / Atom feed
From: Jan Beulich <jbeulich@suse.com>
To: "xen-devel@lists.xenproject.org" <xen-devel@lists.xenproject.org>
Cc: "Andrew Cooper" <andrew.cooper3@citrix.com>,
	"George Dunlap" <george.dunlap@citrix.com>,
	"Wei Liu" <wl@xen.org>, "Roger Pau Monné" <roger.pau@citrix.com>
Subject: [PATCH v3 20/22] x86emul: support tile multiplication insns
Date: Thu, 22 Apr 2021 16:56:12 +0200	[thread overview]
Message-ID: <33ff565f-0e08-4dc6-c041-ad3422544d54@suse.com> (raw)
In-Reply-To: <322de6db-e01f-0b57-5777-5d94a13c441a@suse.com>

Since these don't allow for memory operands, the main thing to do here
is to check the large set of #UD conditions.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
---
v3: New.

--- a/tools/tests/x86_emulator/predicates.c
+++ b/tools/tests/x86_emulator/predicates.c
@@ -1349,6 +1349,11 @@ static const struct vex {
     { { 0x58 }, 2, T, R, pfx_66, W0, Ln }, /* vpbroadcastd */
     { { 0x59 }, 2, T, R, pfx_66, W0, Ln }, /* vpbroadcastq */
     { { 0x5a }, 2, F, R, pfx_66, W0, L1 }, /* vbroadcasti128 */
+    { { 0x5c, 0xc0 }, 2, F, N, pfx_f3, W0, L0 }, /* tdpbf16ps */
+    { { 0x5e, 0xc0 }, 2, F, N, pfx_no, W0, L0 }, /* tdpbuud */
+    { { 0x5e, 0xc0 }, 2, F, N, pfx_66, W0, L0 }, /* tdpbusd */
+    { { 0x5e, 0xc0 }, 2, F, N, pfx_f3, W0, L0 }, /* tdpbsud */
+    { { 0x5e, 0xc0 }, 2, F, N, pfx_f2, W0, L0 }, /* tdpbssd */
     { { 0x78 }, 2, T, R, pfx_66, W0, Ln }, /* vpbroadcastb */
     { { 0x79 }, 2, T, R, pfx_66, W0, Ln }, /* vpbroadcastw */
     { { 0x8c }, 2, F, R, pfx_66, Wn, Ln }, /* vpmaskmov{d,q} */
--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -487,6 +487,8 @@ static const struct ext0f38_table {
     [0x59] = { .simd_size = simd_other, .two_op = 1, .d8s = 3 },
     [0x5a] = { .simd_size = simd_128, .two_op = 1, .d8s = 4 },
     [0x5b] = { .simd_size = simd_256, .two_op = 1, .d8s = d8s_vl_by_2 },
+    [0x5c] = { .simd_size = simd_other },
+    [0x5e] = { .simd_size = simd_other },
     [0x62] = { .simd_size = simd_packed_int, .two_op = 1, .d8s = d8s_bw },
     [0x63] = { .simd_size = simd_packed_int, .to_mem = 1, .two_op = 1, .d8s = d8s_bw },
     [0x64 ... 0x66] = { .simd_size = simd_packed_int, .d8s = d8s_vl },
@@ -2049,7 +2051,9 @@ amd_like(const struct x86_emulate_ctxt *
 #define vcpu_has_avx512_4fmaps() (ctxt->cpuid->feat.avx512_4fmaps)
 #define vcpu_has_avx512_vp2intersect() (ctxt->cpuid->feat.avx512_vp2intersect)
 #define vcpu_has_serialize()   (ctxt->cpuid->feat.serialize)
+#define vcpu_has_amx_bf16()    (ctxt->cpuid->feat.amx_bf16)
 #define vcpu_has_amx_tile()    (ctxt->cpuid->feat.amx_tile)
+#define vcpu_has_amx_int8()    (ctxt->cpuid->feat.amx_int8)
 #define vcpu_has_avx_vnni()    (ctxt->cpuid->feat.avx_vnni)
 #define vcpu_has_avx512_bf16() (ctxt->cpuid->feat.avx512_bf16)
 
@@ -9799,6 +9803,59 @@ x86_emulate(
         generate_exception_if(ea.type != OP_MEM || !vex.l || vex.w, EXC_UD);
         goto simd_0f_avx2;
 
+    case X86EMUL_OPC_VEX_F3(0x0f38, 0x5c): /* tdpbf16ps tmm,tmm,tmm */
+    case X86EMUL_OPC_VEX(0x0f38, 0x5e):    /* tdpbuud tmm,tmm,tmm */
+    case X86EMUL_OPC_VEX_66(0x0f38, 0x5e): /* tdpbusd tmm,tmm,tmm */
+    case X86EMUL_OPC_VEX_F3(0x0f38, 0x5e): /* tdpbsud tmm,tmm,tmm */
+    case X86EMUL_OPC_VEX_F2(0x0f38, 0x5e): /* tdpbssd tmm,tmm,tmm */
+    {
+        unsigned int vreg = vex.reg ^ 0xf;
+
+        if ( ea.type != OP_REG )
+            goto unimplemented_insn;
+        generate_exception_if(!mode_64bit() || vex.l || vex.w, EXC_UD);
+        if ( b == 0x5c )
+            host_and_vcpu_must_have(amx_bf16);
+        else
+            host_and_vcpu_must_have(amx_int8);
+        generate_exception_if(modrm_reg == modrm_rm, EXC_UD);
+        generate_exception_if(modrm_reg == vreg, EXC_UD);
+        generate_exception_if(modrm_rm == vreg, EXC_UD);
+
+        get_fpu(X86EMUL_FPU_tile);
+        sttilecfg(&mmvalp->tilecfg);
+        generate_exception_if(!tiles_configured(&mmvalp->tilecfg), EXC_UD);
+
+        /* accum: modrm_reg */
+        generate_exception_if(!tile_valid(modrm_reg, &mmvalp->tilecfg), EXC_UD);
+        /* src1: modrm_rm */
+        generate_exception_if(!tile_valid(modrm_rm, &mmvalp->tilecfg), EXC_UD);
+        /* src2: vreg */
+        generate_exception_if(!tile_valid(vreg, &mmvalp->tilecfg), EXC_UD);
+
+        generate_exception_if(mmvalp->tilecfg.colsb[modrm_reg] & 3, EXC_UD);
+        /*
+         * These are redundant with the check just below.
+        generate_exception_if(mmvalp->tilecfg.colsb[modrm_rm] & 3, EXC_UD);
+        generate_exception_if(mmvalp->tilecfg.colsb[vreg] & 3, EXC_UD);
+         */
+
+        generate_exception_if(mmvalp->tilecfg.rows[modrm_reg] !=
+                              mmvalp->tilecfg.rows[modrm_rm], EXC_UD);
+        generate_exception_if(mmvalp->tilecfg.colsb[modrm_reg] !=
+                              mmvalp->tilecfg.colsb[vreg], EXC_UD);
+        generate_exception_if(mmvalp->tilecfg.colsb[modrm_rm] !=
+                              mmvalp->tilecfg.rows[vreg] * 4, EXC_UD);
+
+        generate_exception_if(mmvalp->tilecfg.colsb[vreg] >
+                              ctxt->cpuid->tmul.maxn, EXC_UD);
+        generate_exception_if(mmvalp->tilecfg.rows[vreg] >
+                              ctxt->cpuid->tmul.maxk, EXC_UD);
+
+        op_bytes = 1; /* fake */
+        goto simd_0f_common;
+    }
+
     case X86EMUL_OPC_EVEX_66(0x0f38, 0x62): /* vpexpand{b,w} [xyz]mm/mem,[xyz]mm{k} */
     case X86EMUL_OPC_EVEX_66(0x0f38, 0x63): /* vpcompress{b,w} [xyz]mm,[xyz]mm/mem{k} */
         host_and_vcpu_must_have(avx512_vbmi2);
--- a/xen/include/asm-x86/cpufeature.h
+++ b/xen/include/asm-x86/cpufeature.h
@@ -133,7 +133,9 @@
 #define cpu_has_avx512_vp2intersect boot_cpu_has(X86_FEATURE_AVX512_VP2INTERSECT)
 #define cpu_has_tsx_force_abort boot_cpu_has(X86_FEATURE_TSX_FORCE_ABORT)
 #define cpu_has_serialize       boot_cpu_has(X86_FEATURE_SERIALIZE)
+#define cpu_has_amx_bf16        boot_cpu_has(X86_FEATURE_AMX_BF16)
 #define cpu_has_amx_tile        boot_cpu_has(X86_FEATURE_AMX_TILE)
+#define cpu_has_amx_int8        boot_cpu_has(X86_FEATURE_AMX_INT8)
 
 /* CPUID level 0x00000007:1.eax */
 #define cpu_has_avx_vnni        boot_cpu_has(X86_FEATURE_AVX_VNNI)



  parent reply	other threads:[~2021-04-22 14:56 UTC|newest]

Thread overview: 40+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-04-22 14:38 [PATCH v3 00/22] xvmalloc() / x86 xstate area / x86 CPUID / AMX+XFD Jan Beulich
2021-04-22 14:43 ` [PATCH v3 01/22] mm: introduce xvmalloc() et al and use for grant table allocations Jan Beulich
2021-05-03 11:31   ` Roger Pau Monné
2021-05-03 13:50     ` Jan Beulich
2021-05-03 14:54       ` Roger Pau Monné
2021-05-03 15:21         ` Jan Beulich
2021-05-03 16:39           ` Roger Pau Monné
2021-04-22 14:44 ` [PATCH v3 02/22] x86/xstate: use xvzalloc() for save area allocation Jan Beulich
2021-05-05 13:29   ` Roger Pau Monné
2021-04-22 14:44 ` [PATCH v3 03/22] x86/xstate: re-size save area when CPUID policy changes Jan Beulich
2021-05-03 13:57   ` Andrew Cooper
2021-05-03 14:22     ` Jan Beulich
2021-05-11 16:41       ` Andrew Cooper
2021-05-17  7:33         ` Jan Beulich
2021-04-22 14:45 ` [PATCH v3 04/22] x86/xstate: re-use valid_xcr0() for boot-time checks Jan Beulich
2021-05-03 11:53   ` Andrew Cooper
2021-04-22 14:45 ` [PATCH v3 05/22] x86/xstate: drop xstate_offsets[] and xstate_sizes[] Jan Beulich
2021-05-03 16:10   ` Andrew Cooper
2021-05-04  7:57     ` Jan Beulich
2021-04-22 14:46 ` [PATCH v3 06/22] x86/xstate: replace xsave_cntxt_size and drop XCNTXT_MASK Jan Beulich
2021-04-22 14:47 ` [PATCH v3 07/22] x86/xstate: avoid accounting for unsupported components Jan Beulich
2021-04-22 14:47 ` [PATCH v3 08/22] x86: use xvmalloc() for extended context buffer allocations Jan Beulich
2021-04-22 14:48 ` [PATCH v3 09/22] x86/xstate: enable AMX components Jan Beulich
2021-04-22 14:50 ` [PATCH v3 10/22] x86/CPUID: adjust extended leaves out of range clearing Jan Beulich
2021-04-22 14:50 ` [PATCH v3 11/22] x86/CPUID: move bounding of max_{,sub}leaf fields to library code Jan Beulich
2021-04-22 14:51 ` [PATCH v3 12/22] x86/CPUID: enable AMX leaves Jan Beulich
2021-04-22 14:52 ` [PATCH v3 13/22] x86: XFD enabling Jan Beulich
2021-04-22 14:53 ` [PATCH v3 14/22] x86emul: introduce X86EMUL_FPU_{tilecfg,tile} Jan Beulich
2021-04-22 14:53 ` [PATCH v3 15/22] x86emul: support TILERELEASE Jan Beulich
2021-04-22 14:53 ` [PATCH v3 16/22] x86: introduce struct for TILECFG register Jan Beulich
2021-04-22 14:54 ` [PATCH v3 17/22] x86emul: support {LD,ST}TILECFG Jan Beulich
2021-04-22 14:55 ` [PATCH v3 18/22] x86emul: support TILEZERO Jan Beulich
2021-04-22 14:55 ` [PATCH v3 19/22] x86emul: support TILELOADD{,T1} and TILESTORE Jan Beulich
2021-04-22 15:06   ` Jan Beulich
2021-04-22 15:11     ` Jan Beulich
2021-04-26  7:12       ` Paul Durrant
2021-04-29  9:40         ` Jan Beulich
2021-04-22 14:56 ` Jan Beulich [this message]
2021-04-22 14:57 ` [PATCH v3 21/22] x86emul: test AMX insns Jan Beulich
2021-04-22 14:57 ` [PATCH v3 22/22] x86: permit guests to use AMX and XFD Jan Beulich

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=33ff565f-0e08-4dc6-c041-ad3422544d54@suse.com \
    --to=jbeulich@suse.com \
    --cc=andrew.cooper3@citrix.com \
    --cc=george.dunlap@citrix.com \
    --cc=roger.pau@citrix.com \
    --cc=wl@xen.org \
    --cc=xen-devel@lists.xenproject.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.