xen-devel.lists.xenproject.org archive mirror
 help / color / mirror / Atom feed
From: Jan Beulich <jbeulich@suse.com>
To: "xen-devel@lists.xenproject.org" <xen-devel@lists.xenproject.org>
Cc: "Andrew Cooper" <andrew.cooper3@citrix.com>,
	"George Dunlap" <george.dunlap@citrix.com>,
	"Wei Liu" <wl@xen.org>, "Roger Pau Monné" <roger.pau@citrix.com>
Subject: [PATCH v3 17/22] x86emul: support {LD,ST}TILECFG
Date: Thu, 22 Apr 2021 16:54:27 +0200	[thread overview]
Message-ID: <bfbd5893-d670-42e5-613d-0f7790f19f5c@suse.com> (raw)
In-Reply-To: <322de6db-e01f-0b57-5777-5d94a13c441a@suse.com>

While ver 043 of the ISA extensions doc also specifies
xcr0_supports_palette() returning false as one of the #GP(0) reasons for
LDTILECFG, the earlier #UD / #GP conditions appear to make this check
fully dead.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
---
v3: Rebase over struct x86_tilecfg introduction.
v2: New.
---
SDE: -spr

--- a/tools/tests/x86_emulator/predicates.c
+++ b/tools/tests/x86_emulator/predicates.c
@@ -1335,6 +1335,8 @@ static const struct vex {
     { { 0x45 }, 2, T, R, pfx_66, Wn, Ln }, /* vpsrlv{d,q} */
     { { 0x46 }, 2, T, R, pfx_66, W0, Ln }, /* vpsravd */
     { { 0x47 }, 2, T, R, pfx_66, Wn, Ln }, /* vpsllv{d,q} */
+    { { 0x49, 0x00 }, 2, F, R, pfx_no, W0, L0 }, /* ldtilecfg */
+    { { 0x49, 0x00 }, 2, F, W, pfx_66, W0, L0 }, /* sttilecfg */
     { { 0x49, 0xc0 }, 2, F, N, pfx_no, W0, L0 }, /* tilerelease */
     { { 0x50 }, 2, T, R, pfx_66, W0, Ln }, /* vpdpbusd */
     { { 0x51 }, 2, T, R, pfx_66, W0, Ln }, /* vpdpbusds */
--- a/tools/tests/x86_emulator/test_x86_emulator.c
+++ b/tools/tests/x86_emulator/test_x86_emulator.c
@@ -898,6 +898,11 @@ int main(int argc, char **argv)
     int rc;
 #ifdef __x86_64__
     unsigned int vendor_native;
+    static const struct x86_tilecfg tilecfg = {
+        .palette = 1,
+        .colsb = { 2, 4, 5, 3 },
+        .rows = { 2, 4, 3, 5 },
+    };
 #else
     unsigned int bcdres_native, bcdres_emul;
 #endif
@@ -4463,6 +4468,74 @@ int main(int argc, char **argv)
         printf("skipped\n");
 
 #ifdef __x86_64__
+    printf("%-40s", "Testing tilerelease;sttilecfg 4(%rcx)...");
+    if ( stack_exec && cpu_has_amx_tile )
+    {
+        decl_insn(tilerelease);
+
+        asm volatile ( put_insn(tilerelease,
+                                /* tilerelease */
+                                ".byte 0xC4, 0xE2, 0x78, 0x49, 0xC0;"
+                                /* sttilecfg 4(%0) */
+                                ".byte 0xC4, 0xE2, 0x79, 0x49, 0x41, 0x04")
+                                :: "c" (NULL) );
+
+        memset(res, ~0, 72);
+        set_insn(tilerelease);
+        regs.ecx = (unsigned long)res;
+        rc = x86_emulate(&ctxt, &emulops);
+        if ( rc == X86EMUL_OKAY )
+            rc = x86_emulate(&ctxt, &emulops);
+        if ( rc != X86EMUL_OKAY || !check_eip(tilerelease) ||
+             ~res[0] || ~res[17] || memchr_inv(res + 1, 0, 64) )
+            goto fail;
+        printf("okay\n");
+    }
+    else
+        printf("skipped\n");
+
+    printf("%-40s", "Testing ldtilecfg (%rdx)...");
+    if ( stack_exec && cpu_has_amx_tile )
+    {
+        decl_insn(ldtilecfg);
+
+        asm volatile ( put_insn(ldtilecfg,
+                                /* ldtilecfg (%0) */
+                                ".byte 0xC4, 0xE2, 0x78, 0x49, 0x02")
+                                :: "d" (NULL) );
+
+        set_insn(ldtilecfg);
+        regs.edx = (unsigned long)&tilecfg;
+        rc = x86_emulate(&ctxt, &emulops);
+        if ( rc != X86EMUL_OKAY || !check_eip(ldtilecfg) )
+            goto fail;
+        printf("pending\n");
+    }
+    else
+        printf("skipped\n");
+
+    printf("%-40s", "Testing sttilecfg -4(%rcx)...");
+    if ( stack_exec && cpu_has_amx_tile )
+    {
+        decl_insn(sttilecfg);
+
+        asm volatile ( put_insn(sttilecfg,
+                                /* sttilecfg -4(%0) */
+                                ".byte 0xC4, 0xE2, 0x79, 0x49, 0x41, 0xFC")
+                                :: "c" (NULL) );
+
+        memset(res, ~0, 72);
+        set_insn(sttilecfg);
+        regs.ecx = (unsigned long)(res + 2);
+        rc = x86_emulate(&ctxt, &emulops);
+        if ( rc != X86EMUL_OKAY || !check_eip(sttilecfg) ||
+             ~res[0] || ~res[17] || memcmp(res + 1, &tilecfg, 64) )
+            goto fail;
+        printf("okay\n");
+    }
+    else
+        printf("skipped\n");
+
     printf("%-40s", "Testing vzeroupper (compat)...");
     if ( cpu_has_avx )
     {
--- a/tools/tests/x86_emulator/x86-emulate.h
+++ b/tools/tests/x86_emulator/x86-emulate.h
@@ -68,6 +68,17 @@
 
 #define is_canonical_address(x) (((int64_t)(x) >> 47) == ((int64_t)(x) >> 63))
 
+static inline void *memchr_inv(const void *s, int c, size_t n)
+{
+    const unsigned char *p = s;
+    /* Find the first of the n bytes at s differing from (unsigned char)c. */
+    while ( n-- )
+        if ( (unsigned char)c != *p++ )
+            return (void *)(p - 1); /* p was post-incremented; step back */
+    /* No mismatch found (also the n == 0 case). */
+    return NULL;
+}
+
 extern uint32_t mxcsr_mask;
 extern struct cpuid_policy cp;
 
@@ -171,6 +182,8 @@ static inline bool xcr0_mask(uint64_t ma
 #define cpu_has_avx512_4fmaps (cp.feat.avx512_4fmaps && xcr0_mask(0xe6))
 #define cpu_has_avx512_vp2intersect (cp.feat.avx512_vp2intersect && xcr0_mask(0xe6))
 #define cpu_has_serialize  cp.feat.serialize
+#define cpu_has_amx_tile   (cp.feat.amx_tile && \
+                            xcr0_mask(X86_XCR0_TILECFG | X86_XCR0_TILEDATA))
 #define cpu_has_avx_vnni   (cp.feat.avx_vnni && xcr0_mask(6))
 #define cpu_has_avx512_bf16 (cp.feat.avx512_bf16 && xcr0_mask(0xe6))
 
--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -957,6 +957,7 @@ typedef union {
     uint64_t __attribute__ ((aligned(16))) xmm[2];
     uint64_t __attribute__ ((aligned(32))) ymm[4];
     uint64_t __attribute__ ((aligned(64))) zmm[8];
+    struct x86_tilecfg tilecfg;
     uint32_t data32[16];
 } mmval_t;
 
@@ -2880,6 +2881,10 @@ x86_decode_0f38(
         state->simd_size = simd_scalar_vexw;
         break;
 
+    case X86EMUL_OPC_VEX_66(0, 0x49): /* sttilecfg */
+        state->desc = DstMem | SrcImplicit | Mov;
+        break;
+
     case X86EMUL_OPC_EVEX_66(0, 0x7a): /* vpbroadcastb */
     case X86EMUL_OPC_EVEX_66(0, 0x7b): /* vpbroadcastw */
     case X86EMUL_OPC_EVEX_66(0, 0x7c): /* vpbroadcast{d,q} */
@@ -9518,7 +9523,66 @@ x86_emulate(
                 goto unrecognized_insn;
             }
         }
-        goto unimplemented_insn;
+
+        switch ( modrm_reg & 7 )
+        {
+        case 0: /* ldtilecfg mem */
+            generate_exception_if(vex.reg != 0xf, EXC_UD);
+            host_and_vcpu_must_have(amx_tile);
+            get_fpu(X86EMUL_FPU_tilecfg);
+            rc = ops->read(ea.mem.seg, ea.mem.off, mmvalp, 64, ctxt);
+            if ( rc != X86EMUL_OKAY )
+                goto done;
+            generate_exception_if((mmvalp->tilecfg.palette >
+                                   ctxt->cpuid->tile.max_palette),
+                                  EXC_GP, 0);
+            if ( mmvalp->tilecfg.palette )
+            {
+                const typeof(*ctxt->cpuid->tile.palette) *palette;
+
+                generate_exception_if(memchr_inv(mmvalp->tilecfg.res, 0,
+                                                 sizeof(mmvalp->tilecfg.res)),
+                                      EXC_GP, 0);
+
+                /*
+                 * Parameters for valid registers must be within bounds, or
+                 * both be zero at the same time.
+                 */
+                palette = &ctxt->cpuid->tile.palette[mmvalp->tilecfg.palette];
+                for ( i = 0; i < palette->num_regs; ++i )
+                    generate_exception_if(((mmvalp->tilecfg.colsb[i] >
+                                            palette->bytes_per_row) ||
+                                           (mmvalp->tilecfg.rows[i] >
+                                            palette->max_rows) ||
+                                           (!mmvalp->tilecfg.colsb[i] !=
+                                            !mmvalp->tilecfg.rows[i])),
+                                          EXC_GP, 0);
+
+                /* All remaining entries must be zero. */
+                for ( ; i < 16; ++i )
+                    generate_exception_if((mmvalp->tilecfg.colsb[i] ||
+                                           mmvalp->tilecfg.rows[i]),
+                                          EXC_GP, 0);
+            }
+            op_bytes = 64;
+            goto simd_0f_common;
+        }
+        goto unrecognized_insn;
+
+    case X86EMUL_OPC_VEX_66(0x0f38, 0x49):
+        generate_exception_if(!mode_64bit() || vex.l || vex.w, EXC_UD);
+        if ( ea.type == OP_REG )
+            goto unrecognized_insn;
+
+        switch ( modrm_reg & 7 )
+        {
+        case 0: /* sttilecfg mem */
+            host_and_vcpu_must_have(amx_tile);
+            get_fpu(X86EMUL_FPU_tilecfg);
+            op_bytes = 64;
+            goto simd_0f_common;
+        }
+        goto unrecognized_insn;
 
     case X86EMUL_OPC_VEX_66(0x0f38, 0x50): /* vpdpbusd [xy]mm/mem,[xy]mm,[xy]mm */
     case X86EMUL_OPC_VEX_66(0x0f38, 0x51): /* vpdpbusds [xy]mm/mem,[xy]mm,[xy]mm */



  parent reply	other threads:[~2021-04-22 14:54 UTC|newest]

Thread overview: 40+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-04-22 14:38 [PATCH v3 00/22] xvmalloc() / x86 xstate area / x86 CPUID / AMX+XFD Jan Beulich
2021-04-22 14:43 ` [PATCH v3 01/22] mm: introduce xvmalloc() et al and use for grant table allocations Jan Beulich
2021-05-03 11:31   ` Roger Pau Monné
2021-05-03 13:50     ` Jan Beulich
2021-05-03 14:54       ` Roger Pau Monné
2021-05-03 15:21         ` Jan Beulich
2021-05-03 16:39           ` Roger Pau Monné
2021-04-22 14:44 ` [PATCH v3 02/22] x86/xstate: use xvzalloc() for save area allocation Jan Beulich
2021-05-05 13:29   ` Roger Pau Monné
2021-04-22 14:44 ` [PATCH v3 03/22] x86/xstate: re-size save area when CPUID policy changes Jan Beulich
2021-05-03 13:57   ` Andrew Cooper
2021-05-03 14:22     ` Jan Beulich
2021-05-11 16:41       ` Andrew Cooper
2021-05-17  7:33         ` Jan Beulich
2021-04-22 14:45 ` [PATCH v3 04/22] x86/xstate: re-use valid_xcr0() for boot-time checks Jan Beulich
2021-05-03 11:53   ` Andrew Cooper
2021-04-22 14:45 ` [PATCH v3 05/22] x86/xstate: drop xstate_offsets[] and xstate_sizes[] Jan Beulich
2021-05-03 16:10   ` Andrew Cooper
2021-05-04  7:57     ` Jan Beulich
2021-04-22 14:46 ` [PATCH v3 06/22] x86/xstate: replace xsave_cntxt_size and drop XCNTXT_MASK Jan Beulich
2021-04-22 14:47 ` [PATCH v3 07/22] x86/xstate: avoid accounting for unsupported components Jan Beulich
2021-04-22 14:47 ` [PATCH v3 08/22] x86: use xvmalloc() for extended context buffer allocations Jan Beulich
2021-04-22 14:48 ` [PATCH v3 09/22] x86/xstate: enable AMX components Jan Beulich
2021-04-22 14:50 ` [PATCH v3 10/22] x86/CPUID: adjust extended leaves out of range clearing Jan Beulich
2021-04-22 14:50 ` [PATCH v3 11/22] x86/CPUID: move bounding of max_{,sub}leaf fields to library code Jan Beulich
2021-04-22 14:51 ` [PATCH v3 12/22] x86/CPUID: enable AMX leaves Jan Beulich
2021-04-22 14:52 ` [PATCH v3 13/22] x86: XFD enabling Jan Beulich
2021-04-22 14:53 ` [PATCH v3 14/22] x86emul: introduce X86EMUL_FPU_{tilecfg,tile} Jan Beulich
2021-04-22 14:53 ` [PATCH v3 15/22] x86emul: support TILERELEASE Jan Beulich
2021-04-22 14:53 ` [PATCH v3 16/22] x86: introduce struct for TILECFG register Jan Beulich
2021-04-22 14:54 ` Jan Beulich [this message]
2021-04-22 14:55 ` [PATCH v3 18/22] x86emul: support TILEZERO Jan Beulich
2021-04-22 14:55 ` [PATCH v3 19/22] x86emul: support TILELOADD{,T1} and TILESTORE Jan Beulich
2021-04-22 15:06   ` Jan Beulich
2021-04-22 15:11     ` Jan Beulich
2021-04-26  7:12       ` Paul Durrant
2021-04-29  9:40         ` Jan Beulich
2021-04-22 14:56 ` [PATCH v3 20/22] x86emul: support tile multiplication insns Jan Beulich
2021-04-22 14:57 ` [PATCH v3 21/22] x86emul: test AMX insns Jan Beulich
2021-04-22 14:57 ` [PATCH v3 22/22] x86: permit guests to use AMX and XFD Jan Beulich

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=bfbd5893-d670-42e5-613d-0f7790f19f5c@suse.com \
    --to=jbeulich@suse.com \
    --cc=andrew.cooper3@citrix.com \
    --cc=george.dunlap@citrix.com \
    --cc=roger.pau@citrix.com \
    --cc=wl@xen.org \
    --cc=xen-devel@lists.xenproject.org \
    --subject='Re: [PATCH v3 17/22] x86emul: support {LD,ST}TILECFG' \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).