Xen-Devel Archive on lore.kernel.org
 help / color / Atom feed
* [Xen-devel] [PATCH v5 00/10] x86emul: further work
@ 2020-03-24 12:26 Jan Beulich
  2020-03-24 12:29 ` [Xen-devel] [PATCH v5 02/10] x86emul: support AVX512_BF16 insns Jan Beulich
                   ` (13 more replies)
  0 siblings, 14 replies; 29+ messages in thread
From: Jan Beulich @ 2020-03-24 12:26 UTC (permalink / raw)
  To: xen-devel; +Cc: Andrew Cooper, Paul Durrant, Wei Liu, Roger Pau Monne

Some of the later patches are still at least partly RFC, for
varying reasons (see there). I'd appreciate though if at least
some of the earlier ones could go in rather sooner than later.

Patch 1 functionally (for the test harness) depends on
"libx86/CPUID: fix (not just) leaf 7 processing", while at
least patch 2 contextually depends on "x86emul: disable
FPU/MMX/SIMD insn emulation when !HVM".

 1: x86emul: support AVX512_BF16 insns
 2: x86emul: support MOVDIRI insn
 3: x86: determine HAVE_AS_* just once
 4: x86: move back clang no integrated assembler tests
 5: x86emul: support MOVDIR64B insn
 6: x86emul: support ENQCMD insn
 7: x86/HVM: scale MPERF values reported to guests (on AMD)
 8: x86emul: support RDPRU
 9: x86/HVM: don't needlessly intercept APERF/MPERF/TSC MSR reads
10: x86emul: support MCOMMIT

See individual patches for changes from v4 (which was mistakenly
sent out with a v3 tag).

Jan


^ permalink raw reply	[flat|nested] 29+ messages in thread

* [Xen-devel] [PATCH v5 02/10] x86emul: support AVX512_BF16 insns
  2020-03-24 12:26 [Xen-devel] [PATCH v5 00/10] x86emul: further work Jan Beulich
@ 2020-03-24 12:29 ` Jan Beulich
  2020-03-24 12:29 ` [Xen-devel] [PATCH v5 02/10] x86emul: support MOVDIRI insn Jan Beulich
                   ` (12 subsequent siblings)
  13 siblings, 0 replies; 29+ messages in thread
From: Jan Beulich @ 2020-03-24 12:29 UTC (permalink / raw)
  To: xen-devel; +Cc: Andrew Cooper, Paul Durrant, Wei Liu, Roger Pau Monne

Signed-off-by: Jan Beulich <jbeulich@suse.com>
---
v5: New.
---
(SDE: -cpx)

--- a/tools/tests/x86_emulator/evex-disp8.c
+++ b/tools/tests/x86_emulator/evex-disp8.c
@@ -550,6 +550,12 @@ static const struct test avx512_4vnniw_5
     INSN(p4dpwssds, f2, 0f38, 53, el_4, d, vl),
 };
 
+static const struct test avx512_bf16_all[] = {
+    INSN(vcvtne2ps2bf16, f2, 0f38, 72, vl, d, vl),
+    INSN(vcvtneps2bf16,  f3, 0f38, 72, vl, d, vl),
+    INSN(vdpbf16ps,      f3, 0f38, 52, vl, d, vl),
+};
+
 static const struct test avx512_bitalg_all[] = {
     INSN(popcnt,      66, 0f38, 54, vl, bw, vl),
     INSN(pshufbitqmb, 66, 0f38, 8f, vl,  b, vl),
@@ -984,6 +990,7 @@ void evex_disp8_test(void *instr, struct
     RUN(avx512pf, 512);
     RUN(avx512_4fmaps, 512);
     RUN(avx512_4vnniw, 512);
+    RUN(avx512_bf16, all);
     RUN(avx512_bitalg, all);
     RUN(avx512_ifma, all);
     RUN(avx512_vbmi, all);
--- a/tools/tests/x86_emulator/test_x86_emulator.c
+++ b/tools/tests/x86_emulator/test_x86_emulator.c
@@ -4516,6 +4516,80 @@ int main(int argc, char **argv)
     else
         printf("skipped\n");
 
+    if ( stack_exec && cpu_has_avx512_bf16 )
+    {
+        decl_insn(vcvtne2ps2bf16);
+        decl_insn(vcvtneps2bf16);
+        decl_insn(vdpbf16ps);
+        static const struct {
+            float f[16];
+        } in1 = {{
+            1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+        }}, in2 = {{
+            1, -2, 3, -4, 5, -6, 7, -8, 9, -10, 11, -12, 13, -14, 15, -16
+        }}, out = {{
+            1 * 1 + 2 * 2, 3 * 3 + 4 * 4,
+            5 * 5 + 6 * 6, 7 * 7 + 8 * 8,
+            9 * 9 + 10 * 10, 11 * 11 + 12 * 12,
+            13 * 13 + 14 * 14, 15 * 15 + 16 * 16,
+            1 * 1 - 2 * 2, 3 * 3 - 4 * 4,
+            5 * 5 - 6 * 6, 7 * 7 - 8 * 8,
+            9 * 9 - 10 * 10, 11 * 11 - 12 * 12,
+            13 * 13 - 14 * 14, 15 * 15 - 16 * 16
+        }};
+
+        printf("%-40s", "Testing vcvtne2ps2bf16 64(%ecx),%zmm1,%zmm2...");
+        asm volatile ( "vmovups %1, %%zmm1\n"
+                       put_insn(vcvtne2ps2bf16,
+                                /* vcvtne2ps2bf16 64(%0), %%zmm1, %%zmm2 */
+                                ".byte 0x62, 0xf2, 0x77, 0x48, 0x72, 0x51, 0x01")
+                       :: "c" (NULL), "m" (in2) );
+        set_insn(vcvtne2ps2bf16);
+        regs.ecx = (unsigned long)&in1 - 64;
+        rc = x86_emulate(&ctxt, &emulops);
+        if ( rc != X86EMUL_OKAY || !check_eip(vcvtne2ps2bf16) )
+            goto fail;
+        printf("pending\n");
+
+        printf("%-40s", "Testing vcvtneps2bf16 64(%ecx),%ymm3...");
+        asm volatile ( put_insn(vcvtneps2bf16,
+                                /* vcvtneps2bf16 64(%0), %%ymm3 */
+                                ".byte 0x62, 0xf2, 0x7e, 0x48, 0x72, 0x59, 0x01")
+                       :: "c" (NULL) );
+        set_insn(vcvtneps2bf16);
+        rc = x86_emulate(&ctxt, &emulops);
+        if ( rc != X86EMUL_OKAY || !check_eip(vcvtneps2bf16) )
+            goto fail;
+        asm ( "vmovdqa %%ymm2, %%ymm5\n\t"
+              "vpcmpeqd %%zmm3, %%zmm5, %%k0\n\t"
+              "kmovw %%k0, %0"
+              : "=g" (rc) : "m" (out) );
+        if ( rc != 0xffff )
+            goto fail;
+        printf("pending\n");
+
+        printf("%-40s", "Testing vdpbf16ps 128(%ecx),%zmm2,%zmm4...");
+        asm volatile ( "vmovdqa %%ymm3, %0\n\t"
+                       "vmovdqa %%ymm3, %1\n"
+                       put_insn(vdpbf16ps,
+                                /* vdpbf16ps 128(%2), %%zmm2, %%zmm4 */
+                                ".byte 0x62, 0xf2, 0x6e, 0x48, 0x52, 0x61, 0x02")
+                       : "=&m" (res[0]), "=&m" (res[8])
+                       : "c" (NULL)
+                       : "memory" );
+        set_insn(vdpbf16ps);
+        regs.ecx = (unsigned long)res - 128;
+        rc = x86_emulate(&ctxt, &emulops);
+        if ( rc != X86EMUL_OKAY || !check_eip(vdpbf16ps) )
+            goto fail;
+        asm ( "vcmpeqps %1, %%zmm4, %%k0\n\t"
+              "kmovw %%k0, %0"
+              : "=g" (rc) : "m" (out) );
+        if ( rc != 0xffff )
+            goto fail;
+        printf("okay\n");
+    }
+
     printf("%-40s", "Testing invpcid 16(%ecx),%%edx...");
     if ( stack_exec )
     {
--- a/tools/tests/x86_emulator/x86-emulate.h
+++ b/tools/tests/x86_emulator/x86-emulate.h
@@ -156,6 +156,7 @@ static inline bool xcr0_mask(uint64_t ma
 #define cpu_has_avx512_vpopcntdq (cp.feat.avx512_vpopcntdq && xcr0_mask(0xe6))
 #define cpu_has_avx512_4vnniw (cp.feat.avx512_4vnniw && xcr0_mask(0xe6))
 #define cpu_has_avx512_4fmaps (cp.feat.avx512_4fmaps && xcr0_mask(0xe6))
+#define cpu_has_avx512_bf16 (cp.feat.avx512_bf16 && xcr0_mask(0xe6))
 
 #define cpu_has_xgetbv1   (cpu_has_xsave && cp.xstate.xgetbv1)
 
--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -1904,6 +1904,7 @@ in_protmode(
 #define vcpu_has_rdpid()       (ctxt->cpuid->feat.rdpid)
 #define vcpu_has_avx512_4vnniw() (ctxt->cpuid->feat.avx512_4vnniw)
 #define vcpu_has_avx512_4fmaps() (ctxt->cpuid->feat.avx512_4fmaps)
+#define vcpu_has_avx512_bf16() (ctxt->cpuid->feat.avx512_bf16)
 
 #define vcpu_must_have(feat) \
     generate_exception_if(!vcpu_has_##feat(), EXC_UD)
@@ -9152,6 +9153,19 @@ x86_emulate(
         generate_exception_if(evex.w, EXC_UD);
         goto avx512f_no_sae;
 
+    case X86EMUL_OPC_EVEX_F2(0x0f38, 0x72): /* vcvtne2ps2bf16 [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
+    case X86EMUL_OPC_EVEX_F3(0x0f38, 0x72): /* vcvtneps2bf16 [xyz]mm/mem,{x,y}mm{k} */
+        if ( evex.pfx == vex_f2 )
+            fault_suppression = false;
+        else
+            d |= TwoOp;
+        /* fall through */
+    case X86EMUL_OPC_EVEX_F3(0x0f38, 0x52): /* vdpbf16ps [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
+        host_and_vcpu_must_have(avx512_bf16);
+        generate_exception_if(evex.w, EXC_UD);
+        op_bytes = 16 << evex.lr;
+        goto avx512f_no_sae;
+
     case X86EMUL_OPC_VEX_66(0x0f38, 0x58): /* vpbroadcastd xmm/m32,{x,y}mm */
     case X86EMUL_OPC_VEX_66(0x0f38, 0x59): /* vpbroadcastq xmm/m64,{x,y}mm */
     case X86EMUL_OPC_VEX_66(0x0f38, 0x78): /* vpbroadcastb xmm/m8,{x,y}mm */
--- a/xen/include/asm-x86/cpufeature.h
+++ b/xen/include/asm-x86/cpufeature.h
@@ -129,6 +129,9 @@
 #define cpu_has_avx512_4fmaps   boot_cpu_has(X86_FEATURE_AVX512_4FMAPS)
 #define cpu_has_tsx_force_abort boot_cpu_has(X86_FEATURE_TSX_FORCE_ABORT)
 
+/* CPUID level 0x00000007:1.eax */
+#define cpu_has_avx512_bf16     boot_cpu_has(X86_FEATURE_AVX512_BF16)
+
 /* Synthesized. */
 #define cpu_has_arch_perfmon    boot_cpu_has(X86_FEATURE_ARCH_PERFMON)
 #define cpu_has_cpuid_faulting  boot_cpu_has(X86_FEATURE_CPUID_FAULTING)
--- a/xen/include/public/arch-x86/cpufeatureset.h
+++ b/xen/include/public/arch-x86/cpufeatureset.h
@@ -262,7 +262,7 @@ XEN_CPUFEATURE(CORE_CAPS,     9*32+30) /
 XEN_CPUFEATURE(SSBD,          9*32+31) /*A  MSR_SPEC_CTRL.SSBD available */
 
 /* Intel-defined CPU features, CPUID level 0x00000007:1.eax, word 10 */
-XEN_CPUFEATURE(AVX512_BF16,  10*32+ 5) /*   AVX512 BFloat16 Instructions */
+XEN_CPUFEATURE(AVX512_BF16,  10*32+ 5) /*A  AVX512 BFloat16 Instructions */
 
 #endif /* XEN_CPUFEATURE */
 



^ permalink raw reply	[flat|nested] 29+ messages in thread

* [Xen-devel] [PATCH v5 02/10] x86emul: support MOVDIRI insn
  2020-03-24 12:26 [Xen-devel] [PATCH v5 00/10] x86emul: further work Jan Beulich
  2020-03-24 12:29 ` [Xen-devel] [PATCH v5 02/10] x86emul: support AVX512_BF16 insns Jan Beulich
@ 2020-03-24 12:29 ` Jan Beulich
  2020-03-25 20:58   ` Andrew Cooper
  2020-03-24 12:30 ` [Xen-devel] [PATCH v5 01/10] x86emul: support AVX512_BF16 insns Jan Beulich
                   ` (11 subsequent siblings)
  13 siblings, 1 reply; 29+ messages in thread
From: Jan Beulich @ 2020-03-24 12:29 UTC (permalink / raw)
  To: xen-devel; +Cc: Andrew Cooper, Paul Durrant, Wei Liu, Roger Pau Monne

Note that SDM revision 070 doesn't specify exception behavior for
ModRM.mod == 0b11; assuming #UD here.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
---
v4: Split MOVDIRI and MOVDIR64B and move this one ahead. Re-base.
v3: Update description.

--- a/tools/tests/x86_emulator/test_x86_emulator.c
+++ b/tools/tests/x86_emulator/test_x86_emulator.c
@@ -2196,6 +2196,18 @@ int main(int argc, char **argv)
         goto fail;
     printf("okay\n");
 
+    printf("%-40s", "Testing movdiri %edx,(%ecx)...");
+    instr[0] = 0x0f; instr[1] = 0x38; instr[2] = 0xf9; instr[3] = 0x11;
+    regs.eip = (unsigned long)&instr[0];
+    regs.ecx = (unsigned long)memset(res, -1, 16);
+    regs.edx = 0x44332211;
+    rc = x86_emulate(&ctxt, &emulops);
+    if ( (rc != X86EMUL_OKAY) ||
+         (regs.eip != (unsigned long)&instr[4]) ||
+         res[0] != 0x44332211 || ~res[1] )
+        goto fail;
+    printf("okay\n");
+
     printf("%-40s", "Testing movq %mm3,(%ecx)...");
     if ( stack_exec && cpu_has_mmx )
     {
--- a/tools/tests/x86_emulator/x86-emulate.c
+++ b/tools/tests/x86_emulator/x86-emulate.c
@@ -76,6 +76,7 @@ bool emul_test_init(void)
     cp.feat.adx = true;
     cp.feat.avx512pf = cp.feat.avx512f;
     cp.feat.rdpid = true;
+    cp.feat.movdiri = true;
     cp.extd.clzero = true;
 
     if ( cpu_has_xsave )
@@ -137,15 +138,15 @@ int emul_test_cpuid(
         res->c |= 1U << 22;
 
     /*
-     * The emulator doesn't itself use ADCX/ADOX/RDPID nor the S/G prefetch
-     * insns, so we can always run the respective tests.
+     * The emulator doesn't itself use ADCX/ADOX/RDPID/MOVDIRI nor the S/G
+     * prefetch insns, so we can always run the respective tests.
      */
     if ( leaf == 7 && subleaf == 0 )
     {
         res->b |= (1U << 10) | (1U << 19);
         if ( res->b & (1U << 16) )
             res->b |= 1U << 26;
-        res->c |= 1U << 22;
+        res->c |= (1U << 22) | (1U << 27);
     }
 
     /*
--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -548,6 +548,7 @@ static const struct ext0f38_table {
     [0xf1] = { .to_mem = 1, .two_op = 1 },
     [0xf2 ... 0xf3] = {},
     [0xf5 ... 0xf7] = {},
+    [0xf9] = { .to_mem = 1 },
 };
 
 /* Shift values between src and dst sizes of pmov{s,z}x{b,w,d}{w,d,q}. */
@@ -1902,6 +1903,7 @@ in_protmode(
 #define vcpu_has_avx512_bitalg() (ctxt->cpuid->feat.avx512_bitalg)
 #define vcpu_has_avx512_vpopcntdq() (ctxt->cpuid->feat.avx512_vpopcntdq)
 #define vcpu_has_rdpid()       (ctxt->cpuid->feat.rdpid)
+#define vcpu_has_movdiri()     (ctxt->cpuid->feat.movdiri)
 #define vcpu_has_avx512_4vnniw() (ctxt->cpuid->feat.avx512_4vnniw)
 #define vcpu_has_avx512_4fmaps() (ctxt->cpuid->feat.avx512_4fmaps)
 #define vcpu_has_avx512_bf16() (ctxt->cpuid->feat.avx512_bf16)
@@ -2713,10 +2715,12 @@ x86_decode_0f38(
     {
     case 0x00 ... 0xef:
     case 0xf2 ... 0xf5:
-    case 0xf7 ... 0xff:
+    case 0xf7 ... 0xf8:
+    case 0xfa ... 0xff:
         op_bytes = 0;
         /* fall through */
     case 0xf6: /* adcx / adox */
+    case 0xf9: /* movdiri */
         ctxt->opcode |= MASK_INSR(vex.pfx, X86EMUL_OPC_PFX_MASK);
         break;
 
@@ -10075,6 +10079,14 @@ x86_emulate(
                             : "0" ((uint32_t)src.val), "rm" (_regs.edx) );
         break;
 
+    case X86EMUL_OPC(0x0f38, 0xf9): /* movdiri mem,r */
+        vcpu_must_have(movdiri);
+        generate_exception_if(dst.type != OP_MEM, EXC_UD);
+        /* Ignore the non-temporal behavior for now. */
+        dst.val = src.val;
+        sfence = true;
+        break;
+
 #ifndef X86EMUL_NO_SIMD
 
     case X86EMUL_OPC_VEX_66(0x0f3a, 0x00): /* vpermq $imm8,ymm/m256,ymm */
--- a/xen/include/public/arch-x86/cpufeatureset.h
+++ b/xen/include/public/arch-x86/cpufeatureset.h
@@ -237,6 +237,7 @@ XEN_CPUFEATURE(AVX512_BITALG, 6*32+12) /
 XEN_CPUFEATURE(AVX512_VPOPCNTDQ, 6*32+14) /*A  POPCNT for vectors of DW/QW */
 XEN_CPUFEATURE(RDPID,         6*32+22) /*A  RDPID instruction */
 XEN_CPUFEATURE(CLDEMOTE,      6*32+25) /*A  CLDEMOTE instruction */
+XEN_CPUFEATURE(MOVDIRI,       6*32+27) /*A  MOVDIRI instruction */
 
 /* AMD-defined CPU features, CPUID level 0x80000007.edx, word 7 */
 XEN_CPUFEATURE(ITSC,          7*32+ 8) /*   Invariant TSC */



^ permalink raw reply	[flat|nested] 29+ messages in thread

* [Xen-devel] [PATCH v5 01/10] x86emul: support AVX512_BF16 insns
  2020-03-24 12:26 [Xen-devel] [PATCH v5 00/10] x86emul: further work Jan Beulich
  2020-03-24 12:29 ` [Xen-devel] [PATCH v5 02/10] x86emul: support AVX512_BF16 insns Jan Beulich
  2020-03-24 12:29 ` [Xen-devel] [PATCH v5 02/10] x86emul: support MOVDIRI insn Jan Beulich
@ 2020-03-24 12:30 ` Jan Beulich
  2020-03-25 20:02   ` Andrew Cooper
  2020-03-27 18:20   ` Andrew Cooper
  2020-03-24 12:33 ` [Xen-devel] [PATCH v5 03/10] x86: determine HAVE_AS_* just once Jan Beulich
                   ` (10 subsequent siblings)
  13 siblings, 2 replies; 29+ messages in thread
From: Jan Beulich @ 2020-03-24 12:30 UTC (permalink / raw)
  To: xen-devel; +Cc: Andrew Cooper, Paul Durrant, Wei Liu, Roger Pau Monne

Signed-off-by: Jan Beulich <jbeulich@suse.com>
---
v5: New.
---
(SDE: -cpx)

--- a/tools/tests/x86_emulator/evex-disp8.c
+++ b/tools/tests/x86_emulator/evex-disp8.c
@@ -550,6 +550,12 @@ static const struct test avx512_4vnniw_5
     INSN(p4dpwssds, f2, 0f38, 53, el_4, d, vl),
 };
 
+static const struct test avx512_bf16_all[] = {
+    INSN(vcvtne2ps2bf16, f2, 0f38, 72, vl, d, vl),
+    INSN(vcvtneps2bf16,  f3, 0f38, 72, vl, d, vl),
+    INSN(vdpbf16ps,      f3, 0f38, 52, vl, d, vl),
+};
+
 static const struct test avx512_bitalg_all[] = {
     INSN(popcnt,      66, 0f38, 54, vl, bw, vl),
     INSN(pshufbitqmb, 66, 0f38, 8f, vl,  b, vl),
@@ -984,6 +990,7 @@ void evex_disp8_test(void *instr, struct
     RUN(avx512pf, 512);
     RUN(avx512_4fmaps, 512);
     RUN(avx512_4vnniw, 512);
+    RUN(avx512_bf16, all);
     RUN(avx512_bitalg, all);
     RUN(avx512_ifma, all);
     RUN(avx512_vbmi, all);
--- a/tools/tests/x86_emulator/test_x86_emulator.c
+++ b/tools/tests/x86_emulator/test_x86_emulator.c
@@ -4516,6 +4516,80 @@ int main(int argc, char **argv)
     else
         printf("skipped\n");
 
+    if ( stack_exec && cpu_has_avx512_bf16 )
+    {
+        decl_insn(vcvtne2ps2bf16);
+        decl_insn(vcvtneps2bf16);
+        decl_insn(vdpbf16ps);
+        static const struct {
+            float f[16];
+        } in1 = {{
+            1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+        }}, in2 = {{
+            1, -2, 3, -4, 5, -6, 7, -8, 9, -10, 11, -12, 13, -14, 15, -16
+        }}, out = {{
+            1 * 1 + 2 * 2, 3 * 3 + 4 * 4,
+            5 * 5 + 6 * 6, 7 * 7 + 8 * 8,
+            9 * 9 + 10 * 10, 11 * 11 + 12 * 12,
+            13 * 13 + 14 * 14, 15 * 15 + 16 * 16,
+            1 * 1 - 2 * 2, 3 * 3 - 4 * 4,
+            5 * 5 - 6 * 6, 7 * 7 - 8 * 8,
+            9 * 9 - 10 * 10, 11 * 11 - 12 * 12,
+            13 * 13 - 14 * 14, 15 * 15 - 16 * 16
+        }};
+
+        printf("%-40s", "Testing vcvtne2ps2bf16 64(%ecx),%zmm1,%zmm2...");
+        asm volatile ( "vmovups %1, %%zmm1\n"
+                       put_insn(vcvtne2ps2bf16,
+                                /* vcvtne2ps2bf16 64(%0), %%zmm1, %%zmm2 */
+                                ".byte 0x62, 0xf2, 0x77, 0x48, 0x72, 0x51, 0x01")
+                       :: "c" (NULL), "m" (in2) );
+        set_insn(vcvtne2ps2bf16);
+        regs.ecx = (unsigned long)&in1 - 64;
+        rc = x86_emulate(&ctxt, &emulops);
+        if ( rc != X86EMUL_OKAY || !check_eip(vcvtne2ps2bf16) )
+            goto fail;
+        printf("pending\n");
+
+        printf("%-40s", "Testing vcvtneps2bf16 64(%ecx),%ymm3...");
+        asm volatile ( put_insn(vcvtneps2bf16,
+                                /* vcvtneps2bf16 64(%0), %%ymm3 */
+                                ".byte 0x62, 0xf2, 0x7e, 0x48, 0x72, 0x59, 0x01")
+                       :: "c" (NULL) );
+        set_insn(vcvtneps2bf16);
+        rc = x86_emulate(&ctxt, &emulops);
+        if ( rc != X86EMUL_OKAY || !check_eip(vcvtneps2bf16) )
+            goto fail;
+        asm ( "vmovdqa %%ymm2, %%ymm5\n\t"
+              "vpcmpeqd %%zmm3, %%zmm5, %%k0\n\t"
+              "kmovw %%k0, %0"
+              : "=g" (rc) : "m" (out) );
+        if ( rc != 0xffff )
+            goto fail;
+        printf("pending\n");
+
+        printf("%-40s", "Testing vdpbf16ps 128(%ecx),%zmm2,%zmm4...");
+        asm volatile ( "vmovdqa %%ymm3, %0\n\t"
+                       "vmovdqa %%ymm3, %1\n"
+                       put_insn(vdpbf16ps,
+                                /* vdpbf16ps 128(%2), %%zmm2, %%zmm4 */
+                                ".byte 0x62, 0xf2, 0x6e, 0x48, 0x52, 0x61, 0x02")
+                       : "=&m" (res[0]), "=&m" (res[8])
+                       : "c" (NULL)
+                       : "memory" );
+        set_insn(vdpbf16ps);
+        regs.ecx = (unsigned long)res - 128;
+        rc = x86_emulate(&ctxt, &emulops);
+        if ( rc != X86EMUL_OKAY || !check_eip(vdpbf16ps) )
+            goto fail;
+        asm ( "vcmpeqps %1, %%zmm4, %%k0\n\t"
+              "kmovw %%k0, %0"
+              : "=g" (rc) : "m" (out) );
+        if ( rc != 0xffff )
+            goto fail;
+        printf("okay\n");
+    }
+
     printf("%-40s", "Testing invpcid 16(%ecx),%%edx...");
     if ( stack_exec )
     {
--- a/tools/tests/x86_emulator/x86-emulate.h
+++ b/tools/tests/x86_emulator/x86-emulate.h
@@ -156,6 +156,7 @@ static inline bool xcr0_mask(uint64_t ma
 #define cpu_has_avx512_vpopcntdq (cp.feat.avx512_vpopcntdq && xcr0_mask(0xe6))
 #define cpu_has_avx512_4vnniw (cp.feat.avx512_4vnniw && xcr0_mask(0xe6))
 #define cpu_has_avx512_4fmaps (cp.feat.avx512_4fmaps && xcr0_mask(0xe6))
+#define cpu_has_avx512_bf16 (cp.feat.avx512_bf16 && xcr0_mask(0xe6))
 
 #define cpu_has_xgetbv1   (cpu_has_xsave && cp.xstate.xgetbv1)
 
--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -1904,6 +1904,7 @@ in_protmode(
 #define vcpu_has_rdpid()       (ctxt->cpuid->feat.rdpid)
 #define vcpu_has_avx512_4vnniw() (ctxt->cpuid->feat.avx512_4vnniw)
 #define vcpu_has_avx512_4fmaps() (ctxt->cpuid->feat.avx512_4fmaps)
+#define vcpu_has_avx512_bf16() (ctxt->cpuid->feat.avx512_bf16)
 
 #define vcpu_must_have(feat) \
     generate_exception_if(!vcpu_has_##feat(), EXC_UD)
@@ -9152,6 +9153,19 @@ x86_emulate(
         generate_exception_if(evex.w, EXC_UD);
         goto avx512f_no_sae;
 
+    case X86EMUL_OPC_EVEX_F2(0x0f38, 0x72): /* vcvtne2ps2bf16 [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
+    case X86EMUL_OPC_EVEX_F3(0x0f38, 0x72): /* vcvtneps2bf16 [xyz]mm/mem,{x,y}mm{k} */
+        if ( evex.pfx == vex_f2 )
+            fault_suppression = false;
+        else
+            d |= TwoOp;
+        /* fall through */
+    case X86EMUL_OPC_EVEX_F3(0x0f38, 0x52): /* vdpbf16ps [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
+        host_and_vcpu_must_have(avx512_bf16);
+        generate_exception_if(evex.w, EXC_UD);
+        op_bytes = 16 << evex.lr;
+        goto avx512f_no_sae;
+
     case X86EMUL_OPC_VEX_66(0x0f38, 0x58): /* vpbroadcastd xmm/m32,{x,y}mm */
     case X86EMUL_OPC_VEX_66(0x0f38, 0x59): /* vpbroadcastq xmm/m64,{x,y}mm */
     case X86EMUL_OPC_VEX_66(0x0f38, 0x78): /* vpbroadcastb xmm/m8,{x,y}mm */
--- a/xen/include/asm-x86/cpufeature.h
+++ b/xen/include/asm-x86/cpufeature.h
@@ -129,6 +129,9 @@
 #define cpu_has_avx512_4fmaps   boot_cpu_has(X86_FEATURE_AVX512_4FMAPS)
 #define cpu_has_tsx_force_abort boot_cpu_has(X86_FEATURE_TSX_FORCE_ABORT)
 
+/* CPUID level 0x00000007:1.eax */
+#define cpu_has_avx512_bf16     boot_cpu_has(X86_FEATURE_AVX512_BF16)
+
 /* Synthesized. */
 #define cpu_has_arch_perfmon    boot_cpu_has(X86_FEATURE_ARCH_PERFMON)
 #define cpu_has_cpuid_faulting  boot_cpu_has(X86_FEATURE_CPUID_FAULTING)
--- a/xen/include/public/arch-x86/cpufeatureset.h
+++ b/xen/include/public/arch-x86/cpufeatureset.h
@@ -262,7 +262,7 @@ XEN_CPUFEATURE(CORE_CAPS,     9*32+30) /
 XEN_CPUFEATURE(SSBD,          9*32+31) /*A  MSR_SPEC_CTRL.SSBD available */
 
 /* Intel-defined CPU features, CPUID level 0x00000007:1.eax, word 10 */
-XEN_CPUFEATURE(AVX512_BF16,  10*32+ 5) /*   AVX512 BFloat16 Instructions */
+XEN_CPUFEATURE(AVX512_BF16,  10*32+ 5) /*A  AVX512 BFloat16 Instructions */
 
 #endif /* XEN_CPUFEATURE */
 



^ permalink raw reply	[flat|nested] 29+ messages in thread

* [Xen-devel] [PATCH v5 03/10] x86: determine HAVE_AS_* just once
  2020-03-24 12:26 [Xen-devel] [PATCH v5 00/10] x86emul: further work Jan Beulich
                   ` (2 preceding siblings ...)
  2020-03-24 12:30 ` [Xen-devel] [PATCH v5 01/10] x86emul: support AVX512_BF16 insns Jan Beulich
@ 2020-03-24 12:33 ` Jan Beulich
  2020-03-25 21:12   ` Andrew Cooper
  2020-03-24 12:33 ` [Xen-devel] [PATCH v5 04/10] x86: move back clang no integrated assembler tests Jan Beulich
                   ` (9 subsequent siblings)
  13 siblings, 1 reply; 29+ messages in thread
From: Jan Beulich @ 2020-03-24 12:33 UTC (permalink / raw)
  To: xen-devel
  Cc: Stefano Stabellini, Julien Grall, Wei Liu, Andrew Cooper,
	Ian Jackson, George Dunlap, Paul Durrant, Roger Pau Monne

With the exception of HAVE_AS_QUOTED_SYM, populate the results into a
generated header instead of (at least once per [sub]directory) into
CFLAGS. This results in proper rebuilds (via make dependencies) in case
the compiler used changes between builds. It additionally eases
inspection of which assembler features were actually found usable.

Some trickery is needed to avoid header generation itself to try to
include the to-be/not-yet-generated header.

Since the definitions in generated/config.h, previously having been
command line options, might even affect xen/config.h or its descendants,
move adding of the -include option for the latter after inclusion of the
per-arch Rules.mk. Use the occasion to also move the most general -I
option to the common Rules.mk.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
---
v5: Re-base.
v4: New.
---
An alternative to the $(MAKECMDGOALS) trickery would be to make
generation of generated/config.h part of the asm-offsets.s rule, instead
of adding it as a dependency there. Not sure whether either is truly
better than the other.

--- a/xen/Rules.mk
+++ b/xen/Rules.mk
@@ -55,7 +55,7 @@ endif
 CFLAGS += -nostdinc -fno-builtin -fno-common
 CFLAGS += -Werror -Wredundant-decls -Wno-pointer-arith
 $(call cc-option-add,CFLAGS,CC,-Wvla)
-CFLAGS += -pipe -D__XEN__ -include $(BASEDIR)/include/xen/config.h
+CFLAGS += -pipe -D__XEN__ -I$(BASEDIR)/include
 CFLAGS-$(CONFIG_DEBUG_INFO) += -g
 CFLAGS += '-D__OBJECT_FILE__="$@"'
 
@@ -95,6 +95,9 @@ SPECIAL_DATA_SECTIONS := rodata $(foreac
 
 include $(BASEDIR)/arch/$(TARGET_ARCH)/Rules.mk
 
+# Allow the arch to use -include ahead of this one.
+CFLAGS += -include xen/config.h
+
 include Makefile
 
 define gendep
--- a/xen/arch/arm/Rules.mk
+++ b/xen/arch/arm/Rules.mk
@@ -6,8 +6,6 @@
 # 'make clean' before rebuilding.
 #
 
-CFLAGS += -I$(BASEDIR)/include
-
 $(call cc-options-add,CFLAGS,CC,$(EMBEDDED_EXTRA_CFLAGS))
 $(call cc-option-add,CFLAGS,CC,-Wnested-externs)
 
--- a/xen/arch/x86/Makefile
+++ b/xen/arch/x86/Makefile
@@ -225,7 +225,8 @@ endif
 efi/boot.init.o efi/runtime.o efi/compat.o efi/buildid.o efi/relocs-dummy.o: $(BASEDIR)/arch/x86/efi/built_in.o
 efi/boot.init.o efi/runtime.o efi/compat.o efi/buildid.o efi/relocs-dummy.o: ;
 
-asm-offsets.s: $(TARGET_SUBARCH)/asm-offsets.c $(BASEDIR)/include/asm-x86/asm-macros.h
+asm-offsets.s: $(TARGET_SUBARCH)/asm-offsets.c $(BASEDIR)/include/asm-x86/asm-macros.h \
+	$(BASEDIR)/include/generated/config.h
 	$(CC) $(filter-out -Wa$(comma)% -flto,$(CFLAGS)) -S -o $@ $<
 
 asm-macros.i: CFLAGS += -D__ASSEMBLY__ -P
@@ -241,6 +242,45 @@ $(BASEDIR)/include/asm-x86/asm-macros.h:
 	echo '#endif' >>$@.new
 	$(call move-if-changed,$@.new,$@)
 
+# There are multiple invocations of this Makefile, one each for asm-offset.s,
+# $(TARGET), built_in.o, and several more from the rules building $(TARGET)
+# and $(TARGET).efi. The 2nd and 3rd may race with one another, and we don't
+# want to re-generate config.h in that case anyway, so guard the logic
+# accordingly. (We do want to have the FORCE dependency on the rule, to be
+# sure we pick up changes when the compiler used has changed.)
+ifeq ($(MAKECMDGOALS),asm-offsets.s)
+
+as-ISA-list := CLWB EPT FSGSBASE INVPCID RDRAND RDSEED SSE4_2 VMX XSAVEOPT
+
+CLWB-insn	:= clwb (%rax)
+EPT-insn	:= invept (%rax),%rax
+FSGSBASE-insn	:= rdfsbase %rax
+INVPCID-insn	:= invpcid (%rax),%rax
+RDRAND-insn	:= rdrand %eax
+RDSEED-insn	:= rdseed %eax
+SSE4_2-insn	:= crc32 %eax,%eax
+VMX-insn	:= vmcall
+XSAVEOPT-insn	:= xsaveopt (%rax)
+
+# GAS's idea of true is -1.  Clang's idea is 1.
+NEGATIVE_TRUE-insn := .if ((1 > 0) > 0); .error \"\"; .endif
+
+# Check to see whether the assembler supports the .nops directive.
+NOPS_DIRECTIVE-insn := .L1: .L2: .nops (.L2 - .L1),9
+
+as-features-list := $(as-ISA-list) NEGATIVE_TRUE NOPS_DIRECTIVE
+
+$(BASEDIR)/include/generated/config.h: FORCE
+	echo '/* Generated header, do not edit. */' >$@.new
+	$(foreach f,$(as-features-list), \
+	  $(if $($(f)-insn),,$(error $(f)-insn is empty)) \
+	  echo '#$(call as-insn,$(CC) $(CFLAGS),"$($(f)-insn)", \
+	           define, \
+	           undef) HAVE_AS_$(f) /* $($(f)-insn) */' >>$@.new;)
+	$(call move-if-changed,$@.new,$@)
+
+endif
+
 efi.lds: AFLAGS += -DEFI
 xen.lds efi.lds: xen.lds.S
 	$(CC) -P -E -Ui386 $(filter-out -Wa$(comma)%,$(AFLAGS)) -o $@ $<
--- a/xen/arch/x86/Rules.mk
+++ b/xen/arch/x86/Rules.mk
@@ -3,7 +3,7 @@
 
 XEN_IMG_OFFSET := 0x200000
 
-CFLAGS += -I$(BASEDIR)/include
+CFLAGS += $(if $(filter asm-macros.% %/generated/config.h,$@),,-include generated/config.h)
 CFLAGS += -I$(BASEDIR)/include/asm-x86/mach-generic
 CFLAGS += -I$(BASEDIR)/include/asm-x86/mach-default
 CFLAGS += -DXEN_IMG_OFFSET=$(XEN_IMG_OFFSET)
@@ -38,26 +38,9 @@ endif
 
 $(call cc-options-add,CFLAGS,CC,$(EMBEDDED_EXTRA_CFLAGS))
 $(call cc-option-add,CFLAGS,CC,-Wnested-externs)
-$(call as-option-add,CFLAGS,CC,"vmcall",-DHAVE_AS_VMX)
-$(call as-option-add,CFLAGS,CC,"crc32 %eax$$(comma)%eax",-DHAVE_AS_SSE4_2)
-$(call as-option-add,CFLAGS,CC,"invept (%rax)$$(comma)%rax",-DHAVE_AS_EPT)
-$(call as-option-add,CFLAGS,CC,"rdrand %eax",-DHAVE_AS_RDRAND)
-$(call as-option-add,CFLAGS,CC,"rdfsbase %rax",-DHAVE_AS_FSGSBASE)
-$(call as-option-add,CFLAGS,CC,"xsaveopt (%rax)",-DHAVE_AS_XSAVEOPT)
-$(call as-option-add,CFLAGS,CC,"rdseed %eax",-DHAVE_AS_RDSEED)
-$(call as-option-add,CFLAGS,CC,"clwb (%rax)",-DHAVE_AS_CLWB)
 $(call as-option-add,CFLAGS,CC,".equ \"x\"$$(comma)1", \
                      -U__OBJECT_LABEL__ -DHAVE_AS_QUOTED_SYM \
                      '-D__OBJECT_LABEL__=$(subst $(BASEDIR)/,,$(CURDIR))/$$@')
-$(call as-option-add,CFLAGS,CC,"invpcid (%rax)$$(comma)%rax",-DHAVE_AS_INVPCID)
-
-# GAS's idea of true is -1.  Clang's idea is 1
-$(call as-option-add,CFLAGS,CC,\
-    ".if ((1 > 0) < 0); .error \"\";.endif",,-DHAVE_AS_NEGATIVE_TRUE)
-
-# Check to see whether the assmbler supports the .nop directive.
-$(call as-option-add,CFLAGS,CC,\
-    ".L1: .L2: .nops (.L2 - .L1)$$(comma)9",-DHAVE_AS_NOPS_DIRECTIVE)
 
 CFLAGS += -mno-red-zone -fpic -fno-asynchronous-unwind-tables
 
--- a/xen/include/Makefile
+++ b/xen/include/Makefile
@@ -64,7 +64,7 @@ compat/%.h: compat/%.i Makefile $(BASEDI
 	mv -f $@.new $@
 
 compat/%.i: compat/%.c Makefile
-	$(CPP) $(filter-out -Wa$(comma)% -M% %.d -include %/include/xen/config.h,$(CFLAGS)) $(cppflags-y) -o $@ $<
+	$(CPP) $(filter-out -Wa$(comma)% -M% %.d -include %/config.h,$(CFLAGS)) $(cppflags-y) -o $@ $<
 
 compat/%.c: public/%.h xlat.lst Makefile $(BASEDIR)/tools/compat-build-source.py
 	mkdir -p $(@D)
--- a/xen/scripts/Kbuild.include
+++ b/xen/scripts/Kbuild.include
@@ -10,7 +10,7 @@ DEPS_INCLUDE = $(addsuffix .d2, $(basena
 # as-insn: Check whether assembler supports an instruction.
 # Usage: cflags-y += $(call as-insn,CC FLAGS,"insn",option-yes,option-no)
 as-insn = $(if $(shell echo 'void _(void) { asm volatile ( $(2) ); }' \
-                       | $(filter-out -M% %.d -include %/include/xen/config.h,$(1)) \
+                       | $(filter-out -M% %.d -include %/config.h,$(1)) \
                               -c -x c -o /dev/null - 2>&1),$(4),$(3))
 
 # as-option-add: Conditionally add options to flags



^ permalink raw reply	[flat|nested] 29+ messages in thread

* [Xen-devel] [PATCH v5 04/10] x86: move back clang no integrated assembler tests
  2020-03-24 12:26 [Xen-devel] [PATCH v5 00/10] x86emul: further work Jan Beulich
                   ` (3 preceding siblings ...)
  2020-03-24 12:33 ` [Xen-devel] [PATCH v5 03/10] x86: determine HAVE_AS_* just once Jan Beulich
@ 2020-03-24 12:33 ` Jan Beulich
  2020-03-24 12:34 ` [Xen-devel] [PATCH v5 05/10] x86emul: support MOVDIR64B insn Jan Beulich
                   ` (8 subsequent siblings)
  13 siblings, 0 replies; 29+ messages in thread
From: Jan Beulich @ 2020-03-24 12:33 UTC (permalink / raw)
  To: xen-devel; +Cc: Andrew Cooper, Paul Durrant, Wei Liu, Roger Pau Monne

This largely reverts f19af2f1138e ("x86: re-order clang no integrated
assembler tests"): Other CFLAGS setup would better happen first, in case
any of it affects the behavior of the integrated assembler. The comment
addition of course doesn't get undone. The only remaining as-option-add
invocation gets moved down in addition.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: Roger Pau Monné <roger.pau@citrix.com>
---
v5: Re-base.
v4: New.

--- a/xen/arch/x86/Rules.mk
+++ b/xen/arch/x86/Rules.mk
@@ -12,35 +12,8 @@ CFLAGS += '-D__OBJECT_LABEL__=$(subst /,
 # Prevent floating-point variables from creeping into Xen.
 CFLAGS += -msoft-float
 
-ifeq ($(CONFIG_CC_IS_CLANG),y)
-# Note: Any test which adds -no-integrated-as will cause subsequent tests to
-# succeed, and not trigger further additions.
-#
-# The tests to select whether the integrated assembler is usable need to happen
-# before testing any assembler features, or else the result of the tests would
-# be stale if the integrated assembler is not used.
-
-# Older clang's built-in assembler doesn't understand .skip with labels:
-# https://bugs.llvm.org/show_bug.cgi?id=27369
-$(call as-option-add,CFLAGS,CC,".L0: .L1: .skip (.L1 - .L0)",,\
-                     -no-integrated-as)
-
-# Check whether clang asm()-s support .include.
-$(call as-option-add,CFLAGS,CC,".include \"asm-x86/indirect_thunk_asm.h\"",,\
-                     -no-integrated-as)
-
-# Check whether clang keeps .macro-s between asm()-s:
-# https://bugs.llvm.org/show_bug.cgi?id=36110
-$(call as-option-add,CFLAGS,CC,\
-                     ".macro FOO;.endm"$$(close); asm volatile $$(open)".macro FOO;.endm",\
-                     -no-integrated-as)
-endif
-
 $(call cc-options-add,CFLAGS,CC,$(EMBEDDED_EXTRA_CFLAGS))
 $(call cc-option-add,CFLAGS,CC,-Wnested-externs)
-$(call as-option-add,CFLAGS,CC,".equ \"x\"$$(comma)1", \
-                     -U__OBJECT_LABEL__ -DHAVE_AS_QUOTED_SYM \
-                     '-D__OBJECT_LABEL__=$(subst $(BASEDIR)/,,$(CURDIR))/$$@')
 
 CFLAGS += -mno-red-zone -fpic -fno-asynchronous-unwind-tables
 
@@ -70,3 +43,30 @@ endif
 # Set up the assembler include path properly for older toolchains.
 CFLAGS += -Wa,-I$(BASEDIR)/include
 
+ifeq ($(CONFIG_CC_IS_CLANG),y)
+# Note: Any test which adds -no-integrated-as will cause subsequent tests to
+# succeed, and not trigger further additions.
+#
+# The tests to select whether the integrated assembler is usable need to happen
+# before testing any assembler features, or else the result of the tests would
+# be stale if the integrated assembler is not used.
+
+# Older clang's built-in assembler doesn't understand .skip with labels:
+# https://bugs.llvm.org/show_bug.cgi?id=27369
+$(call as-option-add,CFLAGS,CC,".L0: .L1: .skip (.L1 - .L0)",,\
+                     -no-integrated-as)
+
+# Check whether clang asm()-s support .include.
+$(call as-option-add,CFLAGS,CC,".include \"asm-x86/indirect_thunk_asm.h\"",,\
+                     -no-integrated-as)
+
+# Check whether clang keeps .macro-s between asm()-s:
+# https://bugs.llvm.org/show_bug.cgi?id=36110
+$(call as-option-add,CFLAGS,CC,\
+                     ".macro FOO;.endm"$$(close); asm volatile $$(open)".macro FOO;.endm",\
+                     -no-integrated-as)
+endif
+
+$(call as-option-add,CFLAGS,CC,".equ \"x\"$$(comma)1", \
+                     -U__OBJECT_LABEL__ -DHAVE_AS_QUOTED_SYM \
+                     '-D__OBJECT_LABEL__=$(subst $(BASEDIR)/,,$(CURDIR))/$$@')



^ permalink raw reply	[flat|nested] 29+ messages in thread

* [Xen-devel] [PATCH v5 05/10] x86emul: support MOVDIR64B insn
  2020-03-24 12:26 [Xen-devel] [PATCH v5 00/10] x86emul: further work Jan Beulich
                   ` (4 preceding siblings ...)
  2020-03-24 12:33 ` [Xen-devel] [PATCH v5 04/10] x86: move back clang no integrated assembler tests Jan Beulich
@ 2020-03-24 12:34 ` Jan Beulich
  2020-03-25 11:19   ` Paul Durrant
  2020-03-24 12:34 ` [Xen-devel] [PATCH v5 06/10] x86emul: support ENQCMD insn Jan Beulich
                   ` (7 subsequent siblings)
  13 siblings, 1 reply; 29+ messages in thread
From: Jan Beulich @ 2020-03-24 12:34 UTC (permalink / raw)
  To: xen-devel; +Cc: Andrew Cooper, Paul Durrant, Wei Liu, Roger Pau Monne

Introduce a new blk() hook, paralleling the rmw() on in certain way, but
being intended for larger data sizes, and hence its HVM intermediate
handling function doesn't fall back to splitting the operation if the
requested virtual address can't be mapped.

Note that SDM revision 071 doesn't specify exception behavior for
ModRM.mod == 0b11; assuming #UD here.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
---
TBD: If we want to avoid depending on correct MTRR settings,
     hvmemul_map_linear_addr() may need to gain a parameter to allow
     controlling cachability of the produced mapping(s). Of course the
     function will also need to be made capable of mapping at least
     p2m_mmio_direct pages for this and the two ENQCMD insns to be
     actually useful.
---
v5: Introduce/use ->blk() hook. Correct asm() operands.
v4: Split MOVDIRI and MOVDIR64B. Switch to using ->rmw(). Re-base.
v3: Update description.
---
(SDE: -tnt)

--- a/tools/tests/x86_emulator/test_x86_emulator.c
+++ b/tools/tests/x86_emulator/test_x86_emulator.c
@@ -652,6 +652,18 @@ static int cmpxchg(
     return X86EMUL_OKAY;
 }
 
+static int blk(
+    enum x86_segment seg,
+    unsigned long offset,
+    void *p_data,
+    unsigned int bytes,
+    uint32_t *eflags,
+    struct x86_emulate_state *state,
+    struct x86_emulate_ctxt *ctxt)
+{
+    return x86_emul_blk((void *)offset, p_data, bytes, eflags, state, ctxt);
+}
+
 static int read_segment(
     enum x86_segment seg,
     struct segment_register *reg,
@@ -2208,6 +2220,35 @@ int main(int argc, char **argv)
         goto fail;
     printf("okay\n");
 
+    printf("%-40s", "Testing movdir64b 144(%edx),%ecx...");
+    if ( stack_exec && cpu_has_movdir64b )
+    {
+        emulops.blk = blk;
+
+        instr[0] = 0x66; instr[1] = 0x0f; instr[2] = 0x38; instr[3] = 0xf8;
+        instr[4] = 0x8a; instr[5] = 0x90; instr[8] = instr[7] = instr[6] = 0;
+
+        regs.eip = (unsigned long)&instr[0];
+        for ( i = 0; i < 64; ++i )
+            res[i] = i - 20;
+        regs.edx = (unsigned long)res;
+        regs.ecx = (unsigned long)(res + 16);
+
+        rc = x86_emulate(&ctxt, &emulops);
+        if ( (rc != X86EMUL_OKAY) ||
+             (regs.eip != (unsigned long)&instr[9]) ||
+             res[15] != -5 || res[32] != 12 )
+            goto fail;
+        for ( i = 16; i < 32; ++i )
+            if ( res[i] != i )
+                goto fail;
+        printf("okay\n");
+
+        emulops.blk = NULL;
+    }
+    else
+        printf("skipped\n");
+
     printf("%-40s", "Testing movq %mm3,(%ecx)...");
     if ( stack_exec && cpu_has_mmx )
     {
--- a/tools/tests/x86_emulator/x86-emulate.h
+++ b/tools/tests/x86_emulator/x86-emulate.h
@@ -154,6 +154,7 @@ static inline bool xcr0_mask(uint64_t ma
 #define cpu_has_avx512_vnni (cp.feat.avx512_vnni && xcr0_mask(0xe6))
 #define cpu_has_avx512_bitalg (cp.feat.avx512_bitalg && xcr0_mask(0xe6))
 #define cpu_has_avx512_vpopcntdq (cp.feat.avx512_vpopcntdq && xcr0_mask(0xe6))
+#define cpu_has_movdir64b  cp.feat.movdir64b
 #define cpu_has_avx512_4vnniw (cp.feat.avx512_4vnniw && xcr0_mask(0xe6))
 #define cpu_has_avx512_4fmaps (cp.feat.avx512_4fmaps && xcr0_mask(0xe6))
 #define cpu_has_avx512_bf16 (cp.feat.avx512_bf16 && xcr0_mask(0xe6))
--- a/xen/arch/x86/Makefile
+++ b/xen/arch/x86/Makefile
@@ -250,12 +250,13 @@ $(BASEDIR)/include/asm-x86/asm-macros.h:
 # sure we pick up changes when the compiler used has changed.)
 ifeq ($(MAKECMDGOALS),asm-offsets.s)
 
-as-ISA-list := CLWB EPT FSGSBASE INVPCID RDRAND RDSEED SSE4_2 VMX XSAVEOPT
+as-ISA-list := CLWB EPT FSGSBASE INVPCID MOVDIR64B RDRAND RDSEED SSE4_2 VMX XSAVEOPT
 
 CLWB-insn	:= clwb (%rax)
 EPT-insn	:= invept (%rax),%rax
 FSGSBASE-insn	:= rdfsbase %rax
 INVPCID-insn	:= invpcid (%rax),%rax
+MOVDIR64B-insn	:= movdir64b (%rax),%rax
 RDRAND-insn	:= rdrand %eax
 RDSEED-insn	:= rdseed %eax
 SSE4_2-insn	:= crc32 %eax,%eax
--- a/xen/arch/x86/hvm/emulate.c
+++ b/xen/arch/x86/hvm/emulate.c
@@ -1409,6 +1409,44 @@ static int hvmemul_rmw(
     return rc;
 }
 
+static int hvmemul_blk(
+    enum x86_segment seg,
+    unsigned long offset,
+    void *p_data,
+    unsigned int bytes,
+    uint32_t *eflags,
+    struct x86_emulate_state *state,
+    struct x86_emulate_ctxt *ctxt)
+{
+    struct hvm_emulate_ctxt *hvmemul_ctxt =
+        container_of(ctxt, struct hvm_emulate_ctxt, ctxt);
+    unsigned long addr;
+    uint32_t pfec = PFEC_page_present | PFEC_write_access;
+    int rc;
+    void *mapping = NULL;
+
+    rc = hvmemul_virtual_to_linear(
+        seg, offset, bytes, NULL, hvm_access_write, hvmemul_ctxt, &addr);
+    if ( rc != X86EMUL_OKAY || !bytes )
+        return rc;
+
+    if ( is_x86_system_segment(seg) )
+        pfec |= PFEC_implicit;
+    else if ( hvmemul_ctxt->seg_reg[x86_seg_ss].dpl == 3 )
+        pfec |= PFEC_user_mode;
+
+    mapping = hvmemul_map_linear_addr(addr, bytes, pfec, hvmemul_ctxt);
+    if ( IS_ERR(mapping) )
+        return ~PTR_ERR(mapping);
+    if ( !mapping )
+        return X86EMUL_UNHANDLEABLE;
+
+    rc = x86_emul_blk(mapping, p_data, bytes, eflags, state, ctxt);
+    hvmemul_unmap_linear_addr(mapping, addr, bytes, hvmemul_ctxt);
+
+    return rc;
+}
+
 static int hvmemul_write_discard(
     enum x86_segment seg,
     unsigned long offset,
@@ -2475,6 +2513,7 @@ static const struct x86_emulate_ops hvm_
     .write         = hvmemul_write,
     .rmw           = hvmemul_rmw,
     .cmpxchg       = hvmemul_cmpxchg,
+    .blk           = hvmemul_blk,
     .validate      = hvmemul_validate,
     .rep_ins       = hvmemul_rep_ins,
     .rep_outs      = hvmemul_rep_outs,
--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -548,6 +548,7 @@ static const struct ext0f38_table {
     [0xf1] = { .to_mem = 1, .two_op = 1 },
     [0xf2 ... 0xf3] = {},
     [0xf5 ... 0xf7] = {},
+    [0xf8] = { .simd_size = simd_other },
     [0xf9] = { .to_mem = 1 },
 };
 
@@ -852,6 +853,9 @@ struct x86_emulate_state {
         rmw_xchg,
         rmw_xor,
     } rmw;
+    enum {
+        blk_movdir,
+    } blk;
     uint8_t modrm, modrm_mod, modrm_reg, modrm_rm;
     uint8_t sib_index, sib_scale;
     uint8_t rex_prefix;
@@ -1904,6 +1908,7 @@ in_protmode(
 #define vcpu_has_avx512_vpopcntdq() (ctxt->cpuid->feat.avx512_vpopcntdq)
 #define vcpu_has_rdpid()       (ctxt->cpuid->feat.rdpid)
 #define vcpu_has_movdiri()     (ctxt->cpuid->feat.movdiri)
+#define vcpu_has_movdir64b()   (ctxt->cpuid->feat.movdir64b)
 #define vcpu_has_avx512_4vnniw() (ctxt->cpuid->feat.avx512_4vnniw)
 #define vcpu_has_avx512_4fmaps() (ctxt->cpuid->feat.avx512_4fmaps)
 #define vcpu_has_avx512_bf16() (ctxt->cpuid->feat.avx512_bf16)
@@ -10079,6 +10084,23 @@ x86_emulate(
                             : "0" ((uint32_t)src.val), "rm" (_regs.edx) );
         break;
 
+    case X86EMUL_OPC_66(0x0f38, 0xf8): /* movdir64b r,m512 */
+        host_and_vcpu_must_have(movdir64b);
+        generate_exception_if(ea.type != OP_MEM, EXC_UD);
+        src.val = truncate_ea(*dst.reg);
+        generate_exception_if(!is_aligned(x86_seg_es, src.val, 64, ctxt, ops),
+                              EXC_GP, 0);
+        fail_if(!ops->blk);
+        state->blk = blk_movdir;
+        BUILD_BUG_ON(sizeof(*mmvalp) < 64);
+        if ( (rc = ops->read(ea.mem.seg, ea.mem.off, mmvalp, 64,
+                             ctxt)) != X86EMUL_OKAY ||
+             (rc = ops->blk(x86_seg_es, src.val, mmvalp, 64, &_regs.eflags,
+                            state, ctxt)) != X86EMUL_OKAY )
+            goto done;
+        state->simd_size = simd_none;
+        break;
+
     case X86EMUL_OPC(0x0f38, 0xf9): /* movdiri mem,r */
         vcpu_must_have(movdiri);
         generate_exception_if(dst.type != OP_MEM, EXC_UD);
@@ -11345,6 +11367,45 @@ int x86_emul_rmw(
 
     return X86EMUL_OKAY;
 }
+
+int x86_emul_blk(
+    void *ptr,
+    void *data,
+    unsigned int bytes,
+    uint32_t *eflags,
+    struct x86_emulate_state *state,
+    struct x86_emulate_ctxt *ctxt)
+{
+    switch ( state->blk )
+    {
+        /*
+         * Throughout this switch(), memory clobbers are used to compensate
+         * that other operands may not properly express the (full) memory
+         * ranges covered.
+         */
+    case blk_movdir:
+        ASSERT(bytes == 64);
+        if ( ((unsigned long)ptr & 0x3f) )
+        {
+            ASSERT_UNREACHABLE();
+            return X86EMUL_UNHANDLEABLE;
+        }
+#ifdef HAVE_AS_MOVDIR64B
+        asm ( "movdir64b (%0), %1" :: "r" (data), "r" (ptr) : "memory" );
+#else
+        /* movdir64b (%rsi), %rdi */
+        asm ( ".byte 0x66, 0x0f, 0x38, 0xf8, 0x3e"
+              :: "S" (data), "D" (ptr) : "memory" );
+#endif
+        break;
+
+    default:
+        ASSERT_UNREACHABLE();
+        return X86EMUL_UNHANDLEABLE;
+    }
+
+    return X86EMUL_OKAY;
+}
 
 static void __init __maybe_unused build_assertions(void)
 {
--- a/xen/arch/x86/x86_emulate/x86_emulate.h
+++ b/xen/arch/x86/x86_emulate/x86_emulate.h
@@ -310,6 +310,22 @@ struct x86_emulate_ops
         struct x86_emulate_ctxt *ctxt);
 
     /*
+     * blk: Emulate a large (block) memory access.
+     * @p_data: [IN/OUT] (optional) Pointer to source/destination buffer.
+     * @eflags: [IN/OUT] Pointer to EFLAGS to be updated according to
+     *                   instruction effects.
+     * @state:  [IN/OUT] Pointer to (opaque) emulator state.
+     */
+    int (*blk)(
+        enum x86_segment seg,
+        unsigned long offset,
+        void *p_data,
+        unsigned int bytes,
+        uint32_t *eflags,
+        struct x86_emulate_state *state,
+        struct x86_emulate_ctxt *ctxt);
+
+    /*
      * validate: Post-decode, pre-emulate hook to allow caller controlled
      * filtering.
      */
@@ -793,6 +809,14 @@ x86_emul_rmw(
     unsigned int bytes,
     uint32_t *eflags,
     struct x86_emulate_state *state,
+    struct x86_emulate_ctxt *ctxt);
+int
+x86_emul_blk(
+    void *ptr,
+    void *data,
+    unsigned int bytes,
+    uint32_t *eflags,
+    struct x86_emulate_state *state,
     struct x86_emulate_ctxt *ctxt);
 
 static inline void x86_emul_hw_exception(
--- a/xen/include/asm-x86/cpufeature.h
+++ b/xen/include/asm-x86/cpufeature.h
@@ -120,6 +120,7 @@
 #define cpu_has_avx512_bitalg   boot_cpu_has(X86_FEATURE_AVX512_BITALG)
 #define cpu_has_avx512_vpopcntdq boot_cpu_has(X86_FEATURE_AVX512_VPOPCNTDQ)
 #define cpu_has_rdpid           boot_cpu_has(X86_FEATURE_RDPID)
+#define cpu_has_movdir64b       boot_cpu_has(X86_FEATURE_MOVDIR64B)
 
 /* CPUID level 0x80000007.edx */
 #define cpu_has_itsc            boot_cpu_has(X86_FEATURE_ITSC)
--- a/xen/include/public/arch-x86/cpufeatureset.h
+++ b/xen/include/public/arch-x86/cpufeatureset.h
@@ -238,6 +238,7 @@ XEN_CPUFEATURE(AVX512_VPOPCNTDQ, 6*32+14
 XEN_CPUFEATURE(RDPID,         6*32+22) /*A  RDPID instruction */
 XEN_CPUFEATURE(CLDEMOTE,      6*32+25) /*A  CLDEMOTE instruction */
 XEN_CPUFEATURE(MOVDIRI,       6*32+27) /*A  MOVDIRI instruction */
+XEN_CPUFEATURE(MOVDIR64B,     6*32+28) /*A  MOVDIR64B instruction */
 
 /* AMD-defined CPU features, CPUID level 0x80000007.edx, word 7 */
 XEN_CPUFEATURE(ITSC,          7*32+ 8) /*   Invariant TSC */



^ permalink raw reply	[flat|nested] 29+ messages in thread

* [Xen-devel] [PATCH v5 06/10] x86emul: support ENQCMD insn
  2020-03-24 12:26 [Xen-devel] [PATCH v5 00/10] x86emul: further work Jan Beulich
                   ` (5 preceding siblings ...)
  2020-03-24 12:34 ` [Xen-devel] [PATCH v5 05/10] x86emul: support MOVDIR64B insn Jan Beulich
@ 2020-03-24 12:34 ` Jan Beulich
  2020-03-24 12:35 ` [Xen-devel] [PATCH v5 07/10] x86/HVM: scale MPERF values reported to guests (on AMD) Jan Beulich
                   ` (6 subsequent siblings)
  13 siblings, 0 replies; 29+ messages in thread
From: Jan Beulich @ 2020-03-24 12:34 UTC (permalink / raw)
  To: xen-devel; +Cc: Andrew Cooper, Paul Durrant, Wei Liu, Roger Pau Monne

Introduce a new blk() hook, paralleling the rmw() on in certain way, but
being intended for larger data sizes, and hence its HVM intermediate
handling function doesn't fall back to splitting the operation if the
requested virtual address can't be mapped.

Note that the ISA extensions document revision 037 doesn't specify
exception behavior for ModRM.mod == 0b11; assuming #UD here.

No tests are being added to the harness - this would be quite hard,
we can't just issue the insns against RAM. Their similarity with
MOVDIR64B should have the test case there be god enough to cover any
fundamental flaws.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
---
TBD: This doesn't (can't) consult PASID translation tables yet, as we
     have no VMX code for this so far. I guess for this we will want to
     replace the direct ->read_msr(MSR_IA32_PASID, ...) with a new
     ->read_pasid() hook.
---
v5: New.

--- a/xen/arch/x86/Makefile
+++ b/xen/arch/x86/Makefile
@@ -250,13 +250,14 @@ $(BASEDIR)/include/asm-x86/asm-macros.h:
 # sure we pick up changes when the compiler used has changed.)
 ifeq ($(MAKECMDGOALS),asm-offsets.s)
 
-as-ISA-list := CLWB EPT FSGSBASE INVPCID MOVDIR64B RDRAND RDSEED SSE4_2 VMX XSAVEOPT
+as-ISA-list := CLWB ENQCMD EPT FSGSBASE INVPCID MOVDIR64B RDRAND RDSEED SSE4_2 VMX XSAVEOPT
 
 CLWB-insn	:= clwb (%rax)
 EPT-insn	:= invept (%rax),%rax
 FSGSBASE-insn	:= rdfsbase %rax
 INVPCID-insn	:= invpcid (%rax),%rax
 MOVDIR64B-insn	:= movdir64b (%rax),%rax
+ENQCMD-insn	:= enqcmd (%rax),%rax
 RDRAND-insn	:= rdrand %eax
 RDSEED-insn	:= rdseed %eax
 SSE4_2-insn	:= crc32 %eax,%eax
--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -854,6 +854,7 @@ struct x86_emulate_state {
         rmw_xor,
     } rmw;
     enum {
+        blk_enqcmd,
         blk_movdir,
     } blk;
     uint8_t modrm, modrm_mod, modrm_reg, modrm_rm;
@@ -900,6 +901,7 @@ typedef union {
     uint64_t __attribute__ ((aligned(16))) xmm[2];
     uint64_t __attribute__ ((aligned(32))) ymm[4];
     uint64_t __attribute__ ((aligned(64))) zmm[8];
+    uint32_t data32[16];
 } mmval_t;
 
 /*
@@ -1909,6 +1911,7 @@ in_protmode(
 #define vcpu_has_rdpid()       (ctxt->cpuid->feat.rdpid)
 #define vcpu_has_movdiri()     (ctxt->cpuid->feat.movdiri)
 #define vcpu_has_movdir64b()   (ctxt->cpuid->feat.movdir64b)
+#define vcpu_has_enqcmd()      (ctxt->cpuid->feat.enqcmd)
 #define vcpu_has_avx512_4vnniw() (ctxt->cpuid->feat.avx512_4vnniw)
 #define vcpu_has_avx512_4fmaps() (ctxt->cpuid->feat.avx512_4fmaps)
 #define vcpu_has_avx512_bf16() (ctxt->cpuid->feat.avx512_bf16)
@@ -10101,6 +10104,36 @@ x86_emulate(
         state->simd_size = simd_none;
         break;
 
+    case X86EMUL_OPC_F2(0x0f38, 0xf8): /* enqcmd r,m512 */
+    case X86EMUL_OPC_F3(0x0f38, 0xf8): /* enqcmds r,m512 */
+        host_and_vcpu_must_have(enqcmd);
+        generate_exception_if(ea.type != OP_MEM, EXC_UD);
+        generate_exception_if(vex.pfx != vex_f2 && !mode_ring0(), EXC_GP, 0);
+        src.val = truncate_ea(*dst.reg);
+        generate_exception_if(!is_aligned(x86_seg_es, src.val, 64, ctxt, ops),
+                              EXC_GP, 0);
+        fail_if(!ops->blk);
+        BUILD_BUG_ON(sizeof(*mmvalp) < 64);
+        if ( (rc = ops->read(ea.mem.seg, ea.mem.off, mmvalp, 64,
+                             ctxt)) != X86EMUL_OKAY )
+            goto done;
+        if ( vex.pfx == vex_f2 ) /* enqcmd */
+        {
+            fail_if(!ops->read_msr);
+            if ( (rc = ops->read_msr(MSR_IA32_PASID,
+                                     &msr_val, ctxt)) != X86EMUL_OKAY )
+                goto done;
+            generate_exception_if(!(msr_val & PASID_VALID), EXC_GP, 0);
+            mmvalp->data32[0] = MASK_EXTR(msr_val, PASID_PASID_MASK);
+        }
+        mmvalp->data32[0] &= ~0x7ff00000;
+        state->blk = blk_enqcmd;
+        if ( (rc = ops->blk(x86_seg_es, src.val, mmvalp, 64, &_regs.eflags,
+                            state, ctxt)) != X86EMUL_OKAY )
+            goto done;
+        state->simd_size = simd_none;
+        break;
+
     case X86EMUL_OPC(0x0f38, 0xf9): /* movdiri mem,r */
         vcpu_must_have(movdiri);
         generate_exception_if(dst.type != OP_MEM, EXC_UD);
@@ -11378,11 +11411,36 @@ int x86_emul_blk(
 {
     switch ( state->blk )
     {
+        bool zf;
+
         /*
          * Throughout this switch(), memory clobbers are used to compensate
          * that other operands may not properly express the (full) memory
          * ranges covered.
          */
+    case blk_enqcmd:
+        ASSERT(bytes == 64);
+        if ( ((unsigned long)ptr & 0x3f) )
+        {
+            ASSERT_UNREACHABLE();
+            return X86EMUL_UNHANDLEABLE;
+        }
+        *eflags &= ~EFLAGS_MASK;
+#ifdef HAVE_AS_ENQCMD
+        asm ( "enqcmds (%[src]), %[dst]" ASM_FLAG_OUT(, "; setz %0")
+              : [zf] ASM_FLAG_OUT("=@ccz", "=qm") (zf)
+              : [src] "r" (data), [dst] "r" (ptr) : "memory" );
+#else
+        /* enqcmds (%rsi), %rdi */
+        asm ( ".byte 0xf3, 0x0f, 0x38, 0xf8, 0x3e"
+              ASM_FLAG_OUT(, "; setz %[zf]")
+              : [zf] ASM_FLAG_OUT("=@ccz", "=qm") (zf)
+              : "S" (data), "D" (ptr) : "memory" );
+#endif
+        if ( zf )
+            *eflags |= X86_EFLAGS_ZF;
+        break;
+
     case blk_movdir:
         ASSERT(bytes == 64);
         if ( ((unsigned long)ptr & 0x3f) )
--- a/xen/include/asm-x86/cpufeature.h
+++ b/xen/include/asm-x86/cpufeature.h
@@ -121,6 +121,7 @@
 #define cpu_has_avx512_vpopcntdq boot_cpu_has(X86_FEATURE_AVX512_VPOPCNTDQ)
 #define cpu_has_rdpid           boot_cpu_has(X86_FEATURE_RDPID)
 #define cpu_has_movdir64b       boot_cpu_has(X86_FEATURE_MOVDIR64B)
+#define cpu_has_enqcmd          boot_cpu_has(X86_FEATURE_ENQCMD)
 
 /* CPUID level 0x80000007.edx */
 #define cpu_has_itsc            boot_cpu_has(X86_FEATURE_ITSC)
--- a/xen/include/asm-x86/msr-index.h
+++ b/xen/include/asm-x86/msr-index.h
@@ -412,6 +412,10 @@
 #define MSR_IA32_TSC_DEADLINE		0x000006E0
 #define MSR_IA32_ENERGY_PERF_BIAS	0x000001b0
 
+#define MSR_IA32_PASID			0x00000d93
+#define  PASID_PASID_MASK		0x000fffff
+#define  PASID_VALID			0x80000000
+
 /* Platform Shared Resource MSRs */
 #define MSR_IA32_CMT_EVTSEL		0x00000c8d
 #define MSR_IA32_CMT_EVTSEL_UE_MASK	0x0000ffff
--- a/xen/include/public/arch-x86/cpufeatureset.h
+++ b/xen/include/public/arch-x86/cpufeatureset.h
@@ -239,6 +239,7 @@ XEN_CPUFEATURE(RDPID,         6*32+22) /
 XEN_CPUFEATURE(CLDEMOTE,      6*32+25) /*A  CLDEMOTE instruction */
 XEN_CPUFEATURE(MOVDIRI,       6*32+27) /*A  MOVDIRI instruction */
 XEN_CPUFEATURE(MOVDIR64B,     6*32+28) /*A  MOVDIR64B instruction */
+XEN_CPUFEATURE(ENQCMD,        6*32+29) /*   ENQCMD{,S} instructions */
 
 /* AMD-defined CPU features, CPUID level 0x80000007.edx, word 7 */
 XEN_CPUFEATURE(ITSC,          7*32+ 8) /*   Invariant TSC */



^ permalink raw reply	[flat|nested] 29+ messages in thread

* [Xen-devel] [PATCH v5 07/10] x86/HVM: scale MPERF values reported to guests (on AMD)
  2020-03-24 12:26 [Xen-devel] [PATCH v5 00/10] x86emul: further work Jan Beulich
                   ` (6 preceding siblings ...)
  2020-03-24 12:34 ` [Xen-devel] [PATCH v5 06/10] x86emul: support ENQCMD insn Jan Beulich
@ 2020-03-24 12:35 ` Jan Beulich
  2020-03-24 12:36 ` [Xen-devel] [PATCH v5 08/10] x86emul: support RDPRU Jan Beulich
                   ` (5 subsequent siblings)
  13 siblings, 0 replies; 29+ messages in thread
From: Jan Beulich @ 2020-03-24 12:35 UTC (permalink / raw)
  To: xen-devel; +Cc: Andrew Cooper, Paul Durrant, Wei Liu, Roger Pau Monne

AMD's PM specifies that MPERF (and its r/o counterpart) reads are
affected by the TSC ratio. Hence when processing such reads in software
we too should scale the values. While we don't currently (yet) expose
the underlying feature flags, besides us allowing the MSRs to be read
nevertheless, RDPRU is going to expose the values even to user space.

Furthermore, due to the not exposed feature flags, this change has the
effect of making properly inaccessible (for reads) the two MSRs.

Note that writes to MPERF (and APERF) continue to be unsupported.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
---
v3: New.
---
I did consider whether to put the code in guest_rdmsr() instead, but
decided that it's better to have it next to TSC handling.

--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -3456,6 +3456,22 @@ int hvm_msr_read_intercept(unsigned int
         *msr_content = v->arch.hvm.msr_tsc_adjust;
         break;
 
+    case MSR_MPERF_RD_ONLY:
+        if ( !d->arch.cpuid->extd.efro )
+        {
+            goto gp_fault;
+
+    case MSR_IA32_MPERF:
+            if ( !(d->arch.cpuid->basic.raw[6].c &
+                   CPUID6_ECX_APERFMPERF_CAPABILITY) )
+                goto gp_fault;
+        }
+        if ( rdmsr_safe(msr, *msr_content) )
+            goto gp_fault;
+        if ( d->arch.cpuid->x86_vendor & (X86_VENDOR_AMD | X86_VENDOR_HYGON) )
+            *msr_content = hvm_get_guest_tsc_fixed(v, *msr_content);
+        break;
+
     case MSR_APIC_BASE:
         *msr_content = vcpu_vlapic(v)->hw.apic_base_msr;
         break;
--- a/xen/include/asm-x86/msr-index.h
+++ b/xen/include/asm-x86/msr-index.h
@@ -397,6 +397,9 @@
 #define MSR_IA32_MPERF			0x000000e7
 #define MSR_IA32_APERF			0x000000e8
 
+#define MSR_MPERF_RD_ONLY		0xc00000e7
+#define MSR_APERF_RD_ONLY		0xc00000e8
+
 #define MSR_IA32_THERM_CONTROL		0x0000019a
 #define MSR_IA32_THERM_INTERRUPT	0x0000019b
 #define MSR_IA32_THERM_STATUS		0x0000019c



^ permalink raw reply	[flat|nested] 29+ messages in thread

* [Xen-devel] [PATCH v5 08/10] x86emul: support RDPRU
  2020-03-24 12:26 [Xen-devel] [PATCH v5 00/10] x86emul: further work Jan Beulich
                   ` (7 preceding siblings ...)
  2020-03-24 12:35 ` [Xen-devel] [PATCH v5 07/10] x86/HVM: scale MPERF values reported to guests (on AMD) Jan Beulich
@ 2020-03-24 12:36 ` Jan Beulich
  2020-03-24 12:37 ` [Xen-devel] [PATCH v5 09/10] x86/HVM: don't needlessly intercept APERF/MPERF/TSC MSR reads Jan Beulich
                   ` (4 subsequent siblings)
  13 siblings, 0 replies; 29+ messages in thread
From: Jan Beulich @ 2020-03-24 12:36 UTC (permalink / raw)
  To: xen-devel; +Cc: Andrew Cooper, Paul Durrant, Wei Liu, Roger Pau Monne

While the PM doesn't say so, this assumes that the MPERF value read this
way gets scaled similarly to its reading through RDMSR.

Also introduce the SVM related constants at this occasion.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
---
v5: The CPUID field used is just 8 bits wide.
v4: Add GENERAL2_INTERCEPT_RDPRU and VMEXIT_RDPRU enumerators. Fold
    handling of out of bounds indexes into switch(). Avoid
    recalculate_misc() clobbering what recalculate_cpu_policy() has
    done. Re-base.
v3: New.
---
RFC: Andrew promised to take care of the CPUID side of this; re-base
     over his work once available.

--- a/tools/libxl/libxl_cpuid.c
+++ b/tools/libxl/libxl_cpuid.c
@@ -260,6 +260,7 @@ int libxl_cpuid_parse_config(libxl_cpuid
 
         {"clzero",       0x80000008, NA, CPUID_REG_EBX,  0,  1},
         {"rstr-fp-err-ptrs", 0x80000008, NA, CPUID_REG_EBX, 2, 1},
+        {"rdpru",        0x80000008, NA, CPUID_REG_EBX,  4,  1},
         {"wbnoinvd",     0x80000008, NA, CPUID_REG_EBX,  9,  1},
         {"ibpb",         0x80000008, NA, CPUID_REG_EBX, 12,  1},
         {"ppin",         0x80000008, NA, CPUID_REG_EBX, 23,  1},
--- a/tools/misc/xen-cpuid.c
+++ b/tools/misc/xen-cpuid.c
@@ -147,6 +147,8 @@ static const char *const str_e8b[32] =
     [ 0] = "clzero",
     [ 2] = "rstr-fp-err-ptrs",
 
+    [ 4] = "rdpru",
+
     /* [ 8] */            [ 9] = "wbnoinvd",
 
     [12] = "ibpb",
--- a/tools/tests/x86_emulator/test_x86_emulator.c
+++ b/tools/tests/x86_emulator/test_x86_emulator.c
@@ -683,6 +683,13 @@ static int read_msr(
 {
     switch ( reg )
     {
+    case 0x000000e8: /* APERF */
+    case 0xc00000e8: /* APERF_RD_ONLY */
+#define APERF_LO_VALUE 0xAEAEAEAE
+#define APERF_HI_VALUE 0xEAEAEAEA
+        *val = ((uint64_t)APERF_HI_VALUE << 32) | APERF_LO_VALUE;
+        return X86EMUL_OKAY;
+
     case 0xc0000080: /* EFER */
         *val = ctxt->addr_size > 32 ? 0x500 /* LME|LMA */ : 0;
         return X86EMUL_OKAY;
@@ -2249,6 +2256,30 @@ int main(int argc, char **argv)
     else
         printf("skipped\n");
 
+    printf("%-40s", "Testing rdpru...");
+    instr[0] = 0x0f; instr[1] = 0x01; instr[2] = 0xfd;
+    regs.eip = (unsigned long)&instr[0];
+    regs.ecx = 1;
+    regs.eflags = EFLAGS_ALWAYS_SET;
+    rc = x86_emulate(&ctxt, &emulops);
+    if ( (rc != X86EMUL_OKAY) ||
+         (regs.eax != APERF_LO_VALUE) || (regs.edx != APERF_HI_VALUE) ||
+         !(regs.eflags & X86_EFLAGS_CF) ||
+         (regs.eip != (unsigned long)&instr[3]) )
+        goto fail;
+    if ( ctxt.cpuid->extd.rdpru_max < 0xffff )
+    {
+        regs.eip = (unsigned long)&instr[0];
+        regs.ecx = ctxt.cpuid->extd.rdpru_max + 1;
+        regs.eflags = EFLAGS_ALWAYS_SET | X86_EFLAGS_CF;
+        rc = x86_emulate(&ctxt, &emulops);
+        if ( (rc != X86EMUL_OKAY) || regs.eax || regs.edx ||
+             (regs.eflags & X86_EFLAGS_CF) ||
+             (regs.eip != (unsigned long)&instr[3]) )
+            goto fail;
+    }
+    printf("okay\n");
+
     printf("%-40s", "Testing movq %mm3,(%ecx)...");
     if ( stack_exec && cpu_has_mmx )
     {
--- a/tools/tests/x86_emulator/x86-emulate.c
+++ b/tools/tests/x86_emulator/x86-emulate.c
@@ -78,6 +78,8 @@ bool emul_test_init(void)
     cp.feat.rdpid = true;
     cp.feat.movdiri = true;
     cp.extd.clzero = true;
+    cp.extd.rdpru = true;
+    cp.extd.rdpru_max = 1;
 
     if ( cpu_has_xsave )
     {
@@ -150,11 +152,11 @@ int emul_test_cpuid(
     }
 
     /*
-     * The emulator doesn't itself use CLZERO, so we can always run the
+     * The emulator doesn't itself use CLZERO/RDPRU, so we can always run the
      * respective test(s).
      */
     if ( leaf == 0x80000008 )
-        res->b |= 1U << 0;
+        res->b |= (1U << 0) | (1U << 4);
 
     return X86EMUL_OKAY;
 }
--- a/xen/arch/x86/cpuid.c
+++ b/xen/arch/x86/cpuid.c
@@ -243,8 +243,6 @@ static void recalculate_misc(struct cpui
     /* Most of Power/RAS hidden from guests. */
     p->extd.raw[0x7].a = p->extd.raw[0x7].b = p->extd.raw[0x7].c = 0;
 
-    p->extd.raw[0x8].d = 0;
-
     switch ( p->x86_vendor )
     {
     case X86_VENDOR_INTEL:
@@ -263,6 +261,7 @@ static void recalculate_misc(struct cpui
 
         p->extd.raw[0x8].a &= 0x0000ffff;
         p->extd.raw[0x8].c = 0;
+        p->extd.raw[0x8].d = 0;
         break;
 
     case X86_VENDOR_AMD:
@@ -281,6 +280,7 @@ static void recalculate_misc(struct cpui
 
         p->extd.raw[0x8].a &= 0x0000ffff; /* GuestMaxPhysAddr hidden. */
         p->extd.raw[0x8].c &= 0x0003f0ff;
+        p->extd.raw[0x8].d &= 0xffff0000;
 
         p->extd.raw[0x9] = EMPTY_LEAF;
 
@@ -643,6 +643,11 @@ void recalculate_cpuid_policy(struct dom
 
     p->extd.maxlinaddr = p->extd.lm ? 48 : 32;
 
+    if ( p->extd.rdpru )
+        p->extd.rdpru_max = min(p->extd.rdpru_max, max->extd.rdpru_max);
+    else
+        p->extd.rdpru_max = 0;
+
     recalculate_xstate(p);
     recalculate_misc(p);
 
--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -1877,6 +1877,7 @@ in_protmode(
 #define vcpu_has_fma4()        (ctxt->cpuid->extd.fma4)
 #define vcpu_has_tbm()         (ctxt->cpuid->extd.tbm)
 #define vcpu_has_clzero()      (ctxt->cpuid->extd.clzero)
+#define vcpu_has_rdpru()       (ctxt->cpuid->extd.rdpru)
 #define vcpu_has_wbnoinvd()    (ctxt->cpuid->extd.wbnoinvd)
 
 #define vcpu_has_bmi1()        (ctxt->cpuid->feat.bmi1)
@@ -5711,6 +5712,50 @@ x86_emulate(
                 limit -= sizeof(zero);
             }
             break;
+
+        case 0xfd: /* rdpru */
+            vcpu_must_have(rdpru);
+
+            if ( !mode_ring0() )
+            {
+                fail_if(!ops->read_cr);
+                if ( (rc = ops->read_cr(4, &cr4, ctxt)) != X86EMUL_OKAY )
+                    goto done;
+                generate_exception_if(cr4 & X86_CR4_TSD, EXC_UD);
+            }
+
+            switch ( _regs.ecx | -(_regs.ecx > ctxt->cpuid->extd.rdpru_max) )
+            {
+            case 0:  n = MSR_IA32_MPERF; break;
+            case 1:  n = MSR_IA32_APERF; break;
+            default: n = 0; break;
+            }
+
+            _regs.eflags &= ~EFLAGS_MASK;
+            if ( n )
+            {
+                fail_if(!ops->read_msr);
+                switch ( rc = ops->read_msr(n, &msr_val, ctxt) )
+                {
+                case X86EMUL_OKAY:
+                    _regs.eflags |= X86_EFLAGS_CF;
+                    break;
+
+                case X86EMUL_EXCEPTION:
+                    x86_emul_reset_event(ctxt);
+                    rc = X86EMUL_OKAY;
+                    break;
+
+                default:
+                    goto done;
+                }
+            }
+
+            if ( !(_regs.eflags & X86_EFLAGS_CF) )
+                msr_val = 0;
+            _regs.r(dx) = msr_val >> 32;
+            _regs.r(ax) = (uint32_t)msr_val;
+            break;
         }
 
 #define _GRP7(mod, reg) \
--- a/xen/include/asm-x86/hvm/svm/vmcb.h
+++ b/xen/include/asm-x86/hvm/svm/vmcb.h
@@ -74,7 +74,8 @@ enum GenericIntercept2bits
     GENERAL2_INTERCEPT_MONITOR = 1 << 10,
     GENERAL2_INTERCEPT_MWAIT   = 1 << 11,
     GENERAL2_INTERCEPT_MWAIT_CONDITIONAL = 1 << 12,
-    GENERAL2_INTERCEPT_XSETBV  = 1 << 13
+    GENERAL2_INTERCEPT_XSETBV  = 1 << 13,
+    GENERAL2_INTERCEPT_RDPRU   = 1 << 14,
 };
 
 
@@ -298,6 +299,7 @@ enum VMEXIT_EXITCODE
     VMEXIT_MWAIT            = 139, /* 0x8b */
     VMEXIT_MWAIT_CONDITIONAL= 140, /* 0x8c */
     VMEXIT_XSETBV           = 141, /* 0x8d */
+    VMEXIT_RDPRU            = 142, /* 0x8e */
     VMEXIT_NPF              = 1024, /* 0x400, nested paging fault */
     VMEXIT_INVALID          =  -1
 };
--- a/xen/include/public/arch-x86/cpufeatureset.h
+++ b/xen/include/public/arch-x86/cpufeatureset.h
@@ -248,6 +248,7 @@ XEN_CPUFEATURE(EFRO,          7*32+10) /
 /* AMD-defined CPU features, CPUID level 0x80000008.ebx, word 8 */
 XEN_CPUFEATURE(CLZERO,        8*32+ 0) /*A  CLZERO instruction */
 XEN_CPUFEATURE(RSTR_FP_ERR_PTRS, 8*32+ 2) /*A  (F)X{SAVE,RSTOR} always saves/restores FPU Error pointers */
+XEN_CPUFEATURE(RDPRU,         8*32+ 4) /*A  RDPRU instruction */
 XEN_CPUFEATURE(WBNOINVD,      8*32+ 9) /*   WBNOINVD instruction */
 XEN_CPUFEATURE(IBPB,          8*32+12) /*A  IBPB support only (no IBRS, used by AMD) */
 XEN_CPUFEATURE(AMD_PPIN,      8*32+23) /*   Protected Processor Inventory Number */
--- a/xen/include/xen/lib/x86/cpuid.h
+++ b/xen/include/xen/lib/x86/cpuid.h
@@ -264,7 +264,7 @@ struct cpuid_policy
                 struct { DECL_BITFIELD(e8b); };
             };
             uint32_t nc:8, :4, apic_id_size:4, :16;
-            uint32_t /* d */:32;
+            uint8_t :8, :8, rdpru_max, :8;
         };
     } extd;
 



^ permalink raw reply	[flat|nested] 29+ messages in thread

* [Xen-devel] [PATCH v5 09/10] x86/HVM: don't needlessly intercept APERF/MPERF/TSC MSR reads
  2020-03-24 12:26 [Xen-devel] [PATCH v5 00/10] x86emul: further work Jan Beulich
                   ` (8 preceding siblings ...)
  2020-03-24 12:36 ` [Xen-devel] [PATCH v5 08/10] x86emul: support RDPRU Jan Beulich
@ 2020-03-24 12:37 ` Jan Beulich
  2020-03-27  2:24   ` Tian, Kevin
  2020-03-24 12:37 ` [Xen-devel] [PATCH v5 10/10] x86emul: support MCOMMIT Jan Beulich
                   ` (3 subsequent siblings)
  13 siblings, 1 reply; 29+ messages in thread
From: Jan Beulich @ 2020-03-24 12:37 UTC (permalink / raw)
  To: xen-devel
  Cc: Kevin Tian, Wei Liu, Andrew Cooper, Paul Durrant, Jun Nakajima,
	Roger Pau Monne

If the hardware can handle accesses, we should allow it to do so. This
way we can expose EFRO to HVM guests, and "all" that's left for exposing
APERF/MPERF is to figure out how to handle writes to these MSRs. (Note
that the leaf 6 guest CPUID checks will evaluate to false for now, as
recalculate_misc() zaps the entire leaf for now.)

For TSC the intercepts are made mirror the RDTSC ones.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
---
v4: Make TSC intercepts mirror RDTSC ones. Re-base.
v3: New.

--- a/xen/arch/x86/hvm/svm/svm.c
+++ b/xen/arch/x86/hvm/svm/svm.c
@@ -595,6 +595,7 @@ static void svm_cpuid_policy_changed(str
     struct vmcb_struct *vmcb = svm->vmcb;
     const struct cpuid_policy *cp = v->domain->arch.cpuid;
     u32 bitmap = vmcb_get_exception_intercepts(vmcb);
+    unsigned int mode;
 
     if ( opt_hvm_fep ||
          (v->domain->arch.cpuid->x86_vendor != boot_cpu_data.x86_vendor) )
@@ -607,6 +608,17 @@ static void svm_cpuid_policy_changed(str
     /* Give access to MSR_PRED_CMD if the guest has been told about it. */
     svm_intercept_msr(v, MSR_PRED_CMD,
                       cp->extd.ibpb ? MSR_INTERCEPT_NONE : MSR_INTERCEPT_RW);
+
+    /* Allow direct reads from APERF/MPERF if permitted by the policy. */
+    mode = cp->basic.raw[6].c & CPUID6_ECX_APERFMPERF_CAPABILITY
+           ? MSR_INTERCEPT_WRITE : MSR_INTERCEPT_RW;
+    svm_intercept_msr(v, MSR_IA32_APERF, mode);
+    svm_intercept_msr(v, MSR_IA32_MPERF, mode);
+
+    /* Allow direct access to their r/o counterparts if permitted. */
+    mode = cp->extd.efro ? MSR_INTERCEPT_NONE : MSR_INTERCEPT_RW;
+    svm_intercept_msr(v, MSR_APERF_RD_ONLY, mode);
+    svm_intercept_msr(v, MSR_MPERF_RD_ONLY, mode);
 }
 
 void svm_sync_vmcb(struct vcpu *v, enum vmcb_sync_state new_state)
@@ -860,7 +872,10 @@ static void svm_set_rdtsc_exiting(struct
     {
         general1_intercepts |= GENERAL1_INTERCEPT_RDTSC;
         general2_intercepts |= GENERAL2_INTERCEPT_RDTSCP;
+        svm_enable_intercept_for_msr(v, MSR_IA32_TSC);
     }
+    else
+        svm_intercept_msr(v, MSR_IA32_TSC, MSR_INTERCEPT_WRITE);
 
     vmcb_set_general1_intercepts(vmcb, general1_intercepts);
     vmcb_set_general2_intercepts(vmcb, general2_intercepts);
--- a/xen/arch/x86/hvm/svm/vmcb.c
+++ b/xen/arch/x86/hvm/svm/vmcb.c
@@ -108,6 +108,7 @@ static int construct_vmcb(struct vcpu *v
     {
         vmcb->_general1_intercepts |= GENERAL1_INTERCEPT_RDTSC;
         vmcb->_general2_intercepts |= GENERAL2_INTERCEPT_RDTSCP;
+        svm_intercept_msr(v, MSR_IA32_TSC, MSR_INTERCEPT_WRITE);
     }
 
     /* Guest segment limits. */
--- a/xen/arch/x86/hvm/vmx/vmcs.c
+++ b/xen/arch/x86/hvm/vmx/vmcs.c
@@ -1141,8 +1141,13 @@ static int construct_vmcs(struct vcpu *v
         vmx_clear_msr_intercept(v, MSR_IA32_SYSENTER_CS, VMX_MSR_RW);
         vmx_clear_msr_intercept(v, MSR_IA32_SYSENTER_ESP, VMX_MSR_RW);
         vmx_clear_msr_intercept(v, MSR_IA32_SYSENTER_EIP, VMX_MSR_RW);
+
+        if ( !(v->arch.hvm.vmx.exec_control & CPU_BASED_RDTSC_EXITING) )
+            vmx_clear_msr_intercept(v, MSR_IA32_TSC, VMX_MSR_R);
+
         if ( paging_mode_hap(d) && (!is_iommu_enabled(d) || iommu_snoop) )
             vmx_clear_msr_intercept(v, MSR_IA32_CR_PAT, VMX_MSR_RW);
+
         if ( (vmexit_ctl & VM_EXIT_CLEAR_BNDCFGS) &&
              (vmentry_ctl & VM_ENTRY_LOAD_BNDCFGS) )
             vmx_clear_msr_intercept(v, MSR_IA32_BNDCFGS, VMX_MSR_RW);
--- a/xen/arch/x86/hvm/vmx/vmx.c
+++ b/xen/arch/x86/hvm/vmx/vmx.c
@@ -585,6 +585,18 @@ static void vmx_cpuid_policy_changed(str
         vmx_clear_msr_intercept(v, MSR_FLUSH_CMD, VMX_MSR_RW);
     else
         vmx_set_msr_intercept(v, MSR_FLUSH_CMD, VMX_MSR_RW);
+
+    /* Allow direct reads from APERF/MPERF if permitted by the policy. */
+    if ( cp->basic.raw[6].c & CPUID6_ECX_APERFMPERF_CAPABILITY )
+    {
+        vmx_clear_msr_intercept(v, MSR_IA32_APERF, VMX_MSR_R);
+        vmx_clear_msr_intercept(v, MSR_IA32_MPERF, VMX_MSR_R);
+    }
+    else
+    {
+        vmx_set_msr_intercept(v, MSR_IA32_APERF, VMX_MSR_R);
+        vmx_set_msr_intercept(v, MSR_IA32_MPERF, VMX_MSR_R);
+    }
 }
 
 int vmx_guest_x86_mode(struct vcpu *v)
@@ -1250,7 +1262,12 @@ static void vmx_set_rdtsc_exiting(struct
     vmx_vmcs_enter(v);
     v->arch.hvm.vmx.exec_control &= ~CPU_BASED_RDTSC_EXITING;
     if ( enable )
+    {
         v->arch.hvm.vmx.exec_control |= CPU_BASED_RDTSC_EXITING;
+        vmx_set_msr_intercept(v, MSR_IA32_TSC, VMX_MSR_R);
+    }
+    else
+        vmx_clear_msr_intercept(v, MSR_IA32_TSC, VMX_MSR_R);
     vmx_update_cpu_exec_control(v);
     vmx_vmcs_exit(v);
 }
--- a/xen/include/public/arch-x86/cpufeatureset.h
+++ b/xen/include/public/arch-x86/cpufeatureset.h
@@ -243,7 +243,7 @@ XEN_CPUFEATURE(ENQCMD,        6*32+29) /
 
 /* AMD-defined CPU features, CPUID level 0x80000007.edx, word 7 */
 XEN_CPUFEATURE(ITSC,          7*32+ 8) /*   Invariant TSC */
-XEN_CPUFEATURE(EFRO,          7*32+10) /*   APERF/MPERF Read Only interface */
+XEN_CPUFEATURE(EFRO,          7*32+10) /*S  APERF/MPERF Read Only interface */
 
 /* AMD-defined CPU features, CPUID level 0x80000008.ebx, word 8 */
 XEN_CPUFEATURE(CLZERO,        8*32+ 0) /*A  CLZERO instruction */



^ permalink raw reply	[flat|nested] 29+ messages in thread

* [Xen-devel] [PATCH v5 10/10] x86emul: support MCOMMIT
  2020-03-24 12:26 [Xen-devel] [PATCH v5 00/10] x86emul: further work Jan Beulich
                   ` (9 preceding siblings ...)
  2020-03-24 12:37 ` [Xen-devel] [PATCH v5 09/10] x86/HVM: don't needlessly intercept APERF/MPERF/TSC MSR reads Jan Beulich
@ 2020-03-24 12:37 ` Jan Beulich
  2020-03-24 12:43 ` [Xen-devel] [PATCH v5 00/10] x86emul: further work Jan Beulich
                   ` (2 subsequent siblings)
  13 siblings, 0 replies; 29+ messages in thread
From: Jan Beulich @ 2020-03-24 12:37 UTC (permalink / raw)
  To: xen-devel; +Cc: Andrew Cooper, Paul Durrant, Wei Liu, Roger Pau Monne

The dependency on a new EFER bit implies that we need to set that bit
ourselves in order to be able to successfully invoke the insn.

Also once again introduce the SVM related constants at this occasion.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
---
RFC: The exact meaning of the PM stating "any errors encountered by
     those stores have been signaled to associated error logging
     resources" is unclear. Depending on what this entails, blindly
     enabling EFER.MCOMMIT may not be a good idea. Hence the RFC.
---
v5: Re-base.
v4: New.

--- a/tools/libxl/libxl_cpuid.c
+++ b/tools/libxl/libxl_cpuid.c
@@ -261,6 +261,7 @@ int libxl_cpuid_parse_config(libxl_cpuid
         {"clzero",       0x80000008, NA, CPUID_REG_EBX,  0,  1},
         {"rstr-fp-err-ptrs", 0x80000008, NA, CPUID_REG_EBX, 2, 1},
         {"rdpru",        0x80000008, NA, CPUID_REG_EBX,  4,  1},
+        {"mcommit",      0x80000008, NA, CPUID_REG_EBX,  8,  1},
         {"wbnoinvd",     0x80000008, NA, CPUID_REG_EBX,  9,  1},
         {"ibpb",         0x80000008, NA, CPUID_REG_EBX, 12,  1},
         {"ppin",         0x80000008, NA, CPUID_REG_EBX, 23,  1},
--- a/tools/misc/xen-cpuid.c
+++ b/tools/misc/xen-cpuid.c
@@ -149,7 +149,7 @@ static const char *const str_e8b[32] =
 
     [ 4] = "rdpru",
 
-    /* [ 8] */            [ 9] = "wbnoinvd",
+    [ 8] = "mcommit",          [ 9] = "wbnoinvd",
 
     [12] = "ibpb",
 
--- a/xen/arch/x86/cpu/amd.c
+++ b/xen/arch/x86/cpu/amd.c
@@ -798,6 +798,9 @@ static void init_amd(struct cpuinfo_x86
 		wrmsr(MSR_K7_HWCR, l, h);
 	}
 
+	if (cpu_has(c, X86_FEATURE_MCOMMIT))
+		write_efer(read_efer() | EFER_MCOMMIT);
+
 	/* Prevent TSC drift in non single-processor, single-core platforms. */
 	if ((smp_processor_id() == 1) && !cpu_has(c, X86_FEATURE_ITSC))
 		disable_c1_ramping();
--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -1877,6 +1877,7 @@ in_protmode(
 #define vcpu_has_fma4()        (ctxt->cpuid->extd.fma4)
 #define vcpu_has_tbm()         (ctxt->cpuid->extd.tbm)
 #define vcpu_has_clzero()      (ctxt->cpuid->extd.clzero)
+#define vcpu_has_mcommit()     (ctxt->cpuid->extd.mcommit)
 #define vcpu_has_rdpru()       (ctxt->cpuid->extd.rdpru)
 #define vcpu_has_wbnoinvd()    (ctxt->cpuid->extd.wbnoinvd)
 
@@ -5676,6 +5677,28 @@ x86_emulate(
             _regs.r(cx) = (uint32_t)msr_val;
             goto rdtsc;
 
+        case 0xfa: /* monitorx / mcommit */
+            if ( vex.pfx == vex_f3 )
+            {
+                bool cf;
+
+                host_and_vcpu_must_have(mcommit);
+                if ( !ops->read_msr ||
+                     ops->read_msr(MSR_EFER, &msr_val, ctxt) != X86EMUL_OKAY )
+                    msr_val = 0;
+                generate_exception_if(!(msr_val & EFER_MCOMMIT), EXC_UD);
+                memcpy(get_stub(stub),
+                       ((uint8_t[]){ 0xf3, 0x0f, 0x01, 0xfa, 0xc3 }), 5);
+                _regs.eflags &= ~EFLAGS_MASK;
+                invoke_stub("", ASM_FLAG_OUT(, "setc %[cf]"),
+                            [cf] ASM_FLAG_OUT("=@ccc", "=qm") (cf) : "i" (0));
+                if ( cf )
+                    _regs.eflags |= X86_EFLAGS_CF;
+                put_stub(stub);
+                goto done;
+            }
+            goto unrecognized_insn;
+
         case 0xfc: /* clzero */
         {
             unsigned long zero = 0;
--- a/xen/include/asm-x86/cpufeature.h
+++ b/xen/include/asm-x86/cpufeature.h
@@ -131,6 +131,9 @@
 #define cpu_has_avx512_4fmaps   boot_cpu_has(X86_FEATURE_AVX512_4FMAPS)
 #define cpu_has_tsx_force_abort boot_cpu_has(X86_FEATURE_TSX_FORCE_ABORT)
 
+/* CPUID level 0x80000008.ebx */
+#define cpu_has_mcommit         boot_cpu_has(X86_FEATURE_MCOMMIT)
+
 /* CPUID level 0x00000007:1.eax */
 #define cpu_has_avx512_bf16     boot_cpu_has(X86_FEATURE_AVX512_BF16)
 
--- a/xen/include/asm-x86/hvm/svm/vmcb.h
+++ b/xen/include/asm-x86/hvm/svm/vmcb.h
@@ -78,6 +78,11 @@ enum GenericIntercept2bits
     GENERAL2_INTERCEPT_RDPRU   = 1 << 14,
 };
 
+/* general 3 intercepts */
+enum GenericIntercept3bits
+{
+    GENERAL3_INTERCEPT_MCOMMIT = 1 << 3,
+};
 
 /* control register intercepts */
 enum CRInterceptBits
@@ -300,6 +305,7 @@ enum VMEXIT_EXITCODE
     VMEXIT_MWAIT_CONDITIONAL= 140, /* 0x8c */
     VMEXIT_XSETBV           = 141, /* 0x8d */
     VMEXIT_RDPRU            = 142, /* 0x8e */
+    VMEXIT_MCOMMIT          = 163, /* 0xa3 */
     VMEXIT_NPF              = 1024, /* 0x400, nested paging fault */
     VMEXIT_INVALID          =  -1
 };
@@ -406,7 +412,8 @@ struct vmcb_struct {
     u32 _exception_intercepts;  /* offset 0x08 - cleanbit 0 */
     u32 _general1_intercepts;   /* offset 0x0C - cleanbit 0 */
     u32 _general2_intercepts;   /* offset 0x10 - cleanbit 0 */
-    u32 res01[10];
+    u32 _general3_intercepts;   /* offset 0x14 - cleanbit 0 */
+    u32 res01[9];
     u16 _pause_filter_thresh;   /* offset 0x3C - cleanbit 0 */
     u16 _pause_filter_count;    /* offset 0x3E - cleanbit 0 */
     u64 _iopm_base_pa;          /* offset 0x40 - cleanbit 1 */
@@ -590,6 +597,7 @@ VMCB_ACCESSORS(dr_intercepts, intercepts
 VMCB_ACCESSORS(exception_intercepts, intercepts)
 VMCB_ACCESSORS(general1_intercepts, intercepts)
 VMCB_ACCESSORS(general2_intercepts, intercepts)
+VMCB_ACCESSORS(general3_intercepts, intercepts)
 VMCB_ACCESSORS(pause_filter_count, intercepts)
 VMCB_ACCESSORS(pause_filter_thresh, intercepts)
 VMCB_ACCESSORS(tsc_offset, intercepts)
--- a/xen/include/asm-x86/msr-index.h
+++ b/xen/include/asm-x86/msr-index.h
@@ -88,6 +88,7 @@
 #define _EFER_NX		11 /* No execute enable */
 #define _EFER_SVME		12 /* AMD: SVM enable */
 #define _EFER_FFXSE		14 /* AMD: Fast FXSAVE/FXRSTOR enable */
+#define _EFER_MCOMMIT		17 /* AMD: MCOMMIT insn enable */
 
 #define EFER_SCE		(1<<_EFER_SCE)
 #define EFER_LME		(1<<_EFER_LME)
@@ -95,9 +96,10 @@
 #define EFER_NX			(1<<_EFER_NX)
 #define EFER_SVME		(1<<_EFER_SVME)
 #define EFER_FFXSE		(1<<_EFER_FFXSE)
+#define EFER_MCOMMIT		(1<<_EFER_MCOMMIT)
 
 #define EFER_KNOWN_MASK		(EFER_SCE | EFER_LME | EFER_LMA | EFER_NX | \
-				 EFER_SVME | EFER_FFXSE)
+				 EFER_SVME | EFER_FFXSE | EFER_MCOMMIT)
 
 /* Intel MSRs. Some also available on other CPUs */
 #define MSR_IA32_PERFCTR0		0x000000c1
--- a/xen/include/public/arch-x86/cpufeatureset.h
+++ b/xen/include/public/arch-x86/cpufeatureset.h
@@ -249,6 +249,7 @@ XEN_CPUFEATURE(EFRO,          7*32+10) /
 XEN_CPUFEATURE(CLZERO,        8*32+ 0) /*A  CLZERO instruction */
 XEN_CPUFEATURE(RSTR_FP_ERR_PTRS, 8*32+ 2) /*A  (F)X{SAVE,RSTOR} always saves/restores FPU Error pointers */
 XEN_CPUFEATURE(RDPRU,         8*32+ 4) /*A  RDPRU instruction */
+XEN_CPUFEATURE(MCOMMIT,       8*32+ 8) /*A  MCOMMIT instruction */
 XEN_CPUFEATURE(WBNOINVD,      8*32+ 9) /*   WBNOINVD instruction */
 XEN_CPUFEATURE(IBPB,          8*32+12) /*A  IBPB support only (no IBRS, used by AMD) */
 XEN_CPUFEATURE(AMD_PPIN,      8*32+23) /*   Protected Processor Inventory Number */



^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [Xen-devel] [PATCH v5 00/10] x86emul: further work
  2020-03-24 12:26 [Xen-devel] [PATCH v5 00/10] x86emul: further work Jan Beulich
                   ` (10 preceding siblings ...)
  2020-03-24 12:37 ` [Xen-devel] [PATCH v5 10/10] x86emul: support MCOMMIT Jan Beulich
@ 2020-03-24 12:43 ` Jan Beulich
  2020-03-25 10:59   ` Paul Durrant
  2020-03-25 11:41 ` Andrew Cooper
  2020-03-26 11:49 ` Jan Beulich
  13 siblings, 1 reply; 29+ messages in thread
From: Jan Beulich @ 2020-03-24 12:43 UTC (permalink / raw)
  To: xen-devel; +Cc: Andrew Cooper, Roger Pau Monne, Wei Liu, Paul Durrant

Paul,On 24.03.2020 13:26, Jan Beulich wrote:
> Some of the later patches are still at least partly RFC, for
> varying reasons (see there). I'd appreciate though if at least
> some of the earlier ones could go in rather sooner than later.
> 
> Patch 1 functionally (for the test harness) depends on
> "libx86/CPUID: fix (not just) leaf 7 processing", while at
> least patch 2 contextually depends on "x86emul: disable
> FPU/MMX/SIMD insn emulation when !HVM".
> 
>  1: x86emul: support AVX512_BF16 insns

I should note that I also have a VP2INTERSECT patch ready, but the
just released SDE segfaults when trying to test it. I'll be holding
this back for some more time, I guess.

>  2: x86emul: support MOVDIRI insn
>  3: x86: determine HAVE_AS_* just once
>  4: x86: move back clang no integrated assembler tests
>  5: x86emul: support MOVDIR64B insn
>  6: x86emul: support ENQCMD insn
>  7: x86/HVM: scale MPERF values reported to guests (on AMD)
>  8: x86emul: support RDPRU
>  9: x86/HVM: don't needlessly intercept APERF/MPERF/TSC MSR reads
> 10: x86emul: support MCOMMIT

Paul, I should also note that I mistakenly Cc-ed your old Citrix
address. I'd like to avoid re-posting the series - do you perhaps
nevertheless get the xen-devel copies?

Jan


^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [Xen-devel] [PATCH v5 00/10] x86emul: further work
  2020-03-24 12:43 ` [Xen-devel] [PATCH v5 00/10] x86emul: further work Jan Beulich
@ 2020-03-25 10:59   ` Paul Durrant
  0 siblings, 0 replies; 29+ messages in thread
From: Paul Durrant @ 2020-03-25 10:59 UTC (permalink / raw)
  To: 'Jan Beulich', xen-devel
  Cc: 'Andrew Cooper', 'Wei Liu', 'Roger Pau Monne'

> -----Original Message-----
> From: Jan Beulich <jbeulich@suse.com>
> Sent: 24 March 2020 12:43
> To: xen-devel@lists.xenproject.org
> Cc: Andrew Cooper <andrew.cooper3@citrix.com>; Paul Durrant <paul@xen.org>; Wei Liu <wl@xen.org>;
> Roger Pau Monne <roger.pau@citrix.com>
> Subject: Re: [PATCH v5 00/10] x86emul: further work
> 
> Paul,On 24.03.2020 13:26, Jan Beulich wrote:
> > Some of the later patches are still at least partly RFC, for
> > varying reasons (see there). I'd appreciate though if at least
> > some of the earlier ones could go in rather sooner than later.
> >
> > Patch 1 functionally (for the test harness) depends on
> > "libx86/CPUID: fix (not just) leaf 7 processing", while at
> > least patch 2 contextually depends on "x86emul: disable
> > FPU/MMX/SIMD insn emulation when !HVM".
> >
> >  1: x86emul: support AVX512_BF16 insns
> 
> I should note that I also have a VP2INTERSECT patch ready, but the
> just released SDE segfaults when trying to test it. I'll be holding
> this back for some more time, I guess.
> 
> >  2: x86emul: support MOVDIRI insn
> >  3: x86: determine HAVE_AS_* just once
> >  4: x86: move back clang no integrated assembler tests
> >  5: x86emul: support MOVDIR64B insn
> >  6: x86emul: support ENQCMD insn
> >  7: x86/HVM: scale MPERF values reported to guests (on AMD)
> >  8: x86emul: support RDPRU
> >  9: x86/HVM: don't needlessly intercept APERF/MPERF/TSC MSR reads
> > 10: x86emul: support MCOMMIT
> 
> Paul, I should also note that I mistakenly Cc-ed your old Citrix
> address. I'd like to avoid re-posting the series - do you perhaps
> nevertheless get the xen-devel copies?
> 

Yeah I have them. My filters just moved them into my general 'xen' mailbox but I got them.

  Paul

> Jan



^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [Xen-devel] [PATCH v5 05/10] x86emul: support MOVDIR64B insn
  2020-03-24 12:34 ` [Xen-devel] [PATCH v5 05/10] x86emul: support MOVDIR64B insn Jan Beulich
@ 2020-03-25 11:19   ` Paul Durrant
  2020-03-25 11:46     ` Jan Beulich
  0 siblings, 1 reply; 29+ messages in thread
From: Paul Durrant @ 2020-03-25 11:19 UTC (permalink / raw)
  To: 'Jan Beulich', xen-devel
  Cc: 'Andrew Cooper', 'Paul Durrant',
	'Wei Liu', 'Roger Pau Monne'

> -----Original Message-----
> From: Xen-devel <xen-devel-bounces@lists.xenproject.org> On Behalf Of Jan Beulich
> Sent: 24 March 2020 12:34
> To: xen-devel@lists.xenproject.org
> Cc: Andrew Cooper <andrew.cooper3@citrix.com>; Paul Durrant <Paul.Durrant@citrix.com>; Wei Liu
> <wl@xen.org>; Roger Pau Monne <roger.pau@citrix.com>
> Subject: [Xen-devel] [PATCH v5 05/10] x86emul: support MOVDIR64B insn
> 
> Introduce a new blk() hook, paralleling the rmw() on in certain way, but
> being intended for larger data sizes, and hence its HVM intermediate
> handling function doesn't fall back to splitting the operation if the
> requested virtual address can't be mapped.
> 
> Note that SDM revision 071 doesn't specify exception behavior for
> ModRM.mod == 0b11; assuming #UD here.
> 
> Signed-off-by: Jan Beulich <jbeulich@suse.com>
> ---
> TBD: If we want to avoid depending on correct MTRR settings,
>      hvmemul_map_linear_addr() may need to gain a parameter to allow
>      controlling cachability of the produced mapping(s).

Or could we deal with this by adding an optional cache flush into the unmap?

> Of course the
>      function will also need to be made capable of mapping at least
>      p2m_mmio_direct pages for this and the two ENQCMD insns to be
>      actually useful.


I/O emulation parts LGTM so...

Reviewed-by: Paul Durrant <paul@xen.org>



^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [Xen-devel] [PATCH v5 00/10] x86emul: further work
  2020-03-24 12:26 [Xen-devel] [PATCH v5 00/10] x86emul: further work Jan Beulich
                   ` (11 preceding siblings ...)
  2020-03-24 12:43 ` [Xen-devel] [PATCH v5 00/10] x86emul: further work Jan Beulich
@ 2020-03-25 11:41 ` Andrew Cooper
  2020-03-25 11:45   ` Jan Beulich
  2020-03-26 11:49 ` Jan Beulich
  13 siblings, 1 reply; 29+ messages in thread
From: Andrew Cooper @ 2020-03-25 11:41 UTC (permalink / raw)
  To: Jan Beulich, xen-devel; +Cc: Paul Durrant, Wei Liu, Roger Pau Monne

On 24/03/2020 12:26, Jan Beulich wrote:
> Some of the later patches are still at least partly RFC, for
> varying reasons (see there). I'd appreciate though if at least
> some of the earlier ones could go in rather sooner than later.
>
> Patch 1 functionally (for the test harness) depends on
> "libx86/CPUID: fix (not just) leaf 7 processing", while at
> least patch 2 contextually depends on "x86emul: disable
> FPU/MMX/SIMD insn emulation when !HVM".
>
>  1: x86emul: support AVX512_BF16 insns
>  2: x86emul: support MOVDIRI insn
>  3: x86: determine HAVE_AS_* just once

I have (in this order when threaded):

02/10 Support AVX512_BF16
02/10 Support MOVDIRI
01/10 Support AVX512_BF16
03/10 Determine HAS_AS_*

To a first approximation, the two AVX512_BF16 ones look identical, so
I'm going to assume that the first one (chronologically) was the error.

~Andrew


^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [Xen-devel] [PATCH v5 00/10] x86emul: further work
  2020-03-25 11:41 ` Andrew Cooper
@ 2020-03-25 11:45   ` Jan Beulich
  0 siblings, 0 replies; 29+ messages in thread
From: Jan Beulich @ 2020-03-25 11:45 UTC (permalink / raw)
  To: Andrew Cooper; +Cc: xen-devel, Paul Durrant, Wei Liu, Roger Pau Monne

On 25.03.2020 12:41, Andrew Cooper wrote:
> On 24/03/2020 12:26, Jan Beulich wrote:
>> Some of the later patches are still at least partly RFC, for
>> varying reasons (see there). I'd appreciate though if at least
>> some of the earlier ones could go in rather sooner than later.
>>
>> Patch 1 functionally (for the test harness) depends on
>> "libx86/CPUID: fix (not just) leaf 7 processing", while at
>> least patch 2 contextually depends on "x86emul: disable
>> FPU/MMX/SIMD insn emulation when !HVM".
>>
>>  1: x86emul: support AVX512_BF16 insns
>>  2: x86emul: support MOVDIRI insn
>>  3: x86: determine HAVE_AS_* just once
> 
> I have (in this order when threaded):
> 
> 02/10 Support AVX512_BF16
> 02/10 Support MOVDIRI
> 01/10 Support AVX512_BF16
> 03/10 Determine HAS_AS_*
> 
> To a first approximation, the two AVX512_BF16 ones look identical, so
> I'm going to assume that the first one (chronologically) was the error.

Yes indeed, sorry.

Jan


^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [Xen-devel] [PATCH v5 05/10] x86emul: support MOVDIR64B insn
  2020-03-25 11:19   ` Paul Durrant
@ 2020-03-25 11:46     ` Jan Beulich
  0 siblings, 0 replies; 29+ messages in thread
From: Jan Beulich @ 2020-03-25 11:46 UTC (permalink / raw)
  To: paul
  Cc: xen-devel, 'Paul Durrant', 'Roger Pau Monne',
	'Wei Liu', 'Andrew Cooper'

On 25.03.2020 12:19, Paul Durrant wrote:
>> -----Original Message-----
>> From: Xen-devel <xen-devel-bounces@lists.xenproject.org> On Behalf Of Jan Beulich
>> Sent: 24 March 2020 12:34
>> To: xen-devel@lists.xenproject.org
>> Cc: Andrew Cooper <andrew.cooper3@citrix.com>; Paul Durrant <Paul.Durrant@citrix.com>; Wei Liu
>> <wl@xen.org>; Roger Pau Monne <roger.pau@citrix.com>
>> Subject: [Xen-devel] [PATCH v5 05/10] x86emul: support MOVDIR64B insn
>>
>> Introduce a new blk() hook, paralleling the rmw() on in certain way, but
>> being intended for larger data sizes, and hence its HVM intermediate
>> handling function doesn't fall back to splitting the operation if the
>> requested virtual address can't be mapped.
>>
>> Note that SDM revision 071 doesn't specify exception behavior for
>> ModRM.mod == 0b11; assuming #UD here.
>>
>> Signed-off-by: Jan Beulich <jbeulich@suse.com>
>> ---
>> TBD: If we want to avoid depending on correct MTRR settings,
>>      hvmemul_map_linear_addr() may need to gain a parameter to allow
>>      controlling cachability of the produced mapping(s).
> 
> Or could we deal with this by adding an optional cache flush into the unmap?

But (non-)cachability of a range can't generally by covered by
simply flushing the cache.

>> Of course the
>>      function will also need to be made capable of mapping at least
>>      p2m_mmio_direct pages for this and the two ENQCMD insns to be
>>      actually useful.
> 
> 
> I/O emulation parts LGTM so...
> 
> Reviewed-by: Paul Durrant <paul@xen.org>

Thanks much.

Jan


^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [Xen-devel] [PATCH v5 01/10] x86emul: support AVX512_BF16 insns
  2020-03-24 12:30 ` [Xen-devel] [PATCH v5 01/10] x86emul: support AVX512_BF16 insns Jan Beulich
@ 2020-03-25 20:02   ` Andrew Cooper
  2020-03-27 18:20   ` Andrew Cooper
  1 sibling, 0 replies; 29+ messages in thread
From: Andrew Cooper @ 2020-03-25 20:02 UTC (permalink / raw)
  To: Jan Beulich, xen-devel; +Cc: Paul Durrant, Wei Liu, Roger Pau Monne

On 24/03/2020 12:30, Jan Beulich wrote:
> Signed-off-by: Jan Beulich <jbeulich@suse.com>

Acked-by: Andrew Cooper <andrew.cooper3@citrix.com>


^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [Xen-devel] [PATCH v5 02/10] x86emul: support MOVDIRI insn
  2020-03-24 12:29 ` [Xen-devel] [PATCH v5 02/10] x86emul: support MOVDIRI insn Jan Beulich
@ 2020-03-25 20:58   ` Andrew Cooper
  2020-03-26  9:20     ` Jan Beulich
  0 siblings, 1 reply; 29+ messages in thread
From: Andrew Cooper @ 2020-03-25 20:58 UTC (permalink / raw)
  To: Jan Beulich, xen-devel; +Cc: Paul Durrant, Wei Liu, Roger Pau Monne

On 24/03/2020 12:29, Jan Beulich wrote:
> Note that SDM revision 070 doesn't specify exception behavior for
> ModRM.mod == 0b11; assuming #UD here.

Didn't I confirm this behaviour for you last time around?

> @@ -10075,6 +10079,14 @@ x86_emulate(
>                              : "0" ((uint32_t)src.val), "rm" (_regs.edx) );
>          break;
>  
> +    case X86EMUL_OPC(0x0f38, 0xf9): /* movdiri mem,r */
> +        vcpu_must_have(movdiri);
> +        generate_exception_if(dst.type != OP_MEM, EXC_UD);
> +        /* Ignore the non-temporal behavior for now. */
> +        dst.val = src.val;
> +        sfence = true;

Looking again at the SDM, I'm not entirely sure this is good enough.

Even on top of WB/WP mappings, it needs to have WC properties, knock
aliasing lines out of the cache, and ending up as a bus transaction.

Also, I'm not convinced the current chunking algorithm for qemu which
repeatedly subdivides down to 1, is compatible with the misaligned
behaviour described, guaranteeing a split of two.

~Andrew


^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [Xen-devel] [PATCH v5 03/10] x86: determine HAVE_AS_* just once
  2020-03-24 12:33 ` [Xen-devel] [PATCH v5 03/10] x86: determine HAVE_AS_* just once Jan Beulich
@ 2020-03-25 21:12   ` Andrew Cooper
  2020-03-26  9:50     ` Jan Beulich
  0 siblings, 1 reply; 29+ messages in thread
From: Andrew Cooper @ 2020-03-25 21:12 UTC (permalink / raw)
  To: Jan Beulich, xen-devel
  Cc: Stefano Stabellini, Julien Grall, Wei Liu, Ian Jackson,
	George Dunlap, Anthony PERARD, Roger Pau Monne

On 24/03/2020 12:33, Jan Beulich wrote:
> With the exception of HAVE_AS_QUOTED_SYM, populate the results into a
> generated header instead of (at least once per [sub]directory) into
> CFLAGS. This results in proper rebuilds (via make dependencies) in case
> the compiler used changes between builds. It additionally eases
> inspection of which assembler features were actually found usable.
>
> Some trickery is needed to avoid header generation itself to try to
> include the to-be/not-yet-generated header.
>
> Since the definitions in generated/config.h, previously having been
> command line options, might even affect xen/config.h or its descendants,
> move adding of the -include option for the latter after inclusion of the
> per-arch Rules.mk. Use the occasion to also move the most general -I
> option to the common Rules.mk.
>
> Signed-off-by: Jan Beulich <jbeulich@suse.com>

Given the work of Anthony's which is already committed in staging, I'd
really prefer this patch to look something like
https://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git/commit/?h=WIP.x86/asm&id=95ef9f80ed6359e89f988121521c421b7e9528de

That avoids all fragile games with includes, and is the position we want
to be in, longterm.

All the requisite infrastructure looks to be already present.

~Andrew


^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [Xen-devel] [PATCH v5 02/10] x86emul: support MOVDIRI insn
  2020-03-25 20:58   ` Andrew Cooper
@ 2020-03-26  9:20     ` Jan Beulich
  0 siblings, 0 replies; 29+ messages in thread
From: Jan Beulich @ 2020-03-26  9:20 UTC (permalink / raw)
  To: Andrew Cooper; +Cc: xen-devel, Paul Durrant, Wei Liu, Roger Pau Monne

On 25.03.2020 21:58, Andrew Cooper wrote:
> On 24/03/2020 12:29, Jan Beulich wrote:
>> Note that SDM revision 070 doesn't specify exception behavior for
>> ModRM.mod == 0b11; assuming #UD here.
> 
> Didn't I confirm this behaviour for you last time around?

Iirc you did, but the SDM still hasn't changed. Do you have a
suggestion on alternative wording.

>> @@ -10075,6 +10079,14 @@ x86_emulate(
>>                              : "0" ((uint32_t)src.val), "rm" (_regs.edx) );
>>          break;
>>  
>> +    case X86EMUL_OPC(0x0f38, 0xf9): /* movdiri mem,r */
>> +        vcpu_must_have(movdiri);
>> +        generate_exception_if(dst.type != OP_MEM, EXC_UD);
>> +        /* Ignore the non-temporal behavior for now. */
>> +        dst.val = src.val;
>> +        sfence = true;
> 
> Looking again at the SDM, I'm not entirely sure this is good enough.
> 
> Even on top of WB/WP mappings, it needs to have WC properties, knock
> aliasing lines out of the cache, and ending up as a bus transaction.
> 
> Also, I'm not convinced the current chunking algorithm for qemu which
> repeatedly subdivides down to 1, is compatible with the misaligned
> behaviour described, guaranteeing a split of two.

Taking care of these two will be a significant amount of (re-)work of
the HVM emulation layer. I'll see if I can come up with time and ideas
on how to do this.

Jan


^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [Xen-devel] [PATCH v5 03/10] x86: determine HAVE_AS_* just once
  2020-03-25 21:12   ` Andrew Cooper
@ 2020-03-26  9:50     ` Jan Beulich
  2020-03-26 13:42       ` Anthony PERARD
  0 siblings, 1 reply; 29+ messages in thread
From: Jan Beulich @ 2020-03-26  9:50 UTC (permalink / raw)
  To: Andrew Cooper
  Cc: Stefano Stabellini, Julien Grall, Wei Liu, Ian Jackson,
	George Dunlap, Anthony PERARD, xen-devel, Roger Pau Monne

On 25.03.2020 22:12, Andrew Cooper wrote:
> On 24/03/2020 12:33, Jan Beulich wrote:
>> With the exception of HAVE_AS_QUOTED_SYM, populate the results into a
>> generated header instead of (at least once per [sub]directory) into
>> CFLAGS. This results in proper rebuilds (via make dependencies) in case
>> the compiler used changes between builds. It additionally eases
>> inspection of which assembler features were actually found usable.
>>
>> Some trickery is needed to avoid header generation itself to try to
>> include the to-be/not-yet-generated header.
>>
>> Since the definitions in generated/config.h, previously having been
>> command line options, might even affect xen/config.h or its descendants,
>> move adding of the -include option for the latter after inclusion of the
>> per-arch Rules.mk. Use the occasion to also move the most general -I
>> option to the common Rules.mk.
>>
>> Signed-off-by: Jan Beulich <jbeulich@suse.com>
> 
> Given the work of Anthony's which is already committed in staging, I'd
> really prefer this patch to look something like
> https://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git/commit/?h=WIP.x86/asm&id=95ef9f80ed6359e89f988121521c421b7e9528de
> 
> That avoids all fragile games with includes, and is the position we want
> to be in, longterm.

Ah, so they already have something going in that direction. Looks
okay to me, albeit ...

> All the requisite infrastructure looks to be already present.

... there's the one open prereq question of what happens upon
tool chain updates. It's not clear to me if/how kconfig would
get invoked despite none of the recorded dependencies having
changed in such a case. (I'm sure you realize there's no issue
with this when the determination occurs out of a makefile.)

Jan


^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [Xen-devel] [PATCH v5 00/10] x86emul: further work
  2020-03-24 12:26 [Xen-devel] [PATCH v5 00/10] x86emul: further work Jan Beulich
                   ` (12 preceding siblings ...)
  2020-03-25 11:41 ` Andrew Cooper
@ 2020-03-26 11:49 ` Jan Beulich
  13 siblings, 0 replies; 29+ messages in thread
From: Jan Beulich @ 2020-03-26 11:49 UTC (permalink / raw)
  To: Andrew Cooper; +Cc: xen-devel, Paul Durrant, Wei Liu, Roger Pau Monne

On 24.03.2020 13:26, Jan Beulich wrote:
> Some of the later patches are still at least partly RFC, for
> varying reasons (see there). I'd appreciate though if at least
> some of the earlier ones could go in rather sooner than later.
> 
> Patch 1 functionally (for the test harness) depends on
> "libx86/CPUID: fix (not just) leaf 7 processing", while at
> least patch 2 contextually depends on "x86emul: disable
> FPU/MMX/SIMD insn emulation when !HVM".
> 
>  1: x86emul: support AVX512_BF16 insns

Thanks for the ack on patch 1, which I've just pushed, but may I
also ask for an ack or otherwise on the (test harness) prereq
named above, and ideally also the other patch named there?

Thanks, Jan


^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [Xen-devel] [PATCH v5 03/10] x86: determine HAVE_AS_* just once
  2020-03-26  9:50     ` Jan Beulich
@ 2020-03-26 13:42       ` Anthony PERARD
  2020-03-26 14:20         ` Jan Beulich
  0 siblings, 1 reply; 29+ messages in thread
From: Anthony PERARD @ 2020-03-26 13:42 UTC (permalink / raw)
  To: Jan Beulich
  Cc: Stefano Stabellini, Julien Grall, Wei Liu, Andrew Cooper,
	Ian Jackson, George Dunlap, xen-devel, Roger Pau Monne

On Thu, Mar 26, 2020 at 10:50:48AM +0100, Jan Beulich wrote:
> On 25.03.2020 22:12, Andrew Cooper wrote:
> > All the requisite infrastructure looks to be already present.
> 
> ... there's the one open prereq question of what happens upon
> tool chain updates. It's not clear to me if/how kconfig would
> get invoked despite none of the recorded dependencies having
> changed in such a case. (I'm sure you realize there's no issue
> with this when the determination occurs out of a makefile.)

We might need one small change for this to happen, it is to add a
comment in .config which display the output of `$(CC) --version | head
-1`. Simple :-).
If the output of `$(CC) --version` changes, kconfig will run again. That
would be enough to detect tool chain updates, right?

Have a look at "include/config/auto.conf.cmd" to find out how kconfig is
forced to run again.

I'll prepare a patch.

-- 
Anthony PERARD


^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [Xen-devel] [PATCH v5 03/10] x86: determine HAVE_AS_* just once
  2020-03-26 13:42       ` Anthony PERARD
@ 2020-03-26 14:20         ` Jan Beulich
  0 siblings, 0 replies; 29+ messages in thread
From: Jan Beulich @ 2020-03-26 14:20 UTC (permalink / raw)
  To: Anthony PERARD
  Cc: Stefano Stabellini, Julien Grall, Wei Liu, Andrew Cooper,
	Ian Jackson, George Dunlap, xen-devel, Roger Pau Monne

On 26.03.2020 14:42, Anthony PERARD wrote:
> On Thu, Mar 26, 2020 at 10:50:48AM +0100, Jan Beulich wrote:
>> On 25.03.2020 22:12, Andrew Cooper wrote:
>>> All the requisite infrastructure looks to be already present.
>>
>> ... there's the one open prereq question of what happens upon
>> tool chain updates. It's not clear to me if/how kconfig would
>> get invoked despite none of the recorded dependencies having
>> changed in such a case. (I'm sure you realize there's no issue
>> with this when the determination occurs out of a makefile.)
> 
> We might need one small change for this to happen, it is to add a
> comment in .config which display the output of `$(CC) --version | head
> -1`. Simple :-).
> If the output of `$(CC) --version` changes, kconfig will run again. That
> would be enough to detect tool chain updates, right?

I'm afraid it's not that simple: For one I'm not sure that line
would indeed change when a distro issues a gcc update. Even the
minor version may not change in this case; recall as an example
the backport of the compiler support backing INDIRECT_THUNK.
And then gcc isn't the tool chain - it may well be that e.g. gas
gets updated (supporting new insns or directives) without gcc
getting touched at all. Plus finally I don't think a comment
like you suggest would do - while processing it kconfig would
find that $(CC) gets used, but aiui it would then record just
$(CC) as needing tracking, not the output of the command. But
maybe I'm lacking some further detail here.

> Have a look at "include/config/auto.conf.cmd" to find out how kconfig is
> forced to run again.

Oh, that's good to know. Thanks for the pointer.

Jan


^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [Xen-devel] [PATCH v5 09/10] x86/HVM: don't needlessly intercept APERF/MPERF/TSC MSR reads
  2020-03-24 12:37 ` [Xen-devel] [PATCH v5 09/10] x86/HVM: don't needlessly intercept APERF/MPERF/TSC MSR reads Jan Beulich
@ 2020-03-27  2:24   ` Tian, Kevin
  0 siblings, 0 replies; 29+ messages in thread
From: Tian, Kevin @ 2020-03-27  2:24 UTC (permalink / raw)
  To: Jan Beulich, xen-devel
  Cc: Andrew Cooper, Paul Durrant, Nakajima, Jun, Wei Liu, Roger Pau Monne

> From: Jan Beulich <jbeulich@suse.com>
> Sent: Tuesday, March 24, 2020 8:37 PM
> 
> If the hardware can handle accesses, we should allow it to do so. This
> way we can expose EFRO to HVM guests, and "all" that's left for exposing
> APERF/MPERF is to figure out how to handle writes to these MSRs. (Note
> that the leaf 6 guest CPUID checks will evaluate to false for now, as
> recalculate_misc() zaps the entire leaf for now.)
> 
> For TSC the intercepts are made mirror the RDTSC ones.
> 
> Signed-off-by: Jan Beulich <jbeulich@suse.com>

Reviewed-by: Kevin Tian <kevin.tian@intel.com>

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [Xen-devel] [PATCH v5 01/10] x86emul: support AVX512_BF16 insns
  2020-03-24 12:30 ` [Xen-devel] [PATCH v5 01/10] x86emul: support AVX512_BF16 insns Jan Beulich
  2020-03-25 20:02   ` Andrew Cooper
@ 2020-03-27 18:20   ` Andrew Cooper
  2020-03-30  6:40     ` Jan Beulich
  1 sibling, 1 reply; 29+ messages in thread
From: Andrew Cooper @ 2020-03-27 18:20 UTC (permalink / raw)
  To: Jan Beulich, xen-devel; +Cc: Paul Durrant, Wei Liu, Roger Pau Monne

On 24/03/2020 12:30, Jan Beulich wrote:
> --- a/tools/tests/x86_emulator/evex-disp8.c
> +++ b/tools/tests/x86_emulator/evex-disp8.c
> @@ -550,6 +550,12 @@ static const struct test avx512_4vnniw_5
>      INSN(p4dpwssds, f2, 0f38, 53, el_4, d, vl),
>  };
>  
> +static const struct test avx512_bf16_all[] = {
> +    INSN(vcvtne2ps2bf16, f2, 0f38, 72, vl, d, vl),
> +    INSN(vcvtneps2bf16,  f3, 0f38, 72, vl, d, vl),
> +    INSN(vdpbf16ps,      f3, 0f38, 52, vl, d, vl),
> +};
> +
>  static const struct test avx512_bitalg_all[] = {
>      INSN(popcnt,      66, 0f38, 54, vl, bw, vl),
>      INSN(pshufbitqmb, 66, 0f38, 8f, vl,  b, vl),
> @@ -984,6 +990,7 @@ void evex_disp8_test(void *instr, struct
>      RUN(avx512pf, 512);
>      RUN(avx512_4fmaps, 512);
>      RUN(avx512_4vnniw, 512);
> +    RUN(avx512_bf16, all);
>      RUN(avx512_bitalg, all);
>      RUN(avx512_ifma, all);
>      RUN(avx512_vbmi, all);
> --- a/tools/tests/x86_emulator/test_x86_emulator.c
> +++ b/tools/tests/x86_emulator/test_x86_emulator.c
> @@ -4516,6 +4516,80 @@ int main(int argc, char **argv)
>      else
>          printf("skipped\n");
>  
> +    if ( stack_exec && cpu_has_avx512_bf16 )
> +    {
> +        decl_insn(vcvtne2ps2bf16);
> +        decl_insn(vcvtneps2bf16);
> +        decl_insn(vdpbf16ps);
> +        static const struct {
> +            float f[16];
> +        } in1 = {{
> +            1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
> +        }}, in2 = {{
> +            1, -2, 3, -4, 5, -6, 7, -8, 9, -10, 11, -12, 13, -14, 15, -16
> +        }}, out = {{
> +            1 * 1 + 2 * 2, 3 * 3 + 4 * 4,
> +            5 * 5 + 6 * 6, 7 * 7 + 8 * 8,
> +            9 * 9 + 10 * 10, 11 * 11 + 12 * 12,
> +            13 * 13 + 14 * 14, 15 * 15 + 16 * 16,
> +            1 * 1 - 2 * 2, 3 * 3 - 4 * 4,
> +            5 * 5 - 6 * 6, 7 * 7 - 8 * 8,
> +            9 * 9 - 10 * 10, 11 * 11 - 12 * 12,
> +            13 * 13 - 14 * 14, 15 * 15 - 16 * 16
> +        }};
> +
> +        printf("%-40s", "Testing vcvtne2ps2bf16 64(%ecx),%zmm1,%zmm2...");
> +        asm volatile ( "vmovups %1, %%zmm1\n"
> +                       put_insn(vcvtne2ps2bf16,
> +                                /* vcvtne2ps2bf16 64(%0), %%zmm1, %%zmm2 */
> +                                ".byte 0x62, 0xf2, 0x77, 0x48, 0x72, 0x51, 0x01")
> +                       :: "c" (NULL), "m" (in2) );
> +        set_insn(vcvtne2ps2bf16);
> +        regs.ecx = (unsigned long)&in1 - 64;
> +        rc = x86_emulate(&ctxt, &emulops);
> +        if ( rc != X86EMUL_OKAY || !check_eip(vcvtne2ps2bf16) )
> +            goto fail;
> +        printf("pending\n");
> +
> +        printf("%-40s", "Testing vcvtneps2bf16 64(%ecx),%ymm3...");
> +        asm volatile ( put_insn(vcvtneps2bf16,
> +                                /* vcvtneps2bf16 64(%0), %%ymm3 */
> +                                ".byte 0x62, 0xf2, 0x7e, 0x48, 0x72, 0x59, 0x01")
> +                       :: "c" (NULL) );
> +        set_insn(vcvtneps2bf16);
> +        rc = x86_emulate(&ctxt, &emulops);
> +        if ( rc != X86EMUL_OKAY || !check_eip(vcvtneps2bf16) )
> +            goto fail;
> +        asm ( "vmovdqa %%ymm2, %%ymm5\n\t"
> +              "vpcmpeqd %%zmm3, %%zmm5, %%k0\n\t"
> +              "kmovw %%k0, %0"
> +              : "=g" (rc) : "m" (out) );
> +        if ( rc != 0xffff )
> +            goto fail;
> +        printf("pending\n");
> +
> +        printf("%-40s", "Testing vdpbf16ps 128(%ecx),%zmm2,%zmm4...");
> +        asm volatile ( "vmovdqa %%ymm3, %0\n\t"
> +                       "vmovdqa %%ymm3, %1\n"
> +                       put_insn(vdpbf16ps,
> +                                /* vdpbf16ps 128(%2), %%zmm2, %%zmm4 */
> +                                ".byte 0x62, 0xf2, 0x6e, 0x48, 0x52, 0x61, 0x02")
> +                       : "=&m" (res[0]), "=&m" (res[8])
> +                       : "c" (NULL)
> +                       : "memory" );
> +        set_insn(vdpbf16ps);
> +        regs.ecx = (unsigned long)res - 128;
> +        rc = x86_emulate(&ctxt, &emulops);
> +        if ( rc != X86EMUL_OKAY || !check_eip(vdpbf16ps) )
> +            goto fail;
> +        asm ( "vcmpeqps %1, %%zmm4, %%k0\n\t"
> +              "kmovw %%k0, %0"
> +              : "=g" (rc) : "m" (out) );
> +        if ( rc != 0xffff )
> +            goto fail;
> +        printf("okay\n");
> +    }

I've just tried this out on an SDP.

Testing vcvtne2ps2bf16 64(%ecx),%zmm1,%zmm2...pending
Testing vcvtneps2bf16 64(%ecx),%ymm3... pending
Testing vdpbf16ps 128(%ecx),%zmm2,%zmm4...okay
...
Testing avx512_bf16/all disp8 handling...okay

What is the "pending" supposed to signify?  I can see that these three
are linked, and that is fine, but at the point we've checked the
intermediate results, it should be "okay", no?

~Andrew


^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [Xen-devel] [PATCH v5 01/10] x86emul: support AVX512_BF16 insns
  2020-03-27 18:20   ` Andrew Cooper
@ 2020-03-30  6:40     ` Jan Beulich
  0 siblings, 0 replies; 29+ messages in thread
From: Jan Beulich @ 2020-03-30  6:40 UTC (permalink / raw)
  To: Andrew Cooper; +Cc: xen-devel, Paul Durrant, Wei Liu, Roger Pau Monne

On 27.03.2020 19:20, Andrew Cooper wrote:
> On 24/03/2020 12:30, Jan Beulich wrote:
>> --- a/tools/tests/x86_emulator/evex-disp8.c
>> +++ b/tools/tests/x86_emulator/evex-disp8.c
>> @@ -550,6 +550,12 @@ static const struct test avx512_4vnniw_5
>>      INSN(p4dpwssds, f2, 0f38, 53, el_4, d, vl),
>>  };
>>  
>> +static const struct test avx512_bf16_all[] = {
>> +    INSN(vcvtne2ps2bf16, f2, 0f38, 72, vl, d, vl),
>> +    INSN(vcvtneps2bf16,  f3, 0f38, 72, vl, d, vl),
>> +    INSN(vdpbf16ps,      f3, 0f38, 52, vl, d, vl),
>> +};
>> +
>>  static const struct test avx512_bitalg_all[] = {
>>      INSN(popcnt,      66, 0f38, 54, vl, bw, vl),
>>      INSN(pshufbitqmb, 66, 0f38, 8f, vl,  b, vl),
>> @@ -984,6 +990,7 @@ void evex_disp8_test(void *instr, struct
>>      RUN(avx512pf, 512);
>>      RUN(avx512_4fmaps, 512);
>>      RUN(avx512_4vnniw, 512);
>> +    RUN(avx512_bf16, all);
>>      RUN(avx512_bitalg, all);
>>      RUN(avx512_ifma, all);
>>      RUN(avx512_vbmi, all);
>> --- a/tools/tests/x86_emulator/test_x86_emulator.c
>> +++ b/tools/tests/x86_emulator/test_x86_emulator.c
>> @@ -4516,6 +4516,80 @@ int main(int argc, char **argv)
>>      else
>>          printf("skipped\n");
>>  
>> +    if ( stack_exec && cpu_has_avx512_bf16 )
>> +    {
>> +        decl_insn(vcvtne2ps2bf16);
>> +        decl_insn(vcvtneps2bf16);
>> +        decl_insn(vdpbf16ps);
>> +        static const struct {
>> +            float f[16];
>> +        } in1 = {{
>> +            1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
>> +        }}, in2 = {{
>> +            1, -2, 3, -4, 5, -6, 7, -8, 9, -10, 11, -12, 13, -14, 15, -16
>> +        }}, out = {{
>> +            1 * 1 + 2 * 2, 3 * 3 + 4 * 4,
>> +            5 * 5 + 6 * 6, 7 * 7 + 8 * 8,
>> +            9 * 9 + 10 * 10, 11 * 11 + 12 * 12,
>> +            13 * 13 + 14 * 14, 15 * 15 + 16 * 16,
>> +            1 * 1 - 2 * 2, 3 * 3 - 4 * 4,
>> +            5 * 5 - 6 * 6, 7 * 7 - 8 * 8,
>> +            9 * 9 - 10 * 10, 11 * 11 - 12 * 12,
>> +            13 * 13 - 14 * 14, 15 * 15 - 16 * 16
>> +        }};
>> +
>> +        printf("%-40s", "Testing vcvtne2ps2bf16 64(%ecx),%zmm1,%zmm2...");
>> +        asm volatile ( "vmovups %1, %%zmm1\n"
>> +                       put_insn(vcvtne2ps2bf16,
>> +                                /* vcvtne2ps2bf16 64(%0), %%zmm1, %%zmm2 */
>> +                                ".byte 0x62, 0xf2, 0x77, 0x48, 0x72, 0x51, 0x01")
>> +                       :: "c" (NULL), "m" (in2) );
>> +        set_insn(vcvtne2ps2bf16);
>> +        regs.ecx = (unsigned long)&in1 - 64;
>> +        rc = x86_emulate(&ctxt, &emulops);
>> +        if ( rc != X86EMUL_OKAY || !check_eip(vcvtne2ps2bf16) )
>> +            goto fail;
>> +        printf("pending\n");
>> +
>> +        printf("%-40s", "Testing vcvtneps2bf16 64(%ecx),%ymm3...");
>> +        asm volatile ( put_insn(vcvtneps2bf16,
>> +                                /* vcvtneps2bf16 64(%0), %%ymm3 */
>> +                                ".byte 0x62, 0xf2, 0x7e, 0x48, 0x72, 0x59, 0x01")
>> +                       :: "c" (NULL) );
>> +        set_insn(vcvtneps2bf16);
>> +        rc = x86_emulate(&ctxt, &emulops);
>> +        if ( rc != X86EMUL_OKAY || !check_eip(vcvtneps2bf16) )
>> +            goto fail;
>> +        asm ( "vmovdqa %%ymm2, %%ymm5\n\t"
>> +              "vpcmpeqd %%zmm3, %%zmm5, %%k0\n\t"
>> +              "kmovw %%k0, %0"
>> +              : "=g" (rc) : "m" (out) );
>> +        if ( rc != 0xffff )
>> +            goto fail;
>> +        printf("pending\n");
>> +
>> +        printf("%-40s", "Testing vdpbf16ps 128(%ecx),%zmm2,%zmm4...");
>> +        asm volatile ( "vmovdqa %%ymm3, %0\n\t"
>> +                       "vmovdqa %%ymm3, %1\n"
>> +                       put_insn(vdpbf16ps,
>> +                                /* vdpbf16ps 128(%2), %%zmm2, %%zmm4 */
>> +                                ".byte 0x62, 0xf2, 0x6e, 0x48, 0x52, 0x61, 0x02")
>> +                       : "=&m" (res[0]), "=&m" (res[8])
>> +                       : "c" (NULL)
>> +                       : "memory" );
>> +        set_insn(vdpbf16ps);
>> +        regs.ecx = (unsigned long)res - 128;
>> +        rc = x86_emulate(&ctxt, &emulops);
>> +        if ( rc != X86EMUL_OKAY || !check_eip(vdpbf16ps) )
>> +            goto fail;
>> +        asm ( "vcmpeqps %1, %%zmm4, %%k0\n\t"
>> +              "kmovw %%k0, %0"
>> +              : "=g" (rc) : "m" (out) );
>> +        if ( rc != 0xffff )
>> +            goto fail;
>> +        printf("okay\n");
>> +    }
> 
> I've just tried this out on an SDP.
> 
> Testing vcvtne2ps2bf16 64(%ecx),%zmm1,%zmm2...pending
> Testing vcvtneps2bf16 64(%ecx),%ymm3... pending
> Testing vdpbf16ps 128(%ecx),%zmm2,%zmm4...okay
> ...
> Testing avx512_bf16/all disp8 handling...okay
> 
> What is the "pending" supposed to signify?  I can see that these three
> are linked, and that is fine, but at the point we've checked the
> intermediate results, it should be "okay", no?

I didn't think so, and hence I've used "pending". Whether the result of
the first two is indeed correct is only known after the 3rd.

Jan


^ permalink raw reply	[flat|nested] 29+ messages in thread

end of thread, back to index

Thread overview: 29+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-03-24 12:26 [Xen-devel] [PATCH v5 00/10] x86emul: further work Jan Beulich
2020-03-24 12:29 ` [Xen-devel] [PATCH v5 02/10] x86emul: support AVX512_BF16 insns Jan Beulich
2020-03-24 12:29 ` [Xen-devel] [PATCH v5 02/10] x86emul: support MOVDIRI insn Jan Beulich
2020-03-25 20:58   ` Andrew Cooper
2020-03-26  9:20     ` Jan Beulich
2020-03-24 12:30 ` [Xen-devel] [PATCH v5 01/10] x86emul: support AVX512_BF16 insns Jan Beulich
2020-03-25 20:02   ` Andrew Cooper
2020-03-27 18:20   ` Andrew Cooper
2020-03-30  6:40     ` Jan Beulich
2020-03-24 12:33 ` [Xen-devel] [PATCH v5 03/10] x86: determine HAVE_AS_* just once Jan Beulich
2020-03-25 21:12   ` Andrew Cooper
2020-03-26  9:50     ` Jan Beulich
2020-03-26 13:42       ` Anthony PERARD
2020-03-26 14:20         ` Jan Beulich
2020-03-24 12:33 ` [Xen-devel] [PATCH v5 04/10] x86: move back clang no integrated assembler tests Jan Beulich
2020-03-24 12:34 ` [Xen-devel] [PATCH v5 05/10] x86emul: support MOVDIR64B insn Jan Beulich
2020-03-25 11:19   ` Paul Durrant
2020-03-25 11:46     ` Jan Beulich
2020-03-24 12:34 ` [Xen-devel] [PATCH v5 06/10] x86emul: support ENQCMD insn Jan Beulich
2020-03-24 12:35 ` [Xen-devel] [PATCH v5 07/10] x86/HVM: scale MPERF values reported to guests (on AMD) Jan Beulich
2020-03-24 12:36 ` [Xen-devel] [PATCH v5 08/10] x86emul: support RDPRU Jan Beulich
2020-03-24 12:37 ` [Xen-devel] [PATCH v5 09/10] x86/HVM: don't needlessly intercept APERF/MPERF/TSC MSR reads Jan Beulich
2020-03-27  2:24   ` Tian, Kevin
2020-03-24 12:37 ` [Xen-devel] [PATCH v5 10/10] x86emul: support MCOMMIT Jan Beulich
2020-03-24 12:43 ` [Xen-devel] [PATCH v5 00/10] x86emul: further work Jan Beulich
2020-03-25 10:59   ` Paul Durrant
2020-03-25 11:41 ` Andrew Cooper
2020-03-25 11:45   ` Jan Beulich
2020-03-26 11:49 ` Jan Beulich

Xen-Devel Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/xen-devel/0 xen-devel/git/0.git
	git clone --mirror https://lore.kernel.org/xen-devel/1 xen-devel/git/1.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 xen-devel xen-devel/ https://lore.kernel.org/xen-devel \
		xen-devel@lists.xenproject.org xen-devel@lists.xen.org
	public-inbox-index xen-devel

Example config snippet for mirrors

Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.xenproject.lists.xen-devel


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git