* [PATCH 0/8] x86emul: support various ISA extensions
@ 2017-01-13 15:11 Jan Beulich
  2017-01-13 15:30 ` [PATCH 1/8] x86emul: support POPCNT Jan Beulich
                   ` (7 more replies)
  0 siblings, 8 replies; 29+ messages in thread
From: Jan Beulich @ 2017-01-13 15:11 UTC (permalink / raw)
  To: xen-devel; +Cc: Andrew Cooper

... plus, in the final patch, some cleanup.

1: support POPCNT
2: support ADCX/ADOX
3: support BMI1 insns
4: support BMI2 insns
5: support TBM insns
6: support RDRAND
7: support RDPID
8: rename the no_writeback label

Signed-off-by: Jan Beulich <jbeulich@suse.com>



* [PATCH 1/8] x86emul: support POPCNT
  2017-01-13 15:11 [PATCH 0/8] x86emul: support various ISA extensions Jan Beulich
@ 2017-01-13 15:30 ` Jan Beulich
  2017-01-13 16:31   ` Andrew Cooper
  2017-01-13 15:31 ` [PATCH 2/8] x86emul: support ADCX/ADOX Jan Beulich
                   ` (6 subsequent siblings)
  7 siblings, 1 reply; 29+ messages in thread
From: Jan Beulich @ 2017-01-13 15:30 UTC (permalink / raw)
  To: xen-devel; +Cc: Andrew Cooper

[-- Attachment #1: Type: text/plain, Size: 4732 bytes --]

Signed-off-by: Jan Beulich <jbeulich@suse.com>
---
TBD: Alternative code needed for binutils < 2.18?
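
One way the TBD could be addressed, as a rough sketch only: pin the operands
to fixed registers and emit the encoding by hand, so that assemblers which
don't know the POPCNT mnemonic yet can still build the file. 64-bit form
shown; the 32-bit variant would simply drop the 0x48 REX.W byte:

/* Illustrative helper, not part of the patch. */
static inline unsigned long popcnt64_raw(unsigned long val)
{
    unsigned long cnt;

    /* popcnt %rcx,%rax by hand: F3 REX.W 0F B8 /r with ModRM 0xc1. */
    asm ( ".byte 0xf3, 0x48, 0x0f, 0xb8, 0xc1"
          : "=a" (cnt) : "c" (val) : "cc" );
    return cnt;
}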

--- a/tools/tests/x86_emulator/test_x86_emulator.c
+++ b/tools/tests/x86_emulator/test_x86_emulator.c
@@ -684,6 +684,52 @@ int main(int argc, char **argv)
         goto fail;
     printf("okay\n");
 
+    printf("%-40s", "Testing popcnt (%edx),%cx...");
+    if ( cpu_has_popcnt )
+    {
+        instr[0] = 0x66; instr[1] = 0xf3;
+        instr[2] = 0x0f; instr[3] = 0xb8; instr[4] = 0x0a;
+
+        *res        = 0xfedcba98;
+        regs.edx    = (unsigned long)res;
+        regs.eflags = 0xac3;
+        regs.eip    = (unsigned long)&instr[0];
+        rc = x86_emulate(&ctxt, &emulops);
+        if ( (rc != X86EMUL_OKAY) || (uint16_t)regs.ecx != 8 || *res != 0xfedcba98 ||
+             (regs.eflags & 0xfeb) != 0x202 ||
+             (regs.eip != (unsigned long)&instr[5]) )
+            goto fail;
+        printf("okay\n");
+
+        printf("%-40s", "Testing popcnt (%edx),%ecx...");
+        regs.eflags = 0xac3;
+        regs.eip    = (unsigned long)&instr[1];
+        rc = x86_emulate(&ctxt, &emulops);
+        if ( (rc != X86EMUL_OKAY) || regs.ecx != 20 || *res != 0xfedcba98 ||
+             (regs.eflags & 0xfeb) != 0x202 ||
+             (regs.eip != (unsigned long)&instr[5]) )
+            goto fail;
+        printf("okay\n");
+
+#ifdef __x86_64__
+        printf("%-40s", "Testing popcnt (%rdx),%rcx...");
+        instr[0]    = 0xf3;
+        instr[1]    = 0x48;
+        res[1]      = 0x12345678;
+        regs.eflags = 0xac3;
+        regs.eip    = (unsigned long)&instr[0];
+        rc = x86_emulate(&ctxt, &emulops);
+        if ( (rc != X86EMUL_OKAY) || regs.ecx != 33 ||
+             res[0] != 0xfedcba98 || res[1] != 0x12345678 ||
+             (regs.eflags & 0xfeb) != 0x202 ||
+             (regs.eip != (unsigned long)&instr[5]) )
+            goto fail;
+        printf("okay\n");
+#endif
+    }
+    else
+        printf("skipped\n");
+
     printf("%-40s", "Testing lar (null selector)...");
     instr[0] = 0x0f; instr[1] = 0x02; instr[2] = 0xc1;
     regs.eflags = 0x240;
--- a/tools/tests/x86_emulator/x86_emulate.h
+++ b/tools/tests/x86_emulator/x86_emulate.h
@@ -81,6 +81,12 @@ static inline uint64_t xgetbv(uint32_t x
     (res.d & (1U << 26)) != 0; \
 })
 
+#define cpu_has_popcnt ({ \
+    struct cpuid_leaf res; \
+    emul_test_cpuid(1, 0, &res, NULL); \
+    (res.c & (1U << 23)) != 0; \
+})
+
 #define cpu_has_xsave ({ \
     struct cpuid_leaf res; \
     emul_test_cpuid(1, 0, &res, NULL); \
--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -1335,6 +1335,7 @@ static bool vcpu_has(
 #define vcpu_has_cx16()        vcpu_has(         1, ECX, 13, ctxt, ops)
 #define vcpu_has_sse4_2()      vcpu_has(         1, ECX, 20, ctxt, ops)
 #define vcpu_has_movbe()       vcpu_has(         1, ECX, 22, ctxt, ops)
+#define vcpu_has_popcnt()      vcpu_has(         1, ECX, 23, ctxt, ops)
 #define vcpu_has_avx()         vcpu_has(         1, ECX, 28, ctxt, ops)
 #define vcpu_has_lahf_lm()     vcpu_has(0x80000001, ECX,  0, ctxt, ops)
 #define vcpu_has_cr8_legacy()  vcpu_has(0x80000001, ECX,  4, ctxt, ops)
@@ -2078,8 +2079,12 @@ x86_decode_twobyte(
         op_bytes = mode_64bit() ? 8 : 4;
         break;
 
+    case 0xb8: /* jmpe / popcnt */
+        if ( rep_prefix() )
+            ctxt->opcode |= MASK_INSR(vex.pfx, X86EMUL_OPC_PFX_MASK);
+        break;
+
         /* Intentionally not handling here despite being modified by F3:
-    case 0xb8: jmpe / popcnt
     case 0xbc: bsf / tzcnt
     case 0xbd: bsr / lzcnt
          * They're being dealt with in the execution phase (if at all).
@@ -5603,6 +5608,14 @@ x86_emulate(
         dst.val = (uint16_t)src.val;
         break;
 
+    case X86EMUL_OPC_F3(0x0f, 0xb8): /* popcnt r/m,r */
+        host_and_vcpu_must_have(popcnt);
+        asm ( "popcnt %1,%0" : "=r" (dst.val) : "rm" (src.val) );
+        _regs._eflags &= ~EFLAGS_MASK;
+        if ( !dst.val )
+            _regs._eflags |= EFLG_ZF;
+        break;
+
     case X86EMUL_OPC(0x0f, 0xba): /* Grp8 */
         switch ( modrm_reg & 7 )
         {
--- a/xen/include/asm-x86/cpufeature.h
+++ b/xen/include/asm-x86/cpufeature.h
@@ -40,6 +40,7 @@
 #define cpu_has_mmx		1
 #define cpu_has_sse3		boot_cpu_has(X86_FEATURE_SSE3)
 #define cpu_has_sse4_2		boot_cpu_has(X86_FEATURE_SSE4_2)
+#define cpu_has_popcnt		boot_cpu_has(X86_FEATURE_POPCNT)
 #define cpu_has_htt		boot_cpu_has(X86_FEATURE_HTT)
 #define cpu_has_nx		boot_cpu_has(X86_FEATURE_NX)
 #define cpu_has_clflush		boot_cpu_has(X86_FEATURE_CLFLUSH)
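
For reference, the flag behaviour relied upon above (and by the eflags checks
in the test) can be written out in plain C; an illustrative sketch only, with
the flag bits as raw masks rather than the emulator's EFLG_* names:

/* Illustrative only: POPCNT clears OF/SF/AF/CF/PF and sets ZF exactly when
 * the source (and hence the count) is zero. */
static unsigned int popcnt_ref(unsigned long src, unsigned long *eflags)
{
    unsigned int n = 0;

    for ( ; src; src &= src - 1 )      /* clear the lowest set bit */
        ++n;

    *eflags &= ~0x8d5UL;               /* OF|SF|ZF|AF|PF|CF */
    if ( !n )
        *eflags |= 0x040;              /* ZF */

    return n;
}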




* [PATCH 2/8] x86emul: support ADCX/ADOX
  2017-01-13 15:11 [PATCH 0/8] x86emul: support various ISA extensions Jan Beulich
  2017-01-13 15:30 ` [PATCH 1/8] x86emul: support POPCNT Jan Beulich
@ 2017-01-13 15:31 ` Jan Beulich
  2017-01-13 16:34   ` Andrew Cooper
  2017-01-13 15:31 ` [PATCH 3/8] x86emul: support BMI1 insns Jan Beulich
                   ` (5 subsequent siblings)
  7 siblings, 1 reply; 29+ messages in thread
From: Jan Beulich @ 2017-01-13 15:31 UTC (permalink / raw)
  To: xen-devel; +Cc: Andrew Cooper

[-- Attachment #1: Type: text/plain, Size: 5818 bytes --]

Signed-off-by: Jan Beulich <jbeulich@suse.com>

--- a/tools/tests/x86_emulator/test_x86_emulator.c
+++ b/tools/tests/x86_emulator/test_x86_emulator.c
@@ -885,10 +885,65 @@ int main(int argc, char **argv)
                               #which ": " insn "\n"                     \
                               ".equ " #which "_len, .-" #which "\n"     \
                               ".popsection"
-#define set_insn(which) (regs.eip = (unsigned long)memcpy(instr, which, \
-                                             (unsigned long)which##_len))
-#define check_eip(which) (regs.eip == (unsigned long)instr + \
+#define set_insn(which) (regs.eip = (unsigned long)(which))
+#define valid_eip(which) (regs.eip >= (unsigned long)(which) && \
+                          regs.eip < (unsigned long)(which) + \
                                       (unsigned long)which##_len)
+#define check_eip(which) (regs.eip == (unsigned long)(which) + \
+                                      (unsigned long)which##_len)
+
+    printf("%-40s", "Testing adcx/adox ...");
+    {
+        static const unsigned int data[] = {
+            0x01234567, 0x12345678, 0x23456789, 0x3456789a,
+            0x456789ab, 0x56789abc, 0x6789abcd, 0x789abcde,
+            0x89abcdef, 0x9abcdef0, 0xabcdef01, 0xbcdef012,
+            0xcdef0123, 0xdef01234, 0xef012345, 0xf0123456
+        };
+        decl_insn(adx);
+        unsigned int cf, of;
+
+        asm volatile ( put_insn(adx, ".Lloop%=:\n\t"
+                                     "adcx (%[addr]), %k[dst1]\n\t"
+                                     "adox -%c[full]-%c[elem](%[addr],%[cnt],2*%c[elem]), %k[dst2]\n\t"
+                                     "lea %c[elem](%[addr]),%[addr]\n\t"
+                                     "loop .Lloop%=\n\t"
+                                     "adcx %k[cnt], %k[dst1]\n\t"
+                                     "adox %k[cnt], %k[dst2]\n\t" )
+                       : [addr] "=S" (regs.esi), [cnt] "=c" (regs.ecx),
+                         [dst1] "=a" (regs.eax), [dst2] "=d" (regs.edx)
+                       : [full] "i" (sizeof(data)), [elem] "i" (sizeof(*data)),
+                         "[addr]" (data), "[cnt]" (ARRAY_SIZE(data)),
+                         "[dst1]" (0), "[dst2]" (0) );
+
+        set_insn(adx);
+        regs.eflags = 0x2d6;
+        of = cf = i = 0;
+        while ( (rc = x86_emulate(&ctxt, &emulops)) == X86EMUL_OKAY )
+        {
+            ++i;
+            /*
+             * Count CF/OF being set after each loop iteration during the
+             * first half (to observe different counts), in order to catch
+             * the wrong flag being fiddled with.
+             */
+            if ( i < ARRAY_SIZE(data) * 2 && !(i % 4) )
+            {
+                if ( regs.eflags & 0x001 )
+                   ++cf;
+                if ( regs.eflags & 0x800 )
+                   ++of;
+            }
+            if ( !valid_eip(adx) )
+                break;
+        }
+        if ( (rc != X86EMUL_OKAY) ||
+             i != ARRAY_SIZE(data) * 4 + 2 || cf != 1 || of != 5 ||
+             regs.eax != 0xffffffff || regs.ecx || regs.edx != 0xffffffff ||
+             !check_eip(adx) || regs.eflags != 0x2d6 )
+            goto fail;
+        printf("okay\n");
+    }
 
     printf("%-40s", "Testing movq %mm3,(%ecx)...");
     if ( stack_exec && cpu_has_mmx )
--- a/tools/tests/x86_emulator/x86_emulate.c
+++ b/tools/tests/x86_emulator/x86_emulate.c
@@ -60,6 +60,10 @@ int emul_test_cpuid(
     if ( leaf == 1 )
         res->c |= 1U << 22;
 
+    /* The emulator doesn't itself use ADCX/ADOX, so we can always run the test. */
+    if ( leaf == 7 && subleaf == 0 )
+        res->b |= 1U << 19;
+
     return X86EMUL_OKAY;
 }
 
--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -1345,6 +1345,7 @@ static bool vcpu_has(
 #define vcpu_has_hle()         vcpu_has(         7, EBX,  4, ctxt, ops)
 #define vcpu_has_rtm()         vcpu_has(         7, EBX, 11, ctxt, ops)
 #define vcpu_has_mpx()         vcpu_has(         7, EBX, 14, ctxt, ops)
+#define vcpu_has_adx()         vcpu_has(         7, EBX, 19, ctxt, ops)
 #define vcpu_has_smap()        vcpu_has(         7, EBX, 20, ctxt, ops)
 #define vcpu_has_clflushopt()  vcpu_has(         7, EBX, 23, ctxt, ops)
 #define vcpu_has_clwb()        vcpu_has(         7, EBX, 24, ctxt, ops)
@@ -5864,6 +5865,40 @@ x86_emulate(
         }
         break;
 #endif
+
+    case X86EMUL_OPC_66(0x0f38, 0xf6): /* adcx r/m,r */
+    case X86EMUL_OPC_F3(0x0f38, 0xf6): /* adox r/m,r */
+    {
+        unsigned int mask = rep_prefix() ? EFLG_OF : EFLG_CF;
+        unsigned int aux = _regs._eflags & mask ? ~0 : 0;
+        bool carry;
+
+        vcpu_must_have(adx);
+#ifdef __x86_64__
+        if ( op_bytes == 8 )
+            asm ( "add %[aux],%[aux]\n\t"
+                  "adc %[src],%[dst]\n\t"
+                  ASM_FLAG_OUT(, "setc %[carry]")
+                  : [dst] "+r" (dst.val),
+                    [carry] ASM_FLAG_OUT("=@ccc", "=qm") (carry),
+                    [aux] "+r" (aux)
+                  : [src] "rm" (src.val) );
+        else
+#endif
+            asm ( "add %[aux],%[aux]\n\t"
+                  "adc %k[src],%k[dst]\n\t"
+                  ASM_FLAG_OUT(, "setc %[carry]")
+                  : [dst] "+r" (dst.val),
+                    [carry] ASM_FLAG_OUT("=@ccc", "=qm") (carry),
+                    [aux] "+r" (aux)
+                  : [src] "rm" (src.val) );
+        if ( carry )
+            _regs._eflags |= mask;
+        else
+            _regs._eflags &= ~mask;
+        break;
+    }
+
     default:
         goto cannot_emulate;
     }
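
The asm() above first turns the tracked flag into the hardware carry:
doubling an all-ones aux value sets CF, doubling zero clears it, after which
a plain ADC does the work and only the resulting carry gets folded back into
CF resp. OF. The same idea as a standalone sketch (hypothetical helper name,
raw flag masks instead of the EFLG_* names):

#include <stdbool.h>
#include <stdint.h>

/* Illustrative only: flag_mask is 0x001 (CF) for ADCX, 0x800 (OF) for ADOX. */
static uint32_t adx_ref(uint32_t dst, uint32_t src, uint32_t *eflags,
                        uint32_t flag_mask)
{
    uint32_t aux = (*eflags & flag_mask) ? ~0u : 0;
    bool carry;

    asm ( "add %[aux],%[aux]\n\t"      /* CF := tracked flag */
          "adc %[src],%[dst]\n\t"
          "setc %[carry]"
          : [dst] "+r" (dst), [aux] "+r" (aux), [carry] "=qm" (carry)
          : [src] "rm" (src)
          : "cc" );

    if ( carry )
        *eflags |= flag_mask;
    else
        *eflags &= ~flag_mask;         /* no other flag is touched */

    return dst;
}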




* [PATCH 3/8] x86emul: support BMI1 insns
  2017-01-13 15:11 [PATCH 0/8] x86emul: support various ISA extensions Jan Beulich
  2017-01-13 15:30 ` [PATCH 1/8] x86emul: support POPCNT Jan Beulich
  2017-01-13 15:31 ` [PATCH 2/8] x86emul: support ADCX/ADOX Jan Beulich
@ 2017-01-13 15:31 ` Jan Beulich
  2017-01-13 17:40   ` Andrew Cooper
  2017-01-13 15:32 ` [PATCH 4/8] x86emul: support BMI2 insns Jan Beulich
                   ` (4 subsequent siblings)
  7 siblings, 1 reply; 29+ messages in thread
From: Jan Beulich @ 2017-01-13 15:31 UTC (permalink / raw)
  To: xen-devel; +Cc: Andrew Cooper

[-- Attachment #1: Type: text/plain, Size: 9043 bytes --]

Signed-off-by: Jan Beulich <jbeulich@suse.com>

--- a/tools/tests/x86_emulator/test_x86_emulator.c
+++ b/tools/tests/x86_emulator/test_x86_emulator.c
@@ -892,6 +892,133 @@ int main(int argc, char **argv)
 #define check_eip(which) (regs.eip == (unsigned long)(which) + \
                                       (unsigned long)which##_len)
 
+    printf("%-40s", "Testing andn (%edx),%ecx,%ebx...");
+    if ( stack_exec && cpu_has_bmi1 )
+    {
+        decl_insn(andn);
+
+        asm volatile ( put_insn(andn, "andn (%0), %%ecx, %%ebx")
+                       :: "d" (NULL) );
+        set_insn(andn);
+
+        *res        = 0xfedcba98;
+        regs.ecx    = 0xcccc3333;
+        regs.edx    = (unsigned long)res;
+        regs.eflags = 0xac3;
+        rc = x86_emulate(&ctxt, &emulops);
+        if ( (rc != X86EMUL_OKAY) || regs.ebx != 0x32108888 ||
+             regs.ecx != 0xcccc3333 || *res != 0xfedcba98 ||
+             (regs.eflags & 0xfeb) != 0x202 || !check_eip(andn) )
+            goto fail;
+        printf("okay\n");
+    }
+    else
+        printf("skipped\n");
+
+    printf("%-40s", "Testing bextr %edx,(%ecx),%ebx...");
+    if ( stack_exec && cpu_has_bmi1 )
+    {
+        decl_insn(bextr);
+#ifdef __x86_64__
+        decl_insn(bextr64);
+#endif
+
+        asm volatile ( put_insn(bextr, "bextr %%edx, (%0), %%ebx")
+                       :: "c" (NULL) );
+        set_insn(bextr);
+
+        regs.ecx    = (unsigned long)res;
+        regs.edx    = 0x0a03;
+        regs.eflags = 0xa43;
+        rc = x86_emulate(&ctxt, &emulops);
+        if ( (rc != X86EMUL_OKAY) || regs.ebx != ((*res >> 3) & 0x3ff) ||
+             regs.edx != 0x0a03 || *res != 0xfedcba98 ||
+             (regs.eflags & 0xf6b) != 0x202 || !check_eip(bextr) )
+            goto fail;
+        printf("okay\n");
+#ifdef __x86_64__
+        printf("%-40s", "Testing bextr %r9,(%r10),%r11...");
+
+        asm volatile ( put_insn(bextr64, "bextr %r9, (%r10), %r11") );
+        set_insn(bextr64);
+
+        res[0]      = 0x76543210;
+        res[1]      = 0xfedcba98;
+        regs.r10    = (unsigned long)res;
+        regs.r9     = 0x211e;
+        regs.eflags = 0xa43;
+        rc = x86_emulate(&ctxt, &emulops);
+        if ( (rc != X86EMUL_OKAY) || regs.r9 != 0x211e ||
+             regs.r11 != (((unsigned long)(res[1] << 1) << 1) |
+                          (res[0] >> 30)) ||
+             res[0] != 0x76543210 || res[1] != 0xfedcba98 ||
+             (regs.eflags & 0xf6b) != 0x202 || !check_eip(bextr64) )
+            goto fail;
+        printf("okay\n");
+#endif
+    }
+    else
+        printf("skipped\n");
+
+    printf("%-40s", "Testing blsi (%edx),%ecx...");
+    if ( stack_exec && cpu_has_bmi1 )
+    {
+        decl_insn(blsi);
+
+        asm volatile ( put_insn(blsi, "blsi (%0), %%ecx")
+                       :: "d" (NULL) );
+        set_insn(blsi);
+
+        *res        = 0xfedcba98;
+        regs.edx    = (unsigned long)res;
+        regs.eflags = 0xac2;
+        rc = x86_emulate(&ctxt, &emulops);
+        if ( (rc != X86EMUL_OKAY) || regs.ecx != 8 || *res != 0xfedcba98 ||
+             (regs.eflags & 0xf6b) != 0x203 || !check_eip(blsi) )
+            goto fail;
+        printf("okay\n");
+    }
+    else
+        printf("skipped\n");
+
+    printf("%-40s", "Testing blsmsk (%edx),%ecx...");
+    if ( stack_exec && cpu_has_bmi1 )
+    {
+        decl_insn(blsmsk);
+
+        asm volatile ( put_insn(blsmsk, "blsmsk (%0), %%ecx")
+                       :: "d" (NULL) );
+        set_insn(blsmsk);
+
+        regs.eflags = 0xac3;
+        rc = x86_emulate(&ctxt, &emulops);
+        if ( (rc != X86EMUL_OKAY) || regs.ecx != 0xf || *res != 0xfedcba98 ||
+             (regs.eflags & 0xf6b) != 0x202 || !check_eip(blsmsk) )
+            goto fail;
+        printf("okay\n");
+    }
+    else
+        printf("skipped\n");
+
+    printf("%-40s", "Testing blsr (%edx),%ecx...");
+    if ( stack_exec && cpu_has_bmi1 )
+    {
+        decl_insn(blsr);
+
+        asm volatile ( put_insn(blsr, "blsr (%0), %%ecx")
+                       :: "d" (NULL) );
+        set_insn(blsr);
+
+        regs.eflags = 0xac3;
+        rc = x86_emulate(&ctxt, &emulops);
+        if ( (rc != X86EMUL_OKAY) || regs.ecx != 0xfedcba90 ||
+             (regs.eflags & 0xf6b) != 0x202 || !check_eip(blsr) )
+            goto fail;
+        printf("okay\n");
+    }
+    else
+        printf("skipped\n");
+
     printf("%-40s", "Testing adcx/adox ...");
     {
         static const unsigned int data[] = {
--- a/tools/tests/x86_emulator/x86_emulate.h
+++ b/tools/tests/x86_emulator/x86_emulate.h
@@ -113,6 +113,12 @@ static inline uint64_t xgetbv(uint32_t x
     (res.b & (1U << 5)) != 0; \
 })
 
+#define cpu_has_bmi1 ({ \
+    struct cpuid_leaf res; \
+    emul_test_cpuid(7, 0, &res, NULL); \
+    (res.b & (1U << 3)) != 0; \
+})
+
 int emul_test_cpuid(
     uint32_t leaf,
     uint32_t subleaf,
--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -676,6 +676,16 @@ do{ asm volatile (
 #define __emulate_1op_8byte(_op, _dst, _eflags)
 #endif /* __i386__ */
 
+#define emulate_stub(dst, src...) do {                                  \
+    unsigned long tmp;                                                  \
+    asm volatile ( _PRE_EFLAGS("[efl]", "[msk]", "[tmp]")               \
+                   "call *%[stub];"                                     \
+                   _POST_EFLAGS("[efl]", "[msk]", "[tmp]")              \
+                   : dst, [tmp] "=&r" (tmp), [efl] "+g" (_regs._eflags) \
+                   : [stub] "r" (stub.func),                            \
+                     [msk] "i" (EFLAGS_MASK), ## src );                 \
+} while (0)
+
 /* Fetch next part of the instruction being emulated. */
 #define insn_fetch_bytes(_size)                                         \
 ({ unsigned long _x = 0, _ip = state->ip;                               \
@@ -2295,7 +2305,10 @@ x86_decode(
                         }
                     }
                     else
+                    {
+                        ASSERT(op_bytes == 4);
                         vex.b = 1;
+                    }
                     switch ( b )
                     {
                     case 0x62:
@@ -5866,6 +5879,67 @@ x86_emulate(
         break;
 #endif
 
+    case X86EMUL_OPC_VEX(0x0f38, 0xf2):    /* andn r/m,r,r */
+    case X86EMUL_OPC_VEX(0x0f38, 0xf7):    /* bextr r,r/m,r */
+    {
+        uint8_t *buf = get_stub(stub);
+        typeof(vex) *pvex = container_of(buf + 1, typeof(vex), raw[0]);
+
+        host_and_vcpu_must_have(bmi1);
+        generate_exception_if(vex.l, EXC_UD);
+
+        buf[0] = 0xc4;
+        *pvex = vex;
+        pvex->b = 1;
+        pvex->r = 1;
+        pvex->reg = ~0; /* rAX */
+        buf[3] = b;
+        buf[4] = 0x09; /* reg=rCX r/m=(%rCX) */
+        buf[5] = 0xc3;
+
+        src.reg = decode_register(~vex.reg & (mode_64bit() ? 0xf : 7),
+                                  &_regs, 0);
+        emulate_stub([dst] "=&c" (dst.val), "[dst]" (&src.val), "a" (*src.reg));
+
+        put_stub(stub);
+        break;
+    }
+
+    case X86EMUL_OPC_VEX(0x0f38, 0xf3): /* Grp 17 */
+    {
+        uint8_t *buf = get_stub(stub);
+        typeof(vex) *pvex = container_of(buf + 1, typeof(vex), raw[0]);
+
+        switch ( modrm_reg & 7 )
+        {
+        case 1: /* blsr r,r/m */
+        case 2: /* blsmsk r,r/m */
+        case 3: /* blsi r,r/m */
+            host_and_vcpu_must_have(bmi1);
+            break;
+        default:
+            goto cannot_emulate;
+        }
+
+        generate_exception_if(vex.l, EXC_UD);
+
+        buf[0] = 0xc4;
+        *pvex = vex;
+        pvex->b = 1;
+        pvex->r = 1;
+        pvex->reg = ~0; /* rAX */
+        buf[3] = b;
+        buf[4] = (modrm & 0x38) | 0x01; /* r/m=(%rCX) */
+        buf[5] = 0xc3;
+
+        dst.reg = decode_register(~vex.reg & (mode_64bit() ? 0xf : 7),
+                                  &_regs, 0);
+        emulate_stub("=&a" (dst.val), "c" (&src.val));
+
+        put_stub(stub);
+        break;
+    }
+
     case X86EMUL_OPC_66(0x0f38, 0xf6): /* adcx r/m,r */
     case X86EMUL_OPC_F3(0x0f38, 0xf6): /* adox r/m,r */
     {
--- a/xen/include/asm-x86/cpufeature.h
+++ b/xen/include/asm-x86/cpufeature.h
@@ -57,6 +57,7 @@
 #define cpu_has_xsave           boot_cpu_has(X86_FEATURE_XSAVE)
 #define cpu_has_avx             boot_cpu_has(X86_FEATURE_AVX)
 #define cpu_has_lwp             boot_cpu_has(X86_FEATURE_LWP)
+#define cpu_has_bmi1            boot_cpu_has(X86_FEATURE_BMI1)
 #define cpu_has_mpx             boot_cpu_has(X86_FEATURE_MPX)
 #define cpu_has_arch_perfmon    boot_cpu_has(X86_FEATURE_ARCH_PERFMON)
 #define cpu_has_rdtscp          boot_cpu_has(X86_FEATURE_RDTSCP)
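
The expected values in the new tests follow directly from the BMI1
identities, written here in plain C as an illustrative sketch (helper names
are made up): with 0xcccc3333 as the inverted operand and 0xfedcba98 as the
source, andn gives 0x32108888, blsi 8, blsmsk 0xf and blsr 0xfedcba90, while
the 0x0a03 control word selects a 10-bit field starting at bit 3.

#include <stdint.h>

/* Illustrative only, not part of the patch. */
static uint32_t andn32(uint32_t inv, uint32_t src) { return ~inv & src; }

static uint32_t bextr32(uint32_t src, uint32_t ctl)
{
    unsigned int start = ctl & 0xff, len = (ctl >> 8) & 0xff;

    if ( start >= 32 )
        return 0;
    src >>= start;
    return len < 32 ? src & ((1u << len) - 1) : src;
}

static uint32_t blsi32(uint32_t src)   { return src & -src; }      /* isolate lowest set bit */
static uint32_t blsmsk32(uint32_t src) { return src ^ (src - 1); } /* mask up to lowest set bit */
static uint32_t blsr32(uint32_t src)   { return src & (src - 1); } /* clear lowest set bit */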




* [PATCH 4/8] x86emul: support BMI2 insns
  2017-01-13 15:11 [PATCH 0/8] x86emul: support various ISA extensions Jan Beulich
                   ` (2 preceding siblings ...)
  2017-01-13 15:31 ` [PATCH 3/8] x86emul: support BMI1 insns Jan Beulich
@ 2017-01-13 15:32 ` Jan Beulich
  2017-01-13 18:20   ` Andrew Cooper
  2017-01-13 15:32 ` [PATCH 5/8] x86emul: support TBM insns Jan Beulich
                   ` (3 subsequent siblings)
  7 siblings, 1 reply; 29+ messages in thread
From: Jan Beulich @ 2017-01-13 15:32 UTC (permalink / raw)
  To: xen-devel; +Cc: Andrew Cooper

[-- Attachment #1: Type: text/plain, Size: 10499 bytes --]

Note that the adjustment to the mode_64bit() definition is so that we
can avoid "#ifdef __x86_64__" around the 64-bit asm() portions. An
alternative would be single asm()s with a conditional branch over the
(manually encoded) REX64 prefix.
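
A sketch of that alternative, for the 64-bit build only (illustrative helper;
the operands are pinned to fixed registers so that the compiler doesn't emit
a REX prefix of its own, which would render the hand-written byte ineffective):

static inline unsigned long ror_cl(unsigned long val, unsigned char cnt,
                                   int wide)
{
    asm ( "testl %[wide], %[wide]\n\t"
          "jz 1f\n\t"
          ".byte 0x48\n"           /* REX.W, executed only in the wide case */
          "1: rorl %%cl, %%eax"    /* the prefix turns this into rorq */
          : "+a" (val)
          : "c" (cnt), [wide] "d" (wide)
          : "cc" );
    return val;
}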

Signed-off-by: Jan Beulich <jbeulich@suse.com>

--- a/tools/tests/x86_emulator/test_x86_emulator.c
+++ b/tools/tests/x86_emulator/test_x86_emulator.c
@@ -1019,6 +1019,178 @@ int main(int argc, char **argv)
     else
         printf("skipped\n");
 
+    printf("%-40s", "Testing bzhi %edx,(%ecx),%ebx...");
+    if ( stack_exec && cpu_has_bmi2 )
+    {
+        decl_insn(bzhi);
+
+        asm volatile ( put_insn(bzhi, "bzhi %%edx, (%0), %%ebx")
+                       :: "c" (NULL) );
+        set_insn(bzhi);
+
+        regs.ecx    = (unsigned long)res;
+        regs.edx    = 0xff13;
+        regs.eflags = 0xa43;
+        rc = x86_emulate(&ctxt, &emulops);
+        if ( (rc != X86EMUL_OKAY) || regs.ebx != (*res & 0x7ffff) ||
+             regs.edx != 0xff13 || *res != 0xfedcba98 ||
+             (regs.eflags & 0xf6b) != 0x202 || !check_eip(bzhi) )
+            goto fail;
+        printf("okay\n");
+    }
+    else
+        printf("skipped\n");
+
+    printf("%-40s", "Testing mulx (%eax),%ecx,%ebx...");
+    if ( cpu_has_bmi2 )
+    {
+        decl_insn(mulx);
+
+        asm volatile ( put_insn(mulx, "mulx (%0), %%ecx, %%ebx")
+                       :: "a" (NULL) );
+        set_insn(mulx);
+
+        regs.eax    = (unsigned long)res;
+        regs.edx    = 0x12345678;
+        regs.eflags = 0xac3;
+        rc = x86_emulate(&ctxt, &emulops);
+        if ( (rc != X86EMUL_OKAY) || regs.ebx != 0x121fa00a ||
+             regs.ecx != 0x35068740 || *res != 0xfedcba98 ||
+             regs.eflags != 0xac3 || !check_eip(mulx) )
+            goto fail;
+        printf("okay\n");
+    }
+    else
+        printf("skipped\n");
+
+    printf("%-40s", "Testing pdep (%edx),%ecx,%ebx...");
+    if ( stack_exec && cpu_has_bmi2 )
+    {
+        decl_insn(pdep);
+
+        asm volatile ( put_insn(pdep, "pdep (%0), %%ecx, %%ebx")
+                       :: "d" (NULL) );
+        set_insn(pdep);
+
+        regs.ecx    = 0x8cef;
+        regs.edx    = (unsigned long)res;
+        regs.eflags = 0xa43;
+        rc = x86_emulate(&ctxt, &emulops);
+        if ( (rc != X86EMUL_OKAY) || regs.ebx != 0x850b298 ||
+             regs.ecx != 0x8cef || *res != 0xfedcba98 ||
+             regs.eflags != 0xa43 || !check_eip(pdep) )
+            goto fail;
+        printf("okay\n");
+    }
+    else
+        printf("skipped\n");
+
+    printf("%-40s", "Testing pext (%edx),%ecx,%ebx...");
+    if ( stack_exec && cpu_has_bmi2 )
+    {
+        decl_insn(pext);
+
+        asm volatile ( put_insn(pext, "pext (%0), %%ecx, %%ebx")
+                       :: "d" (NULL) );
+        set_insn(pext);
+
+        regs.ecx    = 0x137f8cef;
+        regs.edx    = (unsigned long)res;
+        regs.eflags = 0xa43;
+        rc = x86_emulate(&ctxt, &emulops);
+        if ( (rc != X86EMUL_OKAY) || regs.ebx != 0x12f95 ||
+             regs.ecx != 0x137f8cef || *res != 0xfedcba98 ||
+             regs.eflags != 0xa43 || !check_eip(pext) )
+            goto fail;
+        printf("okay\n");
+    }
+    else
+        printf("skipped\n");
+
+    printf("%-40s", "Testing rorx $16,(%ecx),%ebx...");
+    if ( cpu_has_bmi2 )
+    {
+        decl_insn(rorx);
+
+        asm volatile ( put_insn(rorx, "rorx $16, (%0), %%ebx")
+                       :: "c" (NULL) );
+        set_insn(rorx);
+
+        regs.ecx    = (unsigned long)res;
+        regs.eflags = 0xa43;
+        rc = x86_emulate(&ctxt, &emulops);
+        if ( (rc != X86EMUL_OKAY) || regs.ebx != 0xba98fedc ||
+             *res != 0xfedcba98 ||
+             regs.eflags != 0xa43 || !check_eip(rorx) )
+            goto fail;
+        printf("okay\n");
+    }
+    else
+        printf("skipped\n");
+
+    printf("%-40s", "Testing sarx %edx,(%ecx),%ebx...");
+    if ( stack_exec && cpu_has_bmi2 )
+    {
+        decl_insn(sarx);
+
+        asm volatile ( put_insn(sarx, "sarx %%edx, (%0), %%ebx")
+                       :: "c" (NULL) );
+        set_insn(sarx);
+
+        regs.ecx    = (unsigned long)res;
+        regs.edx    = 0xff13;
+        regs.eflags = 0xa43;
+        rc = x86_emulate(&ctxt, &emulops);
+        if ( (rc != X86EMUL_OKAY) ||
+             regs.ebx != ((signed)*res >> (regs.edx & 0x1f)) ||
+             regs.edx != 0xff13 || *res != 0xfedcba98 ||
+             regs.eflags != 0xa43 || !check_eip(sarx) )
+            goto fail;
+        printf("okay\n");
+    }
+    else
+        printf("skipped\n");
+
+    printf("%-40s", "Testing shlx %edx,(%ecx),%ebx...");
+    if ( stack_exec && cpu_has_bmi2 )
+    {
+        decl_insn(shlx);
+
+        asm volatile ( put_insn(shlx, "shlx %%edx, (%0), %%ebx")
+                       :: "c" (NULL) );
+        set_insn(shlx);
+
+        rc = x86_emulate(&ctxt, &emulops);
+        if ( (rc != X86EMUL_OKAY) ||
+             regs.ebx != (*res << (regs.edx & 0x1f)) ||
+             regs.edx != 0xff13 || *res != 0xfedcba98 ||
+             regs.eflags != 0xa43 || !check_eip(shlx) )
+            goto fail;
+        printf("okay\n");
+    }
+    else
+        printf("skipped\n");
+
+    printf("%-40s", "Testing shrx %edx,(%ecx),%ebx...");
+    if ( stack_exec && cpu_has_bmi2 )
+    {
+        decl_insn(shrx);
+
+        asm volatile ( put_insn(shrx, "shrx %%edx, (%0), %%ebx")
+                       :: "c" (NULL) );
+        set_insn(shrx);
+
+        rc = x86_emulate(&ctxt, &emulops);
+        if ( (rc != X86EMUL_OKAY) ||
+             regs.ebx != (*res >> (regs.edx & 0x1f)) ||
+             regs.edx != 0xff13 || *res != 0xfedcba98 ||
+             regs.eflags != 0xa43 || !check_eip(shrx) )
+            goto fail;
+        printf("okay\n");
+    }
+    else
+        printf("skipped\n");
+
     printf("%-40s", "Testing adcx/adox ...");
     {
         static const unsigned int data[] = {
--- a/tools/tests/x86_emulator/x86_emulate.h
+++ b/tools/tests/x86_emulator/x86_emulate.h
@@ -119,6 +119,12 @@ static inline uint64_t xgetbv(uint32_t x
     (res.b & (1U << 3)) != 0; \
 })
 
+#define cpu_has_bmi2 ({ \
+    struct cpuid_leaf res; \
+    emul_test_cpuid(7, 0, &res, NULL); \
+    (res.b & (1U << 8)) != 0; \
+})
+
 int emul_test_cpuid(
     uint32_t leaf,
     uint32_t subleaf,
--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -707,7 +707,11 @@ do{ asm volatile (
 })
 #define truncate_ea(ea) truncate_word((ea), ad_bytes)
 
-#define mode_64bit() (ctxt->addr_size == 64)
+#ifdef __x86_64__
+# define mode_64bit() (ctxt->addr_size == 64)
+#else
+# define mode_64bit() false
+#endif
 
 #define fail_if(p)                                      \
 do {                                                    \
@@ -1353,6 +1357,7 @@ static bool vcpu_has(
 #define vcpu_has_misalignsse() vcpu_has(0x80000001, ECX,  7, ctxt, ops)
 #define vcpu_has_bmi1()        vcpu_has(         7, EBX,  3, ctxt, ops)
 #define vcpu_has_hle()         vcpu_has(         7, EBX,  4, ctxt, ops)
+#define vcpu_has_bmi2()        vcpu_has(         7, EBX,  8, ctxt, ops)
 #define vcpu_has_rtm()         vcpu_has(         7, EBX, 11, ctxt, ops)
 #define vcpu_has_mpx()         vcpu_has(         7, EBX, 14, ctxt, ops)
 #define vcpu_has_adx()         vcpu_has(         7, EBX, 19, ctxt, ops)
@@ -5880,12 +5885,21 @@ x86_emulate(
 #endif
 
     case X86EMUL_OPC_VEX(0x0f38, 0xf2):    /* andn r/m,r,r */
+    case X86EMUL_OPC_VEX(0x0f38, 0xf5):    /* bzhi r,r/m,r */
+    case X86EMUL_OPC_VEX_F3(0x0f38, 0xf5): /* pext r/m,r,r */
+    case X86EMUL_OPC_VEX_F2(0x0f38, 0xf5): /* pdep r/m,r,r */
     case X86EMUL_OPC_VEX(0x0f38, 0xf7):    /* bextr r,r/m,r */
+    case X86EMUL_OPC_VEX_66(0x0f38, 0xf7): /* shlx r,r/m,r */
+    case X86EMUL_OPC_VEX_F3(0x0f38, 0xf7): /* sarx r,r/m,r */
+    case X86EMUL_OPC_VEX_F2(0x0f38, 0xf7): /* shrx r,r/m,r */
     {
         uint8_t *buf = get_stub(stub);
         typeof(vex) *pvex = container_of(buf + 1, typeof(vex), raw[0]);
 
-        host_and_vcpu_must_have(bmi1);
+        if ( b == 0xf5 || vex.pfx )
+            host_and_vcpu_must_have(bmi2);
+        else
+            host_and_vcpu_must_have(bmi1);
         generate_exception_if(vex.l, EXC_UD);
 
         buf[0] = 0xc4;
@@ -5973,6 +5987,33 @@ x86_emulate(
         break;
     }
 
+    case X86EMUL_OPC_VEX_F2(0x0f38, 0xf6): /* mulx r/m,r,r */
+        vcpu_must_have(bmi2);
+        generate_exception_if(vex.l, EXC_UD);
+        ea.reg = decode_register(~vex.reg & (mode_64bit() ? 0xf : 7),
+                                 &_regs, 0);
+        if ( mode_64bit() && vex.w )
+            asm ( "mulq %3" : "=a" (*ea.reg), "=d" (dst.val)
+                            : "0" (src.val), "rm" (_regs.r(dx)) );
+        else
+            asm ( "mull %3" : "=a" (*ea.reg), "=d" (dst.val)
+                            : "0" ((uint32_t)src.val), "rm" (_regs._edx) );
+        break;
+
+    case X86EMUL_OPC_VEX_F2(0x0f3a, 0xf0): /* rorx imm,r/m,r */
+        vcpu_must_have(bmi2);
+        generate_exception_if(vex.l || vex.reg != 0xf, EXC_UD);
+        if ( ea.type == OP_REG )
+            src.val = *ea.reg;
+        else if ( (rc = read_ulong(ea.mem.seg, ea.mem.off, &src.val, op_bytes,
+                                   ctxt, ops)) != X86EMUL_OKAY )
+            goto done;
+        if ( mode_64bit() && vex.w )
+            asm ( "rorq %b1,%0" : "=g" (dst.val) : "c" (imm1), "0" (src.val) );
+        else
+            asm ( "rorl %b1,%k0" : "=g" (dst.val) : "c" (imm1), "0" (src.val) );
+        break;
+
     default:
         goto cannot_emulate;
     }
--- a/xen/include/asm-x86/cpufeature.h
+++ b/xen/include/asm-x86/cpufeature.h
@@ -58,6 +58,7 @@
 #define cpu_has_avx             boot_cpu_has(X86_FEATURE_AVX)
 #define cpu_has_lwp             boot_cpu_has(X86_FEATURE_LWP)
 #define cpu_has_bmi1            boot_cpu_has(X86_FEATURE_BMI1)
+#define cpu_has_bmi2            boot_cpu_has(X86_FEATURE_BMI2)
 #define cpu_has_mpx             boot_cpu_has(X86_FEATURE_MPX)
 #define cpu_has_arch_perfmon    boot_cpu_has(X86_FEATURE_ARCH_PERFMON)
 #define cpu_has_rdtscp          boot_cpu_has(X86_FEATURE_RDTSCP)
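
For reference, the semantics the expected test values above encode, in plain
C (illustrative sketch, made-up helper names): in the AT&T forms used by the
tests the register (vvvv) operand supplies the source bits and the memory
operand the mask for pdep/pext, while for bzhi the low byte of the index
register gives the number of low bits to keep (0xff13 -> 19 -> 0x7ffff).

#include <stdint.h>

/* Illustrative only, not part of the patch. */
static uint32_t bzhi32(uint32_t src, uint32_t idx)
{
    unsigned int n = idx & 0xff;

    return n < 32 ? src & ((1u << n) - 1) : src;
}

static uint32_t pext32(uint32_t src, uint32_t mask)
{
    uint32_t dst = 0;
    unsigned int i, k = 0;

    for ( i = 0; i < 32; ++i )
        if ( mask & (1u << i) )                /* gather selected bits */
            dst |= ((src >> i) & 1u) << k++;

    return dst;
}

static uint32_t pdep32(uint32_t src, uint32_t mask)
{
    uint32_t dst = 0;
    unsigned int i, k = 0;

    for ( i = 0; i < 32; ++i )
        if ( mask & (1u << i) )                /* scatter low bits of src */
            dst |= ((src >> k++) & 1u) << i;

    return dst;
}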



[-- Attachment #2: x86emul-BMI2.patch --]
[-- Type: text/plain, Size: 10526 bytes --]

x86emul: support BMI2 insns

Note that the adjustment to the mode_64bit() definition is so that we
can avoid "#ifdef __x86_64__" around the 64-bit asm() portions. An
alternative would be single asm()s with a conditional branch over the
(manually encoded) REX64 prefix.

Signed-off-by: Jan Beulich <jbeulich@suse.com>

--- a/tools/tests/x86_emulator/test_x86_emulator.c
+++ b/tools/tests/x86_emulator/test_x86_emulator.c
@@ -1019,6 +1019,178 @@ int main(int argc, char **argv)
     else
         printf("skipped\n");
 
+    printf("%-40s", "Testing bzhi %edx,(%ecx),%ebx...");
+    if ( stack_exec && cpu_has_bmi2 )
+    {
+        decl_insn(bzhi);
+
+        asm volatile ( put_insn(bzhi, "bzhi %%edx, (%0), %%ebx")
+                       :: "c" (NULL) );
+        set_insn(bzhi);
+
+        regs.ecx    = (unsigned long)res;
+        regs.edx    = 0xff13;
+        regs.eflags = 0xa43;
+        rc = x86_emulate(&ctxt, &emulops);
+        if ( (rc != X86EMUL_OKAY) || regs.ebx != (*res & 0x7ffff) ||
+             regs.edx != 0xff13 || *res != 0xfedcba98 ||
+             (regs.eflags & 0xf6b) != 0x202 || !check_eip(bzhi) )
+            goto fail;
+        printf("okay\n");
+    }
+    else
+        printf("skipped\n");
+
+    printf("%-40s", "Testing mulx (%eax),%ecx,%ebx...");
+    if ( cpu_has_bmi2 )
+    {
+        decl_insn(mulx);
+
+        asm volatile ( put_insn(mulx, "mulx (%0), %%ecx, %%ebx")
+                       :: "a" (NULL) );
+        set_insn(mulx);
+
+        regs.eax    = (unsigned long)res;
+        regs.edx    = 0x12345678;
+        regs.eflags = 0xac3;
+        rc = x86_emulate(&ctxt, &emulops);
+        if ( (rc != X86EMUL_OKAY) || regs.ebx != 0x121fa00a ||
+             regs.ecx != 0x35068740 || *res != 0xfedcba98 ||
+             regs.eflags != 0xac3 || !check_eip(mulx) )
+            goto fail;
+        printf("okay\n");
+    }
+    else
+        printf("skipped\n");
+
+    printf("%-40s", "Testing pdep (%edx),%ecx,%ebx...");
+    if ( stack_exec && cpu_has_bmi2 )
+    {
+        decl_insn(pdep);
+
+        asm volatile ( put_insn(pdep, "pdep (%0), %%ecx, %%ebx")
+                       :: "d" (NULL) );
+        set_insn(pdep);
+
+        regs.ecx    = 0x8cef;
+        regs.edx    = (unsigned long)res;
+        regs.eflags = 0xa43;
+        rc = x86_emulate(&ctxt, &emulops);
+        if ( (rc != X86EMUL_OKAY) || regs.ebx != 0x850b298 ||
+             regs.ecx != 0x8cef || *res != 0xfedcba98 ||
+             regs.eflags != 0xa43 || !check_eip(pdep) )
+            goto fail;
+        printf("okay\n");
+    }
+    else
+        printf("skipped\n");
+
+    printf("%-40s", "Testing pext (%edx),%ecx,%ebx...");
+    if ( stack_exec && cpu_has_bmi2 )
+    {
+        decl_insn(pext);
+
+        asm volatile ( put_insn(pext, "pext (%0), %%ecx, %%ebx")
+                       :: "d" (NULL) );
+        set_insn(pext);
+
+        regs.ecx    = 0x137f8cef;
+        regs.edx    = (unsigned long)res;
+        regs.eflags = 0xa43;
+        rc = x86_emulate(&ctxt, &emulops);
+        if ( (rc != X86EMUL_OKAY) || regs.ebx != 0x12f95 ||
+             regs.ecx != 0x137f8cef || *res != 0xfedcba98 ||
+             regs.eflags != 0xa43 || !check_eip(pext) )
+            goto fail;
+        printf("okay\n");
+    }
+    else
+        printf("skipped\n");
+
+    printf("%-40s", "Testing rorx $16,(%ecx),%ebx...");
+    if ( cpu_has_bmi2 )
+    {
+        decl_insn(rorx);
+
+        asm volatile ( put_insn(rorx, "rorx $16, (%0), %%ebx")
+                       :: "c" (NULL) );
+        set_insn(rorx);
+
+        regs.ecx    = (unsigned long)res;
+        regs.eflags = 0xa43;
+        rc = x86_emulate(&ctxt, &emulops);
+        if ( (rc != X86EMUL_OKAY) || regs.ebx != 0xba98fedc ||
+             *res != 0xfedcba98 ||
+             regs.eflags != 0xa43 || !check_eip(rorx) )
+            goto fail;
+        printf("okay\n");
+    }
+    else
+        printf("skipped\n");
+
+    printf("%-40s", "Testing sarx %edx,(%ecx),%ebx...");
+    if ( stack_exec && cpu_has_bmi2 )
+    {
+        decl_insn(sarx);
+
+        asm volatile ( put_insn(sarx, "sarx %%edx, (%0), %%ebx")
+                       :: "c" (NULL) );
+        set_insn(sarx);
+
+        regs.ecx    = (unsigned long)res;
+        regs.edx    = 0xff13;
+        regs.eflags = 0xa43;
+        rc = x86_emulate(&ctxt, &emulops);
+        if ( (rc != X86EMUL_OKAY) ||
+             regs.ebx != ((signed)*res >> (regs.edx & 0x1f)) ||
+             regs.edx != 0xff13 || *res != 0xfedcba98 ||
+             regs.eflags != 0xa43 || !check_eip(sarx) )
+            goto fail;
+        printf("okay\n");
+    }
+    else
+        printf("skipped\n");
+
+    printf("%-40s", "Testing shlx %edx,(%ecx),%ebx...");
+    if ( stack_exec && cpu_has_bmi2 )
+    {
+        decl_insn(shlx);
+
+        asm volatile ( put_insn(shlx, "shlx %%edx, (%0), %%ebx")
+                       :: "c" (NULL) );
+        set_insn(shlx);
+
+        rc = x86_emulate(&ctxt, &emulops);
+        if ( (rc != X86EMUL_OKAY) ||
+             regs.ebx != (*res << (regs.edx & 0x1f)) ||
+             regs.edx != 0xff13 || *res != 0xfedcba98 ||
+             regs.eflags != 0xa43 || !check_eip(shlx) )
+            goto fail;
+        printf("okay\n");
+    }
+    else
+        printf("skipped\n");
+
+    printf("%-40s", "Testing shrx %edx,(%ecx),%ebx...");
+    if ( stack_exec && cpu_has_bmi2 )
+    {
+        decl_insn(shrx);
+
+        asm volatile ( put_insn(shrx, "shrx %%edx, (%0), %%ebx")
+                       :: "c" (NULL) );
+        set_insn(shrx);
+
+        rc = x86_emulate(&ctxt, &emulops);
+        if ( (rc != X86EMUL_OKAY) ||
+             regs.ebx != (*res >> (regs.edx & 0x1f)) ||
+             regs.edx != 0xff13 || *res != 0xfedcba98 ||
+             regs.eflags != 0xa43 || !check_eip(shrx) )
+            goto fail;
+        printf("okay\n");
+    }
+    else
+        printf("skipped\n");
+
     printf("%-40s", "Testing adcx/adox ...");
     {
         static const unsigned int data[] = {
--- a/tools/tests/x86_emulator/x86_emulate.h
+++ b/tools/tests/x86_emulator/x86_emulate.h
@@ -119,6 +119,12 @@ static inline uint64_t xgetbv(uint32_t x
     (res.b & (1U << 3)) != 0; \
 })
 
+#define cpu_has_bmi2 ({ \
+    struct cpuid_leaf res; \
+    emul_test_cpuid(7, 0, &res, NULL); \
+    (res.b & (1U << 8)) != 0; \
+})
+
 int emul_test_cpuid(
     uint32_t leaf,
     uint32_t subleaf,
--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -707,7 +707,11 @@ do{ asm volatile (
 })
 #define truncate_ea(ea) truncate_word((ea), ad_bytes)
 
-#define mode_64bit() (ctxt->addr_size == 64)
+#ifdef __x86_64__
+# define mode_64bit() (ctxt->addr_size == 64)
+#else
+# define mode_64bit() false
+#endif
 
 #define fail_if(p)                                      \
 do {                                                    \
@@ -1353,6 +1357,7 @@ static bool vcpu_has(
 #define vcpu_has_misalignsse() vcpu_has(0x80000001, ECX,  7, ctxt, ops)
 #define vcpu_has_bmi1()        vcpu_has(         7, EBX,  3, ctxt, ops)
 #define vcpu_has_hle()         vcpu_has(         7, EBX,  4, ctxt, ops)
+#define vcpu_has_bmi2()        vcpu_has(         7, EBX,  8, ctxt, ops)
 #define vcpu_has_rtm()         vcpu_has(         7, EBX, 11, ctxt, ops)
 #define vcpu_has_mpx()         vcpu_has(         7, EBX, 14, ctxt, ops)
 #define vcpu_has_adx()         vcpu_has(         7, EBX, 19, ctxt, ops)
@@ -5880,12 +5885,21 @@ x86_emulate(
 #endif
 
     case X86EMUL_OPC_VEX(0x0f38, 0xf2):    /* andn r/m,r,r */
+    case X86EMUL_OPC_VEX(0x0f38, 0xf5):    /* bzhi r,r/m,r */
+    case X86EMUL_OPC_VEX_F3(0x0f38, 0xf5): /* pext r/m,r,r */
+    case X86EMUL_OPC_VEX_F2(0x0f38, 0xf5): /* pdep r/m,r,r */
     case X86EMUL_OPC_VEX(0x0f38, 0xf7):    /* bextr r,r/m,r */
+    case X86EMUL_OPC_VEX_66(0x0f38, 0xf7): /* shlx r,r/m,r */
+    case X86EMUL_OPC_VEX_F3(0x0f38, 0xf7): /* sarx r,r/m,r */
+    case X86EMUL_OPC_VEX_F2(0x0f38, 0xf7): /* shrx r,r/m,r */
     {
         uint8_t *buf = get_stub(stub);
         typeof(vex) *pvex = container_of(buf + 1, typeof(vex), raw[0]);
 
-        host_and_vcpu_must_have(bmi1);
+        if ( b == 0xf5 || vex.pfx )
+            host_and_vcpu_must_have(bmi2);
+        else
+            host_and_vcpu_must_have(bmi1);
         generate_exception_if(vex.l, EXC_UD);
 
         buf[0] = 0xc4;
@@ -5973,6 +5987,33 @@ x86_emulate(
         break;
     }
 
+    case X86EMUL_OPC_VEX_F2(0x0f38, 0xf6): /* mulx r/m,r,r */
+        vcpu_must_have(bmi2);
+        generate_exception_if(vex.l, EXC_UD);
+        ea.reg = decode_register(~vex.reg & (mode_64bit() ? 0xf : 7),
+                                 &_regs, 0);
+        if ( mode_64bit() && vex.w )
+            asm ( "mulq %3" : "=a" (*ea.reg), "=d" (dst.val)
+                            : "0" (src.val), "rm" (_regs.r(dx)) );
+        else
+            asm ( "mull %3" : "=a" (*ea.reg), "=d" (dst.val)
+                            : "0" ((uint32_t)src.val), "rm" (_regs._edx) );
+        break;
+
+    case X86EMUL_OPC_VEX_F2(0x0f3a, 0xf0): /* rorx imm,r/m,r */
+        vcpu_must_have(bmi2);
+        generate_exception_if(vex.l || vex.reg != 0xf, EXC_UD);
+        if ( ea.type == OP_REG )
+            src.val = *ea.reg;
+        else if ( (rc = read_ulong(ea.mem.seg, ea.mem.off, &src.val, op_bytes,
+                                   ctxt, ops)) != X86EMUL_OKAY )
+            goto done;
+        if ( mode_64bit() && vex.w )
+            asm ( "rorq %b1,%0" : "=g" (dst.val) : "c" (imm1), "0" (src.val) );
+        else
+            asm ( "rorl %b1,%k0" : "=g" (dst.val) : "c" (imm1), "0" (src.val) );
+        break;
+
     default:
         goto cannot_emulate;
     }
--- a/xen/include/asm-x86/cpufeature.h
+++ b/xen/include/asm-x86/cpufeature.h
@@ -58,6 +58,7 @@
 #define cpu_has_avx             boot_cpu_has(X86_FEATURE_AVX)
 #define cpu_has_lwp             boot_cpu_has(X86_FEATURE_LWP)
 #define cpu_has_bmi1            boot_cpu_has(X86_FEATURE_BMI1)
+#define cpu_has_bmi2            boot_cpu_has(X86_FEATURE_BMI2)
 #define cpu_has_mpx             boot_cpu_has(X86_FEATURE_MPX)
 #define cpu_has_arch_perfmon    boot_cpu_has(X86_FEATURE_ARCH_PERFMON)
 #define cpu_has_rdtscp          boot_cpu_has(X86_FEATURE_RDTSCP)
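
For cross-checking the expected values in the test additions above, a minimal
C reference model of the scalar BMI2 semantics being emulated (32-bit forms
only).  This is illustrative and not part of the patch: the ref_* helper names
are invented here, and counts or bit indexes of 32 and above are only loosely
handled.

#include <stdint.h>

static inline uint32_t ref_bzhi(uint32_t src, uint8_t n)
{
    /* Keep the low n bits; for n >= 32 the source is returned unchanged. */
    return n < 32 ? src & ((1U << n) - 1) : src;
}

static inline uint32_t ref_pdep(uint32_t src, uint32_t mask)
{
    uint32_t dst = 0;

    /* Scatter the low-order bits of src into the set bit positions of mask. */
    for ( ; mask; mask &= mask - 1, src >>= 1 )
        if ( src & 1 )
            dst |= mask & -mask;

    return dst;
}

static inline uint32_t ref_pext(uint32_t src, uint32_t mask)
{
    uint32_t dst = 0, bit = 1;

    /* Gather the bits of src found at the set bit positions of mask. */
    for ( ; mask; mask &= mask - 1, bit <<= 1 )
        if ( src & mask & -mask )
            dst |= bit;

    return dst;
}

static inline uint32_t ref_rorx(uint32_t src, uint8_t imm)
{
    imm &= 31;
    return imm ? (src >> imm) | (src << (32 - imm)) : src;
}

/* mulx: widening multiply by %edx; EFLAGS are left untouched. */
static inline void ref_mulx(uint32_t src, uint32_t edx, uint32_t *lo, uint32_t *hi)
{
    uint64_t prod = (uint64_t)src * edx;

    *lo = prod;        /* written to the VEX.vvvv-selected register */
    *hi = prod >> 32;  /* written to the ModRM reg destination */
}

The shift forms (sarx/shlx/shrx) behave like the corresponding legacy shifts
with the count taken from a register, except that they, like mulx and rorx,
leave EFLAGS alone, which is why the respective tests above expect eflags to
come back unchanged.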


* [PATCH 5/8] x86emul: support TBM insns
  2017-01-13 15:11 [PATCH 0/8] x86emul: support various ISA extensions Jan Beulich
                   ` (3 preceding siblings ...)
  2017-01-13 15:32 ` [PATCH 4/8] x86emul: support BMI2 insns Jan Beulich
@ 2017-01-13 15:32 ` Jan Beulich
  2017-01-13 18:48   ` Andrew Cooper
  2017-01-13 15:34 ` [PATCH 6/8] x86emul: support RDRAND/RDSEED Jan Beulich
                   ` (2 subsequent siblings)
  7 siblings, 1 reply; 29+ messages in thread
From: Jan Beulich @ 2017-01-13 15:32 UTC (permalink / raw)
  To: xen-devel; +Cc: Andrew Cooper


Signed-off-by: Jan Beulich <jbeulich@suse.com>

--- a/tools/tests/x86_emulator/test_x86_emulator.c
+++ b/tools/tests/x86_emulator/test_x86_emulator.c
@@ -1244,6 +1244,234 @@ int main(int argc, char **argv)
         printf("okay\n");
     }
 
+    printf("%-40s", "Testing bextr $0x0a03,(%ecx),%ebx...");
+    if ( stack_exec && cpu_has_tbm )
+    {
+        decl_insn(bextr_imm);
+#ifdef __x86_64__
+        decl_insn(bextr64_imm);
+#endif
+
+        asm volatile ( put_insn(bextr_imm, "bextr $0x0a03, (%0), %%ebx")
+                       :: "c" (NULL) );
+        set_insn(bextr_imm);
+
+        *res        = 0xfedcba98;
+        regs.ecx    = (unsigned long)res;
+        regs.eflags = 0xa43;
+        rc = x86_emulate(&ctxt, &emulops);
+        if ( (rc != X86EMUL_OKAY) || regs.ebx != ((*res >> 3) & 0x3ff) ||
+             *res != 0xfedcba98 ||
+             (regs.eflags & 0xf6b) != 0x202 || !check_eip(bextr_imm) )
+            goto fail;
+        printf("okay\n");
+#ifdef __x86_64__
+        printf("%-40s", "Testing bextr $0x211e,(%r10),%r11...");
+
+        asm volatile ( put_insn(bextr64_imm, "bextr $0x211e, (%r10), %r11") );
+        set_insn(bextr64_imm);
+
+        res[0]      = 0x76543210;
+        res[1]      = 0xfedcba98;
+        regs.r10    = (unsigned long)res;
+        regs.eflags = 0xa43;
+        rc = x86_emulate(&ctxt, &emulops);
+        if ( (rc != X86EMUL_OKAY) ||
+             regs.r11 != (((unsigned long)(res[1] << 1) << 1) |
+                          (res[0] >> 30)) ||
+             res[0] != 0x76543210 || res[1] != 0xfedcba98 ||
+             (regs.eflags & 0xf6b) != 0x202 || !check_eip(bextr64_imm) )
+            goto fail;
+        printf("okay\n");
+#endif
+    }
+    else
+        printf("skipped\n");
+
+    res[0]      = 0xfedcba98;
+    res[1]      = 0x01234567;
+    regs.edx    = (unsigned long)res;
+
+    printf("%-40s", "Testing blcfill 4(%edx),%ecx...");
+    if ( stack_exec && cpu_has_tbm )
+    {
+        decl_insn(blcfill);
+
+        asm volatile ( put_insn(blcfill, "blcfill 4(%0), %%ecx")
+                       :: "d" (NULL) );
+        set_insn(blcfill);
+
+        regs.eflags = 0xac3;
+        rc = x86_emulate(&ctxt, &emulops);
+        if ( (rc != X86EMUL_OKAY) || res[1] != 0x01234567 ||
+             regs.ecx != ((res[1] + 1) & res[1]) ||
+             (regs.eflags & 0xfeb) != 0x202 || !check_eip(blcfill) )
+            goto fail;
+        printf("okay\n");
+    }
+    else
+        printf("skipped\n");
+
+    printf("%-40s", "Testing blci 4(%edx),%ecx...");
+    if ( stack_exec && cpu_has_tbm )
+    {
+        decl_insn(blci);
+
+        asm volatile ( put_insn(blci, "blci 4(%0), %%ecx")
+                       :: "d" (NULL) );
+        set_insn(blci);
+
+        regs.eflags = 0xac3;
+        rc = x86_emulate(&ctxt, &emulops);
+        if ( (rc != X86EMUL_OKAY) || res[1] != 0x01234567 ||
+             regs.ecx != (~(res[1] + 1) | res[1]) ||
+             (regs.eflags & 0xfeb) != 0x282 || !check_eip(blci) )
+            goto fail;
+        printf("okay\n");
+    }
+    else
+        printf("skipped\n");
+
+    printf("%-40s", "Testing blcic 4(%edx),%ecx...");
+    if ( stack_exec && cpu_has_tbm )
+    {
+        decl_insn(blcic);
+
+        asm volatile ( put_insn(blcic, "blcic 4(%0), %%ecx")
+                       :: "d" (NULL) );
+        set_insn(blcic);
+
+        regs.eflags = 0xac3;
+        rc = x86_emulate(&ctxt, &emulops);
+        if ( (rc != X86EMUL_OKAY) || res[1] != 0x01234567 ||
+             regs.ecx != ((res[1] + 1) & ~res[1]) ||
+             (regs.eflags & 0xfeb) != 0x202 || !check_eip(blcic) )
+            goto fail;
+        printf("okay\n");
+    }
+    else
+        printf("skipped\n");
+
+    printf("%-40s", "Testing blcmsk 4(%edx),%ecx...");
+    if ( stack_exec && cpu_has_tbm )
+    {
+        decl_insn(blcmsk);
+
+        asm volatile ( put_insn(blcmsk, "blcmsk 4(%0), %%ecx")
+                       :: "d" (NULL) );
+        set_insn(blcmsk);
+
+        regs.eflags = 0xac3;
+        rc = x86_emulate(&ctxt, &emulops);
+        if ( (rc != X86EMUL_OKAY) || res[1] != 0x01234567 ||
+             regs.ecx != ((res[1] + 1) ^ res[1]) ||
+             (regs.eflags & 0xfeb) != 0x202 || !check_eip(blcmsk) )
+            goto fail;
+        printf("okay\n");
+    }
+    else
+        printf("skipped\n");
+
+    printf("%-40s", "Testing blcs 4(%edx),%ecx...");
+    if ( stack_exec && cpu_has_tbm )
+    {
+        decl_insn(blcs);
+
+        asm volatile ( put_insn(blcs, "blcs 4(%0), %%ecx")
+                       :: "d" (NULL) );
+        set_insn(blcs);
+
+        regs.eflags = 0xac3;
+        rc = x86_emulate(&ctxt, &emulops);
+        if ( (rc != X86EMUL_OKAY) || res[1] != 0x01234567 ||
+             regs.ecx != ((res[1] + 1) | res[1]) ||
+             (regs.eflags & 0xfeb) != 0x202 || !check_eip(blcs) )
+            goto fail;
+        printf("okay\n");
+    }
+    else
+        printf("skipped\n");
+
+    printf("%-40s", "Testing blsfill (%edx),%ecx...");
+    if ( stack_exec && cpu_has_tbm )
+    {
+        decl_insn(blsfill);
+
+        asm volatile ( put_insn(blsfill, "blsfill (%0), %%ecx")
+                       :: "d" (NULL) );
+        set_insn(blsfill);
+
+        regs.eflags = 0xac3;
+        rc = x86_emulate(&ctxt, &emulops);
+        if ( (rc != X86EMUL_OKAY) || res[0] != 0xfedcba98 ||
+             regs.ecx != ((res[0] - 1) | res[0]) ||
+             (regs.eflags & 0xfeb) != 0x282 || !check_eip(blsfill) )
+            goto fail;
+        printf("okay\n");
+    }
+    else
+        printf("skipped\n");
+
+    printf("%-40s", "Testing blsic (%edx),%ecx...");
+    if ( stack_exec && cpu_has_tbm )
+    {
+        decl_insn(blsic);
+
+        asm volatile ( put_insn(blsic, "blsic (%0), %%ecx")
+                       :: "d" (NULL) );
+        set_insn(blsic);
+
+        regs.eflags = 0xac3;
+        rc = x86_emulate(&ctxt, &emulops);
+        if ( (rc != X86EMUL_OKAY) || res[0] != 0xfedcba98 ||
+             regs.ecx != ((res[0] - 1) | ~res[0]) ||
+             (regs.eflags & 0xfeb) != 0x282 || !check_eip(blsic) )
+            goto fail;
+        printf("okay\n");
+    }
+    else
+        printf("skipped\n");
+
+    printf("%-40s", "Testing t1mskc 4(%edx),%ecx...");
+    if ( stack_exec && cpu_has_tbm )
+    {
+        decl_insn(t1mskc);
+
+        asm volatile ( put_insn(t1mskc, "t1mskc 4(%0), %%ecx")
+                       :: "d" (NULL) );
+        set_insn(t1mskc);
+
+        regs.eflags = 0xac3;
+        rc = x86_emulate(&ctxt, &emulops);
+        if ( (rc != X86EMUL_OKAY) || res[1] != 0x01234567 ||
+             regs.ecx != ((res[1] + 1) | ~res[1]) ||
+             (regs.eflags & 0xfeb) != 0x282 || !check_eip(t1mskc) )
+            goto fail;
+        printf("okay\n");
+    }
+    else
+        printf("skipped\n");
+
+    printf("%-40s", "Testing tzmsk (%edx),%ecx...");
+    if ( stack_exec && cpu_has_tbm )
+    {
+        decl_insn(tzmsk);
+
+        asm volatile ( put_insn(tzmsk, "tzmsk (%0), %%ecx")
+                       :: "d" (NULL) );
+        set_insn(tzmsk);
+
+        regs.eflags = 0xac3;
+        rc = x86_emulate(&ctxt, &emulops);
+        if ( (rc != X86EMUL_OKAY) || res[0] != 0xfedcba98 ||
+             regs.ecx != ((res[0] - 1) & ~res[0]) ||
+             (regs.eflags & 0xfeb) != 0x202 || !check_eip(tzmsk) )
+            goto fail;
+        printf("okay\n");
+    }
+    else
+        printf("skipped\n");
+
     printf("%-40s", "Testing movq %mm3,(%ecx)...");
     if ( stack_exec && cpu_has_mmx )
     {
--- a/tools/tests/x86_emulator/x86_emulate.h
+++ b/tools/tests/x86_emulator/x86_emulate.h
@@ -125,6 +125,12 @@ static inline uint64_t xgetbv(uint32_t x
     (res.b & (1U << 8)) != 0; \
 })
 
+#define cpu_has_tbm ({ \
+    struct cpuid_leaf res; \
+    emul_test_cpuid(0x80000001, 0, &res, NULL); \
+    (res.c & (1U << 21)) != 0; \
+})
+
 int emul_test_cpuid(
     uint32_t leaf,
     uint32_t subleaf,
--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -1355,6 +1355,7 @@ static bool vcpu_has(
 #define vcpu_has_cr8_legacy()  vcpu_has(0x80000001, ECX,  4, ctxt, ops)
 #define vcpu_has_lzcnt()       vcpu_has(0x80000001, ECX,  5, ctxt, ops)
 #define vcpu_has_misalignsse() vcpu_has(0x80000001, ECX,  7, ctxt, ops)
+#define vcpu_has_tbm()         vcpu_has(0x80000001, ECX, 21, ctxt, ops)
 #define vcpu_has_bmi1()        vcpu_has(         7, EBX,  3, ctxt, ops)
 #define vcpu_has_hle()         vcpu_has(         7, EBX,  4, ctxt, ops)
 #define vcpu_has_bmi2()        vcpu_has(         7, EBX,  8, ctxt, ops)
@@ -6014,6 +6015,85 @@ x86_emulate(
             asm ( "rorl %b1,%k0" : "=g" (dst.val) : "c" (imm1), "0" (src.val) );
         break;
 
+    case X86EMUL_OPC(0x8f09, 0x01): /* XOP Grp1 */
+        switch ( modrm_reg & 7 )
+        {
+        case 1: /* blcfill r/m,r */
+        case 2: /* blsfill r/m,r */
+        case 3: /* blcs r/m,r */
+        case 4: /* tzmsk r/m,r */
+        case 5: /* blcic r/m,r */
+        case 6: /* blsic r/m,r */
+        case 7: /* t1mskc r/m,r */
+            host_and_vcpu_must_have(tbm);
+            break;
+        default:
+            goto cannot_emulate;
+        }
+
+    xop_09_rm_rv:
+    {
+        uint8_t *buf = get_stub(stub);
+        typeof(vex) *pxop = container_of(buf + 1, typeof(vex), raw[0]);
+
+        generate_exception_if(vex.l, EXC_UD);
+
+        buf[0] = 0x8f;
+        *pxop = vex;
+        pxop->b = 1;
+        pxop->r = 1;
+        pxop->reg = ~0; /* rAX */
+        buf[3] = b;
+        buf[4] = (modrm & 0x38) | 0x01; /* r/m=(%rCX) */
+        buf[5] = 0xc3;
+
+        dst.reg = decode_register(~vex.reg & (mode_64bit() ? 0xf : 7),
+                                  &_regs, 0);
+        emulate_stub([dst] "=&a" (dst.val), "c" (&src.val));
+
+        put_stub(stub);
+        break;
+    }
+
+    case X86EMUL_OPC(0x8f09, 0x02): /* XOP Grp2 */
+        switch ( modrm_reg & 7 )
+        {
+        case 1: /* blcmsk r/m,r */
+        case 6: /* blci r/m,r */
+            host_and_vcpu_must_have(tbm);
+            goto xop_09_rm_rv;
+        }
+        goto cannot_emulate;
+
+    case X86EMUL_OPC(0x8f0a, 0x10): /* bextr imm,r/m,r */
+    {
+        uint8_t *buf = get_stub(stub);
+        typeof(vex) *pxop = container_of(buf + 1, typeof(vex), raw[0]);
+
+        host_and_vcpu_must_have(tbm);
+        generate_exception_if(vex.l || vex.reg != 0xf, EXC_UD);
+
+        if ( ea.type == OP_REG )
+            src.val = *ea.reg;
+        else if ( (rc = read_ulong(ea.mem.seg, ea.mem.off, &src.val, op_bytes,
+                                   ctxt, ops)) != X86EMUL_OKAY )
+            goto done;
+
+        buf[0] = 0x8f;
+        *pxop = vex;
+        pxop->b = 1;
+        pxop->r = 1;
+        buf[3] = b;
+        buf[4] = 0x09; /* reg=rCX r/m=(%rCX) */
+        *(uint32_t *)(buf + 5) = imm1;
+        buf[9] = 0xc3;
+
+        emulate_stub([dst] "=&c" (dst.val), "[dst]" (&src.val));
+
+        put_stub(stub);
+        break;
+    }
+
     default:
         goto cannot_emulate;
     }
--- a/xen/include/asm-x86/cpufeature.h
+++ b/xen/include/asm-x86/cpufeature.h
@@ -74,6 +74,7 @@
 #define cpu_has_eist		boot_cpu_has(X86_FEATURE_EIST)
 #define cpu_has_hypervisor	boot_cpu_has(X86_FEATURE_HYPERVISOR)
 #define cpu_has_cmp_legacy	boot_cpu_has(X86_FEATURE_CMP_LEGACY)
+#define cpu_has_tbm		boot_cpu_has(X86_FEATURE_TBM)
 
 enum _cache_type {
     CACHE_TYPE_NULL = 0,
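
The expected values in the tests above all derive from the defining identities
of the TBM group.  For reference, here they are once more as a purely
illustrative C sketch (not part of the patch; the ref_* names are invented
here, x is the 32-bit source operand, and out-of-range start/length values for
the immediate bextr form are glossed over):

#include <stdint.h>

static inline uint32_t ref_blcfill(uint32_t x) { return (x + 1) &  x; }
static inline uint32_t ref_blci(uint32_t x)    { return ~(x + 1) | x; }
static inline uint32_t ref_blcic(uint32_t x)   { return (x + 1) & ~x; }
static inline uint32_t ref_blcmsk(uint32_t x)  { return (x + 1) ^  x; }
static inline uint32_t ref_blcs(uint32_t x)    { return (x + 1) |  x; }
static inline uint32_t ref_blsfill(uint32_t x) { return (x - 1) |  x; }
static inline uint32_t ref_blsic(uint32_t x)   { return (x - 1) | ~x; }
static inline uint32_t ref_t1mskc(uint32_t x)  { return (x + 1) | ~x; }
static inline uint32_t ref_tzmsk(uint32_t x)   { return (x - 1) & ~x; }

/* TBM's immediate bextr: bit offset in imm[7:0], field length in imm[15:8]. */
static inline uint32_t ref_bextr_imm(uint32_t x, uint16_t imm)
{
    unsigned int start = imm & 0xff, len = imm >> 8;

    return (x >> start) & ((len < 32 ? 1U << len : 0) - 1);
}

These match one-for-one the expressions used in the regs.ecx checks above.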




* [PATCH 6/8] x86emul: support RDRAND/RDSEED
  2017-01-13 15:11 [PATCH 0/8] x86emul: support various ISA extensions Jan Beulich
                   ` (4 preceding siblings ...)
  2017-01-13 15:32 ` [PATCH 5/8] x86emul: support TBM insns Jan Beulich
@ 2017-01-13 15:34 ` Jan Beulich
  2017-01-13 18:55   ` Andrew Cooper
  2017-01-13 15:34 ` [PATCH 7/8] x86emul: support RDPID Jan Beulich
  2017-01-13 15:35 ` [PATCH 8/8] x86emul: rename the no_writeback label Jan Beulich
  7 siblings, 1 reply; 29+ messages in thread
From: Jan Beulich @ 2017-01-13 15:34 UTC (permalink / raw)
  To: xen-devel; +Cc: Andrew Cooper


Signed-off-by: Jan Beulich <jbeulich@suse.com>
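
A note on the flag handling below: rdrand and rdseed report success solely
through CF (the remaining arithmetic flags are cleared), which the emulation
mirrors via ASM_FLAG_OUT and the EFLAGS_MASK update.  From a caller's point of
view that convention looks roughly like the sketch below.  This is
illustrative only and not part of the patch; it uses the compiler intrinsic
from <immintrin.h> (built with -mrdrnd), and the helper name as well as the
retry count are made up.

#include <immintrin.h>
#include <stdbool.h>

/* CF set: a random value was delivered.  CF clear: retry. */
static bool get_random_u32(unsigned int *out)
{
    for ( unsigned int attempts = 0; attempts < 10; ++attempts )
        if ( _rdrand32_step(out) )   /* returns the instruction's CF */
            return true;

    return false;
}

_rdseed32_step() (needing -mrdseed) follows the same CF convention.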

--- a/xen/arch/x86/Rules.mk
+++ b/xen/arch/x86/Rules.mk
@@ -14,7 +14,9 @@ $(call cc-option-add,CFLAGS,CC,-Wnested-
 $(call as-insn-check,CFLAGS,CC,"vmcall",-DHAVE_GAS_VMX)
 $(call as-insn-check,CFLAGS,CC,"crc32 %eax$$(comma)%eax",-DHAVE_GAS_SSE4_2)
 $(call as-insn-check,CFLAGS,CC,"invept (%rax)$$(comma)%rax",-DHAVE_GAS_EPT)
+$(call as-insn-check,CFLAGS,CC,"rdrand %eax",-DHAVE_GAS_RDRAND)
 $(call as-insn-check,CFLAGS,CC,"rdfsbase %rax",-DHAVE_GAS_FSGSBASE)
+$(call as-insn-check,CFLAGS,CC,"rdseed %eax",-DHAVE_GAS_RDSEED)
 $(call as-insn-check,CFLAGS,CC,".equ \"x\"$$(comma)1", \
                      -U__OBJECT_LABEL__ -DHAVE_GAS_QUOTED_SYM \
                      '-D__OBJECT_LABEL__=$(subst $(BASEDIR)/,,$(CURDIR))/$$@')
--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -1351,6 +1351,7 @@ static bool vcpu_has(
 #define vcpu_has_movbe()       vcpu_has(         1, ECX, 22, ctxt, ops)
 #define vcpu_has_popcnt()      vcpu_has(         1, ECX, 23, ctxt, ops)
 #define vcpu_has_avx()         vcpu_has(         1, ECX, 28, ctxt, ops)
+#define vcpu_has_rdrand()      vcpu_has(         1, ECX, 30, ctxt, ops)
 #define vcpu_has_lahf_lm()     vcpu_has(0x80000001, ECX,  0, ctxt, ops)
 #define vcpu_has_cr8_legacy()  vcpu_has(0x80000001, ECX,  4, ctxt, ops)
 #define vcpu_has_lzcnt()       vcpu_has(0x80000001, ECX,  5, ctxt, ops)
@@ -1361,6 +1362,7 @@ static bool vcpu_has(
 #define vcpu_has_bmi2()        vcpu_has(         7, EBX,  8, ctxt, ops)
 #define vcpu_has_rtm()         vcpu_has(         7, EBX, 11, ctxt, ops)
 #define vcpu_has_mpx()         vcpu_has(         7, EBX, 14, ctxt, ops)
+#define vcpu_has_rdseed()      vcpu_has(         7, EBX, 18, ctxt, ops)
 #define vcpu_has_adx()         vcpu_has(         7, EBX, 19, ctxt, ops)
 #define vcpu_has_smap()        vcpu_has(         7, EBX, 20, ctxt, ops)
 #define vcpu_has_clflushopt()  vcpu_has(         7, EBX, 23, ctxt, ops)
@@ -5737,14 +5739,82 @@ x86_emulate(
         dst.val = src.val;
         break;
 
-    case X86EMUL_OPC(0x0f, 0xc7): /* Grp9 (cmpxchg8b/cmpxchg16b) */ {
+    case X86EMUL_OPC(0x0f, 0xc7): /* Grp9 */ {
         union {
             uint32_t u32[2];
             uint64_t u64[2];
         } *old, *aux;
 
+        if ( ea.type == OP_REG )
+        {
+            bool __maybe_unused carry;
+
+            switch ( modrm_reg & 7 )
+            {
+            default:
+                goto cannot_emulate;
+
+#ifdef HAVE_GAS_RDRAND
+            case 6: /* rdrand */
+                generate_exception_if(rep_prefix(), EXC_UD);
+                host_and_vcpu_must_have(rdrand);
+                dst = ea;
+                switch ( op_bytes )
+                {
+                case 2:
+                    asm ( "rdrand %w0" ASM_FLAG_OUT(, "; setc %1")
+                          : "=r" (dst.val), ASM_FLAG_OUT("=@ccc", "=qm") (carry) );
+                    break;
+                default:
+# ifdef __x86_64__
+                    asm ( "rdrand %k0" ASM_FLAG_OUT(, "; setc %1")
+                          : "=r" (dst.val), ASM_FLAG_OUT("=@ccc", "=qm") (carry) );
+                    break;
+                case 8:
+# endif
+                    asm ( "rdrand %0" ASM_FLAG_OUT(, "; setc %1")
+                          : "=r" (dst.val), ASM_FLAG_OUT("=@ccc", "=qm") (carry) );
+                    break;
+                }
+                _regs._eflags &= ~EFLAGS_MASK;
+                if ( carry )
+                    _regs._eflags |= EFLG_CF;
+                break;
+#endif
+
+#ifdef HAVE_GAS_RDSEED
+            case 7: /* rdseed */
+                generate_exception_if(rep_prefix(), EXC_UD);
+                host_and_vcpu_must_have(rdseed);
+                dst = ea;
+                switch ( op_bytes )
+                {
+                case 2:
+                    asm ( "rdseed %w0" ASM_FLAG_OUT(, "; setc %1")
+                          : "=r" (dst.val), ASM_FLAG_OUT("=@ccc", "=qm") (carry) );
+                    break;
+                default:
+# ifdef __x86_64__
+                    asm ( "rdseed %k0" ASM_FLAG_OUT(, "; setc %1")
+                          : "=r" (dst.val), ASM_FLAG_OUT("=@ccc", "=qm") (carry) );
+                    break;
+                case 8:
+# endif
+                    asm ( "rdseed %0" ASM_FLAG_OUT(, "; setc %1")
+                          : "=r" (dst.val), ASM_FLAG_OUT("=@ccc", "=qm") (carry) );
+                    break;
+                }
+                _regs._eflags &= ~EFLAGS_MASK;
+                if ( carry )
+                    _regs._eflags |= EFLG_CF;
+                break;
+#endif
+            }
+            break;
+        }
+
+        /* cmpxchg8b/cmpxchg16b */
         generate_exception_if((modrm_reg & 7) != 1, EXC_UD);
-        generate_exception_if(ea.type != OP_MEM, EXC_UD);
         fail_if(!ops->cmpxchg);
         if ( rex_prefix & REX_W )
         {
--- a/xen/include/asm-x86/cpufeature.h
+++ b/xen/include/asm-x86/cpufeature.h
@@ -73,6 +73,8 @@
 #define cpu_has_monitor		boot_cpu_has(X86_FEATURE_MONITOR)
 #define cpu_has_eist		boot_cpu_has(X86_FEATURE_EIST)
 #define cpu_has_hypervisor	boot_cpu_has(X86_FEATURE_HYPERVISOR)
+#define cpu_has_rdrand		boot_cpu_has(X86_FEATURE_RDRAND)
+#define cpu_has_rdseed		boot_cpu_has(X86_FEATURE_RDSEED)
 #define cpu_has_cmp_legacy	boot_cpu_has(X86_FEATURE_CMP_LEGACY)
 #define cpu_has_tbm		boot_cpu_has(X86_FEATURE_TBM)
 




* [PATCH 7/8] x86emul: support RDPID
  2017-01-13 15:11 [PATCH 0/8] x86emul: support various ISA extensions Jan Beulich
                   ` (5 preceding siblings ...)
  2017-01-13 15:34 ` [PATCH 6/8] x86emul: support RDRAND/RDSEED Jan Beulich
@ 2017-01-13 15:34 ` Jan Beulich
  2017-01-13 19:00   ` Andrew Cooper
  2017-01-13 15:35 ` [PATCH 8/8] x86emul: rename the no_writeback label Jan Beulich
  7 siblings, 1 reply; 29+ messages in thread
From: Jan Beulich @ 2017-01-13 15:34 UTC (permalink / raw)
  To: xen-devel; +Cc: Andrew Cooper


Signed-off-by: Jan Beulich <jbeulich@suse.com>

--- a/tools/tests/x86_emulator/test_x86_emulator.c
+++ b/tools/tests/x86_emulator/test_x86_emulator.c
@@ -158,6 +158,11 @@ static int read_msr(
     case 0xc0000080: /* EFER */
         *val = ctxt->addr_size > 32 ? 0x500 /* LME|LMA */ : 0;
         return X86EMUL_OKAY;
+
+    case 0xc0000103: /* TSC_AUX */
+#define TSC_AUX_VALUE 0xCACACACA
+        *val = TSC_AUX_VALUE;
+        return X86EMUL_OKAY;
     }
 
     return X86EMUL_UNHANDLEABLE;
@@ -1472,6 +1477,16 @@ int main(int argc, char **argv)
     else
         printf("skipped\n");
 
+    printf("%-40s", "Testing rdpid %ecx...");
+    instr[0] = 0xF3; instr[1] = 0x0f; instr[2] = 0xC7; instr[3] = 0xf9;
+    regs.eip = (unsigned long)&instr[0];
+    rc = x86_emulate(&ctxt, &emulops);
+    if ( (rc != X86EMUL_OKAY) ||
+         (regs.ecx != TSC_AUX_VALUE) ||
+         (regs.eip != (unsigned long)&instr[4]) )
+        goto fail;
+    printf("okay\n");
+
     printf("%-40s", "Testing movq %mm3,(%ecx)...");
     if ( stack_exec && cpu_has_mmx )
     {
--- a/tools/tests/x86_emulator/x86_emulate.c
+++ b/tools/tests/x86_emulator/x86_emulate.c
@@ -60,9 +60,15 @@ int emul_test_cpuid(
     if ( leaf == 1 )
         res->c |= 1U << 22;
 
-    /* The emulator doesn't itself use ADCX/ADOX, so we can always run the test. */
+    /*
+     * The emulator doesn't itself use ADCX/ADOX/RDPID, so we can always run
+     * the respective tests.
+     */
     if ( leaf == 7 && subleaf == 0 )
+    {
         res->b |= 1U << 19;
+        res->c |= 1U << 22;
+    }
 
     return X86EMUL_OKAY;
 }
--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -1367,6 +1367,7 @@ static bool vcpu_has(
 #define vcpu_has_smap()        vcpu_has(         7, EBX, 20, ctxt, ops)
 #define vcpu_has_clflushopt()  vcpu_has(         7, EBX, 23, ctxt, ops)
 #define vcpu_has_clwb()        vcpu_has(         7, EBX, 24, ctxt, ops)
+#define vcpu_has_rdpid()       vcpu_has(         7, ECX, 22, ctxt, ops)
 
 #define vcpu_must_have(feat) \
     generate_exception_if(!vcpu_has_##feat(), EXC_UD)
@@ -5782,8 +5783,23 @@ x86_emulate(
                 break;
 #endif
 
+            case 7: /* rdseed / rdpid */
+                if ( repe_prefix() ) /* rdpid */
+                {
+                    uint64_t tsc_aux;
+
+                    generate_exception_if(ea.type != OP_REG, EXC_UD);
+                    vcpu_must_have(rdpid);
+                    fail_if(!ops->read_msr);
+                    if ( (rc = ops->read_msr(MSR_TSC_AUX, &tsc_aux,
+                                             ctxt)) != X86EMUL_OKAY )
+                        goto done;
+                    dst = ea;
+                    dst.val = tsc_aux;
+                    dst.bytes = 4;
+                    break;
+                }
 #ifdef HAVE_GAS_RDSEED
-            case 7: /* rdseed */
                 generate_exception_if(rep_prefix(), EXC_UD);
                 host_and_vcpu_must_have(rdseed);
                 dst = ea;
--- a/xen/include/public/arch-x86/cpufeatureset.h
+++ b/xen/include/public/arch-x86/cpufeatureset.h
@@ -226,6 +226,7 @@ XEN_CPUFEATURE(PREFETCHWT1,   6*32+ 0) /
 XEN_CPUFEATURE(AVX512VBMI,    6*32+ 1) /*A  AVX-512 Vector Byte Manipulation Instrs */
 XEN_CPUFEATURE(PKU,           6*32+ 3) /*H  Protection Keys for Userspace */
 XEN_CPUFEATURE(OSPKE,         6*32+ 4) /*!  OS Protection Keys Enable */
+XEN_CPUFEATURE(RDPID,         6*32+22) /*A  RDPID instruction */
 
 /* AMD-defined CPU features, CPUID level 0x80000007.edx, word 7 */
 XEN_CPUFEATURE(ITSC,          7*32+ 8) /*   Invariant TSC */
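
For reference: rdpid simply returns the current value of the IA32_TSC_AUX MSR
(0xc0000103), i.e. the same value rdtscp leaves in %ecx, which is why the test
harness above can service it from its read_msr() hook.  A user-space sketch of
that equivalence is below; it is illustrative only (not part of the patch),
assumes a toolchain exposing __rdtscp() (GCC/Clang provide it via
<x86intrin.h>), and the helper name is made up.

#include <x86intrin.h>

static unsigned int read_tsc_aux(void)
{
    unsigned int aux;

    /* Discard the TSC itself; only the IA32_TSC_AUX value left in aux matters. */
    (void)__rdtscp(&aux);

    return aux;
}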





* [PATCH 8/8] x86emul: rename the no_writeback label
  2017-01-13 15:11 [PATCH 0/8] x86emul: support various ISA extensions Jan Beulich
                   ` (6 preceding siblings ...)
  2017-01-13 15:34 ` [PATCH 7/8] x86emul: support RDPID Jan Beulich
@ 2017-01-13 15:35 ` Jan Beulich
  2017-01-13 19:01   ` Andrew Cooper
  7 siblings, 1 reply; 29+ messages in thread
From: Jan Beulich @ 2017-01-13 15:35 UTC (permalink / raw)
  To: xen-devel; +Cc: Andrew Cooper


This brings the label's name in line with what actually happens there: the
code at that label commits the shadow register state on normal completion of
an insn, so "no_writeback" was misleading.

Suggested-by: Andrew Cooper <andrew.cooper3@citrix.com>
Signed-off-by: Jan Beulich <jbeulich@suse.com>

--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -986,7 +986,7 @@ static inline void put_loop_count(
             if ( using_si ) _regs.r(si) = _regs._esi;                   \
             if ( using_di ) _regs.r(di) = _regs._edi;                   \
         }                                                               \
-        goto no_writeback;                                              \
+        goto complete_insn;                                             \
     }                                                                   \
     if ( max_reps > 1 && (_regs._eflags & EFLG_TF) &&                   \
          !is_branch_step(ctxt, ops) )                                   \
@@ -1015,7 +1015,7 @@ static void __put_rep_prefix(
     {                                                                   \
         __put_rep_prefix(&_regs, ctxt->regs, ad_bytes, reps_completed); \
         if ( unlikely(rc == X86EMUL_EXCEPTION) )                        \
-            goto no_writeback;                                          \
+            goto complete_insn;                                         \
     }                                                                   \
 })
 
@@ -2661,7 +2661,7 @@ x86_emulate(
         state.caller = NULL;
 #endif
         if ( rc == X86EMUL_DONE )
-            goto no_writeback;
+            goto complete_insn;
         if ( rc != X86EMUL_OKAY )
             return rc;
     }
@@ -4281,7 +4281,7 @@ x86_emulate(
         if ( rc != 0 )
         {
             if ( rc == X86EMUL_DONE )
-                goto no_writeback;
+                goto complete_insn;
             goto done;
         }
         break;
@@ -4657,7 +4657,7 @@ x86_emulate(
             _regs._eflags &= ~EFLG_AC;
             if ( modrm == 0xcb )
                 _regs._eflags |= EFLG_AC;
-            goto no_writeback;
+            goto complete_insn;
 
 #ifdef __XEN__
         case 0xd1: /* xsetbv */
@@ -4669,7 +4669,7 @@ x86_emulate(
                                   handle_xsetbv(_regs._ecx,
                                                 _regs._eax | (_regs.rdx << 32)),
                                   EXC_GP, 0);
-            goto no_writeback;
+            goto complete_insn;
 #endif
 
         case 0xd4: /* vmfunc */
@@ -4678,7 +4678,7 @@ x86_emulate(
             fail_if(!ops->vmfunc);
             if ( (rc = ops->vmfunc(ctxt)) != X86EMUL_OKAY )
                 goto done;
-            goto no_writeback;
+            goto complete_insn;
 
         case 0xd5: /* xend */
             generate_exception_if(vex.pfx, EXC_UD);
@@ -4692,7 +4692,7 @@ x86_emulate(
                                   EXC_UD);
             /* Neither HLE nor RTM can be active when we get here. */
             _regs._eflags |= EFLG_ZF;
-            goto no_writeback;
+            goto complete_insn;
 
         case 0xdf: /* invlpga */
             generate_exception_if(!in_protmode(ctxt, ops), EXC_UD);
@@ -4701,7 +4701,7 @@ x86_emulate(
             if ( (rc = ops->invlpg(x86_seg_none, truncate_ea(_regs.r(ax)),
                                    ctxt)) )
                 goto done;
-            goto no_writeback;
+            goto complete_insn;
 
         case 0xf9: /* rdtscp */
         {
@@ -4749,7 +4749,7 @@ x86_emulate(
                 base += sizeof(zero);
                 limit -= sizeof(zero);
             }
-            goto no_writeback;
+            goto complete_insn;
         }
         }
 
@@ -6219,8 +6219,7 @@ x86_emulate(
         break;
     }
 
- no_writeback: /* Commit shadow register state. */
-
+ complete_insn: /* Commit shadow register state. */
     /* Zero the upper 32 bits of %rip if not in 64-bit mode. */
     if ( !mode_64bit() )
         _regs.r(ip) = _regs._eip;




[-- Attachment #3: Type: text/plain, Size: 127 bytes --]

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 1/8] x86emul: support POPCNT
  2017-01-13 15:30 ` [PATCH 1/8] x86emul: support POPCNT Jan Beulich
@ 2017-01-13 16:31   ` Andrew Cooper
  0 siblings, 0 replies; 29+ messages in thread
From: Andrew Cooper @ 2017-01-13 16:31 UTC (permalink / raw)
  To: Jan Beulich, xen-devel

On 13/01/17 15:30, Jan Beulich wrote:
> Signed-off-by: Jan Beulich <jbeulich@suse.com>

Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 2/8] x86emul: support ADCX/ADOX
  2017-01-13 15:31 ` [PATCH 2/8] x86emul: support ADCX/ADOX Jan Beulich
@ 2017-01-13 16:34   ` Andrew Cooper
  0 siblings, 0 replies; 29+ messages in thread
From: Andrew Cooper @ 2017-01-13 16:34 UTC (permalink / raw)
  To: Jan Beulich, xen-devel

On 13/01/17 15:31, Jan Beulich wrote:
> Signed-off-by: Jan Beulich <jbeulich@suse.com>

Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 3/8] x86emul: support BMI1 insns
  2017-01-13 15:31 ` [PATCH 3/8] x86emul: support BMI1 insns Jan Beulich
@ 2017-01-13 17:40   ` Andrew Cooper
  2017-01-16 11:19     ` Jan Beulich
  0 siblings, 1 reply; 29+ messages in thread
From: Andrew Cooper @ 2017-01-13 17:40 UTC (permalink / raw)
  To: Jan Beulich, xen-devel

On 13/01/17 15:31, Jan Beulich wrote:
> --- a/xen/arch/x86/x86_emulate/x86_emulate.c
> +++ b/xen/arch/x86/x86_emulate/x86_emulate.c
> @@ -676,6 +676,16 @@ do{ asm volatile (
>  #define __emulate_1op_8byte(_op, _dst, _eflags)
>  #endif /* __i386__ */
>  
> +#define emulate_stub(dst, src...) do {                                  \
> +    unsigned long tmp;                                                  \
> +    asm volatile ( _PRE_EFLAGS("[efl]", "[msk]", "[tmp]")               \
> +                   "call *%[stub];"                                     \
> +                   _POST_EFLAGS("[efl]", "[msk]", "[tmp]")              \
> +                   : dst, [tmp] "=&r" (tmp), [efl] "+g" (_regs._eflags) \
> +                   : [stub] "r" (stub.func),                            \
> +                     [msk] "i" (EFLAGS_MASK), ## src );                 \
> +} while (0)
> +
>  /* Fetch next part of the instruction being emulated. */
>  #define insn_fetch_bytes(_size)                                         \
>  ({ unsigned long _x = 0, _ip = state->ip;                               \
> @@ -2295,7 +2305,10 @@ x86_decode(
>                          }
>                      }
>                      else
> +                    {
> +                        ASSERT(op_bytes == 4);
>                          vex.b = 1;
> +                    }
>                      switch ( b )
>                      {
>                      case 0x62:
> @@ -5866,6 +5879,67 @@ x86_emulate(
>          break;
>  #endif
>  
> +    case X86EMUL_OPC_VEX(0x0f38, 0xf2):    /* andn r/m,r,r */
> +    case X86EMUL_OPC_VEX(0x0f38, 0xf7):    /* bextr r,r/m,r */
> +    {
> +        uint8_t *buf = get_stub(stub);
> +        typeof(vex) *pvex = container_of(buf + 1, typeof(vex), raw[0]);
> +
> +        host_and_vcpu_must_have(bmi1);
> +        generate_exception_if(vex.l, EXC_UD);

The manual also states #UD if VEX.W is set.

> +
> +        buf[0] = 0xc4;
> +        *pvex = vex;
> +        pvex->b = 1;
> +        pvex->r = 1;
> +        pvex->reg = ~0; /* rAX */
> +        buf[3] = b;
> +        buf[4] = 0x09; /* reg=rCX r/m=(%rCX) */
> +        buf[5] = 0xc3;
> +
> +        src.reg = decode_register(~vex.reg & (mode_64bit() ? 0xf : 7),
> +                                  &_regs, 0);

Given this construct, and several GPR-encoded vex instructions, how
about a decode_vex_gpr() wrapper?

> +        emulate_stub([dst] "=&c" (dst.val), "[dst]" (&src.val), "a" (*src.reg));
> +
> +        put_stub(stub);
> +        break;
> +    }
> +
> +    case X86EMUL_OPC_VEX(0x0f38, 0xf3): /* Grp 17 */
> +    {
> +        uint8_t *buf = get_stub(stub);
> +        typeof(vex) *pvex = container_of(buf + 1, typeof(vex), raw[0]);
> +
> +        switch ( modrm_reg & 7 )
> +        {
> +        case 1: /* blsr r,r/m */
> +        case 2: /* blsmsk r,r/m */
> +        case 3: /* blsi r,r/m */
> +            host_and_vcpu_must_have(bmi1);
> +            break;
> +        default:
> +            goto cannot_emulate;
> +        }
> +
> +        generate_exception_if(vex.l, EXC_UD);
> +
> +        buf[0] = 0xc4;
> +        *pvex = vex;
> +        pvex->b = 1;
> +        pvex->r = 1;
> +        pvex->reg = ~0; /* rAX */
> +        buf[3] = b;
> +        buf[4] = (modrm & 0x38) | 0x01; /* r/m=(%rCX) */
> +        buf[5] = 0xc3;
> +
> +        dst.reg = decode_register(~vex.reg & (mode_64bit() ? 0xf : 7),
> +                                  &_regs, 0);
> +        emulate_stub("=&a" (dst.val), "c" (&src.val));
> +
> +        put_stub(stub);
> +        break;
> +    }
> +
>      case X86EMUL_OPC_66(0x0f38, 0xf6): /* adcx r/m,r */
>      case X86EMUL_OPC_F3(0x0f38, 0xf6): /* adox r/m,r */
>      {
> --- a/xen/include/asm-x86/cpufeature.h
> +++ b/xen/include/asm-x86/cpufeature.h
> @@ -57,6 +57,7 @@
>  #define cpu_has_xsave           boot_cpu_has(X86_FEATURE_XSAVE)
>  #define cpu_has_avx             boot_cpu_has(X86_FEATURE_AVX)
>  #define cpu_has_lwp             boot_cpu_has(X86_FEATURE_LWP)
> +#define cpu_has_bmi1            boot_cpu_has(X86_FEATURE_BMI1)
>  #define cpu_has_mpx             boot_cpu_has(X86_FEATURE_MPX)
>  #define cpu_has_arch_perfmon    boot_cpu_has(X86_FEATURE_ARCH_PERFMON)
>  #define cpu_has_rdtscp          boot_cpu_has(X86_FEATURE_RDTSCP)

After trying this out, we clearly need to alter the position on VEX
prefixes.  VEX encoded GPR instructions don't fall within the previous
assumptions made about the dependences of VEX instructions.

~Andrew

diff --git a/xen/tools/gen-cpuid.py b/xen/tools/gen-cpuid.py
index 6212e4f..d4210d5 100755
--- a/xen/tools/gen-cpuid.py
+++ b/xen/tools/gen-cpuid.py
@@ -234,9 +234,11 @@ def crunch_numbers(state):
         XSAVE: [XSAVEOPT, XSAVEC, XGETBV1, XSAVES,
                 AVX, MPX, PKU, LWP],
 
-        # AVX is taken to mean hardware support for VEX encoded instructions,
-        # 256bit registers, and the instructions themselves.  Each of these
-        # subsequent instruction groups may only be VEX encoded.
+        # AVX is taken to mean hardware support for 256bit registers, and the
+        # instructions themselves.  It does not relate to the VEX prefix (in
+        # particular, most BMI{1,2} instructions may only be VEX encoded but
+        # operate on GPRs rather than YMM registers and can be used without
+        # enabling xstate).
         AVX: [FMA, FMA4, F16C, AVX2, XOP],
 
         # CX16 is only encodable in Long Mode.  LAHF_LM indicates that the


_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel

^ permalink raw reply related	[flat|nested] 29+ messages in thread

* Re: [PATCH 4/8] x86emul: support BMI2 insns
  2017-01-13 15:32 ` [PATCH 4/8] x86emul: support BMI2 insns Jan Beulich
@ 2017-01-13 18:20   ` Andrew Cooper
  2017-01-16 11:32     ` Jan Beulich
  0 siblings, 1 reply; 29+ messages in thread
From: Andrew Cooper @ 2017-01-13 18:20 UTC (permalink / raw)
  To: Jan Beulich, xen-devel

On 13/01/17 15:32, Jan Beulich wrote:
> Note that the adjustment to the mode_64bit() definition is so that we
> can avoid "#ifdef __x86_64__" around the 64-bit asm() portions. An
> alternative would be single asm()s with a conditional branch over the
> (manually encoded) REX64 prefix.

This is presumably relying on sensible dead-code elimination to compile?
Does this offer any further opportunities for removing other ifdefs?

(Either way, this seems cleaner than embedding a jmp in asm).
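
For reference, the pattern in question boils down to the mulx hunk quoted
further down; with mode_64bit() folding to the constant false on a 32-bit
build, the first branch (including the "mulq", which wouldn't assemble
there) is dropped as dead code before it ever reaches the assembler:

    if ( mode_64bit() && vex.w )    /* statically false on 32-bit builds */
        asm ( "mulq %3" : "=a" (*ea.reg), "=d" (dst.val)
                        : "0" (src.val), "rm" (_regs.r(dx)) );
    else
        asm ( "mull %3" : "=a" (*ea.reg), "=d" (dst.val)
                        : "0" ((uint32_t)src.val), "rm" (_regs._edx) );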

> --- a/xen/arch/x86/x86_emulate/x86_emulate.c
> +++ b/xen/arch/x86/x86_emulate/x86_emulate.c
> @@ -707,7 +707,11 @@ do{ asm volatile (
>  })
>  #define truncate_ea(ea) truncate_word((ea), ad_bytes)
>  
> -#define mode_64bit() (ctxt->addr_size == 64)
> +#ifdef __x86_64__
> +# define mode_64bit() (ctxt->addr_size == 64)
> +#else
> +# define mode_64bit() false
> +#endif
>  
>  #define fail_if(p)                                      \
>  do {                                                    \
> @@ -1353,6 +1357,7 @@ static bool vcpu_has(
>  #define vcpu_has_misalignsse() vcpu_has(0x80000001, ECX,  7, ctxt, ops)
>  #define vcpu_has_bmi1()        vcpu_has(         7, EBX,  3, ctxt, ops)
>  #define vcpu_has_hle()         vcpu_has(         7, EBX,  4, ctxt, ops)
> +#define vcpu_has_bmi2()        vcpu_has(         7, EBX,  8, ctxt, ops)
>  #define vcpu_has_rtm()         vcpu_has(         7, EBX, 11, ctxt, ops)
>  #define vcpu_has_mpx()         vcpu_has(         7, EBX, 14, ctxt, ops)
>  #define vcpu_has_adx()         vcpu_has(         7, EBX, 19, ctxt, ops)
> @@ -5880,12 +5885,21 @@ x86_emulate(
>  #endif
>  
>      case X86EMUL_OPC_VEX(0x0f38, 0xf2):    /* andn r/m,r,r */
> +    case X86EMUL_OPC_VEX(0x0f38, 0xf5):    /* bzhi r,r/m,r */
> +    case X86EMUL_OPC_VEX_F3(0x0f38, 0xf5): /* pext r/m,r,r */
> +    case X86EMUL_OPC_VEX_F2(0x0f38, 0xf5): /* pdep r/m,r,r */
>      case X86EMUL_OPC_VEX(0x0f38, 0xf7):    /* bextr r,r/m,r */
> +    case X86EMUL_OPC_VEX_66(0x0f38, 0xf7): /* shlx r,r/m,r */
> +    case X86EMUL_OPC_VEX_F3(0x0f38, 0xf7): /* sarx r,r/m,r */
> +    case X86EMUL_OPC_VEX_F2(0x0f38, 0xf7): /* shrx r,r/m,r */
>      {
>          uint8_t *buf = get_stub(stub);
>          typeof(vex) *pvex = container_of(buf + 1, typeof(vex), raw[0]);
>  
> -        host_and_vcpu_must_have(bmi1);
> +        if ( b == 0xf5 || vex.pfx )
> +            host_and_vcpu_must_have(bmi2);
> +        else
> +            host_and_vcpu_must_have(bmi1);
>          generate_exception_if(vex.l, EXC_UD);
>  
>          buf[0] = 0xc4;
> @@ -5973,6 +5987,33 @@ x86_emulate(
>          break;
>      }
>  
> +    case X86EMUL_OPC_VEX_F2(0x0f38, 0xf6): /* mulx r/m,r,r */
> +        vcpu_must_have(bmi2);
> +        generate_exception_if(vex.l, EXC_UD);

vex.w again.

> +        ea.reg = decode_register(~vex.reg & (mode_64bit() ? 0xf : 7),
> +                                 &_regs, 0);
> +        if ( mode_64bit() && vex.w )
> +            asm ( "mulq %3" : "=a" (*ea.reg), "=d" (dst.val)
> +                            : "0" (src.val), "rm" (_regs.r(dx)) );
> +        else
> +            asm ( "mull %3" : "=a" (*ea.reg), "=d" (dst.val)
> +                            : "0" ((uint32_t)src.val), "rm" (_regs._edx) );
> +        break;
> +
> +    case X86EMUL_OPC_VEX_F2(0x0f3a, 0xf0): /* rorx imm,r/m,r */
> +        vcpu_must_have(bmi2);
> +        generate_exception_if(vex.l || vex.reg != 0xf, EXC_UD);

What does this vex.reg check correspond to?  I can't locate anything
relevant in the manuals.

~Andrew

> +        if ( ea.type == OP_REG )
> +            src.val = *ea.reg;
> +        else if ( (rc = read_ulong(ea.mem.seg, ea.mem.off, &src.val, op_bytes,
> +                                   ctxt, ops)) != X86EMUL_OKAY )
> +            goto done;
> +        if ( mode_64bit() && vex.w )
> +            asm ( "rorq %b1,%0" : "=g" (dst.val) : "c" (imm1), "0" (src.val) );
> +        else
> +            asm ( "rorl %b1,%k0" : "=g" (dst.val) : "c" (imm1), "0" (src.val) );
> +        break;
> +
>      default:
>          goto cannot_emulate;
>      }
> --- a/xen/include/asm-x86/cpufeature.h
> +++ b/xen/include/asm-x86/cpufeature.h
> @@ -58,6 +58,7 @@
>  #define cpu_has_avx             boot_cpu_has(X86_FEATURE_AVX)
>  #define cpu_has_lwp             boot_cpu_has(X86_FEATURE_LWP)
>  #define cpu_has_bmi1            boot_cpu_has(X86_FEATURE_BMI1)
> +#define cpu_has_bmi2            boot_cpu_has(X86_FEATURE_BMI2)
>  #define cpu_has_mpx             boot_cpu_has(X86_FEATURE_MPX)
>  #define cpu_has_arch_perfmon    boot_cpu_has(X86_FEATURE_ARCH_PERFMON)
>  #define cpu_has_rdtscp          boot_cpu_has(X86_FEATURE_RDTSCP)
>
>


_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 5/8] x86emul: support TBM insns
  2017-01-13 15:32 ` [PATCH 5/8] x86emul: support TBM insns Jan Beulich
@ 2017-01-13 18:48   ` Andrew Cooper
  2017-01-16 11:36     ` Jan Beulich
  0 siblings, 1 reply; 29+ messages in thread
From: Andrew Cooper @ 2017-01-13 18:48 UTC (permalink / raw)
  To: Jan Beulich, xen-devel

On 13/01/17 15:32, Jan Beulich wrote:
> --- a/xen/arch/x86/x86_emulate/x86_emulate.c
> +++ b/xen/arch/x86/x86_emulate/x86_emulate.c
> @@ -1355,6 +1355,7 @@ static bool vcpu_has(
>  #define vcpu_has_cr8_legacy()  vcpu_has(0x80000001, ECX,  4, ctxt, ops)
>  #define vcpu_has_lzcnt()       vcpu_has(0x80000001, ECX,  5, ctxt, ops)
>  #define vcpu_has_misalignsse() vcpu_has(0x80000001, ECX,  7, ctxt, ops)
> +#define vcpu_has_tbm()         vcpu_has(0x80000001, ECX, 21, ctxt, ops)
>  #define vcpu_has_bmi1()        vcpu_has(         7, EBX,  3, ctxt, ops)
>  #define vcpu_has_hle()         vcpu_has(         7, EBX,  4, ctxt, ops)
>  #define vcpu_has_bmi2()        vcpu_has(         7, EBX,  8, ctxt, ops)
> @@ -6014,6 +6015,85 @@ x86_emulate(
>              asm ( "rorl %b1,%k0" : "=g" (dst.val) : "c" (imm1), "0" (src.val) );
>          break;
>  
> +    case X86EMUL_OPC(0x8f09, 0x01): /* XOP Grp1 */

Surely this calls for the introduction of X86EMUL_OPC_XOP_* to match
their VEX/EVEX counterparts?

~Andrew

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 6/8] x86emul: support RDRAND/RDSEED
  2017-01-13 15:34 ` [PATCH 6/8] x86emul: support RDRAND/RDSEED Jan Beulich
@ 2017-01-13 18:55   ` Andrew Cooper
  0 siblings, 0 replies; 29+ messages in thread
From: Andrew Cooper @ 2017-01-13 18:55 UTC (permalink / raw)
  To: Jan Beulich, xen-devel

On 13/01/17 15:34, Jan Beulich wrote:
> @@ -5737,14 +5739,82 @@ x86_emulate(
>          dst.val = src.val;
>          break;
>  
> -    case X86EMUL_OPC(0x0f, 0xc7): /* Grp9 (cmpxchg8b/cmpxchg16b) */ {
> +    case X86EMUL_OPC(0x0f, 0xc7): /* Grp9 */ {

Style (while you are changing this).

Otherwise, Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 7/8] x86emul: support RDPID
  2017-01-13 15:34 ` [PATCH 7/8] x86emul: support RDPID Jan Beulich
@ 2017-01-13 19:00   ` Andrew Cooper
  0 siblings, 0 replies; 29+ messages in thread
From: Andrew Cooper @ 2017-01-13 19:00 UTC (permalink / raw)
  To: Jan Beulich, xen-devel

On 13/01/17 15:34, Jan Beulich wrote:
> Signed-off-by: Jan Beulich <jbeulich@suse.com>

Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 8/8] x86emul: rename the no_writeback label
  2017-01-13 15:35 ` [PATCH 8/8] x86emul: rename the no_writeback label Jan Beulich
@ 2017-01-13 19:01   ` Andrew Cooper
  0 siblings, 0 replies; 29+ messages in thread
From: Andrew Cooper @ 2017-01-13 19:01 UTC (permalink / raw)
  To: Jan Beulich, xen-devel

On 13/01/17 15:35, Jan Beulich wrote:
> This is to bring its name in line with what actually happens there.
>
> Suggested-by: Andrew Cooper <andrew.cooper3@citrix.com>
> Signed-off-by: Jan Beulich <jbeulich@suse.com>

Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 3/8] x86emul: support BMI1 insns
  2017-01-13 17:40   ` Andrew Cooper
@ 2017-01-16 11:19     ` Jan Beulich
  2017-01-16 11:59       ` Andrew Cooper
  0 siblings, 1 reply; 29+ messages in thread
From: Jan Beulich @ 2017-01-16 11:19 UTC (permalink / raw)
  To: Andrew Cooper; +Cc: xen-devel

>>> On 13.01.17 at 18:40, <andrew.cooper3@citrix.com> wrote:
> On 13/01/17 15:31, Jan Beulich wrote:
>> @@ -5866,6 +5879,67 @@ x86_emulate(
>>          break;
>>  #endif
>>  
>> +    case X86EMUL_OPC_VEX(0x0f38, 0xf2):    /* andn r/m,r,r */
>> +    case X86EMUL_OPC_VEX(0x0f38, 0xf7):    /* bextr r,r/m,r */
>> +    {
>> +        uint8_t *buf = get_stub(stub);
>> +        typeof(vex) *pvex = container_of(buf + 1, typeof(vex), raw[0]);
>> +
>> +        host_and_vcpu_must_have(bmi1);
>> +        generate_exception_if(vex.l, EXC_UD);
> 
> The manual also states #UD if VEX.W is set.

This is very clearly a doc error: For one, it doesn't _also_ state this,
but says nothing about VEX.L. And the instruction encodings list .W1
variants (as expected) to encode 64-bit operations.

>> +
>> +        buf[0] = 0xc4;
>> +        *pvex = vex;
>> +        pvex->b = 1;
>> +        pvex->r = 1;
>> +        pvex->reg = ~0; /* rAX */
>> +        buf[3] = b;
>> +        buf[4] = 0x09; /* reg=rCX r/m=(%rCX) */
>> +        buf[5] = 0xc3;
>> +
>> +        src.reg = decode_register(~vex.reg & (mode_64bit() ? 0xf : 7),
>> +                                  &_regs, 0);
> 
> Given this construct, and several GPR-encoded vex instructions, how
> about a decode_vex_gpr() wrapper?

That's a good idea.
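
Something along these lines, lifted straight from the open-coded form
above (the name and exact shape are of course up to the actual patch):

    /* Hypothetical wrapper - exact name/signature to be decided. */
    #define decode_vex_gpr(vexreg, regs) \
        decode_register(~(vexreg) & (mode_64bit() ? 0xf : 7), regs, 0)

    src.reg = decode_vex_gpr(vex.reg, &_regs);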

>> --- a/xen/include/asm-x86/cpufeature.h
>> +++ b/xen/include/asm-x86/cpufeature.h
>> @@ -57,6 +57,7 @@
>>  #define cpu_has_xsave           boot_cpu_has(X86_FEATURE_XSAVE)
>>  #define cpu_has_avx             boot_cpu_has(X86_FEATURE_AVX)
>>  #define cpu_has_lwp             boot_cpu_has(X86_FEATURE_LWP)
>> +#define cpu_has_bmi1            boot_cpu_has(X86_FEATURE_BMI1)
>>  #define cpu_has_mpx             boot_cpu_has(X86_FEATURE_MPX)
>>  #define cpu_has_arch_perfmon    boot_cpu_has(X86_FEATURE_ARCH_PERFMON)
>>  #define cpu_has_rdtscp          boot_cpu_has(X86_FEATURE_RDTSCP)
> 
> After trying this out, we clearly need to alter the position on VEX
> prefixes.  VEX encoded GPR instructions don't fall within the previous
> assumptions made about the dependences of VEX instructions.

Should I fold this in, or do you want to submit it as a separate
patch?

Jan

> --- a/xen/tools/gen-cpuid.py
> +++ b/xen/tools/gen-cpuid.py
> @@ -234,9 +234,11 @@ def crunch_numbers(state):
>          XSAVE: [XSAVEOPT, XSAVEC, XGETBV1, XSAVES,
>                  AVX, MPX, PKU, LWP],
>  
> -        # AVX is taken to mean hardware support for VEX encoded instructions,
> -        # 256bit registers, and the instructions themselves.  Each of these
> -        # subsequent instruction groups may only be VEX encoded.
> +        # AVX is taken to mean hardware support for 256bit registers, and the
> +        # instructions themselves.  It does not relate to the VEX prefix (in
> +        # particular, most BMI{1,2} instructions may only be VEX encoded but
> +        # operate on GPRs rather than YMM registers and can be used without
> +        # enabling xstate).
>          AVX: [FMA, FMA4, F16C, AVX2, XOP],
>  
>          # CX16 is only encodable in Long Mode.  LAHF_LM indicates that the




_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 4/8] x86emul: support BMI2 insns
  2017-01-13 18:20   ` Andrew Cooper
@ 2017-01-16 11:32     ` Jan Beulich
  0 siblings, 0 replies; 29+ messages in thread
From: Jan Beulich @ 2017-01-16 11:32 UTC (permalink / raw)
  To: Andrew Cooper; +Cc: xen-devel

>>> On 13.01.17 at 19:20, <andrew.cooper3@citrix.com> wrote:
> On 13/01/17 15:32, Jan Beulich wrote:
>> Note that the adjustment to the mode_64bit() definition is so that we
>> can avoid "#ifdef __x86_64__" around the 64-bit asm() portions. An
>> alternative would be single asm()s with a conditional branch over the
>> (manually encoded) REX64 prefix.
> 
> This is presumably relying on sensible dead-code elimination to compile?

Yes.

> Does this offer any further opportunities for removing other ifdefs?

When I wrote this, it looked like it might, but I haven't created
any follow-up patches so far.

>> @@ -5973,6 +5987,33 @@ x86_emulate(
>>          break;
>>      }
>>  
>> +    case X86EMUL_OPC_VEX_F2(0x0f38, 0xf6): /* mulx r/m,r,r */
>> +        vcpu_must_have(bmi2);
>> +        generate_exception_if(vex.l, EXC_UD);
> 
> vex.w again.

Nope (see also the textual description, which actually mentions
VEX.L).

>> +        ea.reg = decode_register(~vex.reg & (mode_64bit() ? 0xf : 7),
>> +                                 &_regs, 0);
>> +        if ( mode_64bit() && vex.w )
>> +            asm ( "mulq %3" : "=a" (*ea.reg), "=d" (dst.val)
>> +                            : "0" (src.val), "rm" (_regs.r(dx)) );
>> +        else
>> +            asm ( "mull %3" : "=a" (*ea.reg), "=d" (dst.val)
>> +                            : "0" ((uint32_t)src.val), "rm" (_regs._edx) );
>> +        break;
>> +
>> +    case X86EMUL_OPC_VEX_F2(0x0f3a, 0xf0): /* rorx imm,r/m,r */
>> +        vcpu_must_have(bmi2);
>> +        generate_exception_if(vex.l || vex.reg != 0xf, EXC_UD);
> 
> What does this vex.reg check correspond to?  I can't locate anything
> relevant in the manuals.

Indeed the manual says nothing, but this again appears to be a
doc error: The instruction does #UD in that case. I'll add a word
to the commit message.

Jan


_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 5/8] x86emul: support TBM insns
  2017-01-13 18:48   ` Andrew Cooper
@ 2017-01-16 11:36     ` Jan Beulich
  2017-01-16 14:52       ` Andrew Cooper
  0 siblings, 1 reply; 29+ messages in thread
From: Jan Beulich @ 2017-01-16 11:36 UTC (permalink / raw)
  To: Andrew Cooper; +Cc: xen-devel

>>> On 13.01.17 at 19:48, <andrew.cooper3@citrix.com> wrote:
> On 13/01/17 15:32, Jan Beulich wrote:
>> --- a/xen/arch/x86/x86_emulate/x86_emulate.c
>> +++ b/xen/arch/x86/x86_emulate/x86_emulate.c
>> @@ -1355,6 +1355,7 @@ static bool vcpu_has(
>>  #define vcpu_has_cr8_legacy()  vcpu_has(0x80000001, ECX,  4, ctxt, ops)
>>  #define vcpu_has_lzcnt()       vcpu_has(0x80000001, ECX,  5, ctxt, ops)
>>  #define vcpu_has_misalignsse() vcpu_has(0x80000001, ECX,  7, ctxt, ops)
>> +#define vcpu_has_tbm()         vcpu_has(0x80000001, ECX, 21, ctxt, ops)
>>  #define vcpu_has_bmi1()        vcpu_has(         7, EBX,  3, ctxt, ops)
>>  #define vcpu_has_hle()         vcpu_has(         7, EBX,  4, ctxt, ops)
>>  #define vcpu_has_bmi2()        vcpu_has(         7, EBX,  8, ctxt, ops)
>> @@ -6014,6 +6015,85 @@ x86_emulate(
>>              asm ( "rorl %b1,%k0" : "=g" (dst.val) : "c" (imm1), "0" (src.val) );
>>          break;
>>  
>> +    case X86EMUL_OPC(0x8f09, 0x01): /* XOP Grp1 */
> 
> Surely this calls for the introduction of X86EMUL_OPC_XOP_* to match
> their VEX/EVEX counterparts?

Do you really think

    case X86EMUL_OPC_XOP(09, 0x01): /* XOP Grp1 */

or

    case X86EMUL_OPC_XOP09(0x01): /* XOP Grp1 */

are any better? Iirc you had asked this same question already
when the opcode canonicalization patch was under review. The
situation hasn't changed: The nothing/VEX/EVEX distinction is
needed because the same base opcode may have (slightly or
significantly) different meaning depending on which of the three
(or four, if we also considered MVEX) encodings are being used.
There's no such duplicate meaning for XOP encodings.

Jan


_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 3/8] x86emul: support BMI1 insns
  2017-01-16 11:19     ` Jan Beulich
@ 2017-01-16 11:59       ` Andrew Cooper
  2017-01-16 12:43         ` Jan Beulich
  0 siblings, 1 reply; 29+ messages in thread
From: Andrew Cooper @ 2017-01-16 11:59 UTC (permalink / raw)
  To: Jan Beulich; +Cc: xen-devel

On 16/01/17 11:19, Jan Beulich wrote:
>>>> On 13.01.17 at 18:40, <andrew.cooper3@citrix.com> wrote:
>> On 13/01/17 15:31, Jan Beulich wrote:
>>> @@ -5866,6 +5879,67 @@ x86_emulate(
>>>          break;
>>>  #endif
>>>  
>>> +    case X86EMUL_OPC_VEX(0x0f38, 0xf2):    /* andn r/m,r,r */
>>> +    case X86EMUL_OPC_VEX(0x0f38, 0xf7):    /* bextr r,r/m,r */
>>> +    {
>>> +        uint8_t *buf = get_stub(stub);
>>> +        typeof(vex) *pvex = container_of(buf + 1, typeof(vex), raw[0]);
>>> +
>>> +        host_and_vcpu_must_have(bmi1);
>>> +        generate_exception_if(vex.l, EXC_UD);
>> The manual also states #UD if VEX.W is set.
> This is very clearly a doc error: For one, it doesn't _also_ state this,
> but says nothing about VEX.L. And the instruction encodings list .W1
> variants (as expected) to encode 64-bit operations.

VEX.L != 0 is called out, but only in the text, not the exception list.

The exact text is:

"This instruction is not supported in real mode and virtual-8086 mode.
The operand size is always 32 bits if not in 64-bit mode. In 64-bit mode
operand size 64 requires VEX.W1. VEX.W1 is ignored in non-64-bit modes.
An attempt to execute this instruction with VEX.L not equal to 0 will
cause #UD."

with:

"#UD If VEX.W = 1"

in the exception list.

I am confused about the references to VEX.W1 in the text, because they
don't match any described VEX field.  At a guess, I'd say it should
be referring to VEX.B, which controls operand size, while VEX.W is an
opcode extension.

>
>>> --- a/xen/include/asm-x86/cpufeature.h
>>> +++ b/xen/include/asm-x86/cpufeature.h
>>> @@ -57,6 +57,7 @@
>>>  #define cpu_has_xsave           boot_cpu_has(X86_FEATURE_XSAVE)
>>>  #define cpu_has_avx             boot_cpu_has(X86_FEATURE_AVX)
>>>  #define cpu_has_lwp             boot_cpu_has(X86_FEATURE_LWP)
>>> +#define cpu_has_bmi1            boot_cpu_has(X86_FEATURE_BMI1)
>>>  #define cpu_has_mpx             boot_cpu_has(X86_FEATURE_MPX)
>>>  #define cpu_has_arch_perfmon    boot_cpu_has(X86_FEATURE_ARCH_PERFMON)
>>>  #define cpu_has_rdtscp          boot_cpu_has(X86_FEATURE_RDTSCP)
>> After trying this out, we clearly need to alter the position on VEX
>> prefixes.  VEX encoded GPR instructions don't fall within the previous
>> assumptions made about the dependences of VEX instructions.
> Should I fold this in, or do you want to submit it as a separate
> patch?

I will submit a separate patch.  I don't think it changes any of the
currently-dependent content.

~Andrew

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 3/8] x86emul: support BMI1 insns
  2017-01-16 11:59       ` Andrew Cooper
@ 2017-01-16 12:43         ` Jan Beulich
  2017-01-16 12:57           ` Jan Beulich
  0 siblings, 1 reply; 29+ messages in thread
From: Jan Beulich @ 2017-01-16 12:43 UTC (permalink / raw)
  To: Andrew Cooper; +Cc: xen-devel

>>> On 16.01.17 at 12:59, <andrew.cooper3@citrix.com> wrote:
> On 16/01/17 11:19, Jan Beulich wrote:
>>>>> On 13.01.17 at 18:40, <andrew.cooper3@citrix.com> wrote:
>>> On 13/01/17 15:31, Jan Beulich wrote:
>>>> @@ -5866,6 +5879,67 @@ x86_emulate(
>>>>          break;
>>>>  #endif
>>>>  
>>>> +    case X86EMUL_OPC_VEX(0x0f38, 0xf2):    /* andn r/m,r,r */
>>>> +    case X86EMUL_OPC_VEX(0x0f38, 0xf7):    /* bextr r,r/m,r */
>>>> +    {
>>>> +        uint8_t *buf = get_stub(stub);
>>>> +        typeof(vex) *pvex = container_of(buf + 1, typeof(vex), raw[0]);
>>>> +
>>>> +        host_and_vcpu_must_have(bmi1);
>>>> +        generate_exception_if(vex.l, EXC_UD);
>>> The manual also states #UD if VEX.W is set.
>> This is very clearly a doc error: For one, it doesn't _also_ state this,
>> but says nothing about VEX.L. And the instruction encodings list .W1
>> variants (as expected) to encode 64-bit operations.
> 
> VEX.L != 0 is called out, but only in the text, not the exception list.
> 
> The exact text is:
> 
> "This instruction is not supported in real mode and virtual-8086 mode.
> The operand size is always 32 bits if not in 64-bit mode. In 64-bit mode
> operand size 64 requires VEX.W1. VEX.W1 is ignored in non-64-bit modes.
> An attempt to execute this instruction with VEX.L not equal to 0 will
> cause #UD."
> 
> with:
> 
> "#UD If VEX.W = 1"
> 
> in the exception list.
> 
> I am confused about the references to VEX.W1 in the text, because it
> doesn't match any described VEX fields.  At a guess, I'd say it should
> be referring to VEX.B which control operand size, while VEX.W is an
> opcode extention.

VEX.W1 means VEX.W set to 1 (VEX.W0 similarly means VEX.W set to
zero). And there's no VEX.B afaik. VEX.W can serve both purposes -
operand size and opcode extension. As there's no other way to encode
32- vs 64-bit operand size, VEX.W serves this purpose for integer
instructions.

For SIMD instructions VEX.L typically encodes operand size; iirc VEX.W
(or maybe just EVEX.W) in some cases encodes operand element size.
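
To make the field naming concrete, here is a rough stand-alone sketch of
how a 3-byte VEX prefix (c4 xx yy) decomposes; illustration only, nothing
to do with the emulator code:

    #include <stdint.h>
    #include <stdio.h>

    /* Decode the two bytes following the 0xc4 escape.  Bits 7:5 of the
     * first byte are the inverted R/X/B register extension bits. */
    static void decode_vex3(uint8_t b1, uint8_t b2)
    {
        unsigned int map  =  b1 & 0x1f;       /* 1: 0f, 2: 0f38, 3: 0f3a */
        unsigned int w    = (b2 >> 7) & 1;    /* VEX.W: operand size / opcode ext. */
        unsigned int vvvv = (~b2 >> 3) & 0xf; /* extra register operand (inverted) */
        unsigned int l    = (b2 >> 2) & 1;    /* VEX.L: 128 vs 256 bit for SIMD */
        unsigned int pp   =  b2 & 3;          /* implied 66/f3/f2 prefix */
        printf("map=%u W=%u vvvv=%u L=%u pp=%u\n", map, w, vvvv, l, pp);
    }

    int main(void)
    {
        decode_vex3(0xe2, 0x70); /* "andn %ebx,%ecx,%eax" is c4 e2 70 f2 c3 */
        return 0;
    }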

Jan


_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 3/8] x86emul: support BMI1 insns
  2017-01-16 12:43         ` Jan Beulich
@ 2017-01-16 12:57           ` Jan Beulich
  2017-01-16 13:51             ` Andrew Cooper
  0 siblings, 1 reply; 29+ messages in thread
From: Jan Beulich @ 2017-01-16 12:57 UTC (permalink / raw)
  To: Andrew Cooper; +Cc: xen-devel

>>> On 16.01.17 at 13:43, <JBeulich@suse.com> wrote:
>>>> On 16.01.17 at 12:59, <andrew.cooper3@citrix.com> wrote:
>> On 16/01/17 11:19, Jan Beulich wrote:
>>>>>> On 13.01.17 at 18:40, <andrew.cooper3@citrix.com> wrote:
>>>> On 13/01/17 15:31, Jan Beulich wrote:
>>>>> @@ -5866,6 +5879,67 @@ x86_emulate(
>>>>>          break;
>>>>>  #endif
>>>>>  
>>>>> +    case X86EMUL_OPC_VEX(0x0f38, 0xf2):    /* andn r/m,r,r */
>>>>> +    case X86EMUL_OPC_VEX(0x0f38, 0xf7):    /* bextr r,r/m,r */
>>>>> +    {
>>>>> +        uint8_t *buf = get_stub(stub);
>>>>> +        typeof(vex) *pvex = container_of(buf + 1, typeof(vex), raw[0]);
>>>>> +
>>>>> +        host_and_vcpu_must_have(bmi1);
>>>>> +        generate_exception_if(vex.l, EXC_UD);
>>>> The manual also states #UD if VEX.W is set.
>>> This is very clearly a doc error: For one, it doesn't _also_ state this,
>>> but says nothing about VEX.L. And the instruction encodings list .W1
>>> variants (as expected) to encode 64-bit operations.
>> 
>> VEX.L != 0 is called out, but only in the text, not the exception list.
>> 
>> The exact text is:
>> 
>> "This instruction is not supported in real mode and virtual-8086 mode.
>> The operand size is always 32 bits if not in 64-bit mode. In 64-bit mode
>> operand size 64 requires VEX.W1. VEX.W1 is ignored in non-64-bit modes.
>> An attempt to execute this instruction with VEX.L not equal to 0 will
>> cause #UD."
>> 
>> with:
>> 
>> "#UD If VEX.W = 1"
>> 
>> in the exception list.
>> 
>> I am confused about the references to VEX.W1 in the text, because it
>> doesn't match any described VEX fields.  At a guess, I'd say it should
>> be referring to VEX.B which control operand size, while VEX.W is an
>> opcode extention.
> 
> VEX.W1 means VEX.W set to 1 (VEX.W0 similarly means VEX.W set to
> zero). And there's no VEX.B afaik.

Oops, of course there is, just that it has nothing to do with operand
size (it rather provides the top bit of the (base) register number).

Jan

> VEX.W can serve both purposes -
> operand size and opcode extension. As there's no other way to encode
> 32- vs 64-bit operand size, VEX.W serves this purpose for integer
> instructions.
> 
> For SIMD instructions VEX.L typically encodes operand size; iirc VEX.W
> (or maybe just EVEX.W) in some cases encodes operand element size.
> 
> Jan



_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 3/8] x86emul: support BMI1 insns
  2017-01-16 12:57           ` Jan Beulich
@ 2017-01-16 13:51             ` Andrew Cooper
  2017-01-16 13:58               ` Jan Beulich
  0 siblings, 1 reply; 29+ messages in thread
From: Andrew Cooper @ 2017-01-16 13:51 UTC (permalink / raw)
  To: Jan Beulich; +Cc: xen-devel

On 16/01/17 12:57, Jan Beulich wrote:
>>>> On 16.01.17 at 13:43, <JBeulich@suse.com> wrote:
>>>>> On 16.01.17 at 12:59, <andrew.cooper3@citrix.com> wrote:
>>> On 16/01/17 11:19, Jan Beulich wrote:
>>>>>>> On 13.01.17 at 18:40, <andrew.cooper3@citrix.com> wrote:
>>>>> On 13/01/17 15:31, Jan Beulich wrote:
>>>>>> @@ -5866,6 +5879,67 @@ x86_emulate(
>>>>>>          break;
>>>>>>  #endif
>>>>>>  
>>>>>> +    case X86EMUL_OPC_VEX(0x0f38, 0xf2):    /* andn r/m,r,r */
>>>>>> +    case X86EMUL_OPC_VEX(0x0f38, 0xf7):    /* bextr r,r/m,r */
>>>>>> +    {
>>>>>> +        uint8_t *buf = get_stub(stub);
>>>>>> +        typeof(vex) *pvex = container_of(buf + 1, typeof(vex), raw[0]);
>>>>>> +
>>>>>> +        host_and_vcpu_must_have(bmi1);
>>>>>> +        generate_exception_if(vex.l, EXC_UD);
>>>>> The manual also states #UD if VEX.W is set.
>>>> This is very clearly a doc error: For one, it doesn't _also_ state this,
>>>> but says nothing about VEX.L. And the instruction encodings list .W1
>>>> variants (as expected) to encode 64-bit operations.
>>> VEX.L != 0 is called out, but only in the text, not the exception list.
>>>
>>> The exact text is:
>>>
>>> "This instruction is not supported in real mode and virtual-8086 mode.
>>> The operand size is always 32 bits if not in 64-bit mode. In 64-bit mode
>>> operand size 64 requires VEX.W1. VEX.W1 is ignored in non-64-bit modes.
>>> An attempt to execute this instruction with VEX.L not equal to 0 will
>>> cause #UD."
>>>
>>> with:
>>>
>>> "#UD If VEX.W = 1"
>>>
>>> in the exception list.
>>>
>>> I am confused about the references to VEX.W1 in the text, because it
>>> doesn't match any described VEX fields.  At a guess, I'd say it should
>>> be referring to VEX.B which control operand size, while VEX.W is an
>>> opcode extention.
>> VEX.W1 means VEX.W set to 1 (VEX.W0 similarly means VEX.W set to
>> zero). And there's no VEX.B afaik.
> Oops, of course there is, just that it has nothing to do with operand
> size (it rather provides the top bit of the (base) register number).

Right.  What happens in reality is this:

--- Xen Test Framework ---
Environment: HVM 32bit (No paging)
Test VEX.W matching mode:
  andn cccca5a5, ff00ff00 = 00cc00a5
Test VEX.W opposite to mode:
  andn cccca5a5, ff00ff00 = 00cc00a5
Test result: SUCCESS

--- Xen Test Framework ---
Environment: HVM 64bit (Long mode 4 levels)
Test VEX.W matching mode:
  andn cccca5a5cccca5a5, ff00ff00ff00ff00 = 00cc00a500cc00a5
Test VEX.W opposite to mode:
  andn cccca5a5cccca5a5, ff00ff00ff00ff00 = 0000000000cc00a5
Test result: SUCCESS

So VEX.W is ignored in 32bit (i.e. doesn't raise #UD), and *does* cause
64bit mode to operate on 32bit operands, contrary to the manual.

~Andrew

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 3/8] x86emul: support BMI1 insns
  2017-01-16 13:51             ` Andrew Cooper
@ 2017-01-16 13:58               ` Jan Beulich
  2017-01-16 14:17                 ` Andrew Cooper
  0 siblings, 1 reply; 29+ messages in thread
From: Jan Beulich @ 2017-01-16 13:58 UTC (permalink / raw)
  To: Andrew Cooper; +Cc: xen-devel

>>> On 16.01.17 at 14:51, <andrew.cooper3@citrix.com> wrote:
> Right.  What happens in reality is this:
> 
> --- Xen Test Framework ---
> Environment: HVM 32bit (No paging)
> Test VEX.W matching mode:
>   andn cccca5a5, ff00ff00 = 00cc00a5
> Test VEX.W opposite to mode:
>   andn cccca5a5, ff00ff00 = 00cc00a5
> Test result: SUCCESS
> 
> --- Xen Test Framework ---
> Environment: HVM 64bit (Long mode 4 levels)
> Test VEX.W matching mode:
>   andn cccca5a5cccca5a5, ff00ff00ff00ff00 = 00cc00a500cc00a5
> Test VEX.W opposite to mode:
>   andn cccca5a5cccca5a5, ff00ff00ff00ff00 = 0000000000cc00a5
> Test result: SUCCESS
> 
> So VEX.W is ignored in 32bit (i.e. doesn't raise #UD), and *does* cause
> 64bit mode to operate on 32bit operands, contrary to the manual.

Doesn't look so to me: The first result is a 64-bit one, and I'd expect
VEX.W=1 to be "matching mode".

Jan


_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 3/8] x86emul: support BMI1 insns
  2017-01-16 13:58               ` Jan Beulich
@ 2017-01-16 14:17                 ` Andrew Cooper
  2017-01-16 15:43                   ` Jan Beulich
  0 siblings, 1 reply; 29+ messages in thread
From: Andrew Cooper @ 2017-01-16 14:17 UTC (permalink / raw)
  To: Jan Beulich; +Cc: xen-devel

On 16/01/17 13:58, Jan Beulich wrote:
>>>> On 16.01.17 at 14:51, <andrew.cooper3@citrix.com> wrote:
>> Right.  What happens in reality is this:
>>
>> --- Xen Test Framework ---
>> Environment: HVM 32bit (No paging)
>> Test VEX.W matching mode:
>>   andn cccca5a5, ff00ff00 = 00cc00a5
>> Test VEX.W opposite to mode:
>>   andn cccca5a5, ff00ff00 = 00cc00a5
>> Test result: SUCCESS
>>
>> --- Xen Test Framework ---
>> Environment: HVM 64bit (Long mode 4 levels)
>> Test VEX.W matching mode:
>>   andn cccca5a5cccca5a5, ff00ff00ff00ff00 = 00cc00a500cc00a5
>> Test VEX.W opposite to mode:
>>   andn cccca5a5cccca5a5, ff00ff00ff00ff00 = 0000000000cc00a5
>> Test result: SUCCESS
>>
>> So VEX.W is ignored in 32bit (i.e. doesn't raise #UD), and *does* cause
>> 64bit mode to operate on 32bit operands, contrary to the manual.
> Doesn't look so to me: The first result is a 64-bit one, and I'd expect
> VEX.W=1 to be "matching mode".

"matching mode" means "what the assembler generated when using the
mnemonic".  I didn't try hand-coding andn it to start with.

Here it is spelt out more clearly.

--- Xen Test Framework ---
Environment: HVM 32bit (No paging)
Test andn
Test VEX.W=0:
  andn cccca5a5, ff00ff00 = 00cc00a5
Test VEX.W=1:
  andn cccca5a5, ff00ff00 = 00cc00a5
Test result: SUCCESS

--- Xen Test Framework ---
Environment: HVM 64bit (Long mode 4 levels)
Test andn
Test VEX.W=1:
  andn cccca5a5cccca5a5, ff00ff00ff00ff00 = 00cc00a500cc00a5
Test VEX.W=0:
  andn cccca5a5cccca5a5, ff00ff00ff00ff00 = 0000000000cc00a5
Test result: SUCCESS

My conclusions still stand.
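
For reference, the two encodings being poked at have this shape
(hand-assembled here purely for illustration, not the literal test code):

    .byte 0xc4, 0xe2, 0x70, 0xf2, 0xc3   /* VEX.W=0: andn %ebx,%ecx,%eax */
    .byte 0xc4, 0xe2, 0xf0, 0xf2, 0xc3   /* VEX.W=1: andn %rbx,%rcx,%rax */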

~Andrew

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 5/8] x86emul: support TBM insns
  2017-01-16 11:36     ` Jan Beulich
@ 2017-01-16 14:52       ` Andrew Cooper
  2017-01-16 15:45         ` Jan Beulich
  0 siblings, 1 reply; 29+ messages in thread
From: Andrew Cooper @ 2017-01-16 14:52 UTC (permalink / raw)
  To: Jan Beulich; +Cc: xen-devel

On 16/01/17 11:36, Jan Beulich wrote:
>>>> On 13.01.17 at 19:48, <andrew.cooper3@citrix.com> wrote:
>> On 13/01/17 15:32, Jan Beulich wrote:
>>> --- a/xen/arch/x86/x86_emulate/x86_emulate.c
>>> +++ b/xen/arch/x86/x86_emulate/x86_emulate.c
>>> @@ -1355,6 +1355,7 @@ static bool vcpu_has(
>>>  #define vcpu_has_cr8_legacy()  vcpu_has(0x80000001, ECX,  4, ctxt, ops)
>>>  #define vcpu_has_lzcnt()       vcpu_has(0x80000001, ECX,  5, ctxt, ops)
>>>  #define vcpu_has_misalignsse() vcpu_has(0x80000001, ECX,  7, ctxt, ops)
>>> +#define vcpu_has_tbm()         vcpu_has(0x80000001, ECX, 21, ctxt, ops)
>>>  #define vcpu_has_bmi1()        vcpu_has(         7, EBX,  3, ctxt, ops)
>>>  #define vcpu_has_hle()         vcpu_has(         7, EBX,  4, ctxt, ops)
>>>  #define vcpu_has_bmi2()        vcpu_has(         7, EBX,  8, ctxt, ops)
>>> @@ -6014,6 +6015,85 @@ x86_emulate(
>>>              asm ( "rorl %b1,%k0" : "=g" (dst.val) : "c" (imm1), "0" (src.val) );
>>>          break;
>>>  
>>> +    case X86EMUL_OPC(0x8f09, 0x01): /* XOP Grp1 */
>> Surely this calls for the introduction of X86EMUL_OPC_XOP_* to match
>> their VEX/EVEX counterparts?
> Do you really think
>
>     case X86EMUL_OPC_XOP(09, 0x01): /* XOP Grp1 */
>
> or
>
>     case X86EMUL_OPC_XOP09(0x01): /* XOP Grp1 */
>
> are any better?

Either would be better, as it avoids the 0x8f magic prefix.

> Iirc you had asked this same question already
> when the opcode canonicalization patch was under review. The
> situation hasn't changed: The nothing/VEX/EVEX distinction is
> needed because the same base opcode may have (slightly or
> significantly) different meaning depending on which of the three
> (or four, if we also considered MVEX) encodings are being used.

MVEX is the precursor to EVEX and, as far as I can tell, was only
implemented on the Knights-Corner co-processor, now superseded by the
Knights-Landing processor, which uses EVEX.

There are a number of other reasons why Xen doesn't currently boot on
Knights-Corner (whereas it functions fine on Knights-Landing), so unless
someone has a specific use case in mind and is willing to spend the
effort, I don't think it is worth our effort at the moment.

> There's no such duplicate meaning for XOP encodings.

How have you come to this conclusion?  The XOP map spaces are separate
to the main encodings, so the same primary opcode byte does have
different meanings depending on whether it is XOP encoded or not.

~Andrew

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 3/8] x86emul: support BMI1 insns
  2017-01-16 14:17                 ` Andrew Cooper
@ 2017-01-16 15:43                   ` Jan Beulich
  0 siblings, 0 replies; 29+ messages in thread
From: Jan Beulich @ 2017-01-16 15:43 UTC (permalink / raw)
  To: Andrew Cooper; +Cc: xen-devel

>>> On 16.01.17 at 15:17, <andrew.cooper3@citrix.com> wrote:
> On 16/01/17 13:58, Jan Beulich wrote:
>>>>> On 16.01.17 at 14:51, <andrew.cooper3@citrix.com> wrote:
>>> Right.  What happens in reality is this:
>>>
>>> --- Xen Test Framework ---
>>> Environment: HVM 32bit (No paging)
>>> Test VEX.W matching mode:
>>>   andn cccca5a5, ff00ff00 = 00cc00a5
>>> Test VEX.W opposite to mode:
>>>   andn cccca5a5, ff00ff00 = 00cc00a5
>>> Test result: SUCCESS
>>>
>>> --- Xen Test Framework ---
>>> Environment: HVM 64bit (Long mode 4 levels)
>>> Test VEX.W matching mode:
>>>   andn cccca5a5cccca5a5, ff00ff00ff00ff00 = 00cc00a500cc00a5
>>> Test VEX.W opposite to mode:
>>>   andn cccca5a5cccca5a5, ff00ff00ff00ff00 = 0000000000cc00a5
>>> Test result: SUCCESS
>>>
>>> So VEX.W is ignored in 32bit (i.e. doesn't raise #UD), and *does* cause
>>> 64bit mode to operate on 32bit operands, contrary to the manual.
>> Doesn't look so to me: The first result is a 64-bit one, and I'd expect
>> VEX.W=1 to be "matching mode".
> 
> "matching mode" means "what the assembler generated when using the
> mnemonic".  I didn't try hand-coding andn it to start with.
> 
> Here it is spelt out more clearly.
> 
> --- Xen Test Framework ---
> Environment: HVM 32bit (No paging)
> Test andn
> Test VEX.W=0:
>   andn cccca5a5, ff00ff00 = 00cc00a5
> Test VEX.W=1:
>   andn cccca5a5, ff00ff00 = 00cc00a5
> Test result: SUCCESS
> 
> --- Xen Test Framework ---
> Environment: HVM 64bit (Long mode 4 levels)
> Test andn
> Test VEX.W=1:
>   andn cccca5a5cccca5a5, ff00ff00ff00ff00 = 00cc00a500cc00a5
> Test VEX.W=0:
>   andn cccca5a5cccca5a5, ff00ff00ff00ff00 = 0000000000cc00a5
> Test result: SUCCESS
> 
> My conclusions still stand.

I don't follow; are we talking of different things? VEX.W=1 very
clearly produces a 64-bit operation above, as expected, and as
written in the SDM.

Jan


_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 5/8] x86emul: support TBM insns
  2017-01-16 14:52       ` Andrew Cooper
@ 2017-01-16 15:45         ` Jan Beulich
  0 siblings, 0 replies; 29+ messages in thread
From: Jan Beulich @ 2017-01-16 15:45 UTC (permalink / raw)
  To: Andrew Cooper; +Cc: xen-devel

>>> On 16.01.17 at 15:52, <andrew.cooper3@citrix.com> wrote:
> On 16/01/17 11:36, Jan Beulich wrote:
>>>>> On 13.01.17 at 19:48, <andrew.cooper3@citrix.com> wrote:
>>> On 13/01/17 15:32, Jan Beulich wrote:
>>>> --- a/xen/arch/x86/x86_emulate/x86_emulate.c
>>>> +++ b/xen/arch/x86/x86_emulate/x86_emulate.c
>>>> @@ -1355,6 +1355,7 @@ static bool vcpu_has(
>>>>  #define vcpu_has_cr8_legacy()  vcpu_has(0x80000001, ECX,  4, ctxt, ops)
>>>>  #define vcpu_has_lzcnt()       vcpu_has(0x80000001, ECX,  5, ctxt, ops)
>>>>  #define vcpu_has_misalignsse() vcpu_has(0x80000001, ECX,  7, ctxt, ops)
>>>> +#define vcpu_has_tbm()         vcpu_has(0x80000001, ECX, 21, ctxt, ops)
>>>>  #define vcpu_has_bmi1()        vcpu_has(         7, EBX,  3, ctxt, ops)
>>>>  #define vcpu_has_hle()         vcpu_has(         7, EBX,  4, ctxt, ops)
>>>>  #define vcpu_has_bmi2()        vcpu_has(         7, EBX,  8, ctxt, ops)
>>>> @@ -6014,6 +6015,85 @@ x86_emulate(
>>>>              asm ( "rorl %b1,%k0" : "=g" (dst.val) : "c" (imm1), "0" (src.val) );
>>>>          break;
>>>>  
>>>> +    case X86EMUL_OPC(0x8f09, 0x01): /* XOP Grp1 */
>>> Surely this calls for the introduction of X86EMUL_OPC_XOP_* to match
>>> their VEX/EVEX counterparts?
>> Do you really think
>>
>>     case X86EMUL_OPC_XOP(09, 0x01): /* XOP Grp1 */
>>
>> or
>>
>>     case X86EMUL_OPC_XOP09(0x01): /* XOP Grp1 */
>>
>> are any better?
> 
> Either would be better, as it avoids the 0x8f magic prefix.

Well, okay then.
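
For concreteness, that second spelling could simply wrap the existing
encoding (the name is of course subject to change):

    #define X86EMUL_OPC_XOP09(opc)  X86EMUL_OPC(0x8f09, opc)

    case X86EMUL_OPC_XOP09(0x01): /* XOP Grp1 */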

>> Iirc you had asked this same question already
>> when the opcode canonicalization patch was under review. The
>> situation hasn't changed: The nothing/VEX/EVEX distinction is
>> needed because the same base opcode may have (slightly or
>> significantly) different meaning depending on which of the three
>> (or four, if we also considered MVEX) encodings are being used.
> 
> MVEX is the precursor to EVEX, and as far as I can tell, was only
> implemented on the Knights-Corner co-processor, now superseded by
> Knights-Landing processor which uses EVEX.
> 
> There are a number of other reasons why Xen doesn't currently boot on
> Knights-Corner (whereas it functions fine on Knights-Landing), so unless
> someone has a specific usecase in mind and is willing to spend the
> effort, I don't think it is worth our effort at the moment.

I fully agree.

>> There's no such duplicate meaning for XOP encodings.
> 
> How have you come to this conclusion?  The XOP map spaces are separate
> to the main encodings, so the same primary opcode byte does have
> different meanings depending on whether it is XOP encoded or not.

That's not the duplicate meaning I was referring to. What I was trying
to point out is that e.g. ADDPS and VADDPS share their encoding, and
are distinguished only by non-VEX, VEX, or EVEX. There's nothing
equivalent for XOP.
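
Purely as an illustration (assuming the EVEX spelling exists alongside
the VEX one, as implied earlier in the thread), the same 0f 58 base
opcode needs distinct case labels solely because of the encoding used,
and XOP has no analogue of that:

    case X86EMUL_OPC(0x0f, 0x58):      /* addps */
    case X86EMUL_OPC_VEX(0x0f, 0x58):  /* vaddps (VEX) */
    case X86EMUL_OPC_EVEX(0x0f, 0x58): /* vaddps (EVEX) */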

Jan


_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 29+ messages in thread

end of thread, other threads:[~2017-01-16 15:45 UTC | newest]

Thread overview: 29+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-01-13 15:11 [PATCH 0/8] x86emul: support various ISA extensions Jan Beulich
2017-01-13 15:30 ` [PATCH 1/8] x86emul: support POPCNT Jan Beulich
2017-01-13 16:31   ` Andrew Cooper
2017-01-13 15:31 ` [PATCH 2/8] x86emul: support ADCX/ADOX Jan Beulich
2017-01-13 16:34   ` Andrew Cooper
2017-01-13 15:31 ` [PATCH 3/8] x86emul: support BMI1 insns Jan Beulich
2017-01-13 17:40   ` Andrew Cooper
2017-01-16 11:19     ` Jan Beulich
2017-01-16 11:59       ` Andrew Cooper
2017-01-16 12:43         ` Jan Beulich
2017-01-16 12:57           ` Jan Beulich
2017-01-16 13:51             ` Andrew Cooper
2017-01-16 13:58               ` Jan Beulich
2017-01-16 14:17                 ` Andrew Cooper
2017-01-16 15:43                   ` Jan Beulich
2017-01-13 15:32 ` [PATCH 4/8] x86emul: support BMI2 insns Jan Beulich
2017-01-13 18:20   ` Andrew Cooper
2017-01-16 11:32     ` Jan Beulich
2017-01-13 15:32 ` [PATCH 5/8] x86emul: support TBM insns Jan Beulich
2017-01-13 18:48   ` Andrew Cooper
2017-01-16 11:36     ` Jan Beulich
2017-01-16 14:52       ` Andrew Cooper
2017-01-16 15:45         ` Jan Beulich
2017-01-13 15:34 ` [PATCH 6/8] x86emul: support RDRAND/RDSEED Jan Beulich
2017-01-13 18:55   ` Andrew Cooper
2017-01-13 15:34 ` [PATCH 7/8] x86emul: support RDPID Jan Beulich
2017-01-13 19:00   ` Andrew Cooper
2017-01-13 15:35 ` [PATCH 8/8] x86emul: rename the no_writeback label Jan Beulich
2017-01-13 19:01   ` Andrew Cooper
