xen-devel.lists.xenproject.org archive mirror
 help / color / mirror / Atom feed
From: "Jan Beulich" <JBeulich@suse.com>
To: xen-devel <xen-devel@lists.xenproject.org>
Cc: Andrew Cooper <andrew.cooper3@citrix.com>, Keir Fraser <keir@xen.org>
Subject: [PATCH 3/3] x86emul: support MOVBE and CRC32
Date: Fri, 11 Mar 2016 10:35:25 -0700	[thread overview]
Message-ID: <56E30FED02000078000DBB93@prv-mh.provo.novell.com> (raw)
In-Reply-To: <56E30EA102000078000DBB7F@prv-mh.provo.novell.com>

[-- Attachment #1: Type: text/plain, Size: 12638 bytes --]

MOVBE is added in an attempt to at least gradually support all simple
data movement instructions. CRC32 is added just because it shares its
opcode with MOVBE.

Signed-off-by: Jan Beulich <jbeulich@suse.com>

--- a/tools/tests/x86_emulator/test_x86_emulator.c
+++ b/tools/tests/x86_emulator/test_x86_emulator.c
@@ -78,7 +78,14 @@ static int cpuid(
     unsigned int *edx,
     struct x86_emulate_ctxt *ctxt)
 {
+    unsigned int leaf = *eax;
+
     asm ("cpuid" : "+a" (*eax), "+c" (*ecx), "=d" (*edx), "=b" (*ebx));
+
+    /* The emulator doesn't itself use MOVBE, so we can always run the test. */
+    if ( leaf == 1 )
+        *ecx |= 1U << 22;
+
     return X86EMUL_OKAY;
 }
 
@@ -605,6 +612,34 @@ int main(int argc, char **argv)
     printf("skipped\n");
 #endif
 
+    printf("%-40s", "Testing movbe (%%ecx),%%eax...");
+    instr[0] = 0x0f; instr[1] = 0x38; instr[2] = 0xf0; instr[3] = 0x01;
+    regs.eflags = 0x200;
+    regs.eip    = (unsigned long)&instr[0];
+    regs.ecx    = (unsigned long)res;
+    regs.eax    = 0x11111111;
+    *res        = 0x12345678;
+    rc = x86_emulate(&ctxt, &emulops);
+    if ( (rc != X86EMUL_OKAY) ||
+         (*res != 0x12345678) ||
+         (regs.eax != 0x78563412) ||
+         (regs.eflags != 0x200) ||
+         (regs.eip != (unsigned long)&instr[4]) )
+        goto fail;
+    printf("okay\n");
+
+    printf("%-40s", "Testing movbe %%ax,(%%ecx)...");
+    instr[0] = 0x66; instr[1] = 0x0f; instr[2] = 0x38; instr[3] = 0xf1; instr[4] = 0x01;
+    regs.eip = (unsigned long)&instr[0];
+    rc = x86_emulate(&ctxt, &emulops);
+    if ( (rc != X86EMUL_OKAY) ||
+         (*res != 0x12341234) ||
+         (regs.eax != 0x78563412) ||
+         (regs.eflags != 0x200) ||
+         (regs.eip != (unsigned long)&instr[5]) )
+        goto fail;
+    printf("okay\n");
+
 #define decl_insn(which) extern const unsigned char which[], which##_len[]
 #define put_insn(which, insn) ".pushsection .test, \"ax\", @progbits\n" \
                               #which ": " insn "\n"                     \
--- a/tools/tests/x86_emulator/x86_emulate.c
+++ b/tools/tests/x86_emulator/x86_emulate.c
@@ -12,6 +12,7 @@ typedef bool bool_t;
 
 #define BUG() abort()
 #define ASSERT assert
+#define ASSERT_UNREACHABLE() assert(!__LINE__)
 
 #define cpu_has_amd_erratum(nr) 0
 #define mark_regs_dirty(r) ((void)(r))
--- a/xen/arch/x86/Rules.mk
+++ b/xen/arch/x86/Rules.mk
@@ -16,6 +16,7 @@ CFLAGS += -msoft-float
 $(call cc-options-add,CFLAGS,CC,$(EMBEDDED_EXTRA_CFLAGS))
 $(call cc-option-add,CFLAGS,CC,-Wnested-externs)
 $(call as-insn-check,CFLAGS,CC,"vmcall",-DHAVE_GAS_VMX)
+$(call as-insn-check,CFLAGS,CC,"crc32 %eax$$(comma)%eax",-DHAVE_GAS_SSE4_2)
 $(call as-insn-check,CFLAGS,CC,"invept (%rax)$$(comma)%rax",-DHAVE_GAS_EPT)
 $(call as-insn-check,CFLAGS,CC,"rdfsbase %rax",-DHAVE_GAS_FSGSBASE)
 $(call as-insn-check,CFLAGS,CC,".equ \"x\"$$(comma)1", \
--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -188,7 +188,7 @@ static uint8_t twobyte_table[256] = {
     ImplicitOps, ImplicitOps, ImplicitOps, 0,
     ImplicitOps, ImplicitOps, 0, 0,
     /* 0x38 - 0x3F */
-    0, 0, 0, 0, 0, 0, 0, 0,
+    DstReg|SrcMem|ModRM, 0, 0, 0, 0, 0, 0, 0,
     /* 0x40 - 0x47 */
     DstReg|SrcMem|ModRM|Mov, DstReg|SrcMem|ModRM|Mov,
     DstReg|SrcMem|ModRM|Mov, DstReg|SrcMem|ModRM|Mov,
@@ -1091,6 +1091,8 @@ static bool_t vcpu_has(
 #define vcpu_must_have_sse2() vcpu_must_have(0x00000001, EDX, 26)
 #define vcpu_must_have_sse3() vcpu_must_have(0x00000001, ECX,  0)
 #define vcpu_must_have_cx16() vcpu_must_have(0x00000001, ECX, 13)
+#define vcpu_must_have_sse4_2() vcpu_must_have(0x00000001, ECX, 20)
+#define vcpu_must_have_movbe() vcpu_must_have(0x00000001, ECX, 22)
 #define vcpu_must_have_avx()  vcpu_must_have(0x00000001, ECX, 28)
 
 #ifdef __XEN__
@@ -1503,8 +1505,9 @@ x86_emulate(
     /* Shadow copy of register state. Committed on successful emulation. */
     struct cpu_user_regs _regs = *ctxt->regs;
 
-    uint8_t b, d, sib, sib_index, sib_base, twobyte = 0, rex_prefix = 0;
+    uint8_t b, d, sib, sib_index, sib_base, rex_prefix = 0;
     uint8_t modrm = 0, modrm_mod = 0, modrm_reg = 0, modrm_rm = 0;
+    enum { ext_none, ext_0f, ext_0f38 } ext = ext_none;
     union vex vex = {};
     unsigned int op_bytes, def_op_bytes, ad_bytes, def_ad_bytes;
     bool_t lock_prefix = 0;
@@ -1600,9 +1603,18 @@ x86_emulate(
         /* Two-byte opcode? */
         if ( b == 0x0f )
         {
-            twobyte = 1;
             b = insn_fetch_type(uint8_t);
             d = twobyte_table[b];
+            switch ( b )
+            {
+            default:
+                ext = ext_0f;
+                break;
+            case 0x38:
+                b = insn_fetch_type(uint8_t);
+                ext = ext_0f38;
+                break;
+            }
         }
 
         /* Unrecognised? */
@@ -1619,7 +1631,7 @@ x86_emulate(
         modrm = insn_fetch_type(uint8_t);
         modrm_mod = (modrm & 0xc0) >> 6;
 
-        if ( !twobyte && ((b & ~1) == 0xc4) )
+        if ( !ext && ((b & ~1) == 0xc4) )
             switch ( def_ad_bytes )
             {
             default:
@@ -1665,12 +1677,12 @@ x86_emulate(
                     rex_prefix |= REX_R;
 
                 fail_if(vex.opcx != vex_0f);
-                twobyte = 1;
+                ext = ext_0f;
                 b = insn_fetch_type(uint8_t);
                 d = twobyte_table[b];
 
                 /* Unrecognised? */
-                if ( d == 0 )
+                if ( d == 0 || b == 0x38 )
                     goto cannot_emulate;
 
                 modrm = insn_fetch_type(uint8_t);
@@ -1756,7 +1768,7 @@ x86_emulate(
                 {
                     ea.mem.seg  = x86_seg_ss;
                     ea.mem.off += _regs.esp;
-                    if ( !twobyte && (b == 0x8f) )
+                    if ( !ext && (b == 0x8f) )
                         /* POP <rm> computes its EA post increment. */
                         ea.mem.off += ((mode_64bit() && (op_bytes == 4))
                                        ? 8 : op_bytes);
@@ -1791,12 +1803,12 @@ x86_emulate(
                         ((op_bytes == 8) ? 4 : op_bytes);
                 else if ( (d & SrcMask) == SrcImmByte )
                     ea.mem.off += 1;
-                else if ( !twobyte && ((b & 0xfe) == 0xf6) &&
+                else if ( !ext && ((b & 0xfe) == 0xf6) &&
                           ((modrm_reg & 7) <= 1) )
                     /* Special case in Grp3: test has immediate operand. */
                     ea.mem.off += (d & ByteOp) ? 1
                         : ((op_bytes == 8) ? 4 : op_bytes);
-                else if ( twobyte && ((b & 0xf7) == 0xa4) )
+                else if ( ext == ext_0f && ((b & 0xf7) == 0xa4) )
                     /* SHLD/SHRD with immediate byte third operand. */
                     ea.mem.off++;
                 break;
@@ -1815,7 +1827,9 @@ x86_emulate(
         ea.mem.seg = override_seg;
 
     /* Early operand adjustments. */
-    if ( !twobyte )
+    switch ( ext )
+    {
+    case ext_none:
         switch ( b )
         {
         case 0xf6 ... 0xf7: /* Grp3 */
@@ -1848,6 +1862,29 @@ x86_emulate(
             }
             break;
         }
+        break;
+
+    case ext_0f:
+        break;
+
+    case ext_0f38:
+        switch ( b )
+        {
+        case 0xf0: /* movbe / crc32 */
+            d |= repne_prefix() ? ByteOp : Mov;
+            break;
+        case 0xf1: /* movbe / crc32 */
+            if ( !repne_prefix() )
+                d = (d & ~(DstMask | SrcMask)) | DstMem | SrcReg | Mov;
+            break;
+        default: /* Until it is worth making this table based ... */
+            goto cannot_emulate;
+        }
+        break;
+
+    default:
+        ASSERT_UNREACHABLE();
+    }
 
     /* Decode and fetch the source operand: register, memory or immediate. */
     switch ( d & SrcMask )
@@ -2006,8 +2043,18 @@ x86_emulate(
         break;
     }
 
-    if ( twobyte )
-        goto twobyte_insn;
+    switch ( ext )
+    {
+    case ext_none:
+        break;
+    case ext_0f:
+        goto ext_0f_insn;
+    case ext_0f38:
+        goto ext_0f38_insn;
+    default:
+        ASSERT_UNREACHABLE();
+        goto cannot_emulate;
+    }
 
     switch ( b )
     {
@@ -2050,7 +2097,7 @@ x86_emulate(
         struct segment_register reg;
         src.val = x86_seg_es;
     push_seg:
-        generate_exception_if(mode_64bit() && !twobyte, EXC_UD, -1);
+        generate_exception_if(mode_64bit() && !ext, EXC_UD, -1);
         fail_if(ops->read_segment == NULL);
         if ( (rc = ops->read_segment(src.val, &reg, ctxt)) != 0 )
             return rc;
@@ -2066,7 +2113,7 @@ x86_emulate(
     case 0x07: /* pop %%es */
         src.val = x86_seg_es;
     pop_seg:
-        generate_exception_if(mode_64bit() && !twobyte, EXC_UD, -1);
+        generate_exception_if(mode_64bit() && !ext, EXC_UD, -1);
         fail_if(ops->write_segment == NULL);
         /* 64-bit mode: POP defaults to a 64-bit operand. */
         if ( mode_64bit() && (op_bytes == 4) )
@@ -2721,7 +2768,7 @@ x86_emulate(
         unsigned long sel;
         dst.val = x86_seg_es;
     les: /* dst.val identifies the segment */
-        generate_exception_if(mode_64bit() && !twobyte, EXC_UD, -1);
+        generate_exception_if(mode_64bit() && !ext, EXC_UD, -1);
         generate_exception_if(src.type != OP_MEM, EXC_UD, -1);
         if ( (rc = read_ulong(src.mem.seg, src.mem.off + src.bytes,
                               &sel, 2, ctxt, ops)) != 0 )
@@ -3862,7 +3909,7 @@ x86_emulate(
     put_stub(stub);
     return rc;
 
- twobyte_insn:
+ ext_0f_insn:
     switch ( b )
     {
     case 0x00: /* Grp6 */
@@ -4765,6 +4812,72 @@ x86_emulate(
     }
     goto writeback;
 
+ ext_0f38_insn:
+    switch ( b )
+    {
+    case 0xf0: case 0xf1: /* movbe / crc32 */
+        generate_exception_if(repe_prefix(), EXC_UD, -1);
+        if ( repne_prefix() )
+        {
+            /* crc32 */
+#ifdef HAVE_GAS_SSE4_2
+            host_and_vcpu_must_have(sse4_2);
+            dst.bytes = rex_prefix & REX_W ? 8 : 4;
+            switch ( op_bytes )
+            {
+            case 1:
+                asm ( "crc32b %1,%k0" : "+r" (dst.val)
+                                      : "qm" (*(uint8_t *)&src.val) );
+                break;
+            case 2:
+                asm ( "crc32w %1,%k0" : "+r" (dst.val)
+                                      : "rm" (*(uint16_t *)&src.val) );
+                break;
+            case 4:
+                asm ( "crc32l %1,%k0" : "+r" (dst.val)
+                                      : "rm" (*(uint32_t *)&src.val) );
+                break;
+# ifdef __x86_64__
+            case 8:
+                asm ( "crc32q %1,%0" : "+r" (dst.val) : "rm" (src.val) );
+                break;
+# endif
+            default:
+                ASSERT_UNREACHABLE();
+            }
+#else /* !HAVE_GAS_SSE4_2 */
+            goto cannot_emulate;
+#endif
+        }
+        else
+        {
+            /* movbe */
+            vcpu_must_have_movbe();
+            switch ( op_bytes )
+            {
+            case 2:
+                asm ( "xchg %h0,%b0" : "=Q" (dst.val)
+                                     : "0" (*(uint32_t *)&src.val) );
+                break;
+            case 4:
+#ifdef __x86_64__
+                asm ( "bswap %k0" : "=r" (dst.val)
+                                  : "0" (*(uint32_t *)&src.val) );
+                break;
+            case 8:
+#endif
+                asm ( "bswap %0" : "=r" (dst.val) : "0" (src.val) );
+                break;
+            default:
+                ASSERT_UNREACHABLE();
+            }
+        }
+        break;
+    default:
+        goto cannot_emulate;
+    }
+    goto writeback;
+
  cannot_emulate:
     _put_fpu();
     put_stub(stub);
--- a/xen/include/asm-x86/cpufeature.h
+++ b/xen/include/asm-x86/cpufeature.h
@@ -189,6 +189,7 @@
 #define cpu_has_sse		boot_cpu_has(X86_FEATURE_SSE)
 #define cpu_has_sse2		boot_cpu_has(X86_FEATURE_SSE2)
 #define cpu_has_sse3		boot_cpu_has(X86_FEATURE_SSE3)
+#define cpu_has_sse4_2		boot_cpu_has(X86_FEATURE_SSE4_2)
 #define cpu_has_ht		boot_cpu_has(X86_FEATURE_HT)
 #define cpu_has_mp		1
 #define cpu_has_nx		boot_cpu_has(X86_FEATURE_NX)



[-- Attachment #2: x86emul-movbe.patch --]
[-- Type: text/plain, Size: 12670 bytes --]

x86emul: support MOVBE and CRC32

MOVBE is added in an attempt to at least gradually support all simple
data movement instructions. CRC32 is added just because it shares its
opcode with MOVBE.

Signed-off-by: Jan Beulich <jbeulich@suse.com>

--- a/tools/tests/x86_emulator/test_x86_emulator.c
+++ b/tools/tests/x86_emulator/test_x86_emulator.c
@@ -78,7 +78,14 @@ static int cpuid(
     unsigned int *edx,
     struct x86_emulate_ctxt *ctxt)
 {
+    unsigned int leaf = *eax;
+
     asm ("cpuid" : "+a" (*eax), "+c" (*ecx), "=d" (*edx), "=b" (*ebx));
+
+    /* The emulator doesn't itself use MOVBE, so we can always run the test. */
+    if ( leaf == 1 )
+        *ecx |= 1U << 22;
+
     return X86EMUL_OKAY;
 }
 
@@ -605,6 +612,34 @@ int main(int argc, char **argv)
     printf("skipped\n");
 #endif
 
+    printf("%-40s", "Testing movbe (%%ecx),%%eax...");
+    instr[0] = 0x0f; instr[1] = 0x38; instr[2] = 0xf0; instr[3] = 0x01;
+    regs.eflags = 0x200;
+    regs.eip    = (unsigned long)&instr[0];
+    regs.ecx    = (unsigned long)res;
+    regs.eax    = 0x11111111;
+    *res        = 0x12345678;
+    rc = x86_emulate(&ctxt, &emulops);
+    if ( (rc != X86EMUL_OKAY) ||
+         (*res != 0x12345678) ||
+         (regs.eax != 0x78563412) ||
+         (regs.eflags != 0x200) ||
+         (regs.eip != (unsigned long)&instr[4]) )
+        goto fail;
+    printf("okay\n");
+
+    printf("%-40s", "Testing movbe %%ax,(%%ecx)...");
+    instr[0] = 0x66; instr[1] = 0x0f; instr[2] = 0x38; instr[3] = 0xf1; instr[4] = 0x01;
+    regs.eip = (unsigned long)&instr[0];
+    rc = x86_emulate(&ctxt, &emulops);
+    if ( (rc != X86EMUL_OKAY) ||
+         (*res != 0x12341234) ||
+         (regs.eax != 0x78563412) ||
+         (regs.eflags != 0x200) ||
+         (regs.eip != (unsigned long)&instr[5]) )
+        goto fail;
+    printf("okay\n");
+
 #define decl_insn(which) extern const unsigned char which[], which##_len[]
 #define put_insn(which, insn) ".pushsection .test, \"ax\", @progbits\n" \
                               #which ": " insn "\n"                     \
--- a/tools/tests/x86_emulator/x86_emulate.c
+++ b/tools/tests/x86_emulator/x86_emulate.c
@@ -12,6 +12,7 @@ typedef bool bool_t;
 
 #define BUG() abort()
 #define ASSERT assert
+#define ASSERT_UNREACHABLE() assert(!__LINE__)
 
 #define cpu_has_amd_erratum(nr) 0
 #define mark_regs_dirty(r) ((void)(r))
--- a/xen/arch/x86/Rules.mk
+++ b/xen/arch/x86/Rules.mk
@@ -16,6 +16,7 @@ CFLAGS += -msoft-float
 $(call cc-options-add,CFLAGS,CC,$(EMBEDDED_EXTRA_CFLAGS))
 $(call cc-option-add,CFLAGS,CC,-Wnested-externs)
 $(call as-insn-check,CFLAGS,CC,"vmcall",-DHAVE_GAS_VMX)
+$(call as-insn-check,CFLAGS,CC,"crc32 %eax$$(comma)%eax",-DHAVE_GAS_SSE4_2)
 $(call as-insn-check,CFLAGS,CC,"invept (%rax)$$(comma)%rax",-DHAVE_GAS_EPT)
 $(call as-insn-check,CFLAGS,CC,"rdfsbase %rax",-DHAVE_GAS_FSGSBASE)
 $(call as-insn-check,CFLAGS,CC,".equ \"x\"$$(comma)1", \
--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -188,7 +188,7 @@ static uint8_t twobyte_table[256] = {
     ImplicitOps, ImplicitOps, ImplicitOps, 0,
     ImplicitOps, ImplicitOps, 0, 0,
     /* 0x38 - 0x3F */
-    0, 0, 0, 0, 0, 0, 0, 0,
+    DstReg|SrcMem|ModRM, 0, 0, 0, 0, 0, 0, 0,
     /* 0x40 - 0x47 */
     DstReg|SrcMem|ModRM|Mov, DstReg|SrcMem|ModRM|Mov,
     DstReg|SrcMem|ModRM|Mov, DstReg|SrcMem|ModRM|Mov,
@@ -1091,6 +1091,8 @@ static bool_t vcpu_has(
 #define vcpu_must_have_sse2() vcpu_must_have(0x00000001, EDX, 26)
 #define vcpu_must_have_sse3() vcpu_must_have(0x00000001, ECX,  0)
 #define vcpu_must_have_cx16() vcpu_must_have(0x00000001, ECX, 13)
+#define vcpu_must_have_sse4_2() vcpu_must_have(0x00000001, ECX, 20)
+#define vcpu_must_have_movbe() vcpu_must_have(0x00000001, ECX, 22)
 #define vcpu_must_have_avx()  vcpu_must_have(0x00000001, ECX, 28)
 
 #ifdef __XEN__
@@ -1503,8 +1505,9 @@ x86_emulate(
     /* Shadow copy of register state. Committed on successful emulation. */
     struct cpu_user_regs _regs = *ctxt->regs;
 
-    uint8_t b, d, sib, sib_index, sib_base, twobyte = 0, rex_prefix = 0;
+    uint8_t b, d, sib, sib_index, sib_base, rex_prefix = 0;
     uint8_t modrm = 0, modrm_mod = 0, modrm_reg = 0, modrm_rm = 0;
+    enum { ext_none, ext_0f, ext_0f38 } ext = ext_none;
     union vex vex = {};
     unsigned int op_bytes, def_op_bytes, ad_bytes, def_ad_bytes;
     bool_t lock_prefix = 0;
@@ -1600,9 +1603,18 @@ x86_emulate(
         /* Two-byte opcode? */
         if ( b == 0x0f )
         {
-            twobyte = 1;
             b = insn_fetch_type(uint8_t);
             d = twobyte_table[b];
+            switch ( b )
+            {
+            default:
+                ext = ext_0f;
+                break;
+            case 0x38:
+                b = insn_fetch_type(uint8_t);
+                ext = ext_0f38;
+                break;
+            }
         }
 
         /* Unrecognised? */
@@ -1619,7 +1631,7 @@ x86_emulate(
         modrm = insn_fetch_type(uint8_t);
         modrm_mod = (modrm & 0xc0) >> 6;
 
-        if ( !twobyte && ((b & ~1) == 0xc4) )
+        if ( !ext && ((b & ~1) == 0xc4) )
             switch ( def_ad_bytes )
             {
             default:
@@ -1665,12 +1677,12 @@ x86_emulate(
                     rex_prefix |= REX_R;
 
                 fail_if(vex.opcx != vex_0f);
-                twobyte = 1;
+                ext = ext_0f;
                 b = insn_fetch_type(uint8_t);
                 d = twobyte_table[b];
 
                 /* Unrecognised? */
-                if ( d == 0 )
+                if ( d == 0 || b == 0x38 )
                     goto cannot_emulate;
 
                 modrm = insn_fetch_type(uint8_t);
@@ -1756,7 +1768,7 @@ x86_emulate(
                 {
                     ea.mem.seg  = x86_seg_ss;
                     ea.mem.off += _regs.esp;
-                    if ( !twobyte && (b == 0x8f) )
+                    if ( !ext && (b == 0x8f) )
                         /* POP <rm> computes its EA post increment. */
                         ea.mem.off += ((mode_64bit() && (op_bytes == 4))
                                        ? 8 : op_bytes);
@@ -1791,12 +1803,12 @@ x86_emulate(
                         ((op_bytes == 8) ? 4 : op_bytes);
                 else if ( (d & SrcMask) == SrcImmByte )
                     ea.mem.off += 1;
-                else if ( !twobyte && ((b & 0xfe) == 0xf6) &&
+                else if ( !ext && ((b & 0xfe) == 0xf6) &&
                           ((modrm_reg & 7) <= 1) )
                     /* Special case in Grp3: test has immediate operand. */
                     ea.mem.off += (d & ByteOp) ? 1
                         : ((op_bytes == 8) ? 4 : op_bytes);
-                else if ( twobyte && ((b & 0xf7) == 0xa4) )
+                else if ( ext == ext_0f && ((b & 0xf7) == 0xa4) )
                     /* SHLD/SHRD with immediate byte third operand. */
                     ea.mem.off++;
                 break;
@@ -1815,7 +1827,9 @@ x86_emulate(
         ea.mem.seg = override_seg;
 
     /* Early operand adjustments. */
-    if ( !twobyte )
+    switch ( ext )
+    {
+    case ext_none:
         switch ( b )
         {
         case 0xf6 ... 0xf7: /* Grp3 */
@@ -1848,6 +1862,29 @@ x86_emulate(
             }
             break;
         }
+        break;
+
+    case ext_0f:
+        break;
+
+    case ext_0f38:
+        switch ( b )
+        {
+        case 0xf0: /* movbe / crc32 */
+            d |= repne_prefix() ? ByteOp : Mov;
+            break;
+        case 0xf1: /* movbe / crc32 */
+            if ( !repne_prefix() )
+                d = (d & ~(DstMask | SrcMask)) | DstMem | SrcReg | Mov;
+            break;
+        default: /* Until it is worth making this table based ... */
+            goto cannot_emulate;
+        }
+        break;
+
+    default:
+        ASSERT_UNREACHABLE();
+    }
 
     /* Decode and fetch the source operand: register, memory or immediate. */
     switch ( d & SrcMask )
@@ -2006,8 +2043,18 @@ x86_emulate(
         break;
     }
 
-    if ( twobyte )
-        goto twobyte_insn;
+    switch ( ext )
+    {
+    case ext_none:
+        break;
+    case ext_0f:
+        goto ext_0f_insn;
+    case ext_0f38:
+        goto ext_0f38_insn;
+    default:
+        ASSERT_UNREACHABLE();
+        goto cannot_emulate;
+    }
 
     switch ( b )
     {
@@ -2050,7 +2097,7 @@ x86_emulate(
         struct segment_register reg;
         src.val = x86_seg_es;
     push_seg:
-        generate_exception_if(mode_64bit() && !twobyte, EXC_UD, -1);
+        generate_exception_if(mode_64bit() && !ext, EXC_UD, -1);
         fail_if(ops->read_segment == NULL);
         if ( (rc = ops->read_segment(src.val, &reg, ctxt)) != 0 )
             return rc;
@@ -2066,7 +2113,7 @@ x86_emulate(
     case 0x07: /* pop %%es */
         src.val = x86_seg_es;
     pop_seg:
-        generate_exception_if(mode_64bit() && !twobyte, EXC_UD, -1);
+        generate_exception_if(mode_64bit() && !ext, EXC_UD, -1);
         fail_if(ops->write_segment == NULL);
         /* 64-bit mode: POP defaults to a 64-bit operand. */
         if ( mode_64bit() && (op_bytes == 4) )
@@ -2721,7 +2768,7 @@ x86_emulate(
         unsigned long sel;
         dst.val = x86_seg_es;
     les: /* dst.val identifies the segment */
-        generate_exception_if(mode_64bit() && !twobyte, EXC_UD, -1);
+        generate_exception_if(mode_64bit() && !ext, EXC_UD, -1);
         generate_exception_if(src.type != OP_MEM, EXC_UD, -1);
         if ( (rc = read_ulong(src.mem.seg, src.mem.off + src.bytes,
                               &sel, 2, ctxt, ops)) != 0 )
@@ -3862,7 +3909,7 @@ x86_emulate(
     put_stub(stub);
     return rc;
 
- twobyte_insn:
+ ext_0f_insn:
     switch ( b )
     {
     case 0x00: /* Grp6 */
@@ -4765,6 +4812,72 @@ x86_emulate(
     }
     goto writeback;
 
+ ext_0f38_insn:
+    switch ( b )
+    {
+    case 0xf0: case 0xf1: /* movbe / crc32 */
+        generate_exception_if(repe_prefix(), EXC_UD, -1);
+        if ( repne_prefix() )
+        {
+            /* crc32 */
+#ifdef HAVE_GAS_SSE4_2
+            host_and_vcpu_must_have(sse4_2);
+            dst.bytes = rex_prefix & REX_W ? 8 : 4;
+            switch ( op_bytes )
+            {
+            case 1:
+                asm ( "crc32b %1,%k0" : "+r" (dst.val)
+                                      : "qm" (*(uint8_t *)&src.val) );
+                break;
+            case 2:
+                asm ( "crc32w %1,%k0" : "+r" (dst.val)
+                                      : "rm" (*(uint16_t *)&src.val) );
+                break;
+            case 4:
+                asm ( "crc32l %1,%k0" : "+r" (dst.val)
+                                      : "rm" (*(uint32_t *)&src.val) );
+                break;
+# ifdef __x86_64__
+            case 8:
+                asm ( "crc32q %1,%0" : "+r" (dst.val) : "rm" (src.val) );
+                break;
+# endif
+            default:
+                ASSERT_UNREACHABLE();
+            }
+#else /* !HAVE_GAS_SSE4_2 */
+            goto cannot_emulate;
+#endif
+        }
+        else
+        {
+            /* movbe */
+            vcpu_must_have_movbe();
+            switch ( op_bytes )
+            {
+            case 2:
+                asm ( "xchg %h0,%b0" : "=Q" (dst.val)
+                                     : "0" (*(uint32_t *)&src.val) );
+                break;
+            case 4:
+#ifdef __x86_64__
+                asm ( "bswap %k0" : "=r" (dst.val)
+                                  : "0" (*(uint32_t *)&src.val) );
+                break;
+            case 8:
+#endif
+                asm ( "bswap %0" : "=r" (dst.val) : "0" (src.val) );
+                break;
+            default:
+                ASSERT_UNREACHABLE();
+            }
+        }
+        break;
+    default:
+        goto cannot_emulate;
+    }
+    goto writeback;
+
  cannot_emulate:
     _put_fpu();
     put_stub(stub);
--- a/xen/include/asm-x86/cpufeature.h
+++ b/xen/include/asm-x86/cpufeature.h
@@ -189,6 +189,7 @@
 #define cpu_has_sse		boot_cpu_has(X86_FEATURE_SSE)
 #define cpu_has_sse2		boot_cpu_has(X86_FEATURE_SSE2)
 #define cpu_has_sse3		boot_cpu_has(X86_FEATURE_SSE3)
+#define cpu_has_sse4_2		boot_cpu_has(X86_FEATURE_SSE4_2)
 #define cpu_has_ht		boot_cpu_has(X86_FEATURE_HT)
 #define cpu_has_mp		1
 #define cpu_has_nx		boot_cpu_has(X86_FEATURE_NX)

[-- Attachment #3: Type: text/plain, Size: 126 bytes --]

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

      parent reply	other threads:[~2016-03-11 17:35 UTC|newest]

Thread overview: 7+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-03-11 17:29 [PATCH 0/3] x86: instruction emulator improvements Jan Beulich
2016-03-11 17:33 ` [PATCH 1/3] x86: rename XMM* features to SSE* Jan Beulich
2016-03-11 17:34 ` [PATCH 2/3] x86emul: check host features alongside guest ones where needed Jan Beulich
2016-03-11 17:41   ` Andrew Cooper
2016-03-14  8:29     ` Jan Beulich
2016-03-14  8:52       ` Andrew Cooper
2016-03-11 17:35 ` Jan Beulich [this message]

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=56E30FED02000078000DBB93@prv-mh.provo.novell.com \
    --to=jbeulich@suse.com \
    --cc=andrew.cooper3@citrix.com \
    --cc=keir@xen.org \
    --cc=xen-devel@lists.xenproject.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).