From mboxrd@z Thu Jan 1 00:00:00 1970 From: "Jan Beulich" Subject: [PATCH 3/3] x86emul: support MOVBE and CRC32 Date: Fri, 11 Mar 2016 10:35:25 -0700 Message-ID: <56E30FED02000078000DBB93@prv-mh.provo.novell.com> References: <56E30EA102000078000DBB7F@prv-mh.provo.novell.com> Mime-Version: 1.0 Content-Type: multipart/mixed; boundary="=__Part1126A9CD.1__=" Return-path: Received: from mail6.bemta14.messagelabs.com ([193.109.254.103]) by lists.xen.org with esmtp (Exim 4.84) (envelope-from ) id 1aeQyH-0004P7-7v for xen-devel@lists.xenproject.org; Fri, 11 Mar 2016 17:35:29 +0000 In-Reply-To: <56E30EA102000078000DBB7F@prv-mh.provo.novell.com> List-Unsubscribe: , List-Post: List-Help: List-Subscribe: , Errors-To: xen-devel-bounces@lists.xen.org Sender: "Xen-devel" To: xen-devel Cc: Andrew Cooper , Keir Fraser List-Id: xen-devel@lists.xenproject.org This is a MIME message. If you are reading this text, you may want to consider changing to a mail reader or gateway that understands how to properly handle MIME multipart messages. --=__Part1126A9CD.1__= Content-Type: text/plain; charset=US-ASCII Content-Transfer-Encoding: quoted-printable Content-Disposition: inline The former in an attempt to at least gradually support all simple data movement instructions. The latter just because it shares the opcode with the former. Signed-off-by: Jan Beulich --- a/tools/tests/x86_emulator/test_x86_emulator.c +++ b/tools/tests/x86_emulator/test_x86_emulator.c @@ -78,7 +78,14 @@ static int cpuid( unsigned int *edx, struct x86_emulate_ctxt *ctxt) { + unsigned int leaf =3D *eax; + asm ("cpuid" : "+a" (*eax), "+c" (*ecx), "=3Dd" (*edx), "=3Db" = (*ebx)); + + /* The emulator doesn't itself use MOVBE, so we can always run the = test. 
*/ + if ( leaf =3D=3D 1 ) + *ecx |=3D 1U << 22; + return X86EMUL_OKAY; } =20 @@ -605,6 +612,34 @@ int main(int argc, char **argv) printf("skipped\n"); #endif =20 + printf("%-40s", "Testing movbe (%%ecx),%%eax..."); + instr[0] =3D 0x0f; instr[1] =3D 0x38; instr[2] =3D 0xf0; instr[3] =3D = 0x01; + regs.eflags =3D 0x200; + regs.eip =3D (unsigned long)&instr[0]; + regs.ecx =3D (unsigned long)res; + regs.eax =3D 0x11111111; + *res =3D 0x12345678; + rc =3D x86_emulate(&ctxt, &emulops); + if ( (rc !=3D X86EMUL_OKAY) || + (*res !=3D 0x12345678) || + (regs.eax !=3D 0x78563412) || + (regs.eflags !=3D 0x200) || + (regs.eip !=3D (unsigned long)&instr[4]) ) + goto fail; + printf("okay\n"); + + printf("%-40s", "Testing movbe %%ax,(%%ecx)..."); + instr[0] =3D 0x66; instr[1] =3D 0x0f; instr[2] =3D 0x38; instr[3] =3D = 0xf1; instr[4] =3D 0x01; + regs.eip =3D (unsigned long)&instr[0]; + rc =3D x86_emulate(&ctxt, &emulops); + if ( (rc !=3D X86EMUL_OKAY) || + (*res !=3D 0x12341234) || + (regs.eax !=3D 0x78563412) || + (regs.eflags !=3D 0x200) || + (regs.eip !=3D (unsigned long)&instr[5]) ) + goto fail; + printf("okay\n"); + #define decl_insn(which) extern const unsigned char which[], which##_len[]= #define put_insn(which, insn) ".pushsection .test, \"ax\", @progbits\n" \ #which ": " insn "\n" \ --- a/tools/tests/x86_emulator/x86_emulate.c +++ b/tools/tests/x86_emulator/x86_emulate.c @@ -12,6 +12,7 @@ typedef bool bool_t; =20 #define BUG() abort() #define ASSERT assert +#define ASSERT_UNREACHABLE() assert(!__LINE__) =20 #define cpu_has_amd_erratum(nr) 0 #define mark_regs_dirty(r) ((void)(r)) --- a/xen/arch/x86/Rules.mk +++ b/xen/arch/x86/Rules.mk @@ -16,6 +16,7 @@ CFLAGS +=3D -msoft-float $(call cc-options-add,CFLAGS,CC,$(EMBEDDED_EXTRA_CFLAGS)) $(call cc-option-add,CFLAGS,CC,-Wnested-externs) $(call as-insn-check,CFLAGS,CC,"vmcall",-DHAVE_GAS_VMX) +$(call as-insn-check,CFLAGS,CC,"crc32 %eax$$(comma)%eax",-DHAVE_GAS_SSE4_2= ) $(call as-insn-check,CFLAGS,CC,"invept 
(%rax)$$(comma)%rax",-DHAVE_GAS_EPT= ) $(call as-insn-check,CFLAGS,CC,"rdfsbase %rax",-DHAVE_GAS_FSGSBASE) $(call as-insn-check,CFLAGS,CC,".equ \"x\"$$(comma)1", \ --- a/xen/arch/x86/x86_emulate/x86_emulate.c +++ b/xen/arch/x86/x86_emulate/x86_emulate.c @@ -188,7 +188,7 @@ static uint8_t twobyte_table[256] =3D { ImplicitOps, ImplicitOps, ImplicitOps, 0, ImplicitOps, ImplicitOps, 0, 0, /* 0x38 - 0x3F */ - 0, 0, 0, 0, 0, 0, 0, 0, + DstReg|SrcMem|ModRM, 0, 0, 0, 0, 0, 0, 0, /* 0x40 - 0x47 */ DstReg|SrcMem|ModRM|Mov, DstReg|SrcMem|ModRM|Mov, DstReg|SrcMem|ModRM|Mov, DstReg|SrcMem|ModRM|Mov, @@ -1091,6 +1091,8 @@ static bool_t vcpu_has( #define vcpu_must_have_sse2() vcpu_must_have(0x00000001, EDX, 26) #define vcpu_must_have_sse3() vcpu_must_have(0x00000001, ECX, 0) #define vcpu_must_have_cx16() vcpu_must_have(0x00000001, ECX, 13) +#define vcpu_must_have_sse4_2() vcpu_must_have(0x00000001, ECX, 20) +#define vcpu_must_have_movbe() vcpu_must_have(0x00000001, ECX, 22) #define vcpu_must_have_avx() vcpu_must_have(0x00000001, ECX, 28) =20 #ifdef __XEN__ @@ -1503,8 +1505,9 @@ x86_emulate( /* Shadow copy of register state. Committed on successful emulation. = */ struct cpu_user_regs _regs =3D *ctxt->regs; =20 - uint8_t b, d, sib, sib_index, sib_base, twobyte =3D 0, rex_prefix =3D = 0; + uint8_t b, d, sib, sib_index, sib_base, rex_prefix =3D 0; uint8_t modrm =3D 0, modrm_mod =3D 0, modrm_reg =3D 0, modrm_rm =3D = 0; + enum { ext_none, ext_0f, ext_0f38 } ext =3D ext_none; union vex vex =3D {}; unsigned int op_bytes, def_op_bytes, ad_bytes, def_ad_bytes; bool_t lock_prefix =3D 0; @@ -1600,9 +1603,18 @@ x86_emulate( /* Two-byte opcode? */ if ( b =3D=3D 0x0f ) { - twobyte =3D 1; b =3D insn_fetch_type(uint8_t); d =3D twobyte_table[b]; + switch ( b ) + { + default: + ext =3D ext_0f; + break; + case 0x38: + b =3D insn_fetch_type(uint8_t); + ext =3D ext_0f38; + break; + } } =20 /* Unrecognised? 
*/ @@ -1619,7 +1631,7 @@ x86_emulate( modrm =3D insn_fetch_type(uint8_t); modrm_mod =3D (modrm & 0xc0) >> 6; =20 - if ( !twobyte && ((b & ~1) =3D=3D 0xc4) ) + if ( !ext && ((b & ~1) =3D=3D 0xc4) ) switch ( def_ad_bytes ) { default: @@ -1665,12 +1677,12 @@ x86_emulate( rex_prefix |=3D REX_R; =20 fail_if(vex.opcx !=3D vex_0f); - twobyte =3D 1; + ext =3D ext_0f; b =3D insn_fetch_type(uint8_t); d =3D twobyte_table[b]; =20 /* Unrecognised? */ - if ( d =3D=3D 0 ) + if ( d =3D=3D 0 || b =3D=3D 0x38 ) goto cannot_emulate; =20 modrm =3D insn_fetch_type(uint8_t); @@ -1756,7 +1768,7 @@ x86_emulate( { ea.mem.seg =3D x86_seg_ss; ea.mem.off +=3D _regs.esp; - if ( !twobyte && (b =3D=3D 0x8f) ) + if ( !ext && (b =3D=3D 0x8f) ) /* POP computes its EA post increment. */ ea.mem.off +=3D ((mode_64bit() && (op_bytes = =3D=3D 4)) ? 8 : op_bytes); @@ -1791,12 +1803,12 @@ x86_emulate( ((op_bytes =3D=3D 8) ? 4 : op_bytes); else if ( (d & SrcMask) =3D=3D SrcImmByte ) ea.mem.off +=3D 1; - else if ( !twobyte && ((b & 0xfe) =3D=3D 0xf6) && + else if ( !ext && ((b & 0xfe) =3D=3D 0xf6) && ((modrm_reg & 7) <=3D 1) ) /* Special case in Grp3: test has immediate operand. = */ ea.mem.off +=3D (d & ByteOp) ? 1 : ((op_bytes =3D=3D 8) ? 4 : op_bytes); - else if ( twobyte && ((b & 0xf7) =3D=3D 0xa4) ) + else if ( ext =3D=3D ext_0f && ((b & 0xf7) =3D=3D 0xa4) ) /* SHLD/SHRD with immediate byte third operand. */ ea.mem.off++; break; @@ -1815,7 +1827,9 @@ x86_emulate( ea.mem.seg =3D override_seg; =20 /* Early operand adjustments. */ - if ( !twobyte ) + switch ( ext ) + { + case ext_none: switch ( b ) { case 0xf6 ... 0xf7: /* Grp3 */ @@ -1848,6 +1862,29 @@ x86_emulate( } break; } + break; + + case ext_0f: + break; + + case ext_0f38: + switch ( b ) + { + case 0xf0: /* movbe / crc32 */ + d |=3D repne_prefix() ? 
ByteOp : Mov; + break; + case 0xf1: /* movbe / crc32 */ + if ( !repne_prefix() ) + d =3D (d & ~(DstMask | SrcMask)) | DstMem | SrcReg | Mov; + break; + default: /* Until it is worth making this table based ... */ + goto cannot_emulate; + } + break; + + default: + ASSERT_UNREACHABLE(); + } =20 /* Decode and fetch the source operand: register, memory or immediate.= */ switch ( d & SrcMask ) @@ -2006,8 +2043,18 @@ x86_emulate( break; } =20 - if ( twobyte ) - goto twobyte_insn; + switch ( ext ) + { + case ext_none: + break; + case ext_0f: + goto ext_0f_insn; + case ext_0f38: + goto ext_0f38_insn; + default: + ASSERT_UNREACHABLE(); + goto cannot_emulate; + } =20 switch ( b ) { @@ -2050,7 +2097,7 @@ x86_emulate( struct segment_register reg; src.val =3D x86_seg_es; push_seg: - generate_exception_if(mode_64bit() && !twobyte, EXC_UD, -1); + generate_exception_if(mode_64bit() && !ext, EXC_UD, -1); fail_if(ops->read_segment =3D=3D NULL); if ( (rc =3D ops->read_segment(src.val, &reg, ctxt)) !=3D 0 ) return rc; @@ -2066,7 +2113,7 @@ x86_emulate( case 0x07: /* pop %%es */ src.val =3D x86_seg_es; pop_seg: - generate_exception_if(mode_64bit() && !twobyte, EXC_UD, -1); + generate_exception_if(mode_64bit() && !ext, EXC_UD, -1); fail_if(ops->write_segment =3D=3D NULL); /* 64-bit mode: POP defaults to a 64-bit operand. 
*/ if ( mode_64bit() && (op_bytes =3D=3D 4) ) @@ -2721,7 +2768,7 @@ x86_emulate( unsigned long sel; dst.val =3D x86_seg_es; les: /* dst.val identifies the segment */ - generate_exception_if(mode_64bit() && !twobyte, EXC_UD, -1); + generate_exception_if(mode_64bit() && !ext, EXC_UD, -1); generate_exception_if(src.type !=3D OP_MEM, EXC_UD, -1); if ( (rc =3D read_ulong(src.mem.seg, src.mem.off + src.bytes, &sel, 2, ctxt, ops)) !=3D 0 ) @@ -3862,7 +3909,7 @@ x86_emulate( put_stub(stub); return rc; =20 - twobyte_insn: + ext_0f_insn: switch ( b ) { case 0x00: /* Grp6 */ @@ -4765,6 +4812,72 @@ x86_emulate( } goto writeback; =20 + ext_0f38_insn: + switch ( b ) + { + case 0xf0: case 0xf1: /* movbe / crc32 */ + generate_exception_if(repe_prefix(), EXC_UD, -1); + if ( repne_prefix() ) + { + /* crc32 */ +#ifdef HAVE_GAS_SSE4_2 + host_and_vcpu_must_have(sse4_2); + dst.bytes =3D rex_prefix & REX_W ? 8 : 4; + switch ( op_bytes ) + { + case 1: + asm ( "crc32b %1,%k0" : "+r" (dst.val) + : "qm" (*(uint8_t *)&src.val) ); + break; + case 2: + asm ( "crc32w %1,%k0" : "+r" (dst.val) + : "rm" (*(uint16_t *)&src.val) ); + break; + case 4: + asm ( "crc32l %1,%k0" : "+r" (dst.val) + : "rm" (*(uint32_t *)&src.val) ); + break; +# ifdef __x86_64__ + case 8: + asm ( "crc32q %1,%0" : "+r" (dst.val) : "rm" (src.val) ); + break; +# endif + default: + ASSERT_UNREACHABLE(); + } +#else /* !HAVE_GAS_SSE4_2 */ + goto cannot_emulate; +#endif + } + else + { + /* movbe */ + vcpu_must_have_movbe(); + switch ( op_bytes ) + { + case 2: + asm ( "xchg %h0,%b0" : "=3DQ" (dst.val) + : "0" (*(uint32_t *)&src.val) ); + break; + case 4: +#ifdef __x86_64__ + asm ( "bswap %k0" : "=3Dr" (dst.val) + : "0" (*(uint32_t *)&src.val) ); + break; + case 8: +#endif + asm ( "bswap %0" : "=3Dr" (dst.val) : "0" (src.val) ); + break; + default: + ASSERT_UNREACHABLE(); + } + } + break; + default: + goto cannot_emulate; + } + goto writeback; + cannot_emulate: _put_fpu(); put_stub(stub); --- a/xen/include/asm-x86/cpufeature.h +++ 
b/xen/include/asm-x86/cpufeature.h @@ -189,6 +189,7 @@ #define cpu_has_sse boot_cpu_has(X86_FEATURE_SSE) #define cpu_has_sse2 boot_cpu_has(X86_FEATURE_SSE2) #define cpu_has_sse3 boot_cpu_has(X86_FEATURE_SSE3) +#define cpu_has_sse4_2 boot_cpu_has(X86_FEATURE_SSE4_2) #define cpu_has_ht boot_cpu_has(X86_FEATURE_HT) #define cpu_has_mp 1 #define cpu_has_nx boot_cpu_has(X86_FEATURE_NX) --=__Part1126A9CD.1__= Content-Type: text/plain; name="x86emul-movbe.patch" Content-Transfer-Encoding: quoted-printable Content-Disposition: attachment; filename="x86emul-movbe.patch" x86emul: support MOVBE and CRC32=0A=0AThe former in an attempt to at least = gradually support all simple data=0Amovement instructions. The latter just = because it shares the opcode=0Awith the former.=0A=0ASigned-off-by: Jan = Beulich =0A=0A--- a/tools/tests/x86_emulator/test_x86_em= ulator.c=0A+++ b/tools/tests/x86_emulator/test_x86_emulator.c=0A@@ -78,7 = +78,14 @@ static int cpuid(=0A unsigned int *edx,=0A struct = x86_emulate_ctxt *ctxt)=0A {=0A+ unsigned int leaf =3D *eax;=0A+=0A = asm ("cpuid" : "+a" (*eax), "+c" (*ecx), "=3Dd" (*edx), "=3Db" (*ebx));=0A+= =0A+ /* The emulator doesn't itself use MOVBE, so we can always run the = test. 
*/=0A+ if ( leaf =3D=3D 1 )=0A+ *ecx |=3D 1U << 22;=0A+=0A = return X86EMUL_OKAY;=0A }=0A =0A@@ -605,6 +612,34 @@ int main(int = argc, char **argv)=0A printf("skipped\n");=0A #endif=0A =0A+ = printf("%-40s", "Testing movbe (%%ecx),%%eax...");=0A+ instr[0] =3D = 0x0f; instr[1] =3D 0x38; instr[2] =3D 0xf0; instr[3] =3D 0x01;=0A+ = regs.eflags =3D 0x200;=0A+ regs.eip =3D (unsigned long)&instr[0];=0A+= regs.ecx =3D (unsigned long)res;=0A+ regs.eax =3D 0x11111111;= =0A+ *res =3D 0x12345678;=0A+ rc =3D x86_emulate(&ctxt, = &emulops);=0A+ if ( (rc !=3D X86EMUL_OKAY) ||=0A+ (*res !=3D = 0x12345678) ||=0A+ (regs.eax !=3D 0x78563412) ||=0A+ = (regs.eflags !=3D 0x200) ||=0A+ (regs.eip !=3D (unsigned long)&inst= r[4]) )=0A+ goto fail;=0A+ printf("okay\n");=0A+=0A+ = printf("%-40s", "Testing movbe %%ax,(%%ecx)...");=0A+ instr[0] =3D = 0x66; instr[1] =3D 0x0f; instr[2] =3D 0x38; instr[3] =3D 0xf1; instr[4] = =3D 0x01;=0A+ regs.eip =3D (unsigned long)&instr[0];=0A+ rc =3D = x86_emulate(&ctxt, &emulops);=0A+ if ( (rc !=3D X86EMUL_OKAY) ||=0A+ = (*res !=3D 0x12341234) ||=0A+ (regs.eax !=3D 0x78563412) = ||=0A+ (regs.eflags !=3D 0x200) ||=0A+ (regs.eip !=3D = (unsigned long)&instr[5]) )=0A+ goto fail;=0A+ printf("okay\n");= =0A+=0A #define decl_insn(which) extern const unsigned char which[], = which##_len[]=0A #define put_insn(which, insn) ".pushsection .test, = \"ax\", @progbits\n" \=0A #which ": " insn = "\n" \=0A--- a/tools/tests/x86_emulator/x86_emulate.c= =0A+++ b/tools/tests/x86_emulator/x86_emulate.c=0A@@ -12,6 +12,7 @@ = typedef bool bool_t;=0A =0A #define BUG() abort()=0A #define ASSERT = assert=0A+#define ASSERT_UNREACHABLE() assert(!__LINE__)=0A =0A #define = cpu_has_amd_erratum(nr) 0=0A #define mark_regs_dirty(r) ((void)(r))=0A--- = a/xen/arch/x86/Rules.mk=0A+++ b/xen/arch/x86/Rules.mk=0A@@ -16,6 +16,7 @@ = CFLAGS +=3D -msoft-float=0A $(call cc-options-add,CFLAGS,CC,$(EMBEDDED_EXTR= A_CFLAGS))=0A $(call cc-option-add,CFLAGS,CC,-Wnested-externs)=0A $(call = 
as-insn-check,CFLAGS,CC,"vmcall",-DHAVE_GAS_VMX)=0A+$(call as-insn-check,CF= LAGS,CC,"crc32 %eax$$(comma)%eax",-DHAVE_GAS_SSE4_2)=0A $(call as-insn-chec= k,CFLAGS,CC,"invept (%rax)$$(comma)%rax",-DHAVE_GAS_EPT)=0A $(call = as-insn-check,CFLAGS,CC,"rdfsbase %rax",-DHAVE_GAS_FSGSBASE)=0A $(call = as-insn-check,CFLAGS,CC,".equ \"x\"$$(comma)1", \=0A--- a/xen/arch/x86/x86_= emulate/x86_emulate.c=0A+++ b/xen/arch/x86/x86_emulate/x86_emulate.c=0A@@ = -188,7 +188,7 @@ static uint8_t twobyte_table[256] =3D {=0A ImplicitOps= , ImplicitOps, ImplicitOps, 0,=0A ImplicitOps, ImplicitOps, 0, 0,=0A = /* 0x38 - 0x3F */=0A- 0, 0, 0, 0, 0, 0, 0, 0,=0A+ DstReg|SrcMem|Mod= RM, 0, 0, 0, 0, 0, 0, 0,=0A /* 0x40 - 0x47 */=0A DstReg|SrcMem|ModR= M|Mov, DstReg|SrcMem|ModRM|Mov,=0A DstReg|SrcMem|ModRM|Mov, DstReg|SrcM= em|ModRM|Mov,=0A@@ -1091,6 +1091,8 @@ static bool_t vcpu_has(=0A #define = vcpu_must_have_sse2() vcpu_must_have(0x00000001, EDX, 26)=0A #define = vcpu_must_have_sse3() vcpu_must_have(0x00000001, ECX, 0)=0A #define = vcpu_must_have_cx16() vcpu_must_have(0x00000001, ECX, 13)=0A+#define = vcpu_must_have_sse4_2() vcpu_must_have(0x00000001, ECX, 20)=0A+#define = vcpu_must_have_movbe() vcpu_must_have(0x00000001, ECX, 22)=0A #define = vcpu_must_have_avx() vcpu_must_have(0x00000001, ECX, 28)=0A =0A #ifdef = __XEN__=0A@@ -1503,8 +1505,9 @@ x86_emulate(=0A /* Shadow copy of = register state. Committed on successful emulation. */=0A struct = cpu_user_regs _regs =3D *ctxt->regs;=0A =0A- uint8_t b, d, sib, = sib_index, sib_base, twobyte =3D 0, rex_prefix =3D 0;=0A+ uint8_t b, d, = sib, sib_index, sib_base, rex_prefix =3D 0;=0A uint8_t modrm =3D 0, = modrm_mod =3D 0, modrm_reg =3D 0, modrm_rm =3D 0;=0A+ enum { ext_none, = ext_0f, ext_0f38 } ext =3D ext_none;=0A union vex vex =3D {};=0A = unsigned int op_bytes, def_op_bytes, ad_bytes, def_ad_bytes;=0A bool_t = lock_prefix =3D 0;=0A@@ -1600,9 +1603,18 @@ x86_emulate(=0A /* = Two-byte opcode? 
*/=0A if ( b =3D=3D 0x0f )=0A {=0A- = twobyte =3D 1;=0A b =3D insn_fetch_type(uint8_t);=0A = d =3D twobyte_table[b];=0A+ switch ( b )=0A+ = {=0A+ default:=0A+ ext =3D ext_0f;=0A+ = break;=0A+ case 0x38:=0A+ b =3D insn_fetch_t= ype(uint8_t);=0A+ ext =3D ext_0f38;=0A+ = break;=0A+ }=0A }=0A =0A /* Unrecognised? = */=0A@@ -1619,7 +1631,7 @@ x86_emulate(=0A modrm =3D insn_fetch_typ= e(uint8_t);=0A modrm_mod =3D (modrm & 0xc0) >> 6;=0A =0A- = if ( !twobyte && ((b & ~1) =3D=3D 0xc4) )=0A+ if ( !ext && ((b & = ~1) =3D=3D 0xc4) )=0A switch ( def_ad_bytes )=0A = {=0A default:=0A@@ -1665,12 +1677,12 @@ x86_emulate(=0A = rex_prefix |=3D REX_R;=0A =0A fail_if(vex.opcx= !=3D vex_0f);=0A- twobyte =3D 1;=0A+ ext = =3D ext_0f;=0A b =3D insn_fetch_type(uint8_t);=0A = d =3D twobyte_table[b];=0A =0A /* Unrecognised? = */=0A- if ( d =3D=3D 0 )=0A+ if ( d =3D=3D 0 = || b =3D=3D 0x38 )=0A goto cannot_emulate;=0A =0A = modrm =3D insn_fetch_type(uint8_t);=0A@@ -1756,7 +1768,7 @@ = x86_emulate(=0A {=0A ea.mem.seg =3D = x86_seg_ss;=0A ea.mem.off +=3D _regs.esp;=0A- = if ( !twobyte && (b =3D=3D 0x8f) )=0A+ if ( = !ext && (b =3D=3D 0x8f) )=0A /* POP computes = its EA post increment. */=0A ea.mem.off +=3D = ((mode_64bit() && (op_bytes =3D=3D 4))=0A = ? 8 : op_bytes);=0A@@ -1791,12 +1803,12 @@ x86_emulate(=0A = ((op_bytes =3D=3D 8) ? 4 : op_bytes);=0A = else if ( (d & SrcMask) =3D=3D SrcImmByte )=0A = ea.mem.off +=3D 1;=0A- else if ( !twobyte && ((b & 0xfe) = =3D=3D 0xf6) &&=0A+ else if ( !ext && ((b & 0xfe) =3D=3D = 0xf6) &&=0A ((modrm_reg & 7) <=3D 1) )=0A = /* Special case in Grp3: test has immediate operand. */=0A = ea.mem.off +=3D (d & ByteOp) ? 1=0A = : ((op_bytes =3D=3D 8) ? 4 : op_bytes);=0A- else if ( = twobyte && ((b & 0xf7) =3D=3D 0xa4) )=0A+ else if ( ext = =3D=3D ext_0f && ((b & 0xf7) =3D=3D 0xa4) )=0A /* = SHLD/SHRD with immediate byte third operand. 
*/=0A = ea.mem.off++;=0A break;=0A@@ -1815,7 +1827,9 @@ x86_emulate= (=0A ea.mem.seg =3D override_seg;=0A =0A /* Early operand = adjustments. */=0A- if ( !twobyte )=0A+ switch ( ext )=0A+ {=0A+ = case ext_none:=0A switch ( b )=0A {=0A case 0xf6 = ... 0xf7: /* Grp3 */=0A@@ -1848,6 +1862,29 @@ x86_emulate(=0A = }=0A break;=0A }=0A+ break;=0A+=0A+ case = ext_0f:=0A+ break;=0A+=0A+ case ext_0f38:=0A+ switch ( b = )=0A+ {=0A+ case 0xf0: /* movbe / crc32 */=0A+ d = |=3D repne_prefix() ? ByteOp : Mov;=0A+ break;=0A+ case = 0xf1: /* movbe / crc32 */=0A+ if ( !repne_prefix() )=0A+ = d =3D (d & ~(DstMask | SrcMask)) | DstMem | SrcReg | Mov;=0A+ = break;=0A+ default: /* Until it is worth making this table = based ... */=0A+ goto cannot_emulate;=0A+ }=0A+ = break;=0A+=0A+ default:=0A+ ASSERT_UNREACHABLE();=0A+ }=0A = =0A /* Decode and fetch the source operand: register, memory or = immediate. */=0A switch ( d & SrcMask )=0A@@ -2006,8 +2043,18 @@ = x86_emulate(=0A break;=0A }=0A =0A- if ( twobyte )=0A- = goto twobyte_insn;=0A+ switch ( ext )=0A+ {=0A+ case = ext_none:=0A+ break;=0A+ case ext_0f:=0A+ goto ext_0f_insn= ;=0A+ case ext_0f38:=0A+ goto ext_0f38_insn;=0A+ default:=0A+ = ASSERT_UNREACHABLE();=0A+ goto cannot_emulate;=0A+ }=0A = =0A switch ( b )=0A {=0A@@ -2050,7 +2097,7 @@ x86_emulate(=0A = struct segment_register reg;=0A src.val =3D x86_seg_es;=0A = push_seg:=0A- generate_exception_if(mode_64bit() && !twobyte, = EXC_UD, -1);=0A+ generate_exception_if(mode_64bit() && !ext, = EXC_UD, -1);=0A fail_if(ops->read_segment =3D=3D NULL);=0A = if ( (rc =3D ops->read_segment(src.val, &reg, ctxt)) !=3D 0 )=0A = return rc;=0A@@ -2066,7 +2113,7 @@ x86_emulate(=0A case 0x07: /* pop = %%es */=0A src.val =3D x86_seg_es;=0A pop_seg:=0A- = generate_exception_if(mode_64bit() && !twobyte, EXC_UD, -1);=0A+ = generate_exception_if(mode_64bit() && !ext, EXC_UD, -1);=0A = fail_if(ops->write_segment =3D=3D NULL);=0A /* 64-bit mode: POP = defaults to a 64-bit operand. 
*/=0A if ( mode_64bit() && (op_bytes = =3D=3D 4) )=0A@@ -2721,7 +2768,7 @@ x86_emulate(=0A unsigned long = sel;=0A dst.val =3D x86_seg_es;=0A les: /* dst.val identifies = the segment */=0A- generate_exception_if(mode_64bit() && !twobyte, = EXC_UD, -1);=0A+ generate_exception_if(mode_64bit() && !ext, = EXC_UD, -1);=0A generate_exception_if(src.type !=3D OP_MEM, = EXC_UD, -1);=0A if ( (rc =3D read_ulong(src.mem.seg, src.mem.off + = src.bytes,=0A &sel, 2, ctxt, ops)) !=3D 0 = )=0A@@ -3862,7 +3909,7 @@ x86_emulate(=0A put_stub(stub);=0A = return rc;=0A =0A- twobyte_insn:=0A+ ext_0f_insn:=0A switch ( b )=0A = {=0A case 0x00: /* Grp6 */=0A@@ -4765,6 +4812,72 @@ x86_emulate(=0A = }=0A goto writeback;=0A =0A+ ext_0f38_insn:=0A+ switch ( b )=0A+ = {=0A+ case 0xf0: case 0xf1: /* movbe / crc32 */=0A+ generate_e= xception_if(repe_prefix(), EXC_UD, -1);=0A+ if ( repne_prefix() = )=0A+ {=0A+ /* crc32 */=0A+#ifdef HAVE_GAS_SSE4_2=0A+ = host_and_vcpu_must_have(sse4_2);=0A+ dst.bytes =3D = rex_prefix & REX_W ? 
8 : 4;=0A+ switch ( op_bytes )=0A+ = {=0A+ case 1:=0A+ asm ( "crc32b %1,%k0" : = "+r" (dst.val)=0A+ : "qm" (*(uint8_t = *)&src.val) );=0A+ break;=0A+ case 2:=0A+ = asm ( "crc32w %1,%k0" : "+r" (dst.val)=0A+ = : "rm" (*(uint16_t *)&src.val) );=0A+ = break;=0A+ case 4:=0A+ asm ( "crc32l %1,%k0" : = "+r" (dst.val)=0A+ : "rm" (*(uint32_t = *)&src.val) );=0A+ break;=0A+# ifdef __x86_64__=0A+ = case 8:=0A+ asm ( "crc32q %1,%0" : "+r" (dst.val) : "rm" = (src.val) );=0A+ break;=0A+# endif=0A+ = default:=0A+ ASSERT_UNREACHABLE();=0A+ }=0A+#else= /* !HAVE_GAS_SSE4_2 */=0A+ goto cannot_emulate;=0A+#endif=0A+ = }=0A+ else=0A+ {=0A+ /* movbe */=0A+ = vcpu_must_have_movbe();=0A+ switch ( op_bytes )=0A+ = {=0A+ case 2:=0A+ asm ( "xchg %h0,%b0" : = "=3DQ" (dst.val)=0A+ : "0" (*(uint32_t = *)&src.val) );=0A+ break;=0A+ case 4:=0A+#ifdef = __x86_64__=0A+ asm ( "bswap %k0" : "=3Dr" (dst.val)=0A+ = : "0" (*(uint32_t *)&src.val) );=0A+ = break;=0A+ case 8:=0A+#endif=0A+ asm ( = "bswap %0" : "=3Dr" (dst.val) : "0" (src.val) );=0A+ = break;=0A+ default:=0A+ ASSERT_UNREACHABLE();=0A+= }=0A+ }=0A+ break;=0A+ default:=0A+ = goto cannot_emulate;=0A+ }=0A+ goto writeback;=0A+=0A cannot_emulate= :=0A _put_fpu();=0A put_stub(stub);=0A--- a/xen/include/asm-x86/cpu= feature.h=0A+++ b/xen/include/asm-x86/cpufeature.h=0A@@ -189,6 +189,7 = @@=0A #define cpu_has_sse boot_cpu_has(X86_FEATURE_SSE)=0A = #define cpu_has_sse2 boot_cpu_has(X86_FEATURE_SSE2)=0A #define = cpu_has_sse3 boot_cpu_has(X86_FEATURE_SSE3)=0A+#define = cpu_has_sse4_2 boot_cpu_has(X86_FEATURE_SSE4_2)=0A #define = cpu_has_ht boot_cpu_has(X86_FEATURE_HT)=0A #define cpu_has_mp = 1=0A #define cpu_has_nx boot_cpu_has(X86_FEATURE_NX)=0A --=__Part1126A9CD.1__= Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: base64 Content-Disposition: inline X19fX19fX19fX19fX19fX19fX19fX19fX19fX19fX19fX19fX19fX19fX19fX18KWGVuLWRldmVs IG1haWxpbmcgbGlzdApYZW4tZGV2ZWxAbGlzdHMueGVuLm9yZwpodHRwOi8vbGlzdHMueGVuLm9y 
Zy94ZW4tZGV2ZWwK --=__Part1126A9CD.1__=--