From mboxrd@z Thu Jan 1 00:00:00 1970 From: "Jan Beulich" Subject: [PATCH v2 09/11] x86emul: support {,V}MOVNTDQA Date: Wed, 01 Feb 2017 04:17:50 -0700 Message-ID: <5891D1EF0200007800135C0F@prv-mh.provo.novell.com> References: <5891CF990200007800135BC5@prv-mh.provo.novell.com> Mime-Version: 1.0 Content-Type: multipart/mixed; boundary="=__PartAC9568CE.1__=" Return-path: Received: from mail6.bemta6.messagelabs.com ([193.109.254.103]) by lists.xenproject.org with esmtp (Exim 4.84_2) (envelope-from ) id 1cYsvF-0003S4-7E for xen-devel@lists.xenproject.org; Wed, 01 Feb 2017 11:17:57 +0000 In-Reply-To: <5891CF990200007800135BC5@prv-mh.provo.novell.com> List-Unsubscribe: , List-Post: List-Help: List-Subscribe: , Errors-To: xen-devel-bounces@lists.xen.org Sender: "Xen-devel" To: xen-devel Cc: Andrew Cooper List-Id: xen-devel@lists.xenproject.org This is a MIME message. If you are reading this text, you may want to consider changing to a mail reader or gateway that understands how to properly handle MIME multipart messages. --=__PartAC9568CE.1__= Content-Type: text/plain; charset=US-ASCII Content-Transfer-Encoding: quoted-printable Content-Disposition: inline ... as the only post-SSE2 move insn. Signed-off-by: Jan Beulich --- v2: Re-base. --- a/tools/tests/x86_emulator/test_x86_emulator.c +++ b/tools/tests/x86_emulator/test_x86_emulator.c @@ -2389,6 +2389,74 @@ int main(int argc, char **argv) else printf("skipped\n"); =20 + printf("%-40s", "Testing movntdqa 16(%edx),%xmm4..."); + if ( stack_exec && cpu_has_sse4_1 ) + { + decl_insn(movntdqa); + + asm volatile ( "pcmpgtb %%xmm4, %%xmm4\n" + put_insn(movntdqa, "movntdqa 16(%0), %%xmm4") + :: "d" (NULL) ); + + set_insn(movntdqa); + memset(res, 0x55, 64); + memset(res + 4, 0xff, 16); + regs.edx =3D (unsigned long)res; + rc =3D x86_emulate(&ctxt, &emulops); + if ( rc !=3D X86EMUL_OKAY || !check_eip(movntdqa) ) + goto fail; + asm ( "pcmpeqb %%xmm2, %%xmm2\n\t" + "pcmpeqb %%xmm4, %%xmm2\n\t" + "pmovmskb %%xmm2, %0" : "=3Dr" (rc) ); + if ( rc !=3D 0xffff ) + goto fail; + printf("okay\n"); + } + else + printf("skipped\n"); + + printf("%-40s", "Testing vmovntdqa (%ecx),%ymm4..."); + if ( stack_exec && cpu_has_avx2 ) + { + decl_insn(vmovntdqa); + +#if 0 /* Don't use AVX2 instructions for now */ + asm volatile ( "vpxor %%ymm4, %%ymm4, %%ymm4\n" + put_insn(vmovntdqa, "vmovntdqa (%0), %%ymm4") + :: "c" (NULL) ); +#else + asm volatile ( "vpxor %xmm4, %xmm4, %xmm4\n" + put_insn(vmovntdqa, + ".byte 0xc4, 0xe2, 0x7d, 0x2a, 0x21") ); +#endif + + set_insn(vmovntdqa); + memset(res, 0x55, 96); + memset(res + 8, 0xff, 32); + regs.ecx =3D (unsigned long)(res + 8); + rc =3D x86_emulate(&ctxt, &emulops); + if ( rc !=3D X86EMUL_OKAY || !check_eip(vmovntdqa) ) + goto fail; +#if 0 /* Don't use AVX2 instructions for now */ + asm ( "vpcmpeqb %%ymm2, %%ymm2, %%ymm2\n\t" + "vpcmpeqb %%ymm4, %%ymm2, %%ymm0\n\t" + "vpmovmskb %%ymm0, %0" : "=3Dr" (rc) ); +#else + asm ( "vextractf128 $1, %%ymm4, %%xmm3\n\t" + "vpcmpeqb %%xmm2, %%xmm2, %%xmm2\n\t" + "vpcmpeqb %%xmm4, %%xmm2, %%xmm0\n\t" + "vpcmpeqb %%xmm3, %%xmm2, %%xmm1\n\t" + "vpmovmskb %%xmm0, %0\n\t" + "vpmovmskb %%xmm1, %1" : "=3Dr" (rc), "=3Dr" (i) ); + rc |=3D i << 16; +#endif + if ( ~rc ) + goto fail; + printf("okay\n"); + } + else + printf("skipped\n"); + printf("%-40s", "Testing stmxcsr (%edx)..."); if ( cpu_has_sse ) { --- a/tools/tests/x86_emulator/x86_emulate.h +++ b/tools/tests/x86_emulator/x86_emulate.h @@ -95,6 +95,12 @@ static inline uint64_t xgetbv(uint32_t x (res.c & (1U << 0)) !=3D 0; \ }) =20 +#define cpu_has_sse4_1 ({ \ + struct cpuid_leaf res; \ + emul_test_cpuid(1, 0, &res, NULL); \ + (res.c & (1U << 19)) !=3D 0; \ +}) + #define cpu_has_popcnt ({ \ struct cpuid_leaf res; \ emul_test_cpuid(1, 0, &res, NULL); \ --- a/xen/arch/x86/x86_emulate/x86_emulate.c +++ b/xen/arch/x86/x86_emulate/x86_emulate.c @@ -1433,6 +1433,7 @@ static bool vcpu_has( #define vcpu_has_sse2() vcpu_has( 1, EDX, 26, ctxt, ops) #define vcpu_has_sse3() vcpu_has( 1, ECX, 0, ctxt, ops) #define vcpu_has_cx16() vcpu_has( 1, ECX, 13, ctxt, ops) +#define vcpu_has_sse4_1() vcpu_has( 1, ECX, 19, ctxt, ops) #define vcpu_has_sse4_2() vcpu_has( 1, ECX, 20, ctxt, ops) #define vcpu_has_movbe() vcpu_has( 1, ECX, 22, ctxt, ops) #define vcpu_has_popcnt() vcpu_has( 1, ECX, 23, ctxt, ops) @@ -5944,6 +5945,7 @@ x86_emulate( case X86EMUL_OPC_VEX_66(0x0f, 0x7f): /* vmovdqa {x,y}mm,{x,y}mm/m128 = */ case X86EMUL_OPC_F3(0x0f, 0x7f): /* movdqu xmm,xmm/m128 */ case X86EMUL_OPC_VEX_F3(0x0f, 0x7f): /* vmovdqu {x,y}mm,{x,y}mm/mem = */ + movdqa: if ( vex.opcx !=3D vex_none ) { host_and_vcpu_must_have(avx); @@ -6868,6 +6870,23 @@ x86_emulate( sfence =3D true; break; =20 + case X86EMUL_OPC_66(0x0f38, 0x2a): /* movntdqa m128,xmm */ + case X86EMUL_OPC_VEX_66(0x0f38, 0x2a): /* vmovntdqa mem,{x,y}mm */ + generate_exception_if(ea.type !=3D OP_MEM, EXC_UD); + /* Ignore the non-temporal hint for now, using movdqa instead. */ + asm volatile ( "mfence" ::: "memory" ); + b =3D 0x6f; + if ( vex.opcx =3D=3D vex_none ) + vcpu_must_have(sse4_1); + else + { + vex.opcx =3D vex_0f; + if ( vex.l ) + vcpu_must_have(avx2); + } + state->simd_size =3D simd_packed_int; + goto movdqa; + case X86EMUL_OPC(0x0f38, 0xf0): /* movbe m,r */ case X86EMUL_OPC(0x0f38, 0xf1): /* movbe r,m */ vcpu_must_have(movbe); --=__PartAC9568CE.1__= Content-Type: text/plain; name="x86emul-MOVNTDQA.patch" Content-Transfer-Encoding: quoted-printable Content-Disposition: attachment; filename="x86emul-MOVNTDQA.patch" x86emul: support {,V}MOVNTDQA=0A=0A... as the only post-SSE2 move = insn.=0A=0ASigned-off-by: Jan Beulich =0A---=0Av2: = Re-base.=0A=0A--- a/tools/tests/x86_emulator/test_x86_emulator.c=0A+++ = b/tools/tests/x86_emulator/test_x86_emulator.c=0A@@ -2389,6 +2389,74 @@ = int main(int argc, char **argv)=0A else=0A printf("skipped\n");= =0A =0A+ printf("%-40s", "Testing movntdqa 16(%edx),%xmm4...");=0A+ = if ( stack_exec && cpu_has_sse4_1 )=0A+ {=0A+ decl_insn(movntdqa)= ;=0A+=0A+ asm volatile ( "pcmpgtb %%xmm4, %%xmm4\n"=0A+ = put_insn(movntdqa, "movntdqa 16(%0), %%xmm4")=0A+ = :: "d" (NULL) );=0A+=0A+ set_insn(movntdqa);=0A+ = memset(res, 0x55, 64);=0A+ memset(res + 4, 0xff, 16);=0A+ = regs.edx =3D (unsigned long)res;=0A+ rc =3D x86_emulate(&ctxt, = &emulops);=0A+ if ( rc !=3D X86EMUL_OKAY || !check_eip(movntdqa) = )=0A+ goto fail;=0A+ asm ( "pcmpeqb %%xmm2, %%xmm2\n\t"= =0A+ "pcmpeqb %%xmm4, %%xmm2\n\t"=0A+ "pmovmskb = %%xmm2, %0" : "=3Dr" (rc) );=0A+ if ( rc !=3D 0xffff )=0A+ = goto fail;=0A+ printf("okay\n");=0A+ }=0A+ else=0A+ = printf("skipped\n");=0A+=0A+ printf("%-40s", "Testing vmovntdqa = (%ecx),%ymm4...");=0A+ if ( stack_exec && cpu_has_avx2 )=0A+ {=0A+ = decl_insn(vmovntdqa);=0A+=0A+#if 0 /* Don't use AVX2 instructions for = now */=0A+ asm volatile ( "vpxor %%ymm4, %%ymm4, %%ymm4\n"=0A+ = put_insn(vmovntdqa, "vmovntdqa (%0), %%ymm4")=0A+ = :: "c" (NULL) );=0A+#else=0A+ asm volatile ( "vpxor = %xmm4, %xmm4, %xmm4\n"=0A+ put_insn(vmovntdqa,=0A+ = ".byte 0xc4, 0xe2, 0x7d, 0x2a, 0x21") = );=0A+#endif=0A+=0A+ set_insn(vmovntdqa);=0A+ memset(res, = 0x55, 96);=0A+ memset(res + 8, 0xff, 32);=0A+ regs.ecx =3D = (unsigned long)(res + 8);=0A+ rc =3D x86_emulate(&ctxt, &emulops);= =0A+ if ( rc !=3D X86EMUL_OKAY || !check_eip(vmovntdqa) )=0A+ = goto fail;=0A+#if 0 /* Don't use AVX2 instructions for now */=0A+ = asm ( "vpcmpeqb %%ymm2, %%ymm2, %%ymm2\n\t"=0A+ "vpcmpeqb = %%ymm4, %%ymm2, %%ymm0\n\t"=0A+ "vpmovmskb %%ymm0, %0" : = "=3Dr" (rc) );=0A+#else=0A+ asm ( "vextractf128 $1, %%ymm4, = %%xmm3\n\t"=0A+ "vpcmpeqb %%xmm2, %%xmm2, %%xmm2\n\t"=0A+ = "vpcmpeqb %%xmm4, %%xmm2, %%xmm0\n\t"=0A+ "vpcmpeqb = %%xmm3, %%xmm2, %%xmm1\n\t"=0A+ "vpmovmskb %%xmm0, %0\n\t"=0A+= "vpmovmskb %%xmm1, %1" : "=3Dr" (rc), "=3Dr" (i) );=0A+ = rc |=3D i << 16;=0A+#endif=0A+ if ( ~rc )=0A+ goto = fail;=0A+ printf("okay\n");=0A+ }=0A+ else=0A+ = printf("skipped\n");=0A+=0A printf("%-40s", "Testing stmxcsr (%edx)..."= );=0A if ( cpu_has_sse )=0A {=0A--- a/tools/tests/x86_emulator/x86_= emulate.h=0A+++ b/tools/tests/x86_emulator/x86_emulate.h=0A@@ -95,6 +95,12 = @@ static inline uint64_t xgetbv(uint32_t x=0A (res.c & (1U << 0)) = !=3D 0; \=0A })=0A =0A+#define cpu_has_sse4_1 ({ \=0A+ struct cpuid_leaf= res; \=0A+ emul_test_cpuid(1, 0, &res, NULL); \=0A+ (res.c & (1U << = 19)) !=3D 0; \=0A+})=0A+=0A #define cpu_has_popcnt ({ \=0A struct = cpuid_leaf res; \=0A emul_test_cpuid(1, 0, &res, NULL); \=0A--- = a/xen/arch/x86/x86_emulate/x86_emulate.c=0A+++ b/xen/arch/x86/x86_emulate/x= 86_emulate.c=0A@@ -1433,6 +1433,7 @@ static bool vcpu_has(=0A #define = vcpu_has_sse2() vcpu_has( 1, EDX, 26, ctxt, ops)=0A #define = vcpu_has_sse3() vcpu_has( 1, ECX, 0, ctxt, ops)=0A #define = vcpu_has_cx16() vcpu_has( 1, ECX, 13, ctxt, ops)=0A+#define = vcpu_has_sse4_1() vcpu_has( 1, ECX, 19, ctxt, ops)=0A #define = vcpu_has_sse4_2() vcpu_has( 1, ECX, 20, ctxt, ops)=0A #define = vcpu_has_movbe() vcpu_has( 1, ECX, 22, ctxt, ops)=0A #define = vcpu_has_popcnt() vcpu_has( 1, ECX, 23, ctxt, ops)=0A@@ = -5944,6 +5945,7 @@ x86_emulate(=0A case X86EMUL_OPC_VEX_66(0x0f, = 0x7f): /* vmovdqa {x,y}mm,{x,y}mm/m128 */=0A case X86EMUL_OPC_F3(0x0f, = 0x7f): /* movdqu xmm,xmm/m128 */=0A case X86EMUL_OPC_VEX_F3(0x0f, = 0x7f): /* vmovdqu {x,y}mm,{x,y}mm/mem */=0A+ movdqa:=0A if ( = vex.opcx !=3D vex_none )=0A {=0A host_and_vcpu_must_hav= e(avx);=0A@@ -6868,6 +6870,23 @@ x86_emulate(=0A sfence =3D = true;=0A break;=0A =0A+ case X86EMUL_OPC_66(0x0f38, 0x2a): /* = movntdqa m128,xmm */=0A+ case X86EMUL_OPC_VEX_66(0x0f38, 0x2a): /* = vmovntdqa mem,{x,y}mm */=0A+ generate_exception_if(ea.type !=3D = OP_MEM, EXC_UD);=0A+ /* Ignore the non-temporal hint for now, using = movdqa instead. */=0A+ asm volatile ( "mfence" ::: "memory" );=0A+ = b =3D 0x6f;=0A+ if ( vex.opcx =3D=3D vex_none )=0A+ = vcpu_must_have(sse4_1);=0A+ else=0A+ {=0A+ = vex.opcx =3D vex_0f;=0A+ if ( vex.l )=0A+ = vcpu_must_have(avx2);=0A+ }=0A+ state->simd_size =3D = simd_packed_int;=0A+ goto movdqa;=0A+=0A case X86EMUL_OPC(0x0f38= , 0xf0): /* movbe m,r */=0A case X86EMUL_OPC(0x0f38, 0xf1): /* movbe = r,m */=0A vcpu_must_have(movbe);=0A --=__PartAC9568CE.1__= Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: base64 Content-Disposition: inline X19fX19fX19fX19fX19fX19fX19fX19fX19fX19fX19fX19fX19fX19fX19fX18KWGVuLWRldmVs IG1haWxpbmcgbGlzdApYZW4tZGV2ZWxAbGlzdHMueGVuLm9yZwpodHRwczovL2xpc3RzLnhlbi5v cmcveGVuLWRldmVsCg== --=__PartAC9568CE.1__=--