All of lore.kernel.org
 help / color / mirror / Atom feed
From: "Jan Beulich" <JBeulich@suse.com>
To: xen-devel <xen-devel@lists.xenproject.org>
Cc: Andrew Cooper <andrew.cooper3@citrix.com>
Subject: [PATCH 8/8] x86emul: support {,V}MOVNTDQA
Date: Wed, 25 Jan 2017 08:07:32 -0700	[thread overview]
Message-ID: <5888CD440200007800133DC3@prv-mh.provo.novell.com> (raw)
In-Reply-To: <5888C9110200007800133D98@prv-mh.provo.novell.com>

[-- Attachment #1: Type: text/plain, Size: 5131 bytes --]

... as the only post-SSE2 move insn.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
---
v2: Re-base.

--- a/tools/tests/x86_emulator/test_x86_emulator.c
+++ b/tools/tests/x86_emulator/test_x86_emulator.c
@@ -2354,6 +2354,74 @@ int main(int argc, char **argv)
     else
         printf("skipped\n");
 
+    printf("%-40s", "Testing movntdqa 16(%edx),%xmm4...");
+    if ( stack_exec && cpu_has_sse4_1 )
+    {
+        decl_insn(movntdqa);
+
+        asm volatile ( "pcmpgtb %%xmm4, %%xmm4\n"
+                       put_insn(movntdqa, "movntdqa 16(%0), %%xmm4")
+                       :: "d" (NULL) );
+
+        set_insn(movntdqa);
+        memset(res, 0x55, 64);
+        memset(res + 4, 0xff, 16);
+        regs.edx = (unsigned long)res;
+        rc = x86_emulate(&ctxt, &emulops);
+        if ( rc != X86EMUL_OKAY || !check_eip(movntdqa) )
+            goto fail;
+        asm ( "pcmpeqb %%xmm2, %%xmm2\n\t"
+              "pcmpeqb %%xmm4, %%xmm2\n\t"
+              "pmovmskb %%xmm2, %0" : "=r" (rc) );
+        if ( rc != 0xffff )
+            goto fail;
+        printf("okay\n");
+    }
+    else
+        printf("skipped\n");
+
+    printf("%-40s", "Testing vmovntdqa (%ecx),%ymm4...");
+    if ( stack_exec && cpu_has_avx2 )
+    {
+        decl_insn(vmovntdqa);
+
+#if 0 /* Don't use AVX2 instructions for now */
+        asm volatile ( "vpxor %%ymm4, %%ymm4, %%ymm4\n"
+                       put_insn(vmovntdqa, "vmovntdqa (%0), %%ymm4")
+                       :: "c" (NULL) );
+#else
+        asm volatile ( "vpxor %xmm4, %xmm4, %xmm4\n"
+                       put_insn(vmovntdqa,
+                                ".byte 0xc4, 0xe2, 0x7d, 0x2a, 0x21") );
+#endif
+
+        set_insn(vmovntdqa);
+        memset(res, 0x55, 96);
+        memset(res + 8, 0xff, 32);
+        regs.ecx = (unsigned long)(res + 8);
+        rc = x86_emulate(&ctxt, &emulops);
+        if ( rc != X86EMUL_OKAY || !check_eip(vmovntdqa) )
+            goto fail;
+#if 0 /* Don't use AVX2 instructions for now */
+        asm ( "vpcmpeqb %%ymm2, %%ymm2, %%ymm2\n\t"
+              "vpcmpeqb %%ymm4, %%ymm2, %%ymm0\n\t"
+              "vpmovmskb %%ymm0, %0" : "=r" (rc) );
+#else
+        asm ( "vextractf128 $1, %%ymm4, %%xmm3\n\t"
+              "vpcmpeqb %%xmm2, %%xmm2, %%xmm2\n\t"
+              "vpcmpeqb %%xmm4, %%xmm2, %%xmm0\n\t"
+              "vpcmpeqb %%xmm3, %%xmm2, %%xmm1\n\t"
+              "vpmovmskb %%xmm0, %0\n\t"
+              "vpmovmskb %%xmm1, %1" : "=r" (rc), "=r" (i) );
+        rc |= i << 16;
+#endif
+        if ( ~rc )
+            goto fail;
+        printf("okay\n");
+    }
+    else
+        printf("skipped\n");
+
     printf("%-40s", "Testing stmxcsr (%edx)...");
     if ( cpu_has_sse )
     {
--- a/tools/tests/x86_emulator/x86_emulate.h
+++ b/tools/tests/x86_emulator/x86_emulate.h
@@ -95,6 +95,12 @@ static inline uint64_t xgetbv(uint32_t x
     (res.c & (1U << 0)) != 0; \
 })
 
+#define cpu_has_sse4_1 ({ \
+    struct cpuid_leaf res; \
+    emul_test_cpuid(1, 0, &res, NULL); \
+    (res.c & (1U << 19)) != 0; \
+})
+
 #define cpu_has_popcnt ({ \
     struct cpuid_leaf res; \
     emul_test_cpuid(1, 0, &res, NULL); \
--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -1449,6 +1449,7 @@ static bool vcpu_has(
 #define vcpu_has_sse2()        vcpu_has(         1, EDX, 26, ctxt, ops)
 #define vcpu_has_sse3()        vcpu_has(         1, ECX,  0, ctxt, ops)
 #define vcpu_has_cx16()        vcpu_has(         1, ECX, 13, ctxt, ops)
+#define vcpu_has_sse4_1()      vcpu_has(         1, ECX, 19, ctxt, ops)
 #define vcpu_has_sse4_2()      vcpu_has(         1, ECX, 20, ctxt, ops)
 #define vcpu_has_movbe()       vcpu_has(         1, ECX, 22, ctxt, ops)
 #define vcpu_has_popcnt()      vcpu_has(         1, ECX, 23, ctxt, ops)
@@ -5963,6 +5964,7 @@ x86_emulate(
     case X86EMUL_OPC_VEX_66(0x0f, 0x7f): /* vmovdqa {x,y}mm,{x,y}mm/m128 */
     case X86EMUL_OPC_F3(0x0f, 0x7f):     /* movdqu xmm,xmm/m128 */
     case X86EMUL_OPC_VEX_F3(0x0f, 0x7f): /* vmovdqu {x,y}mm,{x,y}mm/mem */
+    movdqa:
         if ( vex.opcx != vex_none )
         {
             host_and_vcpu_must_have(avx);
@@ -6886,6 +6888,23 @@ x86_emulate(
         sfence = true;
         break;
 
+    case X86EMUL_OPC_66(0x0f38, 0x2a): /* movntdqa m128,xmm */
+    case X86EMUL_OPC_VEX_66(0x0f38, 0x2a): /* vmovntdqa mem,{x,y}mm */
+        generate_exception_if(ea.type != OP_MEM, EXC_UD);
+        /* Ignore the non-temporal hint for now, using movdqa instead. */
+        asm volatile ( "mfence" ::: "memory" );
+        b = 0x6f;
+        if ( vex.opcx == vex_none )
+            vcpu_must_have(sse4_1);
+        else
+        {
+            vex.opcx = vex_0f;
+            if ( vex.l )
+                vcpu_must_have(avx2);
+        }
+        state->simd_size = simd_packed_int;
+        goto movdqa;
+
     case X86EMUL_OPC(0x0f38, 0xf0): /* movbe m,r */
     case X86EMUL_OPC(0x0f38, 0xf1): /* movbe r,m */
         vcpu_must_have(movbe);



[-- Attachment #2: x86emul-MOVNTDQA.patch --]
[-- Type: text/plain, Size: 5160 bytes --]

x86emul: support {,V}MOVNTDQA

... as the only post-SSE2 move insn.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
---
v2: Re-base.

--- a/tools/tests/x86_emulator/test_x86_emulator.c
+++ b/tools/tests/x86_emulator/test_x86_emulator.c
@@ -2354,6 +2354,74 @@ int main(int argc, char **argv)
     else
         printf("skipped\n");
 
+    printf("%-40s", "Testing movntdqa 16(%edx),%xmm4...");
+    if ( stack_exec && cpu_has_sse4_1 )
+    {
+        decl_insn(movntdqa);
+
+        asm volatile ( "pcmpgtb %%xmm4, %%xmm4\n"
+                       put_insn(movntdqa, "movntdqa 16(%0), %%xmm4")
+                       :: "d" (NULL) );
+
+        set_insn(movntdqa);
+        memset(res, 0x55, 64);
+        memset(res + 4, 0xff, 16);
+        regs.edx = (unsigned long)res;
+        rc = x86_emulate(&ctxt, &emulops);
+        if ( rc != X86EMUL_OKAY || !check_eip(movntdqa) )
+            goto fail;
+        asm ( "pcmpeqb %%xmm2, %%xmm2\n\t"
+              "pcmpeqb %%xmm4, %%xmm2\n\t"
+              "pmovmskb %%xmm2, %0" : "=r" (rc) );
+        if ( rc != 0xffff )
+            goto fail;
+        printf("okay\n");
+    }
+    else
+        printf("skipped\n");
+
+    printf("%-40s", "Testing vmovntdqa (%ecx),%ymm4...");
+    if ( stack_exec && cpu_has_avx2 )
+    {
+        decl_insn(vmovntdqa);
+
+#if 0 /* Don't use AVX2 instructions for now */
+        asm volatile ( "vpxor %%ymm4, %%ymm4, %%ymm4\n"
+                       put_insn(vmovntdqa, "vmovntdqa (%0), %%ymm4")
+                       :: "c" (NULL) );
+#else
+        asm volatile ( "vpxor %xmm4, %xmm4, %xmm4\n"
+                       put_insn(vmovntdqa,
+                                ".byte 0xc4, 0xe2, 0x7d, 0x2a, 0x21") );
+#endif
+
+        set_insn(vmovntdqa);
+        memset(res, 0x55, 96);
+        memset(res + 8, 0xff, 32);
+        regs.ecx = (unsigned long)(res + 8);
+        rc = x86_emulate(&ctxt, &emulops);
+        if ( rc != X86EMUL_OKAY || !check_eip(vmovntdqa) )
+            goto fail;
+#if 0 /* Don't use AVX2 instructions for now */
+        asm ( "vpcmpeqb %%ymm2, %%ymm2, %%ymm2\n\t"
+              "vpcmpeqb %%ymm4, %%ymm2, %%ymm0\n\t"
+              "vpmovmskb %%ymm0, %0" : "=r" (rc) );
+#else
+        asm ( "vextractf128 $1, %%ymm4, %%xmm3\n\t"
+              "vpcmpeqb %%xmm2, %%xmm2, %%xmm2\n\t"
+              "vpcmpeqb %%xmm4, %%xmm2, %%xmm0\n\t"
+              "vpcmpeqb %%xmm3, %%xmm2, %%xmm1\n\t"
+              "vpmovmskb %%xmm0, %0\n\t"
+              "vpmovmskb %%xmm1, %1" : "=r" (rc), "=r" (i) );
+        rc |= i << 16;
+#endif
+        if ( ~rc )
+            goto fail;
+        printf("okay\n");
+    }
+    else
+        printf("skipped\n");
+
     printf("%-40s", "Testing stmxcsr (%edx)...");
     if ( cpu_has_sse )
     {
--- a/tools/tests/x86_emulator/x86_emulate.h
+++ b/tools/tests/x86_emulator/x86_emulate.h
@@ -95,6 +95,12 @@ static inline uint64_t xgetbv(uint32_t x
     (res.c & (1U << 0)) != 0; \
 })
 
+#define cpu_has_sse4_1 ({ \
+    struct cpuid_leaf res; \
+    emul_test_cpuid(1, 0, &res, NULL); \
+    (res.c & (1U << 19)) != 0; \
+})
+
 #define cpu_has_popcnt ({ \
     struct cpuid_leaf res; \
     emul_test_cpuid(1, 0, &res, NULL); \
--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -1449,6 +1449,7 @@ static bool vcpu_has(
 #define vcpu_has_sse2()        vcpu_has(         1, EDX, 26, ctxt, ops)
 #define vcpu_has_sse3()        vcpu_has(         1, ECX,  0, ctxt, ops)
 #define vcpu_has_cx16()        vcpu_has(         1, ECX, 13, ctxt, ops)
+#define vcpu_has_sse4_1()      vcpu_has(         1, ECX, 19, ctxt, ops)
 #define vcpu_has_sse4_2()      vcpu_has(         1, ECX, 20, ctxt, ops)
 #define vcpu_has_movbe()       vcpu_has(         1, ECX, 22, ctxt, ops)
 #define vcpu_has_popcnt()      vcpu_has(         1, ECX, 23, ctxt, ops)
@@ -5963,6 +5964,7 @@ x86_emulate(
     case X86EMUL_OPC_VEX_66(0x0f, 0x7f): /* vmovdqa {x,y}mm,{x,y}mm/m128 */
     case X86EMUL_OPC_F3(0x0f, 0x7f):     /* movdqu xmm,xmm/m128 */
     case X86EMUL_OPC_VEX_F3(0x0f, 0x7f): /* vmovdqu {x,y}mm,{x,y}mm/mem */
+    movdqa:
         if ( vex.opcx != vex_none )
         {
             host_and_vcpu_must_have(avx);
@@ -6886,6 +6888,23 @@ x86_emulate(
         sfence = true;
         break;
 
+    case X86EMUL_OPC_66(0x0f38, 0x2a): /* movntdqa m128,xmm */
+    case X86EMUL_OPC_VEX_66(0x0f38, 0x2a): /* vmovntdqa mem,{x,y}mm */
+        generate_exception_if(ea.type != OP_MEM, EXC_UD);
+        /* Ignore the non-temporal hint for now, using movdqa instead. */
+        asm volatile ( "mfence" ::: "memory" );
+        b = 0x6f;
+        if ( vex.opcx == vex_none )
+            vcpu_must_have(sse4_1);
+        else
+        {
+            vex.opcx = vex_0f;
+            if ( vex.l )
+                vcpu_must_have(avx2);
+        }
+        state->simd_size = simd_packed_int;
+        goto movdqa;
+
     case X86EMUL_OPC(0x0f38, 0xf0): /* movbe m,r */
     case X86EMUL_OPC(0x0f38, 0xf1): /* movbe r,m */
         vcpu_must_have(movbe);

[-- Attachment #3: Type: text/plain, Size: 127 bytes --]

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel

      parent reply	other threads:[~2017-01-25 15:07 UTC|newest]

Thread overview: 9+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-01-25 14:49 [PATCH 0/8] x86emul: MMX/SSE/SSE2 support Jan Beulich
2017-01-25 15:03 ` [PATCH 1/8] x86emul: catch exceptions occurring in stubs Jan Beulich
2017-01-25 15:04 ` [PATCH 2/8] x86emul: support most memory accessing MMX/SSE/SSE2 insns Jan Beulich
2017-01-25 15:04 ` [PATCH 3/8] x86emul: support MMX/SSE/SSE2 moves Jan Beulich
2017-01-25 15:05 ` [PATCH 4/8] x86emul: support MMX/SSE/SSE2 converts Jan Beulich
2017-01-25 15:05 ` [PATCH 5/8] x86emul: support {,V}{,U}COMIS{S,D} Jan Beulich
2017-01-25 15:06 ` [PATCH 6/8] x86emul: support MMX/SSE/SSE2 insns with only register operands Jan Beulich
2017-01-25 15:06 ` [PATCH 7/8] x86emul: support {,V}{LD,ST}MXCSR Jan Beulich
2017-01-25 15:07 ` Jan Beulich [this message]

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=5888CD440200007800133DC3@prv-mh.provo.novell.com \
    --to=jbeulich@suse.com \
    --cc=andrew.cooper3@citrix.com \
    --cc=xen-devel@lists.xenproject.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.

Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes;
see the mirroring instructions for how to clone and mirror
all data and code used by this external index.