* [PATCH 0/3] x86emul: support more SSE/AVX moves
@ 2016-12-14  9:50 Jan Beulich
  2016-12-14  9:55 ` [PATCH 1/3] x86emul: support load forms of {,V}MOV{D,Q} Jan Beulich
                   ` (2 more replies)
  0 siblings, 3 replies; 7+ messages in thread
From: Jan Beulich @ 2016-12-14  9:50 UTC (permalink / raw)
  To: xen-devel; +Cc: Andrew Cooper

I think this completes the set of simple move instructions the
emulator supports; the next step would be to fully support the
SSEn/AVXn instruction sets.

1: support load forms of {,V}MOV{D,Q}
2: support {,V}LDDQU
3: support {,V}MOVNTDQA

Signed-off-by: Jan Beulich <jbeulich@suse.com>


* [PATCH 1/3] x86emul: support load forms of {,V}MOV{D,Q}
  2016-12-14  9:50 [PATCH 0/3] x86emul: support more SSE/AVX moves Jan Beulich
@ 2016-12-14  9:55 ` Jan Beulich
  2017-01-05 22:31   ` Andrew Cooper
  2016-12-14  9:56 ` [PATCH 2/3] x86emul: support {,V}LDDQU Jan Beulich
  2016-12-14  9:56 ` [PATCH 3/3] x86emul: support {,V}MOVNTDQA Jan Beulich
  2 siblings, 1 reply; 7+ messages in thread
From: Jan Beulich @ 2016-12-14  9:55 UTC (permalink / raw)
  To: xen-devel; +Cc: Andrew Cooper

Signed-off-by: Jan Beulich <jbeulich@suse.com>

--- a/tools/tests/x86_emulator/test_x86_emulator.c
+++ b/tools/tests/x86_emulator/test_x86_emulator.c
@@ -823,6 +823,29 @@ int main(int argc, char **argv)
     else
         printf("skipped\n");
 
+    printf("%-40s", "Testing movq 32(%ecx),%xmm1...");
+    if ( stack_exec && cpu_has_sse2 )
+    {
+        decl_insn(movq_from_mem2);
+
+        asm volatile ( "pcmpeqb %%xmm1, %%xmm1\n"
+                       put_insn(movq_from_mem2, "movq 32(%0), %%xmm1")
+                       :: "c" (NULL) );
+
+        set_insn(movq_from_mem2);
+        rc = x86_emulate(&ctxt, &emulops);
+        if ( rc != X86EMUL_OKAY || !check_eip(movq_from_mem2) )
+            goto fail;
+        asm ( "pcmpgtb %%xmm0, %%xmm0\n\t"
+              "pcmpeqb %%xmm1, %%xmm0\n\t"
+              "pmovmskb %%xmm0, %0" : "=r" (rc) );
+        if ( rc != 0xffff )
+            goto fail;
+        printf("okay\n");
+    }
+    else
+        printf("skipped\n");
+
     printf("%-40s", "Testing vmovq %xmm1,32(%edx)...");
     if ( stack_exec && cpu_has_avx )
     {
@@ -847,6 +870,29 @@ int main(int argc, char **argv)
     else
         printf("skipped\n");
 
+    printf("%-40s", "Testing vmovq 32(%edx),%xmm0...");
+    if ( stack_exec && cpu_has_avx )
+    {
+        decl_insn(vmovq_from_mem);
+
+        asm volatile ( "pcmpeqb %%xmm0, %%xmm0\n"
+                       put_insn(vmovq_from_mem, "vmovq 32(%0), %%xmm0")
+                       :: "d" (NULL) );
+
+        set_insn(vmovq_from_mem);
+        rc = x86_emulate(&ctxt, &emulops);
+        if ( rc != X86EMUL_OKAY || !check_eip(vmovq_from_mem) )
+            goto fail;
+        asm ( "pcmpgtb %%xmm1, %%xmm1\n\t"
+              "pcmpeqb %%xmm0, %%xmm1\n\t"
+              "pmovmskb %%xmm1, %0" : "=r" (rc) );
+        if ( rc != 0xffff )
+            goto fail;
+        printf("okay\n");
+    }
+    else
+        printf("skipped\n");
+
     printf("%-40s", "Testing movdqu %xmm2,(%ecx)...");
     if ( stack_exec && cpu_has_sse2 )
     {
@@ -1083,6 +1129,33 @@ int main(int argc, char **argv)
     else
         printf("skipped\n");
 
+    printf("%-40s", "Testing movd 32(%ecx),%mm4...");
+    if ( stack_exec && cpu_has_mmx )
+    {
+        decl_insn(movd_from_mem);
+
+        asm volatile ( "pcmpgtb %%mm4, %%mm4\n"
+                       put_insn(movd_from_mem, "movd 32(%0), %%mm4")
+                       :: "c" (NULL) );
+
+        set_insn(movd_from_mem);
+        rc = x86_emulate(&ctxt, &emulops);
+        if ( rc != X86EMUL_OKAY || !check_eip(movd_from_mem) )
+            goto fail;
+        asm ( "pxor %%mm2,%%mm2\n\t"
+              "pcmpeqb %%mm4, %%mm2\n\t"
+              "pmovmskb %%mm2, %0" : "=r" (rc) );
+        if ( rc != 0xf0 )
+            goto fail;
+        asm ( "pcmpeqb %%mm4, %%mm3\n\t"
+              "pmovmskb %%mm3, %0" : "=r" (rc) );
+        if ( rc != 0x0f )
+            goto fail;
+        printf("okay\n");
+    }
+    else
+        printf("skipped\n");
+
     printf("%-40s", "Testing movd %xmm2,32(%edx)...");
     if ( stack_exec && cpu_has_sse2 )
     {
@@ -1107,6 +1180,34 @@ int main(int argc, char **argv)
     else
         printf("skipped\n");
 
+    printf("%-40s", "Testing movd 32(%edx),%xmm3...");
+    if ( stack_exec && cpu_has_sse2 )
+    {
+        decl_insn(movd_from_mem2);
+
+        asm volatile ( "pcmpeqb %%xmm3, %%xmm3\n"
+                       put_insn(movd_from_mem2, "movd 32(%0), %%xmm3")
+                       :: "d" (NULL) );
+
+        set_insn(movd_from_mem2);
+        rc = x86_emulate(&ctxt, &emulops);
+        if ( rc != X86EMUL_OKAY || !check_eip(movd_from_mem2) )
+            goto fail;
+        asm ( "pxor %%xmm1,%%xmm1\n\t"
+              "pcmpeqb %%xmm3, %%xmm1\n\t"
+              "pmovmskb %%xmm1, %0" : "=r" (rc) );
+        if ( rc != 0xfff0 )
+            goto fail;
+        asm ( "pcmpeqb %%xmm2, %%xmm2\n\t"
+              "pcmpeqb %%xmm3, %%xmm2\n\t"
+              "pmovmskb %%xmm2, %0" : "=r" (rc) );
+        if ( rc != 0x000f )
+            goto fail;
+        printf("okay\n");
+    }
+    else
+        printf("skipped\n");
+
     printf("%-40s", "Testing vmovd %xmm1,32(%ecx)...");
     if ( stack_exec && cpu_has_avx )
     {
@@ -1131,6 +1232,34 @@ int main(int argc, char **argv)
     else
         printf("skipped\n");
 
+    printf("%-40s", "Testing vmovd 32(%ecx),%xmm2...");
+    if ( stack_exec && cpu_has_avx )
+    {
+        decl_insn(vmovd_from_mem);
+
+        asm volatile ( "pcmpeqb %%xmm2, %%xmm2\n"
+                       put_insn(vmovd_from_mem, "vmovd 32(%0), %%xmm2")
+                       :: "c" (NULL) );
+
+        set_insn(vmovd_from_mem);
+        rc = x86_emulate(&ctxt, &emulops);
+        if ( rc != X86EMUL_OKAY || !check_eip(vmovd_from_mem) )
+            goto fail;
+        asm ( "pxor %%xmm0,%%xmm0\n\t"
+              "pcmpeqb %%xmm2, %%xmm0\n\t"
+              "pmovmskb %%xmm0, %0" : "=r" (rc) );
+        if ( rc != 0xfff0 )
+            goto fail;
+        asm ( "pcmpeqb %%xmm1, %%xmm1\n\t"
+              "pcmpeqb %%xmm2, %%xmm1\n\t"
+              "pmovmskb %%xmm1, %0" : "=r" (rc) );
+        if ( rc != 0x000f )
+            goto fail;
+        printf("okay\n");
+    }
+    else
+        printf("skipped\n");
+
     printf("%-40s", "Testing movd %mm3,%ebx...");
     if ( stack_exec && cpu_has_mmx )
     {
@@ -1161,6 +1290,34 @@ int main(int argc, char **argv)
     else
         printf("skipped\n");
 
+    printf("%-40s", "Testing movd %ebx,%mm4...");
+    if ( stack_exec && cpu_has_mmx )
+    {
+        decl_insn(movd_from_reg);
+
+        /* See comment next to movd above. */
+        asm volatile ( "pcmpgtb %%mm4, %%mm4\n"
+                       put_insn(movd_from_reg, "movd %%ebx, %%mm4")
+                       :: );
+
+        set_insn(movd_from_reg);
+        rc = x86_emulate(&ctxt, &emulops);
+        if ( (rc != X86EMUL_OKAY) || !check_eip(movd_from_reg) )
+            goto fail;
+        asm ( "pxor %%mm2,%%mm2\n\t"
+              "pcmpeqb %%mm4, %%mm2\n\t"
+              "pmovmskb %%mm2, %0" : "=r" (rc) );
+        if ( rc != 0xf0 )
+            goto fail;
+        asm ( "pcmpeqb %%mm4, %%mm3\n\t"
+              "pmovmskb %%mm3, %0" : "=r" (rc) );
+        if ( rc != 0x0f )
+            goto fail;
+        printf("okay\n");
+    }
+    else
+        printf("skipped\n");
+
     printf("%-40s", "Testing movd %xmm2,%ebx...");
     if ( stack_exec && cpu_has_sse2 )
     {
@@ -1186,6 +1343,35 @@ int main(int argc, char **argv)
     else
         printf("skipped\n");
 
+    printf("%-40s", "Testing movd %ebx,%xmm3...");
+    if ( stack_exec && cpu_has_sse2 )
+    {
+        decl_insn(movd_from_reg2);
+
+        /* See comment next to movd above. */
+        asm volatile ( "pcmpgtb %%xmm3, %%xmm3\n"
+                       put_insn(movd_from_reg2, "movd %%ebx, %%xmm3")
+                       :: );
+
+        set_insn(movd_from_reg2);
+        rc = x86_emulate(&ctxt, &emulops);
+        if ( (rc != X86EMUL_OKAY) || !check_eip(movd_from_reg2) )
+            goto fail;
+        asm ( "pxor %%xmm1,%%xmm1\n\t"
+              "pcmpeqb %%xmm3, %%xmm1\n\t"
+              "pmovmskb %%xmm1, %0" : "=r" (rc) );
+        if ( rc != 0xfff0 )
+            goto fail;
+        asm ( "pcmpeqb %%xmm2, %%xmm2\n\t"
+              "pcmpeqb %%xmm3, %%xmm2\n\t"
+              "pmovmskb %%xmm2, %0" : "=r" (rc) );
+        if ( rc != 0x000f )
+            goto fail;
+        printf("okay\n");
+    }
+    else
+        printf("skipped\n");
+
     printf("%-40s", "Testing vmovd %xmm1,%ebx...");
     if ( stack_exec && cpu_has_avx )
     {
@@ -1208,6 +1394,35 @@ int main(int argc, char **argv)
             goto fail;
         printf("okay\n");
     }
+    else
+        printf("skipped\n");
+
+    printf("%-40s", "Testing vmovd %ebx,%xmm2...");
+    if ( stack_exec && cpu_has_avx )
+    {
+        decl_insn(vmovd_from_reg);
+
+        /* See comment next to movd above. */
+        asm volatile ( "pcmpgtb %%xmm2, %%xmm2\n"
+                       put_insn(vmovd_from_reg, "vmovd %%ebx, %%xmm2")
+                       :: );
+
+        set_insn(vmovd_from_reg);
+        rc = x86_emulate(&ctxt, &emulops);
+        if ( (rc != X86EMUL_OKAY) || !check_eip(vmovd_from_reg) )
+            goto fail;
+        asm ( "pxor %%xmm0,%%xmm0\n\t"
+              "pcmpeqb %%xmm2, %%xmm0\n\t"
+              "pmovmskb %%xmm0, %0" : "=r" (rc) );
+        if ( rc != 0xfff0 )
+            goto fail;
+        asm ( "pcmpeqb %%xmm1, %%xmm1\n\t"
+              "pcmpeqb %%xmm2, %%xmm1\n\t"
+              "pmovmskb %%xmm1, %0" : "=r" (rc) );
+        if ( rc != 0x000f )
+            goto fail;
+        printf("okay\n");
+    }
     else
         printf("skipped\n");
 
--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -4995,6 +4995,12 @@ x86_emulate(
                                          /* vmovntdq ymm,m256 */
         fail_if(ea.type != OP_MEM);
         /* fall through */
+    case X86EMUL_OPC(0x0f, 0x6e):        /* movd r/m32,mm */
+                                         /* movq r/m64,mm */
+    case X86EMUL_OPC_66(0x0f, 0x6e):     /* movd r/m32,xmm */
+                                         /* movq r/m64,xmm */
+    case X86EMUL_OPC_VEX_66(0x0f, 0x6e): /* vmovd r/m32,xmm */
+                                         /* vmovq r/m64,xmm */
     case X86EMUL_OPC(0x0f, 0x6f):        /* movq mm/m64,mm */
     case X86EMUL_OPC_66(0x0f, 0x6f):     /* movdqa xmm/m128,xmm */
     case X86EMUL_OPC_F3(0x0f, 0x6f):     /* movdqu xmm/m128,xmm */
@@ -5008,6 +5014,8 @@ x86_emulate(
                                          /* movq xmm,r/m64 */
     case X86EMUL_OPC_VEX_66(0x0f, 0x7e): /* vmovd xmm,r/m32 */
                                          /* vmovq xmm,r/m64 */
+    case X86EMUL_OPC_F3(0x0f, 0x7e):     /* movq xmm/m64,xmm */
+    case X86EMUL_OPC_VEX_F3(0x0f, 0x7e): /* vmovq xmm/m64,xmm */
     case X86EMUL_OPC(0x0f, 0x7f):        /* movq mm,mm/m64 */
     case X86EMUL_OPC_66(0x0f, 0x7f):     /* movdqa xmm,xmm/m128 */
     case X86EMUL_OPC_VEX_66(0x0f, 0x7f): /* vmovdqa xmm,xmm/m128 */
@@ -5019,6 +5027,7 @@ x86_emulate(
     case X86EMUL_OPC_VEX_66(0x0f, 0xd6): /* vmovq xmm,xmm/m64 */
     {
         uint8_t *buf = get_stub(stub);
+        bool load = false;
 
         fic.insn_bytes = 5;
         buf[0] = 0x3e;
@@ -5062,8 +5071,20 @@ x86_emulate(
         {
         case 0x7e:
             generate_exception_if(vex.l, EXC_UD);
-            ea.bytes = op_bytes;
+            if ( vex.pfx != vex_f3 )
+                ea.bytes = op_bytes;
+            else
+            {
+                buf[0] = 0xf3;
+                ea.bytes = 8;
+                /* fall through */
+        case 0x6f:
+                load = true;
+            }
             break;
+        case 0x6e:
+            load = true;
+            /* fall through */
         case 0xd6:
             generate_exception_if(vex.l, EXC_UD);
             ea.bytes = 8;
@@ -5081,13 +5102,13 @@ x86_emulate(
                                   !is_aligned(ea.mem.seg, ea.mem.off, ea.bytes,
                                               ctxt, ops),
                                   EXC_GP, 0);
-            if ( b == 0x6f )
+            if ( load )
                 rc = ops->read(ea.mem.seg, ea.mem.off+0, mmvalp,
                                ea.bytes, ctxt);
             else
                 fail_if(!ops->write); /* Check before running the stub. */
         }
-        if ( ea.type == OP_MEM || b == 0x7e )
+        if ( ea.type == OP_MEM || ((b & ~0x10) == 0x6e && vex.pfx != vex_f3) )
         {
             /* Convert memory operand or GPR destination to (%rAX) */
             rex_prefix &= ~REX_B;
@@ -5095,7 +5116,7 @@ x86_emulate(
             buf[4] &= 0x38;
             if ( ea.type == OP_MEM )
                 ea.reg = (void *)mmvalp;
-            else /* Ensure zero-extension of a 32-bit result. */
+            else if ( !load ) /* Ensure zero-extension of a 32-bit result. */
                 *ea.reg = 0;
         }
         if ( !rc )
@@ -5106,7 +5127,7 @@ x86_emulate(
         }
         put_fpu(&fic);
         put_stub(stub);
-        if ( !rc && (b != 0x6f) && (ea.type == OP_MEM) )
+        if ( !rc && !load && (ea.type == OP_MEM) )
         {
             ASSERT(ops->write); /* See the fail_if() above. */
             rc = ops->write(ea.mem.seg, ea.mem.off, mmvalp,
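
(Aside, for readers of the x86_emulate.c hunk above: the case 0x6f label
is deliberately placed inside the else branch of case 0x7e, so both the
F3-prefixed 0x7e form and all 0x6f forms reach the load = true
statement.  Unfolding the fall-throughs, the inner switch is roughly
equivalent to the sketch below; this is an illustrative rewrite, not
code from the patch.)

switch ( b )
{
case 0x7e:
    generate_exception_if(vex.l, EXC_UD);
    if ( vex.pfx != vex_f3 )
        ea.bytes = op_bytes;  /* movd/movq xmm,r/m: a store */
    else
    {
        buf[0] = 0xf3;        /* movq xmm/m64,xmm keeps its F3 prefix */
        ea.bytes = 8;
        load = true;          /* ... and is a load */
    }
    break;
case 0x6f:                    /* movq/movdqa/movdqu mm|xmm/mem,reg */
    load = true;              /* ea.bytes (8 or 16) was set earlier,
                                 by the vex.pfx switch */
    break;
case 0x6e:                    /* movd/movq r/m,mm or r/m,xmm */
    load = true;
    /* fall through */
case 0xd6:                    /* movq xmm,xmm/m64 */
    generate_exception_if(vex.l, EXC_UD);
    ea.bytes = 8;
    break;
}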


* [PATCH 2/3] x86emul: support {,V}LDDQU
  2016-12-14  9:50 [PATCH 0/3] x86emul: support more SSE/AVX moves Jan Beulich
  2016-12-14  9:55 ` [PATCH 1/3] x86emul: support load forms of {,V}MOV{D,Q} Jan Beulich
@ 2016-12-14  9:56 ` Jan Beulich
  2017-01-06 11:12   ` Andrew Cooper
  2016-12-14  9:56 ` [PATCH 3/3] x86emul: support {,V}MOVNTDQA Jan Beulich
  2 siblings, 1 reply; 7+ messages in thread
From: Jan Beulich @ 2016-12-14  9:56 UTC (permalink / raw)
  To: xen-devel; +Cc: Andrew Cooper

Also take the opportunity to adjust the vmovdqu test case which the new
one here was cloned from: to zero a ymm register we don't need to jump
through hoops, since 128-bit AVX insns zero the upper portion of the
destination register. Additionally, the disabled AVX2 code was using
the wrong YMM register.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
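
(As an illustrative aside, not part of the patch: the property the
description relies on is that 128-bit VEX encoded instructions clear
bits 255:128 of their destination register, so a single 128-bit vpxor
suffices to zero a full ymm register.)

/* Sketch: zero all 256 bits of %ymm4 using only a 128-bit AVX insn. */
asm volatile ( "vpxor %%xmm4, %%xmm4, %%xmm4" :: );
/* Bits 127:0 are cleared by the xor; bits 255:128 by VEX.128 semantics. */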

--- a/tools/tests/x86_emulator/test_x86_emulator.c
+++ b/tools/tests/x86_emulator/test_x86_emulator.c
@@ -968,12 +968,7 @@ int main(int argc, char **argv)
     {
         decl_insn(vmovdqu_from_mem);
 
-#if 0 /* Don't use AVX2 instructions for now */
-        asm volatile ( "vpcmpgtb %%ymm4, %%ymm4, %%ymm4\n"
-#else
-        asm volatile ( "vpcmpgtb %%xmm4, %%xmm4, %%xmm4\n\t"
-                       "vinsertf128 $1, %%xmm4, %%ymm4, %%ymm4\n"
-#endif
+        asm volatile ( "vpxor %%xmm4, %%xmm4, %%xmm4\n"
                        put_insn(vmovdqu_from_mem, "vmovdqu (%0), %%ymm4")
                        :: "d" (NULL) );
 
@@ -987,7 +982,7 @@ int main(int argc, char **argv)
 #if 0 /* Don't use AVX2 instructions for now */
         asm ( "vpcmpeqb %%ymm2, %%ymm2, %%ymm2\n\t"
               "vpcmpeqb %%ymm4, %%ymm2, %%ymm0\n\t"
-              "vpmovmskb %%ymm1, %0" : "=r" (rc) );
+              "vpmovmskb %%ymm0, %0" : "=r" (rc) );
 #else
         asm ( "vextractf128 $1, %%ymm4, %%xmm3\n\t"
               "vpcmpeqb %%xmm2, %%xmm2, %%xmm2\n\t"
@@ -1404,6 +1399,67 @@ int main(int argc, char **argv)
         printf("skipped\n");
 #endif
 
+    printf("%-40s", "Testing lddqu 4(%edx),%xmm4...");
+    if ( stack_exec && cpu_has_sse3 )
+    {
+        decl_insn(lddqu);
+
+        asm volatile ( "pcmpgtb %%xmm4, %%xmm4\n"
+                       put_insn(lddqu, "lddqu 4(%0), %%xmm4")
+                       :: "d" (NULL) );
+
+        set_insn(lddqu);
+        memset(res, 0x55, 64);
+        memset(res + 1, 0xff, 16);
+        regs.edx = (unsigned long)res;
+        rc = x86_emulate(&ctxt, &emulops);
+        if ( rc != X86EMUL_OKAY || !check_eip(lddqu) )
+            goto fail;
+        asm ( "pcmpeqb %%xmm2, %%xmm2\n\t"
+              "pcmpeqb %%xmm4, %%xmm2\n\t"
+              "pmovmskb %%xmm2, %0" : "=r" (rc) );
+        if ( rc != 0xffff )
+            goto fail;
+        printf("okay\n");
+    }
+    else
+        printf("skipped\n");
+
+    printf("%-40s", "Testing vlddqu (%ecx),%ymm4...");
+    if ( stack_exec && cpu_has_avx )
+    {
+        decl_insn(vlddqu);
+
+        asm volatile ( "vpxor %%xmm4, %%xmm4, %%xmm4\n"
+                       put_insn(vlddqu, "vlddqu (%0), %%ymm4")
+                       :: "c" (NULL) );
+
+        set_insn(vlddqu);
+        memset(res + 1, 0xff, 32);
+        regs.ecx = (unsigned long)(res + 1);
+        rc = x86_emulate(&ctxt, &emulops);
+        if ( rc != X86EMUL_OKAY || !check_eip(vlddqu) )
+            goto fail;
+#if 0 /* Don't use AVX2 instructions for now */
+        asm ( "vpcmpeqb %%ymm2, %%ymm2, %%ymm2\n\t"
+              "vpcmpeqb %%ymm4, %%ymm2, %%ymm0\n\t"
+              "vpmovmskb %%ymm0, %0" : "=r" (rc) );
+#else
+        asm ( "vextractf128 $1, %%ymm4, %%xmm3\n\t"
+              "vpcmpeqb %%xmm2, %%xmm2, %%xmm2\n\t"
+              "vpcmpeqb %%xmm4, %%xmm2, %%xmm0\n\t"
+              "vpcmpeqb %%xmm3, %%xmm2, %%xmm1\n\t"
+              "vpmovmskb %%xmm0, %0\n\t"
+              "vpmovmskb %%xmm1, %1" : "=r" (rc), "=r" (i) );
+        rc |= i << 16;
+#endif
+        if ( ~rc )
+            goto fail;
+        printf("okay\n");
+    }
+    else
+        printf("skipped\n");
+
 #undef decl_insn
 #undef put_insn
 #undef set_insn
--- a/tools/tests/x86_emulator/x86_emulate.h
+++ b/tools/tests/x86_emulator/x86_emulate.h
@@ -71,6 +71,12 @@ static int cpuid(
     (edx & (1U << 26)) != 0; \
 })
 
+#define cpu_has_sse3 ({ \
+    unsigned int eax = 1, ecx = 0; \
+    emul_test_cpuid(&eax, &eax, &ecx, &eax, NULL); \
+    (ecx & (1U << 0)) != 0; \
+})
+
 #define cpu_has_xsave ({ \
     unsigned int eax = 1, ecx = 0; \
     emul_test_cpuid(&eax, &eax, &ecx, &eax, NULL); \
--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -4993,6 +4993,9 @@ x86_emulate(
     case X86EMUL_OPC_66(0x0f, 0xe7):     /* movntdq xmm,m128 */
     case X86EMUL_OPC_VEX_66(0x0f, 0xe7): /* vmovntdq xmm,m128 */
                                          /* vmovntdq ymm,m256 */
+    case X86EMUL_OPC_F2(0x0f, 0xf0):     /* lddqu xmm,m128 */
+    case X86EMUL_OPC_VEX_F2(0x0f, 0xf0): /* vlddqu xmm,m128 */
+                                         /* vlddqu ymm,m256 */
         fail_if(ea.type != OP_MEM);
         /* fall through */
     case X86EMUL_OPC(0x0f, 0x6e):        /* movd r/m32,mm */
@@ -5040,6 +5043,11 @@ x86_emulate(
         {
             switch ( vex.pfx )
             {
+            case vex_f2:
+                /* Converting lddqu to movdqa (see also below). */
+                vcpu_must_have(sse3);
+                buf[3] = 0x6f;
+                /* fall through */
             case vex_66:
             case vex_f3:
                 host_and_vcpu_must_have(sse2);
@@ -5056,8 +5064,6 @@ x86_emulate(
                 get_fpu(X86EMUL_FPU_mmx, &fic);
                 ea.bytes = 8;
                 break;
-            default:
-                goto cannot_emulate;
             }
         }
         else
@@ -5079,6 +5085,7 @@ x86_emulate(
                 ea.bytes = 8;
                 /* fall through */
         case 0x6f:
+        case 0xf0:
                 load = true;
             }
             break;
@@ -5094,7 +5101,7 @@ x86_emulate(
         {
             uint32_t mxcsr = 0;
 
-            if ( ea.bytes < 16 || vex.pfx == vex_f3 )
+            if ( ea.bytes < 16 || vex.pfx >= vex_f3 )
                 mxcsr = MXCSR_MM;
             else if ( vcpu_has_misalignsse() )
                 asm ( "stmxcsr %0" : "=m" (mxcsr) );
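
(Aside, annotation mine: the vex.pfx >= vex_f3 comparison in the last
hunk works because the legacy-prefix encoding used here is none=0,
66=1, F3=2, F2=3; the condition thus covers both movdqu (F3) and the
newly handled lddqu (F2), neither of which raises #GP on unaligned
accesses.)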


* [PATCH 3/3] x86emul: support {,V}MOVNTDQA
  2016-12-14  9:50 [PATCH 0/3] x86emul: support more SSE/AVX moves Jan Beulich
  2016-12-14  9:55 ` [PATCH 1/3] x86emul: support load forms of {,V}MOV{D,Q} Jan Beulich
  2016-12-14  9:56 ` [PATCH 2/3] x86emul: support {,V}LDDQU Jan Beulich
@ 2016-12-14  9:56 ` Jan Beulich
  2 siblings, 0 replies; 7+ messages in thread
From: Jan Beulich @ 2016-12-14  9:56 UTC (permalink / raw)
  To: xen-devel; +Cc: Andrew Cooper

Signed-off-by: Jan Beulich <jbeulich@suse.com>

--- a/tools/tests/x86_emulator/test_x86_emulator.c
+++ b/tools/tests/x86_emulator/test_x86_emulator.c
@@ -1608,6 +1608,74 @@ int main(int argc, char **argv)
             goto fail;
 #if 0 /* Don't use AVX2 instructions for now */
         asm ( "vpcmpeqb %%ymm2, %%ymm2, %%ymm2\n\t"
+              "vpcmpeqb %%ymm4, %%ymm2, %%ymm0\n\t"
+              "vpmovmskb %%ymm0, %0" : "=r" (rc) );
+#else
+        asm ( "vextractf128 $1, %%ymm4, %%xmm3\n\t"
+              "vpcmpeqb %%xmm2, %%xmm2, %%xmm2\n\t"
+              "vpcmpeqb %%xmm4, %%xmm2, %%xmm0\n\t"
+              "vpcmpeqb %%xmm3, %%xmm2, %%xmm1\n\t"
+              "vpmovmskb %%xmm0, %0\n\t"
+              "vpmovmskb %%xmm1, %1" : "=r" (rc), "=r" (i) );
+        rc |= i << 16;
+#endif
+        if ( ~rc )
+            goto fail;
+        printf("okay\n");
+    }
+    else
+        printf("skipped\n");
+
+    printf("%-40s", "Testing movntdqa 16(%edx),%xmm4...");
+    if ( stack_exec && cpu_has_sse4_1 )
+    {
+        decl_insn(movntdqa);
+
+        asm volatile ( "pcmpgtb %%xmm4, %%xmm4\n"
+                       put_insn(movntdqa, "movntdqa 16(%0), %%xmm4")
+                       :: "d" (NULL) );
+
+        set_insn(movntdqa);
+        memset(res, 0x55, 64);
+        memset(res + 4, 0xff, 16);
+        regs.edx = (unsigned long)res;
+        rc = x86_emulate(&ctxt, &emulops);
+        if ( rc != X86EMUL_OKAY || !check_eip(movntdqa) )
+            goto fail;
+        asm ( "pcmpeqb %%xmm2, %%xmm2\n\t"
+              "pcmpeqb %%xmm4, %%xmm2\n\t"
+              "pmovmskb %%xmm2, %0" : "=r" (rc) );
+        if ( rc != 0xffff )
+            goto fail;
+        printf("okay\n");
+    }
+    else
+        printf("skipped\n");
+
+    printf("%-40s", "Testing vmovntdqa (%ecx),%ymm4...");
+    if ( stack_exec && cpu_has_avx2 )
+    {
+        decl_insn(vmovntdqa);
+
+#if 0 /* Don't use AVX2 instructions for now */
+        asm volatile ( "vpxor %%ymm4, %%ymm4, %%ymm4\n"
+                       put_insn(vmovntdqa, "vmovntdqa (%0), %%ymm4")
+                       :: "c" (NULL) );
+#else
+        asm volatile ( "vpxor %xmm4, %xmm4, %xmm4\n"
+                       put_insn(vmovntdqa,
+                                ".byte 0xc4, 0xe2, 0x7d, 0x2a, 0x21") );
+#endif
+
+        set_insn(vmovntdqa);
+        memset(res, 0x55, 96);
+        memset(res + 8, 0xff, 32);
+        regs.ecx = (unsigned long)(res + 8);
+        rc = x86_emulate(&ctxt, &emulops);
+        if ( rc != X86EMUL_OKAY || !check_eip(vmovntdqa) )
+            goto fail;
+#if 0 /* Don't use AVX2 instructions for now */
+        asm ( "vpcmpeqb %%ymm2, %%ymm2, %%ymm2\n\t"
               "vpcmpeqb %%ymm4, %%ymm2, %%ymm0\n\t"
               "vpmovmskb %%ymm0, %0" : "=r" (rc) );
 #else
--- a/tools/tests/x86_emulator/x86_emulate.h
+++ b/tools/tests/x86_emulator/x86_emulate.h
@@ -77,6 +77,12 @@ static int cpuid(
     (ecx & (1U << 0)) != 0; \
 })
 
+#define cpu_has_sse4_1 ({ \
+    unsigned int eax = 1, ecx = 0; \
+    emul_test_cpuid(&eax, &eax, &ecx, &eax, NULL); \
+    (ecx & (1U << 19)) != 0; \
+})
+
 #define cpu_has_xsave ({ \
     unsigned int eax = 1, ecx = 0; \
     emul_test_cpuid(&eax, &eax, &ecx, &eax, NULL); \
--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -1298,6 +1298,7 @@ static bool vcpu_has(
 #define vcpu_has_sse2()        vcpu_has(         1, EDX, 26, ctxt, ops)
 #define vcpu_has_sse3()        vcpu_has(         1, ECX,  0, ctxt, ops)
 #define vcpu_has_cx16()        vcpu_has(         1, ECX, 13, ctxt, ops)
+#define vcpu_has_sse4_1()      vcpu_has(         1, ECX, 19, ctxt, ops)
 #define vcpu_has_sse4_2()      vcpu_has(         1, ECX, 20, ctxt, ops)
 #define vcpu_has_movbe()       vcpu_has(         1, ECX, 22, ctxt, ops)
 #define vcpu_has_avx()         vcpu_has(         1, ECX, 28, ctxt, ops)
@@ -1305,6 +1306,7 @@ static bool vcpu_has(
 #define vcpu_has_lzcnt()       vcpu_has(0x80000001, ECX,  5, ctxt, ops)
 #define vcpu_has_misalignsse() vcpu_has(0x80000001, ECX,  7, ctxt, ops)
 #define vcpu_has_bmi1()        vcpu_has(         7, EBX,  3, ctxt, ops)
+#define vcpu_has_avx2()        vcpu_has(         7, EBX,  5, ctxt, ops)
 #define vcpu_has_hle()         vcpu_has(         7, EBX,  4, ctxt, ops)
 #define vcpu_has_rtm()         vcpu_has(         7, EBX, 11, ctxt, ops)
 #define vcpu_has_smap()        vcpu_has(         7, EBX, 20, ctxt, ops)
@@ -5005,6 +5007,7 @@ x86_emulate(
     case X86EMUL_OPC_VEX_66(0x0f, 0x6e): /* vmovd r/m32,xmm */
                                          /* vmovq r/m64,xmm */
     case X86EMUL_OPC(0x0f, 0x6f):        /* movq mm/m64,mm */
+    movdqa:
     case X86EMUL_OPC_66(0x0f, 0x6f):     /* movdqa xmm/m128,xmm */
     case X86EMUL_OPC_F3(0x0f, 0x6f):     /* movdqu xmm/m128,xmm */
     case X86EMUL_OPC_VEX_66(0x0f, 0x6f): /* vmovdqa xmm/m128,xmm */
@@ -5442,6 +5445,22 @@ x86_emulate(
         }
         break;
 
+    case X86EMUL_OPC_66(0x0f38, 0x2a): /* movntdqa m128,xmm */
+    case X86EMUL_OPC_VEX_66(0x0f38, 0x2a): /* vmovntdqa m128,xmm */
+                                           /* vmovntdqa m256,ymm */
+        fail_if(ea.type != OP_MEM);
+        /* Ignore the non-temporal hint for now, using movdqa instead. */
+        b = 0x6f;
+        if ( !vex.opcx )
+            vcpu_must_have(sse4_1);
+        else
+        {
+            vex.opcx = vex_0f;
+            if ( vex.l )
+                vcpu_must_have(avx2);
+        }
+        goto movdqa;
+
     case X86EMUL_OPC(0x0f38, 0xf0): /* movbe m,r */
     case X86EMUL_OPC(0x0f38, 0xf1): /* movbe r,m */
         vcpu_must_have(movbe);



* Re: [PATCH 1/3] x86emul: support load forms of {,V}MOV{D,Q}
  2016-12-14  9:55 ` [PATCH 1/3] x86emul: support load forms of {,V}MOV{D,Q} Jan Beulich
@ 2017-01-05 22:31   ` Andrew Cooper
  2017-01-06 13:35     ` Jan Beulich
  0 siblings, 1 reply; 7+ messages in thread
From: Andrew Cooper @ 2017-01-05 22:31 UTC (permalink / raw)
  To: Jan Beulich, xen-devel

On 14/12/16 09:55, Jan Beulich wrote:
> --- a/xen/arch/x86/x86_emulate/x86_emulate.c
> +++ b/xen/arch/x86/x86_emulate/x86_emulate.c
> @@ -4995,6 +4995,12 @@ x86_emulate(
>                                           /* vmovntdq ymm,m256 */
>          fail_if(ea.type != OP_MEM);
>          /* fall through */
> +    case X86EMUL_OPC(0x0f, 0x6e):        /* movd r/m32,mm */
> +                                         /* movq r/m64,mm */
> +    case X86EMUL_OPC_66(0x0f, 0x6e):     /* movd r/m32,xmm */
> +                                         /* movq r/m64,xmm */
> +    case X86EMUL_OPC_VEX_66(0x0f, 0x6e): /* vmovd r/m32,xmm */
> +                                         /* vmovq r/m64,xmm */
>      case X86EMUL_OPC(0x0f, 0x6f):        /* movq mm/m64,mm */
>      case X86EMUL_OPC_66(0x0f, 0x6f):     /* movdqa xmm/m128,xmm */
>      case X86EMUL_OPC_F3(0x0f, 0x6f):     /* movdqu xmm/m128,xmm */
> @@ -5008,6 +5014,8 @@ x86_emulate(
>                                           /* movq xmm,r/m64 */
>      case X86EMUL_OPC_VEX_66(0x0f, 0x7e): /* vmovd xmm,r/m32 */
>                                           /* vmovq xmm,r/m64 */
> +    case X86EMUL_OPC_F3(0x0f, 0x7e):     /* movq xmm/m64,xmm */
> +    case X86EMUL_OPC_VEX_F3(0x0f, 0x7e): /* vmovq xmm/m64,xmm */
>      case X86EMUL_OPC(0x0f, 0x7f):        /* movq mm,mm/m64 */
>      case X86EMUL_OPC_66(0x0f, 0x7f):     /* movdqa xmm,xmm/m128 */
>      case X86EMUL_OPC_VEX_66(0x0f, 0x7f): /* vmovdqa xmm,xmm/m128 */
> @@ -5019,6 +5027,7 @@ x86_emulate(
>      case X86EMUL_OPC_VEX_66(0x0f, 0xd6): /* vmovq xmm,xmm/m64 */
>      {
>          uint8_t *buf = get_stub(stub);
> +        bool load = false;

I am afraid that this logic is already almost opaque, and these changes
make it even more complicated to follow; sufficiently so that I can't
review it.  I have tried and failed to look at the end result and
conclude whether it is correct or not.

(I have no specific reasons to think that it isn't correct, but I can't
claim to have reviewed it with integrity.)

This block of code in particular carries a higher security risk than most
in x86_emulate(), because of the danger of accidentally executing a stub
with a modrm which selects anything other than a SIMD register or a
(%rax) r/m destination.

Therefore, I'd like to see what we can do to make the logic easier to
follow.
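
(For reference: the sanitisation the existing code performs before
invoking the stub is the "Convert memory operand or GPR destination to
(%rAX)" hunk quoted in the patch.  An annotated reading follows; the
comments are mine, not from the source.)

/*
 * ModRM byte layout: mod[7:6] reg[5:3] r/m[2:0].
 * Masking buf[4] with 0x38 keeps only the reg field, forcing mod=00
 * and r/m=000, i.e. a (%rax) memory operand; clearing REX_B keeps the
 * REX prefix from extending r/m beyond %rax.  The stub is then
 * presumably executed with %rax pointing at the local mmvalp buffer.
 */
rex_prefix &= ~REX_B;
buf[4] &= 0x38;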


This block deals with 3 kinds of operations, load / move / store,
depending on whether the source operand is memory, both operands are
registers, or the destination operand is memory.  Beyond that, however,
there doesn't appear to be any consistency to the adjustments required
to make the stub safe.

For a start, vex.pfx continues to be a source of confusion, as it refers
to legacy prefixes rather than the vex prefix, while vex.opcx is the
entity which refers to legacy/vex/evex/xop encoding.  Renaming these
constants alone would be a help.

I wonder if doing things like this would help?

enum { LOAD, MOVE, STORE } type;

switch ( b )
{
    case ...
        type = LOAD;
        break;
    case ...
        type = MOVE;
        break;
    case ...
        type = STORE;
        break;
}

In particular, having a 128-line case statement (before this patch; 149
after), with most semantics based on b being one of 5 (before; 7 after)
different raw numbers, is too much cognitive context to hold.
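
(For concreteness, annotation mine rather than from the patch: as far
as I can tell from the hunks above, the opcode-to-direction mapping
which the patch encodes through its load flag amounts to the sketch
below.)

switch ( b )
{
case 0x6e:                  /* {,v}mov{d,q} r/m,{mm,xmm} */
case 0x6f:                  /* movq/movdqa/movdqu mm|xmm/mem,reg */
    load = true;
    break;
case 0x7e:                  /* F3 form loads; the other forms store */
    load = (vex.pfx == vex_f3);
    break;
case 0x7f:                  /* movq/movdqa/movdqu reg,mm|xmm/mem */
case 0xd6:                  /* {,v}movq xmm,xmm/m64 */
    load = false;
    break;
}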

~Andrew


* Re: [PATCH 2/3] x86emul: support {,V}LDDQU
  2016-12-14  9:56 ` [PATCH 2/3] x86emul: support {,V}LDDQU Jan Beulich
@ 2017-01-06 11:12   ` Andrew Cooper
  0 siblings, 0 replies; 7+ messages in thread
From: Andrew Cooper @ 2017-01-06 11:12 UTC (permalink / raw)
  To: Jan Beulich, xen-devel

On 14/12/16 09:56, Jan Beulich wrote:
> Also take the opportunity to adjust the vmovdqu test case which the new
> one here was cloned from: to zero a ymm register we don't need to jump
> through hoops, since 128-bit AVX insns zero the upper portion of the
> destination register. Additionally, the disabled AVX2 code was using
> the wrong YMM register.
>
> Signed-off-by: Jan Beulich <jbeulich@suse.com>

This change in isolation does look correct, but it does further add to
the readability/comprehensibility problems I raised on the previous patch.

As long as we can suitably address those issues, this should be fine.

~Andrew


* Re: [PATCH 1/3] x86emul: support load forms of {,V}MOV{D,Q}
  2017-01-05 22:31   ` Andrew Cooper
@ 2017-01-06 13:35     ` Jan Beulich
  0 siblings, 0 replies; 7+ messages in thread
From: Jan Beulich @ 2017-01-06 13:35 UTC (permalink / raw)
  To: Andrew Cooper; +Cc: xen-devel

>>> On 05.01.17 at 23:31, <andrew.cooper3@citrix.com> wrote:
> On 14/12/16 09:55, Jan Beulich wrote:
>> --- a/xen/arch/x86/x86_emulate/x86_emulate.c
>> +++ b/xen/arch/x86/x86_emulate/x86_emulate.c
>> @@ -4995,6 +4995,12 @@ x86_emulate(
>>                                           /* vmovntdq ymm,m256 */
>>          fail_if(ea.type != OP_MEM);
>>          /* fall through */
>> +    case X86EMUL_OPC(0x0f, 0x6e):        /* movd r/m32,mm */
>> +                                         /* movq r/m64,mm */
>> +    case X86EMUL_OPC_66(0x0f, 0x6e):     /* movd r/m32,xmm */
>> +                                         /* movq r/m64,xmm */
>> +    case X86EMUL_OPC_VEX_66(0x0f, 0x6e): /* vmovd r/m32,xmm */
>> +                                         /* vmovq r/m64,xmm */
>>      case X86EMUL_OPC(0x0f, 0x6f):        /* movq mm/m64,mm */
>>      case X86EMUL_OPC_66(0x0f, 0x6f):     /* movdqa xmm/m128,xmm */
>>      case X86EMUL_OPC_F3(0x0f, 0x6f):     /* movdqu xmm/m128,xmm */
>> @@ -5008,6 +5014,8 @@ x86_emulate(
>>                                           /* movq xmm,r/m64 */
>>      case X86EMUL_OPC_VEX_66(0x0f, 0x7e): /* vmovd xmm,r/m32 */
>>                                           /* vmovq xmm,r/m64 */
>> +    case X86EMUL_OPC_F3(0x0f, 0x7e):     /* movq xmm/m64,xmm */
>> +    case X86EMUL_OPC_VEX_F3(0x0f, 0x7e): /* vmovq xmm/m64,xmm */
>>      case X86EMUL_OPC(0x0f, 0x7f):        /* movq mm,mm/m64 */
>>      case X86EMUL_OPC_66(0x0f, 0x7f):     /* movdqa xmm,xmm/m128 */
>>      case X86EMUL_OPC_VEX_66(0x0f, 0x7f): /* vmovdqa xmm,xmm/m128 */
>> @@ -5019,6 +5027,7 @@ x86_emulate(
>>      case X86EMUL_OPC_VEX_66(0x0f, 0xd6): /* vmovq xmm,xmm/m64 */
>>      {
>>          uint8_t *buf = get_stub(stub);
>> +        bool load = false;
> 
> I am afraid that this logic is already almost opaque, and these changes
> make it even more complicated to follow; sufficiently so that I can't
> review it.  I have tried and failed to look at the end result and
> conclude whether it is correct or not.
> 
> (I have no specific reasons to think that it isn't correct, but I can't
> claim to have reviewed it with integrity.)
> 
> This block of code in particular carries a higher security risk than most
> in x86_emulate(), because of the danger of accidentally executing a stub
> with a modrm which selects anything other than a SIMD register or a
> (%rax) r/m destination.
> 
> Therefore, I'd like to see what we can do to make the logic easier to
> follow.
> 
> 
> This block deals with 3 kinds of operations, load / move / store,
> depending on whether the source operand is memory, both operands are
> registers, or the destination operand is memory.  Beyond that, however,
> there doesn't appear to be any consistency to the adjustments required
> to make the stub safe.
> 
> For a start, vex.pfx continues to be a source of confusion, as it refers
> to legacy prefixes rather than the vex prefix, while vex.opcx is the
> entity which refers to legacy/vex/evex/xop encoding.  Renaming these
> constants alone would be a help.
> 
> I wonder if doing things like this would help?
> 
> enum { LOAD, MOVE, STORE } type;

With what I'm in the process of doing right now, some of this
should become easier (without the need for an enum like the one
above). I can see about re-ordering patches once the widening of
SSE/AVX support has progressed far enough, but right now I have to
admit that my plan was to leave this particular case statement
mostly alone for the time being (whereas I've already shrunk
down the one dealing with MOVAPS & Co).

Jan

> switch ( b )
> {
>     case ...
>         type = LOAD;
>         break;
>     case ...
>         type = MOVE;
>         break;
>     case ...
>         type = STORE;
>         break;
> }
> 
> In particular, having a 128-line case statement (before this patch; 149
> after), with most semantics based on b being one of 5 (before; 7 after)
> different raw numbers, is too much cognitive context to hold.
> 
> ~Andrew


