xen-devel.lists.xenproject.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 0/3] x86: instruction emulator improvements
@ 2016-03-11 17:29 Jan Beulich
  2016-03-11 17:33 ` [PATCH 1/3] x86: rename XMM* features to SSE* Jan Beulich
                   ` (2 more replies)
  0 siblings, 3 replies; 7+ messages in thread
From: Jan Beulich @ 2016-03-11 17:29 UTC (permalink / raw)
  To: xen-devel; +Cc: Andrew Cooper, Keir Fraser

1: x86: rename XMM* features to SSE*
2: x86emul: check host features alongside guest ones where needed
3: x86emul: support MOVBE and CRC32

Signed-off-by: Jan Beulich <jbeulich@suse.com>


_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 7+ messages in thread

* [PATCH 1/3] x86: rename XMM* features to SSE*
  2016-03-11 17:29 [PATCH 0/3] x86: instruction emulator improvements Jan Beulich
@ 2016-03-11 17:33 ` Jan Beulich
  2016-03-11 17:34 ` [PATCH 2/3] x86emul: check host features alongside guest ones where needed Jan Beulich
  2016-03-11 17:35 ` [PATCH 3/3] x86emul: support MOVBE and CRC32 Jan Beulich
  2 siblings, 0 replies; 7+ messages in thread
From: Jan Beulich @ 2016-03-11 17:33 UTC (permalink / raw)
  To: xen-devel; +Cc: Andrew Cooper, Keir Fraser

[-- Attachment #1: Type: text/plain, Size: 5217 bytes --]

The latter are their canonical names, used already in the instruction
emulator.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>

--- a/xen/arch/x86/cpu/common.c
+++ b/xen/arch/x86/cpu/common.c
@@ -205,12 +205,12 @@ static void __init early_cpu_detect(void
 		c->x86_model += ((eax >> 16) & 0xF) << 4;
 	c->x86_mask = eax & 15;
 	edx &= ~cleared_caps[cpufeat_word(X86_FEATURE_FPU)];
-	ecx &= ~cleared_caps[cpufeat_word(X86_FEATURE_XMM3)];
+	ecx &= ~cleared_caps[cpufeat_word(X86_FEATURE_SSE3)];
 	if (edx & cpufeat_mask(X86_FEATURE_CLFLUSH))
 		c->x86_cache_alignment = ((ebx >> 8) & 0xff) * 8;
 	/* Leaf 0x1 capabilities filled in early for Xen. */
 	c->x86_capability[cpufeat_word(X86_FEATURE_FPU)] = edx;
-	c->x86_capability[cpufeat_word(X86_FEATURE_XMM3)] = ecx;
+	c->x86_capability[cpufeat_word(X86_FEATURE_SSE3)] = ecx;
 
 	if ( cpuid_eax(0x80000000) >= 0x80000008 )
 		paddr_bits = cpuid_eax(0x80000008) & 0xff;
@@ -249,7 +249,7 @@ static void generic_identify(struct cpui
 	c->cpuid_level = cpuid_eax(0);
 	cpuid(0x00000001, &eax, &ebx, &ecx, &edx);
 	c->x86_capability[cpufeat_word(X86_FEATURE_FPU)] = edx;
-	c->x86_capability[cpufeat_word(X86_FEATURE_XMM3)] = ecx;
+	c->x86_capability[cpufeat_word(X86_FEATURE_SSE3)] = ecx;
 
 	if ( cpu_has(c, X86_FEATURE_CLFLUSH) )
 		c->x86_clflush_size = ((ebx >> 8) & 0xff) * 8;
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -2029,7 +2029,7 @@ unsigned long hvm_cr4_guest_reserved_bit
              X86_CR4_PCE |
              (leaf1_edx & cpufeat_mask(X86_FEATURE_FXSR) ?
               X86_CR4_OSFXSR : 0) |
-             (leaf1_edx & cpufeat_mask(X86_FEATURE_XMM) ?
+             (leaf1_edx & cpufeat_mask(X86_FEATURE_SSE) ?
               X86_CR4_OSXMMEXCPT : 0) |
              ((restore || nestedhvm_enabled(v->domain)) &&
               (leaf1_ecx & cpufeat_mask(X86_FEATURE_VMXE)) ?
--- a/xen/arch/x86/hvm/vmx/vvmx.c
+++ b/xen/arch/x86/hvm/vmx/vvmx.c
@@ -1963,7 +1963,7 @@ int nvmx_msr_read_intercept(unsigned int
             data |= X86_CR4_PGE;
         if ( edx & cpufeat_mask(X86_FEATURE_FXSR) )
             data |= X86_CR4_OSFXSR;
-        if ( edx & cpufeat_mask(X86_FEATURE_XMM) )
+        if ( edx & cpufeat_mask(X86_FEATURE_SSE) )
             data |= X86_CR4_OSXMMEXCPT;
         if ( ecx & cpufeat_mask(X86_FEATURE_VMXE) )
             data |= X86_CR4_VMXE;
--- a/xen/include/asm-x86/amd.h
+++ b/xen/include/asm-x86/amd.h
@@ -22,7 +22,7 @@
 	cpufeat_mask(X86_FEATURE_CMOV)  | cpufeat_mask(X86_FEATURE_PAT)    | \
 	cpufeat_mask(X86_FEATURE_PSE36) | cpufeat_mask(X86_FEATURE_CLFLUSH)| \
 	cpufeat_mask(X86_FEATURE_MMX)   | cpufeat_mask(X86_FEATURE_FXSR)   | \
-	cpufeat_mask(X86_FEATURE_XMM)   | cpufeat_mask(X86_FEATURE_XMM2))
+	cpufeat_mask(X86_FEATURE_SSE)   | cpufeat_mask(X86_FEATURE_SSE2))
 #define AMD_EXTFEATURES_K8_REV_C_ECX  0
 #define AMD_EXTFEATURES_K8_REV_C_EDX  (					       \
 	cpufeat_mask(X86_FEATURE_FPU)	   | cpufeat_mask(X86_FEATURE_VME)   | \
@@ -48,7 +48,7 @@
 
 /* Family 0Fh, Revision E */
 #define AMD_FEATURES_K8_REV_E_ECX        (AMD_FEATURES_K8_REV_D_ECX |	\
-	cpufeat_mask(X86_FEATURE_XMM3))
+	cpufeat_mask(X86_FEATURE_SSE3))
 #define AMD_FEATURES_K8_REV_E_EDX        (AMD_FEATURES_K8_REV_D_EDX | 	\
 	cpufeat_mask(X86_FEATURE_HT))
 #define AMD_EXTFEATURES_K8_REV_E_ECX     (AMD_EXTFEATURES_K8_REV_D_ECX |\
--- a/xen/include/asm-x86/cpufeature.h
+++ b/xen/include/asm-x86/cpufeature.h
@@ -38,8 +38,8 @@
 #define X86_FEATURE_MMX		(0*32+23) /* Multimedia Extensions */
 #define X86_FEATURE_FXSR	(0*32+24) /* FXSAVE and FXRSTOR instructions (fast save and restore */
 				          /* of FPU context), and CR4.OSFXSR available */
-#define X86_FEATURE_XMM		(0*32+25) /* Streaming SIMD Extensions */
-#define X86_FEATURE_XMM2	(0*32+26) /* Streaming SIMD Extensions-2 */
+#define X86_FEATURE_SSE		(0*32+25) /* Streaming SIMD Extensions */
+#define X86_FEATURE_SSE2	(0*32+26) /* Streaming SIMD Extensions-2 */
 #define X86_FEATURE_SELFSNOOP	(0*32+27) /* CPU self snoop */
 #define X86_FEATURE_HT		(0*32+28) /* Hyper-Threading */
 #define X86_FEATURE_ACC		(0*32+29) /* Automatic clock control */
@@ -78,7 +78,7 @@
 #define X86_FEATURE_APERFMPERF   (3*32+16) /* APERFMPERF */
 
 /* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */
-#define X86_FEATURE_XMM3	(4*32+ 0) /* Streaming SIMD Extensions-3 */
+#define X86_FEATURE_SSE3	(4*32+ 0) /* Streaming SIMD Extensions-3 */
 #define X86_FEATURE_PCLMULQDQ	(4*32+ 1) /* Carry-less mulitplication */
 #define X86_FEATURE_DTES64	(4*32+ 2) /* 64-bit Debug Store */
 #define X86_FEATURE_MWAIT	(4*32+ 3) /* Monitor/Mwait support */
@@ -183,7 +183,9 @@
 #define cpu_has_sep		boot_cpu_has(X86_FEATURE_SEP)
 #define cpu_has_mtrr		1
 #define cpu_has_mmx		1
-#define cpu_has_xmm3		boot_cpu_has(X86_FEATURE_XMM3)
+#define cpu_has_sse		boot_cpu_has(X86_FEATURE_SSE)
+#define cpu_has_sse2		boot_cpu_has(X86_FEATURE_SSE2)
+#define cpu_has_sse3		boot_cpu_has(X86_FEATURE_SSE3)
 #define cpu_has_ht		boot_cpu_has(X86_FEATURE_HT)
 #define cpu_has_mp		1
 #define cpu_has_nx		boot_cpu_has(X86_FEATURE_NX)



[-- Attachment #2: x86-feature-xmmN-sseN.patch --]
[-- Type: text/plain, Size: 5250 bytes --]

x86: rename XMM* features to SSE*

The latter are their canonical names, used already in the instruction
emulator.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>

--- a/xen/arch/x86/cpu/common.c
+++ b/xen/arch/x86/cpu/common.c
@@ -205,12 +205,12 @@ static void __init early_cpu_detect(void
 		c->x86_model += ((eax >> 16) & 0xF) << 4;
 	c->x86_mask = eax & 15;
 	edx &= ~cleared_caps[cpufeat_word(X86_FEATURE_FPU)];
-	ecx &= ~cleared_caps[cpufeat_word(X86_FEATURE_XMM3)];
+	ecx &= ~cleared_caps[cpufeat_word(X86_FEATURE_SSE3)];
 	if (edx & cpufeat_mask(X86_FEATURE_CLFLUSH))
 		c->x86_cache_alignment = ((ebx >> 8) & 0xff) * 8;
 	/* Leaf 0x1 capabilities filled in early for Xen. */
 	c->x86_capability[cpufeat_word(X86_FEATURE_FPU)] = edx;
-	c->x86_capability[cpufeat_word(X86_FEATURE_XMM3)] = ecx;
+	c->x86_capability[cpufeat_word(X86_FEATURE_SSE3)] = ecx;
 
 	if ( cpuid_eax(0x80000000) >= 0x80000008 )
 		paddr_bits = cpuid_eax(0x80000008) & 0xff;
@@ -249,7 +249,7 @@ static void generic_identify(struct cpui
 	c->cpuid_level = cpuid_eax(0);
 	cpuid(0x00000001, &eax, &ebx, &ecx, &edx);
 	c->x86_capability[cpufeat_word(X86_FEATURE_FPU)] = edx;
-	c->x86_capability[cpufeat_word(X86_FEATURE_XMM3)] = ecx;
+	c->x86_capability[cpufeat_word(X86_FEATURE_SSE3)] = ecx;
 
 	if ( cpu_has(c, X86_FEATURE_CLFLUSH) )
 		c->x86_clflush_size = ((ebx >> 8) & 0xff) * 8;
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -2029,7 +2029,7 @@ unsigned long hvm_cr4_guest_reserved_bit
              X86_CR4_PCE |
              (leaf1_edx & cpufeat_mask(X86_FEATURE_FXSR) ?
               X86_CR4_OSFXSR : 0) |
-             (leaf1_edx & cpufeat_mask(X86_FEATURE_XMM) ?
+             (leaf1_edx & cpufeat_mask(X86_FEATURE_SSE) ?
               X86_CR4_OSXMMEXCPT : 0) |
              ((restore || nestedhvm_enabled(v->domain)) &&
               (leaf1_ecx & cpufeat_mask(X86_FEATURE_VMXE)) ?
--- a/xen/arch/x86/hvm/vmx/vvmx.c
+++ b/xen/arch/x86/hvm/vmx/vvmx.c
@@ -1963,7 +1963,7 @@ int nvmx_msr_read_intercept(unsigned int
             data |= X86_CR4_PGE;
         if ( edx & cpufeat_mask(X86_FEATURE_FXSR) )
             data |= X86_CR4_OSFXSR;
-        if ( edx & cpufeat_mask(X86_FEATURE_XMM) )
+        if ( edx & cpufeat_mask(X86_FEATURE_SSE) )
             data |= X86_CR4_OSXMMEXCPT;
         if ( ecx & cpufeat_mask(X86_FEATURE_VMXE) )
             data |= X86_CR4_VMXE;
--- a/xen/include/asm-x86/amd.h
+++ b/xen/include/asm-x86/amd.h
@@ -22,7 +22,7 @@
 	cpufeat_mask(X86_FEATURE_CMOV)  | cpufeat_mask(X86_FEATURE_PAT)    | \
 	cpufeat_mask(X86_FEATURE_PSE36) | cpufeat_mask(X86_FEATURE_CLFLUSH)| \
 	cpufeat_mask(X86_FEATURE_MMX)   | cpufeat_mask(X86_FEATURE_FXSR)   | \
-	cpufeat_mask(X86_FEATURE_XMM)   | cpufeat_mask(X86_FEATURE_XMM2))
+	cpufeat_mask(X86_FEATURE_SSE)   | cpufeat_mask(X86_FEATURE_SSE2))
 #define AMD_EXTFEATURES_K8_REV_C_ECX  0
 #define AMD_EXTFEATURES_K8_REV_C_EDX  (					       \
 	cpufeat_mask(X86_FEATURE_FPU)	   | cpufeat_mask(X86_FEATURE_VME)   | \
@@ -48,7 +48,7 @@
 
 /* Family 0Fh, Revision E */
 #define AMD_FEATURES_K8_REV_E_ECX        (AMD_FEATURES_K8_REV_D_ECX |	\
-	cpufeat_mask(X86_FEATURE_XMM3))
+	cpufeat_mask(X86_FEATURE_SSE3))
 #define AMD_FEATURES_K8_REV_E_EDX        (AMD_FEATURES_K8_REV_D_EDX | 	\
 	cpufeat_mask(X86_FEATURE_HT))
 #define AMD_EXTFEATURES_K8_REV_E_ECX     (AMD_EXTFEATURES_K8_REV_D_ECX |\
--- a/xen/include/asm-x86/cpufeature.h
+++ b/xen/include/asm-x86/cpufeature.h
@@ -38,8 +38,8 @@
 #define X86_FEATURE_MMX		(0*32+23) /* Multimedia Extensions */
 #define X86_FEATURE_FXSR	(0*32+24) /* FXSAVE and FXRSTOR instructions (fast save and restore */
 				          /* of FPU context), and CR4.OSFXSR available */
-#define X86_FEATURE_XMM		(0*32+25) /* Streaming SIMD Extensions */
-#define X86_FEATURE_XMM2	(0*32+26) /* Streaming SIMD Extensions-2 */
+#define X86_FEATURE_SSE		(0*32+25) /* Streaming SIMD Extensions */
+#define X86_FEATURE_SSE2	(0*32+26) /* Streaming SIMD Extensions-2 */
 #define X86_FEATURE_SELFSNOOP	(0*32+27) /* CPU self snoop */
 #define X86_FEATURE_HT		(0*32+28) /* Hyper-Threading */
 #define X86_FEATURE_ACC		(0*32+29) /* Automatic clock control */
@@ -78,7 +78,7 @@
 #define X86_FEATURE_APERFMPERF   (3*32+16) /* APERFMPERF */
 
 /* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */
-#define X86_FEATURE_XMM3	(4*32+ 0) /* Streaming SIMD Extensions-3 */
+#define X86_FEATURE_SSE3	(4*32+ 0) /* Streaming SIMD Extensions-3 */
 #define X86_FEATURE_PCLMULQDQ	(4*32+ 1) /* Carry-less mulitplication */
 #define X86_FEATURE_DTES64	(4*32+ 2) /* 64-bit Debug Store */
 #define X86_FEATURE_MWAIT	(4*32+ 3) /* Monitor/Mwait support */
@@ -183,7 +183,9 @@
 #define cpu_has_sep		boot_cpu_has(X86_FEATURE_SEP)
 #define cpu_has_mtrr		1
 #define cpu_has_mmx		1
-#define cpu_has_xmm3		boot_cpu_has(X86_FEATURE_XMM3)
+#define cpu_has_sse		boot_cpu_has(X86_FEATURE_SSE)
+#define cpu_has_sse2		boot_cpu_has(X86_FEATURE_SSE2)
+#define cpu_has_sse3		boot_cpu_has(X86_FEATURE_SSE3)
 #define cpu_has_ht		boot_cpu_has(X86_FEATURE_HT)
 #define cpu_has_mp		1
 #define cpu_has_nx		boot_cpu_has(X86_FEATURE_NX)

[-- Attachment #3: Type: text/plain, Size: 126 bytes --]

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 7+ messages in thread

* [PATCH 2/3] x86emul: check host features alongside guest ones where needed
  2016-03-11 17:29 [PATCH 0/3] x86: instruction emulator improvements Jan Beulich
  2016-03-11 17:33 ` [PATCH 1/3] x86: rename XMM* features to SSE* Jan Beulich
@ 2016-03-11 17:34 ` Jan Beulich
  2016-03-11 17:41   ` Andrew Cooper
  2016-03-11 17:35 ` [PATCH 3/3] x86emul: support MOVBE and CRC32 Jan Beulich
  2 siblings, 1 reply; 7+ messages in thread
From: Jan Beulich @ 2016-03-11 17:34 UTC (permalink / raw)
  To: xen-devel; +Cc: Andrew Cooper, Keir Fraser

[-- Attachment #1: Type: text/plain, Size: 4296 bytes --]

Signed-off-by: Jan Beulich <jbeulich@suse.com>

--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -1093,6 +1093,22 @@ static bool_t vcpu_has(
 #define vcpu_must_have_cx16() vcpu_must_have(0x00000001, ECX, 13)
 #define vcpu_must_have_avx()  vcpu_must_have(0x00000001, ECX, 28)
 
+#ifdef __XEN__
+/*
+ * Note the (subtle?) difference between vcpu_must_have_<feature>() and
+ * host_and_vcpu_must_have(<feature>): The former only checks guest feature
+ * flags, while the latter also checks host ones, i.e. is required to be used
+ * when emulation code is using the same instruction class for carrying out
+ * the actual operation.
+ */
+#define host_and_vcpu_must_have(feat) ({ \
+    generate_exception_if(!cpu_has_##feat, EXC_UD, -1); \
+    vcpu_must_have_##feat(); \
+})
+#else
+#define host_and_vcpu_must_have(feat) vcpu_must_have_##feat()
+#endif
+
 static int
 in_longmode(
     struct x86_emulate_ctxt *ctxt,
@@ -3102,7 +3118,7 @@ x86_emulate(
                 emulate_fpu_insn_memsrc("fildl", src.val);
                 break;
             case 1: /* fisttp m32i */
-                vcpu_must_have_sse3();
+                host_and_vcpu_must_have(sse3);
                 ea.bytes = 4;
                 dst = ea;
                 dst.type = OP_MEM;
@@ -3211,7 +3227,7 @@ x86_emulate(
                 emulate_fpu_insn_memsrc("fldl", src.val);
                 break;
             case 1: /* fisttp m64i */
-                vcpu_must_have_sse3();
+                host_and_vcpu_must_have(sse3);
                 ea.bytes = 8;
                 dst = ea;
                 dst.type = OP_MEM;
@@ -3319,7 +3335,7 @@ x86_emulate(
                 emulate_fpu_insn_memsrc("filds", src.val);
                 break;
             case 1: /* fisttp m16i */
-                vcpu_must_have_sse3();
+                host_and_vcpu_must_have(sse3);
                 ea.bytes = 2;
                 dst = ea;
                 dst.type = OP_MEM;
@@ -4115,9 +4131,9 @@ x86_emulate(
         if ( vex.opcx == vex_none )
         {
             if ( vex.pfx & VEX_PREFIX_DOUBLE_MASK )
-                vcpu_must_have_sse2();
+                host_and_vcpu_must_have(sse2);
             else
-                vcpu_must_have_sse();
+                host_and_vcpu_must_have(sse);
             ea.bytes = 16;
             SET_SSE_PREFIX(buf[0], vex.pfx);
             get_fpu(X86EMUL_FPU_xmm, &fic);
@@ -4128,7 +4144,7 @@ x86_emulate(
                     ((vex.reg != 0xf) &&
                      ((ea.type == OP_MEM) ||
                       !(vex.pfx & VEX_PREFIX_SCALAR_MASK))));
-            vcpu_must_have_avx();
+            host_and_vcpu_must_have(avx);
             get_fpu(X86EMUL_FPU_ymm, &fic);
             ea.bytes = 16 << vex.l;
         }
@@ -4361,16 +4377,16 @@ x86_emulate(
             {
             case vex_66:
             case vex_f3:
-                vcpu_must_have_sse2();
+                host_and_vcpu_must_have(sse2);
                 buf[0] = 0x66; /* movdqa */
                 get_fpu(X86EMUL_FPU_xmm, &fic);
                 ea.bytes = 16;
                 break;
             case vex_none:
                 if ( b != 0xe7 )
-                    vcpu_must_have_mmx();
+                    host_and_vcpu_must_have(mmx);
                 else
-                    vcpu_must_have_sse();
+                    host_and_vcpu_must_have(sse);
                 get_fpu(X86EMUL_FPU_mmx, &fic);
                 ea.bytes = 8;
                 break;
@@ -4382,7 +4398,7 @@ x86_emulate(
         {
             fail_if((vex.opcx != vex_0f) || (vex.reg != 0xf) ||
                     ((vex.pfx != vex_66) && (vex.pfx != vex_f3)));
-            vcpu_must_have_avx();
+            host_and_vcpu_must_have(avx);
             get_fpu(X86EMUL_FPU_ymm, &fic);
             ea.bytes = 16 << vex.l;
         }
@@ -4688,7 +4704,7 @@ x86_emulate(
         generate_exception_if((modrm_reg & 7) != 1, EXC_UD, -1);
         generate_exception_if(ea.type != OP_MEM, EXC_UD, -1);
         if ( op_bytes == 8 )
-            vcpu_must_have_cx16();
+            host_and_vcpu_must_have(cx16);
         op_bytes *= 2;
 
         /* Get actual old value. */



[-- Attachment #2: x86emul-host-features.patch --]
[-- Type: text/plain, Size: 4358 bytes --]

x86emul: check host features alongside guest ones where needed

Signed-off-by: Jan Beulich <jbeulich@suse.com>

--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -1093,6 +1093,22 @@ static bool_t vcpu_has(
 #define vcpu_must_have_cx16() vcpu_must_have(0x00000001, ECX, 13)
 #define vcpu_must_have_avx()  vcpu_must_have(0x00000001, ECX, 28)
 
+#ifdef __XEN__
+/*
+ * Note the (subtle?) difference between vcpu_must_have_<feature>() and
+ * host_and_vcpu_must_have(<feature>): The former only checks guest feature
+ * flags, while the latter also checks host ones, i.e. is required to be used
+ * when emulation code is using the same instruction class for carrying out
+ * the actual operation.
+ */
+#define host_and_vcpu_must_have(feat) ({ \
+    generate_exception_if(!cpu_has_##feat, EXC_UD, -1); \
+    vcpu_must_have_##feat(); \
+})
+#else
+#define host_and_vcpu_must_have(feat) vcpu_must_have_##feat()
+#endif
+
 static int
 in_longmode(
     struct x86_emulate_ctxt *ctxt,
@@ -3102,7 +3118,7 @@ x86_emulate(
                 emulate_fpu_insn_memsrc("fildl", src.val);
                 break;
             case 1: /* fisttp m32i */
-                vcpu_must_have_sse3();
+                host_and_vcpu_must_have(sse3);
                 ea.bytes = 4;
                 dst = ea;
                 dst.type = OP_MEM;
@@ -3211,7 +3227,7 @@ x86_emulate(
                 emulate_fpu_insn_memsrc("fldl", src.val);
                 break;
             case 1: /* fisttp m64i */
-                vcpu_must_have_sse3();
+                host_and_vcpu_must_have(sse3);
                 ea.bytes = 8;
                 dst = ea;
                 dst.type = OP_MEM;
@@ -3319,7 +3335,7 @@ x86_emulate(
                 emulate_fpu_insn_memsrc("filds", src.val);
                 break;
             case 1: /* fisttp m16i */
-                vcpu_must_have_sse3();
+                host_and_vcpu_must_have(sse3);
                 ea.bytes = 2;
                 dst = ea;
                 dst.type = OP_MEM;
@@ -4115,9 +4131,9 @@ x86_emulate(
         if ( vex.opcx == vex_none )
         {
             if ( vex.pfx & VEX_PREFIX_DOUBLE_MASK )
-                vcpu_must_have_sse2();
+                host_and_vcpu_must_have(sse2);
             else
-                vcpu_must_have_sse();
+                host_and_vcpu_must_have(sse);
             ea.bytes = 16;
             SET_SSE_PREFIX(buf[0], vex.pfx);
             get_fpu(X86EMUL_FPU_xmm, &fic);
@@ -4128,7 +4144,7 @@ x86_emulate(
                     ((vex.reg != 0xf) &&
                      ((ea.type == OP_MEM) ||
                       !(vex.pfx & VEX_PREFIX_SCALAR_MASK))));
-            vcpu_must_have_avx();
+            host_and_vcpu_must_have(avx);
             get_fpu(X86EMUL_FPU_ymm, &fic);
             ea.bytes = 16 << vex.l;
         }
@@ -4361,16 +4377,16 @@ x86_emulate(
             {
             case vex_66:
             case vex_f3:
-                vcpu_must_have_sse2();
+                host_and_vcpu_must_have(sse2);
                 buf[0] = 0x66; /* movdqa */
                 get_fpu(X86EMUL_FPU_xmm, &fic);
                 ea.bytes = 16;
                 break;
             case vex_none:
                 if ( b != 0xe7 )
-                    vcpu_must_have_mmx();
+                    host_and_vcpu_must_have(mmx);
                 else
-                    vcpu_must_have_sse();
+                    host_and_vcpu_must_have(sse);
                 get_fpu(X86EMUL_FPU_mmx, &fic);
                 ea.bytes = 8;
                 break;
@@ -4382,7 +4398,7 @@ x86_emulate(
         {
             fail_if((vex.opcx != vex_0f) || (vex.reg != 0xf) ||
                     ((vex.pfx != vex_66) && (vex.pfx != vex_f3)));
-            vcpu_must_have_avx();
+            host_and_vcpu_must_have(avx);
             get_fpu(X86EMUL_FPU_ymm, &fic);
             ea.bytes = 16 << vex.l;
         }
@@ -4688,7 +4704,7 @@ x86_emulate(
         generate_exception_if((modrm_reg & 7) != 1, EXC_UD, -1);
         generate_exception_if(ea.type != OP_MEM, EXC_UD, -1);
         if ( op_bytes == 8 )
-            vcpu_must_have_cx16();
+            host_and_vcpu_must_have(cx16);
         op_bytes *= 2;
 
         /* Get actual old value. */

[-- Attachment #3: Type: text/plain, Size: 126 bytes --]

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 7+ messages in thread

* [PATCH 3/3] x86emul: support MOVBE and CRC32
  2016-03-11 17:29 [PATCH 0/3] x86: instruction emulator improvements Jan Beulich
  2016-03-11 17:33 ` [PATCH 1/3] x86: rename XMM* features to SSE* Jan Beulich
  2016-03-11 17:34 ` [PATCH 2/3] x86emul: check host features alongside guest ones where needed Jan Beulich
@ 2016-03-11 17:35 ` Jan Beulich
  2 siblings, 0 replies; 7+ messages in thread
From: Jan Beulich @ 2016-03-11 17:35 UTC (permalink / raw)
  To: xen-devel; +Cc: Andrew Cooper, Keir Fraser

[-- Attachment #1: Type: text/plain, Size: 12638 bytes --]

The former in an attempt to at least gradually support all simple data
movement instructions. The latter just because it shares the opcode
with the former.

Signed-off-by: Jan Beulich <jbeulich@suse.com>

--- a/tools/tests/x86_emulator/test_x86_emulator.c
+++ b/tools/tests/x86_emulator/test_x86_emulator.c
@@ -78,7 +78,14 @@ static int cpuid(
     unsigned int *edx,
     struct x86_emulate_ctxt *ctxt)
 {
+    unsigned int leaf = *eax;
+
     asm ("cpuid" : "+a" (*eax), "+c" (*ecx), "=d" (*edx), "=b" (*ebx));
+
+    /* The emulator doesn't itself use MOVBE, so we can always run the test. */
+    if ( leaf == 1 )
+        *ecx |= 1U << 22;
+
     return X86EMUL_OKAY;
 }
 
@@ -605,6 +612,34 @@ int main(int argc, char **argv)
     printf("skipped\n");
 #endif
 
+    printf("%-40s", "Testing movbe (%%ecx),%%eax...");
+    instr[0] = 0x0f; instr[1] = 0x38; instr[2] = 0xf0; instr[3] = 0x01;
+    regs.eflags = 0x200;
+    regs.eip    = (unsigned long)&instr[0];
+    regs.ecx    = (unsigned long)res;
+    regs.eax    = 0x11111111;
+    *res        = 0x12345678;
+    rc = x86_emulate(&ctxt, &emulops);
+    if ( (rc != X86EMUL_OKAY) ||
+         (*res != 0x12345678) ||
+         (regs.eax != 0x78563412) ||
+         (regs.eflags != 0x200) ||
+         (regs.eip != (unsigned long)&instr[4]) )
+        goto fail;
+    printf("okay\n");
+
+    printf("%-40s", "Testing movbe %%ax,(%%ecx)...");
+    instr[0] = 0x66; instr[1] = 0x0f; instr[2] = 0x38; instr[3] = 0xf1; instr[4] = 0x01;
+    regs.eip = (unsigned long)&instr[0];
+    rc = x86_emulate(&ctxt, &emulops);
+    if ( (rc != X86EMUL_OKAY) ||
+         (*res != 0x12341234) ||
+         (regs.eax != 0x78563412) ||
+         (regs.eflags != 0x200) ||
+         (regs.eip != (unsigned long)&instr[5]) )
+        goto fail;
+    printf("okay\n");
+
 #define decl_insn(which) extern const unsigned char which[], which##_len[]
 #define put_insn(which, insn) ".pushsection .test, \"ax\", @progbits\n" \
                               #which ": " insn "\n"                     \
--- a/tools/tests/x86_emulator/x86_emulate.c
+++ b/tools/tests/x86_emulator/x86_emulate.c
@@ -12,6 +12,7 @@ typedef bool bool_t;
 
 #define BUG() abort()
 #define ASSERT assert
+#define ASSERT_UNREACHABLE() assert(!__LINE__)
 
 #define cpu_has_amd_erratum(nr) 0
 #define mark_regs_dirty(r) ((void)(r))
--- a/xen/arch/x86/Rules.mk
+++ b/xen/arch/x86/Rules.mk
@@ -16,6 +16,7 @@ CFLAGS += -msoft-float
 $(call cc-options-add,CFLAGS,CC,$(EMBEDDED_EXTRA_CFLAGS))
 $(call cc-option-add,CFLAGS,CC,-Wnested-externs)
 $(call as-insn-check,CFLAGS,CC,"vmcall",-DHAVE_GAS_VMX)
+$(call as-insn-check,CFLAGS,CC,"crc32 %eax$$(comma)%eax",-DHAVE_GAS_SSE4_2)
 $(call as-insn-check,CFLAGS,CC,"invept (%rax)$$(comma)%rax",-DHAVE_GAS_EPT)
 $(call as-insn-check,CFLAGS,CC,"rdfsbase %rax",-DHAVE_GAS_FSGSBASE)
 $(call as-insn-check,CFLAGS,CC,".equ \"x\"$$(comma)1", \
--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -188,7 +188,7 @@ static uint8_t twobyte_table[256] = {
     ImplicitOps, ImplicitOps, ImplicitOps, 0,
     ImplicitOps, ImplicitOps, 0, 0,
     /* 0x38 - 0x3F */
-    0, 0, 0, 0, 0, 0, 0, 0,
+    DstReg|SrcMem|ModRM, 0, 0, 0, 0, 0, 0, 0,
     /* 0x40 - 0x47 */
     DstReg|SrcMem|ModRM|Mov, DstReg|SrcMem|ModRM|Mov,
     DstReg|SrcMem|ModRM|Mov, DstReg|SrcMem|ModRM|Mov,
@@ -1091,6 +1091,8 @@ static bool_t vcpu_has(
 #define vcpu_must_have_sse2() vcpu_must_have(0x00000001, EDX, 26)
 #define vcpu_must_have_sse3() vcpu_must_have(0x00000001, ECX,  0)
 #define vcpu_must_have_cx16() vcpu_must_have(0x00000001, ECX, 13)
+#define vcpu_must_have_sse4_2() vcpu_must_have(0x00000001, ECX, 20)
+#define vcpu_must_have_movbe() vcpu_must_have(0x00000001, ECX, 22)
 #define vcpu_must_have_avx()  vcpu_must_have(0x00000001, ECX, 28)
 
 #ifdef __XEN__
@@ -1503,8 +1505,9 @@ x86_emulate(
     /* Shadow copy of register state. Committed on successful emulation. */
     struct cpu_user_regs _regs = *ctxt->regs;
 
-    uint8_t b, d, sib, sib_index, sib_base, twobyte = 0, rex_prefix = 0;
+    uint8_t b, d, sib, sib_index, sib_base, rex_prefix = 0;
     uint8_t modrm = 0, modrm_mod = 0, modrm_reg = 0, modrm_rm = 0;
+    enum { ext_none, ext_0f, ext_0f38 } ext = ext_none;
     union vex vex = {};
     unsigned int op_bytes, def_op_bytes, ad_bytes, def_ad_bytes;
     bool_t lock_prefix = 0;
@@ -1600,9 +1603,18 @@ x86_emulate(
         /* Two-byte opcode? */
         if ( b == 0x0f )
         {
-            twobyte = 1;
             b = insn_fetch_type(uint8_t);
             d = twobyte_table[b];
+            switch ( b )
+            {
+            default:
+                ext = ext_0f;
+                break;
+            case 0x38:
+                b = insn_fetch_type(uint8_t);
+                ext = ext_0f38;
+                break;
+            }
         }
 
         /* Unrecognised? */
@@ -1619,7 +1631,7 @@ x86_emulate(
         modrm = insn_fetch_type(uint8_t);
         modrm_mod = (modrm & 0xc0) >> 6;
 
-        if ( !twobyte && ((b & ~1) == 0xc4) )
+        if ( !ext && ((b & ~1) == 0xc4) )
             switch ( def_ad_bytes )
             {
             default:
@@ -1665,12 +1677,12 @@ x86_emulate(
                     rex_prefix |= REX_R;
 
                 fail_if(vex.opcx != vex_0f);
-                twobyte = 1;
+                ext = ext_0f;
                 b = insn_fetch_type(uint8_t);
                 d = twobyte_table[b];
 
                 /* Unrecognised? */
-                if ( d == 0 )
+                if ( d == 0 || b == 0x38 )
                     goto cannot_emulate;
 
                 modrm = insn_fetch_type(uint8_t);
@@ -1756,7 +1768,7 @@ x86_emulate(
                 {
                     ea.mem.seg  = x86_seg_ss;
                     ea.mem.off += _regs.esp;
-                    if ( !twobyte && (b == 0x8f) )
+                    if ( !ext && (b == 0x8f) )
                         /* POP <rm> computes its EA post increment. */
                         ea.mem.off += ((mode_64bit() && (op_bytes == 4))
                                        ? 8 : op_bytes);
@@ -1791,12 +1803,12 @@ x86_emulate(
                         ((op_bytes == 8) ? 4 : op_bytes);
                 else if ( (d & SrcMask) == SrcImmByte )
                     ea.mem.off += 1;
-                else if ( !twobyte && ((b & 0xfe) == 0xf6) &&
+                else if ( !ext && ((b & 0xfe) == 0xf6) &&
                           ((modrm_reg & 7) <= 1) )
                     /* Special case in Grp3: test has immediate operand. */
                     ea.mem.off += (d & ByteOp) ? 1
                         : ((op_bytes == 8) ? 4 : op_bytes);
-                else if ( twobyte && ((b & 0xf7) == 0xa4) )
+                else if ( ext == ext_0f && ((b & 0xf7) == 0xa4) )
                     /* SHLD/SHRD with immediate byte third operand. */
                     ea.mem.off++;
                 break;
@@ -1815,7 +1827,9 @@ x86_emulate(
         ea.mem.seg = override_seg;
 
     /* Early operand adjustments. */
-    if ( !twobyte )
+    switch ( ext )
+    {
+    case ext_none:
         switch ( b )
         {
         case 0xf6 ... 0xf7: /* Grp3 */
@@ -1848,6 +1862,29 @@ x86_emulate(
             }
             break;
         }
+        break;
+
+    case ext_0f:
+        break;
+
+    case ext_0f38:
+        switch ( b )
+        {
+        case 0xf0: /* movbe / crc32 */
+            d |= repne_prefix() ? ByteOp : Mov;
+            break;
+        case 0xf1: /* movbe / crc32 */
+            if ( !repne_prefix() )
+                d = (d & ~(DstMask | SrcMask)) | DstMem | SrcReg | Mov;
+            break;
+        default: /* Until it is worth making this table based ... */
+            goto cannot_emulate;
+        }
+        break;
+
+    default:
+        ASSERT_UNREACHABLE();
+    }
 
     /* Decode and fetch the source operand: register, memory or immediate. */
     switch ( d & SrcMask )
@@ -2006,8 +2043,18 @@ x86_emulate(
         break;
     }
 
-    if ( twobyte )
-        goto twobyte_insn;
+    switch ( ext )
+    {
+    case ext_none:
+        break;
+    case ext_0f:
+        goto ext_0f_insn;
+    case ext_0f38:
+        goto ext_0f38_insn;
+    default:
+        ASSERT_UNREACHABLE();
+        goto cannot_emulate;
+    }
 
     switch ( b )
     {
@@ -2050,7 +2097,7 @@ x86_emulate(
         struct segment_register reg;
         src.val = x86_seg_es;
     push_seg:
-        generate_exception_if(mode_64bit() && !twobyte, EXC_UD, -1);
+        generate_exception_if(mode_64bit() && !ext, EXC_UD, -1);
         fail_if(ops->read_segment == NULL);
         if ( (rc = ops->read_segment(src.val, &reg, ctxt)) != 0 )
             return rc;
@@ -2066,7 +2113,7 @@ x86_emulate(
     case 0x07: /* pop %%es */
         src.val = x86_seg_es;
     pop_seg:
-        generate_exception_if(mode_64bit() && !twobyte, EXC_UD, -1);
+        generate_exception_if(mode_64bit() && !ext, EXC_UD, -1);
         fail_if(ops->write_segment == NULL);
         /* 64-bit mode: POP defaults to a 64-bit operand. */
         if ( mode_64bit() && (op_bytes == 4) )
@@ -2721,7 +2768,7 @@ x86_emulate(
         unsigned long sel;
         dst.val = x86_seg_es;
     les: /* dst.val identifies the segment */
-        generate_exception_if(mode_64bit() && !twobyte, EXC_UD, -1);
+        generate_exception_if(mode_64bit() && !ext, EXC_UD, -1);
         generate_exception_if(src.type != OP_MEM, EXC_UD, -1);
         if ( (rc = read_ulong(src.mem.seg, src.mem.off + src.bytes,
                               &sel, 2, ctxt, ops)) != 0 )
@@ -3862,7 +3909,7 @@ x86_emulate(
     put_stub(stub);
     return rc;
 
- twobyte_insn:
+ ext_0f_insn:
     switch ( b )
     {
     case 0x00: /* Grp6 */
@@ -4765,6 +4812,72 @@ x86_emulate(
     }
     goto writeback;
 
+ ext_0f38_insn:
+    switch ( b )
+    {
+    case 0xf0: case 0xf1: /* movbe / crc32 */
+        generate_exception_if(repe_prefix(), EXC_UD, -1);
+        if ( repne_prefix() )
+        {
+            /* crc32 */
+#ifdef HAVE_GAS_SSE4_2
+            host_and_vcpu_must_have(sse4_2);
+            dst.bytes = rex_prefix & REX_W ? 8 : 4;
+            switch ( op_bytes )
+            {
+            case 1:
+                asm ( "crc32b %1,%k0" : "+r" (dst.val)
+                                      : "qm" (*(uint8_t *)&src.val) );
+                break;
+            case 2:
+                asm ( "crc32w %1,%k0" : "+r" (dst.val)
+                                      : "rm" (*(uint16_t *)&src.val) );
+                break;
+            case 4:
+                asm ( "crc32l %1,%k0" : "+r" (dst.val)
+                                      : "rm" (*(uint32_t *)&src.val) );
+                break;
+# ifdef __x86_64__
+            case 8:
+                asm ( "crc32q %1,%0" : "+r" (dst.val) : "rm" (src.val) );
+                break;
+# endif
+            default:
+                ASSERT_UNREACHABLE();
+            }
+#else /* !HAVE_GAS_SSE4_2 */
+            goto cannot_emulate;
+#endif
+        }
+        else
+        {
+            /* movbe */
+            vcpu_must_have_movbe();
+            switch ( op_bytes )
+            {
+            case 2:
+                asm ( "xchg %h0,%b0" : "=Q" (dst.val)
+                                     : "0" (*(uint32_t *)&src.val) );
+                break;
+            case 4:
+#ifdef __x86_64__
+                asm ( "bswap %k0" : "=r" (dst.val)
+                                  : "0" (*(uint32_t *)&src.val) );
+                break;
+            case 8:
+#endif
+                asm ( "bswap %0" : "=r" (dst.val) : "0" (src.val) );
+                break;
+            default:
+                ASSERT_UNREACHABLE();
+            }
+        }
+        break;
+    default:
+        goto cannot_emulate;
+    }
+    goto writeback;
+
  cannot_emulate:
     _put_fpu();
     put_stub(stub);
--- a/xen/include/asm-x86/cpufeature.h
+++ b/xen/include/asm-x86/cpufeature.h
@@ -189,6 +189,7 @@
 #define cpu_has_sse		boot_cpu_has(X86_FEATURE_SSE)
 #define cpu_has_sse2		boot_cpu_has(X86_FEATURE_SSE2)
 #define cpu_has_sse3		boot_cpu_has(X86_FEATURE_SSE3)
+#define cpu_has_sse4_2		boot_cpu_has(X86_FEATURE_SSE4_2)
 #define cpu_has_ht		boot_cpu_has(X86_FEATURE_HT)
 #define cpu_has_mp		1
 #define cpu_has_nx		boot_cpu_has(X86_FEATURE_NX)



[-- Attachment #2: x86emul-movbe.patch --]
[-- Type: text/plain, Size: 12670 bytes --]

x86emul: support MOVBE and CRC32

MOVBE is supported in an attempt to at least gradually cover all simple
data movement instructions. CRC32 is supported just because it shares
its opcode with MOVBE.

Signed-off-by: Jan Beulich <jbeulich@suse.com>

--- a/tools/tests/x86_emulator/test_x86_emulator.c
+++ b/tools/tests/x86_emulator/test_x86_emulator.c
@@ -78,7 +78,14 @@ static int cpuid(
     unsigned int *edx,
     struct x86_emulate_ctxt *ctxt)
 {
+    unsigned int leaf = *eax;
+
     asm ("cpuid" : "+a" (*eax), "+c" (*ecx), "=d" (*edx), "=b" (*ebx));
+
+    /* The emulator doesn't itself use MOVBE, so we can always run the test. */
+    if ( leaf == 1 )
+        *ecx |= 1U << 22;
+
     return X86EMUL_OKAY;
 }
 
@@ -605,6 +612,34 @@ int main(int argc, char **argv)
     printf("skipped\n");
 #endif
 
+    printf("%-40s", "Testing movbe (%%ecx),%%eax...");
+    instr[0] = 0x0f; instr[1] = 0x38; instr[2] = 0xf0; instr[3] = 0x01;
+    regs.eflags = 0x200;
+    regs.eip    = (unsigned long)&instr[0];
+    regs.ecx    = (unsigned long)res;
+    regs.eax    = 0x11111111;
+    *res        = 0x12345678;
+    rc = x86_emulate(&ctxt, &emulops);
+    if ( (rc != X86EMUL_OKAY) ||
+         (*res != 0x12345678) ||
+         (regs.eax != 0x78563412) ||
+         (regs.eflags != 0x200) ||
+         (regs.eip != (unsigned long)&instr[4]) )
+        goto fail;
+    printf("okay\n");
+
+    printf("%-40s", "Testing movbe %%ax,(%%ecx)...");
+    instr[0] = 0x66; instr[1] = 0x0f; instr[2] = 0x38; instr[3] = 0xf1; instr[4] = 0x01;
+    regs.eip = (unsigned long)&instr[0];
+    rc = x86_emulate(&ctxt, &emulops);
+    if ( (rc != X86EMUL_OKAY) ||
+         (*res != 0x12341234) ||
+         (regs.eax != 0x78563412) ||
+         (regs.eflags != 0x200) ||
+         (regs.eip != (unsigned long)&instr[5]) )
+        goto fail;
+    printf("okay\n");
+
 #define decl_insn(which) extern const unsigned char which[], which##_len[]
 #define put_insn(which, insn) ".pushsection .test, \"ax\", @progbits\n" \
                               #which ": " insn "\n"                     \
--- a/tools/tests/x86_emulator/x86_emulate.c
+++ b/tools/tests/x86_emulator/x86_emulate.c
@@ -12,6 +12,7 @@ typedef bool bool_t;
 
 #define BUG() abort()
 #define ASSERT assert
+#define ASSERT_UNREACHABLE() assert(!__LINE__)
 
 #define cpu_has_amd_erratum(nr) 0
 #define mark_regs_dirty(r) ((void)(r))
--- a/xen/arch/x86/Rules.mk
+++ b/xen/arch/x86/Rules.mk
@@ -16,6 +16,7 @@ CFLAGS += -msoft-float
 $(call cc-options-add,CFLAGS,CC,$(EMBEDDED_EXTRA_CFLAGS))
 $(call cc-option-add,CFLAGS,CC,-Wnested-externs)
 $(call as-insn-check,CFLAGS,CC,"vmcall",-DHAVE_GAS_VMX)
+$(call as-insn-check,CFLAGS,CC,"crc32 %eax$$(comma)%eax",-DHAVE_GAS_SSE4_2)
 $(call as-insn-check,CFLAGS,CC,"invept (%rax)$$(comma)%rax",-DHAVE_GAS_EPT)
 $(call as-insn-check,CFLAGS,CC,"rdfsbase %rax",-DHAVE_GAS_FSGSBASE)
 $(call as-insn-check,CFLAGS,CC,".equ \"x\"$$(comma)1", \
--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -188,7 +188,7 @@ static uint8_t twobyte_table[256] = {
     ImplicitOps, ImplicitOps, ImplicitOps, 0,
     ImplicitOps, ImplicitOps, 0, 0,
     /* 0x38 - 0x3F */
-    0, 0, 0, 0, 0, 0, 0, 0,
+    DstReg|SrcMem|ModRM, 0, 0, 0, 0, 0, 0, 0,
     /* 0x40 - 0x47 */
     DstReg|SrcMem|ModRM|Mov, DstReg|SrcMem|ModRM|Mov,
     DstReg|SrcMem|ModRM|Mov, DstReg|SrcMem|ModRM|Mov,
@@ -1091,6 +1091,8 @@ static bool_t vcpu_has(
 #define vcpu_must_have_sse2() vcpu_must_have(0x00000001, EDX, 26)
 #define vcpu_must_have_sse3() vcpu_must_have(0x00000001, ECX,  0)
 #define vcpu_must_have_cx16() vcpu_must_have(0x00000001, ECX, 13)
+#define vcpu_must_have_sse4_2() vcpu_must_have(0x00000001, ECX, 20)
+#define vcpu_must_have_movbe() vcpu_must_have(0x00000001, ECX, 22)
 #define vcpu_must_have_avx()  vcpu_must_have(0x00000001, ECX, 28)
 
 #ifdef __XEN__
@@ -1503,8 +1505,9 @@ x86_emulate(
     /* Shadow copy of register state. Committed on successful emulation. */
     struct cpu_user_regs _regs = *ctxt->regs;
 
-    uint8_t b, d, sib, sib_index, sib_base, twobyte = 0, rex_prefix = 0;
+    uint8_t b, d, sib, sib_index, sib_base, rex_prefix = 0;
     uint8_t modrm = 0, modrm_mod = 0, modrm_reg = 0, modrm_rm = 0;
+    enum { ext_none, ext_0f, ext_0f38 } ext = ext_none;
     union vex vex = {};
     unsigned int op_bytes, def_op_bytes, ad_bytes, def_ad_bytes;
     bool_t lock_prefix = 0;
@@ -1600,9 +1603,18 @@ x86_emulate(
         /* Two-byte opcode? */
         if ( b == 0x0f )
         {
-            twobyte = 1;
             b = insn_fetch_type(uint8_t);
             d = twobyte_table[b];
+            switch ( b )
+            {
+            default:
+                ext = ext_0f;
+                break;
+            case 0x38:
+                b = insn_fetch_type(uint8_t);
+                ext = ext_0f38;
+                break;
+            }
         }
 
         /* Unrecognised? */
@@ -1619,7 +1631,7 @@ x86_emulate(
         modrm = insn_fetch_type(uint8_t);
         modrm_mod = (modrm & 0xc0) >> 6;
 
-        if ( !twobyte && ((b & ~1) == 0xc4) )
+        if ( !ext && ((b & ~1) == 0xc4) )
             switch ( def_ad_bytes )
             {
             default:
@@ -1665,12 +1677,12 @@ x86_emulate(
                     rex_prefix |= REX_R;
 
                 fail_if(vex.opcx != vex_0f);
-                twobyte = 1;
+                ext = ext_0f;
                 b = insn_fetch_type(uint8_t);
                 d = twobyte_table[b];
 
                 /* Unrecognised? */
-                if ( d == 0 )
+                if ( d == 0 || b == 0x38 )
                     goto cannot_emulate;
 
                 modrm = insn_fetch_type(uint8_t);
@@ -1756,7 +1768,7 @@ x86_emulate(
                 {
                     ea.mem.seg  = x86_seg_ss;
                     ea.mem.off += _regs.esp;
-                    if ( !twobyte && (b == 0x8f) )
+                    if ( !ext && (b == 0x8f) )
                         /* POP <rm> computes its EA post increment. */
                         ea.mem.off += ((mode_64bit() && (op_bytes == 4))
                                        ? 8 : op_bytes);
@@ -1791,12 +1803,12 @@ x86_emulate(
                         ((op_bytes == 8) ? 4 : op_bytes);
                 else if ( (d & SrcMask) == SrcImmByte )
                     ea.mem.off += 1;
-                else if ( !twobyte && ((b & 0xfe) == 0xf6) &&
+                else if ( !ext && ((b & 0xfe) == 0xf6) &&
                           ((modrm_reg & 7) <= 1) )
                     /* Special case in Grp3: test has immediate operand. */
                     ea.mem.off += (d & ByteOp) ? 1
                         : ((op_bytes == 8) ? 4 : op_bytes);
-                else if ( twobyte && ((b & 0xf7) == 0xa4) )
+                else if ( ext == ext_0f && ((b & 0xf7) == 0xa4) )
                     /* SHLD/SHRD with immediate byte third operand. */
                     ea.mem.off++;
                 break;
@@ -1815,7 +1827,9 @@ x86_emulate(
         ea.mem.seg = override_seg;
 
     /* Early operand adjustments. */
-    if ( !twobyte )
+    switch ( ext )
+    {
+    case ext_none:
         switch ( b )
         {
         case 0xf6 ... 0xf7: /* Grp3 */
@@ -1848,6 +1862,29 @@ x86_emulate(
             }
             break;
         }
+        break;
+
+    case ext_0f:
+        break;
+
+    case ext_0f38:
+        switch ( b )
+        {
+        case 0xf0: /* movbe / crc32 */
+            d |= repne_prefix() ? ByteOp : Mov;
+            break;
+        case 0xf1: /* movbe / crc32 */
+            if ( !repne_prefix() )
+                d = (d & ~(DstMask | SrcMask)) | DstMem | SrcReg | Mov;
+            break;
+        default: /* Until it is worth making this table based ... */
+            goto cannot_emulate;
+        }
+        break;
+
+    default:
+        ASSERT_UNREACHABLE();
+    }
 
     /* Decode and fetch the source operand: register, memory or immediate. */
     switch ( d & SrcMask )
@@ -2006,8 +2043,18 @@ x86_emulate(
         break;
     }
 
-    if ( twobyte )
-        goto twobyte_insn;
+    switch ( ext )
+    {
+    case ext_none:
+        break;
+    case ext_0f:
+        goto ext_0f_insn;
+    case ext_0f38:
+        goto ext_0f38_insn;
+    default:
+        ASSERT_UNREACHABLE();
+        goto cannot_emulate;
+    }
 
     switch ( b )
     {
@@ -2050,7 +2097,7 @@ x86_emulate(
         struct segment_register reg;
         src.val = x86_seg_es;
     push_seg:
-        generate_exception_if(mode_64bit() && !twobyte, EXC_UD, -1);
+        generate_exception_if(mode_64bit() && !ext, EXC_UD, -1);
         fail_if(ops->read_segment == NULL);
         if ( (rc = ops->read_segment(src.val, &reg, ctxt)) != 0 )
             return rc;
@@ -2066,7 +2113,7 @@ x86_emulate(
     case 0x07: /* pop %%es */
         src.val = x86_seg_es;
     pop_seg:
-        generate_exception_if(mode_64bit() && !twobyte, EXC_UD, -1);
+        generate_exception_if(mode_64bit() && !ext, EXC_UD, -1);
         fail_if(ops->write_segment == NULL);
         /* 64-bit mode: POP defaults to a 64-bit operand. */
         if ( mode_64bit() && (op_bytes == 4) )
@@ -2721,7 +2768,7 @@ x86_emulate(
         unsigned long sel;
         dst.val = x86_seg_es;
     les: /* dst.val identifies the segment */
-        generate_exception_if(mode_64bit() && !twobyte, EXC_UD, -1);
+        generate_exception_if(mode_64bit() && !ext, EXC_UD, -1);
         generate_exception_if(src.type != OP_MEM, EXC_UD, -1);
         if ( (rc = read_ulong(src.mem.seg, src.mem.off + src.bytes,
                               &sel, 2, ctxt, ops)) != 0 )
@@ -3862,7 +3909,7 @@ x86_emulate(
     put_stub(stub);
     return rc;
 
- twobyte_insn:
+ ext_0f_insn:
     switch ( b )
     {
     case 0x00: /* Grp6 */
@@ -4765,6 +4812,72 @@ x86_emulate(
     }
     goto writeback;
 
+ ext_0f38_insn:
+    switch ( b )
+    {
+    case 0xf0: case 0xf1: /* movbe / crc32 */
+        generate_exception_if(repe_prefix(), EXC_UD, -1);
+        if ( repne_prefix() )
+        {
+            /* crc32 */
+#ifdef HAVE_GAS_SSE4_2
+            host_and_vcpu_must_have(sse4_2);
+            dst.bytes = rex_prefix & REX_W ? 8 : 4;
+            switch ( op_bytes )
+            {
+            case 1:
+                asm ( "crc32b %1,%k0" : "+r" (dst.val)
+                                      : "qm" (*(uint8_t *)&src.val) );
+                break;
+            case 2:
+                asm ( "crc32w %1,%k0" : "+r" (dst.val)
+                                      : "rm" (*(uint16_t *)&src.val) );
+                break;
+            case 4:
+                asm ( "crc32l %1,%k0" : "+r" (dst.val)
+                                      : "rm" (*(uint32_t *)&src.val) );
+                break;
+# ifdef __x86_64__
+            case 8:
+                asm ( "crc32q %1,%0" : "+r" (dst.val) : "rm" (src.val) );
+                break;
+# endif
+            default:
+                ASSERT_UNREACHABLE();
+            }
+#else /* !HAVE_GAS_SSE4_2 */
+            goto cannot_emulate;
+#endif
+        }
+        else
+        {
+            /* movbe */
+            vcpu_must_have_movbe();
+            switch ( op_bytes )
+            {
+            case 2:
+                asm ( "xchg %h0,%b0" : "=Q" (dst.val)
+                                     : "0" (*(uint32_t *)&src.val) );
+                break;
+            case 4:
+#ifdef __x86_64__
+                asm ( "bswap %k0" : "=r" (dst.val)
+                                  : "0" (*(uint32_t *)&src.val) );
+                break;
+            case 8:
+#endif
+                asm ( "bswap %0" : "=r" (dst.val) : "0" (src.val) );
+                break;
+            default:
+                ASSERT_UNREACHABLE();
+            }
+        }
+        break;
+    default:
+        goto cannot_emulate;
+    }
+    goto writeback;
+
  cannot_emulate:
     _put_fpu();
     put_stub(stub);
--- a/xen/include/asm-x86/cpufeature.h
+++ b/xen/include/asm-x86/cpufeature.h
@@ -189,6 +189,7 @@
 #define cpu_has_sse		boot_cpu_has(X86_FEATURE_SSE)
 #define cpu_has_sse2		boot_cpu_has(X86_FEATURE_SSE2)
 #define cpu_has_sse3		boot_cpu_has(X86_FEATURE_SSE3)
+#define cpu_has_sse4_2		boot_cpu_has(X86_FEATURE_SSE4_2)
 #define cpu_has_ht		boot_cpu_has(X86_FEATURE_HT)
 #define cpu_has_mp		1
 #define cpu_has_nx		boot_cpu_has(X86_FEATURE_NX)

[-- Attachment #3: Type: text/plain, Size: 126 bytes --]

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH 2/3] x86emul: check host features alongside guest ones where needed
  2016-03-11 17:34 ` [PATCH 2/3] x86emul: check host features alongside guest ones where needed Jan Beulich
@ 2016-03-11 17:41   ` Andrew Cooper
  2016-03-14  8:29     ` Jan Beulich
  0 siblings, 1 reply; 7+ messages in thread
From: Andrew Cooper @ 2016-03-11 17:41 UTC (permalink / raw)
  To: Jan Beulich, xen-devel; +Cc: Keir Fraser

On 11/03/16 17:34, Jan Beulich wrote:
> Signed-off-by: Jan Beulich <jbeulich@suse.com>
>
> --- a/xen/arch/x86/x86_emulate/x86_emulate.c
> +++ b/xen/arch/x86/x86_emulate/x86_emulate.c
> @@ -1093,6 +1093,22 @@ static bool_t vcpu_has(
>  #define vcpu_must_have_cx16() vcpu_must_have(0x00000001, ECX, 13)
>  #define vcpu_must_have_avx()  vcpu_must_have(0x00000001, ECX, 28)
>  
> +#ifdef __XEN__
> +/*
> + * Note the (subtle?) difference between vcpu_must_have_<feature>() and
> + * vcpu_must_have(<feature>): The former only checks guest feature flags,
> + * while the latter also checks host ones, i.e. is required to be used when
> + * emulation code is using the same instruction class for carrying out the
> + * actual operation).
> + */

This comment is now stale.

With this dropped, Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH 2/3] x86emul: check host features alongside guest ones where needed
  2016-03-11 17:41   ` Andrew Cooper
@ 2016-03-14  8:29     ` Jan Beulich
  2016-03-14  8:52       ` Andrew Cooper
  0 siblings, 1 reply; 7+ messages in thread
From: Jan Beulich @ 2016-03-14  8:29 UTC (permalink / raw)
  To: Andrew Cooper; +Cc: xen-devel, Keir Fraser

>>> On 11.03.16 at 18:41, <andrew.cooper3@citrix.com> wrote:
> On 11/03/16 17:34, Jan Beulich wrote:
>> Signed-off-by: Jan Beulich <jbeulich@suse.com>
>>
>> --- a/xen/arch/x86/x86_emulate/x86_emulate.c
>> +++ b/xen/arch/x86/x86_emulate/x86_emulate.c
>> @@ -1093,6 +1093,22 @@ static bool_t vcpu_has(
>>  #define vcpu_must_have_cx16() vcpu_must_have(0x00000001, ECX, 13)
>>  #define vcpu_must_have_avx()  vcpu_must_have(0x00000001, ECX, 28)
>>  
>> +#ifdef __XEN__
>> +/*
>> + * Note the (subtle?) difference between vcpu_must_have_<feature>() and
>> + * vcpu_must_have(<feature>): The former only checks guest feature flags,
>> + * while the latter also checks host ones, i.e. is required to be used when
>> + * emulation code is using the same instruction class for carrying out the
>> + * actual operation).
>> + */
> 
> This comment is now stale.
> 
> With this dropped, Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>

I suppose you're okay with it being adjusted instead of fully dropped:

/*
 * Note the difference between vcpu_must_have_<feature>() and
 * host_and_vcpu_must_have(<feature>): The latter needs to be used when
 * emulation code is using the same instruction class for carrying out
 * the actual operation.
 */

Jan


_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH 2/3] x86emul: check host features alongside guest ones where needed
  2016-03-14  8:29     ` Jan Beulich
@ 2016-03-14  8:52       ` Andrew Cooper
  0 siblings, 0 replies; 7+ messages in thread
From: Andrew Cooper @ 2016-03-14  8:52 UTC (permalink / raw)
  To: Jan Beulich; +Cc: xen-devel, Keir Fraser

On 14/03/2016 08:29, Jan Beulich wrote:
>>>> On 11.03.16 at 18:41, <andrew.cooper3@citrix.com> wrote:
>> On 11/03/16 17:34, Jan Beulich wrote:
>>> Signed-off-by: Jan Beulich <jbeulich@suse.com>
>>>
>>> --- a/xen/arch/x86/x86_emulate/x86_emulate.c
>>> +++ b/xen/arch/x86/x86_emulate/x86_emulate.c
>>> @@ -1093,6 +1093,22 @@ static bool_t vcpu_has(
>>>  #define vcpu_must_have_cx16() vcpu_must_have(0x00000001, ECX, 13)
>>>  #define vcpu_must_have_avx()  vcpu_must_have(0x00000001, ECX, 28)
>>>  
>>> +#ifdef __XEN__
>>> +/*
>>> + * Note the (subtle?) difference between vcpu_must_have_<feature>() and
>>> + * vcpu_must_have(<feature>): The former only checks guest feature flags,
>>> + * while the latter also checks host ones, i.e. is required to be used when
>>> + * emulation code is using the same instruction class for carrying out the
>>> + * actual operation).
>>> + */
>> This comment is now stale.
>>
>> With this dropped, Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
> I suppose you're okay with it being adjusted instead of fully dropped:
>
> /*
>  * Note the difference between vcpu_must_have_<feature>() and
>  * host_and_vcpu_must_have(<feature>): The latter needs to be used when
>  * emulation code is using the same instruction class for carrying out
>  * the actual operation.
>  */

Yes - that's great.

~Andrew

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2016-03-14  8:52 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-03-11 17:29 [PATCH 0/3] x86: instruction emulator improvements Jan Beulich
2016-03-11 17:33 ` [PATCH 1/3] x86: rename XMM* features to SSE* Jan Beulich
2016-03-11 17:34 ` [PATCH 2/3] x86emul: check host features alongside guest ones where needed Jan Beulich
2016-03-11 17:41   ` Andrew Cooper
2016-03-14  8:29     ` Jan Beulich
2016-03-14  8:52       ` Andrew Cooper
2016-03-11 17:35 ` [PATCH 3/3] x86emul: support MOVBE and CRC32 Jan Beulich

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).