* [PATCH 2/3] x86emul: check host features alongside guest ones where needed
2016-03-11 17:29 [PATCH 0/3] x86: instruction emulator improvements Jan Beulich
2016-03-11 17:33 ` [PATCH 1/3] x86: rename XMM* features to SSE* Jan Beulich
@ 2016-03-11 17:34 ` Jan Beulich
2016-03-11 17:41 ` Andrew Cooper
2016-03-11 17:35 ` [PATCH 3/3] x86emul: support MOVBE and CRC32 Jan Beulich
2 siblings, 1 reply; 7+ messages in thread
From: Jan Beulich @ 2016-03-11 17:34 UTC (permalink / raw)
To: xen-devel; +Cc: Andrew Cooper, Keir Fraser
[-- Attachment #1: Type: text/plain, Size: 4296 bytes --]
Signed-off-by: Jan Beulich <jbeulich@suse.com>
--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -1093,6 +1093,22 @@ static bool_t vcpu_has(
#define vcpu_must_have_cx16() vcpu_must_have(0x00000001, ECX, 13)
#define vcpu_must_have_avx() vcpu_must_have(0x00000001, ECX, 28)
+#ifdef __XEN__
+/*
+ * Note the (subtle?) difference between vcpu_must_have_<feature>() and
+ * host_and_vcpu_must_have(<feature>): The former only checks guest feature
+ * flags, while the latter also checks host ones, i.e. is required to be used
+ * when emulation code is using the same instruction class for carrying out
+ * the actual operation.
+ */
+#define host_and_vcpu_must_have(feat) ({ \
+ generate_exception_if(!cpu_has_##feat, EXC_UD, -1); \
+ vcpu_must_have_##feat(); \
+})
+#else
+#define host_and_vcpu_must_have(feat) vcpu_must_have_##feat()
+#endif
+
static int
in_longmode(
struct x86_emulate_ctxt *ctxt,
@@ -3102,7 +3118,7 @@ x86_emulate(
emulate_fpu_insn_memsrc("fildl", src.val);
break;
case 1: /* fisttp m32i */
- vcpu_must_have_sse3();
+ host_and_vcpu_must_have(sse3);
ea.bytes = 4;
dst = ea;
dst.type = OP_MEM;
@@ -3211,7 +3227,7 @@ x86_emulate(
emulate_fpu_insn_memsrc("fldl", src.val);
break;
case 1: /* fisttp m64i */
- vcpu_must_have_sse3();
+ host_and_vcpu_must_have(sse3);
ea.bytes = 8;
dst = ea;
dst.type = OP_MEM;
@@ -3319,7 +3335,7 @@ x86_emulate(
emulate_fpu_insn_memsrc("filds", src.val);
break;
case 1: /* fisttp m16i */
- vcpu_must_have_sse3();
+ host_and_vcpu_must_have(sse3);
ea.bytes = 2;
dst = ea;
dst.type = OP_MEM;
@@ -4115,9 +4131,9 @@ x86_emulate(
if ( vex.opcx == vex_none )
{
if ( vex.pfx & VEX_PREFIX_DOUBLE_MASK )
- vcpu_must_have_sse2();
+ host_and_vcpu_must_have(sse2);
else
- vcpu_must_have_sse();
+ host_and_vcpu_must_have(sse);
ea.bytes = 16;
SET_SSE_PREFIX(buf[0], vex.pfx);
get_fpu(X86EMUL_FPU_xmm, &fic);
@@ -4128,7 +4144,7 @@ x86_emulate(
((vex.reg != 0xf) &&
((ea.type == OP_MEM) ||
!(vex.pfx & VEX_PREFIX_SCALAR_MASK))));
- vcpu_must_have_avx();
+ host_and_vcpu_must_have(avx);
get_fpu(X86EMUL_FPU_ymm, &fic);
ea.bytes = 16 << vex.l;
}
@@ -4361,16 +4377,16 @@ x86_emulate(
{
case vex_66:
case vex_f3:
- vcpu_must_have_sse2();
+ host_and_vcpu_must_have(sse2);
buf[0] = 0x66; /* movdqa */
get_fpu(X86EMUL_FPU_xmm, &fic);
ea.bytes = 16;
break;
case vex_none:
if ( b != 0xe7 )
- vcpu_must_have_mmx();
+ host_and_vcpu_must_have(mmx);
else
- vcpu_must_have_sse();
+ host_and_vcpu_must_have(sse);
get_fpu(X86EMUL_FPU_mmx, &fic);
ea.bytes = 8;
break;
@@ -4382,7 +4398,7 @@ x86_emulate(
{
fail_if((vex.opcx != vex_0f) || (vex.reg != 0xf) ||
((vex.pfx != vex_66) && (vex.pfx != vex_f3)));
- vcpu_must_have_avx();
+ host_and_vcpu_must_have(avx);
get_fpu(X86EMUL_FPU_ymm, &fic);
ea.bytes = 16 << vex.l;
}
@@ -4688,7 +4704,7 @@ x86_emulate(
generate_exception_if((modrm_reg & 7) != 1, EXC_UD, -1);
generate_exception_if(ea.type != OP_MEM, EXC_UD, -1);
if ( op_bytes == 8 )
- vcpu_must_have_cx16();
+ host_and_vcpu_must_have(cx16);
op_bytes *= 2;
/* Get actual old value. */
[-- Attachment #2: x86emul-host-features.patch --]
[-- Type: text/plain, Size: 4358 bytes --]
x86emul: check host features alongside guest ones where needed
Signed-off-by: Jan Beulich <jbeulich@suse.com>
--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -1093,6 +1093,22 @@ static bool_t vcpu_has(
#define vcpu_must_have_cx16() vcpu_must_have(0x00000001, ECX, 13)
#define vcpu_must_have_avx() vcpu_must_have(0x00000001, ECX, 28)
+#ifdef __XEN__
+/*
+ * Note the (subtle?) difference between vcpu_must_have_<feature>() and
+ * host_and_vcpu_must_have(<feature>): The former only checks guest feature
+ * flags, while the latter also checks host ones, i.e. is required to be used
+ * when emulation code is using the same instruction class for carrying out
+ * the actual operation.
+ */
+#define host_and_vcpu_must_have(feat) ({ \
+ generate_exception_if(!cpu_has_##feat, EXC_UD, -1); \
+ vcpu_must_have_##feat(); \
+})
+#else
+#define host_and_vcpu_must_have(feat) vcpu_must_have_##feat()
+#endif
+
static int
in_longmode(
struct x86_emulate_ctxt *ctxt,
@@ -3102,7 +3118,7 @@ x86_emulate(
emulate_fpu_insn_memsrc("fildl", src.val);
break;
case 1: /* fisttp m32i */
- vcpu_must_have_sse3();
+ host_and_vcpu_must_have(sse3);
ea.bytes = 4;
dst = ea;
dst.type = OP_MEM;
@@ -3211,7 +3227,7 @@ x86_emulate(
emulate_fpu_insn_memsrc("fldl", src.val);
break;
case 1: /* fisttp m64i */
- vcpu_must_have_sse3();
+ host_and_vcpu_must_have(sse3);
ea.bytes = 8;
dst = ea;
dst.type = OP_MEM;
@@ -3319,7 +3335,7 @@ x86_emulate(
emulate_fpu_insn_memsrc("filds", src.val);
break;
case 1: /* fisttp m16i */
- vcpu_must_have_sse3();
+ host_and_vcpu_must_have(sse3);
ea.bytes = 2;
dst = ea;
dst.type = OP_MEM;
@@ -4115,9 +4131,9 @@ x86_emulate(
if ( vex.opcx == vex_none )
{
if ( vex.pfx & VEX_PREFIX_DOUBLE_MASK )
- vcpu_must_have_sse2();
+ host_and_vcpu_must_have(sse2);
else
- vcpu_must_have_sse();
+ host_and_vcpu_must_have(sse);
ea.bytes = 16;
SET_SSE_PREFIX(buf[0], vex.pfx);
get_fpu(X86EMUL_FPU_xmm, &fic);
@@ -4128,7 +4144,7 @@ x86_emulate(
((vex.reg != 0xf) &&
((ea.type == OP_MEM) ||
!(vex.pfx & VEX_PREFIX_SCALAR_MASK))));
- vcpu_must_have_avx();
+ host_and_vcpu_must_have(avx);
get_fpu(X86EMUL_FPU_ymm, &fic);
ea.bytes = 16 << vex.l;
}
@@ -4361,16 +4377,16 @@ x86_emulate(
{
case vex_66:
case vex_f3:
- vcpu_must_have_sse2();
+ host_and_vcpu_must_have(sse2);
buf[0] = 0x66; /* movdqa */
get_fpu(X86EMUL_FPU_xmm, &fic);
ea.bytes = 16;
break;
case vex_none:
if ( b != 0xe7 )
- vcpu_must_have_mmx();
+ host_and_vcpu_must_have(mmx);
else
- vcpu_must_have_sse();
+ host_and_vcpu_must_have(sse);
get_fpu(X86EMUL_FPU_mmx, &fic);
ea.bytes = 8;
break;
@@ -4382,7 +4398,7 @@ x86_emulate(
{
fail_if((vex.opcx != vex_0f) || (vex.reg != 0xf) ||
((vex.pfx != vex_66) && (vex.pfx != vex_f3)));
- vcpu_must_have_avx();
+ host_and_vcpu_must_have(avx);
get_fpu(X86EMUL_FPU_ymm, &fic);
ea.bytes = 16 << vex.l;
}
@@ -4688,7 +4704,7 @@ x86_emulate(
generate_exception_if((modrm_reg & 7) != 1, EXC_UD, -1);
generate_exception_if(ea.type != OP_MEM, EXC_UD, -1);
if ( op_bytes == 8 )
- vcpu_must_have_cx16();
+ host_and_vcpu_must_have(cx16);
op_bytes *= 2;
/* Get actual old value. */
[-- Attachment #3: Type: text/plain, Size: 126 bytes --]
_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel
^ permalink raw reply [flat|nested] 7+ messages in thread
* [PATCH 3/3] x86emul: support MOVBE and CRC32
2016-03-11 17:29 [PATCH 0/3] x86: instruction emulator improvements Jan Beulich
2016-03-11 17:33 ` [PATCH 1/3] x86: rename XMM* features to SSE* Jan Beulich
2016-03-11 17:34 ` [PATCH 2/3] x86emul: check host features alongside guest ones where needed Jan Beulich
@ 2016-03-11 17:35 ` Jan Beulich
2 siblings, 0 replies; 7+ messages in thread
From: Jan Beulich @ 2016-03-11 17:35 UTC (permalink / raw)
To: xen-devel; +Cc: Andrew Cooper, Keir Fraser
[-- Attachment #1: Type: text/plain, Size: 12638 bytes --]
The former in an attempt to at least gradually support all simple data
movement instructions. The latter just because it shares the opcode
with the former.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
--- a/tools/tests/x86_emulator/test_x86_emulator.c
+++ b/tools/tests/x86_emulator/test_x86_emulator.c
@@ -78,7 +78,14 @@ static int cpuid(
unsigned int *edx,
struct x86_emulate_ctxt *ctxt)
{
+ unsigned int leaf = *eax;
+
asm ("cpuid" : "+a" (*eax), "+c" (*ecx), "=d" (*edx), "=b" (*ebx));
+
+ /* The emulator doesn't itself use MOVBE, so we can always run the test. */
+ if ( leaf == 1 )
+ *ecx |= 1U << 22;
+
return X86EMUL_OKAY;
}
@@ -605,6 +612,34 @@ int main(int argc, char **argv)
printf("skipped\n");
#endif
+ printf("%-40s", "Testing movbe (%%ecx),%%eax...");
+ instr[0] = 0x0f; instr[1] = 0x38; instr[2] = 0xf0; instr[3] = 0x01;
+ regs.eflags = 0x200;
+ regs.eip = (unsigned long)&instr[0];
+ regs.ecx = (unsigned long)res;
+ regs.eax = 0x11111111;
+ *res = 0x12345678;
+ rc = x86_emulate(&ctxt, &emulops);
+ if ( (rc != X86EMUL_OKAY) ||
+ (*res != 0x12345678) ||
+ (regs.eax != 0x78563412) ||
+ (regs.eflags != 0x200) ||
+ (regs.eip != (unsigned long)&instr[4]) )
+ goto fail;
+ printf("okay\n");
+
+ printf("%-40s", "Testing movbe %%ax,(%%ecx)...");
+ instr[0] = 0x66; instr[1] = 0x0f; instr[2] = 0x38; instr[3] = 0xf1; instr[4] = 0x01;
+ regs.eip = (unsigned long)&instr[0];
+ rc = x86_emulate(&ctxt, &emulops);
+ if ( (rc != X86EMUL_OKAY) ||
+ (*res != 0x12341234) ||
+ (regs.eax != 0x78563412) ||
+ (regs.eflags != 0x200) ||
+ (regs.eip != (unsigned long)&instr[5]) )
+ goto fail;
+ printf("okay\n");
+
#define decl_insn(which) extern const unsigned char which[], which##_len[]
#define put_insn(which, insn) ".pushsection .test, \"ax\", @progbits\n" \
#which ": " insn "\n" \
--- a/tools/tests/x86_emulator/x86_emulate.c
+++ b/tools/tests/x86_emulator/x86_emulate.c
@@ -12,6 +12,7 @@ typedef bool bool_t;
#define BUG() abort()
#define ASSERT assert
+#define ASSERT_UNREACHABLE() assert(!__LINE__)
#define cpu_has_amd_erratum(nr) 0
#define mark_regs_dirty(r) ((void)(r))
--- a/xen/arch/x86/Rules.mk
+++ b/xen/arch/x86/Rules.mk
@@ -16,6 +16,7 @@ CFLAGS += -msoft-float
$(call cc-options-add,CFLAGS,CC,$(EMBEDDED_EXTRA_CFLAGS))
$(call cc-option-add,CFLAGS,CC,-Wnested-externs)
$(call as-insn-check,CFLAGS,CC,"vmcall",-DHAVE_GAS_VMX)
+$(call as-insn-check,CFLAGS,CC,"crc32 %eax$$(comma)%eax",-DHAVE_GAS_SSE4_2)
$(call as-insn-check,CFLAGS,CC,"invept (%rax)$$(comma)%rax",-DHAVE_GAS_EPT)
$(call as-insn-check,CFLAGS,CC,"rdfsbase %rax",-DHAVE_GAS_FSGSBASE)
$(call as-insn-check,CFLAGS,CC,".equ \"x\"$$(comma)1", \
--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -188,7 +188,7 @@ static uint8_t twobyte_table[256] = {
ImplicitOps, ImplicitOps, ImplicitOps, 0,
ImplicitOps, ImplicitOps, 0, 0,
/* 0x38 - 0x3F */
- 0, 0, 0, 0, 0, 0, 0, 0,
+ DstReg|SrcMem|ModRM, 0, 0, 0, 0, 0, 0, 0,
/* 0x40 - 0x47 */
DstReg|SrcMem|ModRM|Mov, DstReg|SrcMem|ModRM|Mov,
DstReg|SrcMem|ModRM|Mov, DstReg|SrcMem|ModRM|Mov,
@@ -1091,6 +1091,8 @@ static bool_t vcpu_has(
#define vcpu_must_have_sse2() vcpu_must_have(0x00000001, EDX, 26)
#define vcpu_must_have_sse3() vcpu_must_have(0x00000001, ECX, 0)
#define vcpu_must_have_cx16() vcpu_must_have(0x00000001, ECX, 13)
+#define vcpu_must_have_sse4_2() vcpu_must_have(0x00000001, ECX, 20)
+#define vcpu_must_have_movbe() vcpu_must_have(0x00000001, ECX, 22)
#define vcpu_must_have_avx() vcpu_must_have(0x00000001, ECX, 28)
#ifdef __XEN__
@@ -1503,8 +1505,9 @@ x86_emulate(
/* Shadow copy of register state. Committed on successful emulation. */
struct cpu_user_regs _regs = *ctxt->regs;
- uint8_t b, d, sib, sib_index, sib_base, twobyte = 0, rex_prefix = 0;
+ uint8_t b, d, sib, sib_index, sib_base, rex_prefix = 0;
uint8_t modrm = 0, modrm_mod = 0, modrm_reg = 0, modrm_rm = 0;
+ enum { ext_none, ext_0f, ext_0f38 } ext = ext_none;
union vex vex = {};
unsigned int op_bytes, def_op_bytes, ad_bytes, def_ad_bytes;
bool_t lock_prefix = 0;
@@ -1600,9 +1603,18 @@ x86_emulate(
/* Two-byte opcode? */
if ( b == 0x0f )
{
- twobyte = 1;
b = insn_fetch_type(uint8_t);
d = twobyte_table[b];
+ switch ( b )
+ {
+ default:
+ ext = ext_0f;
+ break;
+ case 0x38:
+ b = insn_fetch_type(uint8_t);
+ ext = ext_0f38;
+ break;
+ }
}
/* Unrecognised? */
@@ -1619,7 +1631,7 @@ x86_emulate(
modrm = insn_fetch_type(uint8_t);
modrm_mod = (modrm & 0xc0) >> 6;
- if ( !twobyte && ((b & ~1) == 0xc4) )
+ if ( !ext && ((b & ~1) == 0xc4) )
switch ( def_ad_bytes )
{
default:
@@ -1665,12 +1677,12 @@ x86_emulate(
rex_prefix |= REX_R;
fail_if(vex.opcx != vex_0f);
- twobyte = 1;
+ ext = ext_0f;
b = insn_fetch_type(uint8_t);
d = twobyte_table[b];
/* Unrecognised? */
- if ( d == 0 )
+ if ( d == 0 || b == 0x38 )
goto cannot_emulate;
modrm = insn_fetch_type(uint8_t);
@@ -1756,7 +1768,7 @@ x86_emulate(
{
ea.mem.seg = x86_seg_ss;
ea.mem.off += _regs.esp;
- if ( !twobyte && (b == 0x8f) )
+ if ( !ext && (b == 0x8f) )
/* POP <rm> computes its EA post increment. */
ea.mem.off += ((mode_64bit() && (op_bytes == 4))
? 8 : op_bytes);
@@ -1791,12 +1803,12 @@ x86_emulate(
((op_bytes == 8) ? 4 : op_bytes);
else if ( (d & SrcMask) == SrcImmByte )
ea.mem.off += 1;
- else if ( !twobyte && ((b & 0xfe) == 0xf6) &&
+ else if ( !ext && ((b & 0xfe) == 0xf6) &&
((modrm_reg & 7) <= 1) )
/* Special case in Grp3: test has immediate operand. */
ea.mem.off += (d & ByteOp) ? 1
: ((op_bytes == 8) ? 4 : op_bytes);
- else if ( twobyte && ((b & 0xf7) == 0xa4) )
+ else if ( ext == ext_0f && ((b & 0xf7) == 0xa4) )
/* SHLD/SHRD with immediate byte third operand. */
ea.mem.off++;
break;
@@ -1815,7 +1827,9 @@ x86_emulate(
ea.mem.seg = override_seg;
/* Early operand adjustments. */
- if ( !twobyte )
+ switch ( ext )
+ {
+ case ext_none:
switch ( b )
{
case 0xf6 ... 0xf7: /* Grp3 */
@@ -1848,6 +1862,29 @@ x86_emulate(
}
break;
}
+ break;
+
+ case ext_0f:
+ break;
+
+ case ext_0f38:
+ switch ( b )
+ {
+ case 0xf0: /* movbe / crc32 */
+ d |= repne_prefix() ? ByteOp : Mov;
+ break;
+ case 0xf1: /* movbe / crc32 */
+ if ( !repne_prefix() )
+ d = (d & ~(DstMask | SrcMask)) | DstMem | SrcReg | Mov;
+ break;
+ default: /* Until it is worth making this table based ... */
+ goto cannot_emulate;
+ }
+ break;
+
+ default:
+ ASSERT_UNREACHABLE();
+ }
/* Decode and fetch the source operand: register, memory or immediate. */
switch ( d & SrcMask )
@@ -2006,8 +2043,18 @@ x86_emulate(
break;
}
- if ( twobyte )
- goto twobyte_insn;
+ switch ( ext )
+ {
+ case ext_none:
+ break;
+ case ext_0f:
+ goto ext_0f_insn;
+ case ext_0f38:
+ goto ext_0f38_insn;
+ default:
+ ASSERT_UNREACHABLE();
+ goto cannot_emulate;
+ }
switch ( b )
{
@@ -2050,7 +2097,7 @@ x86_emulate(
struct segment_register reg;
src.val = x86_seg_es;
push_seg:
- generate_exception_if(mode_64bit() && !twobyte, EXC_UD, -1);
+ generate_exception_if(mode_64bit() && !ext, EXC_UD, -1);
fail_if(ops->read_segment == NULL);
if ( (rc = ops->read_segment(src.val, &reg, ctxt)) != 0 )
return rc;
@@ -2066,7 +2113,7 @@ x86_emulate(
case 0x07: /* pop %%es */
src.val = x86_seg_es;
pop_seg:
- generate_exception_if(mode_64bit() && !twobyte, EXC_UD, -1);
+ generate_exception_if(mode_64bit() && !ext, EXC_UD, -1);
fail_if(ops->write_segment == NULL);
/* 64-bit mode: POP defaults to a 64-bit operand. */
if ( mode_64bit() && (op_bytes == 4) )
@@ -2721,7 +2768,7 @@ x86_emulate(
unsigned long sel;
dst.val = x86_seg_es;
les: /* dst.val identifies the segment */
- generate_exception_if(mode_64bit() && !twobyte, EXC_UD, -1);
+ generate_exception_if(mode_64bit() && !ext, EXC_UD, -1);
generate_exception_if(src.type != OP_MEM, EXC_UD, -1);
if ( (rc = read_ulong(src.mem.seg, src.mem.off + src.bytes,
&sel, 2, ctxt, ops)) != 0 )
@@ -3862,7 +3909,7 @@ x86_emulate(
put_stub(stub);
return rc;
- twobyte_insn:
+ ext_0f_insn:
switch ( b )
{
case 0x00: /* Grp6 */
@@ -4765,6 +4812,72 @@ x86_emulate(
}
goto writeback;
+ ext_0f38_insn:
+ switch ( b )
+ {
+ case 0xf0: case 0xf1: /* movbe / crc32 */
+ generate_exception_if(repe_prefix(), EXC_UD, -1);
+ if ( repne_prefix() )
+ {
+ /* crc32 */
+#ifdef HAVE_GAS_SSE4_2
+ host_and_vcpu_must_have(sse4_2);
+ dst.bytes = rex_prefix & REX_W ? 8 : 4;
+ switch ( op_bytes )
+ {
+ case 1:
+ asm ( "crc32b %1,%k0" : "+r" (dst.val)
+ : "qm" (*(uint8_t *)&src.val) );
+ break;
+ case 2:
+ asm ( "crc32w %1,%k0" : "+r" (dst.val)
+ : "rm" (*(uint16_t *)&src.val) );
+ break;
+ case 4:
+ asm ( "crc32l %1,%k0" : "+r" (dst.val)
+ : "rm" (*(uint32_t *)&src.val) );
+ break;
+# ifdef __x86_64__
+ case 8:
+ asm ( "crc32q %1,%0" : "+r" (dst.val) : "rm" (src.val) );
+ break;
+# endif
+ default:
+ ASSERT_UNREACHABLE();
+ }
+#else /* !HAVE_GAS_SSE4_2 */
+ goto cannot_emulate;
+#endif
+ }
+ else
+ {
+ /* movbe */
+ vcpu_must_have_movbe();
+ switch ( op_bytes )
+ {
+ case 2:
+ asm ( "xchg %h0,%b0" : "=Q" (dst.val)
+ : "0" (*(uint32_t *)&src.val) );
+ break;
+ case 4:
+#ifdef __x86_64__
+ asm ( "bswap %k0" : "=r" (dst.val)
+ : "0" (*(uint32_t *)&src.val) );
+ break;
+ case 8:
+#endif
+ asm ( "bswap %0" : "=r" (dst.val) : "0" (src.val) );
+ break;
+ default:
+ ASSERT_UNREACHABLE();
+ }
+ }
+ break;
+ default:
+ goto cannot_emulate;
+ }
+ goto writeback;
+
cannot_emulate:
_put_fpu();
put_stub(stub);
--- a/xen/include/asm-x86/cpufeature.h
+++ b/xen/include/asm-x86/cpufeature.h
@@ -189,6 +189,7 @@
#define cpu_has_sse boot_cpu_has(X86_FEATURE_SSE)
#define cpu_has_sse2 boot_cpu_has(X86_FEATURE_SSE2)
#define cpu_has_sse3 boot_cpu_has(X86_FEATURE_SSE3)
+#define cpu_has_sse4_2 boot_cpu_has(X86_FEATURE_SSE4_2)
#define cpu_has_ht boot_cpu_has(X86_FEATURE_HT)
#define cpu_has_mp 1
#define cpu_has_nx boot_cpu_has(X86_FEATURE_NX)
[-- Attachment #2: x86emul-movbe.patch --]
[-- Type: text/plain, Size: 12670 bytes --]
x86emul: support MOVBE and CRC32
The former in an attempt to at least gradually support all simple data
movement instructions. The latter just because it shares the opcode
with the former.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
--- a/tools/tests/x86_emulator/test_x86_emulator.c
+++ b/tools/tests/x86_emulator/test_x86_emulator.c
@@ -78,7 +78,14 @@ static int cpuid(
unsigned int *edx,
struct x86_emulate_ctxt *ctxt)
{
+ unsigned int leaf = *eax;
+
asm ("cpuid" : "+a" (*eax), "+c" (*ecx), "=d" (*edx), "=b" (*ebx));
+
+ /* The emulator doesn't itself use MOVBE, so we can always run the test. */
+ if ( leaf == 1 )
+ *ecx |= 1U << 22;
+
return X86EMUL_OKAY;
}
@@ -605,6 +612,34 @@ int main(int argc, char **argv)
printf("skipped\n");
#endif
+ printf("%-40s", "Testing movbe (%%ecx),%%eax...");
+ instr[0] = 0x0f; instr[1] = 0x38; instr[2] = 0xf0; instr[3] = 0x01;
+ regs.eflags = 0x200;
+ regs.eip = (unsigned long)&instr[0];
+ regs.ecx = (unsigned long)res;
+ regs.eax = 0x11111111;
+ *res = 0x12345678;
+ rc = x86_emulate(&ctxt, &emulops);
+ if ( (rc != X86EMUL_OKAY) ||
+ (*res != 0x12345678) ||
+ (regs.eax != 0x78563412) ||
+ (regs.eflags != 0x200) ||
+ (regs.eip != (unsigned long)&instr[4]) )
+ goto fail;
+ printf("okay\n");
+
+ printf("%-40s", "Testing movbe %%ax,(%%ecx)...");
+ instr[0] = 0x66; instr[1] = 0x0f; instr[2] = 0x38; instr[3] = 0xf1; instr[4] = 0x01;
+ regs.eip = (unsigned long)&instr[0];
+ rc = x86_emulate(&ctxt, &emulops);
+ if ( (rc != X86EMUL_OKAY) ||
+ (*res != 0x12341234) ||
+ (regs.eax != 0x78563412) ||
+ (regs.eflags != 0x200) ||
+ (regs.eip != (unsigned long)&instr[5]) )
+ goto fail;
+ printf("okay\n");
+
#define decl_insn(which) extern const unsigned char which[], which##_len[]
#define put_insn(which, insn) ".pushsection .test, \"ax\", @progbits\n" \
#which ": " insn "\n" \
--- a/tools/tests/x86_emulator/x86_emulate.c
+++ b/tools/tests/x86_emulator/x86_emulate.c
@@ -12,6 +12,7 @@ typedef bool bool_t;
#define BUG() abort()
#define ASSERT assert
+#define ASSERT_UNREACHABLE() assert(!__LINE__)
#define cpu_has_amd_erratum(nr) 0
#define mark_regs_dirty(r) ((void)(r))
--- a/xen/arch/x86/Rules.mk
+++ b/xen/arch/x86/Rules.mk
@@ -16,6 +16,7 @@ CFLAGS += -msoft-float
$(call cc-options-add,CFLAGS,CC,$(EMBEDDED_EXTRA_CFLAGS))
$(call cc-option-add,CFLAGS,CC,-Wnested-externs)
$(call as-insn-check,CFLAGS,CC,"vmcall",-DHAVE_GAS_VMX)
+$(call as-insn-check,CFLAGS,CC,"crc32 %eax$$(comma)%eax",-DHAVE_GAS_SSE4_2)
$(call as-insn-check,CFLAGS,CC,"invept (%rax)$$(comma)%rax",-DHAVE_GAS_EPT)
$(call as-insn-check,CFLAGS,CC,"rdfsbase %rax",-DHAVE_GAS_FSGSBASE)
$(call as-insn-check,CFLAGS,CC,".equ \"x\"$$(comma)1", \
--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -188,7 +188,7 @@ static uint8_t twobyte_table[256] = {
ImplicitOps, ImplicitOps, ImplicitOps, 0,
ImplicitOps, ImplicitOps, 0, 0,
/* 0x38 - 0x3F */
- 0, 0, 0, 0, 0, 0, 0, 0,
+ DstReg|SrcMem|ModRM, 0, 0, 0, 0, 0, 0, 0,
/* 0x40 - 0x47 */
DstReg|SrcMem|ModRM|Mov, DstReg|SrcMem|ModRM|Mov,
DstReg|SrcMem|ModRM|Mov, DstReg|SrcMem|ModRM|Mov,
@@ -1091,6 +1091,8 @@ static bool_t vcpu_has(
#define vcpu_must_have_sse2() vcpu_must_have(0x00000001, EDX, 26)
#define vcpu_must_have_sse3() vcpu_must_have(0x00000001, ECX, 0)
#define vcpu_must_have_cx16() vcpu_must_have(0x00000001, ECX, 13)
+#define vcpu_must_have_sse4_2() vcpu_must_have(0x00000001, ECX, 20)
+#define vcpu_must_have_movbe() vcpu_must_have(0x00000001, ECX, 22)
#define vcpu_must_have_avx() vcpu_must_have(0x00000001, ECX, 28)
#ifdef __XEN__
@@ -1503,8 +1505,9 @@ x86_emulate(
/* Shadow copy of register state. Committed on successful emulation. */
struct cpu_user_regs _regs = *ctxt->regs;
- uint8_t b, d, sib, sib_index, sib_base, twobyte = 0, rex_prefix = 0;
+ uint8_t b, d, sib, sib_index, sib_base, rex_prefix = 0;
uint8_t modrm = 0, modrm_mod = 0, modrm_reg = 0, modrm_rm = 0;
+ enum { ext_none, ext_0f, ext_0f38 } ext = ext_none;
union vex vex = {};
unsigned int op_bytes, def_op_bytes, ad_bytes, def_ad_bytes;
bool_t lock_prefix = 0;
@@ -1600,9 +1603,18 @@ x86_emulate(
/* Two-byte opcode? */
if ( b == 0x0f )
{
- twobyte = 1;
b = insn_fetch_type(uint8_t);
d = twobyte_table[b];
+ switch ( b )
+ {
+ default:
+ ext = ext_0f;
+ break;
+ case 0x38:
+ b = insn_fetch_type(uint8_t);
+ ext = ext_0f38;
+ break;
+ }
}
/* Unrecognised? */
@@ -1619,7 +1631,7 @@ x86_emulate(
modrm = insn_fetch_type(uint8_t);
modrm_mod = (modrm & 0xc0) >> 6;
- if ( !twobyte && ((b & ~1) == 0xc4) )
+ if ( !ext && ((b & ~1) == 0xc4) )
switch ( def_ad_bytes )
{
default:
@@ -1665,12 +1677,12 @@ x86_emulate(
rex_prefix |= REX_R;
fail_if(vex.opcx != vex_0f);
- twobyte = 1;
+ ext = ext_0f;
b = insn_fetch_type(uint8_t);
d = twobyte_table[b];
/* Unrecognised? */
- if ( d == 0 )
+ if ( d == 0 || b == 0x38 )
goto cannot_emulate;
modrm = insn_fetch_type(uint8_t);
@@ -1756,7 +1768,7 @@ x86_emulate(
{
ea.mem.seg = x86_seg_ss;
ea.mem.off += _regs.esp;
- if ( !twobyte && (b == 0x8f) )
+ if ( !ext && (b == 0x8f) )
/* POP <rm> computes its EA post increment. */
ea.mem.off += ((mode_64bit() && (op_bytes == 4))
? 8 : op_bytes);
@@ -1791,12 +1803,12 @@ x86_emulate(
((op_bytes == 8) ? 4 : op_bytes);
else if ( (d & SrcMask) == SrcImmByte )
ea.mem.off += 1;
- else if ( !twobyte && ((b & 0xfe) == 0xf6) &&
+ else if ( !ext && ((b & 0xfe) == 0xf6) &&
((modrm_reg & 7) <= 1) )
/* Special case in Grp3: test has immediate operand. */
ea.mem.off += (d & ByteOp) ? 1
: ((op_bytes == 8) ? 4 : op_bytes);
- else if ( twobyte && ((b & 0xf7) == 0xa4) )
+ else if ( ext == ext_0f && ((b & 0xf7) == 0xa4) )
/* SHLD/SHRD with immediate byte third operand. */
ea.mem.off++;
break;
@@ -1815,7 +1827,9 @@ x86_emulate(
ea.mem.seg = override_seg;
/* Early operand adjustments. */
- if ( !twobyte )
+ switch ( ext )
+ {
+ case ext_none:
switch ( b )
{
case 0xf6 ... 0xf7: /* Grp3 */
@@ -1848,6 +1862,29 @@ x86_emulate(
}
break;
}
+ break;
+
+ case ext_0f:
+ break;
+
+ case ext_0f38:
+ switch ( b )
+ {
+ case 0xf0: /* movbe / crc32 */
+ d |= repne_prefix() ? ByteOp : Mov;
+ break;
+ case 0xf1: /* movbe / crc32 */
+ if ( !repne_prefix() )
+ d = (d & ~(DstMask | SrcMask)) | DstMem | SrcReg | Mov;
+ break;
+ default: /* Until it is worth making this table based ... */
+ goto cannot_emulate;
+ }
+ break;
+
+ default:
+ ASSERT_UNREACHABLE();
+ }
/* Decode and fetch the source operand: register, memory or immediate. */
switch ( d & SrcMask )
@@ -2006,8 +2043,18 @@ x86_emulate(
break;
}
- if ( twobyte )
- goto twobyte_insn;
+ switch ( ext )
+ {
+ case ext_none:
+ break;
+ case ext_0f:
+ goto ext_0f_insn;
+ case ext_0f38:
+ goto ext_0f38_insn;
+ default:
+ ASSERT_UNREACHABLE();
+ goto cannot_emulate;
+ }
switch ( b )
{
@@ -2050,7 +2097,7 @@ x86_emulate(
struct segment_register reg;
src.val = x86_seg_es;
push_seg:
- generate_exception_if(mode_64bit() && !twobyte, EXC_UD, -1);
+ generate_exception_if(mode_64bit() && !ext, EXC_UD, -1);
fail_if(ops->read_segment == NULL);
if ( (rc = ops->read_segment(src.val, &reg, ctxt)) != 0 )
return rc;
@@ -2066,7 +2113,7 @@ x86_emulate(
case 0x07: /* pop %%es */
src.val = x86_seg_es;
pop_seg:
- generate_exception_if(mode_64bit() && !twobyte, EXC_UD, -1);
+ generate_exception_if(mode_64bit() && !ext, EXC_UD, -1);
fail_if(ops->write_segment == NULL);
/* 64-bit mode: POP defaults to a 64-bit operand. */
if ( mode_64bit() && (op_bytes == 4) )
@@ -2721,7 +2768,7 @@ x86_emulate(
unsigned long sel;
dst.val = x86_seg_es;
les: /* dst.val identifies the segment */
- generate_exception_if(mode_64bit() && !twobyte, EXC_UD, -1);
+ generate_exception_if(mode_64bit() && !ext, EXC_UD, -1);
generate_exception_if(src.type != OP_MEM, EXC_UD, -1);
if ( (rc = read_ulong(src.mem.seg, src.mem.off + src.bytes,
&sel, 2, ctxt, ops)) != 0 )
@@ -3862,7 +3909,7 @@ x86_emulate(
put_stub(stub);
return rc;
- twobyte_insn:
+ ext_0f_insn:
switch ( b )
{
case 0x00: /* Grp6 */
@@ -4765,6 +4812,72 @@ x86_emulate(
}
goto writeback;
+ ext_0f38_insn:
+ switch ( b )
+ {
+ case 0xf0: case 0xf1: /* movbe / crc32 */
+ generate_exception_if(repe_prefix(), EXC_UD, -1);
+ if ( repne_prefix() )
+ {
+ /* crc32 */
+#ifdef HAVE_GAS_SSE4_2
+ host_and_vcpu_must_have(sse4_2);
+ dst.bytes = rex_prefix & REX_W ? 8 : 4;
+ switch ( op_bytes )
+ {
+ case 1:
+ asm ( "crc32b %1,%k0" : "+r" (dst.val)
+ : "qm" (*(uint8_t *)&src.val) );
+ break;
+ case 2:
+ asm ( "crc32w %1,%k0" : "+r" (dst.val)
+ : "rm" (*(uint16_t *)&src.val) );
+ break;
+ case 4:
+ asm ( "crc32l %1,%k0" : "+r" (dst.val)
+ : "rm" (*(uint32_t *)&src.val) );
+ break;
+# ifdef __x86_64__
+ case 8:
+ asm ( "crc32q %1,%0" : "+r" (dst.val) : "rm" (src.val) );
+ break;
+# endif
+ default:
+ ASSERT_UNREACHABLE();
+ }
+#else /* !HAVE_GAS_SSE4_2 */
+ goto cannot_emulate;
+#endif
+ }
+ else
+ {
+ /* movbe */
+ vcpu_must_have_movbe();
+ switch ( op_bytes )
+ {
+ case 2:
+ asm ( "xchg %h0,%b0" : "=Q" (dst.val)
+ : "0" (*(uint32_t *)&src.val) );
+ break;
+ case 4:
+#ifdef __x86_64__
+ asm ( "bswap %k0" : "=r" (dst.val)
+ : "0" (*(uint32_t *)&src.val) );
+ break;
+ case 8:
+#endif
+ asm ( "bswap %0" : "=r" (dst.val) : "0" (src.val) );
+ break;
+ default:
+ ASSERT_UNREACHABLE();
+ }
+ }
+ break;
+ default:
+ goto cannot_emulate;
+ }
+ goto writeback;
+
cannot_emulate:
_put_fpu();
put_stub(stub);
--- a/xen/include/asm-x86/cpufeature.h
+++ b/xen/include/asm-x86/cpufeature.h
@@ -189,6 +189,7 @@
#define cpu_has_sse boot_cpu_has(X86_FEATURE_SSE)
#define cpu_has_sse2 boot_cpu_has(X86_FEATURE_SSE2)
#define cpu_has_sse3 boot_cpu_has(X86_FEATURE_SSE3)
+#define cpu_has_sse4_2 boot_cpu_has(X86_FEATURE_SSE4_2)
#define cpu_has_ht boot_cpu_has(X86_FEATURE_HT)
#define cpu_has_mp 1
#define cpu_has_nx boot_cpu_has(X86_FEATURE_NX)
[-- Attachment #3: Type: text/plain, Size: 126 bytes --]
_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel
^ permalink raw reply [flat|nested] 7+ messages in thread