* [PATCH v5 00/14] x86: emulator enhancements
@ 2018-03-15 12:57 Jan Beulich
  2018-03-15 13:02 ` [PATCH v5 01/14] x86emul: support 3DNow! insns Jan Beulich
                   ` (13 more replies)
  0 siblings, 14 replies; 33+ messages in thread
From: Jan Beulich @ 2018-03-15 12:57 UTC (permalink / raw)
  To: xen-devel; +Cc: George Dunlap, Andrew Cooper

01: support 3DNow! insns
02: place test blobs in executable section
03: abstract out XCRn accesses
04: adjust_bnd() should check XCR0
05: eliminate custom #MF/#XM handling
06: tell cmpxchg hook whether LOCK is in effect
07: correctly handle CMPXCHG* comparison failures
08: add read-modify-write hook
09: also handle shifts through ->rmw()
10: HVM: do actual CMPXCHG in hvmemul_cmpxchg()
11: HVM: make use of new read-modify-write emulator hook
12: HVM: use x86emul_write_xcr()
13: shadow: fully move unmap-dest into common code
14: shadow: fold sh_x86_emulate_{write,cmpxchg}() into their only callers

Signed-off-by: Jan Beulich <jbeulich@suse.com>

v5: Mainly patches 1 and 3 changed, while patches 9 and 12 are new.



* [PATCH v5 01/14] x86emul: support 3DNow! insns
  2018-03-15 12:57 [PATCH v5 00/14] x86: emulator enhancements Jan Beulich
@ 2018-03-15 13:02 ` Jan Beulich
  2018-03-15 13:24   ` Andrew Cooper
  2018-03-15 13:03 ` [PATCH v5 02/14] x86emul: place test blobs in executable section Jan Beulich
                   ` (12 subsequent siblings)
  13 siblings, 1 reply; 33+ messages in thread
From: Jan Beulich @ 2018-03-15 13:02 UTC (permalink / raw)
  To: xen-devel; +Cc: George Dunlap, Andrew Cooper

Yes, recent AMD CPUs don't support these insns anymore, but I think the
emulator should nevertheless cope with them.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
---
v5: Correct pfpnacc table entry. Correct pfcmpgt and pswapd comments.
    Check the base table before the extensions one.
v4: Fix PI2FW table entry. Add comment to _3dnow{,_ext}_table[]
    describing the encoding.
v3: Re-base.

--- a/.gitignore
+++ b/.gitignore
@@ -238,6 +238,7 @@ tools/security/xen/*
 tools/security/xensec_tool
 tools/tests/x86_emulator/*.bin
 tools/tests/x86_emulator/*.tmp
+tools/tests/x86_emulator/3dnow*.[ch]
 tools/tests/x86_emulator/asm
 tools/tests/x86_emulator/avx*.[ch]
 tools/tests/x86_emulator/blowfish.h
--- a/tools/tests/x86_emulator/Makefile
+++ b/tools/tests/x86_emulator/Makefile
@@ -11,7 +11,7 @@ all: $(TARGET)
 run: $(TARGET)
 	./$(TARGET)
 
-SIMD := sse sse2 sse4 avx avx2 xop
+SIMD := 3dnow sse sse2 sse4 avx avx2 xop
 FMA := fma4 fma
 SG := avx2-sg
 TESTCASES := blowfish $(SIMD) $(FMA) $(SG)
@@ -19,6 +19,9 @@ TESTCASES := blowfish $(SIMD) $(FMA) $(S
 blowfish-cflags := ""
 blowfish-cflags-x86_32 := "-mno-accumulate-outgoing-args -Dstatic="
 
+3dnow-vecs := 8
+3dnow-ints :=
+3dnow-flts := 4
 sse-vecs := 16
 sse-ints :=
 sse-flts := 4
@@ -49,8 +52,13 @@ xop-ints := 1 2 4 8
 xop-flts := $(avx-flts)
 
 # For AVX and later, have the compiler avoid XMM0 to widen coverage of
-# the VEX.vvvv checks in the emulator.
-non-sse = $(if $(filter sse%,$(1)),,-ffixed-xmm0)
+# the VEX.vvvv checks in the emulator.  For 3DNow!, however, force SSE
+# use for floating point operations, to avoid mixing MMX and FPU register
+# uses.  Also enable 3DNow! extensions, but note that we can't use 3dnowa
+# as the test flavor right away, since -m3dnowa is only understood by
+# gcc 7.x and newer (older ones want a specific machine model instead).
+3dnowa := $(call cc-option,$(CC),-m3dnowa,-march=k8)
+non-sse = $(if $(filter sse%,$(1)),,$(if $(filter 3dnow%,$(1)),-msse -mfpmath=sse $(3dnowa),-ffixed-xmm0))
 
 define simd-defs
 $(1)-cflags := \
@@ -81,8 +89,9 @@ $(addsuffix .h,$(TESTCASES)): %.h: %.c t
 	$(foreach arch,$(filter-out $(XEN_COMPILE_ARCH),x86_32) $(XEN_COMPILE_ARCH), \
 	    for cflags in $($*-cflags) $($*-cflags-$(arch)); do \
 		$(MAKE) -f testcase.mk TESTCASE=$* XEN_TARGET_ARCH=$(arch) $*-cflags="$$cflags" all; \
+		prefix=$(shell echo $(subst -,_,$*) | sed -e 's,^\([0-9]\),_\1,'); \
 		flavor=$$(echo $${cflags} | sed -e 's, .*,,' -e 'y,-=,__,') ; \
-		(echo "static const unsigned int $(subst -,_,$*)_$(arch)$${flavor}[] = {"; \
+		(echo "static const unsigned int $${prefix}_$(arch)$${flavor}[] = {"; \
 		 od -v -t x $*.bin | sed -e 's/^[0-9]* /0x/' -e 's/ /, 0x/g' -e 's/$$/,/'; \
 		 echo "};") >>$@.new; \
 		rm -f $*.bin; \
--- a/tools/tests/x86_emulator/simd.c
+++ b/tools/tests/x86_emulator/simd.c
@@ -48,6 +48,8 @@ static inline bool _to_bool(byte_vec_t b
 
 #if VEC_SIZE == FLOAT_SIZE
 # define to_int(x) ((vec_t){ (int)(x)[0] })
+#elif VEC_SIZE == 8 && FLOAT_SIZE == 4 && defined(__3dNOW__)
+# define to_int(x) __builtin_ia32_pi2fd(__builtin_ia32_pf2id(x))
 #elif VEC_SIZE == 16 && defined(__SSE2__)
 # if FLOAT_SIZE == 4
 #  define to_int(x) __builtin_ia32_cvtdq2ps(__builtin_ia32_cvtps2dq(x))
@@ -70,7 +72,24 @@ static inline bool _to_bool(byte_vec_t b
 })
 #endif
 
-#if FLOAT_SIZE == 4 && defined(__SSE__)
+#if VEC_SIZE == 8 && FLOAT_SIZE == 4 && defined(__3dNOW_A__)
+# define max __builtin_ia32_pfmax
+# define min __builtin_ia32_pfmin
+# define recip(x) ({ \
+    vec_t t_ = __builtin_ia32_pfrcp(x); \
+    touch(x); \
+    t_[1] = __builtin_ia32_pfrcp(__builtin_ia32_pswapdsf(x))[0]; \
+    touch(x); \
+    __builtin_ia32_pfrcpit2(__builtin_ia32_pfrcpit1(t_, x), t_); \
+})
+# define rsqrt(x) ({ \
+    vec_t t_ = __builtin_ia32_pfrsqrt(x); \
+    touch(x); \
+    t_[1] = __builtin_ia32_pfrsqrt(__builtin_ia32_pswapdsf(x))[0]; \
+    touch(x); \
+    __builtin_ia32_pfrcpit2(__builtin_ia32_pfrsqit1(__builtin_ia32_pfmul(t_, t_), x), t_); \
+})
+#elif FLOAT_SIZE == 4 && defined(__SSE__)
 # if VEC_SIZE == 32 && defined(__AVX__)
 #  if defined(__AVX2__)
 #   define broadcast(x) \
--- a/tools/tests/x86_emulator/test_x86_emulator.c
+++ b/tools/tests/x86_emulator/test_x86_emulator.c
@@ -5,6 +5,7 @@
 
 #include "x86-emulate.h"
 #include "blowfish.h"
+#include "3dnow.h"
 #include "sse.h"
 #include "sse2.h"
 #include "sse4.h"
@@ -28,6 +29,11 @@ static bool blowfish_check_regs(const st
     return regs->eax == 2 && regs->edx == 1;
 }
 
+static bool simd_check__3dnow(void)
+{
+    return cpu_has_3dnow_ext && cpu_has_sse;
+}
+
 static bool simd_check_sse(void)
 {
     return cpu_has_sse;
@@ -117,6 +123,7 @@ static const struct {
 #else
 # define SIMD(desc, feat, form) SIMD_(32, desc, feat, form)
 #endif
+    SIMD(3DNow! single,          _3dnow,     8f4),
     SIMD(SSE scalar single,      sse,         f4),
     SIMD(SSE packed single,      sse,       16f4),
     SIMD(SSE2 scalar single,     sse2,        f4),
--- a/tools/tests/x86_emulator/x86-emulate.h
+++ b/tools/tests/x86_emulator/x86-emulate.h
@@ -198,6 +198,12 @@ static inline uint64_t xgetbv(uint32_t x
     (res.b & (1U << 8)) != 0; \
 })
 
+#define cpu_has_3dnow_ext ({ \
+    struct cpuid_leaf res; \
+    emul_test_cpuid(0x80000001, 0, &res, NULL); \
+    (res.d & (1U << 30)) != 0; \
+})
+
 #define cpu_has_sse4a ({ \
     struct cpuid_leaf res; \
     emul_test_cpuid(0x80000001, 0, &res, NULL); \
--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -356,6 +356,41 @@ static const struct twobyte_table {
 };
 
 /*
+ * The next two tables are indexed by the high nibble of the opcode
+ * extension byte (the one that's encoded like an immediate), with each
+ * table element then bit-indexed by the low nibble of that byte.
+ */
+static const uint16_t _3dnow_table[16] = {
+    [0x0] = (1 << 0xd) /* pi2fd */,
+    [0x1] = (1 << 0xd) /* pf2id */,
+    [0x9] = (1 << 0x0) /* pfcmpge */ |
+            (1 << 0x4) /* pfmin */ |
+            (1 << 0x6) /* pfrcp */ |
+            (1 << 0x7) /* pfrsqrt */ |
+            (1 << 0xa) /* pfsub */ |
+            (1 << 0xe) /* pfadd */,
+    [0xa] = (1 << 0x0) /* pfcmpgt */ |
+            (1 << 0x4) /* pfmax */ |
+            (1 << 0x6) /* pfrcpit1 */ |
+            (1 << 0x7) /* pfrsqit1 */ |
+            (1 << 0xa) /* pfsubr */ |
+            (1 << 0xe) /* pfacc */,
+    [0xb] = (1 << 0x0) /* pfcmpeq */ |
+            (1 << 0x4) /* pfmul */ |
+            (1 << 0x6) /* pfrcpit2 */ |
+            (1 << 0x7) /* pmulhrw */ |
+            (1 << 0xf) /* pavgusb */,
+};
+
+static const uint16_t _3dnow_ext_table[16] = {
+    [0x0] = (1 << 0xc) /* pi2fw */,
+    [0x1] = (1 << 0xc) /* pf2iw */,
+    [0x8] = (1 << 0xa) /* pfnacc */ |
+            (1 << 0xe) /* pfpnacc */,
+    [0xb] = (1 << 0xb) /* pswapd */,
+};
+
+/*
  * "two_op" and "four_op" below refer to the number of register operands
  * (one of which possibly also allowing to be a memory one). The named
  * operand counts do not include any immediate operands.
@@ -1659,6 +1694,8 @@ static bool vcpu_has(
 #define vcpu_has_rdrand()      vcpu_has(         1, ECX, 30, ctxt, ops)
 #define vcpu_has_mmxext()     (vcpu_has(0x80000001, EDX, 22, ctxt, ops) || \
                                vcpu_has_sse())
+#define vcpu_has_3dnow_ext()   vcpu_has(0x80000001, EDX, 30, ctxt, ops)
+#define vcpu_has_3dnow()       vcpu_has(0x80000001, EDX, 31, ctxt, ops)
 #define vcpu_has_lahf_lm()     vcpu_has(0x80000001, ECX,  0, ctxt, ops)
 #define vcpu_has_cr8_legacy()  vcpu_has(0x80000001, ECX,  4, ctxt, ops)
 #define vcpu_has_lzcnt()       vcpu_has(0x80000001, ECX,  5, ctxt, ops)
@@ -5374,6 +5411,26 @@ x86_emulate(
     case X86EMUL_OPC(0x0f, 0x19) ... X86EMUL_OPC(0x0f, 0x1f): /* nop */
         break;
 
+    case X86EMUL_OPC(0x0f, 0x0e): /* femms */
+        host_and_vcpu_must_have(3dnow);
+        asm volatile ( "femms" );
+        break;
+
+    case X86EMUL_OPC(0x0f, 0x0f): /* 3DNow! */
+        if ( _3dnow_table[(imm1 >> 4) & 0xf] & (1 << (imm1 & 0xf)) )
+            host_and_vcpu_must_have(3dnow);
+        else if ( _3dnow_ext_table[(imm1 >> 4) & 0xf] & (1 << (imm1 & 0xf)) )
+            host_and_vcpu_must_have(3dnow_ext);
+        else
+            generate_exception(EXC_UD);
+
+        get_fpu(X86EMUL_FPU_mmx, &fic);
+
+        d = DstReg | SrcMem;
+        op_bytes = 8;
+        state->simd_size = simd_other;
+        goto simd_0f_imm8;
+
 #define CASE_SIMD_PACKED_INT(pfx, opc)       \
     case X86EMUL_OPC(pfx, opc):              \
     case X86EMUL_OPC_66(pfx, opc)
--- a/xen/include/asm-x86/cpufeature.h
+++ b/xen/include/asm-x86/cpufeature.h
@@ -71,6 +71,8 @@
                                  && boot_cpu_has(X86_FEATURE_FFXSR))
 #define cpu_has_page1gb         boot_cpu_has(X86_FEATURE_PAGE1GB)
 #define cpu_has_rdtscp          boot_cpu_has(X86_FEATURE_RDTSCP)
+#define cpu_has_3dnow_ext       boot_cpu_has(X86_FEATURE_3DNOWEXT)
+#define cpu_has_3dnow           boot_cpu_has(X86_FEATURE_3DNOW)
 
 /* CPUID level 0x80000001.ecx */
 #define cpu_has_cmp_legacy      boot_cpu_has(X86_FEATURE_CMP_LEGACY)
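
For clarity: 3DNow! insns are encoded as 0x0f 0x0f, with the actual
operation selected by a suffix byte that sits where an immediate
normally would (after ModRM and any displacement). A minimal sketch of
the table lookup used above (is_3dnow_insn() is an illustrative helper,
not anything in the patch):

    #include <stdbool.h>
    #include <stdint.h>

    /* High nibble of the suffix byte picks the table slot, low nibble
     * the bit within it; table contents as in the patch (excerpt). */
    static const uint16_t _3dnow_table[16] = {
        [0x9] = (1 << 0x0) /* pfcmpge */ | (1 << 0xe) /* pfadd */,
        /* ... remaining entries as above ... */
    };

    static bool is_3dnow_insn(uint8_t suffix)
    {
        return _3dnow_table[suffix >> 4] & (1u << (suffix & 0xf));
    }

In the emulator this separates base 3DNow! insns (requiring the 3dnow
feature) from the extension set (requiring 3dnow_ext), with any other
suffix raising #UD.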




* [PATCH v5 02/14] x86emul: place test blobs in executable section
  2018-03-15 12:57 [PATCH v5 00/14] x86: emulator enhancements Jan Beulich
  2018-03-15 13:02 ` [PATCH v5 01/14] x86emul: support 3DNow! insns Jan Beulich
@ 2018-03-15 13:03 ` Jan Beulich
  2018-03-15 13:04 ` [PATCH v5 03/14] x86emul: abstract out XCRn accesses Jan Beulich
                   ` (11 subsequent siblings)
  13 siblings, 0 replies; 33+ messages in thread
From: Jan Beulich @ 2018-03-15 13:03 UTC (permalink / raw)
  To: xen-devel; +Cc: George Dunlap, Andrew Cooper

This allows the section contents to be disassembled without jumping
through any extra hoops, simplifying the analysis of problems in test
and/or emulation code.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
Acked-by: Andrew Cooper <andrew.cooper3@citrix.com>
---
v5: Use Andrew's section attributes trick (and drop the respective part
    of the description).
v3: New.

--- a/tools/tests/x86_emulator/Makefile
+++ b/tools/tests/x86_emulator/Makefile
@@ -91,7 +91,8 @@ $(addsuffix .h,$(TESTCASES)): %.h: %.c t
 		$(MAKE) -f testcase.mk TESTCASE=$* XEN_TARGET_ARCH=$(arch) $*-cflags="$$cflags" all; \
 		prefix=$(shell echo $(subst -,_,$*) | sed -e 's,^\([0-9]\),_\1,'); \
 		flavor=$$(echo $${cflags} | sed -e 's, .*,,' -e 'y,-=,__,') ; \
-		(echo "static const unsigned int $${prefix}_$(arch)$${flavor}[] = {"; \
+		(echo 'static const unsigned int __attribute__((section(".test, \"ax\", @progbits #")))' \
+		      "$${prefix}_$(arch)$${flavor}[] = {"; \
 		 od -v -t x $*.bin | sed -e 's/^[0-9]* /0x/' -e 's/ /, 0x/g' -e 's/$$/,/'; \
 		 echo "};") >>$@.new; \
 		rm -f $*.bin; \
--- a/tools/tests/x86_emulator/test_x86_emulator.c
+++ b/tools/tests/x86_emulator/test_x86_emulator.c
@@ -4,6 +4,9 @@
 #include <sys/mman.h>
 
 #include "x86-emulate.h"
+
+asm ( ".pushsection .test, \"ax\", @progbits; .popsection" );
+
 #include "blowfish.h"
 #include "3dnow.h"
 #include "sse.h"
@@ -1142,9 +1145,9 @@ int main(int argc, char **argv)
 
 #define decl_insn(which) extern const unsigned char which[], \
                          which##_end[] asm ( ".L" #which "_end" )
-#define put_insn(which, insn) ".pushsection .test, \"ax\", @progbits\n" \
-                              #which ": " insn "\n"                     \
-                              ".L" #which "_end:\n"                     \
+#define put_insn(which, insn) ".pushsection .test\n" \
+                              #which ": " insn "\n"  \
+                              ".L" #which "_end:\n"  \
                               ".popsection"
 #define set_insn(which) (regs.eip = (unsigned long)(which))
 #define valid_eip(which) (regs.eip >= (unsigned long)(which) && \
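
For reference, the trick works because '#' starts a comment in x86 gas:
the section flags GCC itself appends end up commented out, and the blob
lands in an allocated, executable ("ax") section. Roughly (the exact
compiler-emitted tail varies by GCC version, and blowfish_x86_64 is
just an example symbol name):

    static const unsigned int
        __attribute__((section(".test, \"ax\", @progbits #")))
        blowfish_x86_64[] = { /* od(1) dump of the test blob */ };

    /*
     * emits approximately:
     *     .section .test, "ax", @progbits #,"a",@progbits
     * i.e. whatever GCC adds after '#' is ignored by the assembler.
     */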





* [PATCH v5 03/14] x86emul: abstract out XCRn accesses
  2018-03-15 12:57 [PATCH v5 00/14] x86: emulator enhancements Jan Beulich
  2018-03-15 13:02 ` [PATCH v5 01/14] x86emul: support 3DNow! insns Jan Beulich
  2018-03-15 13:03 ` [PATCH v5 02/14] x86emul: place test blobs in executable section Jan Beulich
@ 2018-03-15 13:04 ` Jan Beulich
  2018-03-15 13:35   ` Andrew Cooper
                     ` (3 more replies)
  2018-03-15 13:04 ` [PATCH v5 04/14] x86emul: adjust_bnd() should check XCR0 Jan Beulich
                   ` (10 subsequent siblings)
  13 siblings, 4 replies; 33+ messages in thread
From: Jan Beulich @ 2018-03-15 13:04 UTC (permalink / raw)
  To: xen-devel; +Cc: George Dunlap, Andrew Cooper, Paul Durrant

Use hooks, just as is done for other special purpose registers.

This includes moving XCR0 checks from hvmemul_get_fpu() to the emulator
itself as well as adding support for XGETBV emulation.

For now, fuzzer reads will obtain the real values (minus the fuzzing of
the hook pointer itself).

Signed-off-by: Jan Beulich <jbeulich@suse.com>
---
v5: Move index validation into hook functions. Introduce
    x86emul_{read,write}_xcr().
v4: Have hvmemul_read_xcr() raise an exception instead of returning
    X86EMUL_UNHANDLEABLE for invalid indexes. Introduce xgetbv() and add
    volatile to the asm() moved there. Split out _XSTATE_* movement.
v2: Re-base.
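
As the v5 note above says, index validation now lives in the hooks
themselves, which raise #GP(0) for unknown XCRs. A hypothetical minimal
read hook for a host exposing only XCR0 (my_read_xcr and the hard-coded
value are illustrative, not part of the patch):

    static int my_read_xcr(unsigned int reg, uint64_t *val,
                           struct x86_emulate_ctxt *ctxt)
    {
        if ( reg != 0 )
        {
            /* Invalid index: the hook itself raises #GP(0). */
            x86_emul_hw_exception(13 /* #GP */, 0, ctxt);
            return X86EMUL_EXCEPTION;
        }
        *val = 3; /* x87 (bit 0) and SSE (bit 1) enabled */
        return X86EMUL_OKAY;
    }

    /* Wired up like the other register hooks:
     * ops.read_xcr = my_read_xcr; */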

--- a/tools/fuzz/x86_instruction_emulator/fuzz-emul.c
+++ b/tools/fuzz/x86_instruction_emulator/fuzz-emul.c
@@ -459,6 +459,8 @@ static int fuzz_write_cr(
     return X86EMUL_OKAY;
 }
 
+#define fuzz_read_xcr emul_test_read_xcr
+
 enum {
     MSRI_IA32_SYSENTER_CS,
     MSRI_IA32_SYSENTER_ESP,
@@ -577,6 +579,7 @@ static const struct x86_emulate_ops all_
     SET(write_io),
     SET(read_cr),
     SET(write_cr),
+    SET(read_xcr),
     SET(read_msr),
     SET(write_msr),
     SET(wbinvd),
@@ -685,6 +688,7 @@ enum {
     HOOK_write_cr,
     HOOK_read_dr,
     HOOK_write_dr,
+    HOOK_read_xcr,
     HOOK_read_msr,
     HOOK_write_msr,
     HOOK_wbinvd,
@@ -729,6 +733,7 @@ static void disable_hooks(struct x86_emu
     MAYBE_DISABLE_HOOK(write_io);
     MAYBE_DISABLE_HOOK(read_cr);
     MAYBE_DISABLE_HOOK(write_cr);
+    MAYBE_DISABLE_HOOK(read_xcr);
     MAYBE_DISABLE_HOOK(read_msr);
     MAYBE_DISABLE_HOOK(write_msr);
     MAYBE_DISABLE_HOOK(wbinvd);
--- a/tools/tests/x86_emulator/test_x86_emulator.c
+++ b/tools/tests/x86_emulator/test_x86_emulator.c
@@ -371,6 +371,7 @@ static struct x86_emulate_ops emulops =
     .read_segment = read_segment,
     .cpuid      = emul_test_cpuid,
     .read_cr    = emul_test_read_cr,
+    .read_xcr   = emul_test_read_xcr,
     .read_msr   = read_msr,
     .get_fpu    = emul_test_get_fpu,
     .put_fpu    = emul_test_put_fpu,
--- a/tools/tests/x86_emulator/x86-emulate.c
+++ b/tools/tests/x86_emulator/x86-emulate.c
@@ -163,6 +163,35 @@ int emul_test_read_cr(
     return X86EMUL_UNHANDLEABLE;
 }
 
+int emul_test_read_xcr(
+    unsigned int reg,
+    uint64_t *val,
+    struct x86_emulate_ctxt *ctxt)
+{
+    uint32_t lo, hi;
+
+    ASSERT(cpu_has_xsave);
+
+    switch ( reg )
+    {
+    case 0:
+        break;
+
+    case 1:
+        if ( cpu_has_xgetbv1 )
+            break;
+        /* fall through */
+    default:
+        x86_emul_hw_exception(13 /* #GP */, 0, ctxt);
+        return X86EMUL_EXCEPTION;
+    }
+
+    asm ( "xgetbv" : "=a" (lo), "=d" (hi) : "c" (reg) );
+    *val = lo | ((uint64_t)hi << 32);
+
+    return X86EMUL_OKAY;
+}
+
 int emul_test_get_fpu(
     void (*exception_callback)(void *, struct cpu_user_regs *),
     void *exception_callback_arg,
--- a/tools/tests/x86_emulator/x86-emulate.h
+++ b/tools/tests/x86_emulator/x86-emulate.h
@@ -186,6 +186,16 @@ static inline uint64_t xgetbv(uint32_t x
     (res.b & (1U << 5)) != 0; \
 })
 
+#define cpu_has_xgetbv1 ({ \
+    struct cpuid_leaf res; \
+    emul_test_cpuid(1, 0, &res, NULL); \
+    if ( !(res.c & (1U << 27)) ) \
+        res.a = 0; \
+    else \
+        emul_test_cpuid(0xd, 1, &res, NULL); \
+    (res.a & (1U << 2)) != 0; \
+})
+
 #define cpu_has_bmi1 ({ \
     struct cpuid_leaf res; \
     emul_test_cpuid(7, 0, &res, NULL); \
@@ -247,6 +257,11 @@ int emul_test_read_cr(
     unsigned long *val,
     struct x86_emulate_ctxt *ctxt);
 
+int emul_test_read_xcr(
+    unsigned int reg,
+    uint64_t *val,
+    struct x86_emulate_ctxt *ctxt);
+
 int emul_test_get_fpu(
     void (*exception_callback)(void *, struct cpu_user_regs *),
     void *exception_callback_arg,
--- a/xen/arch/x86/hvm/emulate.c
+++ b/xen/arch/x86/hvm/emulate.c
@@ -1826,6 +1826,29 @@ static int hvmemul_write_cr(
     return rc;
 }
 
+static int hvmemul_read_xcr(
+    unsigned int reg,
+    uint64_t *val,
+    struct x86_emulate_ctxt *ctxt)
+{
+    int rc = x86emul_read_xcr(reg, val, ctxt);
+
+    if ( rc == X86EMUL_OKAY )
+        HVMTRACE_LONG_2D(XCR_READ, reg, TRC_PAR_LONG(*val));
+
+    return rc;
+}
+
+static int hvmemul_write_xcr(
+    unsigned int reg,
+    uint64_t val,
+    struct x86_emulate_ctxt *ctxt)
+{
+    HVMTRACE_LONG_2D(XCR_WRITE, reg, TRC_PAR_LONG(val));
+
+    return x86emul_write_xcr(reg, val, ctxt);
+}
+
 static int hvmemul_read_msr(
     unsigned int reg,
     uint64_t *val,
@@ -1874,22 +1897,6 @@ static int hvmemul_get_fpu(
 {
     struct vcpu *curr = current;
 
-    switch ( type )
-    {
-    case X86EMUL_FPU_fpu:
-    case X86EMUL_FPU_wait:
-    case X86EMUL_FPU_mmx:
-    case X86EMUL_FPU_xmm:
-        break;
-    case X86EMUL_FPU_ymm:
-        if ( !(curr->arch.xcr0 & X86_XCR0_SSE) ||
-             !(curr->arch.xcr0 & X86_XCR0_YMM) )
-            return X86EMUL_UNHANDLEABLE;
-        break;
-    default:
-        return X86EMUL_UNHANDLEABLE;
-    }
-
     if ( !curr->fpu_dirtied )
         hvm_funcs.fpu_dirty_intercept();
     else if ( type == X86EMUL_FPU_fpu )
@@ -2073,6 +2080,8 @@ static const struct x86_emulate_ops hvm_
     .write_io      = hvmemul_write_io,
     .read_cr       = hvmemul_read_cr,
     .write_cr      = hvmemul_write_cr,
+    .read_xcr      = hvmemul_read_xcr,
+    .write_xcr     = hvmemul_write_xcr,
     .read_msr      = hvmemul_read_msr,
     .write_msr     = hvmemul_write_msr,
     .wbinvd        = hvmemul_wbinvd,
@@ -2098,6 +2107,8 @@ static const struct x86_emulate_ops hvm_
     .write_io      = hvmemul_write_io_discard,
     .read_cr       = hvmemul_read_cr,
     .write_cr      = hvmemul_write_cr,
+    .read_xcr      = hvmemul_read_xcr,
+    .write_xcr     = hvmemul_write_xcr,
     .read_msr      = hvmemul_read_msr,
     .write_msr     = hvmemul_write_msr_discard,
     .wbinvd        = hvmemul_wbinvd_discard,
--- a/xen/arch/x86/pv/emul-priv-op.c
+++ b/xen/arch/x86/pv/emul-priv-op.c
@@ -1317,6 +1317,7 @@ static const struct x86_emulate_ops priv
     .write_cr            = write_cr,
     .read_dr             = read_dr,
     .write_dr            = write_dr,
+    .write_xcr           = x86emul_write_xcr,
     .read_msr            = read_msr,
     .write_msr           = write_msr,
     .cpuid               = pv_emul_cpuid,
--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -1114,10 +1114,30 @@ static int _get_fpu(
     struct x86_emulate_ctxt *ctxt,
     const struct x86_emulate_ops *ops)
 {
+    uint64_t xcr0;
     int rc;
 
     fail_if(!ops->get_fpu);
     ASSERT(type != X86EMUL_FPU_none);
+
+    if ( type < X86EMUL_FPU_ymm || !ops->read_xcr ||
+         ops->read_xcr(0, &xcr0, ctxt) != X86EMUL_OKAY )
+    {
+        ASSERT(!ctxt->event_pending);
+        xcr0 = 0;
+    }
+
+    switch ( type )
+    {
+    case X86EMUL_FPU_ymm:
+        if ( !(xcr0 & X86_XCR0_SSE) || !(xcr0 & X86_XCR0_YMM) )
+            return X86EMUL_UNHANDLEABLE;
+        break;
+
+    default:
+        break;
+    }
+
     rc = ops->get_fpu(fpu_handle_exception, fic, type, ctxt);
 
     if ( rc == X86EMUL_OKAY )
@@ -5006,18 +5026,31 @@ x86_emulate(
                 _regs.eflags |= X86_EFLAGS_AC;
             break;
 
-#ifdef __XEN__
+        case 0xd0: /* xgetbv */
+            generate_exception_if(vex.pfx, EXC_UD);
+            if ( !ops->read_cr || !ops->read_xcr ||
+                 ops->read_cr(4, &cr4, ctxt) != X86EMUL_OKAY )
+                cr4 = 0;
+            generate_exception_if(!(cr4 & X86_CR4_OSXSAVE), EXC_UD);
+            rc = ops->read_xcr(_regs.ecx, &msr_val, ctxt);
+            if ( rc != X86EMUL_OKAY )
+                goto done;
+            _regs.r(ax) = (uint32_t)msr_val;
+            _regs.r(dx) = msr_val >> 32;
+            break;
+
         case 0xd1: /* xsetbv */
             generate_exception_if(vex.pfx, EXC_UD);
-            if ( !ops->read_cr || ops->read_cr(4, &cr4, ctxt) != X86EMUL_OKAY )
+            if ( !ops->read_cr || !ops->write_xcr ||
+                 ops->read_cr(4, &cr4, ctxt) != X86EMUL_OKAY )
                 cr4 = 0;
             generate_exception_if(!(cr4 & X86_CR4_OSXSAVE), EXC_UD);
-            generate_exception_if(!mode_ring0() ||
-                                  handle_xsetbv(_regs.ecx,
-                                                _regs.eax | (_regs.rdx << 32)),
-                                  EXC_GP, 0);
+            generate_exception_if(!mode_ring0(), EXC_GP, 0);
+            rc = ops->write_xcr(_regs.ecx,
+                                _regs.eax | ((uint64_t)_regs.edx << 32), ctxt);
+            if ( rc != X86EMUL_OKAY )
+                goto done;
             break;
-#endif
 
         case 0xd4: /* vmfunc */
             generate_exception_if(vex.pfx, EXC_UD);
--- a/xen/arch/x86/x86_emulate/x86_emulate.h
+++ b/xen/arch/x86/x86_emulate/x86_emulate.h
@@ -398,6 +398,24 @@ struct x86_emulate_ops
         struct x86_emulate_ctxt *ctxt);
 
     /*
+     * read_xcr: Read from extended control register.
+     *  @reg:   [IN ] Register to read.
+     */
+    int (*read_xcr)(
+        unsigned int reg,
+        uint64_t *val,
+        struct x86_emulate_ctxt *ctxt);
+
+    /*
+     * write_xcr: Write to extended control register.
+     *  @reg:   [IN ] Register to write.
+     */
+    int (*write_xcr)(
+        unsigned int reg,
+        uint64_t val,
+        struct x86_emulate_ctxt *ctxt);
+
+    /*
      * read_msr: Read from model-specific register.
      *  @reg:   [IN ] Register to read.
      */
@@ -683,6 +701,11 @@ static inline void x86_emulate_free_stat
 void x86_emulate_free_state(struct x86_emulate_state *state);
 #endif
 
+int x86emul_read_xcr(unsigned int reg, uint64_t *val,
+                     struct x86_emulate_ctxt *ctxt);
+int x86emul_write_xcr(unsigned int reg, uint64_t val,
+                      struct x86_emulate_ctxt *ctxt);
+
 #endif
 
 static inline void x86_emul_hw_exception(
--- a/xen/arch/x86/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate.c
@@ -42,3 +42,50 @@
 })
 
 #include "x86_emulate/x86_emulate.c"
+
+int x86emul_read_xcr(unsigned int reg, uint64_t *val,
+                     struct x86_emulate_ctxt *ctxt)
+{
+    switch ( reg )
+    {
+    case 0:
+        *val = current->arch.xcr0;
+        return X86EMUL_OKAY;
+
+    case 1:
+        if ( cpu_has_xgetbv1 && current->domain->arch.cpuid->xstate.xgetbv1 )
+            break;
+        /* fall through */
+    default:
+        x86_emul_hw_exception(TRAP_gp_fault, 0, ctxt);
+        return X86EMUL_EXCEPTION;
+    }
+
+    *val = xgetbv(reg);
+
+    return X86EMUL_OKAY;
+}
+
+int x86emul_write_xcr(unsigned int reg, uint64_t val,
+                      struct x86_emulate_ctxt *ctxt)
+{
+    switch ( reg )
+    {
+    case 0:
+        break;
+
+    case 1:
+        if ( cpu_has_xgetbv1 && current->domain->arch.cpuid->xstate.xgetbv1 )
+            break;
+        /* fall through */
+    default:
+    gp_fault:
+        x86_emul_hw_exception(TRAP_gp_fault, 0, ctxt);
+        return X86EMUL_EXCEPTION;
+    }
+
+    if ( unlikely(handle_xsetbv(reg, val) != 0) )
+        goto gp_fault;
+
+    return X86EMUL_OKAY;
+}
--- a/xen/include/asm-x86/hvm/trace.h
+++ b/xen/include/asm-x86/hvm/trace.h
@@ -33,6 +33,8 @@
 #define DO_TRC_HVM_CR_WRITE64  DEFAULT_HVM_REGACCESS
 #define DO_TRC_HVM_DR_READ     DEFAULT_HVM_REGACCESS
 #define DO_TRC_HVM_DR_WRITE    DEFAULT_HVM_REGACCESS
+#define DO_TRC_HVM_XCR_READ64  DEFAULT_HVM_REGACCESS
+#define DO_TRC_HVM_XCR_WRITE64 DEFAULT_HVM_REGACCESS
 #define DO_TRC_HVM_MSR_READ    DEFAULT_HVM_REGACCESS
 #define DO_TRC_HVM_MSR_WRITE   DEFAULT_HVM_REGACCESS
 #define DO_TRC_HVM_RDTSC       DEFAULT_HVM_REGACCESS
--- a/xen/include/asm-x86/xstate.h
+++ b/xen/include/asm-x86/xstate.h
@@ -109,6 +109,17 @@ int xstate_alloc_save_area(struct vcpu *
 void xstate_init(struct cpuinfo_x86 *c);
 unsigned int xstate_ctxt_size(u64 xcr0);
 
+static inline uint64_t xgetbv(unsigned int index)
+{
+    uint32_t lo, hi;
+
+    ASSERT(index); /* get_xcr0() should be used instead. */
+    asm volatile ( ".byte 0x0f,0x01,0xd0" /* xgetbv */
+                   : "=a" (lo), "=d" (hi) : "c" (index) );
+
+    return lo | ((uint64_t)hi << 32);
+}
+
 static inline bool xstate_all(const struct vcpu *v)
 {
     /*
--- a/xen/include/public/trace.h
+++ b/xen/include/public/trace.h
@@ -235,6 +235,8 @@
 #define TRC_HVM_TRAP             (TRC_HVM_HANDLER + 0x23)
 #define TRC_HVM_TRAP_DEBUG       (TRC_HVM_HANDLER + 0x24)
 #define TRC_HVM_VLAPIC           (TRC_HVM_HANDLER + 0x25)
+#define TRC_HVM_XCR_READ64      (TRC_HVM_HANDLER + TRC_64_FLAG + 0x26)
+#define TRC_HVM_XCR_WRITE64     (TRC_HVM_HANDLER + TRC_64_FLAG + 0x27)
 
 #define TRC_HVM_IOPORT_WRITE    (TRC_HVM_HANDLER + 0x216)
 #define TRC_HVM_IOMEM_WRITE     (TRC_HVM_HANDLER + 0x217)
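
For reference, the _get_fpu() check added above relies on the
architectural XCR0 layout; the values below are stated as an assumption
about the X86_XCR0_* constants, matching the SDM:

    #define X86_XCR0_SSE (1u << 1) /* XMM state */
    #define X86_XCR0_YMM (1u << 2) /* upper halves of YMM registers */
    /* X86EMUL_FPU_ymm is usable only with both bits set, hence the
     * twin test in _get_fpu(). */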




* [PATCH v5 04/14] x86emul: adjust_bnd() should check XCR0
  2018-03-15 12:57 [PATCH v5 00/14] x86: emulator enhancements Jan Beulich
                   ` (2 preceding siblings ...)
  2018-03-15 13:04 ` [PATCH v5 03/14] x86emul: abstract out XCRn accesses Jan Beulich
@ 2018-03-15 13:04 ` Jan Beulich
  2018-03-15 13:06 ` [PATCH v5 05/14] x86/HVM: eliminate custom #MF/#XM handling Jan Beulich
                   ` (9 subsequent siblings)
  13 siblings, 0 replies; 33+ messages in thread
From: Jan Beulich @ 2018-03-15 13:04 UTC (permalink / raw)
  To: xen-devel; +Cc: George Dunlap, Andrew Cooper

MPX instructions have been experimentally confirmed to behave as NOPs
unless both related XCR0 bits are set to 1. By implication, branches
then also don't clear BNDn.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
---
v4: Re-base over XSTATE_* renaming.

--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -2154,12 +2154,16 @@ static bool umip_active(struct x86_emula
 static void adjust_bnd(struct x86_emulate_ctxt *ctxt,
                        const struct x86_emulate_ops *ops, enum vex_pfx pfx)
 {
-    uint64_t bndcfg;
+    uint64_t xcr0, bndcfg;
     int rc;
 
     if ( pfx == vex_f2 || !cpu_has_mpx || !vcpu_has_mpx() )
         return;
 
+    if ( !ops->read_xcr || ops->read_xcr(0, &xcr0, ctxt) != X86EMUL_OKAY ||
+         !(xcr0 & X86_XCR0_BNDREGS) || !(xcr0 & X86_XCR0_BNDCSR) )
+        return;
+
     if ( !mode_ring0() )
         bndcfg = read_bndcfgu();
     else if ( !ops->read_msr ||
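
For reference, the two XCR0 bits tested above, again assuming the
constants follow the architectural layout:

    #define X86_XCR0_BNDREGS (1u << 3) /* BND0-BND3 bounds registers */
    #define X86_XCR0_BNDCSR  (1u << 4) /* BNDCFGU and BNDSTATUS */
    /* With either bit clear, MPX insns execute as NOPs, so branch
     * emulation can skip clearing BNDn entirely. */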





* [PATCH v5 05/14] x86/HVM: eliminate custom #MF/#XM handling
  2018-03-15 12:57 [PATCH v5 00/14] x86: emulator enhancements Jan Beulich
                   ` (3 preceding siblings ...)
  2018-03-15 13:04 ` [PATCH v5 04/14] x86emul: adjust_bnd() should check XCR0 Jan Beulich
@ 2018-03-15 13:06 ` Jan Beulich
  2018-03-22 14:12   ` Roger Pau Monné
  2018-03-15 13:07 ` [PATCH v5 06/14] x86emul: tell cmpxchg hook whether LOCK is in effect Jan Beulich
                   ` (8 subsequent siblings)
  13 siblings, 1 reply; 33+ messages in thread
From: Jan Beulich @ 2018-03-15 13:06 UTC (permalink / raw)
  To: xen-devel; +Cc: George Dunlap, Andrew Cooper

Use the generic stub exception handling instead.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: Paul Durrant <paul.durrant@citrix.com>
Acked-by: Andrew Cooper <andrew.cooper3@citrix.com>
---
v4: Re-base.
v3: Re-base.
v2: Re-base.
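
Background for the deletions below: the emulator runs FPU insns from
stubs built at run time, and a pending x87/SIMD exception surfacing
there used to be fielded by fpu_handle_exception(), which advanced rIP
by fic.insn_bytes to skip the faulting stub insn. The generic stub
recovery makes that bookkeeping unnecessary. A sketch of what e.g.
emulate_fpu_insn_memsrc() actually executes (bytes shown for opc 0xd9,
ext 0, i.e. fld m32; the array name is illustrative):

    /* Stub layout: opcode, ModRM with mod=0/reg=ext/rm=0 (i.e. a
     * (%rax) memory operand), then RET back into the emulator. */
    uint8_t fpu_stub[] = { 0xd9, 0x00, 0xc3 }; /* flds (%rax); ret */
    /* invoke_stub() passes the operand's address in %rax. */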

--- a/tools/tests/x86_emulator/x86-emulate.c
+++ b/tools/tests/x86_emulator/x86-emulate.c
@@ -193,8 +193,6 @@ int emul_test_read_xcr(
 }
 
 int emul_test_get_fpu(
-    void (*exception_callback)(void *, struct cpu_user_regs *),
-    void *exception_callback_arg,
     enum x86_emulate_fpu_type type,
     struct x86_emulate_ctxt *ctxt)
 {
--- a/tools/tests/x86_emulator/x86-emulate.h
+++ b/tools/tests/x86_emulator/x86-emulate.h
@@ -263,8 +263,6 @@ int emul_test_read_xcr(
     struct x86_emulate_ctxt *ctxt);
 
 int emul_test_get_fpu(
-    void (*exception_callback)(void *, struct cpu_user_regs *),
-    void *exception_callback_arg,
     enum x86_emulate_fpu_type type,
     struct x86_emulate_ctxt *ctxt);
 
--- a/xen/arch/x86/hvm/emulate.c
+++ b/xen/arch/x86/hvm/emulate.c
@@ -1890,8 +1890,6 @@ int hvmemul_cpuid(uint32_t leaf, uint32_
 }
 
 static int hvmemul_get_fpu(
-    void (*exception_callback)(void *, struct cpu_user_regs *),
-    void *exception_callback_arg,
     enum x86_emulate_fpu_type type,
     struct x86_emulate_ctxt *ctxt)
 {
@@ -1929,9 +1927,6 @@ static int hvmemul_get_fpu(
         }
     }
 
-    curr->arch.hvm_vcpu.fpu_exception_callback = exception_callback;
-    curr->arch.hvm_vcpu.fpu_exception_callback_arg = exception_callback_arg;
-
     return X86EMUL_OKAY;
 }
 
@@ -1942,8 +1937,6 @@ static void hvmemul_put_fpu(
 {
     struct vcpu *curr = current;
 
-    curr->arch.hvm_vcpu.fpu_exception_callback = NULL;
-
     if ( aux )
     {
         typeof(curr->arch.xsave_area->fpu_sse) *fpu_ctxt = curr->arch.fpu_ctxt;
--- a/xen/arch/x86/traps.c
+++ b/xen/arch/x86/traps.c
@@ -729,7 +729,6 @@ static void do_reserved_trap(struct cpu_
 
 static void do_trap(struct cpu_user_regs *regs)
 {
-    struct vcpu *curr = current;
     unsigned int trapnr = regs->entry_vector;
     unsigned long fixup;
 
@@ -749,15 +748,6 @@ static void do_trap(struct cpu_user_regs
         return;
     }
 
-    if ( ((trapnr == TRAP_copro_error) || (trapnr == TRAP_simd_error)) &&
-         system_state >= SYS_STATE_active && is_hvm_vcpu(curr) &&
-         curr->arch.hvm_vcpu.fpu_exception_callback )
-    {
-        curr->arch.hvm_vcpu.fpu_exception_callback(
-            curr->arch.hvm_vcpu.fpu_exception_callback_arg, regs);
-        return;
-    }
-
     if ( likely((fixup = search_exception_table(regs)) != 0) )
     {
         dprintk(XENLOG_ERR, "Trap %u: %p [%ps] -> %p\n",
--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -1094,23 +1094,8 @@ do {
     ops->write_segment(x86_seg_cs, cs, ctxt);                           \
 })
 
-struct fpu_insn_ctxt {
-    uint8_t insn_bytes;
-    uint8_t type;
-    int8_t exn_raised;
-};
-
-static void fpu_handle_exception(void *_fic, struct cpu_user_regs *regs)
-{
-    struct fpu_insn_ctxt *fic = _fic;
-    ASSERT(regs->entry_vector < 0x20);
-    fic->exn_raised = regs->entry_vector;
-    regs->r(ip) += fic->insn_bytes;
-}
-
 static int _get_fpu(
     enum x86_emulate_fpu_type type,
-    struct fpu_insn_ctxt *fic,
     struct x86_emulate_ctxt *ctxt,
     const struct x86_emulate_ops *ops)
 {
@@ -1138,14 +1123,13 @@ static int _get_fpu(
         break;
     }
 
-    rc = ops->get_fpu(fpu_handle_exception, fic, type, ctxt);
+    rc = ops->get_fpu(type, ctxt);
 
     if ( rc == X86EMUL_OKAY )
     {
         unsigned long cr0;
 
         fail_if(type == X86EMUL_FPU_fpu && !ops->put_fpu);
-        fic->type = type;
 
         fail_if(!ops->read_cr);
         if ( type >= X86EMUL_FPU_xmm )
@@ -1183,37 +1167,22 @@ static int _get_fpu(
     return rc;
 }
 
-#define get_fpu(_type, _fic)                                    \
+#define get_fpu(type)                                           \
 do {                                                            \
-    rc = _get_fpu(_type, _fic, ctxt, ops);                      \
+    rc = _get_fpu(fpu_type = (type), ctxt, ops);                \
     if ( rc ) goto done;                                        \
 } while (0)
 
-#define check_fpu_exn(fic)                                      \
-do {                                                            \
-    generate_exception_if((fic)->exn_raised >= 0,               \
-                          (fic)->exn_raised);                   \
-} while (0)
-
-#define check_xmm_exn(fic)                                      \
-do {                                                            \
-    if ( (fic)->exn_raised == EXC_XM && ops->read_cr &&         \
-         ops->read_cr(4, &cr4, ctxt) == X86EMUL_OKAY &&         \
-         !(cr4 & X86_CR4_OSXMMEXCPT) )                          \
-        (fic)->exn_raised = EXC_UD;                             \
-    check_fpu_exn(fic);                                         \
-} while (0)
-
 static void put_fpu(
-    struct fpu_insn_ctxt *fic,
+    enum x86_emulate_fpu_type type,
     bool failed_late,
     const struct x86_emulate_state *state,
     struct x86_emulate_ctxt *ctxt,
     const struct x86_emulate_ops *ops)
 {
-    if ( unlikely(failed_late) && fic->type == X86EMUL_FPU_fpu )
+    if ( unlikely(failed_late) && type == X86EMUL_FPU_fpu )
         ops->put_fpu(ctxt, X86EMUL_FPU_fpu, NULL);
-    else if ( unlikely(fic->type == X86EMUL_FPU_fpu) && !state->fpu_ctrl )
+    else if ( unlikely(type == X86EMUL_FPU_fpu) && !state->fpu_ctrl )
     {
         struct x86_emul_fpu_aux aux = {
             .ip = ctxt->regs->r(ip),
@@ -1247,9 +1216,8 @@ static void put_fpu(
         }
         ops->put_fpu(ctxt, X86EMUL_FPU_none, &aux);
     }
-    else if ( fic->type != X86EMUL_FPU_none && ops->put_fpu )
+    else if ( type != X86EMUL_FPU_none && ops->put_fpu )
         ops->put_fpu(ctxt, X86EMUL_FPU_none, NULL);
-    fic->type = X86EMUL_FPU_none;
 }
 
 static inline bool fpu_check_write(void)
@@ -1264,29 +1232,27 @@ static inline bool fpu_check_write(void)
 #define emulate_fpu_insn_memdst(opc, ext, arg)                          \
 do {                                                                    \
     /* ModRM: mod=0, reg=ext, rm=0, i.e. a (%rax) operand */            \
-    fic.insn_bytes = 2;                                                 \
+    insn_bytes = 2;                                                     \
     memcpy(get_stub(stub),                                              \
            ((uint8_t[]){ opc, ((ext) & 7) << 3, 0xc3 }), 3);            \
-    invoke_stub("", "", "+m" (fic), "+m" (arg) : "a" (&(arg)));         \
+    invoke_stub("", "", "+m" (arg) : "a" (&(arg)));                     \
     put_stub(stub);                                                     \
 } while (0)
 
 #define emulate_fpu_insn_memsrc(opc, ext, arg)                          \
 do {                                                                    \
     /* ModRM: mod=0, reg=ext, rm=0, i.e. a (%rax) operand */            \
-    fic.insn_bytes = 2;                                                 \
     memcpy(get_stub(stub),                                              \
            ((uint8_t[]){ opc, ((ext) & 7) << 3, 0xc3 }), 3);            \
-    invoke_stub("", "", "+m" (fic) : "m" (arg), "a" (&(arg)));          \
+    invoke_stub("", "", "=m" (dummy) : "m" (arg), "a" (&(arg)));        \
     put_stub(stub);                                                     \
 } while (0)
 
 #define emulate_fpu_insn_stub(bytes...)                                 \
 do {                                                                    \
     unsigned int nr_ = sizeof((uint8_t[]){ bytes });                    \
-    fic.insn_bytes = nr_;                                               \
     memcpy(get_stub(stub), ((uint8_t[]){ bytes, 0xc3 }), nr_ + 1);      \
-    invoke_stub("", "", "=m" (fic) : "m" (fic));                        \
+    invoke_stub("", "", "=m" (dummy) : "i" (0));                        \
     put_stub(stub);                                                     \
 } while (0)
 
@@ -1294,12 +1260,10 @@ do {
 do {                                                                    \
     unsigned int nr_ = sizeof((uint8_t[]){ bytes });                    \
     unsigned long tmp_;                                                 \
-    fic.insn_bytes = nr_;                                               \
     memcpy(get_stub(stub), ((uint8_t[]){ bytes, 0xc3 }), nr_ + 1);      \
     invoke_stub(_PRE_EFLAGS("[eflags]", "[mask]", "[tmp]"),             \
                 _POST_EFLAGS("[eflags]", "[mask]", "[tmp]"),            \
-                [eflags] "+g" (_regs.eflags), [tmp] "=&r" (tmp_),       \
-                "+m" (fic)                                              \
+                [eflags] "+g" (_regs.eflags), [tmp] "=&r" (tmp_)        \
                 : [mask] "i" (X86_EFLAGS_ZF|X86_EFLAGS_PF|X86_EFLAGS_CF)); \
     put_stub(stub);                                                     \
 } while (0)
@@ -3158,14 +3122,14 @@ x86_emulate(
     struct x86_emulate_state state;
     int rc;
     uint8_t b, d, *opc = NULL;
-    unsigned int first_byte = 0;
+    unsigned int first_byte = 0, insn_bytes = 0;
     bool singlestep = (_regs.eflags & X86_EFLAGS_TF) &&
 	    !is_branch_step(ctxt, ops);
     bool sfence = false;
     struct operand src = { .reg = PTR_POISON };
     struct operand dst = { .reg = PTR_POISON };
     unsigned long cr4;
-    struct fpu_insn_ctxt fic = { .type = X86EMUL_FPU_none, .exn_raised = -1 };
+    enum x86_emulate_fpu_type fpu_type = X86EMUL_FPU_none;
     struct x86_emulate_stub stub = {};
     DECLARE_ALIGNED(mmval_t, mmval);
 #ifdef __XEN__
@@ -3859,9 +3823,8 @@ x86_emulate(
 
     case 0x9b:  /* wait/fwait */
         host_and_vcpu_must_have(fpu);
-        get_fpu(X86EMUL_FPU_wait, &fic);
+        get_fpu(X86EMUL_FPU_wait);
         emulate_fpu_insn_stub(b);
-        check_fpu_exn(&fic);
         break;
 
     case 0x9c: /* pushf */
@@ -4264,7 +4227,7 @@ x86_emulate(
 
     case 0xd8: /* FPU 0xd8 */
         host_and_vcpu_must_have(fpu);
-        get_fpu(X86EMUL_FPU_fpu, &fic);
+        get_fpu(X86EMUL_FPU_fpu);
         switch ( modrm )
         {
         case 0xc0 ... 0xc7: /* fadd %stN,%st */
@@ -4286,12 +4249,11 @@ x86_emulate(
             emulate_fpu_insn_memsrc(b, modrm_reg & 7, src.val);
             break;
         }
-        check_fpu_exn(&fic);
         break;
 
     case 0xd9: /* FPU 0xd9 */
         host_and_vcpu_must_have(fpu);
-        get_fpu(X86EMUL_FPU_fpu, &fic);
+        get_fpu(X86EMUL_FPU_fpu);
         switch ( modrm )
         {
         case 0xfb: /* fsincos */
@@ -4373,12 +4335,11 @@ x86_emulate(
             if ( dst.type == OP_MEM && !state->fpu_ctrl && !fpu_check_write() )
                 dst.type = OP_NONE;
         }
-        check_fpu_exn(&fic);
         break;
 
     case 0xda: /* FPU 0xda */
         host_and_vcpu_must_have(fpu);
-        get_fpu(X86EMUL_FPU_fpu, &fic);
+        get_fpu(X86EMUL_FPU_fpu);
         switch ( modrm )
         {
         case 0xc0 ... 0xc7: /* fcmovb %stN */
@@ -4395,12 +4356,11 @@ x86_emulate(
             generate_exception_if(ea.type != OP_MEM, EXC_UD);
             goto fpu_memsrc32;
         }
-        check_fpu_exn(&fic);
         break;
 
     case 0xdb: /* FPU 0xdb */
         host_and_vcpu_must_have(fpu);
-        get_fpu(X86EMUL_FPU_fpu, &fic);
+        get_fpu(X86EMUL_FPU_fpu);
         switch ( modrm )
         {
         case 0xc0 ... 0xc7: /* fcmovnb %stN */
@@ -4453,12 +4413,11 @@ x86_emulate(
                 generate_exception(EXC_UD);
             }
         }
-        check_fpu_exn(&fic);
         break;
 
     case 0xdc: /* FPU 0xdc */
         host_and_vcpu_must_have(fpu);
-        get_fpu(X86EMUL_FPU_fpu, &fic);
+        get_fpu(X86EMUL_FPU_fpu);
         switch ( modrm )
         {
         case 0xc0 ... 0xc7: /* fadd %st,%stN */
@@ -4480,12 +4439,11 @@ x86_emulate(
             emulate_fpu_insn_memsrc(b, modrm_reg & 7, src.val);
             break;
         }
-        check_fpu_exn(&fic);
         break;
 
     case 0xdd: /* FPU 0xdd */
         host_and_vcpu_must_have(fpu);
-        get_fpu(X86EMUL_FPU_fpu, &fic);
+        get_fpu(X86EMUL_FPU_fpu);
         switch ( modrm )
         {
         case 0xc0 ... 0xc7: /* ffree %stN */
@@ -4529,12 +4487,11 @@ x86_emulate(
             if ( dst.type == OP_MEM && !state->fpu_ctrl && !fpu_check_write() )
                 dst.type = OP_NONE;
         }
-        check_fpu_exn(&fic);
         break;
 
     case 0xde: /* FPU 0xde */
         host_and_vcpu_must_have(fpu);
-        get_fpu(X86EMUL_FPU_fpu, &fic);
+        get_fpu(X86EMUL_FPU_fpu);
         switch ( modrm )
         {
         case 0xc0 ... 0xc7: /* faddp %stN */
@@ -4552,12 +4509,11 @@ x86_emulate(
             emulate_fpu_insn_memsrc(b, modrm_reg & 7, src.val);
             break;
         }
-        check_fpu_exn(&fic);
         break;
 
     case 0xdf: /* FPU 0xdf */
         host_and_vcpu_must_have(fpu);
-        get_fpu(X86EMUL_FPU_fpu, &fic);
+        get_fpu(X86EMUL_FPU_fpu);
         switch ( modrm )
         {
         case 0xe0:
@@ -4602,7 +4558,6 @@ x86_emulate(
                 goto fpu_memdst64;
             }
         }
-        check_fpu_exn(&fic);
         break;
 
     case 0xe0 ... 0xe2: /* loop{,z,nz} */ {
@@ -5461,7 +5416,7 @@ x86_emulate(
         else
             generate_exception(EXC_UD);
 
-        get_fpu(X86EMUL_FPU_mmx, &fic);
+        get_fpu(X86EMUL_FPU_mmx);
 
         d = DstReg | SrcMem;
         op_bytes = 8;
@@ -5551,7 +5506,7 @@ x86_emulate(
             else
                 vcpu_must_have(sse);
     simd_0f_xmm:
-            get_fpu(X86EMUL_FPU_xmm, &fic);
+            get_fpu(X86EMUL_FPU_xmm);
         }
         else
         {
@@ -5561,7 +5516,7 @@ x86_emulate(
     simd_0f_avx:
             host_and_vcpu_must_have(avx);
     simd_0f_ymm:
-            get_fpu(X86EMUL_FPU_ymm, &fic);
+            get_fpu(X86EMUL_FPU_ymm);
         }
     simd_0f_common:
         opc = init_prefixes(stub);
@@ -5574,7 +5529,7 @@ x86_emulate(
             vex.b = 1;
             opc[1] &= 0x38;
         }
-        fic.insn_bytes = PFX_BYTES + 2;
+        insn_bytes = PFX_BYTES + 2;
         break;
 
     case X86EMUL_OPC_66(0x0f, 0x12):       /* movlpd m64,xmm */
@@ -5661,12 +5616,12 @@ x86_emulate(
                 vcpu_must_have(sse2);
             else
                 vcpu_must_have(sse);
-            get_fpu(X86EMUL_FPU_xmm, &fic);
+            get_fpu(X86EMUL_FPU_xmm);
         }
         else
         {
             host_and_vcpu_must_have(avx);
-            get_fpu(X86EMUL_FPU_ymm, &fic);
+            get_fpu(X86EMUL_FPU_ymm);
         }
 
         if ( ea.type == OP_MEM )
@@ -5692,14 +5647,14 @@ x86_emulate(
                 vcpu_must_have(sse2);
             else
                 vcpu_must_have(sse);
-            get_fpu(X86EMUL_FPU_xmm, &fic);
+            get_fpu(X86EMUL_FPU_xmm);
         }
         else
         {
             generate_exception_if(vex.reg != 0xf, EXC_UD);
             vex.l = 0;
             host_and_vcpu_must_have(avx);
-            get_fpu(X86EMUL_FPU_ymm, &fic);
+            get_fpu(X86EMUL_FPU_ymm);
         }
 
         opc = init_prefixes(stub);
@@ -5722,17 +5677,14 @@ x86_emulate(
             opc[1] = modrm & 0xc7;
         if ( !mode_64bit() )
             vex.w = 0;
-        fic.insn_bytes = PFX_BYTES + 2;
+        insn_bytes = PFX_BYTES + 2;
         opc[2] = 0xc3;
 
         copy_REX_VEX(opc, rex_prefix, vex);
         ea.reg = decode_gpr(&_regs, modrm_reg);
-        invoke_stub("", "", "=a" (*ea.reg), "+m" (fic.exn_raised)
-                            : "c" (mmvalp), "m" (*mmvalp));
+        invoke_stub("", "", "=a" (*ea.reg) : "c" (mmvalp), "m" (*mmvalp));
 
         put_stub(stub);
-        check_xmm_exn(&fic);
-
         state->simd_size = simd_none;
         break;
 
@@ -5746,13 +5698,13 @@ x86_emulate(
                 vcpu_must_have(sse2);
             else
                 vcpu_must_have(sse);
-            get_fpu(X86EMUL_FPU_xmm, &fic);
+            get_fpu(X86EMUL_FPU_xmm);
         }
         else
         {
             generate_exception_if(vex.reg != 0xf, EXC_UD);
             host_and_vcpu_must_have(avx);
-            get_fpu(X86EMUL_FPU_ymm, &fic);
+            get_fpu(X86EMUL_FPU_ymm);
         }
 
         opc = init_prefixes(stub);
@@ -5770,20 +5722,17 @@ x86_emulate(
             vex.b = 1;
             opc[1] &= 0x38;
         }
-        fic.insn_bytes = PFX_BYTES + 2;
+        insn_bytes = PFX_BYTES + 2;
         opc[2] = 0xc3;
 
         copy_REX_VEX(opc, rex_prefix, vex);
         invoke_stub(_PRE_EFLAGS("[eflags]", "[mask]", "[tmp]"),
                     _POST_EFLAGS("[eflags]", "[mask]", "[tmp]"),
                     [eflags] "+g" (_regs.eflags),
-                    [tmp] "=&r" (dummy), "+m" (*mmvalp),
-                    "+m" (fic.exn_raised)
+                    [tmp] "=&r" (dummy), "+m" (*mmvalp)
                     : "a" (mmvalp), [mask] "i" (EFLAGS_MASK));
 
         put_stub(stub);
-        check_xmm_exn(&fic);
-
         ASSERT(!state->simd_size);
         break;
 
@@ -5921,9 +5870,9 @@ x86_emulate(
         if ( !mode_64bit() )
             vex.w = 0;
         opc[1] = modrm & 0xc7;
-        fic.insn_bytes = PFX_BYTES + 2;
+        insn_bytes = PFX_BYTES + 2;
     simd_0f_to_gpr:
-        opc[fic.insn_bytes - PFX_BYTES] = 0xc3;
+        opc[insn_bytes - PFX_BYTES] = 0xc3;
 
         generate_exception_if(ea.type != OP_REG, EXC_UD);
 
@@ -5942,9 +5891,9 @@ x86_emulate(
                     vcpu_must_have(sse);
             }
             if ( b == 0x50 || (vex.pfx & VEX_PREFIX_DOUBLE_MASK) )
-                get_fpu(X86EMUL_FPU_xmm, &fic);
+                get_fpu(X86EMUL_FPU_xmm);
             else
-                get_fpu(X86EMUL_FPU_mmx, &fic);
+                get_fpu(X86EMUL_FPU_mmx);
         }
         else
         {
@@ -5953,14 +5902,13 @@ x86_emulate(
                 host_and_vcpu_must_have(avx);
             else
                 host_and_vcpu_must_have(avx2);
-            get_fpu(X86EMUL_FPU_ymm, &fic);
+            get_fpu(X86EMUL_FPU_ymm);
         }
 
         copy_REX_VEX(opc, rex_prefix, vex);
         invoke_stub("", "", "=a" (dst.val) : [dummy] "i" (0));
 
         put_stub(stub);
-        check_xmm_exn(&fic);
 
         ASSERT(!state->simd_size);
         dst.bytes = 4;
@@ -6126,7 +6074,7 @@ x86_emulate(
             goto simd_0f_sse2;
     simd_0f_mmx:
         host_and_vcpu_must_have(mmx);
-        get_fpu(X86EMUL_FPU_mmx, &fic);
+        get_fpu(X86EMUL_FPU_mmx);
         goto simd_0f_common;
 
     CASE_SIMD_PACKED_INT(0x0f, 0x6e):    /* mov{d,q} r/m,{,x}mm */
@@ -6137,17 +6085,17 @@ x86_emulate(
         {
             generate_exception_if(vex.l || vex.reg != 0xf, EXC_UD);
             host_and_vcpu_must_have(avx);
-            get_fpu(X86EMUL_FPU_ymm, &fic);
+            get_fpu(X86EMUL_FPU_ymm);
         }
         else if ( vex.pfx )
         {
             vcpu_must_have(sse2);
-            get_fpu(X86EMUL_FPU_xmm, &fic);
+            get_fpu(X86EMUL_FPU_xmm);
         }
         else
         {
             host_and_vcpu_must_have(mmx);
-            get_fpu(X86EMUL_FPU_mmx, &fic);
+            get_fpu(X86EMUL_FPU_mmx);
         }
 
     simd_0f_rm:
@@ -6159,17 +6107,14 @@ x86_emulate(
         if ( !mode_64bit() )
             vex.w = 0;
         opc[1] = modrm & 0x38;
-        fic.insn_bytes = PFX_BYTES + 2;
+        insn_bytes = PFX_BYTES + 2;
         opc[2] = 0xc3;
 
         copy_REX_VEX(opc, rex_prefix, vex);
-        invoke_stub("", "", "+m" (src.val), "+m" (fic.exn_raised)
-                            : "a" (&src.val));
+        invoke_stub("", "", "+m" (src.val) : "a" (&src.val));
         dst.val = src.val;
 
         put_stub(stub);
-        check_xmm_exn(&fic);
-
         ASSERT(!state->simd_size);
         break;
 
@@ -6235,19 +6180,19 @@ x86_emulate(
                 host_and_vcpu_must_have(avx);
             }
     simd_0f_imm8_ymm:
-            get_fpu(X86EMUL_FPU_ymm, &fic);
+            get_fpu(X86EMUL_FPU_ymm);
         }
         else if ( vex.pfx )
         {
     simd_0f_imm8_sse2:
             vcpu_must_have(sse2);
-            get_fpu(X86EMUL_FPU_xmm, &fic);
+            get_fpu(X86EMUL_FPU_xmm);
         }
         else
         {
             host_and_vcpu_must_have(mmx);
             vcpu_must_have(mmxext);
-            get_fpu(X86EMUL_FPU_mmx, &fic);
+            get_fpu(X86EMUL_FPU_mmx);
         }
     simd_0f_imm8:
         opc = init_prefixes(stub);
@@ -6261,7 +6206,7 @@ x86_emulate(
             opc[1] &= 0x38;
         }
         opc[2] = imm1;
-        fic.insn_bytes = PFX_BYTES + 3;
+        insn_bytes = PFX_BYTES + 3;
         break;
 
     CASE_SIMD_PACKED_INT(0x0f, 0x71):    /* Grp12 */
@@ -6289,33 +6234,31 @@ x86_emulate(
                 host_and_vcpu_must_have(avx2);
             else
                 host_and_vcpu_must_have(avx);
-            get_fpu(X86EMUL_FPU_ymm, &fic);
+            get_fpu(X86EMUL_FPU_ymm);
         }
         else if ( vex.pfx )
         {
             vcpu_must_have(sse2);
-            get_fpu(X86EMUL_FPU_xmm, &fic);
+            get_fpu(X86EMUL_FPU_xmm);
         }
         else
         {
             host_and_vcpu_must_have(mmx);
-            get_fpu(X86EMUL_FPU_mmx, &fic);
+            get_fpu(X86EMUL_FPU_mmx);
         }
 
         opc = init_prefixes(stub);
         opc[0] = b;
         opc[1] = modrm;
         opc[2] = imm1;
-        fic.insn_bytes = PFX_BYTES + 3;
+        insn_bytes = PFX_BYTES + 3;
     simd_0f_reg_only:
-        opc[fic.insn_bytes - PFX_BYTES] = 0xc3;
+        opc[insn_bytes - PFX_BYTES] = 0xc3;
 
         copy_REX_VEX(opc, rex_prefix, vex);
         invoke_stub("", "", [dummy_out] "=g" (dummy) : [dummy_in] "i" (0) );
 
         put_stub(stub);
-        check_xmm_exn(&fic);
-
         ASSERT(!state->simd_size);
         break;
 
@@ -6350,7 +6293,7 @@ x86_emulate(
         {
             generate_exception_if(vex.reg != 0xf, EXC_UD);
             host_and_vcpu_must_have(avx);
-            get_fpu(X86EMUL_FPU_ymm, &fic);
+            get_fpu(X86EMUL_FPU_ymm);
 
 #ifdef __x86_64__
             if ( !mode_64bit() )
@@ -6392,12 +6335,12 @@ x86_emulate(
         else
         {
             host_and_vcpu_must_have(mmx);
-            get_fpu(X86EMUL_FPU_mmx, &fic);
+            get_fpu(X86EMUL_FPU_mmx);
         }
 
         opc = init_prefixes(stub);
         opc[0] = b;
-        fic.insn_bytes = PFX_BYTES + 1;
+        insn_bytes = PFX_BYTES + 1;
         goto simd_0f_reg_only;
 
     case X86EMUL_OPC_66(0x0f, 0x78):     /* Grp17 */
@@ -6413,14 +6356,14 @@ x86_emulate(
         generate_exception_if(ea.type != OP_REG, EXC_UD);
 
         host_and_vcpu_must_have(sse4a);
-        get_fpu(X86EMUL_FPU_xmm, &fic);
+        get_fpu(X86EMUL_FPU_xmm);
 
         opc = init_prefixes(stub);
         opc[0] = b;
         opc[1] = modrm;
         opc[2] = imm1;
         opc[3] = imm2;
-        fic.insn_bytes = PFX_BYTES + 4;
+        insn_bytes = PFX_BYTES + 4;
         goto simd_0f_reg_only;
 
     case X86EMUL_OPC_66(0x0f, 0x79):     /* extrq xmm,xmm */
@@ -6548,7 +6491,7 @@ x86_emulate(
             vcpu_must_have(sse);
         ldmxcsr:
             generate_exception_if(src.type != OP_MEM, EXC_UD);
-            get_fpu(vex.opcx ? X86EMUL_FPU_ymm : X86EMUL_FPU_xmm, &fic);
+            get_fpu(vex.opcx ? X86EMUL_FPU_ymm : X86EMUL_FPU_xmm);
             generate_exception_if(src.val & ~mxcsr_mask, EXC_GP, 0);
             asm volatile ( "ldmxcsr %0" :: "m" (src.val) );
             break;
@@ -6558,7 +6501,7 @@ x86_emulate(
             vcpu_must_have(sse);
         stmxcsr:
             generate_exception_if(dst.type != OP_MEM, EXC_UD);
-            get_fpu(vex.opcx ? X86EMUL_FPU_ymm : X86EMUL_FPU_xmm, &fic);
+            get_fpu(vex.opcx ? X86EMUL_FPU_ymm : X86EMUL_FPU_xmm);
             asm volatile ( "stmxcsr %0" : "=m" (dst.val) );
             break;
 
@@ -6812,7 +6755,7 @@ x86_emulate(
             if ( vex.pfx & VEX_PREFIX_DOUBLE_MASK )
                 goto simd_0f_imm8_sse2;
             vcpu_must_have(sse);
-            get_fpu(X86EMUL_FPU_xmm, &fic);
+            get_fpu(X86EMUL_FPU_xmm);
             goto simd_0f_imm8;
         }
         goto simd_0f_imm8_avx;
@@ -6843,7 +6786,7 @@ x86_emulate(
             vex.w = 0;
         opc[1] = modrm & 0xc7;
         opc[2] = imm1;
-        fic.insn_bytes = PFX_BYTES + 3;
+        insn_bytes = PFX_BYTES + 3;
         goto simd_0f_to_gpr;
 
     case X86EMUL_OPC(0x0f, 0xc7): /* Grp9 */
@@ -7090,18 +7033,18 @@ x86_emulate(
             generate_exception_if(vex.l || vex.reg != 0xf, EXC_UD);
             d |= TwoOp;
             host_and_vcpu_must_have(avx);
-            get_fpu(X86EMUL_FPU_ymm, &fic);
+            get_fpu(X86EMUL_FPU_ymm);
         }
         else if ( vex.pfx )
         {
             vcpu_must_have(sse2);
-            get_fpu(X86EMUL_FPU_xmm, &fic);
+            get_fpu(X86EMUL_FPU_xmm);
         }
         else
         {
             host_and_vcpu_must_have(mmx);
             vcpu_must_have(mmxext);
-            get_fpu(X86EMUL_FPU_mmx, &fic);
+            get_fpu(X86EMUL_FPU_mmx);
         }
 
         /*
@@ -7121,7 +7064,6 @@ x86_emulate(
         if ( !mode_64bit() )
             vex.w = 0;
         opc[1] = modrm & 0xc7;
-        fic.insn_bytes = PFX_BYTES + 2;
         opc[2] = 0xc3;
 
         copy_REX_VEX(opc, rex_prefix, vex);
@@ -7134,6 +7076,7 @@ x86_emulate(
         opc = init_prefixes(stub);
         opc[0] = b;
         opc[1] = modrm;
+        insn_bytes = PFX_BYTES + 2;
         /* Restore high bit of XMM destination. */
         if ( sfence )
         {
@@ -7180,12 +7123,12 @@ x86_emulate(
         if ( vex.pfx )
         {
     simd_0f38_common:
-            get_fpu(X86EMUL_FPU_xmm, &fic);
+            get_fpu(X86EMUL_FPU_xmm);
         }
         else
         {
             host_and_vcpu_must_have(mmx);
-            get_fpu(X86EMUL_FPU_mmx, &fic);
+            get_fpu(X86EMUL_FPU_mmx);
         }
         opc = init_prefixes(stub);
         opc[0] = 0x38;
@@ -7198,7 +7141,7 @@ x86_emulate(
             vex.b = 1;
             opc[2] &= 0x38;
         }
-        fic.insn_bytes = PFX_BYTES + 3;
+        insn_bytes = PFX_BYTES + 3;
         break;
 
     case X86EMUL_OPC_VEX_66(0x0f38, 0x19): /* vbroadcastsd xmm/m64,ymm */
@@ -7226,13 +7169,13 @@ x86_emulate(
         if ( vex.opcx == vex_none )
         {
             host_and_vcpu_must_have(sse4_1);
-            get_fpu(X86EMUL_FPU_xmm, &fic);
+            get_fpu(X86EMUL_FPU_xmm);
         }
         else
         {
             generate_exception_if(vex.reg != 0xf, EXC_UD);
             host_and_vcpu_must_have(avx);
-            get_fpu(X86EMUL_FPU_ymm, &fic);
+            get_fpu(X86EMUL_FPU_ymm);
         }
 
         opc = init_prefixes(stub);
@@ -7251,21 +7194,19 @@ x86_emulate(
             vex.b = 1;
             opc[1] &= 0x38;
         }
-        fic.insn_bytes = PFX_BYTES + 2;
+        insn_bytes = PFX_BYTES + 2;
         opc[2] = 0xc3;
         if ( vex.opcx == vex_none )
         {
             /* Cover for extra prefix byte. */
             --opc;
-            ++fic.insn_bytes;
+            ++insn_bytes;
         }
 
         copy_REX_VEX(opc, rex_prefix, vex);
         emulate_stub("+m" (*mmvalp), "a" (mmvalp));
 
         put_stub(stub);
-        check_xmm_exn(&fic);
-
         state->simd_size = simd_none;
         dst.type = OP_NONE;
         break;
@@ -7354,7 +7295,7 @@ x86_emulate(
 
         generate_exception_if(ea.type != OP_MEM || vex.w, EXC_UD);
         host_and_vcpu_must_have(avx);
-        get_fpu(X86EMUL_FPU_ymm, &fic);
+        get_fpu(X86EMUL_FPU_ymm);
 
         /*
          * While we can't reasonably provide fully correct behavior here
@@ -7403,7 +7344,7 @@ x86_emulate(
         rex_prefix &= ~REX_B;
         vex.b = 1;
         opc[1] = modrm & 0x38;
-        fic.insn_bytes = PFX_BYTES + 2;
+        insn_bytes = PFX_BYTES + 2;
 
         break;
     }
@@ -7452,7 +7393,7 @@ x86_emulate(
 
         generate_exception_if(ea.type != OP_MEM, EXC_UD);
         host_and_vcpu_must_have(avx2);
-        get_fpu(X86EMUL_FPU_ymm, &fic);
+        get_fpu(X86EMUL_FPU_ymm);
 
         /*
          * While we can't reasonably provide fully correct behavior here
@@ -7499,7 +7440,7 @@ x86_emulate(
         rex_prefix &= ~REX_B;
         vex.b = 1;
         opc[1] = modrm & 0x38;
-        fic.insn_bytes = PFX_BYTES + 2;
+        insn_bytes = PFX_BYTES + 2;
 
         break;
     }
@@ -7522,7 +7463,7 @@ x86_emulate(
                               state->sib_index == mask_reg, EXC_UD);
         generate_exception_if(!cpu_has_avx, EXC_UD);
         vcpu_must_have(avx2);
-        get_fpu(X86EMUL_FPU_ymm, &fic);
+        get_fpu(X86EMUL_FPU_ymm);
 
         /* Read destination, index, and mask registers. */
         opc = init_prefixes(stub);
@@ -7859,12 +7800,12 @@ x86_emulate(
         if ( vex.pfx )
         {
     simd_0f3a_common:
-            get_fpu(X86EMUL_FPU_xmm, &fic);
+            get_fpu(X86EMUL_FPU_xmm);
         }
         else
         {
             host_and_vcpu_must_have(mmx);
-            get_fpu(X86EMUL_FPU_mmx, &fic);
+            get_fpu(X86EMUL_FPU_mmx);
         }
         opc = init_prefixes(stub);
         opc[0] = 0x3a;
@@ -7878,7 +7819,7 @@ x86_emulate(
             opc[2] &= 0x38;
         }
         opc[3] = imm1;
-        fic.insn_bytes = PFX_BYTES + 4;
+        insn_bytes = PFX_BYTES + 4;
         break;
 
     case X86EMUL_OPC_66(0x0f3a, 0x14): /* pextrb $imm8,xmm,r/m */
@@ -7886,7 +7827,7 @@ x86_emulate(
     case X86EMUL_OPC_66(0x0f3a, 0x16): /* pextr{d,q} $imm8,xmm,r/m */
     case X86EMUL_OPC_66(0x0f3a, 0x17): /* extractps $imm8,xmm,r/m */
         host_and_vcpu_must_have(sse4_1);
-        get_fpu(X86EMUL_FPU_xmm, &fic);
+        get_fpu(X86EMUL_FPU_xmm);
 
         opc = init_prefixes(stub);
         opc++[0] = 0x3a;
@@ -7899,20 +7840,16 @@ x86_emulate(
             vex.w = 0;
         opc[1] = modrm & 0x38;
         opc[2] = imm1;
-        fic.insn_bytes = PFX_BYTES + 3;
         opc[3] = 0xc3;
         if ( vex.opcx == vex_none )
         {
             /* Cover for extra prefix byte. */
             --opc;
-            ++fic.insn_bytes;
         }
 
         copy_REX_VEX(opc, rex_prefix, vex);
         invoke_stub("", "", "=m" (dst.val) : "a" (&dst.val));
-
         put_stub(stub);
-        check_xmm_exn(&fic);
 
         ASSERT(!state->simd_size);
         dst.bytes = dst.type == OP_REG || b == 0x17 ? 4 : 1 << (b & 3);
@@ -7926,7 +7863,7 @@ x86_emulate(
     case X86EMUL_OPC_VEX_66(0x0f3a, 0x17): /* vextractps $imm8,xmm,r/m */
         generate_exception_if(vex.l || vex.reg != 0xf, EXC_UD);
         host_and_vcpu_must_have(avx);
-        get_fpu(X86EMUL_FPU_ymm, &fic);
+        get_fpu(X86EMUL_FPU_ymm);
         opc = init_prefixes(stub);
         goto pextr;
 
@@ -7948,17 +7885,15 @@ x86_emulate(
             opc[1] &= 0x38;
         }
         opc[2] = imm1;
-        fic.insn_bytes = PFX_BYTES + 3;
+        insn_bytes = PFX_BYTES + 3;
         opc[3] = 0xc3;
 
         copy_VEX(opc, vex);
         /* Latch MXCSR - we may need to restore it below. */
         invoke_stub("stmxcsr %[mxcsr]", "",
-                    "=m" (*mmvalp), "+m" (fic.exn_raised), [mxcsr] "=m" (mxcsr)
-                    : "a" (mmvalp));
+                    "=m" (*mmvalp), [mxcsr] "=m" (mxcsr) : "a" (mmvalp));
 
         put_stub(stub);
-        check_xmm_exn(&fic);
 
         if ( ea.type == OP_MEM )
         {
@@ -7977,7 +7912,7 @@ x86_emulate(
     case X86EMUL_OPC_66(0x0f3a, 0x20): /* pinsrb $imm8,r32/m8,xmm */
     case X86EMUL_OPC_66(0x0f3a, 0x22): /* pinsr{d,q} $imm8,r/m,xmm */
         host_and_vcpu_must_have(sse4_1);
-        get_fpu(X86EMUL_FPU_xmm, &fic);
+        get_fpu(X86EMUL_FPU_xmm);
         memcpy(mmvalp, &src.val, op_bytes);
         ea.type = OP_MEM;
         op_bytes = src.bytes;
@@ -8087,13 +8022,13 @@ x86_emulate(
         if ( vex.opcx == vex_none )
         {
             host_and_vcpu_must_have(sse4_2);
-            get_fpu(X86EMUL_FPU_xmm, &fic);
+            get_fpu(X86EMUL_FPU_xmm);
         }
         else
         {
             generate_exception_if(vex.l || vex.reg != 0xf, EXC_UD);
             host_and_vcpu_must_have(avx);
-            get_fpu(X86EMUL_FPU_ymm, &fic);
+            get_fpu(X86EMUL_FPU_ymm);
         }
 
         opc = init_prefixes(stub);
@@ -8114,13 +8049,13 @@ x86_emulate(
                 goto done;
         }
         opc[2] = imm1;
-        fic.insn_bytes = PFX_BYTES + 3;
+        insn_bytes = PFX_BYTES + 3;
         opc[3] = 0xc3;
         if ( vex.opcx == vex_none )
         {
             /* Cover for extra prefix byte. */
             --opc;
-            ++fic.insn_bytes;
+            ++insn_bytes;
         }
 
         copy_REX_VEX(opc, rex_prefix, vex);
@@ -8351,7 +8286,7 @@ x86_emulate(
 
         if ( !opc )
             BUG();
-        opc[fic.insn_bytes - PFX_BYTES] = 0xc3;
+        opc[insn_bytes - PFX_BYTES] = 0xc3;
         copy_REX_VEX(opc, rex_prefix, vex);
 
         if ( ea.type == OP_MEM )
@@ -8429,13 +8364,11 @@ x86_emulate(
         if ( likely((ctxt->opcode & ~(X86EMUL_OPC_PFX_MASK |
                                       X86EMUL_OPC_ENCODING_MASK)) !=
                     X86EMUL_OPC(0x0f, 0xf7)) )
-            invoke_stub("", "", "+m" (*mmvalp), "+m" (fic.exn_raised)
-                                : "a" (mmvalp));
+            invoke_stub("", "", "+m" (*mmvalp) : "a" (mmvalp));
         else
             invoke_stub("", "", "+m" (*mmvalp) : "D" (mmvalp));
 
         put_stub(stub);
-        check_xmm_exn(&fic);
     }
 
     switch ( dst.type )
@@ -8478,7 +8411,8 @@ x86_emulate(
     }
 
  complete_insn: /* Commit shadow register state. */
-    put_fpu(&fic, false, state, ctxt, ops);
+    put_fpu(fpu_type, false, state, ctxt, ops);
+    fpu_type = X86EMUL_FPU_none;
 
     /* Zero the upper 32 bits of %rip if not in 64-bit mode. */
     if ( !mode_64bit() )
@@ -8502,13 +8436,22 @@ x86_emulate(
     ctxt->regs->eflags &= ~X86_EFLAGS_RF;
 
  done:
-    put_fpu(&fic, fic.insn_bytes > 0 && dst.type == OP_MEM, state, ctxt, ops);
+    put_fpu(fpu_type, insn_bytes > 0 && dst.type == OP_MEM, state, ctxt, ops);
     put_stub(stub);
     return rc;
 #undef state
 
 #ifdef __XEN__
  emulation_stub_failure:
+    generate_exception_if(stub_exn.info.fields.trapnr == EXC_MF, EXC_MF);
+    if ( stub_exn.info.fields.trapnr == EXC_XM )
+    {
+        unsigned long cr4;
+
+        if ( !ops->read_cr || ops->read_cr(4, &cr4, ctxt) != X86EMUL_OKAY )
+            cr4 = X86_CR4_OSXMMEXCPT;
+        generate_exception(cr4 & X86_CR4_OSXMMEXCPT ? EXC_XM : EXC_UD);
+    }
     gprintk(XENLOG_WARNING,
             "exception %u (ec=%04x) in emulation stub (line %u)\n",
             stub_exn.info.fields.trapnr, stub_exn.info.fields.ec,
--- a/xen/arch/x86/x86_emulate/x86_emulate.h
+++ b/xen/arch/x86/x86_emulate/x86_emulate.h
@@ -446,12 +446,8 @@ struct x86_emulate_ops
 
     /*
      * get_fpu: Load emulated environment's FPU state onto processor.
-     *  @exn_callback: On any FPU or SIMD exception, pass control to
-     *                 (*exception_callback)(exception_callback_arg, regs).
      */
     int (*get_fpu)(
-        void (*exception_callback)(void *, struct cpu_user_regs *),
-        void *exception_callback_arg,
         enum x86_emulate_fpu_type type,
         struct x86_emulate_ctxt *ctxt);
 
--- a/xen/include/asm-x86/hvm/vcpu.h
+++ b/xen/include/asm-x86/hvm/vcpu.h
@@ -198,10 +198,6 @@ struct hvm_vcpu {
 
     struct hvm_vcpu_io  hvm_io;
 
-    /* Callback into x86_emulate when emulating FPU/MMX/XMM instructions. */
-    void (*fpu_exception_callback)(void *, struct cpu_user_regs *);
-    void *fpu_exception_callback_arg;
-
     /* Pending hw/sw interrupt (.vector = -1 means nothing pending). */
     struct x86_event     inject_event;
 



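With the exception_callback pair gone from ->get_fpu() (see the header
change above), an implementation shrinks to just making the emulated
state usable on the processor; stub-raised #MF/#XM now surface through
the emulation_stub_failure path instead of via a registered callback.
A minimal sketch, with claim_fpu() being a made-up stand-in for host
FPU acquisition:

    static int example_get_fpu(enum x86_emulate_fpu_type type,
                               struct x86_emulate_ctxt *ctxt)
    {
        /* Make the emulated FPU/MMX/SIMD state live on the processor. */
        if ( !claim_fpu(type, ctxt) )    /* hypothetical helper */
            return X86EMUL_UNHANDLEABLE;

        return X86EMUL_OKAY;
    }
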
_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 33+ messages in thread

* [PATCH v5 06/14] x86emul: tell cmpxchg hook whether LOCK is in effect
  2018-03-15 12:57 [PATCH v5 00/14] x86: emulator enhancements Jan Beulich
                   ` (4 preceding siblings ...)
  2018-03-15 13:06 ` [PATCH v5 05/14] x86/HVM: eliminate custom #MF/#XM handling Jan Beulich
@ 2018-03-15 13:07 ` Jan Beulich
  2018-03-15 13:08 ` [PATCH v5 07/14] x86emul: correctly handle CMPXCHG* comparison failures Jan Beulich
                   ` (7 subsequent siblings)
  13 siblings, 0 replies; 33+ messages in thread
From: Jan Beulich @ 2018-03-15 13:07 UTC (permalink / raw)
  To: xen-devel; +Cc: George Dunlap, Andrew Cooper

This is necessary for the hook to correctly perform the operation.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: Paul Durrant <paul.durrant@citrix.com>
Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
Acked-by: Tim Deegan <tim@xen.org>
---
v5: Re-base.
v3: New.
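---
For illustration only, a hook honoring the new flag might be shaped as
below. map_guest() is a made-up placeholder for whatever mapping logic
an implementation has available, only the long-sized atomic path is
shown, and reporting of comparison failures is left out (that only
gets introduced later in this series):

    static int example_cmpxchg(
        enum x86_segment seg, unsigned long offset, void *p_old,
        void *p_new, unsigned int bytes, bool lock,
        struct x86_emulate_ctxt *ctxt)
    {
        /* map_guest() is hypothetical; error handling kept minimal. */
        void *ptr = map_guest(seg, offset, bytes, ctxt);

        if ( !ptr )
            return X86EMUL_UNHANDLEABLE;

        if ( !lock )
        {
            /* No LOCK in effect: plain compare and conditional write. */
            if ( !memcmp(ptr, p_old, bytes) )
                memcpy(ptr, p_new, bytes);
        }
        else if ( bytes == sizeof(unsigned long) )
        {
            /* LOCK in effect: the compare-and-swap must be atomic. */
            unsigned long old, new;

            memcpy(&old, p_old, sizeof(old));
            memcpy(&new, p_new, sizeof(new));
            __atomic_compare_exchange_n((unsigned long *)ptr, &old, new,
                                        false, __ATOMIC_SEQ_CST,
                                        __ATOMIC_SEQ_CST);
        }
        else
            return X86EMUL_UNHANDLEABLE;

        return X86EMUL_OKAY;
    }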

--- a/tools/fuzz/x86_instruction_emulator/fuzz-emul.c
+++ b/tools/fuzz/x86_instruction_emulator/fuzz-emul.c
@@ -347,6 +347,7 @@ static int fuzz_cmpxchg(
     void *old,
     void *new,
     unsigned int bytes,
+    bool lock,
     struct x86_emulate_ctxt *ctxt)
 {
     /*
--- a/tools/tests/x86_emulator/test_x86_emulator.c
+++ b/tools/tests/x86_emulator/test_x86_emulator.c
@@ -320,6 +320,7 @@ static int cmpxchg(
     void *old,
     void *new,
     unsigned int bytes,
+    bool lock,
     struct x86_emulate_ctxt *ctxt)
 {
     if ( verbose )
--- a/xen/arch/x86/hvm/emulate.c
+++ b/xen/arch/x86/hvm/emulate.c
@@ -1250,6 +1250,7 @@ static int hvmemul_cmpxchg_discard(
     void *p_old,
     void *p_new,
     unsigned int bytes,
+    bool lock,
     struct x86_emulate_ctxt *ctxt)
 {
     return X86EMUL_OKAY;
@@ -1293,6 +1294,7 @@ static int hvmemul_cmpxchg(
     void *p_old,
     void *p_new,
     unsigned int bytes,
+    bool lock,
     struct x86_emulate_ctxt *ctxt)
 {
     /* Fix this in case the guest is really relying on r-m-w atomicity. */
--- a/xen/arch/x86/mm/shadow/common.c
+++ b/xen/arch/x86/mm/shadow/common.c
@@ -281,6 +281,7 @@ hvm_emulate_cmpxchg(enum x86_segment seg
                     void *p_old,
                     void *p_new,
                     unsigned int bytes,
+                    bool lock,
                     struct x86_emulate_ctxt *ctxt)
 {
     struct sh_emulate_ctxt *sh_ctxt =
--- a/xen/arch/x86/pv/ro-page-fault.c
+++ b/xen/arch/x86/pv/ro-page-fault.c
@@ -216,7 +216,7 @@ static int ptwr_emulated_write(enum x86_
 
 static int ptwr_emulated_cmpxchg(enum x86_segment seg, unsigned long offset,
                                  void *p_old, void *p_new, unsigned int bytes,
-                                 struct x86_emulate_ctxt *ctxt)
+                                 bool lock, struct x86_emulate_ctxt *ctxt)
 {
     intpte_t old = 0, new = 0;
 
--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -1947,7 +1947,7 @@ protmode_load_seg(
 
         fail_if(!ops->cmpxchg);
         switch ( (rc = ops->cmpxchg(sel_seg, (sel & 0xfff8) + 4, &desc.b,
-                                    &new_desc_b, sizeof(desc.b), ctxt)) )
+                                    &new_desc_b, sizeof(desc.b), true, ctxt)) )
         {
         case X86EMUL_OKAY:
             break;
@@ -6941,7 +6941,8 @@ x86_emulate(
             }
 
             if ( (rc = ops->cmpxchg(ea.mem.seg, ea.mem.off, old, aux,
-                                    op_bytes, ctxt)) != X86EMUL_OKAY )
+                                    op_bytes, lock_prefix,
+                                    ctxt)) != X86EMUL_OKAY )
                 goto done;
             _regs.eflags |= X86_EFLAGS_ZF;
         }
@@ -8392,7 +8393,7 @@ x86_emulate(
             fail_if(!ops->cmpxchg);
             rc = ops->cmpxchg(
                 dst.mem.seg, dst.mem.off, &dst.orig_val,
-                &dst.val, dst.bytes, ctxt);
+                &dst.val, dst.bytes, true, ctxt);
         }
         else
         {
--- a/xen/arch/x86/x86_emulate/x86_emulate.h
+++ b/xen/arch/x86/x86_emulate/x86_emulate.h
@@ -245,10 +245,11 @@ struct x86_emulate_ops
         struct x86_emulate_ctxt *ctxt);
 
     /*
-     * cmpxchg: Emulate an atomic (LOCKed) CMPXCHG operation.
+     * cmpxchg: Emulate a CMPXCHG operation.
      *  @p_old: [IN ] Pointer to value expected to be current at @addr.
      *  @p_new: [IN ] Pointer to value to write to @addr.
      *  @bytes: [IN ] Operation size (up to 8 (x86/32) or 16 (x86/64) bytes).
+     *  @lock:  [IN ] atomic (LOCKed) operation
      */
     int (*cmpxchg)(
         enum x86_segment seg,
@@ -256,6 +257,7 @@ struct x86_emulate_ops
         void *p_old,
         void *p_new,
         unsigned int bytes,
+        bool lock,
         struct x86_emulate_ctxt *ctxt);
 
     /*



_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 33+ messages in thread

* [PATCH v5 07/14] x86emul: correctly handle CMPXCHG* comparison failures
  2018-03-15 12:57 [PATCH v5 00/14] x86: emulator enhancements Jan Beulich
                   ` (5 preceding siblings ...)
  2018-03-15 13:07 ` [PATCH v5 06/14] x86emul: tell cmpxchg hook whether LOCK is in effect Jan Beulich
@ 2018-03-15 13:08 ` Jan Beulich
  2018-03-15 13:08 ` [PATCH v5 08/14] x86emul: add read-modify-write hook Jan Beulich
                   ` (6 subsequent siblings)
  13 siblings, 0 replies; 33+ messages in thread
From: Jan Beulich @ 2018-03-15 13:08 UTC (permalink / raw)
  To: xen-devel; +Cc: George Dunlap, Andrew Cooper

If the ->cmpxchg() hook finds a mismatch, we should deal with this the
same way as when the "manual" comparison reports a mismatch.

This involves reverting bfce0e62c3 ("x86/emul: Drop
X86EMUL_CMPXCHG_FAILED"), albeit with X86EMUL_CMPXCHG_FAILED now
becoming a value distinct from X86EMUL_RETRY.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
Acked-by: Andrew Cooper <andrew.cooper3@citrix.com>
Acked-by: Tim Deegan <tim@xen.org>
---
v4: Add comment to ptwr_emulated_update(). paddr_t -> intpte_t change
    split off to separate patch.
v3: New.
---
The code could be further simplified if we could rely on all
->cmpxchg() hooks always using CMPXCHG, but for now we need to cope
with them using plain writes (and hence accept the double reads if
CMPXCHG is actually being used).
Note that the patch doesn't address the incorrectness of no memory
write occurring in the comparison-failed case (hardware CMPXCHG writes
back the old value even then).
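
In hook terms the new contract boils down to the following failure
path (a fragment only, not any particular hook; "cur" stands for
whatever value the hook actually observed at @addr):

    if ( memcmp(&cur, p_old, bytes) )
    {
        /* Mismatch: hand back the observed value and report failure. */
        memcpy(p_old, &cur, bytes);
        return X86EMUL_CMPXCHG_FAILED;
    }

    return X86EMUL_OKAY;

The shadow and ptwr adjustments below are instances of this pattern.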

--- a/xen/arch/x86/mm/shadow/common.c
+++ b/xen/arch/x86/mm/shadow/common.c
@@ -302,8 +302,12 @@ hvm_emulate_cmpxchg(enum x86_segment seg
     memcpy(&old, p_old, bytes);
     memcpy(&new, p_new, bytes);
 
-    return v->arch.paging.mode->shadow.x86_emulate_cmpxchg(
-               v, addr, old, new, bytes, sh_ctxt);
+    rc = v->arch.paging.mode->shadow.x86_emulate_cmpxchg(
+             v, addr, &old, new, bytes, sh_ctxt);
+
+    memcpy(p_old, &old, bytes);
+
+    return rc;
 }
 
 static const struct x86_emulate_ops hvm_shadow_emulator_ops = {
--- a/xen/arch/x86/mm/shadow/multi.c
+++ b/xen/arch/x86/mm/shadow/multi.c
@@ -4723,11 +4723,11 @@ sh_x86_emulate_write(struct vcpu *v, uns
 
 static int
 sh_x86_emulate_cmpxchg(struct vcpu *v, unsigned long vaddr,
-                        unsigned long old, unsigned long new,
-                        unsigned int bytes, struct sh_emulate_ctxt *sh_ctxt)
+                       unsigned long *p_old, unsigned long new,
+                       unsigned int bytes, struct sh_emulate_ctxt *sh_ctxt)
 {
     void *addr;
-    unsigned long prev;
+    unsigned long prev, old = *p_old;
     int rv = X86EMUL_OKAY;
 
     /* Unaligned writes are only acceptable on HVM */
@@ -4751,7 +4751,10 @@ sh_x86_emulate_cmpxchg(struct vcpu *v, u
     }
 
     if ( prev != old )
-        rv = X86EMUL_RETRY;
+    {
+        *p_old = prev;
+        rv = X86EMUL_CMPXCHG_FAILED;
+    }
 
     SHADOW_DEBUG(EMULATE, "va %#lx was %#lx expected %#lx"
                   " wanted %#lx now %#lx bytes %u\n",
--- a/xen/arch/x86/pv/ro-page-fault.c
+++ b/xen/arch/x86/pv/ro-page-fault.c
@@ -65,14 +65,20 @@ static int ptwr_emulated_read(enum x86_s
     return X86EMUL_OKAY;
 }
 
-static int ptwr_emulated_update(unsigned long addr, intpte_t old, intpte_t val,
-                                unsigned int bytes, unsigned int do_cmpxchg,
+/*
+ * p_old being NULL indicates a plain write to occur, while a non-NULL
+ * input requests a CMPXCHG-based update.
+ */
+static int ptwr_emulated_update(unsigned long addr, intpte_t *p_old,
+                                intpte_t val, unsigned int bytes,
                                 struct x86_emulate_ctxt *ctxt)
 {
     unsigned long mfn;
     unsigned long unaligned_addr = addr;
     struct page_info *page;
     l1_pgentry_t pte, ol1e, nl1e, *pl1e;
+    intpte_t old = p_old ? *p_old : 0;
+    unsigned int offset = 0;
     struct vcpu *v = current;
     struct domain *d = v->domain;
     struct ptwr_emulate_ctxt *ptwr_ctxt = ctxt->data;
@@ -91,7 +97,9 @@ static int ptwr_emulated_update(unsigned
     if ( bytes != sizeof(val) )
     {
         intpte_t full;
-        unsigned int rc, offset = addr & (sizeof(full) - 1);
+        unsigned int rc;
+
+        offset = addr & (sizeof(full) - 1);
 
         /* Align address; read full word. */
         addr &= ~(sizeof(full) - 1);
@@ -131,7 +139,7 @@ static int ptwr_emulated_update(unsigned
     {
     default:
         if ( is_pv_32bit_domain(d) && (bytes == 4) && (unaligned_addr & 4) &&
-             !do_cmpxchg && (l1e_get_flags(nl1e) & _PAGE_PRESENT) )
+             !p_old && (l1e_get_flags(nl1e) & _PAGE_PRESENT) )
         {
             /*
              * If this is an upper-half write to a PAE PTE then we assume that
@@ -162,21 +170,26 @@ static int ptwr_emulated_update(unsigned
     /* Checked successfully: do the update (write or cmpxchg). */
     pl1e = map_domain_page(_mfn(mfn));
     pl1e = (l1_pgentry_t *)((unsigned long)pl1e + (addr & ~PAGE_MASK));
-    if ( do_cmpxchg )
+    if ( p_old )
     {
-        bool okay;
-        intpte_t t = old;
 
         ol1e = l1e_from_intpte(old);
-        okay = paging_cmpxchg_guest_entry(v, &l1e_get_intpte(*pl1e),
-                                          &t, l1e_get_intpte(nl1e), _mfn(mfn));
-        okay = (okay && t == old);
+        if ( !paging_cmpxchg_guest_entry(v, &l1e_get_intpte(*pl1e),
+                                         &old, l1e_get_intpte(nl1e), _mfn(mfn)) )
+            ret = X86EMUL_UNHANDLEABLE;
+        else if ( l1e_get_intpte(ol1e) == old )
+            ret = X86EMUL_OKAY;
+        else
+        {
+            *p_old = old >> (offset * 8);
+            ret = X86EMUL_CMPXCHG_FAILED;
+        }
 
-        if ( !okay )
+        if ( ret != X86EMUL_OKAY )
         {
             unmap_domain_page(pl1e);
             put_page_from_l1e(nl1e, d);
-            return X86EMUL_RETRY;
+            return ret;
         }
     }
     else
@@ -211,7 +224,7 @@ static int ptwr_emulated_write(enum x86_
 
     memcpy(&val, p_data, bytes);
 
-    return ptwr_emulated_update(offset, 0, val, bytes, 0, ctxt);
+    return ptwr_emulated_update(offset, NULL, val, bytes, ctxt);
 }
 
 static int ptwr_emulated_cmpxchg(enum x86_segment seg, unsigned long offset,
@@ -219,6 +232,7 @@ static int ptwr_emulated_cmpxchg(enum x8
                                  bool lock, struct x86_emulate_ctxt *ctxt)
 {
     intpte_t old = 0, new = 0;
+    int rc;
 
     if ( (bytes > sizeof(new)) || (bytes & (bytes - 1)) )
     {
@@ -230,7 +244,11 @@ static int ptwr_emulated_cmpxchg(enum x8
     memcpy(&old, p_old, bytes);
     memcpy(&new, p_new, bytes);
 
-    return ptwr_emulated_update(offset, old, new, bytes, 1, ctxt);
+    rc = ptwr_emulated_update(offset, &old, new, bytes, ctxt);
+
+    memcpy(p_old, &old, bytes);
+
+    return rc;
 }
 
 static const struct x86_emulate_ops ptwr_emulate_ops = {
--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -1959,6 +1959,9 @@ protmode_load_seg(
 
         default:
             return rc;
+
+        case X86EMUL_CMPXCHG_FAILED:
+            return X86EMUL_RETRY;
         }
 
         /* Force the Accessed flag in our local copy. */
@@ -6603,21 +6606,45 @@ x86_emulate(
         break;
 
     case X86EMUL_OPC(0x0f, 0xb0): case X86EMUL_OPC(0x0f, 0xb1): /* cmpxchg */
-        /* Save real source value, then compare EAX against destination. */
-        src.orig_val = src.val;
-        src.val = _regs.r(ax);
-        /* cmp: %%eax - dst ==> dst and src swapped for macro invocation */
-        emulate_2op_SrcV("cmp", dst, src, _regs.eflags);
-        if ( _regs.eflags & X86_EFLAGS_ZF )
+        fail_if(!ops->cmpxchg);
+        _regs.eflags &= ~EFLAGS_MASK;
+        if ( !((dst.val ^ _regs.r(ax)) &
+               (~0UL >> (8 * (sizeof(long) - dst.bytes)))) )
         {
             /* Success: write back to memory. */
-            dst.val = src.orig_val;
+            if ( dst.type == OP_MEM )
+            {
+                dst.val = _regs.r(ax);
+                switch ( rc = ops->cmpxchg(dst.mem.seg, dst.mem.off, &dst.val,
+                                           &src.val, dst.bytes, lock_prefix,
+                                           ctxt) )
+                {
+                case X86EMUL_OKAY:
+                    dst.type = OP_NONE;
+                    _regs.eflags |= X86_EFLAGS_ZF | X86_EFLAGS_PF;
+                    break;
+                case X86EMUL_CMPXCHG_FAILED:
+                    rc = X86EMUL_OKAY;
+                    break;
+                default:
+                    goto done;
+                }
+            }
+            else
+            {
+                dst.val = src.val;
+                _regs.eflags |= X86_EFLAGS_ZF | X86_EFLAGS_PF;
+            }
         }
-        else
+        if ( !(_regs.eflags & X86_EFLAGS_ZF) )
         {
             /* Failure: write the value we saw to EAX. */
             dst.type = OP_REG;
             dst.reg  = (unsigned long *)&_regs.r(ax);
+            /* cmp: %%eax - dst ==> dst and src swapped for macro invocation */
+            src.val = _regs.r(ax);
+            emulate_2op_SrcV("cmp", dst, src, _regs.eflags);
+            ASSERT(!(_regs.eflags & X86_EFLAGS_ZF));
         }
         break;
 
@@ -6918,6 +6945,7 @@ x86_emulate(
 
         if ( memcmp(old, aux, op_bytes) )
         {
+        cmpxchgNb_failed:
             /* Expected != actual: store actual to rDX:rAX and clear ZF. */
             _regs.r(ax) = !(rex_prefix & REX_W) ? old->u32[0] : old->u64[0];
             _regs.r(dx) = !(rex_prefix & REX_W) ? old->u32[1] : old->u64[1];
@@ -6927,7 +6955,7 @@ x86_emulate(
         {
             /*
              * Expected == actual: Get proposed value, attempt atomic cmpxchg
-             * and set ZF.
+             * and set ZF if successful.
              */
             if ( !(rex_prefix & REX_W) )
             {
@@ -6940,11 +6968,20 @@ x86_emulate(
                 aux->u64[1] = _regs.r(cx);
             }
 
-            if ( (rc = ops->cmpxchg(ea.mem.seg, ea.mem.off, old, aux,
-                                    op_bytes, lock_prefix,
-                                    ctxt)) != X86EMUL_OKAY )
+            switch ( rc = ops->cmpxchg(ea.mem.seg, ea.mem.off, old, aux,
+                                       op_bytes, lock_prefix, ctxt) )
+            {
+            case X86EMUL_OKAY:
+                _regs.eflags |= X86_EFLAGS_ZF;
+                break;
+
+            case X86EMUL_CMPXCHG_FAILED:
+                rc = X86EMUL_OKAY;
+                goto cmpxchgNb_failed;
+
+            default:
                 goto done;
-            _regs.eflags |= X86_EFLAGS_ZF;
+            }
         }
         break;
     }
@@ -8394,6 +8431,8 @@ x86_emulate(
             rc = ops->cmpxchg(
                 dst.mem.seg, dst.mem.off, &dst.orig_val,
                 &dst.val, dst.bytes, true, ctxt);
+            if ( rc == X86EMUL_CMPXCHG_FAILED )
+                rc = X86EMUL_RETRY;
         }
         else
         {
--- a/xen/arch/x86/x86_emulate/x86_emulate.h
+++ b/xen/arch/x86/x86_emulate/x86_emulate.h
@@ -158,6 +158,8 @@ struct x86_emul_fpu_aux {
   * strictly expected for now.
  */
 #define X86EMUL_UNRECOGNIZED   X86EMUL_UNIMPLEMENTED
+ /* (cmpxchg accessor): CMPXCHG failed. */
+#define X86EMUL_CMPXCHG_FAILED 7
 
 /* FPU sub-types which may be requested via ->get_fpu(). */
 enum x86_emulate_fpu_type {
@@ -247,6 +249,8 @@ struct x86_emulate_ops
     /*
      * cmpxchg: Emulate a CMPXCHG operation.
      *  @p_old: [IN ] Pointer to value expected to be current at @addr.
+     *          [OUT] Pointer to value found at @addr (may always be
+     *                updated, meaningful for X86EMUL_CMPXCHG_FAILED only).
      *  @p_new: [IN ] Pointer to value to write to @addr.
      *  @bytes: [IN ] Operation size (up to 8 (x86/32) or 16 (x86/64) bytes).
      *  @lock:  [IN ] atomic (LOCKed) operation
--- a/xen/include/asm-x86/paging.h
+++ b/xen/include/asm-x86/paging.h
@@ -86,7 +86,7 @@ struct shadow_paging_mode {
                                             void *src, u32 bytes,
                                             struct sh_emulate_ctxt *sh_ctxt);
     int           (*x86_emulate_cmpxchg   )(struct vcpu *v, unsigned long va,
-                                            unsigned long old, 
+                                            unsigned long *old,
                                             unsigned long new,
                                             unsigned int bytes,
                                             struct sh_emulate_ctxt *sh_ctxt);



_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 33+ messages in thread

* [PATCH v5 08/14] x86emul: add read-modify-write hook
  2018-03-15 12:57 [PATCH v5 00/14] x86: emulator enhancements Jan Beulich
                   ` (6 preceding siblings ...)
  2018-03-15 13:08 ` [PATCH v5 07/14] x86emul: correctly handle CMPXCHG* comparison failures Jan Beulich
@ 2018-03-15 13:08 ` Jan Beulich
  2018-03-15 14:19   ` Andrew Cooper
  2018-03-15 13:09 ` [PATCH v5 09/14] x86emul: also handle shifts through ->rmw() Jan Beulich
                   ` (5 subsequent siblings)
  13 siblings, 1 reply; 33+ messages in thread
From: Jan Beulich @ 2018-03-15 13:08 UTC (permalink / raw)
  To: xen-devel; +Cc: George Dunlap, Andrew Cooper

In order to correctly emulate read-modify-write insns, especially
LOCKed ones, we should not issue reads and writes separately. Use a
new hook to combine both, and don't uniformly read the memory
destination anymore. Instead, DstMem opcodes without Mov now need to
do the read themselves in their respective case blocks.

Also strip bogus _ prefixes from macro parameters where this only affects
lines which are being changed anyway.

In the test harness, besides some re-ordering to facilitate running a
few tests twice (once without and a second time with the .rmw hook in
place), tighten a few EFLAGS checks and add a test for NOT with memory
operand (in particular to verify EFLAGS don't get altered there).

For now make use of the hook optional for callers; eventually we may
want to consider making this mandatory.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
---
v4: Extend comment on the setting of lock_prefix in XCHG handling.
v3: New.
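---
The harness side of this is visible below: its rmw() hook simply
forwards to the new library helper x86_emul_rmw(), as the test operates
on directly addressable memory. A hypervisor-side implementation would
additionally have to map the destination first, roughly along the lines
of this sketch (map_guest()/unmap_guest() are made-up placeholders, not
actual Xen primitives):

    static int example_rmw(
        enum x86_segment seg, unsigned long offset, unsigned int bytes,
        uint32_t *eflags, struct x86_emulate_state *state,
        struct x86_emulate_ctxt *ctxt)
    {
        void *mapping = map_guest(seg, offset, bytes, ctxt);
        int rc;

        if ( !mapping )
            return X86EMUL_UNHANDLEABLE;

        /* The emulator core performs the whole read-modify-write here. */
        rc = x86_emul_rmw(mapping, bytes, eflags, state, ctxt);

        unmap_guest(mapping, bytes, ctxt);

        return rc;
    }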

--- a/tools/tests/x86_emulator/test_x86_emulator.c
+++ b/tools/tests/x86_emulator/test_x86_emulator.c
@@ -314,6 +314,17 @@ static int write(
     return X86EMUL_OKAY;
 }
 
+static int rmw(
+    enum x86_segment seg,
+    unsigned long offset,
+    unsigned int bytes,
+    uint32_t *eflags,
+    struct x86_emulate_state *state,
+    struct x86_emulate_ctxt *ctxt)
+{
+    return x86_emul_rmw((void *)offset, bytes, eflags, state, ctxt);
+}
+
 static int cmpxchg(
     enum x86_segment seg,
     unsigned long offset,
@@ -378,6 +389,9 @@ static struct x86_emulate_ops emulops =
     .put_fpu    = emul_test_put_fpu,
 };
 
+#define EFLAGS_ALWAYS_SET (X86_EFLAGS_IF | X86_EFLAGS_MBS)
+#define EFLAGS_MASK (X86_EFLAGS_ARITH_MASK | EFLAGS_ALWAYS_SET)
+
 int main(int argc, char **argv)
 {
     struct x86_emulate_ctxt ctxt;
@@ -414,6 +428,7 @@ int main(int argc, char **argv)
     if ( !stack_exec )
         printf("Warning: Stack could not be made executable (%d).\n", errno);
 
+ rmw_restart:
     printf("%-40s", "Testing addl %ecx,(%eax)...");
     instr[0] = 0x01; instr[1] = 0x08;
     regs.eflags = 0x200;
@@ -556,35 +571,32 @@ int main(int argc, char **argv)
         goto fail;
     printf("okay\n");
 
-    printf("%-40s", "Testing rep movsw...");
-    instr[0] = 0xf3; instr[1] = 0x66; instr[2] = 0xa5;
+    printf("%-40s", "Testing notb (%edi)...");
+    instr[0] = 0xf6; instr[1] = 0x17;
     *res        = 0x22334455;
-    regs.eflags = 0x200;
-    regs.ecx    = 23;
+    regs.eflags = EFLAGS_MASK;
     regs.eip    = (unsigned long)&instr[0];
-    regs.esi    = (unsigned long)res + 0;
-    regs.edi    = (unsigned long)res + 2;
+    regs.edi    = (unsigned long)res;
     rc = x86_emulate(&ctxt, &emulops);
-    if ( (rc != X86EMUL_OKAY) || 
-         (*res != 0x44554455) ||
-         (regs.eflags != 0x200) ||
-         (regs.ecx != 22) || 
-         (regs.esi != ((unsigned long)res + 2)) ||
-         (regs.edi != ((unsigned long)res + 4)) ||
-         (regs.eip != (unsigned long)&instr[0]) )
+    if ( (rc != X86EMUL_OKAY) ||
+         (*res != 0x223344aa) ||
+         ((regs.eflags & EFLAGS_MASK) != EFLAGS_MASK) ||
+         (regs.eip != (unsigned long)&instr[2]) )
         goto fail;
     printf("okay\n");
 
     printf("%-40s", "Testing btrl $0x1,(%edi)...");
     instr[0] = 0x0f; instr[1] = 0xba; instr[2] = 0x37; instr[3] = 0x01;
     *res        = 0x2233445F;
-    regs.eflags = 0x200;
+    regs.eflags = EFLAGS_ALWAYS_SET;
     regs.eip    = (unsigned long)&instr[0];
     regs.edi    = (unsigned long)res;
     rc = x86_emulate(&ctxt, &emulops);
     if ( (rc != X86EMUL_OKAY) ||
          (*res != 0x2233445D) ||
-         ((regs.eflags&0x201) != 0x201) ||
+         ((regs.eflags & (EFLAGS_ALWAYS_SET | X86_EFLAGS_ZF |
+                          X86_EFLAGS_CF)) !=
+          (EFLAGS_ALWAYS_SET | X86_EFLAGS_CF)) ||
          (regs.eip != (unsigned long)&instr[4]) )
         goto fail;
     printf("okay\n");
@@ -592,14 +604,16 @@ int main(int argc, char **argv)
     printf("%-40s", "Testing btrl %eax,(%edi)...");
     instr[0] = 0x0f; instr[1] = 0xb3; instr[2] = 0x07;
     *res        = 0x2233445F;
-    regs.eflags = 0x200;
+    regs.eflags = EFLAGS_ALWAYS_SET | X86_EFLAGS_ZF;
     regs.eip    = (unsigned long)&instr[0];
     regs.eax    = -32;
     regs.edi    = (unsigned long)(res+1);
     rc = x86_emulate(&ctxt, &emulops);
     if ( (rc != X86EMUL_OKAY) ||
          (*res != 0x2233445E) ||
-         ((regs.eflags&0x201) != 0x201) ||
+         ((regs.eflags & (EFLAGS_ALWAYS_SET | X86_EFLAGS_ZF |
+                          X86_EFLAGS_CF)) !=
+          (EFLAGS_ALWAYS_SET | X86_EFLAGS_ZF | X86_EFLAGS_CF)) ||
          (regs.eip != (unsigned long)&instr[3]) )
         goto fail;
     printf("okay\n");
@@ -607,19 +621,63 @@ int main(int argc, char **argv)
 #ifdef __x86_64__
     printf("%-40s", "Testing btcq %r8,(%r11)...");
     instr[0] = 0x4d; instr[1] = 0x0f; instr[2] = 0xbb; instr[3] = 0x03;
-    regs.eflags = 0x200;
+    regs.eflags = EFLAGS_ALWAYS_SET;
     regs.rip    = (unsigned long)&instr[0];
     regs.r8     = (-1L << 40) + 1;
     regs.r11    = (unsigned long)(res + (1L << 35));
     rc = x86_emulate(&ctxt, &emulops);
     if ( (rc != X86EMUL_OKAY) ||
          (*res != 0x2233445C) ||
-         (regs.eflags != 0x201) ||
+         ((regs.eflags & (EFLAGS_ALWAYS_SET | X86_EFLAGS_ZF |
+                          X86_EFLAGS_CF)) !=
+          (EFLAGS_ALWAYS_SET | X86_EFLAGS_CF)) ||
          (regs.rip != (unsigned long)&instr[4]) )
         goto fail;
     printf("okay\n");
 #endif
 
+    printf("%-40s", "Testing xadd %ax,(%ecx)...");
+    instr[0] = 0x66; instr[1] = 0x0f; instr[2] = 0xc1; instr[3] = 0x01;
+    regs.eflags = EFLAGS_ALWAYS_SET | X86_EFLAGS_ARITH_MASK;
+    regs.eip    = (unsigned long)&instr[0];
+    regs.ecx    = (unsigned long)res;
+    regs.eax    = 0x12345678;
+    *res        = 0x11111111;
+    rc = x86_emulate(&ctxt, &emulops);
+    if ( (rc != X86EMUL_OKAY) ||
+         (*res != 0x11116789) ||
+         (regs.eax != 0x12341111) ||
+         ((regs.eflags & EFLAGS_MASK) != EFLAGS_ALWAYS_SET) ||
+         (regs.eip != (unsigned long)&instr[4]) )
+        goto fail;
+    printf("okay\n");
+
+    if ( !emulops.rmw )
+    {
+        printf("[Switching to read-modify-write mode]\n");
+        emulops.rmw = rmw;
+        goto rmw_restart;
+    }
+
+    printf("%-40s", "Testing rep movsw...");
+    instr[0] = 0xf3; instr[1] = 0x66; instr[2] = 0xa5;
+    *res        = 0x22334455;
+    regs.eflags = 0x200;
+    regs.ecx    = 23;
+    regs.eip    = (unsigned long)&instr[0];
+    regs.esi    = (unsigned long)res + 0;
+    regs.edi    = (unsigned long)res + 2;
+    rc = x86_emulate(&ctxt, &emulops);
+    if ( (rc != X86EMUL_OKAY) ||
+         (*res != 0x44554455) ||
+         (regs.eflags != 0x200) ||
+         (regs.ecx != 22) ||
+         (regs.esi != ((unsigned long)res + 2)) ||
+         (regs.edi != ((unsigned long)res + 4)) ||
+         (regs.eip != (unsigned long)&instr[0]) )
+        goto fail;
+    printf("okay\n");
+
     res[0] = 0x12345678;
     res[1] = 0x87654321;
 
@@ -745,22 +803,6 @@ int main(int argc, char **argv)
 #endif
     printf("okay\n");
 
-    printf("%-40s", "Testing xadd %ax,(%ecx)...");
-    instr[0] = 0x66; instr[1] = 0x0f; instr[2] = 0xc1; instr[3] = 0x01;
-    regs.eflags = 0x200;
-    regs.eip    = (unsigned long)&instr[0];
-    regs.ecx    = (unsigned long)res;
-    regs.eax    = 0x12345678;
-    *res        = 0x11111111;
-    rc = x86_emulate(&ctxt, &emulops);
-    if ( (rc != X86EMUL_OKAY) ||
-         (*res != 0x11116789) ||
-         (regs.eax != 0x12341111) ||
-         ((regs.eflags&0x240) != 0x200) ||
-         (regs.eip != (unsigned long)&instr[4]) )
-        goto fail;
-    printf("okay\n");
-
     printf("%-40s", "Testing dec %ax...");
 #ifndef __x86_64__
     instr[0] = 0x66; instr[1] = 0x48;
--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -669,6 +669,25 @@ struct x86_emulate_state {
         ext_8f09,
         ext_8f0a,
     } ext;
+    enum {
+        rmw_NONE,
+        rmw_adc,
+        rmw_add,
+        rmw_and,
+        rmw_btc,
+        rmw_btr,
+        rmw_bts,
+        rmw_dec,
+        rmw_inc,
+        rmw_neg,
+        rmw_not,
+        rmw_or,
+        rmw_sbb,
+        rmw_sub,
+        rmw_xadd,
+        rmw_xchg,
+        rmw_xor,
+    } rmw;
     uint8_t modrm, modrm_mod, modrm_reg, modrm_rm;
     uint8_t sib_index, sib_scale;
     uint8_t rex_prefix;
@@ -823,123 +842,136 @@ typedef union {
 "orl  %"_LO32 _tmp",%"_LO32 _sav"; "
 
 /* Raw emulation: instruction has two explicit operands. */
-#define __emulate_2op_nobyte(_op,_src,_dst,_eflags, wsx,wsy,wdx,wdy,       \
-                             lsx,lsy,ldx,ldy, qsx,qsy,qdx,qdy)             \
+#define __emulate_2op_nobyte(_op, src, dst, sz, eflags, wsx,wsy,wdx,wdy,   \
+                             lsx,lsy,ldx,ldy, qsx,qsy,qdx,qdy, extra...)   \
 do{ unsigned long _tmp;                                                    \
-    switch ( (_dst).bytes )                                                \
+    switch ( sz )                                                          \
     {                                                                      \
     case 2:                                                                \
         asm volatile (                                                     \
             _PRE_EFLAGS("0","4","2")                                       \
             _op"w %"wsx"3,%"wdx"1; "                                       \
             _POST_EFLAGS("0","4","2")                                      \
-            : "+g" (_eflags), "+" wdy ((_dst).val), "=&r" (_tmp)           \
-            : wsy ((_src).val), "i" (EFLAGS_MASK) );                       \
+            : "+g" (eflags), "+" wdy (*(dst)), "=&r" (_tmp)                \
+            : wsy (src), "i" (EFLAGS_MASK), ## extra );                    \
         break;                                                             \
     case 4:                                                                \
         asm volatile (                                                     \
             _PRE_EFLAGS("0","4","2")                                       \
             _op"l %"lsx"3,%"ldx"1; "                                       \
             _POST_EFLAGS("0","4","2")                                      \
-            : "+g" (_eflags), "+" ldy ((_dst).val), "=&r" (_tmp)           \
-            : lsy ((_src).val), "i" (EFLAGS_MASK) );                       \
+            : "+g" (eflags), "+" ldy (*(dst)), "=&r" (_tmp)                \
+            : lsy (src), "i" (EFLAGS_MASK), ## extra );                    \
         break;                                                             \
     case 8:                                                                \
-        __emulate_2op_8byte(_op, _src, _dst, _eflags, qsx, qsy, qdx, qdy); \
+        __emulate_2op_8byte(_op, src, dst, eflags, qsx, qsy, qdx, qdy,     \
+                            ## extra);                                     \
         break;                                                             \
     }                                                                      \
 } while (0)
-#define __emulate_2op(_op,_src,_dst,_eflags,_bx,_by,_wx,_wy,_lx,_ly,_qx,_qy)\
+#define __emulate_2op(_op, src, dst, sz, eflags, _bx, by, wx, wy,          \
+                      lx, ly, qx, qy, extra...)                            \
 do{ unsigned long _tmp;                                                    \
-    switch ( (_dst).bytes )                                                \
+    switch ( sz )                                                          \
     {                                                                      \
     case 1:                                                                \
         asm volatile (                                                     \
             _PRE_EFLAGS("0","4","2")                                       \
             _op"b %"_bx"3,%1; "                                            \
             _POST_EFLAGS("0","4","2")                                      \
-            : "+g" (_eflags), "+m" ((_dst).val), "=&r" (_tmp)              \
-            : _by ((_src).val), "i" (EFLAGS_MASK) );                       \
+            : "+g" (eflags), "+m" (*(dst)), "=&r" (_tmp)                   \
+            : by (src), "i" (EFLAGS_MASK), ##extra );                      \
         break;                                                             \
     default:                                                               \
-        __emulate_2op_nobyte(_op,_src,_dst,_eflags, _wx,_wy,"","m",        \
-                             _lx,_ly,"","m", _qx,_qy,"","m");              \
+        __emulate_2op_nobyte(_op, src, dst, sz, eflags, wx, wy, "", "m",   \
+                             lx, ly, "", "m", qx, qy, "", "m", ##extra);   \
         break;                                                             \
     }                                                                      \
 } while (0)
 /* Source operand is byte-sized and may be restricted to just %cl. */
-#define emulate_2op_SrcB(_op, _src, _dst, _eflags)                         \
-    __emulate_2op(_op, _src, _dst, _eflags,                                \
+#define _emulate_2op_SrcB(op, src, dst, sz, eflags)                        \
+    __emulate_2op(op, src, dst, sz, eflags,                                \
                   "b", "c", "b", "c", "b", "c", "b", "c")
+#define emulate_2op_SrcB(op, src, dst, eflags)                             \
+    _emulate_2op_SrcB(op, (src).val, &(dst).val, (dst).bytes, eflags)
 /* Source operand is byte, word, long or quad sized. */
+#define _emulate_2op_SrcV(op, src, dst, sz, eflags, extra...)              \
+    __emulate_2op(op, src, dst, sz, eflags,                                \
+                  "b", "q", "w", "r", _LO32, "r", "", "r", ##extra)
 #define emulate_2op_SrcV(_op, _src, _dst, _eflags)                         \
-    __emulate_2op(_op, _src, _dst, _eflags,                                \
-                  "b", "q", "w", "r", _LO32, "r", "", "r")
+    _emulate_2op_SrcV(_op, (_src).val, &(_dst).val, (_dst).bytes, _eflags)
 /* Source operand is word, long or quad sized. */
+#define _emulate_2op_SrcV_nobyte(op, src, dst, sz, eflags, extra...)       \
+    __emulate_2op_nobyte(op, src, dst, sz, eflags, "w", "r", "", "m",      \
+                         _LO32, "r", "", "m", "", "r", "", "m", ##extra)
 #define emulate_2op_SrcV_nobyte(_op, _src, _dst, _eflags)                  \
-    __emulate_2op_nobyte(_op, _src, _dst, _eflags, "w", "r", "", "m",      \
-                         _LO32, "r", "", "m", "", "r", "", "m")
+    _emulate_2op_SrcV_nobyte(_op, (_src).val, &(_dst).val, (_dst).bytes,   \
+                             _eflags)
 /* Operands are word, long or quad sized and source may be in memory. */
 #define emulate_2op_SrcV_srcmem(_op, _src, _dst, _eflags)                  \
-    __emulate_2op_nobyte(_op, _src, _dst, _eflags, "", "m", "w", "r",      \
+    __emulate_2op_nobyte(_op, (_src).val, &(_dst).val, (_dst).bytes,       \
+                         _eflags, "", "m", "w", "r",                       \
                          "", "m", _LO32, "r", "", "m", "", "r")
 
 /* Instruction has only one explicit operand (no source operand). */
-#define emulate_1op(_op,_dst,_eflags)                                      \
+#define _emulate_1op(_op, dst, sz, eflags, extra...)                       \
 do{ unsigned long _tmp;                                                    \
-    switch ( (_dst).bytes )                                                \
+    switch ( sz )                                                          \
     {                                                                      \
     case 1:                                                                \
         asm volatile (                                                     \
             _PRE_EFLAGS("0","3","2")                                       \
             _op"b %1; "                                                    \
             _POST_EFLAGS("0","3","2")                                      \
-            : "+g" (_eflags), "+m" ((_dst).val), "=&r" (_tmp)              \
-            : "i" (EFLAGS_MASK) );                                         \
+            : "+g" (eflags), "+m" (*(dst)), "=&r" (_tmp)                   \
+            : "i" (EFLAGS_MASK), ##extra );                                \
         break;                                                             \
     case 2:                                                                \
         asm volatile (                                                     \
             _PRE_EFLAGS("0","3","2")                                       \
             _op"w %1; "                                                    \
             _POST_EFLAGS("0","3","2")                                      \
-            : "+g" (_eflags), "+m" ((_dst).val), "=&r" (_tmp)              \
-            : "i" (EFLAGS_MASK) );                                         \
+            : "+g" (eflags), "+m" (*(dst)), "=&r" (_tmp)                   \
+            : "i" (EFLAGS_MASK), ##extra );                                \
         break;                                                             \
     case 4:                                                                \
         asm volatile (                                                     \
             _PRE_EFLAGS("0","3","2")                                       \
             _op"l %1; "                                                    \
             _POST_EFLAGS("0","3","2")                                      \
-            : "+g" (_eflags), "+m" ((_dst).val), "=&r" (_tmp)              \
-            : "i" (EFLAGS_MASK) );                                         \
+            : "+g" (eflags), "+m" (*(dst)), "=&r" (_tmp)                   \
+            : "i" (EFLAGS_MASK), ##extra );                                \
         break;                                                             \
     case 8:                                                                \
-        __emulate_1op_8byte(_op, _dst, _eflags);                           \
+        __emulate_1op_8byte(_op, dst, eflags, ##extra);                    \
         break;                                                             \
     }                                                                      \
 } while (0)
+#define emulate_1op(op, dst, eflags)                                       \
+    _emulate_1op(op, &(dst).val, (dst).bytes, eflags)
 
 /* Emulate an instruction with quadword operands (x86/64 only). */
 #if defined(__x86_64__)
-#define __emulate_2op_8byte(_op, _src, _dst, _eflags, qsx, qsy, qdx, qdy) \
+#define __emulate_2op_8byte(_op, src, dst, eflags,                      \
+                            qsx, qsy, qdx, qdy, extra...)               \
 do{ asm volatile (                                                      \
         _PRE_EFLAGS("0","4","2")                                        \
         _op"q %"qsx"3,%"qdx"1; "                                        \
         _POST_EFLAGS("0","4","2")                                       \
-        : "+g" (_eflags), "+" qdy ((_dst).val), "=&r" (_tmp)            \
-        : qsy ((_src).val), "i" (EFLAGS_MASK) );                        \
+        : "+g" (eflags), "+" qdy (*(dst)), "=&r" (_tmp)                 \
+        : qsy (src), "i" (EFLAGS_MASK), ##extra );                      \
 } while (0)
-#define __emulate_1op_8byte(_op, _dst, _eflags)                         \
+#define __emulate_1op_8byte(_op, dst, eflags, extra...)                 \
 do{ asm volatile (                                                      \
         _PRE_EFLAGS("0","3","2")                                        \
         _op"q %1; "                                                     \
         _POST_EFLAGS("0","3","2")                                       \
-        : "+g" (_eflags), "+m" ((_dst).val), "=&r" (_tmp)               \
-        : "i" (EFLAGS_MASK) );                                          \
+        : "+g" (eflags), "+m" (*(dst)), "=&r" (_tmp)                    \
+        : "i" (EFLAGS_MASK), ##extra );                                 \
 } while (0)
 #elif defined(__i386__)
-#define __emulate_2op_8byte(_op, _src, _dst, _eflags, qsx, qsy, qdx, qdy)
-#define __emulate_1op_8byte(_op, _dst, _eflags)
+#define __emulate_2op_8byte(op, src, dst, eflags, qsx, qsy, qdx, qdy, extra...)
+#define __emulate_1op_8byte(op, dst, eflags, extra...)
 #endif /* __i386__ */
 
 #define fail_if(p)                                      \
@@ -3243,7 +3275,7 @@ x86_emulate(
         break;
     }
 
-    /* Decode and fetch the destination operand: register or memory. */
+    /* Decode (but don't fetch) the destination operand: register or memory. */
     switch ( d & DstMask )
     {
     case DstNone: /* case DstImplicit: */
@@ -3329,7 +3361,13 @@ x86_emulate(
             case 8: dst.val = *(uint64_t *)dst.reg; break;
             }
         }
-        else if ( !(d & Mov) ) /* optimisation - avoid slow emulated read */
+        else if ( d & Mov ) /* optimisation - avoid slow emulated read */
+        {
+            /* Lock prefix is allowed only on RMW instructions. */
+            generate_exception_if(lock_prefix, EXC_UD);
+            fail_if(!ops->write);
+        }
+        else if ( !ops->rmw )
         {
             fail_if(lock_prefix ? !ops->cmpxchg : !ops->write);
             if ( (rc = read_ulong(dst.mem.seg, dst.mem.off,
@@ -3337,12 +3375,6 @@ x86_emulate(
                 goto done;
             dst.orig_val = dst.val;
         }
-        else
-        {
-            /* Lock prefix is allowed only on RMW instructions. */
-            generate_exception_if(lock_prefix, EXC_UD);
-            fail_if(!ops->write);
-        }
         break;
     }
 
@@ -3355,35 +3387,83 @@ x86_emulate(
         unsigned int i, n;
         unsigned long dummy;
 
-    case 0x00 ... 0x05: add: /* add */
-        emulate_2op_SrcV("add", src, dst, _regs.eflags);
+    case 0x00: case 0x01: add: /* add reg,mem */
+        if ( ops->rmw && dst.type == OP_MEM )
+            state->rmw = rmw_add;
+        else
+        {
+    case 0x02 ... 0x05: /* add */
+            emulate_2op_SrcV("add", src, dst, _regs.eflags);
+        }
         break;
 
-    case 0x08 ... 0x0d: or:  /* or */
-        emulate_2op_SrcV("or", src, dst, _regs.eflags);
+    case 0x08: case 0x09: or: /* or reg,mem */
+        if ( ops->rmw && dst.type == OP_MEM )
+            state->rmw = rmw_or;
+        else
+        {
+    case 0x0a ... 0x0d: /* or */
+            emulate_2op_SrcV("or", src, dst, _regs.eflags);
+        }
         break;
 
-    case 0x10 ... 0x15: adc: /* adc */
-        emulate_2op_SrcV("adc", src, dst, _regs.eflags);
+    case 0x10: case 0x11: adc: /* adc reg,mem */
+        if ( ops->rmw && dst.type == OP_MEM )
+            state->rmw = rmw_adc;
+        else
+        {
+    case 0x12 ... 0x15: /* adc */
+            emulate_2op_SrcV("adc", src, dst, _regs.eflags);
+        }
         break;
 
-    case 0x18 ... 0x1d: sbb: /* sbb */
-        emulate_2op_SrcV("sbb", src, dst, _regs.eflags);
+    case 0x18: case 0x19: sbb: /* sbb reg,mem */
+        if ( ops->rmw && dst.type == OP_MEM )
+            state->rmw = rmw_sbb;
+        else
+        {
+    case 0x1a ... 0x1d: /* sbb */
+            emulate_2op_SrcV("sbb", src, dst, _regs.eflags);
+        }
         break;
 
-    case 0x20 ... 0x25: and: /* and */
-        emulate_2op_SrcV("and", src, dst, _regs.eflags);
+    case 0x20: case 0x21: and: /* and reg,mem */
+        if ( ops->rmw && dst.type == OP_MEM )
+            state->rmw = rmw_and;
+        else
+        {
+    case 0x22 ... 0x25: /* and */
+            emulate_2op_SrcV("and", src, dst, _regs.eflags);
+        }
         break;
 
-    case 0x28 ... 0x2d: sub: /* sub */
-        emulate_2op_SrcV("sub", src, dst, _regs.eflags);
+    case 0x28: case 0x29: sub: /* sub reg,mem */
+        if ( ops->rmw && dst.type == OP_MEM )
+            state->rmw = rmw_sub;
+        else
+        {
+    case 0x2a ... 0x2d: /* sub */
+            emulate_2op_SrcV("sub", src, dst, _regs.eflags);
+        }
         break;
 
-    case 0x30 ... 0x35: xor: /* xor */
-        emulate_2op_SrcV("xor", src, dst, _regs.eflags);
+    case 0x30: case 0x31: xor: /* xor reg,mem */
+        if ( ops->rmw && dst.type == OP_MEM )
+            state->rmw = rmw_xor;
+        else
+        {
+    case 0x32 ... 0x35: /* xor */
+            emulate_2op_SrcV("xor", src, dst, _regs.eflags);
+        }
         break;
 
-    case 0x38 ... 0x3d: cmp: /* cmp */
+    case 0x38: case 0x39: cmp: /* cmp reg,mem */
+        if ( ops->rmw && dst.type == OP_MEM &&
+             (rc = read_ulong(dst.mem.seg, dst.mem.off, &dst.val,
+                              dst.bytes, ctxt, ops)) != X86EMUL_OKAY )
+            goto done;
+        /* fall through */
+    case 0x3a ... 0x3d: /* cmp */
         generate_exception_if(lock_prefix, EXC_UD);
         emulate_2op_SrcV("cmp", src, dst, _regs.eflags);
         dst.type = OP_NONE;
@@ -3697,6 +3777,16 @@ x86_emulate(
         break;
 
     case 0x86 ... 0x87: xchg: /* xchg */
+        /*
+         * The lock prefix is implied for this insn (and setting it for the
+         * register operands case here is benign to subsequent code).
+         */
+        lock_prefix = 1;
+        if ( ops->rmw && dst.type == OP_MEM )
+        {
+            state->rmw = rmw_xchg;
+            break;
+        }
         /* Write back the register source. */
         switch ( dst.bytes )
         {
@@ -3705,9 +3795,8 @@ x86_emulate(
         case 4: *src.reg = (uint32_t)dst.val; break; /* 64b reg: zero-extend */
         case 8: *src.reg = dst.val; break;
         }
-        /* Write back the memory destination with implicit LOCK prefix. */
+        /* Arrange for write back of the memory destination. */
         dst.val = src.val;
-        lock_prefix = 1;
         break;
 
     case 0xc6: /* Grp11: mov / xabort */
@@ -4022,6 +4111,13 @@ x86_emulate(
 
     case 0xc0 ... 0xc1: grp2: /* Grp2 */
         generate_exception_if(lock_prefix, EXC_UD);
+
+        if ( ops->rmw && dst.type == OP_MEM &&
+             (rc = read_ulong(dst.mem.seg, dst.mem.off, &dst.val,
+                              dst.bytes, ctxt, ops)) != X86EMUL_OKAY )
+            goto done;
+        dst.orig_val = dst.val;
+
         switch ( modrm_reg & 7 )
         {
         case 0: /* rol */
@@ -4660,12 +4756,22 @@ x86_emulate(
 
         case 0 ... 1: /* test */
             generate_exception_if(lock_prefix, EXC_UD);
+            if ( ops->rmw && dst.type == OP_MEM &&
+                 (rc = read_ulong(dst.mem.seg, dst.mem.off, &dst.val,
+                                  dst.bytes, ctxt, ops)) != X86EMUL_OKAY )
+                goto done;
             goto test;
         case 2: /* not */
-            dst.val = ~dst.val;
+            if ( ops->rmw && dst.type == OP_MEM )
+                state->rmw = rmw_not;
+            else
+                dst.val = ~dst.val;
             break;
         case 3: /* neg */
-            emulate_1op("neg", dst, _regs.eflags);
+            if ( ops->rmw && dst.type == OP_MEM )
+                state->rmw = rmw_neg;
+            else
+                emulate_1op("neg", dst, _regs.eflags);
             break;
         case 4: /* mul */
             _regs.eflags &= ~(X86_EFLAGS_OF | X86_EFLAGS_CF);
@@ -4889,10 +4995,16 @@ x86_emulate(
         switch ( modrm_reg & 7 )
         {
         case 0: /* inc */
-            emulate_1op("inc", dst, _regs.eflags);
+            if ( ops->rmw && dst.type == OP_MEM )
+                state->rmw = rmw_inc;
+            else
+                emulate_1op("inc", dst, _regs.eflags);
             break;
         case 1: /* dec */
-            emulate_1op("dec", dst, _regs.eflags);
+            if ( ops->rmw && dst.type == OP_MEM )
+                state->rmw = rmw_dec;
+            else
+                emulate_1op("dec", dst, _regs.eflags);
             break;
         case 2: /* call (near) */
             dst.val = _regs.r(ip);
@@ -6441,6 +6553,12 @@ x86_emulate(
 
     case X86EMUL_OPC(0x0f, 0xa3): bt: /* bt */
         generate_exception_if(lock_prefix, EXC_UD);
+
+        if ( ops->rmw && dst.type == OP_MEM &&
+             (rc = read_ulong(dst.mem.seg, dst.mem.off, &dst.val,
+                              dst.bytes, ctxt, ops)) != X86EMUL_OKAY )
+            goto done;
+
         emulate_2op_SrcV_nobyte("bt", src, dst, _regs.eflags);
         dst.type = OP_NONE;
         break;
@@ -6452,6 +6570,12 @@ x86_emulate(
         uint8_t shift, width = dst.bytes << 3;
 
         generate_exception_if(lock_prefix, EXC_UD);
+
+        if ( ops->rmw && dst.type == OP_MEM &&
+             (rc = read_ulong(dst.mem.seg, dst.mem.off, &dst.val,
+                              dst.bytes, ctxt, ops)) != X86EMUL_OKAY )
+            goto done;
+
         if ( b & 1 )
             shift = _regs.cl;
         else
@@ -6483,7 +6607,10 @@ x86_emulate(
     }
 
     case X86EMUL_OPC(0x0f, 0xab): bts: /* bts */
-        emulate_2op_SrcV_nobyte("bts", src, dst, _regs.eflags);
+        if ( ops->rmw && dst.type == OP_MEM )
+            state->rmw = rmw_bts;
+        else
+            emulate_2op_SrcV_nobyte("bts", src, dst, _regs.eflags);
         break;
 
     case X86EMUL_OPC(0x0f, 0xae): case X86EMUL_OPC_66(0x0f, 0xae): /* Grp15 */
@@ -6607,6 +6734,12 @@ x86_emulate(
 
     case X86EMUL_OPC(0x0f, 0xb0): case X86EMUL_OPC(0x0f, 0xb1): /* cmpxchg */
         fail_if(!ops->cmpxchg);
+
+        if ( ops->rmw && dst.type == OP_MEM &&
+             (rc = read_ulong(dst.mem.seg, dst.mem.off, &dst.val,
+                              dst.bytes, ctxt, ops)) != X86EMUL_OKAY )
+            goto done;
+
         _regs.eflags &= ~EFLAGS_MASK;
         if ( !((dst.val ^ _regs.r(ax)) &
                (~0UL >> (8 * (sizeof(long) - dst.bytes)))) )
@@ -6655,7 +6788,10 @@ x86_emulate(
         goto les;
 
     case X86EMUL_OPC(0x0f, 0xb3): btr: /* btr */
-        emulate_2op_SrcV_nobyte("btr", src, dst, _regs.eflags);
+        if ( ops->rmw && dst.type == OP_MEM )
+            state->rmw = rmw_btr;
+        else
+            emulate_2op_SrcV_nobyte("btr", src, dst, _regs.eflags);
         break;
 
     case X86EMUL_OPC(0x0f, 0xb6): /* movzx rm8,r{16,32,64} */
@@ -6689,7 +6825,10 @@ x86_emulate(
         break;
 
     case X86EMUL_OPC(0x0f, 0xbb): btc: /* btc */
-        emulate_2op_SrcV_nobyte("btc", src, dst, _regs.eflags);
+        if ( ops->rmw && dst.type == OP_MEM )
+            state->rmw = rmw_btc;
+        else
+            emulate_2op_SrcV_nobyte("btc", src, dst, _regs.eflags);
         break;
 
     case X86EMUL_OPC(0x0f, 0xbc): /* bsf or tzcnt */
@@ -6762,6 +6901,11 @@ x86_emulate(
         break;
 
     case X86EMUL_OPC(0x0f, 0xc0): case X86EMUL_OPC(0x0f, 0xc1): /* xadd */
+        if ( ops->rmw && dst.type == OP_MEM )
+        {
+            state->rmw = rmw_xadd;
+            break;
+        }
         /* Write back the register source. */
         switch ( dst.bytes )
         {
@@ -8316,7 +8460,36 @@ x86_emulate(
         goto done;
     }
 
-    if ( state->simd_size )
+    if ( state->rmw )
+    {
+        ea.val = src.val;
+        op_bytes = dst.bytes;
+        rc = ops->rmw(dst.mem.seg, dst.mem.off, dst.bytes, &_regs.eflags,
+                      state, ctxt);
+        if ( rc != X86EMUL_OKAY )
+            goto done;
+
+        /* Some operations require a register to be written. */
+        switch ( state->rmw )
+        {
+        case rmw_xchg:
+        case rmw_xadd:
+            switch ( dst.bytes )
+            {
+            case 1: *(uint8_t  *)src.reg = (uint8_t)ea.val; break;
+            case 2: *(uint16_t *)src.reg = (uint16_t)ea.val; break;
+            case 4: *src.reg = (uint32_t)ea.val; break; /* 64b reg: zero-extend */
+            case 8: *src.reg = ea.val; break;
+            }
+            break;
+
+        default:
+            break;
+        }
+
+        dst.type = OP_NONE;
+    }
+    else if ( state->simd_size )
     {
         generate_exception_if(!op_bytes, EXC_UD);
         generate_exception_if(vex.opcx && (d & TwoOp) && vex.reg != 0xf,
@@ -8517,6 +8690,142 @@ x86_emulate(
 #undef vex
 #undef ea
 
+int x86_emul_rmw(
+    void *ptr,
+    unsigned int bytes,
+    uint32_t *eflags,
+    struct x86_emulate_state *state,
+    struct x86_emulate_ctxt *ctxt)
+{
+    unsigned long *dst = ptr;
+
+    ASSERT(bytes == state->op_bytes);
+
+#ifdef __x86_64__
+# define JCXZ "jrcxz"
+#else
+# define JCXZ "jecxz"
+#endif
+
+#define COND_LOCK(op) \
+    JCXZ " .L" #op "%=\n\t" \
+    "lock\n" \
+    ".L" #op "%=:\n\t" \
+    #op
+
+    switch ( state->rmw )
+    {
+#define UNOP(op) \
+    case rmw_##op: \
+        _emulate_1op(COND_LOCK(op), dst, bytes, *eflags, \
+                     "c" ((long)state->lock_prefix) ); \
+        break
+#define BINOP(op, sfx) \
+    case rmw_##op: \
+        _emulate_2op_SrcV##sfx(COND_LOCK(op), \
+                               state->ea.val, dst, bytes, *eflags, \
+                               "c" ((long)state->lock_prefix) ); \
+        break
+
+    BINOP(adc, );
+    BINOP(add, );
+    BINOP(and, );
+    BINOP(btc, _nobyte);
+    BINOP(bts, _nobyte);
+    BINOP(btr, _nobyte);
+     UNOP(dec);
+     UNOP(inc);
+     UNOP(neg);
+    BINOP(or, );
+    BINOP(sbb, );
+    BINOP(sub, );
+    BINOP(xor, );
+
+#undef UNOP
+#undef BINOP
+
+    case rmw_not:
+        switch ( state->op_bytes )
+        {
+        case 1:
+            asm ( COND_LOCK(notb) " %0"
+                  : "+m" (*dst) : "c" ((long)state->lock_prefix) );
+            break;
+        case 2:
+            asm ( COND_LOCK(notw) " %0"
+                  : "+m" (*dst) : "c" ((long)state->lock_prefix) );
+            break;
+        case 4:
+            asm ( COND_LOCK(notl) " %0"
+                  : "+m" (*dst) : "c" ((long)state->lock_prefix) );
+            break;
+#ifdef __x86_64__
+        case 8:
+            asm ( COND_LOCK(notq) " %0"
+                  : "+m" (*dst) : "c" ((long)state->lock_prefix) );
+            break;
+#endif
+        }
+        break;
+
+    case rmw_xadd:
+        switch ( state->op_bytes )
+        {
+            unsigned long dummy;
+
+#define XADD(sz, cst, mod) \
+        case sz: \
+            asm ( _PRE_EFLAGS("[efl]", "[msk]", "[tmp]") \
+                  COND_LOCK(xadd) " %"#mod"[reg], %[mem]; " \
+                  _POST_EFLAGS("[efl]", "[msk]", "[tmp]") \
+                  : [reg] "+" #cst (state->ea.val), \
+                    [mem] "+m" (*dst), \
+                    [efl] "+g" (*eflags), \
+                    [tmp] "=&r" (dummy) \
+                  : "c" ((long)state->lock_prefix), \
+                    [msk] "i" (EFLAGS_MASK) ); \
+            break
+        XADD(1, q, b);
+        XADD(2, r, w);
+        XADD(4, r, k);
+#ifdef __x86_64__
+        XADD(8, r, );
+#endif
+#undef XADD
+        }
+        break;
+
+    case rmw_xchg:
+        switch ( state->op_bytes )
+        {
+        case 1:
+            asm ( "xchg %b0, %b1" : "+q" (state->ea.val), "+m" (*dst) );
+            break;
+        case 2:
+            asm ( "xchg %w0, %w1" : "+r" (state->ea.val), "+m" (*dst) );
+            break;
+        case 4:
+#ifdef __x86_64__
+            asm ( "xchg %k0, %k1" : "+r" (state->ea.val), "+m" (*dst) );
+            break;
+        case 8:
+#endif
+            asm ( "xchg %0, %1" : "+r" (state->ea.val), "+m" (*dst) );
+            break;
+        }
+        break;
+
+    default:
+        ASSERT_UNREACHABLE();
+        return X86EMUL_UNHANDLEABLE;
+    }
+
+#undef COND_LOCK
+#undef JCXZ
+
+    return X86EMUL_OKAY;
+}
+
 static void __init __maybe_unused build_assertions(void)
 {
     /* Check the values against SReg3 encoding in opcode/ModRM bytes. */
--- a/xen/arch/x86/x86_emulate/x86_emulate.h
+++ b/xen/arch/x86/x86_emulate/x86_emulate.h
@@ -247,6 +247,20 @@ struct x86_emulate_ops
         struct x86_emulate_ctxt *ctxt);
 
     /*
+     * rmw: Emulate a memory read-modify-write.
+     * @eflags: [IN/OUT] Pointer to EFLAGS to be updated according to
+     *                   instruction effects.
+     * @state:  [IN/OUT] Pointer to (opaque) emulator state.
+     */
+    int (*rmw)(
+        enum x86_segment seg,
+        unsigned long offset,
+        unsigned int bytes,
+        uint32_t *eflags,
+        struct x86_emulate_state *state,
+        struct x86_emulate_ctxt *ctxt);
+
+    /*
      * cmpxchg: Emulate a CMPXCHG operation.
      *  @p_old: [IN ] Pointer to value expected to be current at @addr.
      *          [OUT] Pointer to value found at @addr (may always be
@@ -710,6 +724,14 @@ int x86emul_write_xcr(unsigned int reg,
 
 #endif
 
+int
+x86_emul_rmw(
+    void *ptr,
+    unsigned int bytes,
+    uint32_t *eflags,
+    struct x86_emulate_state *state,
+    struct x86_emulate_ctxt *ctxt);
+
 static inline void x86_emul_hw_exception(
     unsigned int vector, int error_code, struct x86_emulate_ctxt *ctxt)
 {
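
For illustration, a minimal sketch of a backend supplying the new
hook, assuming a hypothetical flat-address test harness where a
linear address can be used as a host pointer directly.  harness_rmw()
is a made-up name; a real backend (see the hvmemul_rmw() introduction
later in this series) additionally has to cope with mapping failures
and MMIO:

    static int harness_rmw(
        enum x86_segment seg,
        unsigned long offset,
        unsigned int bytes,
        uint32_t *eflags,
        struct x86_emulate_state *state,
        struct x86_emulate_ctxt *ctxt)
    {
        /* Hypothetical flat model: @seg is ignored, @offset is a pointer. */
        void *ptr = (void *)offset;

        /* Have the emulator carry out the actual (possibly locked) op. */
        return x86_emul_rmw(ptr, bytes, eflags, state, ctxt);
    }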




* [PATCH v5 09/14] x86emul: also handle shifts through ->rmw()
  2018-03-15 12:57 [PATCH v5 00/14] x86: emulator enhancements Jan Beulich
                   ` (7 preceding siblings ...)
  2018-03-15 13:08 ` [PATCH v5 08/14] x86emul: add read-modify-write hook Jan Beulich
@ 2018-03-15 13:09 ` Jan Beulich
  2018-03-15 14:23   ` Andrew Cooper
  2018-03-15 13:10 ` [PATCH v5 10/14] x86/HVM: do actual CMPXCHG in hvmemul_cmpxchg() Jan Beulich
                   ` (4 subsequent siblings)
  13 siblings, 1 reply; 33+ messages in thread
From: Jan Beulich @ 2018-03-15 13:09 UTC (permalink / raw)
  To: xen-devel; +Cc: George Dunlap, Andrew Cooper

These don't allow LOCK, but they are still read-modify-write
operations, so they are better handled that way too.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
---
v5: New.

--- a/tools/tests/x86_emulator/test_x86_emulator.c
+++ b/tools/tests/x86_emulator/test_x86_emulator.c
@@ -585,6 +585,37 @@ int main(int argc, char **argv)
         goto fail;
     printf("okay\n");
 
+    printf("%-40s", "Testing rcll $2,(%edi)...");
+    instr[0] = 0xc1; instr[1] = 0x17; instr[2] = 0x02;
+    *res        = 0x2233445F;
+    regs.eflags = EFLAGS_ALWAYS_SET | X86_EFLAGS_CF;
+    regs.eip    = (unsigned long)&instr[0];
+    regs.edi    = (unsigned long)res;
+    rc = x86_emulate(&ctxt, &emulops);
+    if ( (rc != X86EMUL_OKAY) ||
+         (*res != ((0x2233445F << 2) | 2)) ||
+         ((regs.eflags & (EFLAGS_MASK & ~X86_EFLAGS_OF))
+          != EFLAGS_ALWAYS_SET) ||
+         (regs.eip != (unsigned long)&instr[3]) )
+        goto fail;
+    printf("okay\n");
+
+    printf("%-40s", "Testing shrdl $8,%ecx,(%edi)...");
+    instr[0] = 0x0f; instr[1] = 0xac; instr[2] = 0x0f; instr[3] = 0x08;
+    *res        = 0x22334455;
+    regs.eflags = EFLAGS_ALWAYS_SET | X86_EFLAGS_CF;
+    regs.eip    = (unsigned long)&instr[0];
+    regs.ecx    = 0x44332211;
+    regs.edi    = (unsigned long)res;
+    rc = x86_emulate(&ctxt, &emulops);
+    if ( (rc != X86EMUL_OKAY) ||
+         (*res != 0x11223344) ||
+         ((regs.eflags & (EFLAGS_MASK & ~(X86_EFLAGS_OF|X86_EFLAGS_AF)))
+          != (EFLAGS_ALWAYS_SET | X86_EFLAGS_PF)) ||
+         (regs.eip != (unsigned long)&instr[4]) )
+        goto fail;
+    printf("okay\n");
+
     printf("%-40s", "Testing btrl $0x1,(%edi)...");
     instr[0] = 0x0f; instr[1] = 0xba; instr[2] = 0x37; instr[3] = 0x01;
     *res        = 0x2233445F;
--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -682,7 +682,16 @@ struct x86_emulate_state {
         rmw_neg,
         rmw_not,
         rmw_or,
+        rmw_rcl,
+        rmw_rcr,
+        rmw_rol,
+        rmw_ror,
+        rmw_sar,
         rmw_sbb,
+        rmw_shl,
+        rmw_shld,
+        rmw_shr,
+        rmw_shrd,
         rmw_sub,
         rmw_xadd,
         rmw_xchg,
@@ -4112,36 +4121,25 @@ x86_emulate(
     case 0xc0 ... 0xc1: grp2: /* Grp2 */
         generate_exception_if(lock_prefix, EXC_UD);
 
-        if ( ops->rmw && dst.type == OP_MEM &&
-             (rc = read_ulong(dst.mem.seg, dst.mem.off, &dst.val,
-                              dst.bytes, ctxt, ops)) != X86EMUL_OKAY )
-            goto done;
-        dst.orig_val = dst.val;
-
         switch ( modrm_reg & 7 )
         {
-        case 0: /* rol */
-            emulate_2op_SrcB("rol", src, dst, _regs.eflags);
-            break;
-        case 1: /* ror */
-            emulate_2op_SrcB("ror", src, dst, _regs.eflags);
-            break;
-        case 2: /* rcl */
-            emulate_2op_SrcB("rcl", src, dst, _regs.eflags);
-            break;
-        case 3: /* rcr */
-            emulate_2op_SrcB("rcr", src, dst, _regs.eflags);
-            break;
-        case 4: /* sal/shl */
-        case 6: /* sal/shl */
-            emulate_2op_SrcB("sal", src, dst, _regs.eflags);
-            break;
-        case 5: /* shr */
-            emulate_2op_SrcB("shr", src, dst, _regs.eflags);
-            break;
-        case 7: /* sar */
-            emulate_2op_SrcB("sar", src, dst, _regs.eflags);
-            break;
+#define GRP2(name, ext) \
+        case ext: \
+            if ( ops->rmw && dst.type == OP_MEM ) \
+                state->rmw = rmw_##name; \
+            else \
+                emulate_2op_SrcB(#name, src, dst, _regs.eflags); \
+            break
+
+        GRP2(rol, 0);
+        GRP2(ror, 1);
+        GRP2(rcl, 2);
+        GRP2(rcr, 3);
+        case 6: /* sal/shl alias */
+        GRP2(shl, 4);
+        GRP2(shr, 5);
+        GRP2(sar, 7);
+#undef GRP2
         }
         break;
 
@@ -6571,11 +6569,6 @@ x86_emulate(
 
         generate_exception_if(lock_prefix, EXC_UD);
 
-        if ( ops->rmw && dst.type == OP_MEM &&
-             (rc = read_ulong(dst.mem.seg, dst.mem.off, &dst.val,
-                              dst.bytes, ctxt, ops)) != X86EMUL_OKAY )
-            goto done;
-
         if ( b & 1 )
             shift = _regs.cl;
         else
@@ -6584,6 +6577,14 @@ x86_emulate(
             src.reg = decode_gpr(&_regs, modrm_reg);
             src.val = truncate_word(*src.reg, dst.bytes);
         }
+
+        if ( ops->rmw && dst.type == OP_MEM )
+        {
+            ea.orig_val = shift;
+            state->rmw = b & 8 ? rmw_shrd : rmw_shld;
+            break;
+        }
+
         if ( (shift &= width - 1) == 0 )
             break;
         dst.orig_val = dst.val;
@@ -8726,6 +8727,11 @@ int x86_emul_rmw(
                                state->ea.val, dst, bytes, *eflags, \
                                "c" ((long)state->lock_prefix) ); \
         break
+#define SHIFT(op) \
+    case rmw_##op: \
+        ASSERT(!state->lock_prefix); \
+        _emulate_2op_SrcB(#op, state->ea.val, dst, bytes, *eflags); \
+        break
 
     BINOP(adc, );
     BINOP(add, );
@@ -8737,12 +8743,20 @@ int x86_emul_rmw(
      UNOP(inc);
      UNOP(neg);
     BINOP(or, );
+    SHIFT(rcl);
+    SHIFT(rcr);
+    SHIFT(rol);
+    SHIFT(ror);
+    SHIFT(sar);
     BINOP(sbb, );
+    SHIFT(shl);
+    SHIFT(shr);
     BINOP(sub, );
     BINOP(xor, );
 
 #undef UNOP
 #undef BINOP
+#undef SHIFT
 
     case rmw_not:
         switch ( state->op_bytes )
@@ -8768,6 +8782,20 @@ int x86_emul_rmw(
         }
         break;
 
+    case rmw_shld:
+        ASSERT(!state->lock_prefix);
+        _emulate_2op_SrcV_nobyte("shld",
+                                 state->ea.val, dst, bytes, *eflags,
+                                 "c" (state->ea.orig_val) );
+        break;
+
+    case rmw_shrd:
+        ASSERT(!state->lock_prefix);
+        _emulate_2op_SrcV_nobyte("shrd",
+                                 state->ea.val, dst, bytes, *eflags,
+                                 "c" (state->ea.orig_val) );
+        break;
+
     case rmw_xadd:
         switch ( state->op_bytes )
         {
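
As a cross-check of the rcll test's expected value: RCL is a rotate
through CF, i.e. a rotate of the 33-bit quantity CF:val.  A small
self-contained model of the semantics (a sketch, not the emulator's
implementation):

    #include <assert.h>
    #include <stdint.h>

    /* Model of 32-bit RCL: rotate the 33-bit quantity CF:val left by n. */
    static uint32_t rcl32(uint32_t val, unsigned int n, unsigned int *cf)
    {
        uint64_t c = ((uint64_t)*cf << 32) | val;

        n %= 33;
        if ( n )
        {
            c = ((c << n) | (c >> (33 - n))) & 0x1ffffffffULL;
            *cf = c >> 32;
        }

        return c;
    }

    int main(void)
    {
        unsigned int cf = 1;

        /* Old CF rotates into bit 1; new CF (old bit 30) ends up clear. */
        assert(rcl32(0x2233445f, 2, &cf) == ((0x2233445fU << 2) | 2));
        assert(cf == 0);

        return 0;
    }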




* [PATCH v5 10/14] x86/HVM: do actual CMPXCHG in hvmemul_cmpxchg()
  2018-03-15 12:57 [PATCH v5 00/14] x86: emulator enhancements Jan Beulich
                   ` (8 preceding siblings ...)
  2018-03-15 13:09 ` [PATCH v5 09/14] x86emul: also handle shifts through ->rmw() Jan Beulich
@ 2018-03-15 13:10 ` Jan Beulich
  2018-03-15 15:28   ` Andrew Cooper
  2018-03-15 13:10 ` [PATCH v5 11/14] x86/HVM: make use of new read-modify-write emulator hook Jan Beulich
                   ` (3 subsequent siblings)
  13 siblings, 1 reply; 33+ messages in thread
From: Jan Beulich @ 2018-03-15 13:10 UTC (permalink / raw)
  To: xen-devel; +Cc: George Dunlap, Andrew Cooper

..., at least as far as currently possible, i.e. when a mapping can be
obtained.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: Paul Durrant <paul.durrant@citrix.com>
---
v4: Add ASSERT_UNREACHABLE() to hvmemul_cmpxchg()'s switch().
v3: New.

--- a/xen/arch/x86/hvm/emulate.c
+++ b/xen/arch/x86/hvm/emulate.c
@@ -1297,8 +1297,86 @@ static int hvmemul_cmpxchg(
     bool lock,
     struct x86_emulate_ctxt *ctxt)
 {
-    /* Fix this in case the guest is really relying on r-m-w atomicity. */
-    return hvmemul_write(seg, offset, p_new, bytes, ctxt);
+    struct hvm_emulate_ctxt *hvmemul_ctxt =
+        container_of(ctxt, struct hvm_emulate_ctxt, ctxt);
+    struct vcpu *curr = current;
+    unsigned long addr, reps = 1;
+    uint32_t pfec = PFEC_page_present | PFEC_write_access;
+    struct hvm_vcpu_io *vio = &curr->arch.hvm_vcpu.hvm_io;
+    int rc;
+    void *mapping = NULL;
+
+    rc = hvmemul_virtual_to_linear(
+        seg, offset, bytes, &reps, hvm_access_write, hvmemul_ctxt, &addr);
+    if ( rc != X86EMUL_OKAY )
+        return rc;
+
+    if ( is_x86_system_segment(seg) )
+        pfec |= PFEC_implicit;
+    else if ( hvmemul_ctxt->seg_reg[x86_seg_ss].dpl == 3 )
+        pfec |= PFEC_user_mode;
+
+    mapping = hvmemul_map_linear_addr(addr, bytes, pfec, hvmemul_ctxt);
+    if ( IS_ERR(mapping) )
+        return ~PTR_ERR(mapping);
+
+    if ( !mapping )
+    {
+        /* Fix this in case the guest is really relying on r-m-w atomicity. */
+        return hvmemul_linear_mmio_write(addr, bytes, p_new, pfec,
+                                         hvmemul_ctxt,
+                                         vio->mmio_access.write_access &&
+                                         vio->mmio_gla == (addr & PAGE_MASK));
+    }
+
+    switch ( bytes )
+    {
+    case 1: case 2: case 4: case 8:
+    {
+        unsigned long old = 0, new = 0, cur;
+
+        memcpy(&old, p_old, bytes);
+        memcpy(&new, p_new, bytes);
+        if ( lock )
+            cur = __cmpxchg(mapping, old, new, bytes);
+        else
+            cur = cmpxchg_local_(mapping, old, new, bytes);
+        if ( cur != old )
+        {
+            memcpy(p_old, &cur, bytes);
+            rc = X86EMUL_CMPXCHG_FAILED;
+        }
+        break;
+    }
+
+    case 16:
+        if ( cpu_has_cx16 )
+        {
+            __uint128_t *old = p_old, cur;
+
+            if ( lock )
+                cur = __cmpxchg16b(mapping, old, p_new);
+            else
+                cur = cmpxchg16b_local_(mapping, old, p_new);
+            if ( cur != *old )
+            {
+                *old = cur;
+                rc = X86EMUL_CMPXCHG_FAILED;
+            }
+        }
+        else
+            rc = X86EMUL_UNHANDLEABLE;
+        break;
+
+    default:
+        ASSERT_UNREACHABLE();
+        rc = X86EMUL_UNHANDLEABLE;
+        break;
+    }
+
+    hvmemul_unmap_linear_addr(mapping, addr, bytes, hvmemul_ctxt);
+
+    return rc;
 }
 
 static int hvmemul_validate(
--- a/xen/include/asm-x86/system.h
+++ b/xen/include/asm-x86/system.h
@@ -110,6 +110,38 @@ static always_inline unsigned long __cmp
     return old;
 }
 
+static always_inline unsigned long cmpxchg_local_(
+    void *ptr, unsigned long old, unsigned long new, unsigned int size)
+{
+    unsigned long prev = ~old;
+
+    switch ( size )
+    {
+    case 1:
+        asm volatile ( "cmpxchgb %b2, %1"
+                       : "=a" (prev), "+m" (*(uint8_t *)ptr)
+                       : "q" (new), "0" (old) );
+        break;
+    case 2:
+        asm volatile ( "cmpxchgw %w2, %1"
+                       : "=a" (prev), "+m" (*(uint16_t *)ptr)
+                       : "r" (new), "0" (old) );
+        break;
+    case 4:
+        asm volatile ( "cmpxchgl %k2, %1"
+                       : "=a" (prev), "+m" (*(uint32_t *)ptr)
+                       : "r" (new), "0" (old) );
+        break;
+    case 8:
+        asm volatile ( "cmpxchgq %2, %1"
+                       : "=a" (prev), "+m" (*(uint64_t *)ptr)
+                       : "r" (new), "0" (old) );
+        break;
+    }
+
+    return prev;
+}
+
 #define cmpxchgptr(ptr,o,n) ({                                          \
     const __typeof__(**(ptr)) *__o = (o);                               \
     __typeof__(**(ptr)) *__n = (n);                                     \
--- a/xen/include/asm-x86/x86_64/system.h
+++ b/xen/include/asm-x86/x86_64/system.h
@@ -31,6 +31,24 @@ static always_inline __uint128_t __cmpxc
     return prev.raw;
 }
 
+static always_inline __uint128_t cmpxchg16b_local_(
+    void *ptr, const __uint128_t *oldp, const __uint128_t *newp)
+{
+    union {
+        struct { uint64_t lo, hi; };
+        __uint128_t raw;
+    } new = { .raw = *newp }, old = { .raw = *oldp }, prev;
+
+    ASSERT(cpu_has_cx16);
+
+    /* Don't use "=A" here - clang can't deal with that. */
+    asm volatile ( "cmpxchg16b %2"
+                   : "=d" (prev.hi), "=a" (prev.lo), "+m" (*(__uint128_t *)ptr)
+                   : "c" (new.hi), "b" (new.lo), "0" (old.hi), "1" (old.lo) );
+
+    return prev.raw;
+}
+
 #define cmpxchg16b(ptr, o, n) ({                           \
     volatile void *_p = (ptr);                             \
     ASSERT(!((unsigned long)_p & 0xf));                    \
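
The comparison-failure handling above follows the usual strong-CAS
contract: on mismatch the observed value is copied back through p_old
before returning X86EMUL_CMPXCHG_FAILED.  A user-space sketch of the
same contract, using the GCC/clang builtin and made-up return-code
names:

    #include <stdbool.h>
    #include <stdint.h>

    /* Illustrative stand-ins for X86EMUL_OKAY / X86EMUL_CMPXCHG_FAILED. */
    enum sketch_rc { RC_OKAY, RC_CMPXCHG_FAILED };

    static enum sketch_rc cas_sketch(uint32_t *mapping, uint32_t *p_old,
                                     uint32_t new)
    {
        /* Strong CAS; on failure *p_old is updated with the observed value. */
        if ( __atomic_compare_exchange_n(mapping, p_old, new, false,
                                         __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST) )
            return RC_OKAY;

        return RC_CMPXCHG_FAILED;
    }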




* [PATCH v5 11/14] x86/HVM: make use of new read-modify-write emulator hook
  2018-03-15 12:57 [PATCH v5 00/14] x86: emulator enhancements Jan Beulich
                   ` (9 preceding siblings ...)
  2018-03-15 13:10 ` [PATCH v5 10/14] x86/HVM: do actual CMPXCHG in hvmemul_cmpxchg() Jan Beulich
@ 2018-03-15 13:10 ` Jan Beulich
  2018-03-15 13:12 ` [PATCH v5 12/14] x86/HVM: use x86emul_write_xcr() Jan Beulich
                   ` (2 subsequent siblings)
  13 siblings, 0 replies; 33+ messages in thread
From: Jan Beulich @ 2018-03-15 13:10 UTC (permalink / raw)
  To: xen-devel; +Cc: George Dunlap, Andrew Cooper

..., at least as far as currently possible, i.e. when a mapping can be
obtained.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: Paul Durrant <paul.durrant@citrix.com>
Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
---
v4: bool_t -> bool.
v3: New.

--- a/xen/arch/x86/hvm/emulate.c
+++ b/xen/arch/x86/hvm/emulate.c
@@ -1188,6 +1188,61 @@ static int hvmemul_write(
     return X86EMUL_OKAY;
 }
 
+static int hvmemul_rmw(
+    enum x86_segment seg,
+    unsigned long offset,
+    unsigned int bytes,
+    uint32_t *eflags,
+    struct x86_emulate_state *state,
+    struct x86_emulate_ctxt *ctxt)
+{
+    struct hvm_emulate_ctxt *hvmemul_ctxt =
+        container_of(ctxt, struct hvm_emulate_ctxt, ctxt);
+    unsigned long addr, reps = 1;
+    uint32_t pfec = PFEC_page_present | PFEC_write_access;
+    struct hvm_vcpu_io *vio = &current->arch.hvm_vcpu.hvm_io;
+    int rc;
+    void *mapping;
+
+    rc = hvmemul_virtual_to_linear(
+        seg, offset, bytes, &reps, hvm_access_write, hvmemul_ctxt, &addr);
+    if ( rc != X86EMUL_OKAY || !bytes )
+        return rc;
+
+    if ( is_x86_system_segment(seg) )
+        pfec |= PFEC_implicit;
+    else if ( hvmemul_ctxt->seg_reg[x86_seg_ss].dpl == 3 )
+        pfec |= PFEC_user_mode;
+
+    mapping = hvmemul_map_linear_addr(addr, bytes, pfec, hvmemul_ctxt);
+    if ( IS_ERR(mapping) )
+        return ~PTR_ERR(mapping);
+
+    if ( mapping )
+    {
+        rc = x86_emul_rmw(mapping, bytes, eflags, state, ctxt);
+        hvmemul_unmap_linear_addr(mapping, addr, bytes, hvmemul_ctxt);
+    }
+    else
+    {
+        unsigned long data = 0;
+        bool known_gpfn = vio->mmio_access.write_access &&
+                          vio->mmio_gla == (addr & PAGE_MASK);
+
+        if ( bytes > sizeof(data) )
+            return X86EMUL_UNHANDLEABLE;
+        rc = hvmemul_linear_mmio_read(addr, bytes, &data, pfec, hvmemul_ctxt,
+                                      known_gpfn);
+        if ( rc == X86EMUL_OKAY )
+            rc = x86_emul_rmw(&data, bytes, eflags, state, ctxt);
+        if ( rc == X86EMUL_OKAY )
+            rc = hvmemul_linear_mmio_write(addr, bytes, &data, pfec,
+                                           hvmemul_ctxt, known_gpfn);
+    }
+
+    return rc;
+}
+
 static int hvmemul_write_discard(
     enum x86_segment seg,
     unsigned long offset,
@@ -2141,6 +2196,7 @@ static const struct x86_emulate_ops hvm_
     .read          = hvmemul_read,
     .insn_fetch    = hvmemul_insn_fetch,
     .write         = hvmemul_write,
+    .rmw           = hvmemul_rmw,
     .cmpxchg       = hvmemul_cmpxchg,
     .validate      = hvmemul_validate,
     .rep_ins       = hvmemul_rep_ins,
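
For backend authors the opt-in is just this one extra member; since
the emulator only sets state->rmw when ops->rmw is non-NULL, omitting
it retains the previous write()/cmpxchg() based handling.  A sketch
with hypothetical handler names:

    static const struct x86_emulate_ops my_backend_ops = {
        .read       = my_read,
        .insn_fetch = my_insn_fetch,
        .write      = my_write,
        .rmw        = my_rmw,      /* optional read-modify-write fast path */
        .cmpxchg    = my_cmpxchg,
    };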





* [PATCH v5 12/14] x86/HVM: use x86emul_write_xcr()
  2018-03-15 12:57 [PATCH v5 00/14] x86: emulator enhancements Jan Beulich
                   ` (10 preceding siblings ...)
  2018-03-15 13:10 ` [PATCH v5 11/14] x86/HVM: make use of new read-modify-write emulator hook Jan Beulich
@ 2018-03-15 13:12 ` Jan Beulich
  2018-03-15 15:43   ` Andrew Cooper
  2018-03-19 12:57   ` Tian, Kevin
  2018-03-15 13:13 ` [PATCH v5 13/14] x86/shadow: fully move unmap-dest into common code Jan Beulich
  2018-03-15 13:13 ` [PATCH v5 14/14] x86/shadow: fold sh_x86_emulate_{write, cmpxchg}() into their only callers Jan Beulich
  13 siblings, 2 replies; 33+ messages in thread
From: Jan Beulich @ 2018-03-15 13:12 UTC (permalink / raw)
  To: xen-devel
  Cc: Kevin Tian, Suravee Suthikulpanit, George Dunlap, Andrew Cooper,
	Jun Nakajima, Boris Ostrovsky

...  instead of directly calling handle_xsetbv(), to make use of the
additional checking there.

Also stop calling hvm_monitor_crX(XCR0, ...) for indexes other than
zero.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
---
v5: New.

--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -1961,10 +1961,11 @@ int hvm_handle_xsetbv(u32 index, u64 new
 {
     int rc;
 
-    hvm_monitor_crX(XCR0, new_bv, current->arch.xcr0);
+    if ( index == 0 )
+        hvm_monitor_crX(XCR0, new_bv, current->arch.xcr0);
 
-    rc = handle_xsetbv(index, new_bv);
-    if ( rc )
+    rc = x86emul_write_xcr(index, new_bv, NULL);
+    if ( rc != X86EMUL_OKAY )
         hvm_inject_hw_exception(TRAP_gp_fault, 0);
 
     return rc;
--- a/xen/arch/x86/hvm/svm/svm.c
+++ b/xen/arch/x86/hvm/svm/svm.c
@@ -2902,7 +2902,7 @@ void svm_vmexit_handler(struct cpu_user_
         if ( vmcb_get_cpl(vmcb) )
             hvm_inject_hw_exception(TRAP_gp_fault, 0);
         else if ( (inst_len = __get_instruction_length(v, INSTR_XSETBV)) &&
-                  hvm_handle_xsetbv(regs->ecx, msr_fold(regs)) == 0 )
+                  hvm_handle_xsetbv(regs->ecx, msr_fold(regs)) == X86EMUL_OKAY )
             __update_guest_eip(regs, inst_len);
         break;
 
--- a/xen/arch/x86/hvm/vmx/vmx.c
+++ b/xen/arch/x86/hvm/vmx/vmx.c
@@ -4084,7 +4084,7 @@ void vmx_vmexit_handler(struct cpu_user_
         break;
 
     case EXIT_REASON_XSETBV:
-        if ( hvm_handle_xsetbv(regs->ecx, msr_fold(regs)) == 0 )
+        if ( hvm_handle_xsetbv(regs->ecx, msr_fold(regs)) == X86EMUL_OKAY )
             update_guest_eip(); /* Safe: XSETBV */
         break;
 
--- a/xen/arch/x86/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate.c
@@ -66,6 +66,7 @@ int x86emul_read_xcr(unsigned int reg, u
     return X86EMUL_OKAY;
 }
 
+/* Note: May be called with ctxt=NULL. */
 int x86emul_write_xcr(unsigned int reg, uint64_t val,
                       struct x86_emulate_ctxt *ctxt)
 {
@@ -80,7 +81,8 @@ int x86emul_write_xcr(unsigned int reg,
         /* fall through */
     default:
     gp_fault:
-        x86_emul_hw_exception(TRAP_gp_fault, 0, ctxt);
+        if ( ctxt )
+            x86_emul_hw_exception(TRAP_gp_fault, 0, ctxt);
         return X86EMUL_EXCEPTION;
     }
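
Condensed from the hunks above, the resulting call pattern in
hvm_handle_xsetbv() is roughly the following (a sketch eliding the rc
plumbing): with ctxt == NULL the helper performs all checking but
leaves exception delivery to the caller.

    /* ctxt == NULL: validate only, the caller raises the fault itself. */
    if ( index == 0 )
        hvm_monitor_crX(XCR0, new_bv, current->arch.xcr0);

    if ( x86emul_write_xcr(index, new_bv, NULL) != X86EMUL_OKAY )
        hvm_inject_hw_exception(TRAP_gp_fault, 0);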
 





* [PATCH v5 13/14] x86/shadow: fully move unmap-dest into common code
  2018-03-15 12:57 [PATCH v5 00/14] x86: emulator enhancements Jan Beulich
                   ` (11 preceding siblings ...)
  2018-03-15 13:12 ` [PATCH v5 12/14] x86/HVM: use x86emul_write_xcr() Jan Beulich
@ 2018-03-15 13:13 ` Jan Beulich
  2018-03-15 13:13 ` [PATCH v5 14/14] x86/shadow: fold sh_x86_emulate_{write, cmpxchg}() into their only callers Jan Beulich
  13 siblings, 0 replies; 33+ messages in thread
From: Jan Beulich @ 2018-03-15 13:13 UTC (permalink / raw)
  To: xen-devel; +Cc: George Dunlap

By adding the guest PTE size to the shadow emulation context, the work
begun by commit 2c80710a78 ("x86/shadow: compile most write emulation
code just once") can be completed, paving the way for further movement
into common code.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
Acked-by: Tim Deegan <tim@xen.org>
---
v4: Adjust comment style in moved code.
v3: New.

--- a/xen/arch/x86/mm/shadow/common.c
+++ b/xen/arch/x86/mm/shadow/common.c
@@ -319,7 +319,8 @@ static const struct x86_emulate_ops hvm_
 };
 
 const struct x86_emulate_ops *shadow_init_emulation(
-    struct sh_emulate_ctxt *sh_ctxt, struct cpu_user_regs *regs)
+    struct sh_emulate_ctxt *sh_ctxt, struct cpu_user_regs *regs,
+    unsigned int pte_size)
 {
     struct segment_register *creg, *sreg;
     struct vcpu *v = current;
@@ -346,6 +347,8 @@ const struct x86_emulate_ops *shadow_ini
         sh_ctxt->ctxt.sp_size   = sreg->db ? 32 : 16;
     }
 
+    sh_ctxt->pte_size = pte_size;
+
     /* Attempt to prefetch whole instruction. */
     sh_ctxt->insn_buf_eip = regs->rip;
     sh_ctxt->insn_buf_bytes =
@@ -1770,6 +1773,45 @@ void *sh_emulate_map_dest(struct vcpu *v
 }
 
 /*
+ * Optimization: If we see two emulated writes of zeros to the same
+ * page-table without another kind of page fault in between, we guess
+ * that this is a batch of changes (for process destruction) and
+ * unshadow the page so we don't take a pagefault on every entry.  This
+ * should also make finding writeable mappings of pagetables much
+ * easier.
+ *
+ * Look to see if this is the second emulated write in a row to this
+ * page, and unshadow if it is.
+ */
+static inline void check_for_early_unshadow(struct vcpu *v, mfn_t gmfn)
+{
+#if SHADOW_OPTIMIZATIONS & SHOPT_EARLY_UNSHADOW
+    struct domain *d = v->domain;
+
+    /*
+     * If the domain has never made a "dying" op, use the two-writes
+     * heuristic; otherwise, unshadow as soon as we write a zero for a dying
+     * process.
+     *
+     * Don't bother trying to unshadow if it's not a PT, or if it's > l1.
+     */
+    if ( ( v->arch.paging.shadow.pagetable_dying
+           || ( !d->arch.paging.shadow.pagetable_dying_op
+                && v->arch.paging.shadow.last_emulated_mfn_for_unshadow == mfn_x(gmfn) ) )
+         && sh_mfn_is_a_page_table(gmfn)
+         && (!d->arch.paging.shadow.pagetable_dying_op ||
+             !(mfn_to_page(gmfn)->shadow_flags
+               & (SHF_L2_32|SHF_L2_PAE|SHF_L2H_PAE|SHF_L4_64))) )
+    {
+        perfc_incr(shadow_early_unshadow);
+        sh_remove_shadows(d, gmfn, 1, 0 /* Fast, can fail to unshadow */ );
+        TRACE_SHADOW_PATH_FLAG(TRCE_SFLAG_EARLY_UNSHADOW);
+    }
+    v->arch.paging.shadow.last_emulated_mfn_for_unshadow = mfn_x(gmfn);
+#endif
+}
+
+/*
  * Tidy up after the emulated write: mark pages dirty, verify the new
  * contents, and undo the mapping.
  */
@@ -1778,6 +1820,21 @@ void sh_emulate_unmap_dest(struct vcpu *
 {
     u32 b1 = bytes, b2 = 0, shflags;
 
+    ASSERT(mfn_valid(sh_ctxt->mfn[0]));
+
+    /* If we are writing lots of PTE-aligned zeros, might want to unshadow */
+    if ( likely(bytes >= 4) && (*(u32 *)addr == 0) )
+    {
+        if ( !((unsigned long)addr & (sh_ctxt->pte_size - 1)) )
+            check_for_early_unshadow(v, sh_ctxt->mfn[0]);
+        /*
+         * Don't reset the heuristic if we're writing zeros at non-aligned
+         * addresses, otherwise it doesn't catch REP MOVSD on PAE guests.
+         */
+    }
+    else
+        sh_reset_early_unshadow(v);
+
     /*
      * We can avoid re-verifying the page contents after the write if:
      *  - it was no larger than the PTE type of this pagetable;
--- a/xen/arch/x86/mm/shadow/multi.c
+++ b/xen/arch/x86/mm/shadow/multi.c
@@ -2533,52 +2533,6 @@ sh_map_and_validate_gl1e(struct vcpu *v,
 
 
 /**************************************************************************/
-/* Optimization: If we see two emulated writes of zeros to the same
- * page-table without another kind of page fault in between, we guess
- * that this is a batch of changes (for process destruction) and
- * unshadow the page so we don't take a pagefault on every entry.  This
- * should also make finding writeable mappings of pagetables much
- * easier. */
-
-/* Look to see if this is the second emulated write in a row to this
- * page, and unshadow if it is */
-static inline void check_for_early_unshadow(struct vcpu *v, mfn_t gmfn)
-{
-#if SHADOW_OPTIMIZATIONS & SHOPT_EARLY_UNSHADOW
-    struct domain *d = v->domain;
-    /* If the domain has never made a "dying" op, use the two-writes
-     * heuristic; otherwise, unshadow as soon as we write a zero for a dying
-     * process.
-     *
-     * Don't bother trying to unshadow if it's not a PT, or if it's > l1.
-     */
-    if ( ( v->arch.paging.shadow.pagetable_dying
-           || ( !d->arch.paging.shadow.pagetable_dying_op
-                && v->arch.paging.shadow.last_emulated_mfn_for_unshadow == mfn_x(gmfn) ) )
-         && sh_mfn_is_a_page_table(gmfn)
-         && (!d->arch.paging.shadow.pagetable_dying_op ||
-             !(mfn_to_page(gmfn)->shadow_flags
-               & (SHF_L2_32|SHF_L2_PAE|SHF_L2H_PAE|SHF_L4_64))) )
-    {
-        perfc_incr(shadow_early_unshadow);
-        sh_remove_shadows(d, gmfn, 1, 0 /* Fast, can fail to unshadow */ );
-        TRACE_SHADOW_PATH_FLAG(TRCE_SFLAG_EARLY_UNSHADOW);
-    }
-    v->arch.paging.shadow.last_emulated_mfn_for_unshadow = mfn_x(gmfn);
-#endif
-}
-
-/* Stop counting towards early unshadows, as we've seen a real page fault */
-static inline void reset_early_unshadow(struct vcpu *v)
-{
-#if SHADOW_OPTIMIZATIONS & SHOPT_EARLY_UNSHADOW
-    v->arch.paging.shadow.last_emulated_mfn_for_unshadow = mfn_x(INVALID_MFN);
-#endif
-}
-
-
-
-/**************************************************************************/
 /* Optimization: Prefetch multiple L1 entries.  This is called after we have
  * demand-faulted a shadow l1e in the fault handler, to see if it's
  * worth fetching some more.
@@ -2941,7 +2895,7 @@ static int sh_page_fault(struct vcpu *v,
                  * a not-present fault (by flipping two bits). */
                 ASSERT(regs->error_code & PFEC_page_present);
                 regs->error_code ^= (PFEC_reserved_bit|PFEC_page_present);
-                reset_early_unshadow(v);
+                sh_reset_early_unshadow(v);
                 perfc_incr(shadow_fault_fast_gnp);
                 SHADOW_PRINTK("fast path not-present\n");
                 trace_shadow_gen(TRC_SHADOW_FAST_PROPAGATE, va);
@@ -2957,7 +2911,7 @@ static int sh_page_fault(struct vcpu *v,
             }
             perfc_incr(shadow_fault_fast_mmio);
             SHADOW_PRINTK("fast path mmio %#"PRIpaddr"\n", gpa);
-            reset_early_unshadow(v);
+            sh_reset_early_unshadow(v);
             trace_shadow_gen(TRC_SHADOW_FAST_MMIO, va);
             return (handle_mmio_with_translation(va, gpa >> PAGE_SHIFT, access)
                     ? EXCRET_fault_fixed : 0);
@@ -3069,7 +3023,7 @@ static int sh_page_fault(struct vcpu *v,
     {
         perfc_incr(shadow_fault_bail_real_fault);
         SHADOW_PRINTK("not a shadow fault\n");
-        reset_early_unshadow(v);
+        sh_reset_early_unshadow(v);
         regs->error_code = gw.pfec & PFEC_arch_mask;
         goto propagate;
     }
@@ -3095,7 +3049,7 @@ static int sh_page_fault(struct vcpu *v,
         perfc_incr(shadow_fault_bail_bad_gfn);
         SHADOW_PRINTK("BAD gfn=%"SH_PRI_gfn" gmfn=%"PRI_mfn"\n",
                       gfn_x(gfn), mfn_x(gmfn));
-        reset_early_unshadow(v);
+        sh_reset_early_unshadow(v);
         put_gfn(d, gfn_x(gfn));
         goto propagate;
     }
@@ -3284,7 +3238,7 @@ static int sh_page_fault(struct vcpu *v,
 
     perfc_incr(shadow_fault_fixed);
     d->arch.paging.log_dirty.fault_count++;
-    reset_early_unshadow(v);
+    sh_reset_early_unshadow(v);
 
     trace_shadow_fixup(gw.l1e, va);
  done:
@@ -3399,7 +3353,7 @@ static int sh_page_fault(struct vcpu *v,
 
     SHADOW_PRINTK("emulate: eip=%#lx esp=%#lx\n", regs->rip, regs->rsp);
 
-    emul_ops = shadow_init_emulation(&emul_ctxt, regs);
+    emul_ops = shadow_init_emulation(&emul_ctxt, regs, GUEST_PTE_SIZE);
 
     r = x86_emulate(&emul_ctxt.ctxt, emul_ops);
 
@@ -3539,7 +3493,7 @@ static int sh_page_fault(struct vcpu *v,
     sh_audit_gw(v, &gw);
     SHADOW_PRINTK("mmio %#"PRIpaddr"\n", gpa);
     shadow_audit_tables(v);
-    reset_early_unshadow(v);
+    sh_reset_early_unshadow(v);
     paging_unlock(d);
     put_gfn(d, gfn_x(gfn));
     trace_shadow_gen(TRC_SHADOW_MMIO, va);
@@ -3550,7 +3504,7 @@ static int sh_page_fault(struct vcpu *v,
     sh_audit_gw(v, &gw);
     SHADOW_PRINTK("not a shadow fault\n");
     shadow_audit_tables(v);
-    reset_early_unshadow(v);
+    sh_reset_early_unshadow(v);
     paging_unlock(d);
     put_gfn(d, gfn_x(gfn));
 
@@ -4659,29 +4613,6 @@ static void sh_pagetable_dying(struct vc
 /**************************************************************************/
 /* Handling guest writes to pagetables. */
 
-/* Tidy up after the emulated write: mark pages dirty, verify the new
- * contents, and undo the mapping */
-static void emulate_unmap_dest(struct vcpu *v,
-                               void *addr,
-                               u32 bytes,
-                               struct sh_emulate_ctxt *sh_ctxt)
-{
-    ASSERT(mfn_valid(sh_ctxt->mfn[0]));
-
-    /* If we are writing lots of PTE-aligned zeros, might want to unshadow */
-    if ( likely(bytes >= 4) && (*(u32 *)addr == 0) )
-    {
-        if ( ((unsigned long) addr & ((sizeof (guest_intpte_t)) - 1)) == 0 )
-            check_for_early_unshadow(v, sh_ctxt->mfn[0]);
-        /* Don't reset the heuristic if we're writing zeros at non-aligned
-         * addresses, otherwise it doesn't catch REP MOVSD on PAE guests */
-    }
-    else
-        reset_early_unshadow(v);
-
-    sh_emulate_unmap_dest(v, addr, bytes, sh_ctxt);
-}
-
 static int
 sh_x86_emulate_write(struct vcpu *v, unsigned long vaddr, void *src,
                      u32 bytes, struct sh_emulate_ctxt *sh_ctxt)
@@ -4715,7 +4646,7 @@ sh_x86_emulate_write(struct vcpu *v, uns
 #endif
     }
 
-    emulate_unmap_dest(v, addr, bytes, sh_ctxt);
+    sh_emulate_unmap_dest(v, addr, bytes, sh_ctxt);
     shadow_audit_tables(v);
     paging_unlock(v->domain);
     return X86EMUL_OKAY;
@@ -4760,7 +4691,7 @@ sh_x86_emulate_cmpxchg(struct vcpu *v, u
                   " wanted %#lx now %#lx bytes %u\n",
                   vaddr, prev, old, new, *(unsigned long *)addr, bytes);
 
-    emulate_unmap_dest(v, addr, bytes, sh_ctxt);
+    sh_emulate_unmap_dest(v, addr, bytes, sh_ctxt);
     shadow_audit_tables(v);
     paging_unlock(v->domain);
     return rv;
--- a/xen/arch/x86/mm/shadow/private.h
+++ b/xen/arch/x86/mm/shadow/private.h
@@ -721,6 +721,8 @@ struct sh_emulate_ctxt {
     uint8_t insn_buf_bytes;
     unsigned long insn_buf_eip;
 
+    unsigned int pte_size;
+
     /* Cache of segment registers already gathered for this emulation. */
     unsigned int valid_seg_regs;
     struct segment_register seg_reg[6];
@@ -736,10 +738,19 @@ struct sh_emulate_ctxt {
 };
 
 const struct x86_emulate_ops *shadow_init_emulation(
-    struct sh_emulate_ctxt *sh_ctxt, struct cpu_user_regs *regs);
+    struct sh_emulate_ctxt *sh_ctxt, struct cpu_user_regs *regs,
+    unsigned int pte_size);
 void shadow_continue_emulation(
     struct sh_emulate_ctxt *sh_ctxt, struct cpu_user_regs *regs);
 
+/* Stop counting towards early unshadows, as we've seen a real page fault */
+static inline void sh_reset_early_unshadow(struct vcpu *v)
+{
+#if SHADOW_OPTIMIZATIONS & SHOPT_EARLY_UNSHADOW
+    v->arch.paging.shadow.last_emulated_mfn_for_unshadow = mfn_x(INVALID_MFN);
+#endif
+}
+
 #if (SHADOW_OPTIMIZATIONS & SHOPT_VIRTUAL_TLB)
 /**************************************************************************/
 /* Virtual TLB entries
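
The PTE alignment test in sh_emulate_unmap_dest() above relies on
pte_size being a power of two (4 for 2-level 32-bit guests, 8 for PAE
and 64-bit ones).  Pulled out as a sketch, with a hypothetical helper
name:

    #include <stdbool.h>

    /* True iff @addr is aligned to a (power-of-two) PTE size. */
    static inline bool pte_aligned(unsigned long addr, unsigned int pte_size)
    {
        return !(addr & (pte_size - 1));
    }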




* [PATCH v5 14/14] x86/shadow: fold sh_x86_emulate_{write, cmpxchg}() into their only callers
  2018-03-15 12:57 [PATCH v5 00/14] x86: emulator enhancements Jan Beulich
                   ` (12 preceding siblings ...)
  2018-03-15 13:13 ` [PATCH v5 13/14] x86/shadow: fully move unmap-dest into common code Jan Beulich
@ 2018-03-15 13:13 ` Jan Beulich
  13 siblings, 0 replies; 33+ messages in thread
From: Jan Beulich @ 2018-03-15 13:13 UTC (permalink / raw)
  To: xen-devel; +Cc: George Dunlap

The functions each have only a single caller and are now independent
of the guest paging type (except for the tracing part), so there is no
need for them to exist as standalone functions, let alone in multiple
instances. Replace the two prior hooks with just a single one for
dealing with tracing.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
Acked-by: Tim Deegan <tim@xen.org>
---
v3: New.

--- a/xen/arch/x86/mm/shadow/common.c
+++ b/xen/arch/x86/mm/shadow/common.c
@@ -118,6 +118,20 @@ __initcall(shadow_audit_key_init);
  */
 
 /*
+ * Returns a mapped pointer to write to, or one of the following error
+ * indicators.
+ */
+#define MAPPING_UNHANDLEABLE ERR_PTR(~(long)X86EMUL_UNHANDLEABLE)
+#define MAPPING_EXCEPTION    ERR_PTR(~(long)X86EMUL_EXCEPTION)
+#define MAPPING_SILENT_FAIL  ERR_PTR(~(long)X86EMUL_OKAY)
+static void *sh_emulate_map_dest(struct vcpu *v, unsigned long vaddr,
+                                 unsigned int bytes,
+                                 struct sh_emulate_ctxt *sh_ctxt);
+static void sh_emulate_unmap_dest(struct vcpu *v, void *addr,
+                                  unsigned int bytes,
+                                  struct sh_emulate_ctxt *sh_ctxt);
+
+/*
  * Callers which pass a known in-range x86_segment can rely on the return
  * pointer being valid.  Other callers must explicitly check for errors.
  */
@@ -260,6 +274,7 @@ hvm_emulate_write(enum x86_segment seg,
         container_of(ctxt, struct sh_emulate_ctxt, ctxt);
     struct vcpu *v = current;
     unsigned long addr;
+    void *ptr;
     int rc;
 
     /* How many emulations could we save if we unshadowed on stack writes? */
@@ -271,8 +286,26 @@ hvm_emulate_write(enum x86_segment seg,
     if ( rc || !bytes )
         return rc;
 
-    return v->arch.paging.mode->shadow.x86_emulate_write(
-        v, addr, p_data, bytes, sh_ctxt);
+    /* Unaligned writes are only acceptable on HVM */
+    if ( (addr & (bytes - 1)) && !is_hvm_vcpu(v)  )
+        return X86EMUL_UNHANDLEABLE;
+
+    ptr = sh_emulate_map_dest(v, addr, bytes, sh_ctxt);
+    if ( IS_ERR(ptr) )
+        return ~PTR_ERR(ptr);
+
+    paging_lock(v->domain);
+    memcpy(ptr, p_data, bytes);
+
+    if ( tb_init_done )
+        v->arch.paging.mode->shadow.trace_emul_write_val(ptr, addr,
+                                                         p_data, bytes);
+
+    sh_emulate_unmap_dest(v, ptr, bytes, sh_ctxt);
+    shadow_audit_tables(v);
+    paging_unlock(v->domain);
+
+    return X86EMUL_OKAY;
 }
 
 static int
@@ -287,7 +320,8 @@ hvm_emulate_cmpxchg(enum x86_segment seg
     struct sh_emulate_ctxt *sh_ctxt =
         container_of(ctxt, struct sh_emulate_ctxt, ctxt);
     struct vcpu *v = current;
-    unsigned long addr, old, new;
+    unsigned long addr, old, new, prev;
+    void *ptr;
     int rc;
 
     if ( bytes > sizeof(long) )
@@ -298,14 +332,43 @@ hvm_emulate_cmpxchg(enum x86_segment seg
     if ( rc )
         return rc;
 
+    /* Unaligned writes are only acceptable on HVM */
+    if ( (addr & (bytes - 1)) && !is_hvm_vcpu(v)  )
+        return X86EMUL_UNHANDLEABLE;
+
+    ptr = sh_emulate_map_dest(v, addr, bytes, sh_ctxt);
+    if ( IS_ERR(ptr) )
+        return ~PTR_ERR(ptr);
+
     old = new = 0;
     memcpy(&old, p_old, bytes);
     memcpy(&new, p_new, bytes);
 
-    rc = v->arch.paging.mode->shadow.x86_emulate_cmpxchg(
-             v, addr, &old, new, bytes, sh_ctxt);
+    paging_lock(v->domain);
+    switch ( bytes )
+    {
+    case 1: prev = cmpxchg((uint8_t  *)ptr, old, new); break;
+    case 2: prev = cmpxchg((uint16_t *)ptr, old, new); break;
+    case 4: prev = cmpxchg((uint32_t *)ptr, old, new); break;
+    case 8: prev = cmpxchg((uint64_t *)ptr, old, new); break;
+    default:
+        SHADOW_PRINTK("cmpxchg size %u is not supported\n", bytes);
+        prev = ~old;
+    }
+
+    if ( prev != old )
+    {
+        memcpy(p_old, &prev, bytes);
+        rc = X86EMUL_CMPXCHG_FAILED;
+    }
+
+    SHADOW_DEBUG(EMULATE,
+                 "va %#lx was %#lx expected %#lx wanted %#lx now %#lx bytes %u\n",
+                 addr, prev, old, new, *(unsigned long *)ptr, bytes);
 
-    memcpy(p_old, &old, bytes);
+    sh_emulate_unmap_dest(v, ptr, bytes, sh_ctxt);
+    shadow_audit_tables(v);
+    paging_unlock(v->domain);
 
     return rc;
 }
@@ -1684,9 +1747,9 @@ static mfn_t emulate_gva_to_mfn(struct v
  * returned, page references will be held on sh_ctxt->mfn[0] and
  * sh_ctxt->mfn[1] iff !INVALID_MFN.
  */
-void *sh_emulate_map_dest(struct vcpu *v, unsigned long vaddr,
-                          unsigned int bytes,
-                          struct sh_emulate_ctxt *sh_ctxt)
+static void *sh_emulate_map_dest(struct vcpu *v, unsigned long vaddr,
+                                 unsigned int bytes,
+                                 struct sh_emulate_ctxt *sh_ctxt)
 {
     struct domain *d = v->domain;
     void *map;
@@ -1815,8 +1878,9 @@ static inline void check_for_early_unsha
  * Tidy up after the emulated write: mark pages dirty, verify the new
  * contents, and undo the mapping.
  */
-void sh_emulate_unmap_dest(struct vcpu *v, void *addr, unsigned int bytes,
-                           struct sh_emulate_ctxt *sh_ctxt)
+static void sh_emulate_unmap_dest(struct vcpu *v, void *addr,
+                                  unsigned int bytes,
+                                  struct sh_emulate_ctxt *sh_ctxt)
 {
     u32 b1 = bytes, b2 = 0, shflags;
 
--- a/xen/arch/x86/mm/shadow/multi.c
+++ b/xen/arch/x86/mm/shadow/multi.c
@@ -2743,6 +2743,25 @@ static DEFINE_PER_CPU(int,trace_extra_em
 #endif
 static DEFINE_PER_CPU(guest_pa_t,trace_emulate_write_val);
 
+static void trace_emulate_write_val(const void *ptr, unsigned long vaddr,
+                                    const void *src, unsigned int bytes)
+{
+#if GUEST_PAGING_LEVELS == 3
+    if ( vaddr == this_cpu(trace_emulate_initial_va) )
+        memcpy(&this_cpu(trace_emulate_write_val), src, bytes);
+    else if ( (vaddr & ~(GUEST_PTE_SIZE - 1)) ==
+              this_cpu(trace_emulate_initial_va) )
+    {
+        TRACE_SHADOW_PATH_FLAG(TRCE_SFLAG_EMULATE_FULL_PT);
+        memcpy(&this_cpu(trace_emulate_write_val),
+               (typeof(ptr))((unsigned long)ptr & ~(GUEST_PTE_SIZE - 1)),
+               GUEST_PTE_SIZE);
+    }
+#else
+    memcpy(&this_cpu(trace_emulate_write_val), src, bytes);
+#endif
+}
+
 static inline void trace_shadow_emulate(guest_l1e_t gl1e, unsigned long va)
 {
     if ( tb_init_done )
@@ -4611,93 +4630,6 @@ static void sh_pagetable_dying(struct vc
 #endif
 
 /**************************************************************************/
-/* Handling guest writes to pagetables. */
-
-static int
-sh_x86_emulate_write(struct vcpu *v, unsigned long vaddr, void *src,
-                     u32 bytes, struct sh_emulate_ctxt *sh_ctxt)
-{
-    void *addr;
-
-    /* Unaligned writes are only acceptable on HVM */
-    if ( (vaddr & (bytes - 1)) && !is_hvm_vcpu(v)  )
-        return X86EMUL_UNHANDLEABLE;
-
-    addr = sh_emulate_map_dest(v, vaddr, bytes, sh_ctxt);
-    if ( IS_ERR(addr) )
-        return ~PTR_ERR(addr);
-
-    paging_lock(v->domain);
-    memcpy(addr, src, bytes);
-
-    if ( tb_init_done )
-    {
-#if GUEST_PAGING_LEVELS == 3
-        if ( vaddr == this_cpu(trace_emulate_initial_va) )
-            memcpy(&this_cpu(trace_emulate_write_val), src, bytes);
-        else if ( (vaddr & ~(0x7UL)) == this_cpu(trace_emulate_initial_va) )
-        {
-            TRACE_SHADOW_PATH_FLAG(TRCE_SFLAG_EMULATE_FULL_PT);
-            memcpy(&this_cpu(trace_emulate_write_val),
-                   (void *)(((unsigned long) addr) & ~(0x7UL)), GUEST_PTE_SIZE);
-        }
-#else
-        memcpy(&this_cpu(trace_emulate_write_val), src, bytes);
-#endif
-    }
-
-    sh_emulate_unmap_dest(v, addr, bytes, sh_ctxt);
-    shadow_audit_tables(v);
-    paging_unlock(v->domain);
-    return X86EMUL_OKAY;
-}
-
-static int
-sh_x86_emulate_cmpxchg(struct vcpu *v, unsigned long vaddr,
-                       unsigned long *p_old, unsigned long new,
-                       unsigned int bytes, struct sh_emulate_ctxt *sh_ctxt)
-{
-    void *addr;
-    unsigned long prev, old = *p_old;
-    int rv = X86EMUL_OKAY;
-
-    /* Unaligned writes are only acceptable on HVM */
-    if ( (vaddr & (bytes - 1)) && !is_hvm_vcpu(v)  )
-        return X86EMUL_UNHANDLEABLE;
-
-    addr = sh_emulate_map_dest(v, vaddr, bytes, sh_ctxt);
-    if ( IS_ERR(addr) )
-        return ~PTR_ERR(addr);
-
-    paging_lock(v->domain);
-    switch ( bytes )
-    {
-    case 1: prev = cmpxchg(((u8 *)addr), old, new);  break;
-    case 2: prev = cmpxchg(((u16 *)addr), old, new); break;
-    case 4: prev = cmpxchg(((u32 *)addr), old, new); break;
-    case 8: prev = cmpxchg(((u64 *)addr), old, new); break;
-    default:
-        SHADOW_PRINTK("cmpxchg of size %i is not supported\n", bytes);
-        prev = ~old;
-    }
-
-    if ( prev != old )
-    {
-        *p_old = prev;
-        rv = X86EMUL_CMPXCHG_FAILED;
-    }
-
-    SHADOW_DEBUG(EMULATE, "va %#lx was %#lx expected %#lx"
-                  " wanted %#lx now %#lx bytes %u\n",
-                  vaddr, prev, old, new, *(unsigned long *)addr, bytes);
-
-    sh_emulate_unmap_dest(v, addr, bytes, sh_ctxt);
-    shadow_audit_tables(v);
-    paging_unlock(v->domain);
-    return rv;
-}
-
-/**************************************************************************/
 /* Audit tools */
 
 #if SHADOW_AUDIT & SHADOW_AUDIT_ENTRIES
@@ -5018,8 +4950,6 @@ const struct paging_mode sh_paging_mode
     .write_p2m_entry               = shadow_write_p2m_entry,
     .guest_levels                  = GUEST_PAGING_LEVELS,
     .shadow.detach_old_tables      = sh_detach_old_tables,
-    .shadow.x86_emulate_write      = sh_x86_emulate_write,
-    .shadow.x86_emulate_cmpxchg    = sh_x86_emulate_cmpxchg,
     .shadow.write_guest_entry      = sh_write_guest_entry,
     .shadow.cmpxchg_guest_entry    = sh_cmpxchg_guest_entry,
     .shadow.make_monitor_table     = sh_make_monitor_table,
@@ -5028,6 +4958,7 @@ const struct paging_mode sh_paging_mode
     .shadow.guess_wrmap            = sh_guess_wrmap,
 #endif
     .shadow.pagetable_dying        = sh_pagetable_dying,
+    .shadow.trace_emul_write_val   = trace_emulate_write_val,
     .shadow.shadow_levels          = SHADOW_PAGING_LEVELS,
 };
 
--- a/xen/arch/x86/mm/shadow/private.h
+++ b/xen/arch/x86/mm/shadow/private.h
@@ -393,16 +393,6 @@ void shadow_update_paging_modes(struct v
  * With user_only == 1, unhooks only the user-mode mappings. */
 void shadow_unhook_mappings(struct domain *d, mfn_t smfn, int user_only);
 
-/* Returns a mapped pointer to write to, or one of the following error
- * indicators. */
-#define MAPPING_UNHANDLEABLE ERR_PTR(~(long)X86EMUL_UNHANDLEABLE)
-#define MAPPING_EXCEPTION    ERR_PTR(~(long)X86EMUL_EXCEPTION)
-#define MAPPING_SILENT_FAIL  ERR_PTR(~(long)X86EMUL_OKAY)
-void *sh_emulate_map_dest(struct vcpu *v, unsigned long vaddr,
-                          unsigned int bytes, struct sh_emulate_ctxt *sh_ctxt);
-void sh_emulate_unmap_dest(struct vcpu *v, void *addr, unsigned int bytes,
-                           struct sh_emulate_ctxt *sh_ctxt);
-
 #if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC)
 /* Allow a shadowed page to go out of sync */
 int sh_unsync(struct vcpu *v, mfn_t gmfn);
--- a/xen/arch/x86/mm/shadow/types.h
+++ b/xen/arch/x86/mm/shadow/types.h
@@ -248,8 +248,6 @@ static inline shadow_l4e_t shadow_l4e_fr
 #define sh_unhook_64b_mappings     INTERNAL_NAME(sh_unhook_64b_mappings)
 #define sh_paging_mode             INTERNAL_NAME(sh_paging_mode)
 #define sh_detach_old_tables       INTERNAL_NAME(sh_detach_old_tables)
-#define sh_x86_emulate_write       INTERNAL_NAME(sh_x86_emulate_write)
-#define sh_x86_emulate_cmpxchg     INTERNAL_NAME(sh_x86_emulate_cmpxchg)
 #define sh_audit_l1_table          INTERNAL_NAME(sh_audit_l1_table)
 #define sh_audit_fl1_table         INTERNAL_NAME(sh_audit_fl1_table)
 #define sh_audit_l2_table          INTERNAL_NAME(sh_audit_l2_table)
--- a/xen/include/asm-x86/paging.h
+++ b/xen/include/asm-x86/paging.h
@@ -82,14 +82,6 @@ struct sh_emulate_ctxt;
 struct shadow_paging_mode {
 #ifdef CONFIG_SHADOW_PAGING
     void          (*detach_old_tables     )(struct vcpu *v);
-    int           (*x86_emulate_write     )(struct vcpu *v, unsigned long va,
-                                            void *src, u32 bytes,
-                                            struct sh_emulate_ctxt *sh_ctxt);
-    int           (*x86_emulate_cmpxchg   )(struct vcpu *v, unsigned long va,
-                                            unsigned long *old,
-                                            unsigned long new,
-                                            unsigned int bytes,
-                                            struct sh_emulate_ctxt *sh_ctxt);
     bool          (*write_guest_entry     )(struct vcpu *v, intpte_t *p,
                                             intpte_t new, mfn_t gmfn);
     bool          (*cmpxchg_guest_entry   )(struct vcpu *v, intpte_t *p,
@@ -100,6 +92,8 @@ struct shadow_paging_mode {
     int           (*guess_wrmap           )(struct vcpu *v, 
                                             unsigned long vaddr, mfn_t gmfn);
     void          (*pagetable_dying       )(struct vcpu *v, paddr_t gpa);
+    void          (*trace_emul_write_val  )(const void *ptr, unsigned long vaddr,
+                                            const void *src, unsigned int bytes);
 #endif
     /* For outsiders to tell what mode we're in */
     unsigned int shadow_levels;
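
One detail both rewritten callers rely on is the MAPPING_* encoding:
sh_emulate_map_dest() returns an X86EMUL_* code packed as
ERR_PTR(~(long)rc), so ~PTR_ERR() recovers it exactly.  Condensed from
the hunks above:

    void *ptr = sh_emulate_map_dest(v, addr, bytes, sh_ctxt);

    if ( IS_ERR(ptr) )
        /* e.g. MAPPING_EXCEPTION (~X86EMUL_EXCEPTION) -> X86EMUL_EXCEPTION */
        return ~PTR_ERR(ptr);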




* Re: [PATCH v5 01/14] x86emul: support 3DNow! insns
  2018-03-15 13:02 ` [PATCH v5 01/14] x86emul: support 3DNow! insns Jan Beulich
@ 2018-03-15 13:24   ` Andrew Cooper
  0 siblings, 0 replies; 33+ messages in thread
From: Andrew Cooper @ 2018-03-15 13:24 UTC (permalink / raw)
  To: Jan Beulich, xen-devel; +Cc: George Dunlap

On 15/03/18 13:02, Jan Beulich wrote:
> Yes, recent AMD CPUs don't support them anymore, but I think we should
> nevertheless cope.
>
> Signed-off-by: Jan Beulich <jbeulich@suse.com>

Acked-by: Andrew Cooper <andrew.cooper3@citrix.com>


* Re: [PATCH v5 03/14] x86emul: abstract out XCRn accesses
  2018-03-15 13:04 ` [PATCH v5 03/14] x86emul: abstract out XCRn accesses Jan Beulich
@ 2018-03-15 13:35   ` Andrew Cooper
  2018-03-15 13:44     ` Jan Beulich
  2018-03-15 15:41   ` Andrew Cooper
                     ` (2 subsequent siblings)
  3 siblings, 1 reply; 33+ messages in thread
From: Andrew Cooper @ 2018-03-15 13:35 UTC (permalink / raw)
  To: Jan Beulich, xen-devel; +Cc: George Dunlap, Paul Durrant

On 15/03/18 13:04, Jan Beulich wrote:
>  static inline void x86_emul_hw_exception(
> --- a/xen/arch/x86/x86_emulate.c
> +++ b/xen/arch/x86/x86_emulate.c
> @@ -42,3 +42,50 @@
>  })
>  
>  #include "x86_emulate/x86_emulate.c"
> +
> +int x86emul_read_xcr(unsigned int reg, uint64_t *val,
> +                     struct x86_emulate_ctxt *ctxt)
> +{
> +    switch ( reg )
> +    {
> +    case 0:
> +        *val = current->arch.xcr0;
> +        return X86EMUL_OKAY;
> +
> +    case 1:
> +        if ( cpu_has_xgetbv1 && current->domain->arch.cpuid->xstate.xgetbv1 )

You can drop the cpu_has_xgetbv1 part of this test.  The CPUID policy
derivation logic won't allow xstate.xgetbv1 to be set without
cpu_has_xgetbv1.

Otherwise, Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>


* Re: [PATCH v5 03/14] x86emul: abstract out XCRn accesses
  2018-03-15 13:35   ` Andrew Cooper
@ 2018-03-15 13:44     ` Jan Beulich
  2018-03-15 13:55       ` Andrew Cooper
  0 siblings, 1 reply; 33+ messages in thread
From: Jan Beulich @ 2018-03-15 13:44 UTC (permalink / raw)
  To: Andrew Cooper; +Cc: George Dunlap, xen-devel, Paul Durrant

>>> On 15.03.18 at 14:35, <andrew.cooper3@citrix.com> wrote:
> On 15/03/18 13:04, Jan Beulich wrote:
>>  static inline void x86_emul_hw_exception(
>> --- a/xen/arch/x86/x86_emulate.c
>> +++ b/xen/arch/x86/x86_emulate.c
>> @@ -42,3 +42,50 @@
>>  })
>>  
>>  #include "x86_emulate/x86_emulate.c"
>> +
>> +int x86emul_read_xcr(unsigned int reg, uint64_t *val,
>> +                     struct x86_emulate_ctxt *ctxt)
>> +{
>> +    switch ( reg )
>> +    {
>> +    case 0:
>> +        *val = current->arch.xcr0;
>> +        return X86EMUL_OKAY;
>> +
>> +    case 1:
>> +        if ( cpu_has_xgetbv1 && current->domain->arch.cpuid->xstate.xgetbv1 )
> 
> You can drop the cpu_has_xgetbv1 part of this test.  The CPUID policy
> derivation logic won't allow xstate.xgetbv1 to be set without
> cpu_has_xgetbv1

Hmm, I wasn't sure: Is that already the case? I thought
auditing of what the tool stack provides is only planned.

> Otherwise, Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>

Thanks, but please clarify the above.

Jan



* Re: [PATCH v5 03/14] x86emul: abstract out XCRn accesses
  2018-03-15 13:44     ` Jan Beulich
@ 2018-03-15 13:55       ` Andrew Cooper
  0 siblings, 0 replies; 33+ messages in thread
From: Andrew Cooper @ 2018-03-15 13:55 UTC (permalink / raw)
  To: Jan Beulich; +Cc: George Dunlap, xen-devel, Paul Durrant

On 15/03/18 13:44, Jan Beulich wrote:
>>>> On 15.03.18 at 14:35, <andrew.cooper3@citrix.com> wrote:
>> On 15/03/18 13:04, Jan Beulich wrote:
>>>  static inline void x86_emul_hw_exception(
>>> --- a/xen/arch/x86/x86_emulate.c
>>> +++ b/xen/arch/x86/x86_emulate.c
>>> @@ -42,3 +42,50 @@
>>>  })
>>>  
>>>  #include "x86_emulate/x86_emulate.c"
>>> +
>>> +int x86emul_read_xcr(unsigned int reg, uint64_t *val,
>>> +                     struct x86_emulate_ctxt *ctxt)
>>> +{
>>> +    switch ( reg )
>>> +    {
>>> +    case 0:
>>> +        *val = current->arch.xcr0;
>>> +        return X86EMUL_OKAY;
>>> +
>>> +    case 1:
>>> +        if ( cpu_has_xgetbv1 && current->domain->arch.cpuid->xstate.xgetbv1 )
>> You can drop the cpu_has_xgetbv1 part of this test.  The CPUID policy
>> derivation logic won't allow xstate.xgetbv1 to be set without
>> cpu_has_xgetbv1
> Hmm, I wasn't sure: Is that already the case? I thought
> auditing of what the tool stack provides is only planned.
>
>> Otherwise, Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
> Thanks, but please clarify the above.

We currently zero out bad toolstack choices.  The long-term change will
be to hand an error back, rather than ignore bad choices.

In calculate_{pv,hvm}_max_policy(), we first copy the host cpuid policy,
then &= down by the information derived from the magic comments in
arch-x86/cpufeatureset.h

Then in recalculate_cpuid_policy(), we:

/* Clamp the toolstacks choices to reality. */
for ( i = 0; i < ARRAY_SIZE(fs); i++ )
    fs[i] &= max_fs[i];

The purpose of doing this is so we can rely on the cpuid policy object
being suitable without further qualification.
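
Concretely, given that clamping, the check in x86emul_read_xcr() could
shrink to just (a sketch of the suggested simplification):

    case 1:
        if ( current->domain->arch.cpuid->xstate.xgetbv1 )
            break;
        /* fall through */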

The only issues come when we decide to emulate pipeline capabilities
without hardware support, which will then fall into the same category as
the difference between host_and_vcpu_must_have() and vcpu_must_have() in
the main emulator.  We've got just a single example of this currently,
which is PV guests' PVRDTSC support, and that code isn't long for this world.

~Andrew


* Re: [PATCH v5 08/14] x86emul: add read-modify-write hook
  2018-03-15 13:08 ` [PATCH v5 08/14] x86emul: add read-modify-write hook Jan Beulich
@ 2018-03-15 14:19   ` Andrew Cooper
  2018-03-15 14:46     ` Jan Beulich
  0 siblings, 1 reply; 33+ messages in thread
From: Andrew Cooper @ 2018-03-15 14:19 UTC (permalink / raw)
  To: Jan Beulich, xen-devel; +Cc: George Dunlap

On 15/03/18 13:08, Jan Beulich wrote:
> @@ -8517,6 +8690,142 @@ x86_emulate(
>  #undef vex
>  #undef ea
>  
> +int x86_emul_rmw(
> +    void *ptr,
> +    unsigned int bytes,
> +    uint32_t *eflags,
> +    struct x86_emulate_state *state,
> +    struct x86_emulate_ctxt *ctxt)
> +{
> +    unsigned long *dst = ptr;
> +
> +    ASSERT(bytes == state->op_bytes);
> +
> +#ifdef __x86_64__
> +# define JCXZ "jrcxz"
> +#else
> +# define JCXZ "jecxz"
> +#endif
> +
> +#define COND_LOCK(op) \
> +    JCXZ " .L" #op "%=\n\t" \
> +    "lock\n" \
> +    ".L" #op "%=:\n\t" \
> +    #op

I'd forgotten that these encodings of jmp existed, but various ORMs
suggest that they are far slower to execute than other jumps.

Irrespective of the instruction latency argument, you'll get better code
generation with COND_LOCK() looking rather more like:

cmpb $0, %[lock]
je 1f
lock
1: #op

which will most likely cause the cmp to be encoded with a memory
operand, rather than forcing the value into %ecx.
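
As a complete macro that would be something like this (untested sketch;
the %[lock] operand name is illustrative, and the asm() consuming it
would need a matching memory input, e.g. [lock] "m" (state->lock_prefix)):

#define COND_LOCK(op) \
    "cmpb $0, %[lock]\n\t" \
    "je .L" #op "%=\n\t" \
    "lock\n" \
    ".L" #op "%=:\n\t" \
    #op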

Everything else looks ok, so a provisional Acked-by: Andrew Cooper
<andrew.cooper3@citrix.com> if you choose to follow this pattern.

~Andrew


* Re: [PATCH v5 09/14] x86emul: also handle shifts through ->rmw()
  2018-03-15 13:09 ` [PATCH v5 09/14] x86emul: also handle shifts through ->rmw() Jan Beulich
@ 2018-03-15 14:23   ` Andrew Cooper
  0 siblings, 0 replies; 33+ messages in thread
From: Andrew Cooper @ 2018-03-15 14:23 UTC (permalink / raw)
  To: Jan Beulich, xen-devel; +Cc: George Dunlap

On 15/03/18 13:09, Jan Beulich wrote:
> These don't allow LOCK, but still are read-modify-write operations, so
> are better handled that way too.
>
> Signed-off-by: Jan Beulich <jbeulich@suse.com>

Acked-by: Andrew Cooper <andrew.cooper3@citrix.com>


* Re: [PATCH v5 08/14] x86emul: add read-modify-write hook
  2018-03-15 14:19   ` Andrew Cooper
@ 2018-03-15 14:46     ` Jan Beulich
  2018-03-15 14:48       ` Andrew Cooper
  0 siblings, 1 reply; 33+ messages in thread
From: Jan Beulich @ 2018-03-15 14:46 UTC (permalink / raw)
  To: Andrew Cooper; +Cc: George Dunlap, xen-devel

>>> On 15.03.18 at 15:19, <andrew.cooper3@citrix.com> wrote:
> On 15/03/18 13:08, Jan Beulich wrote:
>> @@ -8517,6 +8690,142 @@ x86_emulate(
>>  #undef vex
>>  #undef ea
>>  
>> +int x86_emul_rmw(
>> +    void *ptr,
>> +    unsigned int bytes,
>> +    uint32_t *eflags,
>> +    struct x86_emulate_state *state,
>> +    struct x86_emulate_ctxt *ctxt)
>> +{
>> +    unsigned long *dst = ptr;
>> +
>> +    ASSERT(bytes == state->op_bytes);
>> +
>> +#ifdef __x86_64__
>> +# define JCXZ "jrcxz"
>> +#else
>> +# define JCXZ "jecxz"
>> +#endif
>> +
>> +#define COND_LOCK(op) \
>> +    JCXZ " .L" #op "%=\n\t" \
>> +    "lock\n" \
>> +    ".L" #op "%=:\n\t" \
>> +    #op
> 
> I'd forgotten that these encodings of jmp existed, but various ORMs
> suggest that they are far slower to execute than other jumps.
> 
> Irrespective of the instruction latency argument, you'll get better code
> generation with COND_LOCK() looking rather more like:
> 
> cmpb $0, %[lock]
> je 1f
> lock
> 1: #op
> 
> which will most likely cause the cmp to be encoded with a memory
> operand, rather than forcing the value into %ecx.

You're missing the main point of having chosen J{E,R}CXZ (arguably
I should have added a comment, and I now will): This operation sits
inside the range where we have loaded guest EFLAGS (the status
ones) into the hardware EFLAGS register. For ADC, SBB, RCL, and
RCR to function we must not alter CF here, and for flags not
modified by an insn (e.g. everything other than CF for BT*) we
must not alter any of the other status flags.
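
Roughly, for illustration (the registers and operands here are made up):

    jrcxz 1f            # tests %rcx only - EFLAGS, incl. guest CF, untouched
    lock
1:  adc %rax, (%rdi)    # still consumes the guest CF loaded beforehand

whereas a cmpb/je pair would rewrite all six status flags before the
actual insn executes.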

Jan



* Re: [PATCH v5 08/14] x86emul: add read-modify-write hook
  2018-03-15 14:46     ` Jan Beulich
@ 2018-03-15 14:48       ` Andrew Cooper
  0 siblings, 0 replies; 33+ messages in thread
From: Andrew Cooper @ 2018-03-15 14:48 UTC (permalink / raw)
  To: Jan Beulich; +Cc: George Dunlap, xen-devel

On 15/03/18 14:46, Jan Beulich wrote:
>>>> On 15.03.18 at 15:19, <andrew.cooper3@citrix.com> wrote:
>> On 15/03/18 13:08, Jan Beulich wrote:
>>> @@ -8517,6 +8690,142 @@ x86_emulate(
>>>  #undef vex
>>>  #undef ea
>>>  
>>> +int x86_emul_rmw(
>>> +    void *ptr,
>>> +    unsigned int bytes,
>>> +    uint32_t *eflags,
>>> +    struct x86_emulate_state *state,
>>> +    struct x86_emulate_ctxt *ctxt)
>>> +{
>>> +    unsigned long *dst = ptr;
>>> +
>>> +    ASSERT(bytes == state->op_bytes);
>>> +
>>> +#ifdef __x86_64__
>>> +# define JCXZ "jrcxz"
>>> +#else
>>> +# define JCXZ "jecxz"
>>> +#endif
>>> +
>>> +#define COND_LOCK(op) \
>>> +    JCXZ " .L" #op "%=\n\t" \
>>> +    "lock\n" \
>>> +    ".L" #op "%=:\n\t" \
>>> +    #op
>> I'd forgotten that these encodings of jmp existed, but various ORMs
>> suggest that they are far slower to execute than other jumps.
>>
>> Irrespective of the instruction latency argument, you'll get better code
>> generation with COND_LOCK() looking rather more like:
>>
>> cmpb $0, %[lock]
>> je 1f
>> lock
>> 1: #op
>>
>> which will most likely cause the cmp to be encoded with a memory
>> operand, rather than forcing the value into %ecx.
> You're missing the main point of having chosen J{E,R}CXZ (arguably
> I should have added a comment, and I now will): This operation sits
> inside the range where we have loaded guest EFLAGS (the status
> ones) into the hardware EFLAGS register. For ADC, SBB, RCL, and
> RCR to function we must not alter CF here, and for flags not
> modified by an insn (e.g. everything other than CF for BT*) we
> must not alter any of the other status flags.

Ah ok - very good justification.  Subject to a suitable code comment, my
ack still stands.

~Andrew


* Re: [PATCH v5 10/14] x86/HVM: do actual CMPXCHG in hvmemul_cmpxchg()
  2018-03-15 13:10 ` [PATCH v5 10/14] x86/HVM: do actual CMPXCHG in hvmemul_cmpxchg() Jan Beulich
@ 2018-03-15 15:28   ` Andrew Cooper
  0 siblings, 0 replies; 33+ messages in thread
From: Andrew Cooper @ 2018-03-15 15:28 UTC (permalink / raw)
  To: Jan Beulich, xen-devel; +Cc: George Dunlap


On 15/03/18 13:10, Jan Beulich wrote:
> ..., at least as far as currently possible, i.e. when a mapping can be
> obtained.
>
> Signed-off-by: Jan Beulich <jbeulich@suse.com>
> Reviewed-by: Paul Durrant <paul.durrant@citrix.com>

Acked-by: Andrew Cooper <andrew.cooper3@citrix.com>


* Re: [PATCH v5 03/14] x86emul: abstract out XCRn accesses
  2018-03-15 13:04 ` [PATCH v5 03/14] x86emul: abstract out XCRn accesses Jan Beulich
  2018-03-15 13:35   ` Andrew Cooper
@ 2018-03-15 15:41   ` Andrew Cooper
  2018-03-15 16:08     ` Jan Beulich
  2018-03-19 13:56   ` Paul Durrant
  2018-03-20 17:22   ` George Dunlap
  3 siblings, 1 reply; 33+ messages in thread
From: Andrew Cooper @ 2018-03-15 15:41 UTC (permalink / raw)
  To: Jan Beulich, xen-devel; +Cc: George Dunlap, Paul Durrant

On 15/03/18 13:04, Jan Beulich wrote:
> --- a/xen/arch/x86/x86_emulate.c
> +++ b/xen/arch/x86/x86_emulate.c
> @@ -42,3 +42,50 @@
>  })
>  
>  #include "x86_emulate/x86_emulate.c"
> +
> +int x86emul_read_xcr(unsigned int reg, uint64_t *val,
> +                     struct x86_emulate_ctxt *ctxt)
> +{
> +    switch ( reg )
> +    {
> +    case 0:
> +        *val = current->arch.xcr0;
> +        return X86EMUL_OKAY;
> +
> +    case 1:
> +        if ( cpu_has_xgetbv1 && current->domain->arch.cpuid->xstate.xgetbv1 )
> +            break;
> +        /* fall through */
> +    default:
> +        x86_emul_hw_exception(TRAP_gp_fault, 0, ctxt);
> +        return X86EMUL_EXCEPTION;
> +    }
> +
> +    *val = xgetbv(reg);
> +
> +    return X86EMUL_OKAY;
> +}
> +
> +int x86emul_write_xcr(unsigned int reg, uint64_t val,
> +                      struct x86_emulate_ctxt *ctxt)
> +{
> +    switch ( reg )
> +    {
> +    case 0:
> +        break;
> +
> +    case 1:
> +        if ( cpu_has_xgetbv1 && current->domain->arch.cpuid->xstate.xgetbv1 )
> +            break;
> +        /* fall through */

Actually, this is wrong.  cpu_has_xgetbv1 applies only to the read
side.  xsetbv[1] is still strictly reserved and yields #GP.  (Given the
way other bits in xcr0 work, I wouldn't be surprised if xsetbv[1] is
reserved forever more.)

I'd just drop this case block and let 1 fall into the default case,
rather than relying on the sanity check in handle_xsetbv().
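
I.e. roughly (a sketch based on the hunk quoted above):

int x86emul_write_xcr(unsigned int reg, uint64_t val,
                      struct x86_emulate_ctxt *ctxt)
{
    switch ( reg )
    {
    case 0:
        break;

    default:
    gp_fault:
        x86_emul_hw_exception(TRAP_gp_fault, 0, ctxt);
        return X86EMUL_EXCEPTION;
    }

    /* handle_xsetbv() still performs the remaining validation of val. */
    if ( unlikely(handle_xsetbv(reg, val) != 0) )
        goto gp_fault;

    return X86EMUL_OKAY;
}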

~Andrew


* Re: [PATCH v5 12/14] x86/HVM: use x86emul_write_xcr()
  2018-03-15 13:12 ` [PATCH v5 12/14] x86/HVM: use x86emul_write_xcr() Jan Beulich
@ 2018-03-15 15:43   ` Andrew Cooper
  2018-03-15 16:05     ` Boris Ostrovsky
  2018-03-19 12:57   ` Tian, Kevin
  1 sibling, 1 reply; 33+ messages in thread
From: Andrew Cooper @ 2018-03-15 15:43 UTC (permalink / raw)
  To: Jan Beulich, xen-devel
  Cc: George Dunlap, Kevin Tian, Boris Ostrovsky, Jun Nakajima,
	Suravee Suthikulpanit

On 15/03/18 13:12, Jan Beulich wrote:
> ...  instead of directly calling handle_xsetbv(), to make use of the
> additional checking there.
>
> Also don't call hvm_monitor_crX(XCR0, ...) for indexes other than zero
> anymore.
>
> Signed-off-by: Jan Beulich <jbeulich@suse.com>

Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>


* Re: [PATCH v5 12/14] x86/HVM: use x86emul_write_xcr()
  2018-03-15 15:43   ` Andrew Cooper
@ 2018-03-15 16:05     ` Boris Ostrovsky
  0 siblings, 0 replies; 33+ messages in thread
From: Boris Ostrovsky @ 2018-03-15 16:05 UTC (permalink / raw)
  To: Andrew Cooper, Jan Beulich, xen-devel
  Cc: George Dunlap, Kevin Tian, Jun Nakajima, Suravee Suthikulpanit

On 03/15/2018 11:43 AM, Andrew Cooper wrote:
> On 15/03/18 13:12, Jan Beulich wrote:
>> ...  instead of directly calling handle_xsetbv(), to make use of the
>> additional checking there.
>>
>> Also don't call hvm_monitor_crX(XCR0, ...) for indexes other than zero
>> anymore.
>>
>> Signed-off-by: Jan Beulich <jbeulich@suse.com>
> Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>

Reviewed-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>


* Re: [PATCH v5 03/14] x86emul: abstract out XCRn accesses
  2018-03-15 15:41   ` Andrew Cooper
@ 2018-03-15 16:08     ` Jan Beulich
  0 siblings, 0 replies; 33+ messages in thread
From: Jan Beulich @ 2018-03-15 16:08 UTC (permalink / raw)
  To: Andrew Cooper; +Cc: George Dunlap, xen-devel, Paul Durrant

>>> On 15.03.18 at 16:41, <andrew.cooper3@citrix.com> wrote:
> On 15/03/18 13:04, Jan Beulich wrote:
>> --- a/xen/arch/x86/x86_emulate.c
>> +++ b/xen/arch/x86/x86_emulate.c
>> @@ -42,3 +42,50 @@
>>  })
>>  
>>  #include "x86_emulate/x86_emulate.c"
>> +
>> +int x86emul_read_xcr(unsigned int reg, uint64_t *val,
>> +                     struct x86_emulate_ctxt *ctxt)
>> +{
>> +    switch ( reg )
>> +    {
>> +    case 0:
>> +        *val = current->arch.xcr0;
>> +        return X86EMUL_OKAY;
>> +
>> +    case 1:
>> +        if ( cpu_has_xgetbv1 && current->domain->arch.cpuid->xstate.xgetbv1 )
>> +            break;
>> +        /* fall through */
>> +    default:
>> +        x86_emul_hw_exception(TRAP_gp_fault, 0, ctxt);
>> +        return X86EMUL_EXCEPTION;
>> +    }
>> +
>> +    *val = xgetbv(reg);
>> +
>> +    return X86EMUL_OKAY;
>> +}
>> +
>> +int x86emul_write_xcr(unsigned int reg, uint64_t val,
>> +                      struct x86_emulate_ctxt *ctxt)
>> +{
>> +    switch ( reg )
>> +    {
>> +    case 0:
>> +        break;
>> +
>> +    case 1:
>> +        if ( cpu_has_xgetbv1 && current->domain->arch.cpuid->xstate.xgetbv1 )
>> +            break;
>> +        /* fall through */
> 
> Actually, this is wrong.  cpu_has_xgetbv1 applies only to the read
> side.  xsetbv[1] is still strictly reserved and yields #GP.  (Given the
> way other bits in xcr0 work, I wouldn't be surprised if xsetbv[1] is
> reserved forever more.)
> 
> I'd just drop this case block and let 1 fall into the default case,
> rather than relying on the sanity check in handle_xsetbv()

Oh, indeed, this was rather silly of me.

Jan



* Re: [PATCH v5 12/14] x86/HVM: use x86emul_write_xcr()
  2018-03-15 13:12 ` [PATCH v5 12/14] x86/HVM: use x86emul_write_xcr() Jan Beulich
  2018-03-15 15:43   ` Andrew Cooper
@ 2018-03-19 12:57   ` Tian, Kevin
  1 sibling, 0 replies; 33+ messages in thread
From: Tian, Kevin @ 2018-03-19 12:57 UTC (permalink / raw)
  To: Jan Beulich, xen-devel
  Cc: George Dunlap, Andrew Cooper, Boris Ostrovsky, Nakajima, Jun,
	Suravee Suthikulpanit

> From: Jan Beulich [mailto:JBeulich@suse.com]
> Sent: Thursday, March 15, 2018 9:12 PM
> 
> ...  instead of directly calling handle_xsetbv(), to make use of the
> additional checking there.
> 
> Also don't call hvm_monitor_crX(XCR0, ...) for indexes other than zero
> anymore.
> 
> Signed-off-by: Jan Beulich <jbeulich@suse.com>

Reviewed-by: Kevin Tian <kevin.tian@intel.com>


* Re: [PATCH v5 03/14] x86emul: abstract out XCRn accesses
  2018-03-15 13:04 ` [PATCH v5 03/14] x86emul: abstract out XCRn accesses Jan Beulich
  2018-03-15 13:35   ` Andrew Cooper
  2018-03-15 15:41   ` Andrew Cooper
@ 2018-03-19 13:56   ` Paul Durrant
  2018-03-20 17:22   ` George Dunlap
  3 siblings, 0 replies; 33+ messages in thread
From: Paul Durrant @ 2018-03-19 13:56 UTC (permalink / raw)
  To: 'Jan Beulich', xen-devel; +Cc: Andrew Cooper, George Dunlap

> -----Original Message-----
> From: Jan Beulich [mailto:JBeulich@suse.com]
> Sent: 15 March 2018 13:04
> To: xen-devel <xen-devel@lists.xenproject.org>
> Cc: Andrew Cooper <Andrew.Cooper3@citrix.com>; Paul Durrant
> <Paul.Durrant@citrix.com>; George Dunlap <George.Dunlap@citrix.com>
> Subject: [PATCH v5 03/14] x86emul: abstract out XCRn accesses
> 
> Use hooks, just like done for other special purpose registers.
> 
> This includes moving XCR0 checks from hvmemul_get_fpu() to the emulator
> itself as well as adding support for XGETBV emulation.
> 
> For now fuzzer reads will obtain the real values (minus the fuzzing of
> the hook pointer itself).
> 
> Signed-off-by: Jan Beulich <jbeulich@suse.com>

hvm/emulate parts...

Reviewed-by: Paul Durrant <paul.durrant@citrix.com>

> ---
> v5: Move index validation into hook functions. Introduce
>     x86emul_{read,write}_xcr().
> v4: Have hvmemul_read_xcr() raise an exception instead of returning
>     X86EMUL_UNHANDLEABLE for invalid indexes. Introduce xgetbv() and add
>     volatile to the asm() moved there. Split out _XSTATE_* movement.
> v2: Re-base.
> 
> --- a/tools/fuzz/x86_instruction_emulator/fuzz-emul.c
> +++ b/tools/fuzz/x86_instruction_emulator/fuzz-emul.c
> @@ -459,6 +459,8 @@ static int fuzz_write_cr(
>      return X86EMUL_OKAY;
>  }
> 
> +#define fuzz_read_xcr emul_test_read_xcr
> +
>  enum {
>      MSRI_IA32_SYSENTER_CS,
>      MSRI_IA32_SYSENTER_ESP,
> @@ -577,6 +579,7 @@ static const struct x86_emulate_ops all_
>      SET(write_io),
>      SET(read_cr),
>      SET(write_cr),
> +    SET(read_xcr),
>      SET(read_msr),
>      SET(write_msr),
>      SET(wbinvd),
> @@ -685,6 +688,7 @@ enum {
>      HOOK_write_cr,
>      HOOK_read_dr,
>      HOOK_write_dr,
> +    HOOK_read_xcr,
>      HOOK_read_msr,
>      HOOK_write_msr,
>      HOOK_wbinvd,
> @@ -729,6 +733,7 @@ static void disable_hooks(struct x86_emu
>      MAYBE_DISABLE_HOOK(write_io);
>      MAYBE_DISABLE_HOOK(read_cr);
>      MAYBE_DISABLE_HOOK(write_cr);
> +    MAYBE_DISABLE_HOOK(read_xcr);
>      MAYBE_DISABLE_HOOK(read_msr);
>      MAYBE_DISABLE_HOOK(write_msr);
>      MAYBE_DISABLE_HOOK(wbinvd);
> --- a/tools/tests/x86_emulator/test_x86_emulator.c
> +++ b/tools/tests/x86_emulator/test_x86_emulator.c
> @@ -371,6 +371,7 @@ static struct x86_emulate_ops emulops =
>      .read_segment = read_segment,
>      .cpuid      = emul_test_cpuid,
>      .read_cr    = emul_test_read_cr,
> +    .read_xcr   = emul_test_read_xcr,
>      .read_msr   = read_msr,
>      .get_fpu    = emul_test_get_fpu,
>      .put_fpu    = emul_test_put_fpu,
> --- a/tools/tests/x86_emulator/x86-emulate.c
> +++ b/tools/tests/x86_emulator/x86-emulate.c
> @@ -163,6 +163,35 @@ int emul_test_read_cr(
>      return X86EMUL_UNHANDLEABLE;
>  }
> 
> +int emul_test_read_xcr(
> +    unsigned int reg,
> +    uint64_t *val,
> +    struct x86_emulate_ctxt *ctxt)
> +{
> +    uint32_t lo, hi;
> +
> +    ASSERT(cpu_has_xsave);
> +
> +    switch ( reg )
> +    {
> +    case 0:
> +        break;
> +
> +    case 1:
> +        if ( cpu_has_xgetbv1 )
> +            break;
> +        /* fall through */
> +    default:
> +        x86_emul_hw_exception(13 /* #GP */, 0, ctxt);
> +        return X86EMUL_EXCEPTION;
> +    }
> +
> +    asm ( "xgetbv" : "=a" (lo), "=d" (hi) : "c" (reg) );
> +    *val = lo | ((uint64_t)hi << 32);
> +
> +    return X86EMUL_OKAY;
> +}
> +
>  int emul_test_get_fpu(
>      void (*exception_callback)(void *, struct cpu_user_regs *),
>      void *exception_callback_arg,
> --- a/tools/tests/x86_emulator/x86-emulate.h
> +++ b/tools/tests/x86_emulator/x86-emulate.h
> @@ -186,6 +186,16 @@ static inline uint64_t xgetbv(uint32_t x
>      (res.b & (1U << 5)) != 0; \
>  })
> 
> +#define cpu_has_xgetbv1 ({ \
> +    struct cpuid_leaf res; \
> +    emul_test_cpuid(1, 0, &res, NULL); \
> +    if ( !(res.c & (1U << 27)) ) \
> +        res.a = 0; \
> +    else \
> +        emul_test_cpuid(0xd, 1, &res, NULL); \
> +    (res.a & (1U << 2)) != 0; \
> +})
> +
>  #define cpu_has_bmi1 ({ \
>      struct cpuid_leaf res; \
>      emul_test_cpuid(7, 0, &res, NULL); \
> @@ -247,6 +257,11 @@ int emul_test_read_cr(
>      unsigned long *val,
>      struct x86_emulate_ctxt *ctxt);
> 
> +int emul_test_read_xcr(
> +    unsigned int reg,
> +    uint64_t *val,
> +    struct x86_emulate_ctxt *ctxt);
> +
>  int emul_test_get_fpu(
>      void (*exception_callback)(void *, struct cpu_user_regs *),
>      void *exception_callback_arg,
> --- a/xen/arch/x86/hvm/emulate.c
> +++ b/xen/arch/x86/hvm/emulate.c
> @@ -1826,6 +1826,29 @@ static int hvmemul_write_cr(
>      return rc;
>  }
> 
> +static int hvmemul_read_xcr(
> +    unsigned int reg,
> +    uint64_t *val,
> +    struct x86_emulate_ctxt *ctxt)
> +{
> +    int rc = x86emul_read_xcr(reg, val, ctxt);
> +
> +    if ( rc == X86EMUL_OKAY )
> +        HVMTRACE_LONG_2D(XCR_READ, reg, TRC_PAR_LONG(*val));
> +
> +    return rc;
> +}
> +
> +static int hvmemul_write_xcr(
> +    unsigned int reg,
> +    uint64_t val,
> +    struct x86_emulate_ctxt *ctxt)
> +{
> +    HVMTRACE_LONG_2D(XCR_WRITE, reg, TRC_PAR_LONG(val));
> +
> +    return x86emul_write_xcr(reg, val, ctxt);
> +}
> +
>  static int hvmemul_read_msr(
>      unsigned int reg,
>      uint64_t *val,
> @@ -1874,22 +1897,6 @@ static int hvmemul_get_fpu(
>  {
>      struct vcpu *curr = current;
> 
> -    switch ( type )
> -    {
> -    case X86EMUL_FPU_fpu:
> -    case X86EMUL_FPU_wait:
> -    case X86EMUL_FPU_mmx:
> -    case X86EMUL_FPU_xmm:
> -        break;
> -    case X86EMUL_FPU_ymm:
> -        if ( !(curr->arch.xcr0 & X86_XCR0_SSE) ||
> -             !(curr->arch.xcr0 & X86_XCR0_YMM) )
> -            return X86EMUL_UNHANDLEABLE;
> -        break;
> -    default:
> -        return X86EMUL_UNHANDLEABLE;
> -    }
> -
>      if ( !curr->fpu_dirtied )
>          hvm_funcs.fpu_dirty_intercept();
>      else if ( type == X86EMUL_FPU_fpu )
> @@ -2073,6 +2080,8 @@ static const struct x86_emulate_ops hvm_
>      .write_io      = hvmemul_write_io,
>      .read_cr       = hvmemul_read_cr,
>      .write_cr      = hvmemul_write_cr,
> +    .read_xcr      = hvmemul_read_xcr,
> +    .write_xcr     = hvmemul_write_xcr,
>      .read_msr      = hvmemul_read_msr,
>      .write_msr     = hvmemul_write_msr,
>      .wbinvd        = hvmemul_wbinvd,
> @@ -2098,6 +2107,8 @@ static const struct x86_emulate_ops hvm_
>      .write_io      = hvmemul_write_io_discard,
>      .read_cr       = hvmemul_read_cr,
>      .write_cr      = hvmemul_write_cr,
> +    .read_xcr      = hvmemul_read_xcr,
> +    .write_xcr     = hvmemul_write_xcr,
>      .read_msr      = hvmemul_read_msr,
>      .write_msr     = hvmemul_write_msr_discard,
>      .wbinvd        = hvmemul_wbinvd_discard,
> --- a/xen/arch/x86/pv/emul-priv-op.c
> +++ b/xen/arch/x86/pv/emul-priv-op.c
> @@ -1317,6 +1317,7 @@ static const struct x86_emulate_ops priv
>      .write_cr            = write_cr,
>      .read_dr             = read_dr,
>      .write_dr            = write_dr,
> +    .write_xcr           = x86emul_write_xcr,
>      .read_msr            = read_msr,
>      .write_msr           = write_msr,
>      .cpuid               = pv_emul_cpuid,
> --- a/xen/arch/x86/x86_emulate/x86_emulate.c
> +++ b/xen/arch/x86/x86_emulate/x86_emulate.c
> @@ -1114,10 +1114,30 @@ static int _get_fpu(
>      struct x86_emulate_ctxt *ctxt,
>      const struct x86_emulate_ops *ops)
>  {
> +    uint64_t xcr0;
>      int rc;
> 
>      fail_if(!ops->get_fpu);
>      ASSERT(type != X86EMUL_FPU_none);
> +
> +    if ( type < X86EMUL_FPU_ymm || !ops->read_xcr ||
> +         ops->read_xcr(0, &xcr0, ctxt) != X86EMUL_OKAY )
> +    {
> +        ASSERT(!ctxt->event_pending);
> +        xcr0 = 0;
> +    }
> +
> +    switch ( type )
> +    {
> +    case X86EMUL_FPU_ymm:
> +        if ( !(xcr0 & X86_XCR0_SSE) || !(xcr0 & X86_XCR0_YMM) )
> +            return X86EMUL_UNHANDLEABLE;
> +        break;
> +
> +    default:
> +        break;
> +    }
> +
>      rc = ops->get_fpu(fpu_handle_exception, fic, type, ctxt);
> 
>      if ( rc == X86EMUL_OKAY )
> @@ -5006,18 +5026,31 @@ x86_emulate(
>                  _regs.eflags |= X86_EFLAGS_AC;
>              break;
> 
> -#ifdef __XEN__
> +        case 0xd0: /* xgetbv */
> +            generate_exception_if(vex.pfx, EXC_UD);
> +            if ( !ops->read_cr || !ops->read_xcr ||
> +                 ops->read_cr(4, &cr4, ctxt) != X86EMUL_OKAY )
> +                cr4 = 0;
> +            generate_exception_if(!(cr4 & X86_CR4_OSXSAVE), EXC_UD);
> +            rc = ops->read_xcr(_regs.ecx, &msr_val, ctxt);
> +            if ( rc != X86EMUL_OKAY )
> +                goto done;
> +            _regs.r(ax) = (uint32_t)msr_val;
> +            _regs.r(dx) = msr_val >> 32;
> +            break;
> +
>          case 0xd1: /* xsetbv */
>              generate_exception_if(vex.pfx, EXC_UD);
> -            if ( !ops->read_cr || ops->read_cr(4, &cr4, ctxt) != X86EMUL_OKAY )
> +            if ( !ops->read_cr || !ops->write_xcr ||
> +                 ops->read_cr(4, &cr4, ctxt) != X86EMUL_OKAY )
>                  cr4 = 0;
>              generate_exception_if(!(cr4 & X86_CR4_OSXSAVE), EXC_UD);
> -            generate_exception_if(!mode_ring0() ||
> -                                  handle_xsetbv(_regs.ecx,
> -                                                _regs.eax | (_regs.rdx << 32)),
> -                                  EXC_GP, 0);
> +            generate_exception_if(!mode_ring0(), EXC_GP, 0);
> +            rc = ops->write_xcr(_regs.ecx,
> +                                _regs.eax | ((uint64_t)_regs.edx << 32), ctxt);
> +            if ( rc != X86EMUL_OKAY )
> +                goto done;
>              break;
> -#endif
> 
>          case 0xd4: /* vmfunc */
>              generate_exception_if(vex.pfx, EXC_UD);
> --- a/xen/arch/x86/x86_emulate/x86_emulate.h
> +++ b/xen/arch/x86/x86_emulate/x86_emulate.h
> @@ -398,6 +398,24 @@ struct x86_emulate_ops
>          struct x86_emulate_ctxt *ctxt);
> 
>      /*
> +     * read_xcr: Read from extended control register.
> +     *  @reg:   [IN ] Register to read.
> +     */
> +    int (*read_xcr)(
> +        unsigned int reg,
> +        uint64_t *val,
> +        struct x86_emulate_ctxt *ctxt);
> +
> +    /*
> +     * write_xcr: Write to extended control register.
> +     *  @reg:   [IN ] Register to write.
> +     */
> +    int (*write_xcr)(
> +        unsigned int reg,
> +        uint64_t val,
> +        struct x86_emulate_ctxt *ctxt);
> +
> +    /*
>       * read_msr: Read from model-specific register.
>       *  @reg:   [IN ] Register to read.
>       */
> @@ -683,6 +701,11 @@ static inline void x86_emulate_free_stat
>  void x86_emulate_free_state(struct x86_emulate_state *state);
>  #endif
> 
> +int x86emul_read_xcr(unsigned int reg, uint64_t *val,
> +                     struct x86_emulate_ctxt *ctxt);
> +int x86emul_write_xcr(unsigned int reg, uint64_t val,
> +                      struct x86_emulate_ctxt *ctxt);
> +
>  #endif
> 
>  static inline void x86_emul_hw_exception(
> --- a/xen/arch/x86/x86_emulate.c
> +++ b/xen/arch/x86/x86_emulate.c
> @@ -42,3 +42,50 @@
>  })
> 
>  #include "x86_emulate/x86_emulate.c"
> +
> +int x86emul_read_xcr(unsigned int reg, uint64_t *val,
> +                     struct x86_emulate_ctxt *ctxt)
> +{
> +    switch ( reg )
> +    {
> +    case 0:
> +        *val = current->arch.xcr0;
> +        return X86EMUL_OKAY;
> +
> +    case 1:
> +        if ( cpu_has_xgetbv1 && current->domain->arch.cpuid->xstate.xgetbv1 )
> +            break;
> +        /* fall through */
> +    default:
> +        x86_emul_hw_exception(TRAP_gp_fault, 0, ctxt);
> +        return X86EMUL_EXCEPTION;
> +    }
> +
> +    *val = xgetbv(reg);
> +
> +    return X86EMUL_OKAY;
> +}
> +
> +int x86emul_write_xcr(unsigned int reg, uint64_t val,
> +                      struct x86_emulate_ctxt *ctxt)
> +{
> +    switch ( reg )
> +    {
> +    case 0:
> +        break;
> +
> +    case 1:
> +        if ( cpu_has_xgetbv1 && current->domain->arch.cpuid->xstate.xgetbv1 )
> +            break;
> +        /* fall through */
> +    default:
> +    gp_fault:
> +        x86_emul_hw_exception(TRAP_gp_fault, 0, ctxt);
> +        return X86EMUL_EXCEPTION;
> +    }
> +
> +    if ( unlikely(handle_xsetbv(reg, val) != 0) )
> +        goto gp_fault;
> +
> +    return X86EMUL_OKAY;
> +}
> --- a/xen/include/asm-x86/hvm/trace.h
> +++ b/xen/include/asm-x86/hvm/trace.h
> @@ -33,6 +33,8 @@
>  #define DO_TRC_HVM_CR_WRITE64  DEFAULT_HVM_REGACCESS
>  #define DO_TRC_HVM_DR_READ     DEFAULT_HVM_REGACCESS
>  #define DO_TRC_HVM_DR_WRITE    DEFAULT_HVM_REGACCESS
> +#define DO_TRC_HVM_XCR_READ64  DEFAULT_HVM_REGACCESS
> +#define DO_TRC_HVM_XCR_WRITE64 DEFAULT_HVM_REGACCESS
>  #define DO_TRC_HVM_MSR_READ    DEFAULT_HVM_REGACCESS
>  #define DO_TRC_HVM_MSR_WRITE   DEFAULT_HVM_REGACCESS
>  #define DO_TRC_HVM_RDTSC       DEFAULT_HVM_REGACCESS
> --- a/xen/include/asm-x86/xstate.h
> +++ b/xen/include/asm-x86/xstate.h
> @@ -109,6 +109,17 @@ int xstate_alloc_save_area(struct vcpu *
>  void xstate_init(struct cpuinfo_x86 *c);
>  unsigned int xstate_ctxt_size(u64 xcr0);
> 
> +static inline uint64_t xgetbv(unsigned int index)
> +{
> +    uint32_t lo, hi;
> +
> +    ASSERT(index); /* get_xcr0() should be used instead. */
> +    asm volatile ( ".byte 0x0f,0x01,0xd0" /* xgetbv */
> +                   : "=a" (lo), "=d" (hi) : "c" (index) );
> +
> +    return lo | ((uint64_t)hi << 32);
> +}
> +
>  static inline bool xstate_all(const struct vcpu *v)
>  {
>      /*
> --- a/xen/include/public/trace.h
> +++ b/xen/include/public/trace.h
> @@ -235,6 +235,8 @@
>  #define TRC_HVM_TRAP             (TRC_HVM_HANDLER + 0x23)
>  #define TRC_HVM_TRAP_DEBUG       (TRC_HVM_HANDLER + 0x24)
>  #define TRC_HVM_VLAPIC           (TRC_HVM_HANDLER + 0x25)
> +#define TRC_HVM_XCR_READ64      (TRC_HVM_HANDLER + TRC_64_FLAG + 0x26)
> +#define TRC_HVM_XCR_WRITE64     (TRC_HVM_HANDLER + TRC_64_FLAG + 0x27)
> 
>  #define TRC_HVM_IOPORT_WRITE    (TRC_HVM_HANDLER + 0x216)
>  #define TRC_HVM_IOMEM_WRITE     (TRC_HVM_HANDLER + 0x217)
> 



* Re: [PATCH v5 03/14] x86emul: abstract out XCRn accesses
  2018-03-15 13:04 ` [PATCH v5 03/14] x86emul: abstract out XCRn accesses Jan Beulich
                     ` (2 preceding siblings ...)
  2018-03-19 13:56   ` Paul Durrant
@ 2018-03-20 17:22   ` George Dunlap
  3 siblings, 0 replies; 33+ messages in thread
From: George Dunlap @ 2018-03-20 17:22 UTC (permalink / raw)
  To: Jan Beulich, xen-devel; +Cc: George Dunlap, Andrew Cooper, Paul Durrant

On 03/15/2018 01:04 PM, Jan Beulich wrote:
> Use hooks, just like done for other special purpose registers.
> 
> This includes moving XCR0 checks from hvmemul_get_fpu() to the emulator
> itself as well as adding support for XGETBV emulation.
> 
> For now fuzzer reads will obtain the real values (minus the fuzzing of
> the hook pointer itself).
> 
> Signed-off-by: Jan Beulich <jbeulich@suse.com>

Tracing parts:

Reviewed-by: George Dunlap <george.dunlap@citrix.com>


* Re: [PATCH v5 05/14] x86/HVM: eliminate custom #MF/#XM handling
  2018-03-15 13:06 ` [PATCH v5 05/14] x86/HVM: eliminate custom #MF/#XM handling Jan Beulich
@ 2018-03-22 14:12   ` Roger Pau Monné
  2018-03-22 14:38     ` Jan Beulich
  0 siblings, 1 reply; 33+ messages in thread
From: Roger Pau Monné @ 2018-03-22 14:12 UTC (permalink / raw)
  To: Jan Beulich; +Cc: George Dunlap, xen-devel, Andrew Cooper

On Thu, Mar 15, 2018 at 07:06:36AM -0600, Jan Beulich wrote:
> @@ -8478,7 +8411,8 @@ x86_emulate(
>      }
>  
>   complete_insn: /* Commit shadow register state. */
> -    put_fpu(&fic, false, state, ctxt, ops);
> +    put_fpu(fpu_type, false, state, ctxt, ops);
> +    fpu_type = X86EMUL_FPU_none;
>  
>      /* Zero the upper 32 bits of %rip if not in 64-bit mode. */
>      if ( !mode_64bit() )
> @@ -8502,13 +8436,22 @@ x86_emulate(
>      ctxt->regs->eflags &= ~X86_EFLAGS_RF;
>  
>   done:
> -    put_fpu(&fic, fic.insn_bytes > 0 && dst.type == OP_MEM, state, ctxt, ops);
> +    put_fpu(fpu_type, insn_bytes > 0 && dst.type == OP_MEM, state, ctxt, ops);
>      put_stub(stub);
>      return rc;
>  #undef state
>  
>  #ifdef __XEN__
>   emulation_stub_failure:
> +    generate_exception_if(stub_exn.info.fields.trapnr == EXC_MF, EXC_MF);
> +    if ( stub_exn.info.fields.trapnr == EXC_XM )
> +    {
> +        unsigned long cr4;
> +
> +        if ( !ops->read_cr || !ops->read_cr(4, &cr4, ctxt) == X86EMUL_OKAY )

Is the second expression in the above line missing parentheses:

if ( !ops->read_cr || !(ops->read_cr(4, &cr4, ctxt) == X86EMUL_OKAY) )

Or should this be:

if ( !ops->read_cr || ops->read_cr(4, &cr4, ctxt) != X86EMUL_OKAY )

clang complains with:

In file included from x86_emulate.c:44:
./x86_emulate/x86_emulate.c:8665:31: error: logical not is only applied to the left hand side of
      this comparison [-Werror,-Wlogical-not-parentheses]
        if ( !ops->read_cr || !ops->read_cr(4, &cr4, ctxt) == X86EMUL_OKAY )
                              ^                            ~~
./x86_emulate/x86_emulate.c:8665:31: note: add parentheses after the '!' to evaluate the comparison
      first
        if ( !ops->read_cr || !ops->read_cr(4, &cr4, ctxt) == X86EMUL_OKAY )
                              ^
                               (                                          )
./x86_emulate/x86_emulate.c:8665:31: note: add parentheses around left hand side expression to
      silence this warning
        if ( !ops->read_cr || !ops->read_cr(4, &cr4, ctxt) == X86EMUL_OKAY )
                              ^
                              (                           )
1 error generated.


* Re: [PATCH v5 05/14] x86/HVM: eliminate custom #MF/#XM handling
  2018-03-22 14:12   ` Roger Pau Monné
@ 2018-03-22 14:38     ` Jan Beulich
  0 siblings, 0 replies; 33+ messages in thread
From: Jan Beulich @ 2018-03-22 14:38 UTC (permalink / raw)
  To: Roger Pau Monné; +Cc: George Dunlap, Andrew Cooper, xen-devel

>>> On 22.03.18 at 15:12, <roger.pau@citrix.com> wrote:
> On Thu, Mar 15, 2018 at 07:06:36AM -0600, Jan Beulich wrote:
>> @@ -8478,7 +8411,8 @@ x86_emulate(
>>      }
>>  
>>   complete_insn: /* Commit shadow register state. */
>> -    put_fpu(&fic, false, state, ctxt, ops);
>> +    put_fpu(fpu_type, false, state, ctxt, ops);
>> +    fpu_type = X86EMUL_FPU_none;
>>  
>>      /* Zero the upper 32 bits of %rip if not in 64-bit mode. */
>>      if ( !mode_64bit() )
>> @@ -8502,13 +8436,22 @@ x86_emulate(
>>      ctxt->regs->eflags &= ~X86_EFLAGS_RF;
>>  
>>   done:
>> -    put_fpu(&fic, fic.insn_bytes > 0 && dst.type == OP_MEM, state, ctxt, ops);
>> +    put_fpu(fpu_type, insn_bytes > 0 && dst.type == OP_MEM, state, ctxt, ops);
>>      put_stub(stub);
>>      return rc;
>>  #undef state
>>  
>>  #ifdef __XEN__
>>   emulation_stub_failure:
>> +    generate_exception_if(stub_exn.info.fields.trapnr == EXC_MF, EXC_MF);
>> +    if ( stub_exn.info.fields.trapnr == EXC_XM )
>> +    {
>> +        unsigned long cr4;
>> +
>> +        if ( !ops->read_cr || !ops->read_cr(4, &cr4, ctxt) == X86EMUL_OKAY )
> 
> Is the second expression in the above line missing parentheses:
> 
> if ( !ops->read_cr || !(ops->read_cr(4, &cr4, ctxt) == X86EMUL_OKAY) )
> 
> Or should this be:
> 
> if ( !ops->read_cr || ops->read_cr(4, &cr4, ctxt) != X86EMUL_OKAY )

Oops, yes indeed, the latter. Thanks for the report.

Jan



Thread overview: 33+ messages
2018-03-15 12:57 [PATCH v5 00/14] x86: emulator enhancements Jan Beulich
2018-03-15 13:02 ` [PATCH v5 01/14] x86emul: support 3DNow! insns Jan Beulich
2018-03-15 13:24   ` Andrew Cooper
2018-03-15 13:03 ` [PATCH v5 02/14] x86emul: place test blobs in executable section Jan Beulich
2018-03-15 13:04 ` [PATCH v5 03/14] x86emul: abstract out XCRn accesses Jan Beulich
2018-03-15 13:35   ` Andrew Cooper
2018-03-15 13:44     ` Jan Beulich
2018-03-15 13:55       ` Andrew Cooper
2018-03-15 15:41   ` Andrew Cooper
2018-03-15 16:08     ` Jan Beulich
2018-03-19 13:56   ` Paul Durrant
2018-03-20 17:22   ` George Dunlap
2018-03-15 13:04 ` [PATCH v5 04/14] x86emul: adjust_bnd() should check XCR0 Jan Beulich
2018-03-15 13:06 ` [PATCH v5 05/14] x86/HVM: eliminate custom #MF/#XM handling Jan Beulich
2018-03-22 14:12   ` Roger Pau Monné
2018-03-22 14:38     ` Jan Beulich
2018-03-15 13:07 ` [PATCH v5 06/14] x86emul: tell cmpxchg hook whether LOCK is in effect Jan Beulich
2018-03-15 13:08 ` [PATCH v5 07/14] x86emul: correctly handle CMPXCHG* comparison failures Jan Beulich
2018-03-15 13:08 ` [PATCH v5 08/14] x86emul: add read-modify-write hook Jan Beulich
2018-03-15 14:19   ` Andrew Cooper
2018-03-15 14:46     ` Jan Beulich
2018-03-15 14:48       ` Andrew Cooper
2018-03-15 13:09 ` [PATCH v5 09/14] x86emul: also handle shifts through ->rmw() Jan Beulich
2018-03-15 14:23   ` Andrew Cooper
2018-03-15 13:10 ` [PATCH v5 10/14] x86/HVM: do actual CMPXCHG in hvmemul_cmpxchg() Jan Beulich
2018-03-15 15:28   ` Andrew Cooper
2018-03-15 13:10 ` [PATCH v5 11/14] x86/HVM: make use of new read-modify-write emulator hook Jan Beulich
2018-03-15 13:12 ` [PATCH v5 12/14] x86/HVM: use x86emul_write_xcr() Jan Beulich
2018-03-15 15:43   ` Andrew Cooper
2018-03-15 16:05     ` Boris Ostrovsky
2018-03-19 12:57   ` Tian, Kevin
2018-03-15 13:13 ` [PATCH v5 13/14] x86/shadow: fully move unmap-dest into common code Jan Beulich
2018-03-15 13:13 ` [PATCH v5 14/14] x86/shadow: fold sh_x86_emulate_{write, cmpxchg}() into their only callers Jan Beulich
