* [PATCH 0/7] x86emul: a few small steps towards disintegration
@ 2021-08-11 12:21 Jan Beulich
From: Jan Beulich @ 2021-08-11 12:21 UTC
  To: xen-devel; +Cc: Andrew Cooper, Wei Liu, Roger Pau Monné

... of the huge monolithic source file. The series is largely code
movement and is therefore intended not to incur any functional change.

1: split off opcode 0f01 handling
2: split off opcode 0fae handling
3: split off opcode 0fc7 handling
4: split off FPU opcode handling
5: split off insn decoding
6: move x86_emul_blk() to separate source file
7: move various utility functions to separate source files

Jan




* [PATCH 1/7] x86emul: split off opcode 0f01 handling
From: Jan Beulich @ 2021-08-11 12:22 UTC
  To: xen-devel; +Cc: Andrew Cooper, Wei Liu, Roger Pau Monné

There's a fair number of sub-cases (some of them yet to be implemented),
so a separate function seems warranted.

The moved code is slightly adjusted in a few places, e.g. by replacing
EXC_* with X86_EXC_* (so that the EXC_* definitions don't need to move as
well; we want them phased out anyway).

Signed-off-by: Jan Beulich <jbeulich@suse.com>
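
For illustration only: the rename mentioned above is purely mechanical,
since the X86_EXC_* constants carry the same x86 vector numbers as the
legacy EXC_* names (e.g. #UD is vector 6, #GP is vector 13). The
stand-alone harness below is hypothetical, written just to demonstrate
this equivalence; it is not part of the patch.

    /* Hypothetical sketch of the EXC_* -> X86_EXC_* spelling change. */
    #include <stdio.h>

    /* Legacy names, local to x86_emulate.c (to be phased out): */
    #define EXC_UD      6   /* #UD: invalid opcode */
    #define EXC_GP     13   /* #GP: general protection fault */

    /* Namespaced replacements, usable from the split-out helpers: */
    #define X86_EXC_UD  6
    #define X86_EXC_GP 13

    int main(void)
    {
        /* The adjustment changes spelling only, not behaviour: */
        printf("EXC_UD == X86_EXC_UD: %d\n", EXC_UD == X86_EXC_UD);
        printf("EXC_GP == X86_EXC_GP: %d\n", EXC_GP == X86_EXC_GP);
        return 0;
    }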

--- a/tools/fuzz/x86_instruction_emulator/Makefile
+++ b/tools/fuzz/x86_instruction_emulator/Makefile
@@ -11,10 +11,13 @@ endif
 # Add libx86 to the build
 vpath %.c $(XEN_ROOT)/xen/lib/x86
 
+.PHONY: x86_emulate
 x86_emulate:
-	[ -L $@ ] || ln -sf $(XEN_ROOT)/xen/arch/x86/$@
+	mkdir -p $@
+	ln -sf $(XEN_ROOT)/xen/arch/x86/$@/*.[ch] $@/
 
-x86_emulate/%: x86_emulate ;
+x86_emulate/%.c: x86_emulate ;
+x86_emulate/%.h: x86_emulate ;
 
 x86-emulate.c x86-emulate.h wrappers.c: %:
 	[ -L $* ] || ln -sf $(XEN_ROOT)/tools/tests/x86_emulator/$*
@@ -31,18 +34,27 @@ x86.h := $(addprefix $(XEN_ROOT)/tools/i
                      cpuid.h cpuid-autogen.h)
 x86_emulate.h := x86-emulate.h x86_emulate/x86_emulate.h $(x86.h)
 
+OBJS := fuzz-emul.o x86-emulate.o
+OBJS += x86_emulate/0f01.o
+
 # x86-emulate.c will be implicit for both
-x86-emulate.o x86-emulate-cov.o: x86_emulate/x86_emulate.c $(x86_emulate.h)
+x86-emulate.o x86-emulate-cov.o: x86_emulate/x86_emulate.c $(x86_emulate.h) x86_emulate/private.h
 
 fuzz-emul.o fuzz-emulate-cov.o cpuid.o wrappers.o: $(x86_emulate.h)
 
-x86-insn-fuzzer.a: fuzz-emul.o x86-emulate.o cpuid.o
+$(filter x86_emulate/%.o,$(OBJS)): x86_emulate/%.o: x86_emulate/%.c x86_emulate/private.h $(x86_emulate.h)
+	$(CC) $(CPPFLAGS) $(CFLAGS) $(CFLAGS_$*.o) -c -o $@ $< $(APPEND_CFLAGS)
+
+$(patsubst %.o,%-cov.o,$(filter x86_emulate/%.o,$(OBJS))): x86_emulate/%-cov.o: x86_emulate/%.c x86_emulate/private.h $(x86_emulate.h)
+	$(CC) $(CPPFLAGS) $(CFLAGS) $(CFLAGS_$*.o) $(GCOV_FLAGS) -c -o $@ $< $(APPEND_CFLAGS)
+
+x86-insn-fuzzer.a: $(OBJS) cpuid.o
 	$(AR) rc $@ $^
 
-afl-harness: afl-harness.o fuzz-emul.o x86-emulate.o cpuid.o wrappers.o
+afl-harness: afl-harness.o $(OBJS) cpuid.o wrappers.o
 	$(CC) $(CFLAGS) $^ -o $@
 
-afl-harness-cov: afl-harness-cov.o fuzz-emul-cov.o x86-emulate-cov.o cpuid.o wrappers.o
+afl-harness-cov: afl-harness-cov.o $(patsubst %.o,%-cov.o,$(OBJS)) cpuid.o wrappers.o
 	$(CC) $(CFLAGS) $(GCOV_FLAGS) $^ -o $@
 
 # Common targets
--- a/tools/tests/x86_emulator/Makefile
+++ b/tools/tests/x86_emulator/Makefile
@@ -29,7 +29,7 @@ OPMASK := avx512f avx512dq avx512bw
 
 ifeq ($(origin XEN_COMPILE_ARCH),override)
 
-HOSTCFLAGS += -m32
+HOSTCFLAGS += -m32 -I..
 
 else
 
@@ -250,7 +250,10 @@ xop.h avx512f.h: simd-fma.c
 
 endif # 32-bit override
 
-$(TARGET): x86-emulate.o cpuid.o test_x86_emulator.o evex-disp8.o predicates.o wrappers.o
+OBJS := x86-emulate.o cpuid.o test_x86_emulator.o evex-disp8.o predicates.o wrappers.o
+OBJS += x86_emulate/0f01.o
+
+$(TARGET): $(OBJS)
 	$(HOSTCC) $(HOSTCFLAGS) -o $@ $^
 
 .PHONY: clean
@@ -274,8 +277,10 @@ else
 run32 clean32: %32: %
 endif
 
+.PHONY: x86_emulate
 x86_emulate:
-	[ -L $@ ] || ln -sf $(XEN_ROOT)/xen/arch/x86/$@
+	mkdir -p $@
+	ln -sf $(XEN_ROOT)/xen/arch/x86/$@/*.[ch] $@/
 
 x86_emulate/%: x86_emulate ;
 
@@ -287,13 +292,13 @@ x86.h := $(addprefix $(XEN_ROOT)/tools/i
                      x86-vendors.h x86-defns.h msr-index.h) \
          $(addprefix $(XEN_ROOT)/tools/include/xen/lib/x86/, \
                      cpuid.h cpuid-autogen.h)
-x86_emulate.h := x86-emulate.h x86_emulate/x86_emulate.h $(x86.h)
+x86_emulate.h := x86-emulate.h x86_emulate/x86_emulate.h x86_emulate/private.h $(x86.h)
 
-x86-emulate.o cpuid.o test_x86_emulator.o evex-disp8.o predicates.o wrappers.o: %.o: %.c $(x86_emulate.h)
+$(OBJS): %.o: %.c $(x86_emulate.h)
 	$(HOSTCC) $(HOSTCFLAGS) -c -g -o $@ $<
 
 x86-emulate.o: x86_emulate/x86_emulate.c
-x86-emulate.o: HOSTCFLAGS += -D__XEN_TOOLS__
+x86-emulate.o x86_emulate/%.o: HOSTCFLAGS += -D__XEN_TOOLS__
 
 # In order for our custom .type assembler directives to reliably land after
 # gcc's, we need to keep it from re-ordering top-level constructs.
--- a/tools/tests/x86_emulator/x86-emulate.c
+++ b/tools/tests/x86_emulator/x86-emulate.c
@@ -22,11 +22,9 @@
 
 /* For generic assembly code: use macros to define operation/operand sizes. */
 #ifdef __i386__
-# define r(name)       e ## name
 # define __OS          "l"  /* Operation Suffix */
 # define __OP          "e"  /* Operand Prefix */
 #else
-# define r(name)       r ## name
 # define __OS          "q"  /* Operation Suffix */
 # define __OP          "r"  /* Operand Prefix */
 #endif
@@ -265,12 +263,12 @@ void emul_test_put_fpu(
 
 static uint32_t pkru;
 
-static unsigned int rdpkru(void)
+unsigned int rdpkru(void)
 {
     return pkru;
 }
 
-static void wrpkru(unsigned int val)
+void wrpkru(unsigned int val)
 {
     pkru = val;
 }
--- a/tools/tests/x86_emulator/x86-emulate.h
+++ b/tools/tests/x86_emulator/x86-emulate.h
@@ -1,3 +1,6 @@
+#ifndef X86_EMULATE_H
+#define X86_EMULATE_H
+
 #include <assert.h>
 #include <stdbool.h>
 #include <stddef.h>
@@ -128,6 +131,9 @@ static inline bool xcr0_mask(uint64_t ma
     return cpu_has_xsave && ((xgetbv(0) & mask) == mask);
 }
 
+unsigned int rdpkru(void);
+void wrpkru(unsigned int val);
+
 #define cache_line_size() (cp.basic.clflush_size * 8)
 #define cpu_has_fpu        cp.basic.fpu
 #define cpu_has_mmx        cp.basic.mmx
@@ -205,3 +211,5 @@ void emul_test_put_fpu(
     struct x86_emulate_ctxt *ctxt,
     enum x86_emulate_fpu_type backout,
     const struct x86_emul_fpu_aux *aux);
+
+#endif /* X86_EMULATE_H */
--- a/xen/arch/x86/Makefile
+++ b/xen/arch/x86/Makefile
@@ -7,6 +7,7 @@ obj-y += mm/
 obj-$(CONFIG_XENOPROF) += oprofile/
 obj-$(CONFIG_PV) += pv/
 obj-y += x86_64/
+obj-y += x86_emulate/
 
 alternative-y := alternative.init.o
 alternative-$(CONFIG_LIVEPATCH) :=
--- a/xen/arch/x86/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate.c
@@ -23,8 +23,6 @@
 #undef cpuid
 #undef wbinvd
 
-#define r(name) r ## name
-
 #define cpu_has_amd_erratum(nr) \
         cpu_has_amd_erratum(&current_cpu_data, AMD_ERRATUM_##nr)
 
@@ -45,12 +43,6 @@
 
 #define FXSAVE_AREA current->arch.fpu_ctxt
 
-#ifndef CONFIG_HVM
-# define X86EMUL_NO_FPU
-# define X86EMUL_NO_MMX
-# define X86EMUL_NO_SIMD
-#endif
-
 #include "x86_emulate/x86_emulate.c"
 
 int x86emul_read_xcr(unsigned int reg, uint64_t *val,
--- /dev/null
+++ b/xen/arch/x86/x86_emulate/0f01.c
@@ -0,0 +1,349 @@
+/******************************************************************************
+ * 0f01.c - helper for x86_emulate.c
+ *
+ * Generic x86 (32-bit and 64-bit) instruction decoder and emulator.
+ *
+ * Copyright (c) 2005-2007 Keir Fraser
+ * Copyright (c) 2005-2007 XenSource Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "private.h"
+
+#define ad_bytes (s->ad_bytes) /* for truncate_ea() */
+
+int x86emul_0f01(struct x86_emulate_state *s,
+                 struct cpu_user_regs *regs,
+                 struct operand *dst,
+                 struct x86_emulate_ctxt *ctxt,
+                 const struct x86_emulate_ops *ops)
+{
+    enum x86_segment seg = (s->modrm_reg & 1) ? x86_seg_idtr : x86_seg_gdtr;
+    int rc;
+
+    switch ( s->modrm )
+    {
+        unsigned long base, limit, cr0, cr0w, cr4;
+        struct segment_register sreg;
+        uint64_t msr_val;
+
+    case 0xca: /* clac */
+    case 0xcb: /* stac */
+        vcpu_must_have(smap);
+        generate_exception_if(s->vex.pfx || !mode_ring0(), X86_EXC_UD);
+
+        regs->eflags &= ~X86_EFLAGS_AC;
+        if ( s->modrm == 0xcb )
+            regs->eflags |= X86_EFLAGS_AC;
+        break;
+
+    case 0xd0: /* xgetbv */
+        generate_exception_if(s->vex.pfx, X86_EXC_UD);
+        if ( !ops->read_cr || !ops->read_xcr ||
+             ops->read_cr(4, &cr4, ctxt) != X86EMUL_OKAY )
+            cr4 = 0;
+        generate_exception_if(!(cr4 & X86_CR4_OSXSAVE), X86_EXC_UD);
+        rc = ops->read_xcr(regs->ecx, &msr_val, ctxt);
+        if ( rc != X86EMUL_OKAY )
+            goto done;
+        regs->r(ax) = (uint32_t)msr_val;
+        regs->r(dx) = msr_val >> 32;
+        break;
+
+    case 0xd1: /* xsetbv */
+        generate_exception_if(s->vex.pfx, X86_EXC_UD);
+        if ( !ops->read_cr || !ops->write_xcr ||
+             ops->read_cr(4, &cr4, ctxt) != X86EMUL_OKAY )
+            cr4 = 0;
+        generate_exception_if(!(cr4 & X86_CR4_OSXSAVE), X86_EXC_UD);
+        generate_exception_if(!mode_ring0(), X86_EXC_GP, 0);
+        rc = ops->write_xcr(regs->ecx,
+                            regs->eax | ((uint64_t)regs->edx << 32), ctxt);
+        if ( rc != X86EMUL_OKAY )
+            goto done;
+        break;
+
+    case 0xd4: /* vmfunc */
+        generate_exception_if(s->vex.pfx, X86_EXC_UD);
+        fail_if(!ops->vmfunc);
+        if ( (rc = ops->vmfunc(ctxt)) != X86EMUL_OKAY )
+            goto done;
+        break;
+
+    case 0xd5: /* xend */
+        generate_exception_if(s->vex.pfx, X86_EXC_UD);
+        generate_exception_if(!vcpu_has_rtm(), X86_EXC_UD);
+        generate_exception_if(vcpu_has_rtm(), X86_EXC_GP, 0);
+        break;
+
+    case 0xd6: /* xtest */
+        generate_exception_if(s->vex.pfx, X86_EXC_UD);
+        generate_exception_if(!vcpu_has_rtm() && !vcpu_has_hle(),
+                              X86_EXC_UD);
+        /* Neither HLE nor RTM can be active when we get here. */
+        regs->eflags |= X86_EFLAGS_ZF;
+        break;
+
+    case 0xdf: /* invlpga */
+        fail_if(!ops->read_msr);
+        if ( (rc = ops->read_msr(MSR_EFER,
+                                 &msr_val, ctxt)) != X86EMUL_OKAY )
+            goto done;
+        /* Finding SVME set implies vcpu_has_svm(). */
+        generate_exception_if(!(msr_val & EFER_SVME) ||
+                              !in_protmode(ctxt, ops), X86_EXC_UD);
+        generate_exception_if(!mode_ring0(), X86_EXC_GP, 0);
+        fail_if(!ops->tlb_op);
+        if ( (rc = ops->tlb_op(x86emul_invlpga, truncate_ea(regs->r(ax)),
+                               regs->ecx, ctxt)) != X86EMUL_OKAY )
+            goto done;
+        break;
+
+    case 0xe8:
+        switch ( s->vex.pfx )
+        {
+        case vex_none: /* serialize */
+            host_and_vcpu_must_have(serialize);
+            asm volatile ( ".byte 0x0f, 0x01, 0xe8" );
+            break;
+        case vex_f2: /* xsusldtrk */
+            vcpu_must_have(tsxldtrk);
+            /*
+             * We're never in a transactional region when coming here
+             * - nothing else to do.
+             */
+            break;
+        default:
+            return X86EMUL_UNIMPLEMENTED;
+        }
+        break;
+
+    case 0xe9:
+        switch ( s->vex.pfx )
+        {
+        case vex_f2: /* xresldtrk */
+            vcpu_must_have(tsxldtrk);
+            /*
+             * We're never in a transactional region when coming here
+             * - nothing else to do.
+             */
+            break;
+        default:
+            return X86EMUL_UNIMPLEMENTED;
+        }
+        break;
+
+    case 0xee:
+        switch ( s->vex.pfx )
+        {
+        case vex_none: /* rdpkru */
+            if ( !ops->read_cr ||
+                 ops->read_cr(4, &cr4, ctxt) != X86EMUL_OKAY )
+                cr4 = 0;
+            generate_exception_if(!(cr4 & X86_CR4_PKE), X86_EXC_UD);
+            generate_exception_if(regs->ecx, X86_EXC_GP, 0);
+            regs->r(ax) = rdpkru();
+            regs->r(dx) = 0;
+            break;
+        default:
+            return X86EMUL_UNIMPLEMENTED;
+        }
+        break;
+
+    case 0xef:
+        switch ( s->vex.pfx )
+        {
+        case vex_none: /* wrpkru */
+            if ( !ops->read_cr ||
+                 ops->read_cr(4, &cr4, ctxt) != X86EMUL_OKAY )
+                cr4 = 0;
+            generate_exception_if(!(cr4 & X86_CR4_PKE), X86_EXC_UD);
+            generate_exception_if(regs->ecx | regs->edx, X86_EXC_GP, 0);
+            wrpkru(regs->eax);
+            break;
+        default:
+            return X86EMUL_UNIMPLEMENTED;
+        }
+        break;
+
+    case 0xf8: /* swapgs */
+        generate_exception_if(!mode_64bit(), X86_EXC_UD);
+        generate_exception_if(!mode_ring0(), X86_EXC_GP, 0);
+        fail_if(!ops->read_segment || !ops->read_msr ||
+                !ops->write_segment || !ops->write_msr);
+        if ( (rc = ops->read_segment(x86_seg_gs, &sreg,
+                                     ctxt)) != X86EMUL_OKAY ||
+             (rc = ops->read_msr(MSR_SHADOW_GS_BASE, &msr_val,
+                                 ctxt)) != X86EMUL_OKAY ||
+             (rc = ops->write_msr(MSR_SHADOW_GS_BASE, sreg.base,
+                                  ctxt)) != X86EMUL_OKAY )
+            goto done;
+        sreg.base = msr_val;
+        if ( (rc = ops->write_segment(x86_seg_gs, &sreg,
+                                      ctxt)) != X86EMUL_OKAY )
+        {
+            /* Best effort unwind (i.e. no error checking). */
+            ops->write_msr(MSR_SHADOW_GS_BASE, msr_val, ctxt);
+            goto done;
+        }
+        break;
+
+    case 0xf9: /* rdtscp */
+        fail_if(ops->read_msr == NULL);
+        if ( (rc = ops->read_msr(MSR_TSC_AUX,
+                                 &msr_val, ctxt)) != X86EMUL_OKAY )
+            goto done;
+        regs->r(cx) = (uint32_t)msr_val;
+        return X86EMUL_rdtsc;
+
+    case 0xfc: /* clzero */
+    {
+        unsigned long zero = 0;
+
+        vcpu_must_have(clzero);
+
+        base = ad_bytes == 8 ? regs->r(ax) :
+               ad_bytes == 4 ? regs->eax : regs->ax;
+        limit = ctxt->cpuid->basic.clflush_size * 8;
+        generate_exception_if(limit < sizeof(long) ||
+                              (limit & (limit - 1)), X86_EXC_UD);
+        base &= ~(limit - 1);
+        if ( ops->rep_stos )
+        {
+            unsigned long nr_reps = limit / sizeof(zero);
+
+            rc = ops->rep_stos(&zero, s->ea.mem.seg, base, sizeof(zero),
+                               &nr_reps, ctxt);
+            if ( rc == X86EMUL_OKAY )
+            {
+                base += nr_reps * sizeof(zero);
+                limit -= nr_reps * sizeof(zero);
+            }
+            else if ( rc != X86EMUL_UNHANDLEABLE )
+                goto done;
+        }
+        fail_if(limit && !ops->write);
+        while ( limit )
+        {
+            rc = ops->write(s->ea.mem.seg, base, &zero, sizeof(zero), ctxt);
+            if ( rc != X86EMUL_OKAY )
+                goto done;
+            base += sizeof(zero);
+            limit -= sizeof(zero);
+        }
+        break;
+    }
+
+#define _GRP7(mod, reg) \
+        (((mod) << 6) | ((reg) << 3)) ... (((mod) << 6) | ((reg) << 3) | 7)
+#define GRP7_MEM(reg) _GRP7(0, reg): case _GRP7(1, reg): case _GRP7(2, reg)
+#define GRP7_ALL(reg) GRP7_MEM(reg): case _GRP7(3, reg)
+
+    case GRP7_MEM(0): /* sgdt */
+    case GRP7_MEM(1): /* sidt */
+        ASSERT(s->ea.type == OP_MEM);
+        generate_exception_if(umip_active(ctxt, ops), X86_EXC_GP, 0);
+        fail_if(!ops->read_segment || !ops->write);
+        if ( (rc = ops->read_segment(seg, &sreg, ctxt)) )
+            goto done;
+        if ( mode_64bit() )
+            s->op_bytes = 8;
+        else if ( s->op_bytes == 2 )
+        {
+            sreg.base &= 0xffffff;
+            s->op_bytes = 4;
+        }
+        if ( (rc = ops->write(s->ea.mem.seg, s->ea.mem.off, &sreg.limit,
+                              2, ctxt)) != X86EMUL_OKAY ||
+             (rc = ops->write(s->ea.mem.seg, truncate_ea(s->ea.mem.off + 2),
+                              &sreg.base, s->op_bytes, ctxt)) != X86EMUL_OKAY )
+            goto done;
+        break;
+
+    case GRP7_MEM(2): /* lgdt */
+    case GRP7_MEM(3): /* lidt */
+        ASSERT(s->ea.type == OP_MEM);
+        generate_exception_if(!mode_ring0(), X86_EXC_GP, 0);
+        fail_if(ops->write_segment == NULL);
+        memset(&sreg, 0, sizeof(sreg));
+        if ( (rc = read_ulong(s->ea.mem.seg, s->ea.mem.off,
+                              &limit, 2, ctxt, ops)) ||
+             (rc = read_ulong(s->ea.mem.seg, truncate_ea(s->ea.mem.off + 2),
+                              &base, mode_64bit() ? 8 : 4, ctxt, ops)) )
+            goto done;
+        generate_exception_if(!is_canonical_address(base), X86_EXC_GP, 0);
+        sreg.base = base;
+        sreg.limit = limit;
+        if ( !mode_64bit() && s->op_bytes == 2 )
+            sreg.base &= 0xffffff;
+        if ( (rc = ops->write_segment(seg, &sreg, ctxt)) )
+            goto done;
+        break;
+
+    case GRP7_ALL(4): /* smsw */
+        generate_exception_if(umip_active(ctxt, ops), X86_EXC_GP, 0);
+        if ( s->ea.type == OP_MEM )
+        {
+            fail_if(!ops->write);
+            s->desc |= Mov; /* force writeback */
+            s->ea.bytes = 2;
+        }
+        else
+            s->ea.bytes = s->op_bytes;
+        *dst = s->ea;
+        fail_if(ops->read_cr == NULL);
+        if ( (rc = ops->read_cr(0, &dst->val, ctxt)) )
+            goto done;
+        break;
+
+    case GRP7_ALL(6): /* lmsw */
+        fail_if(ops->read_cr == NULL);
+        fail_if(ops->write_cr == NULL);
+        generate_exception_if(!mode_ring0(), X86_EXC_GP, 0);
+        if ( (rc = ops->read_cr(0, &cr0, ctxt)) )
+            goto done;
+        if ( s->ea.type == OP_REG )
+            cr0w = *s->ea.reg;
+        else if ( (rc = read_ulong(s->ea.mem.seg, s->ea.mem.off,
+                                   &cr0w, 2, ctxt, ops)) )
+            goto done;
+        /* LMSW can: (1) set bits 0-3; (2) clear bits 1-3. */
+        cr0 = (cr0 & ~0xe) | (cr0w & 0xf);
+        if ( (rc = ops->write_cr(0, cr0, ctxt)) )
+            goto done;
+        break;
+
+    case GRP7_MEM(7): /* invlpg */
+        ASSERT(s->ea.type == OP_MEM);
+        generate_exception_if(!mode_ring0(), X86_EXC_GP, 0);
+        fail_if(!ops->tlb_op);
+        if ( (rc = ops->tlb_op(x86emul_invlpg, s->ea.mem.off, s->ea.mem.seg,
+                               ctxt)) != X86EMUL_OKAY )
+            goto done;
+        break;
+
+#undef GRP7_ALL
+#undef GRP7_MEM
+#undef _GRP7
+
+    default:
+        return X86EMUL_UNIMPLEMENTED;
+    }
+
+    rc = X86EMUL_OKAY;
+
+ done:
+    return rc;
+}
--- /dev/null
+++ b/xen/arch/x86/x86_emulate/Makefile
@@ -0,0 +1 @@
+obj-y += 0f01.o
--- /dev/null
+++ b/xen/arch/x86/x86_emulate/private.h
@@ -0,0 +1,531 @@
+/******************************************************************************
+ * private.h - interface between x86_emulate.c and its helpers
+ *
+ * Copyright (c) 2005-2007 Keir Fraser
+ * Copyright (c) 2005-2007 XenSource Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifdef __XEN__
+
+# include <xen/kernel.h>
+# include <asm/msr-index.h>
+# include <asm/x86_emulate.h>
+
+# ifndef CONFIG_HVM
+#  define X86EMUL_NO_FPU
+#  define X86EMUL_NO_MMX
+#  define X86EMUL_NO_SIMD
+# endif
+
+#else /* !__XEN__ */
+# include "x86-emulate.h"
+#endif
+
+#ifdef __i386__
+# define mode_64bit() false
+# define r(name) e ## name
+#else
+# define mode_64bit() (ctxt->addr_size == 64)
+# define r(name) r ## name
+#endif
+
+/* Operand sizes: 8-bit operands or specified/overridden size. */
+#define ByteOp      (1<<0) /* 8-bit operands. */
+/* Destination operand type. */
+#define DstNone     (0<<1) /* No destination operand. */
+#define DstImplicit (0<<1) /* Destination operand is implicit in the opcode. */
+#define DstBitBase  (1<<1) /* Memory operand, bit string. */
+#define DstReg      (2<<1) /* Register operand. */
+#define DstEax      DstReg /* Register EAX (aka DstReg with no ModRM) */
+#define DstMem      (3<<1) /* Memory operand. */
+#define DstMask     (3<<1)
+/* Source operand type. */
+#define SrcNone     (0<<3) /* No source operand. */
+#define SrcImplicit (0<<3) /* Source operand is implicit in the opcode. */
+#define SrcReg      (1<<3) /* Register operand. */
+#define SrcEax      SrcReg /* Register EAX (aka SrcReg with no ModRM) */
+#define SrcMem      (2<<3) /* Memory operand. */
+#define SrcMem16    (3<<3) /* Memory operand (16-bit). */
+#define SrcImm      (4<<3) /* Immediate operand. */
+#define SrcImmByte  (5<<3) /* 8-bit sign-extended immediate operand. */
+#define SrcImm16    (6<<3) /* 16-bit zero-extended immediate operand. */
+#define SrcMask     (7<<3)
+/* Generic ModRM decode. */
+#define ModRM       (1<<6)
+/* vSIB addressing mode (0f38 extension opcodes only), aliasing ModRM. */
+#define vSIB        (1<<6)
+/* Destination is only written; never read. */
+#define Mov         (1<<7)
+/* VEX/EVEX (SIMD only): 2nd source operand unused (must be all ones) */
+#define TwoOp       Mov
+/* All operands are implicit in the opcode. */
+#define ImplicitOps (DstImplicit|SrcImplicit)
+
+typedef uint8_t opcode_desc_t;
+
+/* Type, address-of, and value of an instruction's operand. */
+struct operand {
+    enum { OP_REG, OP_MEM, OP_IMM, OP_NONE } type;
+    unsigned int bytes;
+
+    /* Operand value. */
+    unsigned long val;
+
+    /* Original operand value. */
+    unsigned long orig_val;
+
+    /* OP_REG: Pointer to register field. */
+    unsigned long *reg;
+
+    /* OP_MEM: Segment and offset. */
+    struct {
+        enum x86_segment seg;
+        unsigned long    off;
+    } mem;
+};
+
+#define REX_PREFIX 0x40
+#define REX_B 0x01
+#define REX_X 0x02
+#define REX_R 0x04
+#define REX_W 0x08
+
+enum simd_opsize {
+    simd_none,
+
+    /*
+     * Ordinary packed integers:
+     * - 64 bits without prefix 66 (MMX)
+     * - 128 bits with prefix 66 (SSEn)
+     * - 128/256/512 bits depending on VEX.L/EVEX.LR (AVX+)
+     */
+    simd_packed_int,
+
+    /*
+     * Ordinary packed/scalar floating point:
+     * - 128 bits without prefix or with prefix 66 (SSEn)
+     * - 128/256/512 bits depending on VEX.L/EVEX.LR (AVX+)
+     * - 32 bits with prefix F3 (scalar single)
+     * - 64 bits with prefix F2 (scalar double)
+     */
+    simd_any_fp,
+
+    /*
+     * Packed floating point:
+     * - 128 bits without prefix or with prefix 66 (SSEn)
+     * - 128/256/512 bits depending on VEX.L/EVEX.LR (AVX+)
+     */
+    simd_packed_fp,
+
+    /*
+     * Single precision packed/scalar floating point:
+     * - 128 bits without prefix (SSEn)
+     * - 128/256/512 bits depending on VEX.L/EVEX.LR (AVX+)
+     * - 32 bits with prefix F3 (scalar)
+     */
+    simd_single_fp,
+
+    /*
+     * Scalar floating point:
+     * - 32 bits with low opcode bit clear (scalar single)
+     * - 64 bits with low opcode bit set (scalar double)
+     */
+    simd_scalar_opc,
+
+    /*
+     * Scalar floating point:
+     * - 32/64 bits depending on VEX.W/EVEX.W
+     */
+    simd_scalar_vexw,
+
+    /*
+     * 128 bits of integer or floating point data, with no further
+     * formatting information, or with it encoded by EVEX.W.
+     */
+    simd_128,
+
+    /*
+     * 256 bits of integer or floating point data, with formatting
+     * encoded by EVEX.W.
+     */
+    simd_256,
+
+    /* Operand size encoded in non-standard way. */
+    simd_other
+};
+typedef uint8_t simd_opsize_t;
+
+#define vex_none 0
+
+enum vex_opcx {
+    vex_0f = vex_none + 1,
+    vex_0f38,
+    vex_0f3a,
+};
+
+enum vex_pfx {
+    vex_66 = vex_none + 1,
+    vex_f3,
+    vex_f2
+};
+
+union vex {
+    uint8_t raw[2];
+    struct {             /* SDM names */
+        uint8_t opcx:5;  /* mmmmm */
+        uint8_t b:1;     /* B */
+        uint8_t x:1;     /* X */
+        uint8_t r:1;     /* R */
+        uint8_t pfx:2;   /* pp */
+        uint8_t l:1;     /* L */
+        uint8_t reg:4;   /* vvvv */
+        uint8_t w:1;     /* W */
+    };
+};
+
+union evex {
+    uint8_t raw[3];
+    struct {             /* SDM names */
+        uint8_t opcx:2;  /* mm */
+        uint8_t mbz:2;
+        uint8_t R:1;     /* R' */
+        uint8_t b:1;     /* B */
+        uint8_t x:1;     /* X */
+        uint8_t r:1;     /* R */
+        uint8_t pfx:2;   /* pp */
+        uint8_t mbs:1;
+        uint8_t reg:4;   /* vvvv */
+        uint8_t w:1;     /* W */
+        uint8_t opmsk:3; /* aaa */
+        uint8_t RX:1;    /* V' */
+        uint8_t brs:1;   /* b */
+        uint8_t lr:2;    /* L'L */
+        uint8_t z:1;     /* z */
+    };
+};
+
+struct x86_emulate_state {
+    unsigned int op_bytes, ad_bytes;
+
+    enum {
+        ext_none = vex_none,
+        ext_0f   = vex_0f,
+        ext_0f38 = vex_0f38,
+        ext_0f3a = vex_0f3a,
+        /*
+         * For XOP use values such that the respective instruction field
+         * can be used without adjustment.
+         */
+        ext_8f08 = 8,
+        ext_8f09,
+        ext_8f0a,
+    } ext;
+    enum {
+        rmw_NONE,
+        rmw_adc,
+        rmw_add,
+        rmw_and,
+        rmw_btc,
+        rmw_btr,
+        rmw_bts,
+        rmw_dec,
+        rmw_inc,
+        rmw_neg,
+        rmw_not,
+        rmw_or,
+        rmw_rcl,
+        rmw_rcr,
+        rmw_rol,
+        rmw_ror,
+        rmw_sar,
+        rmw_sbb,
+        rmw_shl,
+        rmw_shld,
+        rmw_shr,
+        rmw_shrd,
+        rmw_sub,
+        rmw_xadd,
+        rmw_xchg,
+        rmw_xor,
+    } rmw;
+    enum {
+        blk_NONE,
+        blk_enqcmd,
+#ifndef X86EMUL_NO_FPU
+        blk_fld, /* FLDENV, FRSTOR */
+        blk_fst, /* FNSTENV, FNSAVE */
+#endif
+#if !defined(X86EMUL_NO_FPU) || !defined(X86EMUL_NO_MMX) || \
+    !defined(X86EMUL_NO_SIMD)
+        blk_fxrstor,
+        blk_fxsave,
+#endif
+        blk_movdir,
+    } blk;
+    uint8_t modrm, modrm_mod, modrm_reg, modrm_rm;
+    uint8_t sib_index, sib_scale;
+    uint8_t rex_prefix;
+    bool lock_prefix;
+    bool not_64bit; /* Instruction not available in 64bit. */
+    bool fpu_ctrl;  /* Instruction is an FPU control one. */
+    opcode_desc_t desc;
+    union vex vex;
+    union evex evex;
+    enum simd_opsize simd_size;
+
+    /*
+     * Data operand effective address (usually computed from ModRM).
+     * Default is a memory operand relative to segment DS.
+     */
+    struct operand ea;
+
+    /* Immediate operand values, if any. Use otherwise unused fields. */
+#define imm1 ea.val
+#define imm2 ea.orig_val
+
+    unsigned long ip;
+    struct cpu_user_regs *regs;
+
+#ifndef NDEBUG
+    /*
+     * Track caller of x86_decode_insn() to spot missing as well as
+     * premature calls to x86_emulate_free_state().
+     */
+    void *caller;
+#endif
+};
+
+/*
+ * Externally visible return codes from x86_emulate() are non-negative.
+ * Use negative values for internal state change indicators from helpers
+ * to the main function.
+ */
+#define X86EMUL_rdtsc        (-1)
+
+/*
+ * These EFLAGS bits are restored from saved value during emulation, and
+ * any changes are written back to the saved value after emulation.
+ */
+#define EFLAGS_MASK (X86_EFLAGS_OF | X86_EFLAGS_SF | X86_EFLAGS_ZF | \
+                     X86_EFLAGS_AF | X86_EFLAGS_PF | X86_EFLAGS_CF)
+
+/*
+ * These EFLAGS bits are modifiable (by POPF and IRET), possibly subject
+ * to further CPL and IOPL constraints.
+ */
+#define EFLAGS_MODIFIABLE (X86_EFLAGS_ID | X86_EFLAGS_AC | X86_EFLAGS_RF | \
+                           X86_EFLAGS_NT | X86_EFLAGS_IOPL | X86_EFLAGS_DF | \
+                           X86_EFLAGS_IF | X86_EFLAGS_TF | EFLAGS_MASK)
+
+#define truncate_word(ea, byte_width)           \
+({  unsigned long __ea = (ea);                  \
+    unsigned int _width = (byte_width);         \
+    ((_width == sizeof(unsigned long)) ? __ea : \
+     (__ea & ((1UL << (_width << 3)) - 1)));    \
+})
+#define truncate_ea(ea) truncate_word((ea), ad_bytes)
+
+#define fail_if(p)                                      \
+do {                                                    \
+    rc = (p) ? X86EMUL_UNHANDLEABLE : X86EMUL_OKAY;     \
+    if ( rc ) goto done;                                \
+} while (0)
+
+#define EXPECT(p)                                       \
+do {                                                    \
+    if ( unlikely(!(p)) )                               \
+    {                                                   \
+        ASSERT_UNREACHABLE();                           \
+        goto unhandleable;                              \
+    }                                                   \
+} while (0)
+
+static inline int mkec(uint8_t e, int32_t ec, ...)
+{
+    return (e < 32 && ((1u << e) & X86_EXC_HAVE_EC)) ? ec : X86_EVENT_NO_EC;
+}
+
+#define generate_exception_if(p, e, ec...)                                \
+({  if ( (p) ) {                                                          \
+        x86_emul_hw_exception(e, mkec(e, ##ec, 0), ctxt);                 \
+        rc = X86EMUL_EXCEPTION;                                           \
+        goto done;                                                        \
+    }                                                                     \
+})
+
+#define generate_exception(e, ec...) generate_exception_if(true, e, ##ec)
+
+static inline bool
+in_realmode(
+    struct x86_emulate_ctxt *ctxt,
+    const struct x86_emulate_ops *ops)
+{
+    unsigned long cr0;
+    int rc;
+
+    if ( ops->read_cr == NULL )
+        return 0;
+
+    rc = ops->read_cr(0, &cr0, ctxt);
+    return (!rc && !(cr0 & X86_CR0_PE));
+}
+
+static inline bool
+in_protmode(
+    struct x86_emulate_ctxt *ctxt,
+    const struct x86_emulate_ops *ops)
+{
+    return !(in_realmode(ctxt, ops) || (ctxt->regs->eflags & X86_EFLAGS_VM));
+}
+
+#define mode_ring0() ({                         \
+    int _cpl = x86emul_get_cpl(ctxt, ops);      \
+    fail_if(_cpl < 0);                          \
+    (_cpl == 0);                                \
+})
+
+#define vcpu_has_fpu()         (ctxt->cpuid->basic.fpu)
+#define vcpu_has_sep()         (ctxt->cpuid->basic.sep)
+#define vcpu_has_cx8()         (ctxt->cpuid->basic.cx8)
+#define vcpu_has_cmov()        (ctxt->cpuid->basic.cmov)
+#define vcpu_has_clflush()     (ctxt->cpuid->basic.clflush)
+#define vcpu_has_mmx()         (ctxt->cpuid->basic.mmx)
+#define vcpu_has_fxsr()        (ctxt->cpuid->basic.fxsr)
+#define vcpu_has_sse()         (ctxt->cpuid->basic.sse)
+#define vcpu_has_sse2()        (ctxt->cpuid->basic.sse2)
+#define vcpu_has_sse3()        (ctxt->cpuid->basic.sse3)
+#define vcpu_has_pclmulqdq()   (ctxt->cpuid->basic.pclmulqdq)
+#define vcpu_has_ssse3()       (ctxt->cpuid->basic.ssse3)
+#define vcpu_has_fma()         (ctxt->cpuid->basic.fma)
+#define vcpu_has_cx16()        (ctxt->cpuid->basic.cx16)
+#define vcpu_has_sse4_1()      (ctxt->cpuid->basic.sse4_1)
+#define vcpu_has_sse4_2()      (ctxt->cpuid->basic.sse4_2)
+#define vcpu_has_movbe()       (ctxt->cpuid->basic.movbe)
+#define vcpu_has_popcnt()      (ctxt->cpuid->basic.popcnt)
+#define vcpu_has_aesni()       (ctxt->cpuid->basic.aesni)
+#define vcpu_has_avx()         (ctxt->cpuid->basic.avx)
+#define vcpu_has_f16c()        (ctxt->cpuid->basic.f16c)
+#define vcpu_has_rdrand()      (ctxt->cpuid->basic.rdrand)
+
+#define vcpu_has_mmxext()      (ctxt->cpuid->extd.mmxext || vcpu_has_sse())
+#define vcpu_has_3dnow_ext()   (ctxt->cpuid->extd._3dnowext)
+#define vcpu_has_3dnow()       (ctxt->cpuid->extd._3dnow)
+#define vcpu_has_lahf_lm()     (ctxt->cpuid->extd.lahf_lm)
+#define vcpu_has_cr8_legacy()  (ctxt->cpuid->extd.cr8_legacy)
+#define vcpu_has_lzcnt()       (ctxt->cpuid->extd.abm)
+#define vcpu_has_sse4a()       (ctxt->cpuid->extd.sse4a)
+#define vcpu_has_misalignsse() (ctxt->cpuid->extd.misalignsse)
+#define vcpu_has_xop()         (ctxt->cpuid->extd.xop)
+#define vcpu_has_fma4()        (ctxt->cpuid->extd.fma4)
+#define vcpu_has_tbm()         (ctxt->cpuid->extd.tbm)
+#define vcpu_has_clzero()      (ctxt->cpuid->extd.clzero)
+#define vcpu_has_wbnoinvd()    (ctxt->cpuid->extd.wbnoinvd)
+
+#define vcpu_has_bmi1()        (ctxt->cpuid->feat.bmi1)
+#define vcpu_has_hle()         (ctxt->cpuid->feat.hle)
+#define vcpu_has_avx2()        (ctxt->cpuid->feat.avx2)
+#define vcpu_has_bmi2()        (ctxt->cpuid->feat.bmi2)
+#define vcpu_has_invpcid()     (ctxt->cpuid->feat.invpcid)
+#define vcpu_has_rtm()         (ctxt->cpuid->feat.rtm)
+#define vcpu_has_mpx()         (ctxt->cpuid->feat.mpx)
+#define vcpu_has_avx512f()     (ctxt->cpuid->feat.avx512f)
+#define vcpu_has_avx512dq()    (ctxt->cpuid->feat.avx512dq)
+#define vcpu_has_rdseed()      (ctxt->cpuid->feat.rdseed)
+#define vcpu_has_adx()         (ctxt->cpuid->feat.adx)
+#define vcpu_has_smap()        (ctxt->cpuid->feat.smap)
+#define vcpu_has_avx512_ifma() (ctxt->cpuid->feat.avx512_ifma)
+#define vcpu_has_clflushopt()  (ctxt->cpuid->feat.clflushopt)
+#define vcpu_has_clwb()        (ctxt->cpuid->feat.clwb)
+#define vcpu_has_avx512pf()    (ctxt->cpuid->feat.avx512pf)
+#define vcpu_has_avx512er()    (ctxt->cpuid->feat.avx512er)
+#define vcpu_has_avx512cd()    (ctxt->cpuid->feat.avx512cd)
+#define vcpu_has_sha()         (ctxt->cpuid->feat.sha)
+#define vcpu_has_avx512bw()    (ctxt->cpuid->feat.avx512bw)
+#define vcpu_has_avx512vl()    (ctxt->cpuid->feat.avx512vl)
+#define vcpu_has_avx512_vbmi() (ctxt->cpuid->feat.avx512_vbmi)
+#define vcpu_has_avx512_vbmi2() (ctxt->cpuid->feat.avx512_vbmi2)
+#define vcpu_has_gfni()        (ctxt->cpuid->feat.gfni)
+#define vcpu_has_vaes()        (ctxt->cpuid->feat.vaes)
+#define vcpu_has_vpclmulqdq()  (ctxt->cpuid->feat.vpclmulqdq)
+#define vcpu_has_avx512_vnni() (ctxt->cpuid->feat.avx512_vnni)
+#define vcpu_has_avx512_bitalg() (ctxt->cpuid->feat.avx512_bitalg)
+#define vcpu_has_avx512_vpopcntdq() (ctxt->cpuid->feat.avx512_vpopcntdq)
+#define vcpu_has_tsxldtrk()    (ctxt->cpuid->feat.tsxldtrk)
+#define vcpu_has_rdpid()       (ctxt->cpuid->feat.rdpid)
+#define vcpu_has_movdiri()     (ctxt->cpuid->feat.movdiri)
+#define vcpu_has_movdir64b()   (ctxt->cpuid->feat.movdir64b)
+#define vcpu_has_enqcmd()      (ctxt->cpuid->feat.enqcmd)
+#define vcpu_has_avx512_4vnniw() (ctxt->cpuid->feat.avx512_4vnniw)
+#define vcpu_has_avx512_4fmaps() (ctxt->cpuid->feat.avx512_4fmaps)
+#define vcpu_has_avx512_vp2intersect() (ctxt->cpuid->feat.avx512_vp2intersect)
+#define vcpu_has_serialize()   (ctxt->cpuid->feat.serialize)
+#define vcpu_has_avx_vnni()    (ctxt->cpuid->feat.avx_vnni)
+#define vcpu_has_avx512_bf16() (ctxt->cpuid->feat.avx512_bf16)
+
+#define vcpu_must_have(feat) \
+    generate_exception_if(!vcpu_has_##feat(), X86_EXC_UD)
+
+#ifdef __XEN__
+/*
+ * Note the difference between vcpu_must_have(<feature>) and
+ * host_and_vcpu_must_have(<feature>): The latter needs to be used when
+ * emulation code is using the same instruction class for carrying out
+ * the actual operation.
+ */
+# define host_and_vcpu_must_have(feat) ({ \
+    generate_exception_if(!cpu_has_##feat, X86_EXC_UD); \
+    vcpu_must_have(feat); \
+})
+#else
+/*
+ * For the test harness both are fine to be used interchangeably, i.e.
+ * features known to always be available (e.g. SSE/SSE2) to (64-bit) Xen
+ * may be checked for by just vcpu_must_have().
+ */
+# define host_and_vcpu_must_have(feat) vcpu_must_have(feat)
+#endif
+
+int x86emul_get_cpl(struct x86_emulate_ctxt *ctxt,
+                    const struct x86_emulate_ops *ops);
+
+int x86emul_0f01(struct x86_emulate_state *s,
+                 struct cpu_user_regs *regs,
+                 struct operand *dst,
+                 struct x86_emulate_ctxt *ctxt,
+                 const struct x86_emulate_ops *ops);
+
+static inline bool umip_active(struct x86_emulate_ctxt *ctxt,
+                               const struct x86_emulate_ops *ops)
+{
+    unsigned long cr4;
+
+    /* Intentionally not using mode_ring0() here to avoid its fail_if(). */
+    return x86emul_get_cpl(ctxt, ops) > 0 &&
+           ops->read_cr && ops->read_cr(4, &cr4, ctxt) == X86EMUL_OKAY &&
+           (cr4 & X86_CR4_UMIP);
+}
+
+/* Compatibility function: read guest memory, zero-extend result to a ulong. */
+static inline int read_ulong(enum x86_segment seg,
+                             unsigned long offset,
+                             unsigned long *val,
+                             unsigned int bytes,
+                             struct x86_emulate_ctxt *ctxt,
+                             const struct x86_emulate_ops *ops)
+{
+    *val = 0;
+    return ops->read(seg, offset, val, bytes, ctxt);
+}
--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -20,39 +20,7 @@
  * along with this program; If not, see <http://www.gnu.org/licenses/>.
  */
 
-/* Operand sizes: 8-bit operands or specified/overridden size. */
-#define ByteOp      (1<<0) /* 8-bit operands. */
-/* Destination operand type. */
-#define DstNone     (0<<1) /* No destination operand. */
-#define DstImplicit (0<<1) /* Destination operand is implicit in the opcode. */
-#define DstBitBase  (1<<1) /* Memory operand, bit string. */
-#define DstReg      (2<<1) /* Register operand. */
-#define DstEax      DstReg /* Register EAX (aka DstReg with no ModRM) */
-#define DstMem      (3<<1) /* Memory operand. */
-#define DstMask     (3<<1)
-/* Source operand type. */
-#define SrcNone     (0<<3) /* No source operand. */
-#define SrcImplicit (0<<3) /* Source operand is implicit in the opcode. */
-#define SrcReg      (1<<3) /* Register operand. */
-#define SrcEax      SrcReg /* Register EAX (aka SrcReg with no ModRM) */
-#define SrcMem      (2<<3) /* Memory operand. */
-#define SrcMem16    (3<<3) /* Memory operand (16-bit). */
-#define SrcImm      (4<<3) /* Immediate operand. */
-#define SrcImmByte  (5<<3) /* 8-bit sign-extended immediate operand. */
-#define SrcImm16    (6<<3) /* 16-bit zero-extended immediate operand. */
-#define SrcMask     (7<<3)
-/* Generic ModRM decode. */
-#define ModRM       (1<<6)
-/* vSIB addressing mode (0f38 extension opcodes only), aliasing ModRM. */
-#define vSIB        (1<<6)
-/* Destination is only written; never read. */
-#define Mov         (1<<7)
-/* VEX/EVEX (SIMD only): 2nd source operand unused (must be all ones) */
-#define TwoOp       Mov
-/* All operands are implicit in the opcode. */
-#define ImplicitOps (DstImplicit|SrcImplicit)
-
-typedef uint8_t opcode_desc_t;
+#include "private.h"
 
 static const opcode_desc_t opcode_table[256] = {
     /* 0x00 - 0x07 */
@@ -184,71 +152,6 @@ static const opcode_desc_t opcode_table[
     ImplicitOps, ImplicitOps, ByteOp|DstMem|SrcNone|ModRM, DstMem|SrcNone|ModRM
 };
 
-enum simd_opsize {
-    simd_none,
-
-    /*
-     * Ordinary packed integers:
-     * - 64 bits without prefix 66 (MMX)
-     * - 128 bits with prefix 66 (SSEn)
-     * - 128/256/512 bits depending on VEX.L/EVEX.LR (AVX+)
-     */
-    simd_packed_int,
-
-    /*
-     * Ordinary packed/scalar floating point:
-     * - 128 bits without prefix or with prefix 66 (SSEn)
-     * - 128/256/512 bits depending on VEX.L/EVEX.LR (AVX+)
-     * - 32 bits with prefix F3 (scalar single)
-     * - 64 bits with prefix F2 (scalar doubgle)
-     */
-    simd_any_fp,
-
-    /*
-     * Packed floating point:
-     * - 128 bits without prefix or with prefix 66 (SSEn)
-     * - 128/256/512 bits depending on VEX.L/EVEX.LR (AVX+)
-     */
-    simd_packed_fp,
-
-    /*
-     * Single precision packed/scalar floating point:
-     * - 128 bits without prefix (SSEn)
-     * - 128/256/512 bits depending on VEX.L/EVEX.LR (AVX+)
-     * - 32 bits with prefix F3 (scalar)
-     */
-    simd_single_fp,
-
-    /*
-     * Scalar floating point:
-     * - 32 bits with low opcode bit clear (scalar single)
-     * - 64 bits with low opcode bit set (scalar double)
-     */
-    simd_scalar_opc,
-
-    /*
-     * Scalar floating point:
-     * - 32/64 bits depending on VEX.W/EVEX.W
-     */
-    simd_scalar_vexw,
-
-    /*
-     * 128 bits of integer or floating point data, with no further
-     * formatting information, or with it encoded by EVEX.W.
-     */
-    simd_128,
-
-    /*
-     * 256 bits of integer or floating point data, with formatting
-     * encoded by EVEX.W.
-     */
-    simd_256,
-
-    /* Operand size encoded in non-standard way. */
-    simd_other
-};
-typedef uint8_t simd_opsize_t;
-
 enum disp8scale {
     /* Values 0 ... 4 are explicit sizes. */
     d8s_bw = 5,
@@ -670,45 +573,11 @@ static const struct ext8f09_table {
     [0xe1 ... 0xe3] = { .simd_size = simd_packed_int, .two_op = 1 },
 };
 
-#define REX_PREFIX 0x40
-#define REX_B 0x01
-#define REX_X 0x02
-#define REX_R 0x04
-#define REX_W 0x08
-
-#define vex_none 0
-
-enum vex_opcx {
-    vex_0f = vex_none + 1,
-    vex_0f38,
-    vex_0f3a,
-};
-
-enum vex_pfx {
-    vex_66 = vex_none + 1,
-    vex_f3,
-    vex_f2
-};
-
 #define VEX_PREFIX_DOUBLE_MASK 0x1
 #define VEX_PREFIX_SCALAR_MASK 0x2
 
 static const uint8_t sse_prefix[] = { 0x66, 0xf3, 0xf2 };
 
-union vex {
-    uint8_t raw[2];
-    struct {             /* SDM names */
-        uint8_t opcx:5;  /* mmmmm */
-        uint8_t b:1;     /* B */
-        uint8_t x:1;     /* X */
-        uint8_t r:1;     /* R */
-        uint8_t pfx:2;   /* pp */
-        uint8_t l:1;     /* L */
-        uint8_t reg:4;   /* vvvv */
-        uint8_t w:1;     /* W */
-    };
-};
-
 #ifdef __x86_64__
 # define PFX2 REX_PREFIX
 #else
@@ -748,27 +617,6 @@ union vex {
     } \
 } while (0)
 
-union evex {
-    uint8_t raw[3];
-    struct {             /* SDM names */
-        uint8_t opcx:2;  /* mm */
-        uint8_t mbz:2;
-        uint8_t R:1;     /* R' */
-        uint8_t b:1;     /* B */
-        uint8_t x:1;     /* X */
-        uint8_t r:1;     /* R */
-        uint8_t pfx:2;   /* pp */
-        uint8_t mbs:1;
-        uint8_t reg:4;   /* vvvv */
-        uint8_t w:1;     /* W */
-        uint8_t opmsk:3; /* aaa */
-        uint8_t RX:1;    /* V' */
-        uint8_t brs:1;   /* b */
-        uint8_t lr:2;    /* L'L */
-        uint8_t z:1;     /* z */
-    };
-};
-
 #define EVEX_PFX_BYTES 4
 #define init_evex(stub) ({ \
     uint8_t *buf_ = get_stub(stub); \
@@ -789,118 +637,6 @@ union evex {
 #define repe_prefix()  (vex.pfx == vex_f3)
 #define repne_prefix() (vex.pfx == vex_f2)
 
-/* Type, address-of, and value of an instruction's operand. */
-struct operand {
-    enum { OP_REG, OP_MEM, OP_IMM, OP_NONE } type;
-    unsigned int bytes;
-
-    /* Operand value. */
-    unsigned long val;
-
-    /* Original operand value. */
-    unsigned long orig_val;
-
-    /* OP_REG: Pointer to register field. */
-    unsigned long *reg;
-
-    /* OP_MEM: Segment and offset. */
-    struct {
-        enum x86_segment seg;
-        unsigned long    off;
-    } mem;
-};
-
-struct x86_emulate_state {
-    unsigned int op_bytes, ad_bytes;
-
-    enum {
-        ext_none = vex_none,
-        ext_0f   = vex_0f,
-        ext_0f38 = vex_0f38,
-        ext_0f3a = vex_0f3a,
-        /*
-         * For XOP use values such that the respective instruction field
-         * can be used without adjustment.
-         */
-        ext_8f08 = 8,
-        ext_8f09,
-        ext_8f0a,
-    } ext;
-    enum {
-        rmw_NONE,
-        rmw_adc,
-        rmw_add,
-        rmw_and,
-        rmw_btc,
-        rmw_btr,
-        rmw_bts,
-        rmw_dec,
-        rmw_inc,
-        rmw_neg,
-        rmw_not,
-        rmw_or,
-        rmw_rcl,
-        rmw_rcr,
-        rmw_rol,
-        rmw_ror,
-        rmw_sar,
-        rmw_sbb,
-        rmw_shl,
-        rmw_shld,
-        rmw_shr,
-        rmw_shrd,
-        rmw_sub,
-        rmw_xadd,
-        rmw_xchg,
-        rmw_xor,
-    } rmw;
-    enum {
-        blk_NONE,
-        blk_enqcmd,
-#ifndef X86EMUL_NO_FPU
-        blk_fld, /* FLDENV, FRSTOR */
-        blk_fst, /* FNSTENV, FNSAVE */
-#endif
-#if !defined(X86EMUL_NO_FPU) || !defined(X86EMUL_NO_MMX) || \
-    !defined(X86EMUL_NO_SIMD)
-        blk_fxrstor,
-        blk_fxsave,
-#endif
-        blk_movdir,
-    } blk;
-    uint8_t modrm, modrm_mod, modrm_reg, modrm_rm;
-    uint8_t sib_index, sib_scale;
-    uint8_t rex_prefix;
-    bool lock_prefix;
-    bool not_64bit; /* Instruction not available in 64bit. */
-    bool fpu_ctrl;  /* Instruction is an FPU control one. */
-    opcode_desc_t desc;
-    union vex vex;
-    union evex evex;
-    enum simd_opsize simd_size;
-
-    /*
-     * Data operand effective address (usually computed from ModRM).
-     * Default is a memory operand relative to segment DS.
-     */
-    struct operand ea;
-
-    /* Immediate operand values, if any. Use otherwise unused fields. */
-#define imm1 ea.val
-#define imm2 ea.orig_val
-
-    unsigned long ip;
-    struct cpu_user_regs *regs;
-
-#ifndef NDEBUG
-    /*
-     * Track caller of x86_decode_insn() to spot missing as well as
-     * premature calls to x86_emulate_free_state().
-     */
-    void *caller;
-#endif
-};
-
 #ifdef __x86_64__
 #define PTR_POISON ((void *)0x8086000000008086UL) /* non-canonical */
 #else
@@ -1049,21 +785,6 @@ struct x86_fxsr {
 #define _BYTES_PER_LONG "4"
 #endif
 
-/*
- * These EFLAGS bits are restored from saved value during emulation, and
- * any changes are written back to the saved value after emulation.
- */
-#define EFLAGS_MASK (X86_EFLAGS_OF | X86_EFLAGS_SF | X86_EFLAGS_ZF | \
-                     X86_EFLAGS_AF | X86_EFLAGS_PF | X86_EFLAGS_CF)
-
-/*
- * These EFLAGS bits are modifiable (by POPF and IRET), possibly subject
- * to further CPL and IOPL constraints.
- */
-#define EFLAGS_MODIFIABLE (X86_EFLAGS_ID | X86_EFLAGS_AC | X86_EFLAGS_RF | \
-                           X86_EFLAGS_NT | X86_EFLAGS_IOPL | X86_EFLAGS_DF | \
-                           X86_EFLAGS_IF | X86_EFLAGS_TF | EFLAGS_MASK)
-
 /* Before executing instruction: restore necessary bits in EFLAGS. */
 #define _PRE_EFLAGS(_sav, _msk, _tmp)                           \
 /* EFLAGS = (_sav & _msk) | (EFLAGS & ~_msk); _sav &= ~_msk; */ \
@@ -1223,36 +944,6 @@ do{ asm volatile (
 #define __emulate_1op_8byte(op, dst, eflags, extra...)
 #endif /* __i386__ */
 
-#define fail_if(p)                                      \
-do {                                                    \
-    rc = (p) ? X86EMUL_UNHANDLEABLE : X86EMUL_OKAY;     \
-    if ( rc ) goto done;                                \
-} while (0)
-
-#define EXPECT(p)                                       \
-do {                                                    \
-    if ( unlikely(!(p)) )                               \
-    {                                                   \
-        ASSERT_UNREACHABLE();                           \
-        goto unhandleable;                              \
-    }                                                   \
-} while (0)
-
-static inline int mkec(uint8_t e, int32_t ec, ...)
-{
-    return (e < 32 && ((1u << e) & EXC_HAS_EC)) ? ec : X86_EVENT_NO_EC;
-}
-
-#define generate_exception_if(p, e, ec...)                                \
-({  if ( (p) ) {                                                          \
-        x86_emul_hw_exception(e, mkec(e, ##ec, 0), ctxt);                 \
-        rc = X86EMUL_EXCEPTION;                                           \
-        goto done;                                                        \
-    }                                                                     \
-})
-
-#define generate_exception(e, ec...) generate_exception_if(true, e, ##ec)
-
 #ifdef __XEN__
 # define invoke_stub(pre, post, constraints...) do {                    \
     stub_exn.info = (union stub_exception_token) { .raw = ~0 };         \
@@ -1301,20 +992,6 @@ static inline int mkec(uint8_t e, int32_
 })
 #define insn_fetch_type(_type) ((_type)insn_fetch_bytes(sizeof(_type)))
 
-#define truncate_word(ea, byte_width)           \
-({  unsigned long __ea = (ea);                  \
-    unsigned int _width = (byte_width);         \
-    ((_width == sizeof(unsigned long)) ? __ea : \
-     (__ea & ((1UL << (_width << 3)) - 1)));    \
-})
-#define truncate_ea(ea) truncate_word((ea), ad_bytes)
-
-#ifdef __x86_64__
-# define mode_64bit() (ctxt->addr_size == 64)
-#else
-# define mode_64bit() false
-#endif
-
 /*
  * Given byte has even parity (even number of 1s)? SDM Vol. 1 Sec. 3.4.3.1,
  * "Status Flags": EFLAGS.PF reflects parity of least-sig. byte of result only.
@@ -1655,19 +1332,6 @@ static void __put_rep_prefix(
     ea__;                                                                 \
 })
 
-/* Compatibility function: read guest memory, zero-extend result to a ulong. */
-static int read_ulong(
-        enum x86_segment seg,
-        unsigned long offset,
-        unsigned long *val,
-        unsigned int bytes,
-        struct x86_emulate_ctxt *ctxt,
-        const struct x86_emulate_ops *ops)
-{
-    *val = 0;
-    return ops->read(seg, offset, val, bytes, ctxt);
-}
-
 /*
  * Unsigned multiplication with double-word result.
  * IN:  Multiplicand=m[0], Multiplier=m[1]
@@ -1792,10 +1456,8 @@ test_cc(
     return (!!rc ^ (condition & 1));
 }
 
-static int
-get_cpl(
-    struct x86_emulate_ctxt *ctxt,
-    const struct x86_emulate_ops  *ops)
+int x86emul_get_cpl(struct x86_emulate_ctxt *ctxt,
+                    const struct x86_emulate_ops *ops)
 {
     struct segment_register reg;
 
@@ -1814,17 +1476,12 @@ _mode_iopl(
     struct x86_emulate_ctxt *ctxt,
     const struct x86_emulate_ops  *ops)
 {
-    int cpl = get_cpl(ctxt, ops);
+    int cpl = x86emul_get_cpl(ctxt, ops);
     if ( cpl == -1 )
         return -1;
     return cpl <= MASK_EXTR(ctxt->regs->eflags, X86_EFLAGS_IOPL);
 }
 
-#define mode_ring0() ({                         \
-    int _cpl = get_cpl(ctxt, ops);              \
-    fail_if(_cpl < 0);                          \
-    (_cpl == 0);                                \
-})
 #define mode_iopl() ({                          \
     int _iopl = _mode_iopl(ctxt, ops);          \
     fail_if(_iopl < 0);                         \
@@ -1832,7 +1489,7 @@ _mode_iopl(
 })
 #define mode_vif() ({                                        \
     cr4 = 0;                                                 \
-    if ( ops->read_cr && get_cpl(ctxt, ops) == 3 )           \
+    if ( ops->read_cr && x86emul_get_cpl(ctxt, ops) == 3 )   \
     {                                                        \
         rc = ops->read_cr(4, &cr4, ctxt);                    \
         if ( rc != X86EMUL_OKAY ) goto done;                 \
@@ -1900,29 +1557,6 @@ static int ioport_access_check(
 }
 
 static bool
-in_realmode(
-    struct x86_emulate_ctxt *ctxt,
-    const struct x86_emulate_ops  *ops)
-{
-    unsigned long cr0;
-    int rc;
-
-    if ( ops->read_cr == NULL )
-        return 0;
-
-    rc = ops->read_cr(0, &cr0, ctxt);
-    return (!rc && !(cr0 & X86_CR0_PE));
-}
-
-static bool
-in_protmode(
-    struct x86_emulate_ctxt *ctxt,
-    const struct x86_emulate_ops  *ops)
-{
-    return !(in_realmode(ctxt, ops) || (ctxt->regs->eflags & X86_EFLAGS_VM));
-}
-
-static bool
 _amd_like(const struct cpuid_policy *cp)
 {
     return cp->x86_vendor & (X86_VENDOR_AMD | X86_VENDOR_HYGON);
@@ -1934,107 +1568,6 @@ amd_like(const struct x86_emulate_ctxt *
     return _amd_like(ctxt->cpuid);
 }
 
-#define vcpu_has_fpu()         (ctxt->cpuid->basic.fpu)
-#define vcpu_has_sep()         (ctxt->cpuid->basic.sep)
-#define vcpu_has_cx8()         (ctxt->cpuid->basic.cx8)
-#define vcpu_has_cmov()        (ctxt->cpuid->basic.cmov)
-#define vcpu_has_clflush()     (ctxt->cpuid->basic.clflush)
-#define vcpu_has_mmx()         (ctxt->cpuid->basic.mmx)
-#define vcpu_has_fxsr()        (ctxt->cpuid->basic.fxsr)
-#define vcpu_has_sse()         (ctxt->cpuid->basic.sse)
-#define vcpu_has_sse2()        (ctxt->cpuid->basic.sse2)
-#define vcpu_has_sse3()        (ctxt->cpuid->basic.sse3)
-#define vcpu_has_pclmulqdq()   (ctxt->cpuid->basic.pclmulqdq)
-#define vcpu_has_ssse3()       (ctxt->cpuid->basic.ssse3)
-#define vcpu_has_fma()         (ctxt->cpuid->basic.fma)
-#define vcpu_has_cx16()        (ctxt->cpuid->basic.cx16)
-#define vcpu_has_sse4_1()      (ctxt->cpuid->basic.sse4_1)
-#define vcpu_has_sse4_2()      (ctxt->cpuid->basic.sse4_2)
-#define vcpu_has_movbe()       (ctxt->cpuid->basic.movbe)
-#define vcpu_has_popcnt()      (ctxt->cpuid->basic.popcnt)
-#define vcpu_has_aesni()       (ctxt->cpuid->basic.aesni)
-#define vcpu_has_avx()         (ctxt->cpuid->basic.avx)
-#define vcpu_has_f16c()        (ctxt->cpuid->basic.f16c)
-#define vcpu_has_rdrand()      (ctxt->cpuid->basic.rdrand)
-
-#define vcpu_has_mmxext()      (ctxt->cpuid->extd.mmxext || vcpu_has_sse())
-#define vcpu_has_3dnow_ext()   (ctxt->cpuid->extd._3dnowext)
-#define vcpu_has_3dnow()       (ctxt->cpuid->extd._3dnow)
-#define vcpu_has_lahf_lm()     (ctxt->cpuid->extd.lahf_lm)
-#define vcpu_has_cr8_legacy()  (ctxt->cpuid->extd.cr8_legacy)
-#define vcpu_has_lzcnt()       (ctxt->cpuid->extd.abm)
-#define vcpu_has_sse4a()       (ctxt->cpuid->extd.sse4a)
-#define vcpu_has_misalignsse() (ctxt->cpuid->extd.misalignsse)
-#define vcpu_has_xop()         (ctxt->cpuid->extd.xop)
-#define vcpu_has_fma4()        (ctxt->cpuid->extd.fma4)
-#define vcpu_has_tbm()         (ctxt->cpuid->extd.tbm)
-#define vcpu_has_clzero()      (ctxt->cpuid->extd.clzero)
-#define vcpu_has_wbnoinvd()    (ctxt->cpuid->extd.wbnoinvd)
-
-#define vcpu_has_bmi1()        (ctxt->cpuid->feat.bmi1)
-#define vcpu_has_hle()         (ctxt->cpuid->feat.hle)
-#define vcpu_has_avx2()        (ctxt->cpuid->feat.avx2)
-#define vcpu_has_bmi2()        (ctxt->cpuid->feat.bmi2)
-#define vcpu_has_invpcid()     (ctxt->cpuid->feat.invpcid)
-#define vcpu_has_rtm()         (ctxt->cpuid->feat.rtm)
-#define vcpu_has_mpx()         (ctxt->cpuid->feat.mpx)
-#define vcpu_has_avx512f()     (ctxt->cpuid->feat.avx512f)
-#define vcpu_has_avx512dq()    (ctxt->cpuid->feat.avx512dq)
-#define vcpu_has_rdseed()      (ctxt->cpuid->feat.rdseed)
-#define vcpu_has_adx()         (ctxt->cpuid->feat.adx)
-#define vcpu_has_smap()        (ctxt->cpuid->feat.smap)
-#define vcpu_has_avx512_ifma() (ctxt->cpuid->feat.avx512_ifma)
-#define vcpu_has_clflushopt()  (ctxt->cpuid->feat.clflushopt)
-#define vcpu_has_clwb()        (ctxt->cpuid->feat.clwb)
-#define vcpu_has_avx512pf()    (ctxt->cpuid->feat.avx512pf)
-#define vcpu_has_avx512er()    (ctxt->cpuid->feat.avx512er)
-#define vcpu_has_avx512cd()    (ctxt->cpuid->feat.avx512cd)
-#define vcpu_has_sha()         (ctxt->cpuid->feat.sha)
-#define vcpu_has_avx512bw()    (ctxt->cpuid->feat.avx512bw)
-#define vcpu_has_avx512vl()    (ctxt->cpuid->feat.avx512vl)
-#define vcpu_has_avx512_vbmi() (ctxt->cpuid->feat.avx512_vbmi)
-#define vcpu_has_avx512_vbmi2() (ctxt->cpuid->feat.avx512_vbmi2)
-#define vcpu_has_gfni()        (ctxt->cpuid->feat.gfni)
-#define vcpu_has_vaes()        (ctxt->cpuid->feat.vaes)
-#define vcpu_has_vpclmulqdq()  (ctxt->cpuid->feat.vpclmulqdq)
-#define vcpu_has_avx512_vnni() (ctxt->cpuid->feat.avx512_vnni)
-#define vcpu_has_avx512_bitalg() (ctxt->cpuid->feat.avx512_bitalg)
-#define vcpu_has_avx512_vpopcntdq() (ctxt->cpuid->feat.avx512_vpopcntdq)
-#define vcpu_has_tsxldtrk()    (ctxt->cpuid->feat.tsxldtrk)
-#define vcpu_has_rdpid()       (ctxt->cpuid->feat.rdpid)
-#define vcpu_has_movdiri()     (ctxt->cpuid->feat.movdiri)
-#define vcpu_has_movdir64b()   (ctxt->cpuid->feat.movdir64b)
-#define vcpu_has_enqcmd()      (ctxt->cpuid->feat.enqcmd)
-#define vcpu_has_avx512_4vnniw() (ctxt->cpuid->feat.avx512_4vnniw)
-#define vcpu_has_avx512_4fmaps() (ctxt->cpuid->feat.avx512_4fmaps)
-#define vcpu_has_avx512_vp2intersect() (ctxt->cpuid->feat.avx512_vp2intersect)
-#define vcpu_has_serialize()   (ctxt->cpuid->feat.serialize)
-#define vcpu_has_avx_vnni()    (ctxt->cpuid->feat.avx_vnni)
-#define vcpu_has_avx512_bf16() (ctxt->cpuid->feat.avx512_bf16)
-
-#define vcpu_must_have(feat) \
-    generate_exception_if(!vcpu_has_##feat(), EXC_UD)
-
-#ifdef __XEN__
-/*
- * Note the difference between vcpu_must_have(<feature>) and
- * host_and_vcpu_must_have(<feature>): The latter needs to be used when
- * emulation code is using the same instruction class for carrying out
- * the actual operation.
- */
-#define host_and_vcpu_must_have(feat) ({ \
-    generate_exception_if(!cpu_has_##feat, EXC_UD); \
-    vcpu_must_have(feat); \
-})
-#else
-/*
- * For the test harness both are fine to be used interchangeably, i.e.
- * features known to always be available (e.g. SSE/SSE2) to (64-bit) Xen
- * may be checked for by just vcpu_must_have().
- */
-#define host_and_vcpu_must_have(feat) vcpu_must_have(feat)
-#endif
-
 /* Initialise output state in x86_emulate_ctxt */
 static void init_context(struct x86_emulate_ctxt *ctxt)
 {
@@ -2081,7 +1614,7 @@ protmode_load_seg(
     enum x86_segment sel_seg = (sel & 4) ? x86_seg_ldtr : x86_seg_gdtr;
     struct { uint32_t a, b; } desc, desc_hi = {};
     uint8_t dpl, rpl;
-    int cpl = get_cpl(ctxt, ops);
+    int cpl = x86emul_get_cpl(ctxt, ops);
     uint32_t a_flag = 0x100;
     int rc, fault_type = EXC_GP;
 
@@ -2481,17 +2014,6 @@ static bool is_branch_step(struct x86_em
            (debugctl & IA32_DEBUGCTLMSR_BTF);
 }
 
-static bool umip_active(struct x86_emulate_ctxt *ctxt,
-                        const struct x86_emulate_ops *ops)
-{
-    unsigned long cr4;
-
-    /* Intentionally not using mode_ring0() here to avoid its fail_if(). */
-    return get_cpl(ctxt, ops) > 0 &&
-           ops->read_cr && ops->read_cr(4, &cr4, ctxt) == X86EMUL_OKAY &&
-           (cr4 & X86_CR4_UMIP);
-}
-
 static void adjust_bnd(struct x86_emulate_ctxt *ctxt,
                        const struct x86_emulate_ops *ops, enum vex_pfx pfx)
 {
@@ -5703,317 +5225,8 @@ x86_emulate(
         break;
 
     case X86EMUL_OPC(0x0f, 0x01): /* Grp7 */
-    {
-        unsigned long base, limit, cr0, cr0w;
-
-        seg = (modrm_reg & 1) ? x86_seg_idtr : x86_seg_gdtr;
-
-        switch( modrm )
-        {
-        case 0xca: /* clac */
-        case 0xcb: /* stac */
-            vcpu_must_have(smap);
-            generate_exception_if(vex.pfx || !mode_ring0(), EXC_UD);
-
-            _regs.eflags &= ~X86_EFLAGS_AC;
-            if ( modrm == 0xcb )
-                _regs.eflags |= X86_EFLAGS_AC;
-            break;
-
-        case 0xd0: /* xgetbv */
-            generate_exception_if(vex.pfx, EXC_UD);
-            if ( !ops->read_cr || !ops->read_xcr ||
-                 ops->read_cr(4, &cr4, ctxt) != X86EMUL_OKAY )
-                cr4 = 0;
-            generate_exception_if(!(cr4 & X86_CR4_OSXSAVE), EXC_UD);
-            rc = ops->read_xcr(_regs.ecx, &msr_val, ctxt);
-            if ( rc != X86EMUL_OKAY )
-                goto done;
-            _regs.r(ax) = (uint32_t)msr_val;
-            _regs.r(dx) = msr_val >> 32;
-            break;
-
-        case 0xd1: /* xsetbv */
-            generate_exception_if(vex.pfx, EXC_UD);
-            if ( !ops->read_cr || !ops->write_xcr ||
-                 ops->read_cr(4, &cr4, ctxt) != X86EMUL_OKAY )
-                cr4 = 0;
-            generate_exception_if(!(cr4 & X86_CR4_OSXSAVE), EXC_UD);
-            generate_exception_if(!mode_ring0(), EXC_GP, 0);
-            rc = ops->write_xcr(_regs.ecx,
-                                _regs.eax | ((uint64_t)_regs.edx << 32), ctxt);
-            if ( rc != X86EMUL_OKAY )
-                goto done;
-            break;
-
-        case 0xd4: /* vmfunc */
-            generate_exception_if(vex.pfx, EXC_UD);
-            fail_if(!ops->vmfunc);
-            if ( (rc = ops->vmfunc(ctxt)) != X86EMUL_OKAY )
-                goto done;
-            break;
-
-        case 0xd5: /* xend */
-            generate_exception_if(vex.pfx, EXC_UD);
-            generate_exception_if(!vcpu_has_rtm(), EXC_UD);
-            generate_exception_if(vcpu_has_rtm(), EXC_GP, 0);
-            break;
-
-        case 0xd6: /* xtest */
-            generate_exception_if(vex.pfx, EXC_UD);
-            generate_exception_if(!vcpu_has_rtm() && !vcpu_has_hle(),
-                                  EXC_UD);
-            /* Neither HLE nor RTM can be active when we get here. */
-            _regs.eflags |= X86_EFLAGS_ZF;
-            break;
-
-        case 0xdf: /* invlpga */
-            fail_if(!ops->read_msr);
-            if ( (rc = ops->read_msr(MSR_EFER,
-                                     &msr_val, ctxt)) != X86EMUL_OKAY )
-                goto done;
-            /* Finding SVME set implies vcpu_has_svm(). */
-            generate_exception_if(!(msr_val & EFER_SVME) ||
-                                  !in_protmode(ctxt, ops), EXC_UD);
-            generate_exception_if(!mode_ring0(), EXC_GP, 0);
-            fail_if(!ops->tlb_op);
-            if ( (rc = ops->tlb_op(x86emul_invlpga, truncate_ea(_regs.r(ax)),
-                                   _regs.ecx, ctxt)) != X86EMUL_OKAY )
-                goto done;
-            break;
-
-        case 0xe8:
-            switch ( vex.pfx )
-            {
-            case vex_none: /* serialize */
-                host_and_vcpu_must_have(serialize);
-                asm volatile ( ".byte 0x0f, 0x01, 0xe8" );
-                break;
-            case vex_f2: /* xsusldtrk */
-                vcpu_must_have(tsxldtrk);
-                /*
-                 * We're never in a transactional region when coming here
-                 * - nothing else to do.
-                 */
-                break;
-            default:
-                goto unimplemented_insn;
-            }
-            break;
-
-        case 0xe9:
-            switch ( vex.pfx )
-            {
-            case vex_f2: /* xresldtrk */
-                vcpu_must_have(tsxldtrk);
-                /*
-                 * We're never in a transactional region when coming here
-                 * - nothing else to do.
-                 */
-                break;
-            default:
-                goto unimplemented_insn;
-            }
-            break;
-
-        case 0xee:
-            switch ( vex.pfx )
-            {
-            case vex_none: /* rdpkru */
-                if ( !ops->read_cr ||
-                     ops->read_cr(4, &cr4, ctxt) != X86EMUL_OKAY )
-                    cr4 = 0;
-                generate_exception_if(!(cr4 & X86_CR4_PKE), EXC_UD);
-                generate_exception_if(_regs.ecx, EXC_GP, 0);
-                _regs.r(ax) = rdpkru();
-                _regs.r(dx) = 0;
-                break;
-            default:
-                goto unimplemented_insn;
-            }
-            break;
-
-        case 0xef:
-            switch ( vex.pfx )
-            {
-            case vex_none: /* wrpkru */
-                if ( !ops->read_cr ||
-                     ops->read_cr(4, &cr4, ctxt) != X86EMUL_OKAY )
-                    cr4 = 0;
-                generate_exception_if(!(cr4 & X86_CR4_PKE), EXC_UD);
-                generate_exception_if(_regs.ecx | _regs.edx, EXC_GP, 0);
-                wrpkru(_regs.eax);
-                break;
-            default:
-                goto unimplemented_insn;
-            }
-            break;
-
-        case 0xf8: /* swapgs */
-            generate_exception_if(!mode_64bit(), EXC_UD);
-            generate_exception_if(!mode_ring0(), EXC_GP, 0);
-            fail_if(!ops->read_segment || !ops->read_msr ||
-                    !ops->write_segment || !ops->write_msr);
-            if ( (rc = ops->read_segment(x86_seg_gs, &sreg,
-                                         ctxt)) != X86EMUL_OKAY ||
-                 (rc = ops->read_msr(MSR_SHADOW_GS_BASE, &msr_val,
-                                     ctxt)) != X86EMUL_OKAY ||
-                 (rc = ops->write_msr(MSR_SHADOW_GS_BASE, sreg.base,
-                                      ctxt)) != X86EMUL_OKAY )
-                goto done;
-            sreg.base = msr_val;
-            if ( (rc = ops->write_segment(x86_seg_gs, &sreg,
-                                          ctxt)) != X86EMUL_OKAY )
-            {
-                /* Best effort unwind (i.e. no error checking). */
-                ops->write_msr(MSR_SHADOW_GS_BASE, msr_val, ctxt);
-                goto done;
-            }
-            break;
-
-        case 0xf9: /* rdtscp */
-            fail_if(ops->read_msr == NULL);
-            if ( (rc = ops->read_msr(MSR_TSC_AUX,
-                                     &msr_val, ctxt)) != X86EMUL_OKAY )
-                goto done;
-            _regs.r(cx) = (uint32_t)msr_val;
-            goto rdtsc;
-
-        case 0xfc: /* clzero */
-        {
-            unsigned long zero = 0;
-
-            vcpu_must_have(clzero);
-
-            base = ad_bytes == 8 ? _regs.r(ax) :
-                   ad_bytes == 4 ? _regs.eax : _regs.ax;
-            limit = ctxt->cpuid->basic.clflush_size * 8;
-            generate_exception_if(limit < sizeof(long) ||
-                                  (limit & (limit - 1)), EXC_UD);
-            base &= ~(limit - 1);
-            if ( ops->rep_stos )
-            {
-                unsigned long nr_reps = limit / sizeof(zero);
-
-                rc = ops->rep_stos(&zero, ea.mem.seg, base, sizeof(zero),
-                                   &nr_reps, ctxt);
-                if ( rc == X86EMUL_OKAY )
-                {
-                    base += nr_reps * sizeof(zero);
-                    limit -= nr_reps * sizeof(zero);
-                }
-                else if ( rc != X86EMUL_UNHANDLEABLE )
-                    goto done;
-            }
-            fail_if(limit && !ops->write);
-            while ( limit )
-            {
-                rc = ops->write(ea.mem.seg, base, &zero, sizeof(zero), ctxt);
-                if ( rc != X86EMUL_OKAY )
-                    goto done;
-                base += sizeof(zero);
-                limit -= sizeof(zero);
-            }
-            break;
-        }
-
-#define _GRP7(mod, reg) \
-            (((mod) << 6) | ((reg) << 3)) ... (((mod) << 6) | ((reg) << 3) | 7)
-#define GRP7_MEM(reg) _GRP7(0, reg): case _GRP7(1, reg): case _GRP7(2, reg)
-#define GRP7_ALL(reg) GRP7_MEM(reg): case _GRP7(3, reg)
-
-        case GRP7_MEM(0): /* sgdt */
-        case GRP7_MEM(1): /* sidt */
-            ASSERT(ea.type == OP_MEM);
-            generate_exception_if(umip_active(ctxt, ops), EXC_GP, 0);
-            fail_if(!ops->read_segment || !ops->write);
-            if ( (rc = ops->read_segment(seg, &sreg, ctxt)) )
-                goto done;
-            if ( mode_64bit() )
-                op_bytes = 8;
-            else if ( op_bytes == 2 )
-            {
-                sreg.base &= 0xffffff;
-                op_bytes = 4;
-            }
-            if ( (rc = ops->write(ea.mem.seg, ea.mem.off, &sreg.limit,
-                                  2, ctxt)) != X86EMUL_OKAY ||
-                 (rc = ops->write(ea.mem.seg, truncate_ea(ea.mem.off + 2),
-                                  &sreg.base, op_bytes, ctxt)) != X86EMUL_OKAY )
-                goto done;
-            break;
-
-        case GRP7_MEM(2): /* lgdt */
-        case GRP7_MEM(3): /* lidt */
-            ASSERT(ea.type == OP_MEM);
-            generate_exception_if(!mode_ring0(), EXC_GP, 0);
-            fail_if(ops->write_segment == NULL);
-            memset(&sreg, 0, sizeof(sreg));
-            if ( (rc = read_ulong(ea.mem.seg, ea.mem.off,
-                                  &limit, 2, ctxt, ops)) ||
-                 (rc = read_ulong(ea.mem.seg, truncate_ea(ea.mem.off + 2),
-                                  &base, mode_64bit() ? 8 : 4, ctxt, ops)) )
-                goto done;
-            generate_exception_if(!is_canonical_address(base), EXC_GP, 0);
-            sreg.base = base;
-            sreg.limit = limit;
-            if ( !mode_64bit() && op_bytes == 2 )
-                sreg.base &= 0xffffff;
-            if ( (rc = ops->write_segment(seg, &sreg, ctxt)) )
-                goto done;
-            break;
-
-        case GRP7_ALL(4): /* smsw */
-            generate_exception_if(umip_active(ctxt, ops), EXC_GP, 0);
-            if ( ea.type == OP_MEM )
-            {
-                fail_if(!ops->write);
-                d |= Mov; /* force writeback */
-                ea.bytes = 2;
-            }
-            else
-                ea.bytes = op_bytes;
-            dst = ea;
-            fail_if(ops->read_cr == NULL);
-            if ( (rc = ops->read_cr(0, &dst.val, ctxt)) )
-                goto done;
-            break;
-
-        case GRP7_ALL(6): /* lmsw */
-            fail_if(ops->read_cr == NULL);
-            fail_if(ops->write_cr == NULL);
-            generate_exception_if(!mode_ring0(), EXC_GP, 0);
-            if ( (rc = ops->read_cr(0, &cr0, ctxt)) )
-                goto done;
-            if ( ea.type == OP_REG )
-                cr0w = *ea.reg;
-            else if ( (rc = read_ulong(ea.mem.seg, ea.mem.off,
-                                       &cr0w, 2, ctxt, ops)) )
-                goto done;
-            /* LMSW can: (1) set bits 0-3; (2) clear bits 1-3. */
-            cr0 = (cr0 & ~0xe) | (cr0w & 0xf);
-            if ( (rc = ops->write_cr(0, cr0, ctxt)) )
-                goto done;
-            break;
-
-        case GRP7_MEM(7): /* invlpg */
-            ASSERT(ea.type == OP_MEM);
-            generate_exception_if(!mode_ring0(), EXC_GP, 0);
-            fail_if(!ops->tlb_op);
-            if ( (rc = ops->tlb_op(x86emul_invlpg, ea.mem.off, ea.mem.seg,
-                                   ctxt)) != X86EMUL_OKAY )
-                goto done;
-            break;
-
-#undef GRP7_ALL
-#undef GRP7_MEM
-#undef _GRP7
-
-        default:
-            goto unimplemented_insn;
-        }
-        break;
-    }
+        rc = x86emul_0f01(state, &_regs, &dst, ctxt, ops);
+        goto dispatch_from_helper;
 
     case X86EMUL_OPC(0x0f, 0x02): /* lar */
         generate_exception_if(!in_protmode(ctxt, ops), EXC_UD);
@@ -11309,6 +10522,24 @@ x86_emulate(
     unrecognized_insn:
         rc = X86EMUL_UNRECOGNIZED;
         goto done;
+
+    dispatch_from_helper:
+        if ( rc == X86EMUL_OKAY )
+            break;
+
+        switch ( rc )
+        {
+        case X86EMUL_rdtsc:
+            goto rdtsc;
+        }
+
+        /* Internally used state change indicators may not make it here. */
+        if ( rc < 0 )
+        {
+            ASSERT_UNREACHABLE();
+            rc = X86EMUL_UNHANDLEABLE;
+        }
+        goto done;
     }
 
     if ( state->rmw )



^ permalink raw reply	[flat|nested] 8+ messages in thread
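
For readers following the new dispatch_from_helper label above: return
codes visible outside x86_emulate() are non-negative, while helpers use
negative values as internal state-change indicators that the caller
translates back into local gotos. A minimal standalone model of that
convention - only X86EMUL_OKAY, X86EMUL_UNHANDLEABLE and X86EMUL_rdtsc
correspond to real names, and the values used here are illustrative:

#include <assert.h>

enum {
    MODEL_OKAY         = 0,   /* stands in for X86EMUL_OKAY */
    MODEL_UNHANDLEABLE = 1,   /* stands in for X86EMUL_UNHANDLEABLE */
    MODEL_rdtsc        = -1,  /* stands in for X86EMUL_rdtsc */
};

static int dispatch_model(int rc)
{
    if ( rc == MODEL_OKAY )
        return rc;                /* "break" out of the big switch */

    switch ( rc )
    {
    case MODEL_rdtsc:
        return MODEL_OKAY;        /* "goto rdtsc" in the real code */
    }

    /* Internal (negative) indicators must never leak to callers. */
    if ( rc < 0 )
    {
        assert(!"unreachable");
        rc = MODEL_UNHANDLEABLE;
    }
    return rc;                    /* "goto done" */
}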

* [PATCH 2/7] x86emul: split off opcode 0fae handling
  2021-08-11 12:21 [PATCH 0/7] x86emul: a few small steps towards disintegration Jan Beulich
  2021-08-11 12:22 ` [PATCH 1/7] x86emul: split off opcode 0f01 handling Jan Beulich
@ 2021-08-11 12:23 ` Jan Beulich
  2021-08-11 12:23 ` [PATCH 3/7] x86emul: split off opcode 0fc7 handling Jan Beulich
                   ` (4 subsequent siblings)
  6 siblings, 0 replies; 8+ messages in thread
From: Jan Beulich @ 2021-08-11 12:23 UTC (permalink / raw)
  To: xen-devel; +Cc: Andrew Cooper, Wei Liu, Roger Pau Monné

There's a fair number of sub-cases (some yet to be implemented), so
a separate function seems warranted.

The moved code is slightly adjusted in a few places, e.g. replacing
EXC_* with X86_EXC_* (so that EXC_* don't need to move as well; we want
these to be phased out anyway).

Signed-off-by: Jan Beulich <jbeulich@suse.com>
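
The selector being switched on throughout the new helper is the ModRM
byte's reg field, with mod distinguishing register from memory forms.
As a quick reference, a tiny sketch of the field layout (the function
names here are hypothetical, not the emulator's):

#include <stdint.h>

/* ModRM layout: mod (bits 7:6), reg (bits 5:3), rm (bits 2:0). */
static unsigned int sketch_mod(uint8_t modrm) { return modrm >> 6; }
static unsigned int sketch_reg(uint8_t modrm) { return (modrm >> 3) & 7; }
static unsigned int sketch_rm(uint8_t modrm)  { return modrm & 7; }

/*
 * E.g. 0f ae /5 with mod == 3 is lfence, while /7 with a memory
 * operand is clflush{,opt} - matching the switch in x86emul_0fae().
 */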

--- a/tools/fuzz/x86_instruction_emulator/Makefile
+++ b/tools/fuzz/x86_instruction_emulator/Makefile
@@ -35,7 +35,7 @@ x86.h := $(addprefix $(XEN_ROOT)/tools/i
 x86_emulate.h := x86-emulate.h x86_emulate/x86_emulate.h $(x86.h)
 
 OBJS := fuzz-emul.o x86-emulate.o
-OBJS += x86_emulate/0f01.o
+OBJS += x86_emulate/0f01.o x86_emulate/0fae.o
 
 # x86-emulate.c will be implicit for both
 x86-emulate.o x86-emulate-cov.o: x86_emulate/x86_emulate.c $(x86_emulate.h) x86_emulate/private.h
--- a/tools/tests/x86_emulator/Makefile
+++ b/tools/tests/x86_emulator/Makefile
@@ -251,7 +251,7 @@ xop.h avx512f.h: simd-fma.c
 endif # 32-bit override
 
 OBJS := x86-emulate.o cpuid.o test_x86_emulator.o evex-disp8.o predicates.o wrappers.o
-OBJS += x86_emulate/0f01.o
+OBJS += x86_emulate/0f01.o x86_emulate/0fae.o
 
 $(TARGET): $(OBJS)
 	$(HOSTCC) $(HOSTCFLAGS) -o $@ $^
--- /dev/null
+++ b/xen/arch/x86/x86_emulate/0fae.c
@@ -0,0 +1,222 @@
+/******************************************************************************
+ * 0fae.c - helper for x86_emulate.c
+ *
+ * Generic x86 (32-bit and 64-bit) instruction decoder and emulator.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "private.h"
+
+#if defined(__XEN__) && \
+    (!defined(X86EMUL_NO_FPU) || !defined(X86EMUL_NO_MMX) || \
+     !defined(X86EMUL_NO_SIMD))
+# include <asm/xstate.h>
+#endif
+
+int x86emul_0fae(struct x86_emulate_state *s,
+                 struct cpu_user_regs *regs,
+                 struct operand *dst,
+                 const struct operand *src,
+                 struct x86_emulate_ctxt *ctxt,
+                 const struct x86_emulate_ops *ops,
+                 enum x86_emulate_fpu_type *fpu_type)
+#define fpu_type (*fpu_type) /* for get_fpu() */
+{
+    unsigned long cr4;
+    int rc;
+
+    if ( !s->vex.opcx && (!s->vex.pfx || s->vex.pfx == vex_66) )
+    {
+        switch ( s->modrm_reg & 7 )
+        {
+#if !defined(X86EMUL_NO_FPU) || !defined(X86EMUL_NO_MMX) || \
+    !defined(X86EMUL_NO_SIMD)
+        case 0: /* fxsave */
+        case 1: /* fxrstor */
+            generate_exception_if(s->vex.pfx, X86_EXC_UD);
+            vcpu_must_have(fxsr);
+            generate_exception_if(s->ea.type != OP_MEM, X86_EXC_UD);
+            generate_exception_if(!is_aligned(s->ea.mem.seg, s->ea.mem.off, 16,
+                                              ctxt, ops),
+                                  X86_EXC_GP, 0);
+            fail_if(!ops->blk);
+            s->op_bytes =
+#ifdef __x86_64__
+                !mode_64bit() ? offsetof(struct x86_fxsr, xmm[8]) :
+#endif
+                sizeof(struct x86_fxsr);
+            if ( amd_like(ctxt) )
+            {
+                uint64_t msr_val;
+
+                /* Assume "normal" operation in case of missing hooks. */
+                if ( !ops->read_cr ||
+                     ops->read_cr(4, &cr4, ctxt) != X86EMUL_OKAY )
+                    cr4 = X86_CR4_OSFXSR;
+                if ( !ops->read_msr ||
+                     ops->read_msr(MSR_EFER, &msr_val, ctxt) != X86EMUL_OKAY )
+                    msr_val = 0;
+                if ( !(cr4 & X86_CR4_OSFXSR) ||
+                     (mode_64bit() && mode_ring0() && (msr_val & EFER_FFXSE)) )
+                    s->op_bytes = offsetof(struct x86_fxsr, xmm[0]);
+            }
+            /*
+             * This could also be X86EMUL_FPU_mmx, but it shouldn't be
+             * X86EMUL_FPU_xmm, as we don't want CR4.OSFXSR checked.
+             */
+            get_fpu(X86EMUL_FPU_fpu);
+            s->fpu_ctrl = true;
+            s->blk = s->modrm_reg & 1 ? blk_fxrstor : blk_fxsave;
+            if ( (rc = ops->blk(s->ea.mem.seg, s->ea.mem.off, NULL,
+                                sizeof(struct x86_fxsr), &regs->eflags,
+                                s, ctxt)) != X86EMUL_OKAY )
+                goto done;
+            break;
+#endif /* X86EMUL_NO_{FPU,MMX,SIMD} */
+
+#ifndef X86EMUL_NO_SIMD
+        case 2: /* ldmxcsr */
+            generate_exception_if(s->vex.pfx, X86_EXC_UD);
+            vcpu_must_have(sse);
+        ldmxcsr:
+            generate_exception_if(src->type != OP_MEM, X86_EXC_UD);
+            get_fpu(s->vex.opcx ? X86EMUL_FPU_ymm : X86EMUL_FPU_xmm);
+            generate_exception_if(src->val & ~mxcsr_mask, X86_EXC_GP, 0);
+            asm volatile ( "ldmxcsr %0" :: "m" (src->val) );
+            break;
+
+        case 3: /* stmxcsr */
+            generate_exception_if(s->vex.pfx, X86_EXC_UD);
+            vcpu_must_have(sse);
+        stmxcsr:
+            generate_exception_if(dst->type != OP_MEM, X86_EXC_UD);
+            get_fpu(s->vex.opcx ? X86EMUL_FPU_ymm : X86EMUL_FPU_xmm);
+            asm volatile ( "stmxcsr %0" : "=m" (dst->val) );
+            break;
+#endif /* X86EMUL_NO_SIMD */
+
+        case 5: /* lfence */
+            fail_if(s->modrm_mod != 3);
+            generate_exception_if(s->vex.pfx, X86_EXC_UD);
+            vcpu_must_have(sse2);
+            asm volatile ( "lfence" ::: "memory" );
+            break;
+        case 6:
+            if ( s->modrm_mod == 3 ) /* mfence */
+            {
+                generate_exception_if(s->vex.pfx, X86_EXC_UD);
+                vcpu_must_have(sse2);
+                asm volatile ( "mfence" ::: "memory" );
+                break;
+            }
+            /* else clwb */
+            fail_if(!s->vex.pfx);
+            vcpu_must_have(clwb);
+            fail_if(!ops->cache_op);
+            if ( (rc = ops->cache_op(x86emul_clwb, s->ea.mem.seg, s->ea.mem.off,
+                                     ctxt)) != X86EMUL_OKAY )
+                goto done;
+            break;
+        case 7:
+            if ( s->modrm_mod == 3 ) /* sfence */
+            {
+                generate_exception_if(s->vex.pfx, X86_EXC_UD);
+                vcpu_must_have(mmxext);
+                asm volatile ( "sfence" ::: "memory" );
+                break;
+            }
+            /* else clflush{,opt} */
+            if ( !s->vex.pfx )
+                vcpu_must_have(clflush);
+            else
+                vcpu_must_have(clflushopt);
+            fail_if(!ops->cache_op);
+            if ( (rc = ops->cache_op(s->vex.pfx ? x86emul_clflushopt
+                                                : x86emul_clflush,
+                                     s->ea.mem.seg, s->ea.mem.off,
+                                     ctxt)) != X86EMUL_OKAY )
+                goto done;
+            break;
+        default:
+            return X86EMUL_UNIMPLEMENTED;
+        }
+    }
+#ifndef X86EMUL_NO_SIMD
+    else if ( s->vex.opcx && !s->vex.pfx )
+    {
+        switch ( s->modrm_reg & 7 )
+        {
+        case 2: /* vldmxcsr */
+            generate_exception_if(s->vex.l || s->vex.reg != 0xf, X86_EXC_UD);
+            vcpu_must_have(avx);
+            goto ldmxcsr;
+        case 3: /* vstmxcsr */
+            generate_exception_if(s->vex.l || s->vex.reg != 0xf, X86_EXC_UD);
+            vcpu_must_have(avx);
+            goto stmxcsr;
+        }
+        return X86EMUL_UNRECOGNIZED;
+    }
+#endif /* !X86EMUL_NO_SIMD */
+    else if ( !s->vex.opcx && s->vex.pfx == vex_f3 )
+    {
+        enum x86_segment seg;
+        struct segment_register sreg;
+
+        fail_if(s->modrm_mod != 3);
+        generate_exception_if((s->modrm_reg & 4) || !mode_64bit(), X86_EXC_UD);
+        fail_if(!ops->read_cr);
+        if ( (rc = ops->read_cr(4, &cr4, ctxt)) != X86EMUL_OKAY )
+            goto done;
+        generate_exception_if(!(cr4 & X86_CR4_FSGSBASE), X86_EXC_UD);
+        seg = s->modrm_reg & 1 ? x86_seg_gs : x86_seg_fs;
+        fail_if(!ops->read_segment);
+        if ( (rc = ops->read_segment(seg, &sreg, ctxt)) != X86EMUL_OKAY )
+            goto done;
+        dst->reg = decode_gpr(regs, s->modrm_rm);
+        if ( !(s->modrm_reg & 2) )
+        {
+            /* rd{f,g}sbase */
+            dst->type = OP_REG;
+            dst->bytes = (s->op_bytes == 8) ? 8 : 4;
+            dst->val = sreg.base;
+        }
+        else
+        {
+            /* wr{f,g}sbase */
+            if ( s->op_bytes == 8 )
+            {
+                sreg.base = *dst->reg;
+                generate_exception_if(!is_canonical_address(sreg.base),
+                                      X86_EXC_GP, 0);
+            }
+            else
+                sreg.base = (uint32_t)*dst->reg;
+            fail_if(!ops->write_segment);
+            if ( (rc = ops->write_segment(seg, &sreg, ctxt)) != X86EMUL_OKAY )
+                goto done;
+        }
+    }
+    else
+    {
+        ASSERT_UNREACHABLE();
+        return X86EMUL_UNRECOGNIZED;
+    }
+
+    rc = X86EMUL_OKAY;
+
+ done:
+    return rc;
+}
--- a/xen/arch/x86/x86_emulate/Makefile
+++ b/xen/arch/x86/x86_emulate/Makefile
@@ -1 +1,2 @@
 obj-y += 0f01.o
+obj-y += 0fae.o
--- a/xen/arch/x86/x86_emulate/private.h
+++ b/xen/arch/x86/x86_emulate/private.h
@@ -308,6 +308,29 @@ struct x86_emulate_state {
 #endif
 };
 
+struct x86_fxsr {
+    uint16_t fcw;
+    uint16_t fsw;
+    uint8_t ftw, :8;
+    uint16_t fop;
+    union {
+        struct {
+            uint32_t offs;
+            uint16_t sel, :16;
+        };
+        uint64_t addr;
+    } fip, fdp;
+    uint32_t mxcsr;
+    uint32_t mxcsr_mask;
+    struct {
+        uint8_t data[10];
+        uint16_t :16, :16, :16;
+    } fpreg[8];
+    uint64_t __attribute__ ((aligned(16))) xmm[16][2];
+    uint64_t rsvd[6];
+    uint64_t avl[6];
+};
+
 /*
  * Externally visible return codes from x86_emulate() are non-negative.
  * Use negative values for internal state change indicators from helpers
@@ -397,6 +420,18 @@ in_protmode(
     (_cpl == 0);                                \
 })
 
+static inline bool
+_amd_like(const struct cpuid_policy *cp)
+{
+    return cp->x86_vendor & (X86_VENDOR_AMD | X86_VENDOR_HYGON);
+}
+
+static inline bool
+amd_like(const struct x86_emulate_ctxt *ctxt)
+{
+    return _amd_like(ctxt->cpuid);
+}
+
 #define vcpu_has_fpu()         (ctxt->cpuid->basic.fpu)
 #define vcpu_has_sep()         (ctxt->cpuid->basic.sep)
 #define vcpu_has_cx8()         (ctxt->cpuid->basic.cx8)
@@ -501,11 +536,52 @@ in_protmode(
 int x86emul_get_cpl(struct x86_emulate_ctxt *ctxt,
                     const struct x86_emulate_ops *ops);
 
+int x86emul_get_fpu(enum x86_emulate_fpu_type type,
+                    struct x86_emulate_ctxt *ctxt,
+                    const struct x86_emulate_ops *ops);
+
+#define get_fpu(type)                                           \
+do {                                                            \
+    rc = x86emul_get_fpu(fpu_type = (type), ctxt, ops);         \
+    if ( rc ) goto done;                                        \
+} while (0)
+
 int x86emul_0f01(struct x86_emulate_state *s,
                  struct cpu_user_regs *regs,
                  struct operand *dst,
                  struct x86_emulate_ctxt *ctxt,
                  const struct x86_emulate_ops *ops);
+int x86emul_0fae(struct x86_emulate_state *s,
+                 struct cpu_user_regs *regs,
+                 struct operand *dst,
+                 const struct operand *src,
+                 struct x86_emulate_ctxt *ctxt,
+                 const struct x86_emulate_ops *ops,
+                 enum x86_emulate_fpu_type *fpu_type);
+
+static inline bool is_aligned(enum x86_segment seg, unsigned long offs,
+                              unsigned int size, struct x86_emulate_ctxt *ctxt,
+                              const struct x86_emulate_ops *ops)
+{
+    struct segment_register reg;
+
+    /* Expecting powers of two only. */
+    ASSERT(!(size & (size - 1)));
+
+    if ( mode_64bit() && seg < x86_seg_fs )
+        memset(&reg, 0, sizeof(reg));
+    else
+    {
+        /* No alignment checking when we have no way to read segment data. */
+        if ( !ops->read_segment )
+            return true;
+
+        if ( ops->read_segment(seg, &reg, ctxt) != X86EMUL_OKAY )
+            return false;
+    }
+
+    return !((reg.base + offs) & (size - 1));
+}
 
 static inline bool umip_active(struct x86_emulate_ctxt *ctxt,
                                const struct x86_emulate_ops *ops)
--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -695,29 +695,6 @@ typedef union {
     uint32_t data32[16];
 } mmval_t;
 
-struct x86_fxsr {
-    uint16_t fcw;
-    uint16_t fsw;
-    uint8_t ftw, :8;
-    uint16_t fop;
-    union {
-        struct {
-            uint32_t offs;
-            uint16_t sel, :16;
-        };
-        uint64_t addr;
-    } fip, fdp;
-    uint32_t mxcsr;
-    uint32_t mxcsr_mask;
-    struct {
-        uint8_t data[10];
-        uint16_t :16, :16, :16;
-    } fpreg[8];
-    uint64_t __attribute__ ((aligned(16))) xmm[16][2];
-    uint64_t rsvd[6];
-    uint64_t avl[6];
-};
-
 /*
  * While proper alignment gets specified above, this doesn't get honored by
  * the compiler for automatic variables. Use this helper to instantiate a
@@ -1063,7 +1040,7 @@ do {
     ops->write_segment(x86_seg_cs, cs, ctxt);                           \
 })
 
-static int _get_fpu(
+int x86emul_get_fpu(
     enum x86_emulate_fpu_type type,
     struct x86_emulate_ctxt *ctxt,
     const struct x86_emulate_ops *ops)
@@ -1102,7 +1079,7 @@ static int _get_fpu(
         break;
     }
 
-    rc = ops->get_fpu(type, ctxt);
+    rc = (ops->get_fpu)(type, ctxt);
 
     if ( rc == X86EMUL_OKAY )
     {
@@ -1146,12 +1123,6 @@ static int _get_fpu(
     return rc;
 }
 
-#define get_fpu(type)                                           \
-do {                                                            \
-    rc = _get_fpu(fpu_type = (type), ctxt, ops);                \
-    if ( rc ) goto done;                                        \
-} while (0)
-
 static void put_fpu(
     enum x86_emulate_fpu_type type,
     bool failed_late,
@@ -1556,18 +1527,6 @@ static int ioport_access_check(
     return rc;
 }
 
-static bool
-_amd_like(const struct cpuid_policy *cp)
-{
-    return cp->x86_vendor & (X86_VENDOR_AMD | X86_VENDOR_HYGON);
-}
-
-static bool
-amd_like(const struct x86_emulate_ctxt *ctxt)
-{
-    return _amd_like(ctxt->cpuid);
-}
-
 /* Initialise output state in x86_emulate_ctxt */
 static void init_context(struct x86_emulate_ctxt *ctxt)
 {
@@ -1980,30 +1939,6 @@ static unsigned int decode_disp8scale(en
     } \
 } while ( false )
 
-static bool is_aligned(enum x86_segment seg, unsigned long offs,
-                       unsigned int size, struct x86_emulate_ctxt *ctxt,
-                       const struct x86_emulate_ops *ops)
-{
-    struct segment_register reg;
-
-    /* Expecting powers of two only. */
-    ASSERT(!(size & (size - 1)));
-
-    if ( mode_64bit() && seg < x86_seg_fs )
-        memset(&reg, 0, sizeof(reg));
-    else
-    {
-        /* No alignment checking when we have no way to read segment data. */
-        if ( !ops->read_segment )
-            return true;
-
-        if ( ops->read_segment(seg, &reg, ctxt) != X86EMUL_OKAY )
-            return false;
-    }
-
-    return !((reg.base + offs) & (size - 1));
-}
-
 static bool is_branch_step(struct x86_emulate_ctxt *ctxt,
                            const struct x86_emulate_ops *ops)
 {
@@ -3346,7 +3281,8 @@ x86_emulate(
 #ifndef X86EMUL_NO_SIMD
     /* With a memory operand, fetch the mask register in use (if any). */
     if ( ea.type == OP_MEM && evex.opmsk &&
-         _get_fpu(fpu_type = X86EMUL_FPU_opmask, ctxt, ops) == X86EMUL_OKAY )
+         x86emul_get_fpu(fpu_type = X86EMUL_FPU_opmask,
+                         ctxt, ops) == X86EMUL_OKAY )
     {
         uint8_t *stb = get_stub(stub);
 
@@ -3369,7 +3305,7 @@ x86_emulate(
 
     if ( fpu_type == X86EMUL_FPU_opmask )
     {
-        /* Squash (side) effects of the _get_fpu() above. */
+        /* Squash (side) effects of the x86emul_get_fpu() above. */
         x86_emul_reset_event(ctxt);
         put_fpu(X86EMUL_FPU_opmask, false, state, ctxt, ops);
         fpu_type = X86EMUL_FPU_none;
@@ -7434,173 +7370,14 @@ x86_emulate(
             emulate_2op_SrcV_nobyte("bts", src, dst, _regs.eflags);
         break;
 
-    case X86EMUL_OPC(0x0f, 0xae): case X86EMUL_OPC_66(0x0f, 0xae): /* Grp15 */
-        switch ( modrm_reg & 7 )
-        {
-#if !defined(X86EMUL_NO_FPU) || !defined(X86EMUL_NO_MMX) || \
-    !defined(X86EMUL_NO_SIMD)
-        case 0: /* fxsave */
-        case 1: /* fxrstor */
-            generate_exception_if(vex.pfx, EXC_UD);
-            vcpu_must_have(fxsr);
-            generate_exception_if(ea.type != OP_MEM, EXC_UD);
-            generate_exception_if(!is_aligned(ea.mem.seg, ea.mem.off, 16,
-                                              ctxt, ops),
-                                  EXC_GP, 0);
-            fail_if(!ops->blk);
-            op_bytes =
-#ifdef __x86_64__
-                !mode_64bit() ? offsetof(struct x86_fxsr, xmm[8]) :
-#endif
-                sizeof(struct x86_fxsr);
-            if ( amd_like(ctxt) )
-            {
-                /* Assume "normal" operation in case of missing hooks. */
-                if ( !ops->read_cr ||
-                     ops->read_cr(4, &cr4, ctxt) != X86EMUL_OKAY )
-                    cr4 = X86_CR4_OSFXSR;
-                if ( !ops->read_msr ||
-                     ops->read_msr(MSR_EFER, &msr_val, ctxt) != X86EMUL_OKAY )
-                    msr_val = 0;
-                if ( !(cr4 & X86_CR4_OSFXSR) ||
-                     (mode_64bit() && mode_ring0() && (msr_val & EFER_FFXSE)) )
-                    op_bytes = offsetof(struct x86_fxsr, xmm[0]);
-            }
-            /*
-             * This could also be X86EMUL_FPU_mmx, but it shouldn't be
-             * X86EMUL_FPU_xmm, as we don't want CR4.OSFXSR checked.
-             */
-            get_fpu(X86EMUL_FPU_fpu);
-            state->fpu_ctrl = true;
-            state->blk = modrm_reg & 1 ? blk_fxrstor : blk_fxsave;
-            if ( (rc = ops->blk(ea.mem.seg, ea.mem.off, NULL,
-                                sizeof(struct x86_fxsr), &_regs.eflags,
-                                state, ctxt)) != X86EMUL_OKAY )
-                goto done;
-            break;
-#endif /* X86EMUL_NO_{FPU,MMX,SIMD} */
-
-#ifndef X86EMUL_NO_SIMD
-        case 2: /* ldmxcsr */
-            generate_exception_if(vex.pfx, EXC_UD);
-            vcpu_must_have(sse);
-        ldmxcsr:
-            generate_exception_if(src.type != OP_MEM, EXC_UD);
-            get_fpu(vex.opcx ? X86EMUL_FPU_ymm : X86EMUL_FPU_xmm);
-            generate_exception_if(src.val & ~mxcsr_mask, EXC_GP, 0);
-            asm volatile ( "ldmxcsr %0" :: "m" (src.val) );
-            break;
-
-        case 3: /* stmxcsr */
-            generate_exception_if(vex.pfx, EXC_UD);
-            vcpu_must_have(sse);
-        stmxcsr:
-            generate_exception_if(dst.type != OP_MEM, EXC_UD);
-            get_fpu(vex.opcx ? X86EMUL_FPU_ymm : X86EMUL_FPU_xmm);
-            asm volatile ( "stmxcsr %0" : "=m" (dst.val) );
-            break;
-#endif /* X86EMUL_NO_SIMD */
-
-        case 5: /* lfence */
-            fail_if(modrm_mod != 3);
-            generate_exception_if(vex.pfx, EXC_UD);
-            vcpu_must_have(sse2);
-            asm volatile ( "lfence" ::: "memory" );
-            break;
-        case 6:
-            if ( modrm_mod == 3 ) /* mfence */
-            {
-                generate_exception_if(vex.pfx, EXC_UD);
-                vcpu_must_have(sse2);
-                asm volatile ( "mfence" ::: "memory" );
-                break;
-            }
-            /* else clwb */
-            fail_if(!vex.pfx);
-            vcpu_must_have(clwb);
-            fail_if(!ops->cache_op);
-            if ( (rc = ops->cache_op(x86emul_clwb, ea.mem.seg, ea.mem.off,
-                                     ctxt)) != X86EMUL_OKAY )
-                goto done;
-            break;
-        case 7:
-            if ( modrm_mod == 3 ) /* sfence */
-            {
-                generate_exception_if(vex.pfx, EXC_UD);
-                vcpu_must_have(mmxext);
-                asm volatile ( "sfence" ::: "memory" );
-                break;
-            }
-            /* else clflush{,opt} */
-            if ( !vex.pfx )
-                vcpu_must_have(clflush);
-            else
-                vcpu_must_have(clflushopt);
-            fail_if(!ops->cache_op);
-            if ( (rc = ops->cache_op(vex.pfx ? x86emul_clflushopt
-                                             : x86emul_clflush,
-                                     ea.mem.seg, ea.mem.off,
-                                     ctxt)) != X86EMUL_OKAY )
-                goto done;
-            break;
-        default:
-            goto unimplemented_insn;
-        }
-        break;
-
+    case X86EMUL_OPC(0x0f, 0xae): /* Grp15 */
+    case X86EMUL_OPC_66(0x0f, 0xae):
+    case X86EMUL_OPC_F3(0x0f, 0xae):
 #ifndef X86EMUL_NO_SIMD
-
-    case X86EMUL_OPC_VEX(0x0f, 0xae): /* Grp15 */
-        switch ( modrm_reg & 7 )
-        {
-        case 2: /* vldmxcsr */
-            generate_exception_if(vex.l || vex.reg != 0xf, EXC_UD);
-            vcpu_must_have(avx);
-            goto ldmxcsr;
-        case 3: /* vstmxcsr */
-            generate_exception_if(vex.l || vex.reg != 0xf, EXC_UD);
-            vcpu_must_have(avx);
-            goto stmxcsr;
-        }
-        goto unrecognized_insn;
-
-#endif /* !X86EMUL_NO_SIMD */
-
-    case X86EMUL_OPC_F3(0x0f, 0xae): /* Grp15 */
-        fail_if(modrm_mod != 3);
-        generate_exception_if((modrm_reg & 4) || !mode_64bit(), EXC_UD);
-        fail_if(!ops->read_cr);
-        if ( (rc = ops->read_cr(4, &cr4, ctxt)) != X86EMUL_OKAY )
-            goto done;
-        generate_exception_if(!(cr4 & X86_CR4_FSGSBASE), EXC_UD);
-        seg = modrm_reg & 1 ? x86_seg_gs : x86_seg_fs;
-        fail_if(!ops->read_segment);
-        if ( (rc = ops->read_segment(seg, &sreg, ctxt)) != X86EMUL_OKAY )
-            goto done;
-        dst.reg = decode_gpr(&_regs, modrm_rm);
-        if ( !(modrm_reg & 2) )
-        {
-            /* rd{f,g}sbase */
-            dst.type = OP_REG;
-            dst.bytes = (op_bytes == 8) ? 8 : 4;
-            dst.val = sreg.base;
-        }
-        else
-        {
-            /* wr{f,g}sbase */
-            if ( op_bytes == 8 )
-            {
-                sreg.base = *dst.reg;
-                generate_exception_if(!is_canonical_address(sreg.base),
-                                      EXC_GP, 0);
-            }
-            else
-                sreg.base = (uint32_t)*dst.reg;
-            fail_if(!ops->write_segment);
-            if ( (rc = ops->write_segment(seg, &sreg, ctxt)) != X86EMUL_OKAY )
-                goto done;
-        }
-        break;
+    case X86EMUL_OPC_VEX(0x0f, 0xae):
+#endif
+        rc = x86emul_0fae(state, &_regs, &dst, &src, ctxt, ops, &fpu_type);
+        goto dispatch_from_helper;
 
     case X86EMUL_OPC(0x0f, 0xaf): /* imul */
         emulate_2op_SrcV_srcmem("imul", src, dst, _regs.eflags);
@@ -10516,7 +10293,7 @@ x86_emulate(
         goto unrecognized_insn;
 
     default:
-    unimplemented_insn:
+    unimplemented_insn: __maybe_unused;
         rc = X86EMUL_UNIMPLEMENTED;
         goto done;
     unrecognized_insn:



^ permalink raw reply	[flat|nested] 8+ messages in thread
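
A side note on the ldmxcsr path moved in the patch above: writes with
reserved MXCSR bits set raise #GP(0), which is what the
"src->val & ~mxcsr_mask" test implements. A hedged standalone
illustration - mxcsr_mask is host-derived, and the function name is
made up for this sketch:

#include <stdbool.h>
#include <stdint.h>

static bool mxcsr_write_ok(uint32_t val, uint32_t mxcsr_mask)
{
    /* Any bit outside the host-supported mask makes the write fault. */
    return !(val & ~mxcsr_mask);
}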

* [PATCH 3/7] x86emul: split off opcode 0fc7 handling
  2021-08-11 12:21 [PATCH 0/7] x86emul: a few small steps towards disintegration Jan Beulich
  2021-08-11 12:22 ` [PATCH 1/7] x86emul: split off opcode 0f01 handling Jan Beulich
  2021-08-11 12:23 ` [PATCH 2/7] x86emul: split off opcode 0fae handling Jan Beulich
@ 2021-08-11 12:23 ` Jan Beulich
  2021-08-11 12:24 ` [PATCH 4/7] x86emul: split off FPU opcode handling Jan Beulich
                   ` (3 subsequent siblings)
  6 siblings, 0 replies; 8+ messages in thread
From: Jan Beulich @ 2021-08-11 12:23 UTC (permalink / raw)
  To: xen-devel; +Cc: Andrew Cooper, Wei Liu, Roger Pau Monné

There's a fair number of sub-cases (some yet to be implemented), so
a separate function seems warranted.

The moved code is slightly adjusted in a few places, e.g. replacing
EXC_* with X86_EXC_* (so that EXC_* don't need to move as well; we want
these to be phased out anyway).

Signed-off-by: Jan Beulich <jbeulich@suse.com>
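
The bulk of the memory-operand code being moved implements
cmpxchg8b/cmpxchg16b. As a reminder of the architectural semantics
under emulation, a plain-C model of the 8-byte form (illustrative only;
the emulator itself goes through the ops->read and ops->cmpxchg hooks):

#include <stdbool.h>
#include <stdint.h>

/*
 * cmpxchg8b m64: if EDX:EAX == m64, store ECX:EBX into m64 and set ZF;
 * otherwise load m64 into EDX:EAX and clear ZF.
 */
static bool cmpxchg8b_model(uint64_t *mem, uint32_t *eax, uint32_t *edx,
                            uint32_t ebx, uint32_t ecx)
{
    uint64_t expect = ((uint64_t)*edx << 32) | *eax;

    if ( *mem == expect )
    {
        *mem = ((uint64_t)ecx << 32) | ebx;
        return true;     /* ZF = 1 */
    }

    *eax = (uint32_t)*mem;
    *edx = *mem >> 32;
    return false;        /* ZF = 0 */
}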

--- a/tools/fuzz/x86_instruction_emulator/Makefile
+++ b/tools/fuzz/x86_instruction_emulator/Makefile
@@ -35,7 +35,7 @@ x86.h := $(addprefix $(XEN_ROOT)/tools/i
 x86_emulate.h := x86-emulate.h x86_emulate/x86_emulate.h $(x86.h)
 
 OBJS := fuzz-emul.o x86-emulate.o
-OBJS += x86_emulate/0f01.o x86_emulate/0fae.o
+OBJS += x86_emulate/0f01.o x86_emulate/0fae.o x86_emulate/0fc7.o
 
 # x86-emulate.c will be implicit for both
 x86-emulate.o x86-emulate-cov.o: x86_emulate/x86_emulate.c $(x86_emulate.h) x86_emulate/private.h
--- a/tools/tests/x86_emulator/Makefile
+++ b/tools/tests/x86_emulator/Makefile
@@ -251,7 +251,7 @@ xop.h avx512f.h: simd-fma.c
 endif # 32-bit override
 
 OBJS := x86-emulate.o cpuid.o test_x86_emulator.o evex-disp8.o predicates.o wrappers.o
-OBJS += x86_emulate/0f01.o x86_emulate/0fae.o
+OBJS += x86_emulate/0f01.o x86_emulate/0fae.o x86_emulate/0fc7.o
 
 $(TARGET): $(OBJS)
 	$(HOSTCC) $(HOSTCFLAGS) -o $@ $^
--- /dev/null
+++ b/xen/arch/x86/x86_emulate/0fc7.c
@@ -0,0 +1,210 @@
+/******************************************************************************
+ * 0fc7.c - helper for x86_emulate.c
+ *
+ * Generic x86 (32-bit and 64-bit) instruction decoder and emulator.
+ *
+ * Copyright (c) 2005-2007 Keir Fraser
+ * Copyright (c) 2005-2007 XenSource Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "private.h"
+
+/* Avoid namespace pollution. */
+#undef cmpxchg
+
+int x86emul_0fc7(struct x86_emulate_state *s,
+                 struct cpu_user_regs *regs,
+                 struct operand *dst,
+                 struct x86_emulate_ctxt *ctxt,
+                 const struct x86_emulate_ops *ops,
+                 mmval_t *mmvalp)
+{
+    int rc;
+
+    if ( s->ea.type == OP_REG )
+    {
+        bool __maybe_unused carry;
+
+        switch ( s->modrm_reg & 7 )
+        {
+        default:
+            return X86EMUL_UNRECOGNIZED;
+
+        case 6: /* rdrand */
+#ifdef HAVE_AS_RDRAND
+            generate_exception_if(s->vex.pfx >= vex_f3, X86_EXC_UD);
+            host_and_vcpu_must_have(rdrand);
+            *dst = s->ea;
+            switch ( s->op_bytes )
+            {
+            case 2:
+                asm ( "rdrand %w0" ASM_FLAG_OUT(, "; setc %1")
+                      : "=r" (dst->val), ASM_FLAG_OUT("=@ccc", "=qm") (carry) );
+                break;
+            default:
+# ifdef __x86_64__
+                asm ( "rdrand %k0" ASM_FLAG_OUT(, "; setc %1")
+                      : "=r" (dst->val), ASM_FLAG_OUT("=@ccc", "=qm") (carry) );
+                break;
+            case 8:
+# endif
+                asm ( "rdrand %0" ASM_FLAG_OUT(, "; setc %1")
+                      : "=r" (dst->val), ASM_FLAG_OUT("=@ccc", "=qm") (carry) );
+                break;
+            }
+            regs->eflags &= ~EFLAGS_MASK;
+            if ( carry )
+                regs->eflags |= X86_EFLAGS_CF;
+            break;
+#else
+            return X86EMUL_UNIMPLEMENTED;
+#endif
+
+        case 7: /* rdseed / rdpid */
+            if ( s->vex.pfx == vex_f3 ) /* rdpid */
+            {
+                uint64_t msr_val;
+
+                generate_exception_if(s->ea.type != OP_REG, X86_EXC_UD);
+                vcpu_must_have(rdpid);
+                fail_if(!ops->read_msr);
+                if ( (rc = ops->read_msr(MSR_TSC_AUX, &msr_val,
+                                         ctxt)) != X86EMUL_OKAY )
+                    goto done;
+                *dst = s->ea;
+                dst->val = msr_val;
+                dst->bytes = 4;
+                break;
+            }
+#ifdef HAVE_AS_RDSEED
+            generate_exception_if(s->vex.pfx >= vex_f3, X86_EXC_UD);
+            host_and_vcpu_must_have(rdseed);
+            *dst = s->ea;
+            switch ( s->op_bytes )
+            {
+            case 2:
+                asm ( "rdseed %w0" ASM_FLAG_OUT(, "; setc %1")
+                      : "=r" (dst->val), ASM_FLAG_OUT("=@ccc", "=qm") (carry) );
+                break;
+            default:
+# ifdef __x86_64__
+                asm ( "rdseed %k0" ASM_FLAG_OUT(, "; setc %1")
+                      : "=r" (dst->val), ASM_FLAG_OUT("=@ccc", "=qm") (carry) );
+                break;
+            case 8:
+# endif
+                asm ( "rdseed %0" ASM_FLAG_OUT(, "; setc %1")
+                      : "=r" (dst->val), ASM_FLAG_OUT("=@ccc", "=qm") (carry) );
+                break;
+            }
+            regs->eflags &= ~EFLAGS_MASK;
+            if ( carry )
+                regs->eflags |= X86_EFLAGS_CF;
+            break;
+#endif
+        }
+    }
+    else
+    {
+        union {
+            uint32_t u32[2];
+            uint64_t u64[2];
+        } *old, *aux;
+
+        /* cmpxchg8b/cmpxchg16b */
+        generate_exception_if((s->modrm_reg & 7) != 1, X86_EXC_UD);
+        fail_if(!ops->cmpxchg);
+        if ( s->rex_prefix & REX_W )
+        {
+            host_and_vcpu_must_have(cx16);
+            generate_exception_if(!is_aligned(s->ea.mem.seg, s->ea.mem.off, 16,
+                                              ctxt, ops),
+                                  X86_EXC_GP, 0);
+            s->op_bytes = 16;
+        }
+        else
+        {
+            vcpu_must_have(cx8);
+            s->op_bytes = 8;
+        }
+
+        old = container_of(&mmvalp->ymm[0], typeof(*old), u64[0]);
+        aux = container_of(&mmvalp->ymm[2], typeof(*aux), u64[0]);
+
+        /* Get actual old value. */
+        if ( (rc = ops->read(s->ea.mem.seg, s->ea.mem.off, old, s->op_bytes,
+                             ctxt)) != X86EMUL_OKAY )
+            goto done;
+
+        /* Get expected value. */
+        if ( s->op_bytes == 8 )
+        {
+            aux->u32[0] = regs->eax;
+            aux->u32[1] = regs->edx;
+        }
+        else
+        {
+            aux->u64[0] = regs->r(ax);
+            aux->u64[1] = regs->r(dx);
+        }
+
+        if ( memcmp(old, aux, s->op_bytes) )
+        {
+        cmpxchgNb_failed:
+            /* Expected != actual: store actual to rDX:rAX and clear ZF. */
+            regs->r(ax) = s->op_bytes == 8 ? old->u32[0] : old->u64[0];
+            regs->r(dx) = s->op_bytes == 8 ? old->u32[1] : old->u64[1];
+            regs->eflags &= ~X86_EFLAGS_ZF;
+        }
+        else
+        {
+            /*
+             * Expected == actual: Get proposed value, attempt atomic cmpxchg
+             * and set ZF if successful.
+             */
+            if ( s->op_bytes == 8 )
+            {
+                aux->u32[0] = regs->ebx;
+                aux->u32[1] = regs->ecx;
+            }
+            else
+            {
+                aux->u64[0] = regs->r(bx);
+                aux->u64[1] = regs->r(cx);
+            }
+
+            switch ( rc = ops->cmpxchg(s->ea.mem.seg, s->ea.mem.off, old, aux,
+                                       s->op_bytes, s->lock_prefix, ctxt) )
+            {
+            case X86EMUL_OKAY:
+                regs->eflags |= X86_EFLAGS_ZF;
+                break;
+
+            case X86EMUL_CMPXCHG_FAILED:
+                rc = X86EMUL_OKAY;
+                goto cmpxchgNb_failed;
+
+            default:
+                goto done;
+            }
+        }
+    }
+
+    rc = X86EMUL_OKAY;
+
+ done:
+    return rc;
+}
--- a/xen/arch/x86/x86_emulate/Makefile
+++ b/xen/arch/x86/x86_emulate/Makefile
@@ -1,2 +1,3 @@
 obj-y += 0f01.o
 obj-y += 0fae.o
+obj-y += 0fc7.o
--- a/xen/arch/x86/x86_emulate/private.h
+++ b/xen/arch/x86/x86_emulate/private.h
@@ -308,6 +308,14 @@ struct x86_emulate_state {
 #endif
 };
 
+typedef union {
+    uint64_t mmx;
+    uint64_t __attribute__ ((aligned(16))) xmm[2];
+    uint64_t __attribute__ ((aligned(32))) ymm[4];
+    uint64_t __attribute__ ((aligned(64))) zmm[8];
+    uint32_t data32[16];
+} mmval_t;
+
 struct x86_fxsr {
     uint16_t fcw;
     uint16_t fsw;
@@ -558,6 +566,12 @@ int x86emul_0fae(struct x86_emulate_stat
                  struct x86_emulate_ctxt *ctxt,
                  const struct x86_emulate_ops *ops,
                  enum x86_emulate_fpu_type *fpu_type);
+int x86emul_0fc7(struct x86_emulate_state *s,
+                 struct cpu_user_regs *regs,
+                 struct operand *dst,
+                 struct x86_emulate_ctxt *ctxt,
+                 const struct x86_emulate_ops *ops,
+                 mmval_t *mmvalp);
 
 static inline bool is_aligned(enum x86_segment seg, unsigned long offs,
                               unsigned int size, struct x86_emulate_ctxt *ctxt,
--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -687,17 +687,9 @@ struct x87_env32 {
 };
 #endif
 
-typedef union {
-    uint64_t mmx;
-    uint64_t __attribute__ ((aligned(16))) xmm[2];
-    uint64_t __attribute__ ((aligned(32))) ymm[4];
-    uint64_t __attribute__ ((aligned(64))) zmm[8];
-    uint32_t data32[16];
-} mmval_t;
-
 /*
- * While proper alignment gets specified above, this doesn't get honored by
- * the compiler for automatic variables. Use this helper to instantiate a
+ * While proper alignment gets specified in mmval_t, this doesn't get honored
+ * by the compiler for automatic variables. Use this helper to instantiate a
  * suitably aligned variable, producing a pointer to access it.
  */
 #define DECLARE_ALIGNED(type, var)                                        \
@@ -7681,174 +7673,8 @@ x86_emulate(
 #endif /* X86EMUL_NO_SIMD */
 
     case X86EMUL_OPC(0x0f, 0xc7): /* Grp9 */
-    {
-        union {
-            uint32_t u32[2];
-            uint64_t u64[2];
-        } *old, *aux;
-
-        if ( ea.type == OP_REG )
-        {
-            bool __maybe_unused carry;
-
-            switch ( modrm_reg & 7 )
-            {
-            default:
-                goto unrecognized_insn;
-
-            case 6: /* rdrand */
-#ifdef HAVE_AS_RDRAND
-                generate_exception_if(rep_prefix(), EXC_UD);
-                host_and_vcpu_must_have(rdrand);
-                dst = ea;
-                switch ( op_bytes )
-                {
-                case 2:
-                    asm ( "rdrand %w0" ASM_FLAG_OUT(, "; setc %1")
-                          : "=r" (dst.val), ASM_FLAG_OUT("=@ccc", "=qm") (carry) );
-                    break;
-                default:
-# ifdef __x86_64__
-                    asm ( "rdrand %k0" ASM_FLAG_OUT(, "; setc %1")
-                          : "=r" (dst.val), ASM_FLAG_OUT("=@ccc", "=qm") (carry) );
-                    break;
-                case 8:
-# endif
-                    asm ( "rdrand %0" ASM_FLAG_OUT(, "; setc %1")
-                          : "=r" (dst.val), ASM_FLAG_OUT("=@ccc", "=qm") (carry) );
-                    break;
-                }
-                _regs.eflags &= ~EFLAGS_MASK;
-                if ( carry )
-                    _regs.eflags |= X86_EFLAGS_CF;
-                break;
-#else
-                goto unimplemented_insn;
-#endif
-
-            case 7: /* rdseed / rdpid */
-                if ( repe_prefix() ) /* rdpid */
-                {
-                    generate_exception_if(ea.type != OP_REG, EXC_UD);
-                    vcpu_must_have(rdpid);
-                    fail_if(!ops->read_msr);
-                    if ( (rc = ops->read_msr(MSR_TSC_AUX, &msr_val,
-                                             ctxt)) != X86EMUL_OKAY )
-                        goto done;
-                    dst = ea;
-                    dst.val = msr_val;
-                    dst.bytes = 4;
-                    break;
-                }
-#ifdef HAVE_AS_RDSEED
-                generate_exception_if(rep_prefix(), EXC_UD);
-                host_and_vcpu_must_have(rdseed);
-                dst = ea;
-                switch ( op_bytes )
-                {
-                case 2:
-                    asm ( "rdseed %w0" ASM_FLAG_OUT(, "; setc %1")
-                          : "=r" (dst.val), ASM_FLAG_OUT("=@ccc", "=qm") (carry) );
-                    break;
-                default:
-# ifdef __x86_64__
-                    asm ( "rdseed %k0" ASM_FLAG_OUT(, "; setc %1")
-                          : "=r" (dst.val), ASM_FLAG_OUT("=@ccc", "=qm") (carry) );
-                    break;
-                case 8:
-# endif
-                    asm ( "rdseed %0" ASM_FLAG_OUT(, "; setc %1")
-                          : "=r" (dst.val), ASM_FLAG_OUT("=@ccc", "=qm") (carry) );
-                    break;
-                }
-                _regs.eflags &= ~EFLAGS_MASK;
-                if ( carry )
-                    _regs.eflags |= X86_EFLAGS_CF;
-                break;
-#endif
-            }
-            break;
-        }
-
-        /* cmpxchg8b/cmpxchg16b */
-        generate_exception_if((modrm_reg & 7) != 1, EXC_UD);
-        fail_if(!ops->cmpxchg);
-        if ( rex_prefix & REX_W )
-        {
-            host_and_vcpu_must_have(cx16);
-            generate_exception_if(!is_aligned(ea.mem.seg, ea.mem.off, 16,
-                                              ctxt, ops),
-                                  EXC_GP, 0);
-            op_bytes = 16;
-        }
-        else
-        {
-            vcpu_must_have(cx8);
-            op_bytes = 8;
-        }
-
-        old = container_of(&mmvalp->ymm[0], typeof(*old), u64[0]);
-        aux = container_of(&mmvalp->ymm[2], typeof(*aux), u64[0]);
-
-        /* Get actual old value. */
-        if ( (rc = ops->read(ea.mem.seg, ea.mem.off, old, op_bytes,
-                             ctxt)) != X86EMUL_OKAY )
-            goto done;
-
-        /* Get expected value. */
-        if ( !(rex_prefix & REX_W) )
-        {
-            aux->u32[0] = _regs.eax;
-            aux->u32[1] = _regs.edx;
-        }
-        else
-        {
-            aux->u64[0] = _regs.r(ax);
-            aux->u64[1] = _regs.r(dx);
-        }
-
-        if ( memcmp(old, aux, op_bytes) )
-        {
-        cmpxchgNb_failed:
-            /* Expected != actual: store actual to rDX:rAX and clear ZF. */
-            _regs.r(ax) = !(rex_prefix & REX_W) ? old->u32[0] : old->u64[0];
-            _regs.r(dx) = !(rex_prefix & REX_W) ? old->u32[1] : old->u64[1];
-            _regs.eflags &= ~X86_EFLAGS_ZF;
-        }
-        else
-        {
-            /*
-             * Expected == actual: Get proposed value, attempt atomic cmpxchg
-             * and set ZF if successful.
-             */
-            if ( !(rex_prefix & REX_W) )
-            {
-                aux->u32[0] = _regs.ebx;
-                aux->u32[1] = _regs.ecx;
-            }
-            else
-            {
-                aux->u64[0] = _regs.r(bx);
-                aux->u64[1] = _regs.r(cx);
-            }
-
-            switch ( rc = ops->cmpxchg(ea.mem.seg, ea.mem.off, old, aux,
-                                       op_bytes, lock_prefix, ctxt) )
-            {
-            case X86EMUL_OKAY:
-                _regs.eflags |= X86_EFLAGS_ZF;
-                break;
-
-            case X86EMUL_CMPXCHG_FAILED:
-                rc = X86EMUL_OKAY;
-                goto cmpxchgNb_failed;
-
-            default:
-                goto done;
-            }
-        }
-        break;
-    }
+        rc = x86emul_0fc7(state, &_regs, &dst, ctxt, ops, mmvalp);
+        goto dispatch_from_helper;
 
     case X86EMUL_OPC(0x0f, 0xc8) ... X86EMUL_OPC(0x0f, 0xcf): /* bswap */
         dst.type = OP_REG;



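For readers tracking the move: whatever shape the new x86emul_0fc7() takes,
it has to preserve the cmpxchg8b/cmpxchg16b semantics visible in the block
deleted above. A minimal self-contained model of the architectural effect
for the 8-byte case (illustrative only; the real code goes through
ops->read()/ops->cmpxchg() and additionally handles the 16-byte form):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Model of cmpxchg8b on a 64-bit memory operand; returns the new ZF. */
static bool cmpxchg8b_model(uint64_t *mem, uint32_t *eax, uint32_t *edx,
                            uint32_t ebx, uint32_t ecx)
{
    uint64_t expected = ((uint64_t)*edx << 32) | *eax;

    if ( *mem == expected )
    {
        /* Expected == actual: store ECX:EBX, set ZF. */
        *mem = ((uint64_t)ecx << 32) | ebx;
        return true;
    }

    /* Expected != actual: load actual into EDX:EAX, clear ZF. */
    *eax = (uint32_t)*mem;
    *edx = *mem >> 32;
    return false;
}

int main(void)
{
    uint64_t mem = 0x1122334455667788ULL;
    uint32_t eax = 0x55667788, edx = 0x11223344;

    printf("ZF=%d mem=%#llx\n",
           cmpxchg8b_model(&mem, &eax, &edx, 0xdeadbeef, 0xcafef00d),
           (unsigned long long)mem);
    return 0;
}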
^ permalink raw reply	[flat|nested] 8+ messages in thread

* [PATCH 4/7] x86emul: split off FPU opcode handling
  2021-08-11 12:21 [PATCH 0/7] x86emul: a few small steps towards disintegration Jan Beulich
                   ` (2 preceding siblings ...)
  2021-08-11 12:23 ` [PATCH 3/7] x86emul: split off opcode 0fc7 handling Jan Beulich
@ 2021-08-11 12:24 ` Jan Beulich
  2021-08-11 12:24 ` [PATCH 5/7] x86emul: split off insn decoding Jan Beulich
                   ` (2 subsequent siblings)
  6 siblings, 0 replies; 8+ messages in thread
From: Jan Beulich @ 2021-08-11 12:24 UTC (permalink / raw)
  To: xen-devel; +Cc: Andrew Cooper, Wei Liu, Roger Pau Monné

Some of the helper functions/macros are needed only for FPU opcode
handling, and the code is otherwise relatively independent of other
parts of the emulator.

Code moved gets slightly adjusted in a few places, e.g. replacing EXC_*
by X86_EXC_* (such that EXC_* don't need to move as well; we want these
to be phased out anyway).

Signed-off-by: Jan Beulich <jbeulich@suse.com>
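
One pattern in the new helper deserves a note, since it is what lets macros
like get_fpu() and invoke_stub() move verbatim: state which x86_emulate()
keeps in locals is handed to x86emul_fpu() by pointer, and the bare name is
then re-established with a function-scope #define (self-referencing macros
don't recurse, so the expansion terminates). A reduced, self-contained
sketch of just that trick, with made-up names:

#include <stdio.h>

/* Take the out-parameter by pointer ... */
static void set_len(unsigned int *insn_bytes)
/* ... then alias the bare name to its dereference, so code moved
 * verbatim (which assigns to "insn_bytes") keeps compiling. */
#define insn_bytes (*insn_bytes)
{
    insn_bytes = 2;   /* expands to (*insn_bytes) = 2 */
}
#undef insn_bytes

int main(void)
{
    unsigned int len = 0;

    set_len(&len);
    printf("%u\n", len);   /* prints 2 */
    return 0;
}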

--- a/tools/fuzz/x86_instruction_emulator/Makefile
+++ b/tools/fuzz/x86_instruction_emulator/Makefile
@@ -36,6 +36,7 @@ x86_emulate.h := x86-emulate.h x86_emula
 
 OBJS := fuzz-emul.o x86-emulate.o
 OBJS += x86_emulate/0f01.o x86_emulate/0fae.o x86_emulate/0fc7.o
+OBJS += x86_emulate/fpu.o
 
 # x86-emulate.c will be implicit for both
 x86-emulate.o x86-emulate-cov.o: x86_emulate/x86_emulate.c $(x86_emulate.h) x86_emulate/private.h
--- a/tools/tests/x86_emulator/Makefile
+++ b/tools/tests/x86_emulator/Makefile
@@ -252,6 +252,7 @@ endif # 32-bit override
 
 OBJS := x86-emulate.o cpuid.o test_x86_emulator.o evex-disp8.o predicates.o wrappers.o
 OBJS += x86_emulate/0f01.o x86_emulate/0fae.o x86_emulate/0fc7.o
+OBJS += x86_emulate/fpu.o
 
 $(TARGET): $(OBJS)
 	$(HOSTCC) $(HOSTCFLAGS) -o $@ $^
--- a/tools/tests/x86_emulator/x86-emulate.c
+++ b/tools/tests/x86_emulator/x86-emulate.c
@@ -29,12 +29,6 @@
 # define __OP          "r"  /* Operand Prefix */
 #endif
 
-#define get_stub(stb) ({                         \
-    assert(!(stb).addr);                         \
-    (void *)((stb).addr = (uintptr_t)(stb).buf); \
-})
-#define put_stub(stb) ((stb).addr = 0)
-
 uint32_t mxcsr_mask = 0x0000ffbf;
 struct cpuid_policy cp;
 
--- a/xen/arch/x86/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate.c
@@ -9,7 +9,6 @@
  *    Keir Fraser <keir@xen.org>
  */
 
-#include <xen/domain_page.h>
 #include <xen/err.h>
 #include <xen/event.h>
 #include <asm/x86_emulate.h>
@@ -26,21 +25,6 @@
 #define cpu_has_amd_erratum(nr) \
         cpu_has_amd_erratum(&current_cpu_data, AMD_ERRATUM_##nr)
 
-#define get_stub(stb) ({                                        \
-    BUILD_BUG_ON(STUB_BUF_SIZE / 2 < MAX_INST_LEN + 1);         \
-    ASSERT(!(stb).ptr);                                         \
-    (stb).addr = this_cpu(stubs.addr) + STUB_BUF_SIZE / 2;      \
-    memset(((stb).ptr = map_domain_page(_mfn(this_cpu(stubs.mfn)))) +  \
-           ((stb).addr & ~PAGE_MASK), 0xcc, STUB_BUF_SIZE / 2);        \
-})
-#define put_stub(stb) ({                                   \
-    if ( (stb).ptr )                                       \
-    {                                                      \
-        unmap_domain_page((stb).ptr);                      \
-        (stb).ptr = NULL;                                  \
-    }                                                      \
-})
-
 #define FXSAVE_AREA current->arch.fpu_ctxt
 
 #include "x86_emulate/x86_emulate.c"
--- a/xen/arch/x86/x86_emulate/Makefile
+++ b/xen/arch/x86/x86_emulate/Makefile
@@ -1,3 +1,4 @@
 obj-y += 0f01.o
 obj-y += 0fae.o
 obj-y += 0fc7.o
+obj-$(CONFIG_HVM) += fpu.o
--- /dev/null
+++ b/xen/arch/x86/x86_emulate/fpu.c
@@ -0,0 +1,491 @@
+/******************************************************************************
+ * fpu.c - helper for x86_emulate.c
+ *
+ * Generic x86 (32-bit and 64-bit) instruction decoder and emulator.
+ *
+ * Copyright (c) 2005-2007 Keir Fraser
+ * Copyright (c) 2005-2007 XenSource Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "private.h"
+
+#ifdef __XEN__
+# include <asm/amd.h>
+# define cpu_has_amd_erratum(nr) \
+         cpu_has_amd_erratum(&current_cpu_data, AMD_ERRATUM_##nr)
+#else
+# define cpu_has_amd_erratum(nr) 0
+#endif
+
+/* Floating point status word definitions. */
+#define FSW_ES    (1U << 7)
+
+static inline bool fpu_check_write(void)
+{
+    uint16_t fsw;
+
+    asm ( "fnstsw %0" : "=am" (fsw) );
+
+    return !(fsw & FSW_ES);
+}
+
+#define emulate_fpu_insn_memdst(opc, ext, arg)                          \
+do {                                                                    \
+    /* ModRM: mod=0, reg=ext, rm=0, i.e. a (%rax) operand */            \
+    *insn_bytes = 2;                                                    \
+    memcpy(get_stub(stub),                                              \
+           ((uint8_t[]){ opc, ((ext) & 7) << 3, 0xc3 }), 3);            \
+    invoke_stub("", "", "+m" (arg) : "a" (&(arg)));                     \
+    put_stub(stub);                                                     \
+} while (0)
+
+#define emulate_fpu_insn_memsrc(opc, ext, arg)                          \
+do {                                                                    \
+    /* ModRM: mod=0, reg=ext, rm=0, i.e. a (%rax) operand */            \
+    memcpy(get_stub(stub),                                              \
+           ((uint8_t[]){ opc, ((ext) & 7) << 3, 0xc3 }), 3);            \
+    invoke_stub("", "", "=m" (dummy) : "m" (arg), "a" (&(arg)));        \
+    put_stub(stub);                                                     \
+} while (0)
+
+#define emulate_fpu_insn_stub(bytes...)                                 \
+do {                                                                    \
+    unsigned int nr_ = sizeof((uint8_t[]){ bytes });                    \
+    memcpy(get_stub(stub), ((uint8_t[]){ bytes, 0xc3 }), nr_ + 1);      \
+    invoke_stub("", "", "=m" (dummy) : "i" (0));                        \
+    put_stub(stub);                                                     \
+} while (0)
+
+#define emulate_fpu_insn_stub_eflags(bytes...)                          \
+do {                                                                    \
+    unsigned int nr_ = sizeof((uint8_t[]){ bytes });                    \
+    unsigned long tmp_;                                                 \
+    memcpy(get_stub(stub), ((uint8_t[]){ bytes, 0xc3 }), nr_ + 1);      \
+    invoke_stub(_PRE_EFLAGS("[eflags]", "[mask]", "[tmp]"),             \
+                _POST_EFLAGS("[eflags]", "[mask]", "[tmp]"),            \
+                [eflags] "+g" (regs->eflags), [tmp] "=&r" (tmp_)        \
+                : [mask] "i" (X86_EFLAGS_ZF|X86_EFLAGS_PF|X86_EFLAGS_CF)); \
+    put_stub(stub);                                                     \
+} while (0)
+
+int x86emul_fpu(struct x86_emulate_state *s,
+                struct cpu_user_regs *regs,
+                struct operand *dst,
+                struct operand *src,
+                struct x86_emulate_ctxt *ctxt,
+                const struct x86_emulate_ops *ops,
+                unsigned int *insn_bytes,
+                enum x86_emulate_fpu_type *fpu_type,
+#define fpu_type (*fpu_type) /* for get_fpu() */
+                struct stub_exn *stub_exn,
+#define stub_exn (*stub_exn) /* for invoke_stub() */
+                mmval_t *mmvalp)
+{
+    uint8_t b;
+    int rc;
+    struct x86_emulate_stub stub = {};
+
+    switch ( b = ctxt->opcode )
+    {
+        unsigned long dummy;
+
+    case 0x9b:  /* wait/fwait */
+        host_and_vcpu_must_have(fpu);
+        get_fpu(X86EMUL_FPU_wait);
+        emulate_fpu_insn_stub(b);
+        break;
+
+    case 0xd8: /* FPU 0xd8 */
+        host_and_vcpu_must_have(fpu);
+        get_fpu(X86EMUL_FPU_fpu);
+        switch ( s->modrm )
+        {
+        case 0xc0 ... 0xc7: /* fadd %stN,%st */
+        case 0xc8 ... 0xcf: /* fmul %stN,%st */
+        case 0xd0 ... 0xd7: /* fcom %stN,%st */
+        case 0xd8 ... 0xdf: /* fcomp %stN,%st */
+        case 0xe0 ... 0xe7: /* fsub %stN,%st */
+        case 0xe8 ... 0xef: /* fsubr %stN,%st */
+        case 0xf0 ... 0xf7: /* fdiv %stN,%st */
+        case 0xf8 ... 0xff: /* fdivr %stN,%st */
+            emulate_fpu_insn_stub(0xd8, s->modrm);
+            break;
+        default:
+        fpu_memsrc32:
+            ASSERT(s->ea.type == OP_MEM);
+            if ( (rc = ops->read(s->ea.mem.seg, s->ea.mem.off, &src->val,
+                                 4, ctxt)) != X86EMUL_OKAY )
+                goto done;
+            emulate_fpu_insn_memsrc(b, s->modrm_reg & 7, src->val);
+            break;
+        }
+        break;
+
+    case 0xd9: /* FPU 0xd9 */
+        host_and_vcpu_must_have(fpu);
+        get_fpu(X86EMUL_FPU_fpu);
+        switch ( s->modrm )
+        {
+        case 0xfb: /* fsincos */
+            fail_if(cpu_has_amd_erratum(573));
+            /* fall through */
+        case 0xc0 ... 0xc7: /* fld %stN */
+        case 0xc8 ... 0xcf: /* fxch %stN */
+        case 0xd0: /* fnop */
+        case 0xd8 ... 0xdf: /* fstp %stN (alternative encoding) */
+        case 0xe0: /* fchs */
+        case 0xe1: /* fabs */
+        case 0xe4: /* ftst */
+        case 0xe5: /* fxam */
+        case 0xe8: /* fld1 */
+        case 0xe9: /* fldl2t */
+        case 0xea: /* fldl2e */
+        case 0xeb: /* fldpi */
+        case 0xec: /* fldlg2 */
+        case 0xed: /* fldln2 */
+        case 0xee: /* fldz */
+        case 0xf0: /* f2xm1 */
+        case 0xf1: /* fyl2x */
+        case 0xf2: /* fptan */
+        case 0xf3: /* fpatan */
+        case 0xf4: /* fxtract */
+        case 0xf5: /* fprem1 */
+        case 0xf6: /* fdecstp */
+        case 0xf7: /* fincstp */
+        case 0xf8: /* fprem */
+        case 0xf9: /* fyl2xp1 */
+        case 0xfa: /* fsqrt */
+        case 0xfc: /* frndint */
+        case 0xfd: /* fscale */
+        case 0xfe: /* fsin */
+        case 0xff: /* fcos */
+            emulate_fpu_insn_stub(0xd9, s->modrm);
+            break;
+        default:
+            generate_exception_if(s->ea.type != OP_MEM, X86_EXC_UD);
+            switch ( s->modrm_reg & 7 )
+            {
+            case 0: /* fld m32fp */
+                goto fpu_memsrc32;
+            case 2: /* fst m32fp */
+            case 3: /* fstp m32fp */
+            fpu_memdst32:
+                *dst = s->ea;
+                dst->bytes = 4;
+                emulate_fpu_insn_memdst(b, s->modrm_reg & 7, dst->val);
+                break;
+            case 4: /* fldenv */
+                /* Raise #MF now if there are pending unmasked exceptions. */
+                emulate_fpu_insn_stub(0xd9, 0xd0 /* fnop */);
+                /* fall through */
+            case 6: /* fnstenv */
+                fail_if(!ops->blk);
+                s->blk = s->modrm_reg & 2 ? blk_fst : blk_fld;
+                /*
+                 * REX is meaningless for these insns by this point - (ab)use
+                 * the field to communicate real vs protected mode to ->blk().
+                 */
+                s->rex_prefix = in_protmode(ctxt, ops);
+                if ( (rc = ops->blk(s->ea.mem.seg, s->ea.mem.off, NULL,
+                                    s->op_bytes > 2 ? sizeof(struct x87_env32)
+                                                    : sizeof(struct x87_env16),
+                                    &regs->eflags,
+                                    s, ctxt)) != X86EMUL_OKAY )
+                    goto done;
+                s->fpu_ctrl = true;
+                break;
+            case 5: /* fldcw m2byte */
+                s->fpu_ctrl = true;
+            fpu_memsrc16:
+                if ( (rc = ops->read(s->ea.mem.seg, s->ea.mem.off, &src->val,
+                                     2, ctxt)) != X86EMUL_OKAY )
+                    goto done;
+                emulate_fpu_insn_memsrc(b, s->modrm_reg & 7, src->val);
+                break;
+            case 7: /* fnstcw m2byte */
+                s->fpu_ctrl = true;
+            fpu_memdst16:
+                *dst = s->ea;
+                dst->bytes = 2;
+                emulate_fpu_insn_memdst(b, s->modrm_reg & 7, dst->val);
+                break;
+            default:
+                generate_exception(X86_EXC_UD);
+            }
+            /*
+             * Control instructions can't raise FPU exceptions, so we need
+             * to consider suppressing writes only for non-control ones.
+             */
+            if ( dst->type == OP_MEM && !s->fpu_ctrl && !fpu_check_write() )
+                dst->type = OP_NONE;
+        }
+        break;
+
+    case 0xda: /* FPU 0xda */
+        host_and_vcpu_must_have(fpu);
+        get_fpu(X86EMUL_FPU_fpu);
+        switch ( s->modrm )
+        {
+        case 0xc0 ... 0xc7: /* fcmovb %stN */
+        case 0xc8 ... 0xcf: /* fcmove %stN */
+        case 0xd0 ... 0xd7: /* fcmovbe %stN */
+        case 0xd8 ... 0xdf: /* fcmovu %stN */
+            vcpu_must_have(cmov);
+            emulate_fpu_insn_stub_eflags(0xda, s->modrm);
+            break;
+        case 0xe9:          /* fucompp */
+            emulate_fpu_insn_stub(0xda, s->modrm);
+            break;
+        default:
+            generate_exception_if(s->ea.type != OP_MEM, X86_EXC_UD);
+            goto fpu_memsrc32;
+        }
+        break;
+
+    case 0xdb: /* FPU 0xdb */
+        host_and_vcpu_must_have(fpu);
+        get_fpu(X86EMUL_FPU_fpu);
+        switch ( s->modrm )
+        {
+        case 0xc0 ... 0xc7: /* fcmovnb %stN */
+        case 0xc8 ... 0xcf: /* fcmovne %stN */
+        case 0xd0 ... 0xd7: /* fcmovnbe %stN */
+        case 0xd8 ... 0xdf: /* fcmovnu %stN */
+        case 0xe8 ... 0xef: /* fucomi %stN */
+        case 0xf0 ... 0xf7: /* fcomi %stN */
+            vcpu_must_have(cmov);
+            emulate_fpu_insn_stub_eflags(0xdb, s->modrm);
+            break;
+        case 0xe0: /* fneni - 8087 only, ignored by 287 */
+        case 0xe1: /* fndisi - 8087 only, ignored by 287 */
+        case 0xe2: /* fnclex */
+        case 0xe3: /* fninit */
+        case 0xe4: /* fnsetpm - 287 only, ignored by 387 */
+        /* case 0xe5: frstpm - 287 only, #UD on 387 */
+            s->fpu_ctrl = true;
+            emulate_fpu_insn_stub(0xdb, s->modrm);
+            break;
+        default:
+            generate_exception_if(s->ea.type != OP_MEM, X86_EXC_UD);
+            switch ( s->modrm_reg & 7 )
+            {
+            case 0: /* fild m32i */
+                goto fpu_memsrc32;
+            case 1: /* fisttp m32i */
+                host_and_vcpu_must_have(sse3);
+                /* fall through */
+            case 2: /* fist m32i */
+            case 3: /* fistp m32i */
+                goto fpu_memdst32;
+            case 5: /* fld m80fp */
+            fpu_memsrc80:
+                if ( (rc = ops->read(s->ea.mem.seg, s->ea.mem.off, mmvalp,
+                                     10, ctxt)) != X86EMUL_OKAY )
+                    goto done;
+                emulate_fpu_insn_memsrc(b, s->modrm_reg & 7, *mmvalp);
+                break;
+            case 7: /* fstp m80fp */
+            fpu_memdst80:
+                fail_if(!ops->write);
+                emulate_fpu_insn_memdst(b, s->modrm_reg & 7, *mmvalp);
+                if ( fpu_check_write() &&
+                     (rc = ops->write(s->ea.mem.seg, s->ea.mem.off, mmvalp,
+                                      10, ctxt)) != X86EMUL_OKAY )
+                    goto done;
+                break;
+            default:
+                generate_exception(X86_EXC_UD);
+            }
+        }
+        break;
+
+    case 0xdc: /* FPU 0xdc */
+        host_and_vcpu_must_have(fpu);
+        get_fpu(X86EMUL_FPU_fpu);
+        switch ( s->modrm )
+        {
+        case 0xc0 ... 0xc7: /* fadd %st,%stN */
+        case 0xc8 ... 0xcf: /* fmul %st,%stN */
+        case 0xd0 ... 0xd7: /* fcom %stN,%st (alternative encoding) */
+        case 0xd8 ... 0xdf: /* fcomp %stN,%st (alternative encoding) */
+        case 0xe0 ... 0xe7: /* fsubr %st,%stN */
+        case 0xe8 ... 0xef: /* fsub %st,%stN */
+        case 0xf0 ... 0xf7: /* fdivr %st,%stN */
+        case 0xf8 ... 0xff: /* fdiv %st,%stN */
+            emulate_fpu_insn_stub(0xdc, s->modrm);
+            break;
+        default:
+        fpu_memsrc64:
+            ASSERT(s->ea.type == OP_MEM);
+            if ( (rc = ops->read(s->ea.mem.seg, s->ea.mem.off, &src->val,
+                                 8, ctxt)) != X86EMUL_OKAY )
+                goto done;
+            emulate_fpu_insn_memsrc(b, s->modrm_reg & 7, src->val);
+            break;
+        }
+        break;
+
+    case 0xdd: /* FPU 0xdd */
+        host_and_vcpu_must_have(fpu);
+        get_fpu(X86EMUL_FPU_fpu);
+        switch ( s->modrm )
+        {
+        case 0xc0 ... 0xc7: /* ffree %stN */
+        case 0xc8 ... 0xcf: /* fxch %stN (alternative encoding) */
+        case 0xd0 ... 0xd7: /* fst %stN */
+        case 0xd8 ... 0xdf: /* fstp %stN */
+        case 0xe0 ... 0xe7: /* fucom %stN */
+        case 0xe8 ... 0xef: /* fucomp %stN */
+            emulate_fpu_insn_stub(0xdd, s->modrm);
+            break;
+        default:
+            generate_exception_if(s->ea.type != OP_MEM, X86_EXC_UD);
+            switch ( s->modrm_reg & 7 )
+            {
+            case 0: /* fld m64fp */;
+                goto fpu_memsrc64;
+            case 1: /* fisttp m64i */
+                host_and_vcpu_must_have(sse3);
+                /* fall through */
+            case 2: /* fst m64fp */
+            case 3: /* fstp m64fp */
+            fpu_memdst64:
+                *dst = s->ea;
+                dst->bytes = 8;
+                emulate_fpu_insn_memdst(b, s->modrm_reg & 7, dst->val);
+                break;
+            case 4: /* frstor */
+                /* Raise #MF now if there are pending unmasked exceptions. */
+                emulate_fpu_insn_stub(0xd9, 0xd0 /* fnop */);
+                /* fall through */
+            case 6: /* fnsave */
+                fail_if(!ops->blk);
+                s->blk = s->modrm_reg & 2 ? blk_fst : blk_fld;
+                /*
+                 * REX is meaningless for these insns by this point - (ab)use
+                 * the field to communicate real vs protected mode to ->blk().
+                 */
+                s->rex_prefix = in_protmode(ctxt, ops);
+                if ( (rc = ops->blk(s->ea.mem.seg, s->ea.mem.off, NULL,
+                                    s->op_bytes > 2 ? sizeof(struct x87_env32) + 80
+                                                    : sizeof(struct x87_env16) + 80,
+                                    &regs->eflags,
+                                    s, ctxt)) != X86EMUL_OKAY )
+                    goto done;
+                s->fpu_ctrl = true;
+                break;
+            case 7: /* fnstsw m2byte */
+                s->fpu_ctrl = true;
+                goto fpu_memdst16;
+            default:
+                generate_exception(X86_EXC_UD);
+            }
+            /*
+             * Control instructions can't raise FPU exceptions, so we need
+             * to consider suppressing writes only for non-control ones.
+             */
+            if ( dst->type == OP_MEM && !s->fpu_ctrl && !fpu_check_write() )
+                dst->type = OP_NONE;
+        }
+        break;
+
+    case 0xde: /* FPU 0xde */
+        host_and_vcpu_must_have(fpu);
+        get_fpu(X86EMUL_FPU_fpu);
+        switch ( s->modrm )
+        {
+        case 0xc0 ... 0xc7: /* faddp %stN */
+        case 0xc8 ... 0xcf: /* fmulp %stN */
+        case 0xd0 ... 0xd7: /* fcomp %stN (alternative encoding) */
+        case 0xd9: /* fcompp */
+        case 0xe0 ... 0xe7: /* fsubrp %stN */
+        case 0xe8 ... 0xef: /* fsubp %stN */
+        case 0xf0 ... 0xf7: /* fdivrp %stN */
+        case 0xf8 ... 0xff: /* fdivp %stN */
+            emulate_fpu_insn_stub(0xde, s->modrm);
+            break;
+        default:
+            generate_exception_if(s->ea.type != OP_MEM, X86_EXC_UD);
+            emulate_fpu_insn_memsrc(b, s->modrm_reg & 7, src->val);
+            break;
+        }
+        break;
+
+    case 0xdf: /* FPU 0xdf */
+        host_and_vcpu_must_have(fpu);
+        get_fpu(X86EMUL_FPU_fpu);
+        switch ( s->modrm )
+        {
+        case 0xe0:
+            /* fnstsw %ax */
+            s->fpu_ctrl = true;
+            dst->bytes = 2;
+            dst->type = OP_REG;
+            dst->reg = (void *)&regs->ax;
+            emulate_fpu_insn_memdst(b, s->modrm_reg & 7, dst->val);
+            break;
+        case 0xe8 ... 0xef: /* fucomip %stN */
+        case 0xf0 ... 0xf7: /* fcomip %stN */
+            vcpu_must_have(cmov);
+            emulate_fpu_insn_stub_eflags(0xdf, s->modrm);
+            break;
+        case 0xc0 ... 0xc7: /* ffreep %stN */
+        case 0xc8 ... 0xcf: /* fxch %stN (alternative encoding) */
+        case 0xd0 ... 0xd7: /* fstp %stN (alternative encoding) */
+        case 0xd8 ... 0xdf: /* fstp %stN (alternative encoding) */
+            emulate_fpu_insn_stub(0xdf, s->modrm);
+            break;
+        default:
+            generate_exception_if(s->ea.type != OP_MEM, X86_EXC_UD);
+            switch ( s->modrm_reg & 7 )
+            {
+            case 0: /* fild m16i */
+                goto fpu_memsrc16;
+            case 1: /* fisttp m16i */
+                host_and_vcpu_must_have(sse3);
+                /* fall through */
+            case 2: /* fist m16i */
+            case 3: /* fistp m16i */
+                goto fpu_memdst16;
+            case 4: /* fbld m80dec */
+                goto fpu_memsrc80;
+            case 5: /* fild m64i */
+                dst->type = OP_NONE;
+                goto fpu_memsrc64;
+            case 6: /* fbstp packed bcd */
+                goto fpu_memdst80;
+            case 7: /* fistp m64i */
+                goto fpu_memdst64;
+            }
+        }
+        break;
+
+    default:
+        ASSERT_UNREACHABLE();
+        return X86EMUL_UNHANDLEABLE;
+    }
+
+    rc = X86EMUL_OKAY;
+
+ done:
+    put_stub(stub);
+    return rc;
+
+#ifdef __XEN__
+ emulation_stub_failure:
+    return X86EMUL_stub_failure;
+#endif
+}
--- a/xen/arch/x86/x86_emulate/private.h
+++ b/xen/arch/x86/x86_emulate/private.h
@@ -339,12 +339,57 @@ struct x86_fxsr {
     uint64_t avl[6];
 };
 
+#ifndef X86EMUL_NO_FPU
+struct x87_env16 {
+    uint16_t fcw;
+    uint16_t fsw;
+    uint16_t ftw;
+    union {
+        struct {
+            uint16_t fip_lo;
+            uint16_t fop:11, :1, fip_hi:4;
+            uint16_t fdp_lo;
+            uint16_t :12, fdp_hi:4;
+        } real;
+        struct {
+            uint16_t fip;
+            uint16_t fcs;
+            uint16_t fdp;
+            uint16_t fds;
+        } prot;
+    } mode;
+};
+
+struct x87_env32 {
+    uint32_t fcw:16, :16;
+    uint32_t fsw:16, :16;
+    uint32_t ftw:16, :16;
+    union {
+        struct {
+            /* some CPUs/FPUs also store the full FIP here */
+            uint32_t fip_lo:16, :16;
+            uint32_t fop:11, :1, fip_hi:16, :4;
+            /* some CPUs/FPUs also store the full FDP here */
+            uint32_t fdp_lo:16, :16;
+            uint32_t :12, fdp_hi:16, :4;
+        } real;
+        struct {
+            uint32_t fip;
+            uint32_t fcs:16, fop:11, :5;
+            uint32_t fdp;
+            uint32_t fds:16, :16;
+        } prot;
+    } mode;
+};
+#endif
+
 /*
  * Externally visible return codes from x86_emulate() are non-negative.
  * Use negative values for internal state change indicators from helpers
  * to the main function.
  */
 #define X86EMUL_rdtsc        (-1)
+#define X86EMUL_stub_failure (-2)
 
 /*
  * These EFLAGS bits are restored from saved value during emulation, and
@@ -541,6 +586,113 @@ amd_like(const struct x86_emulate_ctxt *
 # define host_and_vcpu_must_have(feat) vcpu_must_have(feat)
 #endif
 
+/*
+ * Instruction emulation:
+ * Most instructions are emulated directly via a fragment of inline assembly
+ * code. This allows us to save/restore EFLAGS and thus very easily pick up
+ * any modified flags.
+ */
+
+#if defined(__x86_64__)
+#define _LO32 "k"          /* force 32-bit operand */
+#define _STK  "%%rsp"      /* stack pointer */
+#define _BYTES_PER_LONG "8"
+#elif defined(__i386__)
+#define _LO32 ""           /* force 32-bit operand */
+#define _STK  "%%esp"      /* stack pointer */
+#define _BYTES_PER_LONG "4"
+#endif
+
+/* Before executing instruction: restore necessary bits in EFLAGS. */
+#define _PRE_EFLAGS(_sav, _msk, _tmp)                           \
+/* EFLAGS = (_sav & _msk) | (EFLAGS & ~_msk); _sav &= ~_msk; */ \
+"movl %"_LO32 _sav",%"_LO32 _tmp"; "                            \
+"push %"_tmp"; "                                                \
+"push %"_tmp"; "                                                \
+"movl %"_msk",%"_LO32 _tmp"; "                                  \
+"andl %"_LO32 _tmp",("_STK"); "                                 \
+"pushf; "                                                       \
+"notl %"_LO32 _tmp"; "                                          \
+"andl %"_LO32 _tmp",("_STK"); "                                 \
+"andl %"_LO32 _tmp",2*"_BYTES_PER_LONG"("_STK"); "              \
+"pop  %"_tmp"; "                                                \
+"orl  %"_LO32 _tmp",("_STK"); "                                 \
+"popf; "                                                        \
+"pop  %"_tmp"; "                                                \
+"movl %"_LO32 _tmp",%"_LO32 _sav"; "
+
+/* After executing instruction: write-back necessary bits in EFLAGS. */
+#define _POST_EFLAGS(_sav, _msk, _tmp)          \
+/* _sav |= EFLAGS & _msk; */                    \
+"pushf; "                                       \
+"pop  %"_tmp"; "                                \
+"andl %"_msk",%"_LO32 _tmp"; "                  \
+"orl  %"_LO32 _tmp",%"_LO32 _sav"; "
+
+#ifdef __XEN__
+
+# include <xen/domain_page.h>
+# include <asm/uaccess.h>
+
+# define get_stub(stb) ({                                             \
+    BUILD_BUG_ON(STUB_BUF_SIZE / 2 < MAX_INST_LEN + 1);               \
+    ASSERT(!(stb).ptr);                                               \
+    (stb).addr = this_cpu(stubs.addr) + STUB_BUF_SIZE / 2;            \
+    memset(((stb).ptr = map_domain_page(_mfn(this_cpu(stubs.mfn)))) + \
+           ((stb).addr & ~PAGE_MASK), 0xcc, STUB_BUF_SIZE / 2);       \
+})
+
+# define put_stub(stb) ({                                  \
+    if ( (stb).ptr )                                       \
+    {                                                      \
+        unmap_domain_page((stb).ptr);                      \
+        (stb).ptr = NULL;                                  \
+    }                                                      \
+})
+
+struct stub_exn {
+    union stub_exception_token info;
+    unsigned int line;
+};
+
+# define invoke_stub(pre, post, constraints...) do {                    \
+    stub_exn.info = (union stub_exception_token) { .raw = ~0 };         \
+    stub_exn.line = __LINE__; /* Utility outweighs livepatching cost */ \
+    block_speculation(); /* SCSB */                                     \
+    asm volatile ( pre "\n\tINDIRECT_CALL %[stub]\n\t" post "\n"        \
+                   ".Lret%=:\n\t"                                       \
+                   ".pushsection .fixup,\"ax\"\n"                       \
+                   ".Lfix%=:\n\t"                                       \
+                   "pop %[exn]\n\t"                                     \
+                   "jmp .Lret%=\n\t"                                    \
+                   ".popsection\n\t"                                    \
+                   _ASM_EXTABLE(.Lret%=, .Lfix%=)                       \
+                   : [exn] "+g" (stub_exn.info) ASM_CALL_CONSTRAINT,    \
+                     constraints,                                       \
+                     [stub] "r" (stub.func),                            \
+                     "m" (*(uint8_t(*)[MAX_INST_LEN + 1])stub.ptr) );   \
+    if ( unlikely(~stub_exn.info.raw) )                                 \
+        goto emulation_stub_failure;                                    \
+} while (0)
+
+#else /* !__XEN__ */
+
+# define get_stub(stb) ({                        \
+    assert(!(stb).addr);                         \
+    (void *)((stb).addr = (uintptr_t)(stb).buf); \
+})
+
+# define put_stub(stb) ((stb).addr = 0)
+
+struct stub_exn {};
+
+# define invoke_stub(pre, post, constraints...)                         \
+    asm volatile ( pre "\n\tcall *%[stub]\n\t" post                     \
+                   : constraints, [stub] "rm" (stub.func),              \
+                     "m" (*(typeof(stub.buf) *)stub.addr) )
+
+#endif /* __XEN__ */
+
 int x86emul_get_cpl(struct x86_emulate_ctxt *ctxt,
                     const struct x86_emulate_ops *ops);
 
@@ -554,6 +706,16 @@ do {
     if ( rc ) goto done;                                        \
 } while (0)
 
+int x86emul_fpu(struct x86_emulate_state *s,
+                struct cpu_user_regs *regs,
+                struct operand *dst,
+                struct operand *src,
+                struct x86_emulate_ctxt *ctxt,
+                const struct x86_emulate_ops *ops,
+                unsigned int *insn_bytes,
+                enum x86_emulate_fpu_type *fpu_type,
+                struct stub_exn *stub_exn,
+                mmval_t *mmvalp);
 int x86emul_0f01(struct x86_emulate_state *s,
                  struct cpu_user_regs *regs,
                  struct operand *dst,
--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -643,50 +643,6 @@ static const uint8_t sse_prefix[] = { 0x
 #define PTR_POISON NULL /* 32-bit builds are for user-space, so NULL is OK. */
 #endif
 
-#ifndef X86EMUL_NO_FPU
-struct x87_env16 {
-    uint16_t fcw;
-    uint16_t fsw;
-    uint16_t ftw;
-    union {
-        struct {
-            uint16_t fip_lo;
-            uint16_t fop:11, :1, fip_hi:4;
-            uint16_t fdp_lo;
-            uint16_t :12, fdp_hi:4;
-        } real;
-        struct {
-            uint16_t fip;
-            uint16_t fcs;
-            uint16_t fdp;
-            uint16_t fds;
-        } prot;
-    } mode;
-};
-
-struct x87_env32 {
-    uint32_t fcw:16, :16;
-    uint32_t fsw:16, :16;
-    uint32_t ftw:16, :16;
-    union {
-        struct {
-            /* some CPUs/FPUs also store the full FIP here */
-            uint32_t fip_lo:16, :16;
-            uint32_t fop:11, :1, fip_hi:16, :4;
-            /* some CPUs/FPUs also store the full FDP here */
-            uint32_t fdp_lo:16, :16;
-            uint32_t :12, fdp_hi:16, :4;
-        } real;
-        struct {
-            uint32_t fip;
-            uint32_t fcs:16, fop:11, :5;
-            uint32_t fdp;
-            uint32_t fds:16, :16;
-        } prot;
-    } mode;
-};
-#endif
-
 /*
  * While proper alignment gets specified in mmval_t, this doesn't get honored
  * by the compiler for automatic variables. Use this helper to instantiate a
@@ -704,9 +660,6 @@ struct x87_env32 {
 # define ASM_FLAG_OUT(yes, no) no
 #endif
 
-/* Floating point status word definitions. */
-#define FSW_ES    (1U << 7)
-
 /* MXCSR bit definitions. */
 #define MXCSR_MM  (1U << 17)
 
@@ -737,49 +690,6 @@ struct x87_env32 {
 #define ECODE_IDT (1 << 1)
 #define ECODE_TI  (1 << 2)
 
-/*
- * Instruction emulation:
- * Most instructions are emulated directly via a fragment of inline assembly
- * code. This allows us to save/restore EFLAGS and thus very easily pick up
- * any modified flags.
- */
-
-#if defined(__x86_64__)
-#define _LO32 "k"          /* force 32-bit operand */
-#define _STK  "%%rsp"      /* stack pointer */
-#define _BYTES_PER_LONG "8"
-#elif defined(__i386__)
-#define _LO32 ""           /* force 32-bit operand */
-#define _STK  "%%esp"      /* stack pointer */
-#define _BYTES_PER_LONG "4"
-#endif
-
-/* Before executing instruction: restore necessary bits in EFLAGS. */
-#define _PRE_EFLAGS(_sav, _msk, _tmp)                           \
-/* EFLAGS = (_sav & _msk) | (EFLAGS & ~_msk); _sav &= ~_msk; */ \
-"movl %"_LO32 _sav",%"_LO32 _tmp"; "                            \
-"push %"_tmp"; "                                                \
-"push %"_tmp"; "                                                \
-"movl %"_msk",%"_LO32 _tmp"; "                                  \
-"andl %"_LO32 _tmp",("_STK"); "                                 \
-"pushf; "                                                       \
-"notl %"_LO32 _tmp"; "                                          \
-"andl %"_LO32 _tmp",("_STK"); "                                 \
-"andl %"_LO32 _tmp",2*"_BYTES_PER_LONG"("_STK"); "              \
-"pop  %"_tmp"; "                                                \
-"orl  %"_LO32 _tmp",("_STK"); "                                 \
-"popf; "                                                        \
-"pop  %"_tmp"; "                                                \
-"movl %"_LO32 _tmp",%"_LO32 _sav"; "
-
-/* After executing instruction: write-back necessary bits in EFLAGS. */
-#define _POST_EFLAGS(_sav, _msk, _tmp)          \
-/* _sav |= EFLAGS & _msk; */                    \
-"pushf; "                                       \
-"pop  %"_tmp"; "                                \
-"andl %"_msk",%"_LO32 _tmp"; "                  \
-"orl  %"_LO32 _tmp",%"_LO32 _sav"; "
-
 /* Raw emulation: instruction has two explicit operands. */
 #define __emulate_2op_nobyte(_op, src, dst, sz, eflags, wsx,wsy,wdx,wdy,   \
                              lsx,lsy,ldx,ldy, qsx,qsy,qdx,qdy, extra...)   \
@@ -913,33 +823,6 @@ do{ asm volatile (
 #define __emulate_1op_8byte(op, dst, eflags, extra...)
 #endif /* __i386__ */
 
-#ifdef __XEN__
-# define invoke_stub(pre, post, constraints...) do {                    \
-    stub_exn.info = (union stub_exception_token) { .raw = ~0 };         \
-    stub_exn.line = __LINE__; /* Utility outweighs livepatching cost */ \
-    block_speculation(); /* SCSB */                                     \
-    asm volatile ( pre "\n\tINDIRECT_CALL %[stub]\n\t" post "\n"        \
-                   ".Lret%=:\n\t"                                       \
-                   ".pushsection .fixup,\"ax\"\n"                       \
-                   ".Lfix%=:\n\t"                                       \
-                   "pop %[exn]\n\t"                                     \
-                   "jmp .Lret%=\n\t"                                    \
-                   ".popsection\n\t"                                    \
-                   _ASM_EXTABLE(.Lret%=, .Lfix%=)                       \
-                   : [exn] "+g" (stub_exn.info) ASM_CALL_CONSTRAINT,    \
-                     constraints,                                       \
-                     [stub] "r" (stub.func),                            \
-                     "m" (*(uint8_t(*)[MAX_INST_LEN + 1])stub.ptr) );   \
-    if ( unlikely(~stub_exn.info.raw) )                                 \
-        goto emulation_stub_failure;                                    \
-} while (0)
-#else
-# define invoke_stub(pre, post, constraints...)                         \
-    asm volatile ( pre "\n\tcall *%[stub]\n\t" post                     \
-                   : constraints, [stub] "rm" (stub.func),              \
-                     "m" (*(typeof(stub.buf) *)stub.addr) )
-#endif
-
 #define emulate_stub(dst, src...) do {                                  \
     unsigned long tmp;                                                  \
     invoke_stub(_PRE_EFLAGS("[efl]", "[msk]", "[tmp]"),                 \
@@ -1162,54 +1045,6 @@ static void put_fpu(
         ops->put_fpu(ctxt, X86EMUL_FPU_none, NULL);
 }
 
-static inline bool fpu_check_write(void)
-{
-    uint16_t fsw;
-
-    asm ( "fnstsw %0" : "=am" (fsw) );
-
-    return !(fsw & FSW_ES);
-}
-
-#define emulate_fpu_insn_memdst(opc, ext, arg)                          \
-do {                                                                    \
-    /* ModRM: mod=0, reg=ext, rm=0, i.e. a (%rax) operand */            \
-    insn_bytes = 2;                                                     \
-    memcpy(get_stub(stub),                                              \
-           ((uint8_t[]){ opc, ((ext) & 7) << 3, 0xc3 }), 3);            \
-    invoke_stub("", "", "+m" (arg) : "a" (&(arg)));                     \
-    put_stub(stub);                                                     \
-} while (0)
-
-#define emulate_fpu_insn_memsrc(opc, ext, arg)                          \
-do {                                                                    \
-    /* ModRM: mod=0, reg=ext, rm=0, i.e. a (%rax) operand */            \
-    memcpy(get_stub(stub),                                              \
-           ((uint8_t[]){ opc, ((ext) & 7) << 3, 0xc3 }), 3);            \
-    invoke_stub("", "", "=m" (dummy) : "m" (arg), "a" (&(arg)));        \
-    put_stub(stub);                                                     \
-} while (0)
-
-#define emulate_fpu_insn_stub(bytes...)                                 \
-do {                                                                    \
-    unsigned int nr_ = sizeof((uint8_t[]){ bytes });                    \
-    memcpy(get_stub(stub), ((uint8_t[]){ bytes, 0xc3 }), nr_ + 1);      \
-    invoke_stub("", "", "=m" (dummy) : "i" (0));                        \
-    put_stub(stub);                                                     \
-} while (0)
-
-#define emulate_fpu_insn_stub_eflags(bytes...)                          \
-do {                                                                    \
-    unsigned int nr_ = sizeof((uint8_t[]){ bytes });                    \
-    unsigned long tmp_;                                                 \
-    memcpy(get_stub(stub), ((uint8_t[]){ bytes, 0xc3 }), nr_ + 1);      \
-    invoke_stub(_PRE_EFLAGS("[eflags]", "[mask]", "[tmp]"),             \
-                _POST_EFLAGS("[eflags]", "[mask]", "[tmp]"),            \
-                [eflags] "+g" (_regs.eflags), [tmp] "=&r" (tmp_)        \
-                : [mask] "i" (X86_EFLAGS_ZF|X86_EFLAGS_PF|X86_EFLAGS_CF)); \
-    put_stub(stub);                                                     \
-} while (0)
-
 static inline unsigned long get_loop_count(
     const struct cpu_user_regs *regs,
     int ad_bytes)
@@ -3154,12 +2989,7 @@ x86_emulate(
     enum x86_emulate_fpu_type fpu_type = X86EMUL_FPU_none;
     struct x86_emulate_stub stub = {};
     DECLARE_ALIGNED(mmval_t, mmval);
-#ifdef __XEN__
-    struct {
-        union stub_exception_token info;
-        unsigned int line;
-    } stub_exn;
-#endif
+    struct stub_exn stub_exn = {};
 
     ASSERT(ops->read);
 
@@ -3950,10 +3780,10 @@ x86_emulate(
 
 #ifndef X86EMUL_NO_FPU
     case 0x9b:  /* wait/fwait */
-        host_and_vcpu_must_have(fpu);
-        get_fpu(X86EMUL_FPU_wait);
-        emulate_fpu_insn_stub(b);
-        break;
+    case 0xd8 ... 0xdf: /* FPU */
+        rc = x86emul_fpu(state, &_regs, &dst, &src, ctxt, ops,
+                         &insn_bytes, &fpu_type, &stub_exn, mmvalp);
+        goto dispatch_from_helper;
 #endif
 
     case 0x9c: /* pushf */
@@ -4364,373 +4194,6 @@ x86_emulate(
         break;
     }
 
-#ifndef X86EMUL_NO_FPU
-    case 0xd8: /* FPU 0xd8 */
-        host_and_vcpu_must_have(fpu);
-        get_fpu(X86EMUL_FPU_fpu);
-        switch ( modrm )
-        {
-        case 0xc0 ... 0xc7: /* fadd %stN,%st */
-        case 0xc8 ... 0xcf: /* fmul %stN,%st */
-        case 0xd0 ... 0xd7: /* fcom %stN,%st */
-        case 0xd8 ... 0xdf: /* fcomp %stN,%st */
-        case 0xe0 ... 0xe7: /* fsub %stN,%st */
-        case 0xe8 ... 0xef: /* fsubr %stN,%st */
-        case 0xf0 ... 0xf7: /* fdiv %stN,%st */
-        case 0xf8 ... 0xff: /* fdivr %stN,%st */
-            emulate_fpu_insn_stub(0xd8, modrm);
-            break;
-        default:
-        fpu_memsrc32:
-            ASSERT(ea.type == OP_MEM);
-            if ( (rc = ops->read(ea.mem.seg, ea.mem.off, &src.val,
-                                 4, ctxt)) != X86EMUL_OKAY )
-                goto done;
-            emulate_fpu_insn_memsrc(b, modrm_reg & 7, src.val);
-            break;
-        }
-        break;
-
-    case 0xd9: /* FPU 0xd9 */
-        host_and_vcpu_must_have(fpu);
-        get_fpu(X86EMUL_FPU_fpu);
-        switch ( modrm )
-        {
-        case 0xfb: /* fsincos */
-            fail_if(cpu_has_amd_erratum(573));
-            /* fall through */
-        case 0xc0 ... 0xc7: /* fld %stN */
-        case 0xc8 ... 0xcf: /* fxch %stN */
-        case 0xd0: /* fnop */
-        case 0xd8 ... 0xdf: /* fstp %stN (alternative encoding) */
-        case 0xe0: /* fchs */
-        case 0xe1: /* fabs */
-        case 0xe4: /* ftst */
-        case 0xe5: /* fxam */
-        case 0xe8: /* fld1 */
-        case 0xe9: /* fldl2t */
-        case 0xea: /* fldl2e */
-        case 0xeb: /* fldpi */
-        case 0xec: /* fldlg2 */
-        case 0xed: /* fldln2 */
-        case 0xee: /* fldz */
-        case 0xf0: /* f2xm1 */
-        case 0xf1: /* fyl2x */
-        case 0xf2: /* fptan */
-        case 0xf3: /* fpatan */
-        case 0xf4: /* fxtract */
-        case 0xf5: /* fprem1 */
-        case 0xf6: /* fdecstp */
-        case 0xf7: /* fincstp */
-        case 0xf8: /* fprem */
-        case 0xf9: /* fyl2xp1 */
-        case 0xfa: /* fsqrt */
-        case 0xfc: /* frndint */
-        case 0xfd: /* fscale */
-        case 0xfe: /* fsin */
-        case 0xff: /* fcos */
-            emulate_fpu_insn_stub(0xd9, modrm);
-            break;
-        default:
-            generate_exception_if(ea.type != OP_MEM, EXC_UD);
-            switch ( modrm_reg & 7 )
-            {
-            case 0: /* fld m32fp */
-                goto fpu_memsrc32;
-            case 2: /* fst m32fp */
-            case 3: /* fstp m32fp */
-            fpu_memdst32:
-                dst = ea;
-                dst.bytes = 4;
-                emulate_fpu_insn_memdst(b, modrm_reg & 7, dst.val);
-                break;
-            case 4: /* fldenv */
-                /* Raise #MF now if there are pending unmasked exceptions. */
-                emulate_fpu_insn_stub(0xd9, 0xd0 /* fnop */);
-                /* fall through */
-            case 6: /* fnstenv */
-                fail_if(!ops->blk);
-                state->blk = modrm_reg & 2 ? blk_fst : blk_fld;
-                /*
-                 * REX is meaningless for these insns by this point - (ab)use
-                 * the field to communicate real vs protected mode to ->blk().
-                 */
-                /*state->*/rex_prefix = in_protmode(ctxt, ops);
-                if ( (rc = ops->blk(ea.mem.seg, ea.mem.off, NULL,
-                                    op_bytes > 2 ? sizeof(struct x87_env32)
-                                                 : sizeof(struct x87_env16),
-                                    &_regs.eflags,
-                                    state, ctxt)) != X86EMUL_OKAY )
-                    goto done;
-                state->fpu_ctrl = true;
-                break;
-            case 5: /* fldcw m2byte */
-                state->fpu_ctrl = true;
-            fpu_memsrc16:
-                if ( (rc = ops->read(ea.mem.seg, ea.mem.off, &src.val,
-                                     2, ctxt)) != X86EMUL_OKAY )
-                    goto done;
-                emulate_fpu_insn_memsrc(b, modrm_reg & 7, src.val);
-                break;
-            case 7: /* fnstcw m2byte */
-                state->fpu_ctrl = true;
-            fpu_memdst16:
-                dst = ea;
-                dst.bytes = 2;
-                emulate_fpu_insn_memdst(b, modrm_reg & 7, dst.val);
-                break;
-            default:
-                generate_exception(EXC_UD);
-            }
-            /*
-             * Control instructions can't raise FPU exceptions, so we need
-             * to consider suppressing writes only for non-control ones.
-             */
-            if ( dst.type == OP_MEM && !state->fpu_ctrl && !fpu_check_write() )
-                dst.type = OP_NONE;
-        }
-        break;
-
-    case 0xda: /* FPU 0xda */
-        host_and_vcpu_must_have(fpu);
-        get_fpu(X86EMUL_FPU_fpu);
-        switch ( modrm )
-        {
-        case 0xc0 ... 0xc7: /* fcmovb %stN */
-        case 0xc8 ... 0xcf: /* fcmove %stN */
-        case 0xd0 ... 0xd7: /* fcmovbe %stN */
-        case 0xd8 ... 0xdf: /* fcmovu %stN */
-            vcpu_must_have(cmov);
-            emulate_fpu_insn_stub_eflags(0xda, modrm);
-            break;
-        case 0xe9:          /* fucompp */
-            emulate_fpu_insn_stub(0xda, modrm);
-            break;
-        default:
-            generate_exception_if(ea.type != OP_MEM, EXC_UD);
-            goto fpu_memsrc32;
-        }
-        break;
-
-    case 0xdb: /* FPU 0xdb */
-        host_and_vcpu_must_have(fpu);
-        get_fpu(X86EMUL_FPU_fpu);
-        switch ( modrm )
-        {
-        case 0xc0 ... 0xc7: /* fcmovnb %stN */
-        case 0xc8 ... 0xcf: /* fcmovne %stN */
-        case 0xd0 ... 0xd7: /* fcmovnbe %stN */
-        case 0xd8 ... 0xdf: /* fcmovnu %stN */
-        case 0xe8 ... 0xef: /* fucomi %stN */
-        case 0xf0 ... 0xf7: /* fcomi %stN */
-            vcpu_must_have(cmov);
-            emulate_fpu_insn_stub_eflags(0xdb, modrm);
-            break;
-        case 0xe0: /* fneni - 8087 only, ignored by 287 */
-        case 0xe1: /* fndisi - 8087 only, ignored by 287 */
-        case 0xe2: /* fnclex */
-        case 0xe3: /* fninit */
-        case 0xe4: /* fnsetpm - 287 only, ignored by 387 */
-        /* case 0xe5: frstpm - 287 only, #UD on 387 */
-            state->fpu_ctrl = true;
-            emulate_fpu_insn_stub(0xdb, modrm);
-            break;
-        default:
-            generate_exception_if(ea.type != OP_MEM, EXC_UD);
-            switch ( modrm_reg & 7 )
-            {
-            case 0: /* fild m32i */
-                goto fpu_memsrc32;
-            case 1: /* fisttp m32i */
-                host_and_vcpu_must_have(sse3);
-                /* fall through */
-            case 2: /* fist m32i */
-            case 3: /* fistp m32i */
-                goto fpu_memdst32;
-            case 5: /* fld m80fp */
-            fpu_memsrc80:
-                if ( (rc = ops->read(ea.mem.seg, ea.mem.off, mmvalp,
-                                     10, ctxt)) != X86EMUL_OKAY )
-                    goto done;
-                emulate_fpu_insn_memsrc(b, modrm_reg & 7, *mmvalp);
-                break;
-            case 7: /* fstp m80fp */
-            fpu_memdst80:
-                fail_if(!ops->write);
-                emulate_fpu_insn_memdst(b, modrm_reg & 7, *mmvalp);
-                if ( fpu_check_write() &&
-                     (rc = ops->write(ea.mem.seg, ea.mem.off, mmvalp,
-                                      10, ctxt)) != X86EMUL_OKAY )
-                    goto done;
-                break;
-            default:
-                generate_exception(EXC_UD);
-            }
-        }
-        break;
-
-    case 0xdc: /* FPU 0xdc */
-        host_and_vcpu_must_have(fpu);
-        get_fpu(X86EMUL_FPU_fpu);
-        switch ( modrm )
-        {
-        case 0xc0 ... 0xc7: /* fadd %st,%stN */
-        case 0xc8 ... 0xcf: /* fmul %st,%stN */
-        case 0xd0 ... 0xd7: /* fcom %stN,%st (alternative encoding) */
-        case 0xd8 ... 0xdf: /* fcomp %stN,%st (alternative encoding) */
-        case 0xe0 ... 0xe7: /* fsubr %st,%stN */
-        case 0xe8 ... 0xef: /* fsub %st,%stN */
-        case 0xf0 ... 0xf7: /* fdivr %st,%stN */
-        case 0xf8 ... 0xff: /* fdiv %st,%stN */
-            emulate_fpu_insn_stub(0xdc, modrm);
-            break;
-        default:
-        fpu_memsrc64:
-            ASSERT(ea.type == OP_MEM);
-            if ( (rc = ops->read(ea.mem.seg, ea.mem.off, &src.val,
-                                 8, ctxt)) != X86EMUL_OKAY )
-                goto done;
-            emulate_fpu_insn_memsrc(b, modrm_reg & 7, src.val);
-            break;
-        }
-        break;
-
-    case 0xdd: /* FPU 0xdd */
-        host_and_vcpu_must_have(fpu);
-        get_fpu(X86EMUL_FPU_fpu);
-        switch ( modrm )
-        {
-        case 0xc0 ... 0xc7: /* ffree %stN */
-        case 0xc8 ... 0xcf: /* fxch %stN (alternative encoding) */
-        case 0xd0 ... 0xd7: /* fst %stN */
-        case 0xd8 ... 0xdf: /* fstp %stN */
-        case 0xe0 ... 0xe7: /* fucom %stN */
-        case 0xe8 ... 0xef: /* fucomp %stN */
-            emulate_fpu_insn_stub(0xdd, modrm);
-            break;
-        default:
-            generate_exception_if(ea.type != OP_MEM, EXC_UD);
-            switch ( modrm_reg & 7 )
-            {
-            case 0: /* fld m64fp */;
-                goto fpu_memsrc64;
-            case 1: /* fisttp m64i */
-                host_and_vcpu_must_have(sse3);
-                /* fall through */
-            case 2: /* fst m64fp */
-            case 3: /* fstp m64fp */
-            fpu_memdst64:
-                dst = ea;
-                dst.bytes = 8;
-                emulate_fpu_insn_memdst(b, modrm_reg & 7, dst.val);
-                break;
-            case 4: /* frstor */
-                /* Raise #MF now if there are pending unmasked exceptions. */
-                emulate_fpu_insn_stub(0xd9, 0xd0 /* fnop */);
-                /* fall through */
-            case 6: /* fnsave */
-                fail_if(!ops->blk);
-                state->blk = modrm_reg & 2 ? blk_fst : blk_fld;
-                /*
-                 * REX is meaningless for these insns by this point - (ab)use
-                 * the field to communicate real vs protected mode to ->blk().
-                 */
-                /*state->*/rex_prefix = in_protmode(ctxt, ops);
-                if ( (rc = ops->blk(ea.mem.seg, ea.mem.off, NULL,
-                                    op_bytes > 2 ? sizeof(struct x87_env32) + 80
-                                                 : sizeof(struct x87_env16) + 80,
-                                    &_regs.eflags,
-                                    state, ctxt)) != X86EMUL_OKAY )
-                    goto done;
-                state->fpu_ctrl = true;
-                break;
-            case 7: /* fnstsw m2byte */
-                state->fpu_ctrl = true;
-                goto fpu_memdst16;
-            default:
-                generate_exception(EXC_UD);
-            }
-            /*
-             * Control instructions can't raise FPU exceptions, so we need
-             * to consider suppressing writes only for non-control ones.
-             */
-            if ( dst.type == OP_MEM && !state->fpu_ctrl && !fpu_check_write() )
-                dst.type = OP_NONE;
-        }
-        break;
-
-    case 0xde: /* FPU 0xde */
-        host_and_vcpu_must_have(fpu);
-        get_fpu(X86EMUL_FPU_fpu);
-        switch ( modrm )
-        {
-        case 0xc0 ... 0xc7: /* faddp %stN */
-        case 0xc8 ... 0xcf: /* fmulp %stN */
-        case 0xd0 ... 0xd7: /* fcomp %stN (alternative encoding) */
-        case 0xd9: /* fcompp */
-        case 0xe0 ... 0xe7: /* fsubrp %stN */
-        case 0xe8 ... 0xef: /* fsubp %stN */
-        case 0xf0 ... 0xf7: /* fdivrp %stN */
-        case 0xf8 ... 0xff: /* fdivp %stN */
-            emulate_fpu_insn_stub(0xde, modrm);
-            break;
-        default:
-            generate_exception_if(ea.type != OP_MEM, EXC_UD);
-            emulate_fpu_insn_memsrc(b, modrm_reg & 7, src.val);
-            break;
-        }
-        break;
-
-    case 0xdf: /* FPU 0xdf */
-        host_and_vcpu_must_have(fpu);
-        get_fpu(X86EMUL_FPU_fpu);
-        switch ( modrm )
-        {
-        case 0xe0:
-            /* fnstsw %ax */
-            state->fpu_ctrl = true;
-            dst.bytes = 2;
-            dst.type = OP_REG;
-            dst.reg = (void *)&_regs.ax;
-            emulate_fpu_insn_memdst(b, modrm_reg & 7, dst.val);
-            break;
-        case 0xe8 ... 0xef: /* fucomip %stN */
-        case 0xf0 ... 0xf7: /* fcomip %stN */
-            vcpu_must_have(cmov);
-            emulate_fpu_insn_stub_eflags(0xdf, modrm);
-            break;
-        case 0xc0 ... 0xc7: /* ffreep %stN */
-        case 0xc8 ... 0xcf: /* fxch %stN (alternative encoding) */
-        case 0xd0 ... 0xd7: /* fstp %stN (alternative encoding) */
-        case 0xd8 ... 0xdf: /* fstp %stN (alternative encoding) */
-            emulate_fpu_insn_stub(0xdf, modrm);
-            break;
-        default:
-            generate_exception_if(ea.type != OP_MEM, EXC_UD);
-            switch ( modrm_reg & 7 )
-            {
-            case 0: /* fild m16i */
-                goto fpu_memsrc16;
-            case 1: /* fisttp m16i */
-                host_and_vcpu_must_have(sse3);
-                /* fall through */
-            case 2: /* fist m16i */
-            case 3: /* fistp m16i */
-                goto fpu_memdst16;
-            case 4: /* fbld m80dec */
-                goto fpu_memsrc80;
-            case 5: /* fild m64i */
-                dst.type = OP_NONE;
-                goto fpu_memsrc64;
-            case 6: /* fbstp packed bcd */
-                goto fpu_memdst80;
-            case 7: /* fistp m64i */
-                goto fpu_memdst64;
-            }
-        }
-        break;
-#endif /* !X86EMUL_NO_FPU */
-
     case 0xe0 ... 0xe2: /* loop{,z,nz} */ {
         unsigned long count = get_loop_count(&_regs, ad_bytes);
         int do_jmp = !(_regs.eflags & X86_EFLAGS_ZF); /* loopnz */
@@ -10134,6 +9597,11 @@ x86_emulate(
         {
         case X86EMUL_rdtsc:
             goto rdtsc;
+
+#ifdef __XEN__
+        case X86EMUL_stub_failure:
+            goto emulation_stub_failure;
+#endif
         }
 
         /* Internally used state change indicators may not make it here. */


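A note on the size arithmetic in the moved fnsave/frstor path: the "+ 80"
is the eight 80-bit ST registers (8 x 10 bytes) that follow the environment
block, giving the architectural 94-byte (16-bit env) and 108-byte (32-bit
env) fsave images. A compile-time check of that arithmetic, with the
structs re-declared in reduced form purely for illustration:

#include <assert.h>
#include <stdint.h>

/* Reduced stand-ins for the x87_env{16,32} layouts in private.h. */
struct x87_env16 { uint16_t w[7]; };   /* fcw/fsw/ftw + 4 pointer words */
struct x87_env32 { uint32_t d[7]; };   /* the same fields, 32 bits each */

static_assert(sizeof(struct x87_env16) + 8 * 10 == 94,
              "16-bit fsave image is 94 bytes");
static_assert(sizeof(struct x87_env32) + 8 * 10 == 108,
              "32-bit fsave image is 108 bytes");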

^ permalink raw reply	[flat|nested] 8+ messages in thread

* [PATCH 5/7] x86emul: split off insn decoding
  2021-08-11 12:21 [PATCH 0/7] x86emul: a few small steps towards disintegration Jan Beulich
                   ` (3 preceding siblings ...)
  2021-08-11 12:24 ` [PATCH 4/7] x86emul: split off FPU opcode handling Jan Beulich
@ 2021-08-11 12:24 ` Jan Beulich
  2021-08-11 12:25 ` [PATCH 6/7] x86emul: move x86_emul_blk() to separate source file Jan Beulich
  2021-08-11 12:25 ` [PATCH 7/7] x86emul: move various utility functions to separate source files Jan Beulich
  6 siblings, 0 replies; 8+ messages in thread
From: Jan Beulich @ 2021-08-11 12:24 UTC (permalink / raw)
  To: xen-devel; +Cc: Andrew Cooper, Wei Liu, Roger Pau Monné

This is a fair chunk of code and data, and it can easily live
separately from the main emulation function.

Code moved gets slightly adjusted in a few places, e.g. replacing EXC_*
by X86_EXC_* (such that EXC_* don't need to move as well; we want these
to be phased out anyway).

Signed-off-by: Jan Beulich <jbeulich@suse.com>
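
One consequence of the xen/err.h movement visible below: the decoder keeps
returning ERR_PTR(-rc) on failure in hypervisor builds, while for the test
harness and fuzzer ERR_PTR() simply collapses to NULL, so those callers
only ever see a null pointer. A toy model of the pointer-encoded error
convention (a sketch of the idea only, not Xen's actual xen/err.h):

#include <stdio.h>

#define MAX_ERRNO 4095   /* errors live in the top page of address space */

static void *ERR_PTR(long error) { return (void *)error; }
static long PTR_ERR(const void *ptr) { return (long)ptr; }
static int IS_ERR(const void *ptr)
{
    return (unsigned long)ptr >= (unsigned long)-MAX_ERRNO;
}

int main(void)
{
    const void *state = ERR_PTR(-22);   /* e.g. a failed decode */

    if ( IS_ERR(state) )
        printf("decode failed, rc = %ld\n", -PTR_ERR(state));

    return 0;
}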

--- a/tools/fuzz/x86_instruction_emulator/Makefile
+++ b/tools/fuzz/x86_instruction_emulator/Makefile
@@ -36,7 +36,7 @@ x86_emulate.h := x86-emulate.h x86_emula
 
 OBJS := fuzz-emul.o x86-emulate.o
 OBJS += x86_emulate/0f01.o x86_emulate/0fae.o x86_emulate/0fc7.o
-OBJS += x86_emulate/fpu.o
+OBJS += x86_emulate/decode.o x86_emulate/fpu.o
 
 # x86-emulate.c will be implicit for both
 x86-emulate.o x86-emulate-cov.o: x86_emulate/x86_emulate.c $(x86_emulate.h) x86_emulate/private.h
--- a/tools/tests/x86_emulator/Makefile
+++ b/tools/tests/x86_emulator/Makefile
@@ -252,7 +252,7 @@ endif # 32-bit override
 
 OBJS := x86-emulate.o cpuid.o test_x86_emulator.o evex-disp8.o predicates.o wrappers.o
 OBJS += x86_emulate/0f01.o x86_emulate/0fae.o x86_emulate/0fc7.o
-OBJS += x86_emulate/fpu.o
+OBJS += x86_emulate/decode.o x86_emulate/fpu.o
 
 $(TARGET): $(OBJS)
 	$(HOSTCC) $(HOSTCFLAGS) -o $@ $^
--- a/tools/tests/x86_emulator/x86-emulate.c
+++ b/tools/tests/x86_emulator/x86-emulate.c
@@ -3,11 +3,6 @@
 #include <errno.h>
 #include <sys/mman.h>
 
-#define DEFINE_PER_CPU(type, var) type per_cpu_##var
-#define this_cpu(var) per_cpu_##var
-
-#define ERR_PTR(val) NULL
-
 /* See gcc bug 100680, but here don't bother making this version dependent. */
 #define gcc11_wrap(x) ({                  \
     unsigned long x_;                     \
--- a/tools/tests/x86_emulator/x86-emulate.h
+++ b/tools/tests/x86_emulator/x86-emulate.h
@@ -48,6 +48,9 @@
 #define ASSERT assert
 #define ASSERT_UNREACHABLE() assert(!__LINE__)
 
+#define DEFINE_PER_CPU(type, var) type per_cpu_##var
+#define this_cpu(var) per_cpu_##var
+
 #define MASK_EXTR(v, m) (((v) & (m)) / ((m) & -(m)))
 #define MASK_INSR(v, m) (((v) * ((m) & -(m))) & (m))
 
--- a/xen/arch/x86/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate.c
@@ -9,7 +9,6 @@
  *    Keir Fraser <keir@xen.org>
  */
 
-#include <xen/err.h>
 #include <xen/event.h>
 #include <asm/x86_emulate.h>
 #include <asm/processor.h> /* current_cpu_info */
--- a/xen/arch/x86/x86_emulate/Makefile
+++ b/xen/arch/x86/x86_emulate/Makefile
@@ -1,4 +1,5 @@
 obj-y += 0f01.o
 obj-y += 0fae.o
 obj-y += 0fc7.o
+obj-y += decode.o
 obj-$(CONFIG_HVM) += fpu.o
--- /dev/null
+++ b/xen/arch/x86/x86_emulate/decode.c
@@ -0,0 +1,1750 @@
+/******************************************************************************
+ * decode.c - helper for x86_emulate.c
+ *
+ * Generic x86 (32-bit and 64-bit) instruction decoder and emulator.
+ *
+ * Copyright (c) 2005-2007 Keir Fraser
+ * Copyright (c) 2005-2007 XenSource Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "private.h"
+
+#ifdef __XEN__
+# include <xen/err.h>
+#else
+# define ERR_PTR(val) NULL
+#endif
+
+#define evex_encoded() (s->evex.mbs)
+
+struct x86_emulate_state *
+x86_decode_insn(
+    struct x86_emulate_ctxt *ctxt,
+    int (*insn_fetch)(
+        enum x86_segment seg, unsigned long offset,
+        void *p_data, unsigned int bytes,
+        struct x86_emulate_ctxt *ctxt))
+{
+    static DEFINE_PER_CPU(struct x86_emulate_state, state);
+    struct x86_emulate_state *s = &this_cpu(state);
+    const struct x86_emulate_ops ops = {
+        .insn_fetch = insn_fetch,
+        .read       = x86emul_unhandleable_rw,
+    };
+    int rc;
+
+    init_context(ctxt);
+
+    rc = x86emul_decode(s, ctxt, &ops);
+    if ( unlikely(rc != X86EMUL_OKAY) )
+        return ERR_PTR(-rc);
+
+#if defined(__XEN__) && !defined(NDEBUG)
+    /*
+     * While we avoid memory allocation (by use of per-CPU data) above,
+     * nevertheless make sure callers properly release the state structure
+     * for forward compatibility.
+     */
+    if ( s->caller )
+    {
+        printk(XENLOG_ERR "Unreleased emulation state acquired by %ps\n",
+               s->caller);
+        dump_execution_state();
+    }
+    s->caller = __builtin_return_address(0);
+#endif
+
+    return s;
+}
+
+static const opcode_desc_t opcode_table[256] = {
+    /* 0x00 - 0x07 */
+    ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM,
+    ByteOp|DstReg|SrcMem|ModRM, DstReg|SrcMem|ModRM,
+    ByteOp|DstEax|SrcImm, DstEax|SrcImm, ImplicitOps|Mov, ImplicitOps|Mov,
+    /* 0x08 - 0x0F */
+    ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM,
+    ByteOp|DstReg|SrcMem|ModRM, DstReg|SrcMem|ModRM,
+    ByteOp|DstEax|SrcImm, DstEax|SrcImm, ImplicitOps|Mov, 0,
+    /* 0x10 - 0x17 */
+    ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM,
+    ByteOp|DstReg|SrcMem|ModRM, DstReg|SrcMem|ModRM,
+    ByteOp|DstEax|SrcImm, DstEax|SrcImm, ImplicitOps|Mov, ImplicitOps|Mov,
+    /* 0x18 - 0x1F */
+    ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM,
+    ByteOp|DstReg|SrcMem|ModRM, DstReg|SrcMem|ModRM,
+    ByteOp|DstEax|SrcImm, DstEax|SrcImm, ImplicitOps|Mov, ImplicitOps|Mov,
+    /* 0x20 - 0x27 */
+    ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM,
+    ByteOp|DstReg|SrcMem|ModRM, DstReg|SrcMem|ModRM,
+    ByteOp|DstEax|SrcImm, DstEax|SrcImm, 0, ImplicitOps,
+    /* 0x28 - 0x2F */
+    ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM,
+    ByteOp|DstReg|SrcMem|ModRM, DstReg|SrcMem|ModRM,
+    ByteOp|DstEax|SrcImm, DstEax|SrcImm, 0, ImplicitOps,
+    /* 0x30 - 0x37 */
+    ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM,
+    ByteOp|DstReg|SrcMem|ModRM, DstReg|SrcMem|ModRM,
+    ByteOp|DstEax|SrcImm, DstEax|SrcImm, 0, ImplicitOps,
+    /* 0x38 - 0x3F */
+    ByteOp|DstReg|SrcMem|ModRM, DstReg|SrcMem|ModRM,
+    ByteOp|DstReg|SrcMem|ModRM, DstReg|SrcMem|ModRM,
+    ByteOp|DstEax|SrcImm, DstEax|SrcImm, 0, ImplicitOps,
+    /* 0x40 - 0x4F */
+    ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps,
+    ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps,
+    ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps,
+    ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps,
+    /* 0x50 - 0x5F */
+    ImplicitOps|Mov, ImplicitOps|Mov, ImplicitOps|Mov, ImplicitOps|Mov,
+    ImplicitOps|Mov, ImplicitOps|Mov, ImplicitOps|Mov, ImplicitOps|Mov,
+    ImplicitOps|Mov, ImplicitOps|Mov, ImplicitOps|Mov, ImplicitOps|Mov,
+    ImplicitOps|Mov, ImplicitOps|Mov, ImplicitOps|Mov, ImplicitOps|Mov,
+    /* 0x60 - 0x67 */
+    ImplicitOps, ImplicitOps, DstReg|SrcMem|ModRM, DstReg|SrcNone|ModRM|Mov,
+    0, 0, 0, 0,
+    /* 0x68 - 0x6F */
+    DstImplicit|SrcImm|Mov, DstReg|SrcImm|ModRM|Mov,
+    DstImplicit|SrcImmByte|Mov, DstReg|SrcImmByte|ModRM|Mov,
+    ImplicitOps|Mov, ImplicitOps|Mov, ImplicitOps|Mov, ImplicitOps|Mov,
+    /* 0x70 - 0x77 */
+    DstImplicit|SrcImmByte, DstImplicit|SrcImmByte,
+    DstImplicit|SrcImmByte, DstImplicit|SrcImmByte,
+    DstImplicit|SrcImmByte, DstImplicit|SrcImmByte,
+    DstImplicit|SrcImmByte, DstImplicit|SrcImmByte,
+    /* 0x78 - 0x7F */
+    DstImplicit|SrcImmByte, DstImplicit|SrcImmByte,
+    DstImplicit|SrcImmByte, DstImplicit|SrcImmByte,
+    DstImplicit|SrcImmByte, DstImplicit|SrcImmByte,
+    DstImplicit|SrcImmByte, DstImplicit|SrcImmByte,
+    /* 0x80 - 0x87 */
+    ByteOp|DstMem|SrcImm|ModRM, DstMem|SrcImm|ModRM,
+    ByteOp|DstMem|SrcImm|ModRM, DstMem|SrcImmByte|ModRM,
+    ByteOp|DstReg|SrcMem|ModRM, DstReg|SrcMem|ModRM,
+    ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM,
+    /* 0x88 - 0x8F */
+    ByteOp|DstMem|SrcReg|ModRM|Mov, DstMem|SrcReg|ModRM|Mov,
+    ByteOp|DstReg|SrcMem|ModRM|Mov, DstReg|SrcMem|ModRM|Mov,
+    DstMem|SrcReg|ModRM|Mov, DstReg|SrcNone|ModRM,
+    DstReg|SrcMem16|ModRM|Mov, DstMem|SrcNone|ModRM|Mov,
+    /* 0x90 - 0x97 */
+    DstImplicit|SrcEax, DstImplicit|SrcEax,
+    DstImplicit|SrcEax, DstImplicit|SrcEax,
+    DstImplicit|SrcEax, DstImplicit|SrcEax,
+    DstImplicit|SrcEax, DstImplicit|SrcEax,
+    /* 0x98 - 0x9F */
+    ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps,
+    ImplicitOps|Mov, ImplicitOps|Mov, ImplicitOps, ImplicitOps,
+    /* 0xA0 - 0xA7 */
+    ByteOp|DstEax|SrcMem|Mov, DstEax|SrcMem|Mov,
+    ByteOp|DstMem|SrcEax|Mov, DstMem|SrcEax|Mov,
+    ByteOp|ImplicitOps|Mov, ImplicitOps|Mov,
+    ByteOp|ImplicitOps, ImplicitOps,
+    /* 0xA8 - 0xAF */
+    ByteOp|DstEax|SrcImm, DstEax|SrcImm,
+    ByteOp|DstImplicit|SrcEax|Mov, DstImplicit|SrcEax|Mov,
+    ByteOp|DstEax|SrcImplicit|Mov, DstEax|SrcImplicit|Mov,
+    ByteOp|DstImplicit|SrcEax, DstImplicit|SrcEax,
+    /* 0xB0 - 0xB7 */
+    ByteOp|DstReg|SrcImm|Mov, ByteOp|DstReg|SrcImm|Mov,
+    ByteOp|DstReg|SrcImm|Mov, ByteOp|DstReg|SrcImm|Mov,
+    ByteOp|DstReg|SrcImm|Mov, ByteOp|DstReg|SrcImm|Mov,
+    ByteOp|DstReg|SrcImm|Mov, ByteOp|DstReg|SrcImm|Mov,
+    /* 0xB8 - 0xBF */
+    DstReg|SrcImm|Mov, DstReg|SrcImm|Mov, DstReg|SrcImm|Mov, DstReg|SrcImm|Mov,
+    DstReg|SrcImm|Mov, DstReg|SrcImm|Mov, DstReg|SrcImm|Mov, DstReg|SrcImm|Mov,
+    /* 0xC0 - 0xC7 */
+    ByteOp|DstMem|SrcImm|ModRM, DstMem|SrcImmByte|ModRM,
+    DstImplicit|SrcImm16, ImplicitOps,
+    DstReg|SrcMem|ModRM|Mov, DstReg|SrcMem|ModRM|Mov,
+    ByteOp|DstMem|SrcImm|ModRM|Mov, DstMem|SrcImm|ModRM|Mov,
+    /* 0xC8 - 0xCF */
+    DstImplicit|SrcImm16, ImplicitOps, DstImplicit|SrcImm16, ImplicitOps,
+    ImplicitOps, DstImplicit|SrcImmByte, ImplicitOps, ImplicitOps,
+    /* 0xD0 - 0xD7 */
+    ByteOp|DstMem|SrcImplicit|ModRM, DstMem|SrcImplicit|ModRM,
+    ByteOp|DstMem|SrcImplicit|ModRM, DstMem|SrcImplicit|ModRM,
+    DstImplicit|SrcImmByte, DstImplicit|SrcImmByte, ImplicitOps, ImplicitOps,
+    /* 0xD8 - 0xDF */
+    ImplicitOps|ModRM, ImplicitOps|ModRM|Mov,
+    ImplicitOps|ModRM, ImplicitOps|ModRM|Mov,
+    ImplicitOps|ModRM, ImplicitOps|ModRM|Mov,
+    DstImplicit|SrcMem16|ModRM, ImplicitOps|ModRM|Mov,
+    /* 0xE0 - 0xE7 */
+    DstImplicit|SrcImmByte, DstImplicit|SrcImmByte,
+    DstImplicit|SrcImmByte, DstImplicit|SrcImmByte,
+    DstEax|SrcImmByte, DstEax|SrcImmByte,
+    DstImplicit|SrcImmByte, DstImplicit|SrcImmByte,
+    /* 0xE8 - 0xEF */
+    DstImplicit|SrcImm|Mov, DstImplicit|SrcImm,
+    ImplicitOps, DstImplicit|SrcImmByte,
+    DstEax|SrcImplicit, DstEax|SrcImplicit, ImplicitOps, ImplicitOps,
+    /* 0xF0 - 0xF7 */
+    0, ImplicitOps, 0, 0,
+    ImplicitOps, ImplicitOps, ByteOp|ModRM, ModRM,
+    /* 0xF8 - 0xFF */
+    ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps,
+    ImplicitOps, ImplicitOps, ByteOp|DstMem|SrcNone|ModRM, DstMem|SrcNone|ModRM
+};
+
+static const struct twobyte_table {
+    opcode_desc_t desc;
+    simd_opsize_t size:4;
+    disp8scale_t d8s:4;
+} twobyte_table[256] = {
+    [0x00] = { ModRM },
+    [0x01] = { ImplicitOps|ModRM },
+    [0x02] = { DstReg|SrcMem16|ModRM },
+    [0x03] = { DstReg|SrcMem16|ModRM },
+    [0x05] = { ImplicitOps },
+    [0x06] = { ImplicitOps },
+    [0x07] = { ImplicitOps },
+    [0x08] = { ImplicitOps },
+    [0x09] = { ImplicitOps },
+    [0x0b] = { ImplicitOps },
+    [0x0d] = { ImplicitOps|ModRM },
+    [0x0e] = { ImplicitOps },
+    [0x0f] = { ModRM|SrcImmByte },
+    [0x10] = { DstImplicit|SrcMem|ModRM|Mov, simd_any_fp, d8s_vl },
+    [0x11] = { DstMem|SrcImplicit|ModRM|Mov, simd_any_fp, d8s_vl },
+    [0x12] = { DstImplicit|SrcMem|ModRM|Mov, simd_other, 3 },
+    [0x13] = { DstMem|SrcImplicit|ModRM|Mov, simd_other, 3 },
+    [0x14 ... 0x15] = { DstImplicit|SrcMem|ModRM, simd_packed_fp, d8s_vl },
+    [0x16] = { DstImplicit|SrcMem|ModRM|Mov, simd_other, 3 },
+    [0x17] = { DstMem|SrcImplicit|ModRM|Mov, simd_other, 3 },
+    [0x18 ... 0x1f] = { ImplicitOps|ModRM },
+    [0x20 ... 0x21] = { DstMem|SrcImplicit|ModRM },
+    [0x22 ... 0x23] = { DstImplicit|SrcMem|ModRM },
+    [0x28] = { DstImplicit|SrcMem|ModRM|Mov, simd_packed_fp, d8s_vl },
+    [0x29] = { DstMem|SrcImplicit|ModRM|Mov, simd_packed_fp, d8s_vl },
+    [0x2a] = { DstImplicit|SrcMem|ModRM|Mov, simd_other, d8s_dq64 },
+    [0x2b] = { DstMem|SrcImplicit|ModRM|Mov, simd_any_fp, d8s_vl },
+    [0x2c ... 0x2d] = { DstImplicit|SrcMem|ModRM|Mov, simd_other },
+    [0x2e ... 0x2f] = { ImplicitOps|ModRM|TwoOp, simd_none, d8s_dq },
+    [0x30 ... 0x35] = { ImplicitOps },
+    [0x37] = { ImplicitOps },
+    [0x38] = { DstReg|SrcMem|ModRM },
+    [0x3a] = { DstReg|SrcImmByte|ModRM },
+    [0x40 ... 0x4f] = { DstReg|SrcMem|ModRM|Mov },
+    [0x50] = { DstReg|SrcImplicit|ModRM|Mov },
+    [0x51] = { DstImplicit|SrcMem|ModRM|TwoOp, simd_any_fp, d8s_vl },
+    [0x52 ... 0x53] = { DstImplicit|SrcMem|ModRM|TwoOp, simd_single_fp },
+    [0x54 ... 0x57] = { DstImplicit|SrcMem|ModRM, simd_packed_fp, d8s_vl },
+    [0x58 ... 0x59] = { DstImplicit|SrcMem|ModRM, simd_any_fp, d8s_vl },
+    [0x5a] = { DstImplicit|SrcMem|ModRM|Mov, simd_any_fp, d8s_vl },
+    [0x5b] = { DstImplicit|SrcMem|ModRM|Mov, simd_packed_fp, d8s_vl },
+    [0x5c ... 0x5f] = { DstImplicit|SrcMem|ModRM, simd_any_fp, d8s_vl },
+    [0x60 ... 0x62] = { DstImplicit|SrcMem|ModRM, simd_other, d8s_vl },
+    [0x63 ... 0x67] = { DstImplicit|SrcMem|ModRM, simd_packed_int, d8s_vl },
+    [0x68 ... 0x6a] = { DstImplicit|SrcMem|ModRM, simd_other, d8s_vl },
+    [0x6b ... 0x6d] = { DstImplicit|SrcMem|ModRM, simd_packed_int, d8s_vl },
+    [0x6e] = { DstImplicit|SrcMem|ModRM|Mov, simd_none, d8s_dq64 },
+    [0x6f] = { DstImplicit|SrcMem|ModRM|Mov, simd_packed_int, d8s_vl },
+    [0x70] = { SrcImmByte|ModRM|TwoOp, simd_other, d8s_vl },
+    [0x71 ... 0x73] = { DstImplicit|SrcImmByte|ModRM, simd_none, d8s_vl },
+    [0x74 ... 0x76] = { DstImplicit|SrcMem|ModRM, simd_packed_int, d8s_vl },
+    [0x77] = { DstImplicit|SrcNone },
+    [0x78 ... 0x79] = { DstImplicit|SrcMem|ModRM|Mov, simd_other, d8s_vl },
+    [0x7a] = { DstImplicit|SrcMem|ModRM|Mov, simd_packed_fp, d8s_vl },
+    [0x7b] = { DstImplicit|SrcMem|ModRM|Mov, simd_other, d8s_dq64 },
+    [0x7c ... 0x7d] = { DstImplicit|SrcMem|ModRM, simd_other },
+    [0x7e] = { DstMem|SrcImplicit|ModRM|Mov, simd_none, d8s_dq64 },
+    [0x7f] = { DstMem|SrcImplicit|ModRM|Mov, simd_packed_int, d8s_vl },
+    [0x80 ... 0x8f] = { DstImplicit|SrcImm },
+    [0x90 ... 0x9f] = { ByteOp|DstMem|SrcNone|ModRM|Mov },
+    [0xa0 ... 0xa1] = { ImplicitOps|Mov },
+    [0xa2] = { ImplicitOps },
+    [0xa3] = { DstBitBase|SrcReg|ModRM },
+    [0xa4] = { DstMem|SrcImmByte|ModRM },
+    [0xa5] = { DstMem|SrcReg|ModRM },
+    [0xa6 ... 0xa7] = { ModRM },
+    [0xa8 ... 0xa9] = { ImplicitOps|Mov },
+    [0xaa] = { ImplicitOps },
+    [0xab] = { DstBitBase|SrcReg|ModRM },
+    [0xac] = { DstMem|SrcImmByte|ModRM },
+    [0xad] = { DstMem|SrcReg|ModRM },
+    [0xae] = { ImplicitOps|ModRM },
+    [0xaf] = { DstReg|SrcMem|ModRM },
+    [0xb0] = { ByteOp|DstMem|SrcReg|ModRM },
+    [0xb1] = { DstMem|SrcReg|ModRM },
+    [0xb2] = { DstReg|SrcMem|ModRM|Mov },
+    [0xb3] = { DstBitBase|SrcReg|ModRM },
+    [0xb4 ... 0xb5] = { DstReg|SrcMem|ModRM|Mov },
+    [0xb6] = { ByteOp|DstReg|SrcMem|ModRM|Mov },
+    [0xb7] = { DstReg|SrcMem16|ModRM|Mov },
+    [0xb8] = { DstReg|SrcMem|ModRM },
+    [0xb9] = { ModRM },
+    [0xba] = { DstBitBase|SrcImmByte|ModRM },
+    [0xbb] = { DstBitBase|SrcReg|ModRM },
+    [0xbc ... 0xbd] = { DstReg|SrcMem|ModRM },
+    [0xbe] = { ByteOp|DstReg|SrcMem|ModRM|Mov },
+    [0xbf] = { DstReg|SrcMem16|ModRM|Mov },
+    [0xc0] = { ByteOp|DstMem|SrcReg|ModRM },
+    [0xc1] = { DstMem|SrcReg|ModRM },
+    [0xc2] = { DstImplicit|SrcImmByte|ModRM, simd_any_fp, d8s_vl },
+    [0xc3] = { DstMem|SrcReg|ModRM|Mov },
+    [0xc4] = { DstImplicit|SrcImmByte|ModRM, simd_none, 1 },
+    [0xc5] = { DstReg|SrcImmByte|ModRM|Mov },
+    [0xc6] = { DstImplicit|SrcImmByte|ModRM, simd_packed_fp, d8s_vl },
+    [0xc7] = { ImplicitOps|ModRM },
+    [0xc8 ... 0xcf] = { ImplicitOps },
+    [0xd0] = { DstImplicit|SrcMem|ModRM, simd_other },
+    [0xd1 ... 0xd3] = { DstImplicit|SrcMem|ModRM, simd_128, 4 },
+    [0xd4 ... 0xd5] = { DstImplicit|SrcMem|ModRM, simd_packed_int, d8s_vl },
+    [0xd6] = { DstMem|SrcImplicit|ModRM|Mov, simd_other, 3 },
+    [0xd7] = { DstReg|SrcImplicit|ModRM|Mov },
+    [0xd8 ... 0xdf] = { DstImplicit|SrcMem|ModRM, simd_packed_int, d8s_vl },
+    [0xe0] = { DstImplicit|SrcMem|ModRM, simd_packed_int, d8s_vl },
+    [0xe1 ... 0xe2] = { DstImplicit|SrcMem|ModRM, simd_128, 4 },
+    [0xe3 ... 0xe5] = { DstImplicit|SrcMem|ModRM, simd_packed_int, d8s_vl },
+    [0xe6] = { DstImplicit|SrcMem|ModRM|Mov, simd_packed_fp, d8s_vl },
+    [0xe7] = { DstMem|SrcImplicit|ModRM|Mov, simd_packed_int, d8s_vl },
+    [0xe8 ... 0xef] = { DstImplicit|SrcMem|ModRM, simd_packed_int, d8s_vl },
+    [0xf0] = { DstImplicit|SrcMem|ModRM|Mov, simd_other },
+    [0xf1 ... 0xf3] = { DstImplicit|SrcMem|ModRM, simd_128, 4 },
+    [0xf4 ... 0xf6] = { DstImplicit|SrcMem|ModRM, simd_packed_int, d8s_vl },
+    [0xf7] = { DstMem|SrcMem|ModRM|Mov, simd_packed_int },
+    [0xf8 ... 0xfe] = { DstImplicit|SrcMem|ModRM, simd_packed_int, d8s_vl },
+    [0xff] = { ModRM }
+};
+
+/*
+ * "two_op" and "four_op" below refer to the number of register operands
+ * (one of which may also be a memory operand). The named operand counts
+ * do not include any immediate operands.
+ */
+static const struct ext0f38_table {
+    uint8_t simd_size:5;
+    uint8_t to_mem:1;
+    uint8_t two_op:1;
+    uint8_t vsib:1;
+    disp8scale_t d8s:4;
+} ext0f38_table[256] = {
+    [0x00] = { .simd_size = simd_packed_int, .d8s = d8s_vl },
+    [0x01 ... 0x03] = { .simd_size = simd_packed_int },
+    [0x04] = { .simd_size = simd_packed_int, .d8s = d8s_vl },
+    [0x05 ... 0x0a] = { .simd_size = simd_packed_int },
+    [0x0b] = { .simd_size = simd_packed_int, .d8s = d8s_vl },
+    [0x0c ... 0x0d] = { .simd_size = simd_packed_fp, .d8s = d8s_vl },
+    [0x0e ... 0x0f] = { .simd_size = simd_packed_fp },
+    [0x10 ... 0x12] = { .simd_size = simd_packed_int, .d8s = d8s_vl },
+    [0x13] = { .simd_size = simd_other, .two_op = 1, .d8s = d8s_vl_by_2 },
+    [0x14 ... 0x16] = { .simd_size = simd_packed_fp, .d8s = d8s_vl },
+    [0x17] = { .simd_size = simd_packed_int, .two_op = 1 },
+    [0x18] = { .simd_size = simd_scalar_opc, .two_op = 1, .d8s = 2 },
+    [0x19] = { .simd_size = simd_scalar_opc, .two_op = 1, .d8s = 3 },
+    [0x1a] = { .simd_size = simd_128, .two_op = 1, .d8s = 4 },
+    [0x1b] = { .simd_size = simd_256, .two_op = 1, .d8s = d8s_vl_by_2 },
+    [0x1c ... 0x1f] = { .simd_size = simd_packed_int, .two_op = 1, .d8s = d8s_vl },
+    [0x20] = { .simd_size = simd_other, .two_op = 1, .d8s = d8s_vl_by_2 },
+    [0x21] = { .simd_size = simd_other, .two_op = 1, .d8s = d8s_vl_by_4 },
+    [0x22] = { .simd_size = simd_other, .two_op = 1, .d8s = d8s_vl_by_8 },
+    [0x23] = { .simd_size = simd_other, .two_op = 1, .d8s = d8s_vl_by_2 },
+    [0x24] = { .simd_size = simd_other, .two_op = 1, .d8s = d8s_vl_by_4 },
+    [0x25] = { .simd_size = simd_other, .two_op = 1, .d8s = d8s_vl_by_2 },
+    [0x26 ... 0x29] = { .simd_size = simd_packed_int, .d8s = d8s_vl },
+    [0x2a] = { .simd_size = simd_packed_int, .two_op = 1, .d8s = d8s_vl },
+    [0x2b] = { .simd_size = simd_packed_int, .d8s = d8s_vl },
+    [0x2c] = { .simd_size = simd_packed_fp, .d8s = d8s_vl },
+    [0x2d] = { .simd_size = simd_packed_fp, .d8s = d8s_dq },
+    [0x2e ... 0x2f] = { .simd_size = simd_packed_fp, .to_mem = 1 },
+    [0x30] = { .simd_size = simd_other, .two_op = 1, .d8s = d8s_vl_by_2 },
+    [0x31] = { .simd_size = simd_other, .two_op = 1, .d8s = d8s_vl_by_4 },
+    [0x32] = { .simd_size = simd_other, .two_op = 1, .d8s = d8s_vl_by_8 },
+    [0x33] = { .simd_size = simd_other, .two_op = 1, .d8s = d8s_vl_by_2 },
+    [0x34] = { .simd_size = simd_other, .two_op = 1, .d8s = d8s_vl_by_4 },
+    [0x35] = { .simd_size = simd_other, .two_op = 1, .d8s = d8s_vl_by_2 },
+    [0x36 ... 0x3f] = { .simd_size = simd_packed_int, .d8s = d8s_vl },
+    [0x40] = { .simd_size = simd_packed_int, .d8s = d8s_vl },
+    [0x41] = { .simd_size = simd_packed_int, .two_op = 1 },
+    [0x42] = { .simd_size = simd_packed_fp, .two_op = 1, .d8s = d8s_vl },
+    [0x43] = { .simd_size = simd_scalar_vexw, .d8s = d8s_dq },
+    [0x44] = { .simd_size = simd_packed_int, .two_op = 1, .d8s = d8s_vl },
+    [0x45 ... 0x47] = { .simd_size = simd_packed_int, .d8s = d8s_vl },
+    [0x4c] = { .simd_size = simd_packed_fp, .two_op = 1, .d8s = d8s_vl },
+    [0x4d] = { .simd_size = simd_scalar_vexw, .d8s = d8s_dq },
+    [0x4e] = { .simd_size = simd_packed_fp, .two_op = 1, .d8s = d8s_vl },
+    [0x4f] = { .simd_size = simd_scalar_vexw, .d8s = d8s_dq },
+    [0x50 ... 0x53] = { .simd_size = simd_packed_int, .d8s = d8s_vl },
+    [0x54 ... 0x55] = { .simd_size = simd_packed_int, .two_op = 1, .d8s = d8s_vl },
+    [0x58] = { .simd_size = simd_other, .two_op = 1, .d8s = 2 },
+    [0x59] = { .simd_size = simd_other, .two_op = 1, .d8s = 3 },
+    [0x5a] = { .simd_size = simd_128, .two_op = 1, .d8s = 4 },
+    [0x5b] = { .simd_size = simd_256, .two_op = 1, .d8s = d8s_vl_by_2 },
+    [0x62] = { .simd_size = simd_packed_int, .two_op = 1, .d8s = d8s_bw },
+    [0x63] = { .simd_size = simd_packed_int, .to_mem = 1, .two_op = 1, .d8s = d8s_bw },
+    [0x64 ... 0x66] = { .simd_size = simd_packed_int, .d8s = d8s_vl },
+    [0x68] = { .simd_size = simd_packed_int, .d8s = d8s_vl },
+    [0x70 ... 0x73] = { .simd_size = simd_packed_int, .d8s = d8s_vl },
+    [0x75 ... 0x76] = { .simd_size = simd_packed_int, .d8s = d8s_vl },
+    [0x77] = { .simd_size = simd_packed_fp, .d8s = d8s_vl },
+    [0x78] = { .simd_size = simd_other, .two_op = 1 },
+    [0x79] = { .simd_size = simd_other, .two_op = 1, .d8s = 1 },
+    [0x7a ... 0x7c] = { .simd_size = simd_none, .two_op = 1 },
+    [0x7d ... 0x7e] = { .simd_size = simd_packed_int, .d8s = d8s_vl },
+    [0x7f] = { .simd_size = simd_packed_fp, .d8s = d8s_vl },
+    [0x82] = { .simd_size = simd_other },
+    [0x83] = { .simd_size = simd_packed_int, .d8s = d8s_vl },
+    [0x88] = { .simd_size = simd_packed_fp, .two_op = 1, .d8s = d8s_dq },
+    [0x89] = { .simd_size = simd_packed_int, .two_op = 1, .d8s = d8s_dq },
+    [0x8a] = { .simd_size = simd_packed_fp, .to_mem = 1, .two_op = 1, .d8s = d8s_dq },
+    [0x8b] = { .simd_size = simd_packed_int, .to_mem = 1, .two_op = 1, .d8s = d8s_dq },
+    [0x8c] = { .simd_size = simd_packed_int },
+    [0x8d] = { .simd_size = simd_packed_int, .d8s = d8s_vl },
+    [0x8e] = { .simd_size = simd_packed_int, .to_mem = 1 },
+    [0x8f] = { .simd_size = simd_packed_int, .d8s = d8s_vl },
+    [0x90 ... 0x93] = { .simd_size = simd_other, .vsib = 1, .d8s = d8s_dq },
+    [0x96 ... 0x98] = { .simd_size = simd_packed_fp, .d8s = d8s_vl },
+    [0x99] = { .simd_size = simd_scalar_vexw, .d8s = d8s_dq },
+    [0x9a] = { .simd_size = simd_packed_fp, .d8s = d8s_vl },
+    [0x9b] = { .simd_size = simd_scalar_vexw, .d8s = d8s_dq },
+    [0x9c] = { .simd_size = simd_packed_fp, .d8s = d8s_vl },
+    [0x9d] = { .simd_size = simd_scalar_vexw, .d8s = d8s_dq },
+    [0x9e] = { .simd_size = simd_packed_fp, .d8s = d8s_vl },
+    [0x9f] = { .simd_size = simd_scalar_vexw, .d8s = d8s_dq },
+    [0xa0 ... 0xa3] = { .simd_size = simd_other, .to_mem = 1, .vsib = 1, .d8s = d8s_dq },
+    [0xa6 ... 0xa8] = { .simd_size = simd_packed_fp, .d8s = d8s_vl },
+    [0xa9] = { .simd_size = simd_scalar_vexw, .d8s = d8s_dq },
+    [0xaa] = { .simd_size = simd_packed_fp, .d8s = d8s_vl },
+    [0xab] = { .simd_size = simd_scalar_vexw, .d8s = d8s_dq },
+    [0xac] = { .simd_size = simd_packed_fp, .d8s = d8s_vl },
+    [0xad] = { .simd_size = simd_scalar_vexw, .d8s = d8s_dq },
+    [0xae] = { .simd_size = simd_packed_fp, .d8s = d8s_vl },
+    [0xaf] = { .simd_size = simd_scalar_vexw, .d8s = d8s_dq },
+    [0xb4 ... 0xb5] = { .simd_size = simd_packed_int, .d8s = d8s_vl },
+    [0xb6 ... 0xb8] = { .simd_size = simd_packed_fp, .d8s = d8s_vl },
+    [0xb9] = { .simd_size = simd_scalar_vexw, .d8s = d8s_dq },
+    [0xba] = { .simd_size = simd_packed_fp, .d8s = d8s_vl },
+    [0xbb] = { .simd_size = simd_scalar_vexw, .d8s = d8s_dq },
+    [0xbc] = { .simd_size = simd_packed_fp, .d8s = d8s_vl },
+    [0xbd] = { .simd_size = simd_scalar_vexw, .d8s = d8s_dq },
+    [0xbe] = { .simd_size = simd_packed_fp, .d8s = d8s_vl },
+    [0xbf] = { .simd_size = simd_scalar_vexw, .d8s = d8s_dq },
+    [0xc4] = { .simd_size = simd_packed_int, .two_op = 1, .d8s = d8s_vl },
+    [0xc6 ... 0xc7] = { .simd_size = simd_other, .vsib = 1, .d8s = d8s_dq },
+    [0xc8] = { .simd_size = simd_packed_fp, .two_op = 1, .d8s = d8s_vl },
+    [0xc9] = { .simd_size = simd_other },
+    [0xca] = { .simd_size = simd_packed_fp, .two_op = 1, .d8s = d8s_vl },
+    [0xcb] = { .simd_size = simd_scalar_vexw, .d8s = d8s_dq },
+    [0xcc] = { .simd_size = simd_packed_fp, .two_op = 1, .d8s = d8s_vl },
+    [0xcd] = { .simd_size = simd_scalar_vexw, .d8s = d8s_dq },
+    [0xcf] = { .simd_size = simd_packed_int, .d8s = d8s_vl },
+    [0xdb] = { .simd_size = simd_packed_int, .two_op = 1 },
+    [0xdc ... 0xdf] = { .simd_size = simd_packed_int, .d8s = d8s_vl },
+    [0xf0] = { .two_op = 1 },
+    [0xf1] = { .to_mem = 1, .two_op = 1 },
+    [0xf2 ... 0xf3] = {},
+    [0xf5 ... 0xf7] = {},
+    [0xf8] = { .simd_size = simd_other },
+    [0xf9] = { .to_mem = 1, .two_op = 1 /* Mov */ },
+};
+
+static const struct ext0f3a_table {
+    uint8_t simd_size:5;
+    uint8_t to_mem:1;
+    uint8_t two_op:1;
+    uint8_t four_op:1;
+    disp8scale_t d8s:4;
+} ext0f3a_table[256] = {
+    [0x00] = { .simd_size = simd_packed_int, .two_op = 1, .d8s = d8s_vl },
+    [0x01] = { .simd_size = simd_packed_fp, .two_op = 1, .d8s = d8s_vl },
+    [0x02] = { .simd_size = simd_packed_int },
+    [0x03] = { .simd_size = simd_packed_int, .d8s = d8s_vl },
+    [0x04 ... 0x05] = { .simd_size = simd_packed_fp, .two_op = 1, .d8s = d8s_vl },
+    [0x06] = { .simd_size = simd_packed_fp },
+    [0x08 ... 0x09] = { .simd_size = simd_packed_fp, .two_op = 1, .d8s = d8s_vl },
+    [0x0a ... 0x0b] = { .simd_size = simd_scalar_opc, .d8s = d8s_dq },
+    [0x0c ... 0x0d] = { .simd_size = simd_packed_fp },
+    [0x0e] = { .simd_size = simd_packed_int },
+    [0x0f] = { .simd_size = simd_packed_int, .d8s = d8s_vl },
+    [0x14] = { .simd_size = simd_none, .to_mem = 1, .two_op = 1, .d8s = 0 },
+    [0x15] = { .simd_size = simd_none, .to_mem = 1, .two_op = 1, .d8s = 1 },
+    [0x16] = { .simd_size = simd_none, .to_mem = 1, .two_op = 1, .d8s = d8s_dq64 },
+    [0x17] = { .simd_size = simd_none, .to_mem = 1, .two_op = 1, .d8s = 2 },
+    [0x18] = { .simd_size = simd_128, .d8s = 4 },
+    [0x19] = { .simd_size = simd_128, .to_mem = 1, .two_op = 1, .d8s = 4 },
+    [0x1a] = { .simd_size = simd_256, .d8s = d8s_vl_by_2 },
+    [0x1b] = { .simd_size = simd_256, .to_mem = 1, .two_op = 1, .d8s = d8s_vl_by_2 },
+    [0x1d] = { .simd_size = simd_other, .to_mem = 1, .two_op = 1, .d8s = d8s_vl_by_2 },
+    [0x1e ... 0x1f] = { .simd_size = simd_packed_int, .d8s = d8s_vl },
+    [0x20] = { .simd_size = simd_none, .d8s = 0 },
+    [0x21] = { .simd_size = simd_other, .d8s = 2 },
+    [0x22] = { .simd_size = simd_none, .d8s = d8s_dq64 },
+    [0x23] = { .simd_size = simd_packed_int, .d8s = d8s_vl },
+    [0x25] = { .simd_size = simd_packed_int, .d8s = d8s_vl },
+    [0x26] = { .simd_size = simd_packed_fp, .two_op = 1, .d8s = d8s_vl },
+    [0x27] = { .simd_size = simd_scalar_vexw, .d8s = d8s_dq },
+    [0x30 ... 0x33] = { .simd_size = simd_other, .two_op = 1 },
+    [0x38] = { .simd_size = simd_128, .d8s = 4 },
+    [0x3a] = { .simd_size = simd_256, .d8s = d8s_vl_by_2 },
+    [0x39] = { .simd_size = simd_128, .to_mem = 1, .two_op = 1, .d8s = 4 },
+    [0x3b] = { .simd_size = simd_256, .to_mem = 1, .two_op = 1, .d8s = d8s_vl_by_2 },
+    [0x3e ... 0x3f] = { .simd_size = simd_packed_int, .d8s = d8s_vl },
+    [0x40 ... 0x41] = { .simd_size = simd_packed_fp },
+    [0x42 ... 0x43] = { .simd_size = simd_packed_int, .d8s = d8s_vl },
+    [0x44] = { .simd_size = simd_packed_int, .d8s = d8s_vl },
+    [0x46] = { .simd_size = simd_packed_int },
+    [0x48 ... 0x49] = { .simd_size = simd_packed_fp, .four_op = 1 },
+    [0x4a ... 0x4b] = { .simd_size = simd_packed_fp, .four_op = 1 },
+    [0x4c] = { .simd_size = simd_packed_int, .four_op = 1 },
+    [0x50] = { .simd_size = simd_packed_fp, .d8s = d8s_vl },
+    [0x51] = { .simd_size = simd_scalar_vexw, .d8s = d8s_dq },
+    [0x54] = { .simd_size = simd_packed_fp, .d8s = d8s_vl },
+    [0x55] = { .simd_size = simd_scalar_vexw, .d8s = d8s_dq },
+    [0x56] = { .simd_size = simd_packed_fp, .two_op = 1, .d8s = d8s_vl },
+    [0x57] = { .simd_size = simd_scalar_vexw, .d8s = d8s_dq },
+    [0x5c ... 0x5f] = { .simd_size = simd_packed_fp, .four_op = 1 },
+    [0x60 ... 0x63] = { .simd_size = simd_packed_int, .two_op = 1 },
+    [0x66] = { .simd_size = simd_packed_fp, .two_op = 1, .d8s = d8s_vl },
+    [0x67] = { .simd_size = simd_scalar_vexw, .two_op = 1, .d8s = d8s_dq },
+    [0x68 ... 0x69] = { .simd_size = simd_packed_fp, .four_op = 1 },
+    [0x6a ... 0x6b] = { .simd_size = simd_scalar_opc, .four_op = 1 },
+    [0x6c ... 0x6d] = { .simd_size = simd_packed_fp, .four_op = 1 },
+    [0x6e ... 0x6f] = { .simd_size = simd_scalar_opc, .four_op = 1 },
+    [0x70 ... 0x73] = { .simd_size = simd_packed_int, .d8s = d8s_vl },
+    [0x78 ... 0x79] = { .simd_size = simd_packed_fp, .four_op = 1 },
+    [0x7a ... 0x7b] = { .simd_size = simd_scalar_opc, .four_op = 1 },
+    [0x7c ... 0x7d] = { .simd_size = simd_packed_fp, .four_op = 1 },
+    [0x7e ... 0x7f] = { .simd_size = simd_scalar_opc, .four_op = 1 },
+    [0xcc] = { .simd_size = simd_other },
+    [0xce ... 0xcf] = { .simd_size = simd_packed_int, .d8s = d8s_vl },
+    [0xdf] = { .simd_size = simd_packed_int, .two_op = 1 },
+    [0xf0] = {},
+};
+
+static const opcode_desc_t xop_table[] = {
+    DstReg|SrcImmByte|ModRM,
+    DstReg|SrcMem|ModRM,
+    DstReg|SrcImm|ModRM,
+};
+
+static const struct ext8f08_table {
+    uint8_t simd_size:5;
+    uint8_t two_op:1;
+    uint8_t four_op:1;
+} ext8f08_table[256] = {
+    [0xa2] = { .simd_size = simd_packed_int, .four_op = 1 },
+    [0x85 ... 0x87] = { .simd_size = simd_packed_int, .four_op = 1 },
+    [0x8e ... 0x8f] = { .simd_size = simd_packed_int, .four_op = 1 },
+    [0x95 ... 0x97] = { .simd_size = simd_packed_int, .four_op = 1 },
+    [0x9e ... 0x9f] = { .simd_size = simd_packed_int, .four_op = 1 },
+    [0xa3] = { .simd_size = simd_packed_int, .four_op = 1 },
+    [0xa6] = { .simd_size = simd_packed_int, .four_op = 1 },
+    [0xb6] = { .simd_size = simd_packed_int, .four_op = 1 },
+    [0xc0 ... 0xc3] = { .simd_size = simd_packed_int, .two_op = 1 },
+    [0xcc ... 0xcf] = { .simd_size = simd_packed_int },
+    [0xec ... 0xef] = { .simd_size = simd_packed_int },
+};
+
+static const struct ext8f09_table {
+    uint8_t simd_size:5;
+    uint8_t two_op:1;
+} ext8f09_table[256] = {
+    [0x01 ... 0x02] = { .two_op = 1 },
+    [0x80 ... 0x81] = { .simd_size = simd_packed_fp, .two_op = 1 },
+    [0x82 ... 0x83] = { .simd_size = simd_scalar_opc, .two_op = 1 },
+    [0x90 ... 0x9b] = { .simd_size = simd_packed_int },
+    [0xc1 ... 0xc3] = { .simd_size = simd_packed_int, .two_op = 1 },
+    [0xc6 ... 0xc7] = { .simd_size = simd_packed_int, .two_op = 1 },
+    [0xcb] = { .simd_size = simd_packed_int, .two_op = 1 },
+    [0xd1 ... 0xd3] = { .simd_size = simd_packed_int, .two_op = 1 },
+    [0xd6 ... 0xd7] = { .simd_size = simd_packed_int, .two_op = 1 },
+    [0xdb] = { .simd_size = simd_packed_int, .two_op = 1 },
+    [0xe1 ... 0xe3] = { .simd_size = simd_packed_int, .two_op = 1 },
+};
+
+static unsigned int decode_disp8scale(enum disp8scale scale,
+                                      const struct x86_emulate_state *s)
+{
+    switch ( scale )
+    {
+    case d8s_bw:
+        return s->evex.w;
+
+    default:
+        if ( scale < d8s_vl )
+            return scale;
+        if ( s->evex.brs )
+        {
+    case d8s_dq:
+            return 2 + s->evex.w;
+        }
+        break;
+
+    case d8s_dq64:
+        return 2 + (s->op_bytes == 8);
+    }
+
+    switch ( s->simd_size )
+    {
+    case simd_any_fp:
+    case simd_single_fp:
+        if ( !(s->evex.pfx & VEX_PREFIX_SCALAR_MASK) )
+            break;
+        /* fall through */
+    case simd_scalar_opc:
+    case simd_scalar_vexw:
+        return 2 + s->evex.w;
+
+    case simd_128:
+        /* These should have an explicit size specified. */
+        ASSERT_UNREACHABLE();
+        return 4;
+
+    default:
+        break;
+    }
+
+    return 4 + s->evex.lr - (scale - d8s_vl);
+}
+
+/* Fetch next part of the instruction being emulated. */
+#define insn_fetch_bytes(_size) ({                                    \
+   unsigned long _x = 0, _ip = s->ip;                                 \
+   s->ip += (_size); /* real hardware doesn't truncate */             \
+   generate_exception_if((uint8_t)(s->ip -                            \
+                                   ctxt->regs->r(ip)) > MAX_INST_LEN, \
+                         X86_EXC_GP, 0);                              \
+   rc = ops->insn_fetch(x86_seg_cs, _ip, &_x, _size, ctxt);           \
+   if ( rc ) goto done;                                               \
+   _x;                                                                \
+})
+#define insn_fetch_type(type) ((type)insn_fetch_bytes(sizeof(type)))
+
+static int
+decode_onebyte(struct x86_emulate_state *s,
+               struct x86_emulate_ctxt *ctxt,
+               const struct x86_emulate_ops *ops)
+{
+    int rc = X86EMUL_OKAY;
+
+    switch ( ctxt->opcode )
+    {
+    case 0x06: /* push %%es */
+    case 0x07: /* pop %%es */
+    case 0x0e: /* push %%cs */
+    case 0x16: /* push %%ss */
+    case 0x17: /* pop %%ss */
+    case 0x1e: /* push %%ds */
+    case 0x1f: /* pop %%ds */
+    case 0x27: /* daa */
+    case 0x2f: /* das */
+    case 0x37: /* aaa */
+    case 0x3f: /* aas */
+    case 0x60: /* pusha */
+    case 0x61: /* popa */
+    case 0x62: /* bound */
+    case 0xc4: /* les */
+    case 0xc5: /* lds */
+    case 0xce: /* into */
+    case 0xd4: /* aam */
+    case 0xd5: /* aad */
+    case 0xd6: /* salc */
+        s->not_64bit = true;
+        break;
+
+    case 0x82: /* Grp1 (x86/32 only) */
+        s->not_64bit = true;
+        /* fall through */
+    case 0x80: case 0x81: case 0x83: /* Grp1 */
+        if ( (s->modrm_reg & 7) == 7 ) /* cmp */
+            s->desc = (s->desc & ByteOp) | DstNone | SrcMem;
+        break;
+
+    case 0x90: /* nop / pause */
+        if ( s->vex.pfx == vex_f3 )
+            ctxt->opcode |= X86EMUL_OPC_F3(0, 0);
+        break;
+
+    case 0x9a: /* call (far, absolute) */
+    case 0xea: /* jmp (far, absolute) */
+        generate_exception_if(mode_64bit(), X86_EXC_UD);
+
+        s->imm1 = insn_fetch_bytes(s->op_bytes);
+        s->imm2 = insn_fetch_type(uint16_t);
+        break;
+
+    case 0xa0: case 0xa1: /* mov mem.offs,{%al,%ax,%eax,%rax} */
+    case 0xa2: case 0xa3: /* mov {%al,%ax,%eax,%rax},mem.offs */
+        /* Source EA is not encoded via ModRM. */
+        s->ea.type = OP_MEM;
+        s->ea.mem.off = insn_fetch_bytes(s->ad_bytes);
+        break;
+
+    case 0xb8 ... 0xbf: /* mov imm{16,32,64},r{16,32,64} */
+        if ( s->op_bytes == 8 ) /* Fetch more bytes to obtain imm64. */
+            s->imm1 = ((uint32_t)s->imm1 |
+                       ((uint64_t)insn_fetch_type(uint32_t) << 32));
+        break;
+
+    case 0xc8: /* enter imm16,imm8 */
+        s->imm2 = insn_fetch_type(uint8_t);
+        break;
+
+    case 0xf6: case 0xf7: /* Grp3 */
+        if ( !(s->modrm_reg & 6) ) /* test */
+            s->desc = (s->desc & ByteOp) | DstNone | SrcMem;
+        break;
+
+    case 0xff: /* Grp5 */
+        switch ( s->modrm_reg & 7 )
+        {
+        case 2: /* call (near) */
+        case 4: /* jmp (near) */
+            if ( mode_64bit() && (s->op_bytes == 4 || !amd_like(ctxt)) )
+                s->op_bytes = 8;
+            s->desc = DstNone | SrcMem | Mov;
+            break;
+
+        case 3: /* call (far, absolute indirect) */
+        case 5: /* jmp (far, absolute indirect) */
+            /* REX.W ignored on a vendor-dependent basis. */
+            if ( s->op_bytes == 8 && amd_like(ctxt) )
+                s->op_bytes = 4;
+            s->desc = DstNone | SrcMem | Mov;
+            break;
+
+        case 6: /* push */
+            if ( mode_64bit() && s->op_bytes == 4 )
+                s->op_bytes = 8;
+            s->desc = DstNone | SrcMem | Mov;
+            break;
+        }
+        break;
+    }
+
+ done:
+    return rc;
+}
+
+static int
+decode_twobyte(struct x86_emulate_state *s,
+               struct x86_emulate_ctxt *ctxt,
+               const struct x86_emulate_ops *ops)
+{
+    int rc = X86EMUL_OKAY;
+
+    switch ( ctxt->opcode & X86EMUL_OPC_MASK )
+    {
+    case 0x00: /* Grp6 */
+        switch ( s->modrm_reg & 6 )
+        {
+        case 0:
+            s->desc |= DstMem | SrcImplicit | Mov;
+            break;
+        case 2: case 4:
+            s->desc |= SrcMem16;
+            break;
+        }
+        break;
+
+    case 0x78:
+        s->desc = ImplicitOps;
+        s->simd_size = simd_none;
+        switch ( s->vex.pfx )
+        {
+        case vex_66: /* extrq $imm8, $imm8, xmm */
+        case vex_f2: /* insertq $imm8, $imm8, xmm, xmm */
+            s->imm1 = insn_fetch_type(uint8_t);
+            s->imm2 = insn_fetch_type(uint8_t);
+            break;
+        }
+        /* fall through */
+    case 0x10 ... 0x18:
+    case 0x28 ... 0x2f:
+    case 0x50 ... 0x77:
+    case 0x7a ... 0x7d:
+    case 0x7f:
+    case 0xc2 ... 0xc3:
+    case 0xc5 ... 0xc6:
+    case 0xd0 ... 0xef:
+    case 0xf1 ... 0xfe:
+        ctxt->opcode |= MASK_INSR(s->vex.pfx, X86EMUL_OPC_PFX_MASK);
+        break;
+
+    case 0x20: case 0x22: /* mov to/from cr */
+        if ( s->lock_prefix && vcpu_has_cr8_legacy() )
+        {
+            s->modrm_reg += 8;
+            s->lock_prefix = false;
+        }
+        /* fall through */
+    case 0x21: case 0x23: /* mov to/from dr */
+        ASSERT(s->ea.type == OP_REG); /* Early operand adjustment ensures this. */
+        generate_exception_if(s->lock_prefix, X86_EXC_UD);
+        s->op_bytes = mode_64bit() ? 8 : 4;
+        break;
+
+    case 0x79:
+        s->desc = DstReg | SrcMem;
+        s->simd_size = simd_packed_int;
+        ctxt->opcode |= MASK_INSR(s->vex.pfx, X86EMUL_OPC_PFX_MASK);
+        break;
+
+    case 0x7e:
+        ctxt->opcode |= MASK_INSR(s->vex.pfx, X86EMUL_OPC_PFX_MASK);
+        if ( s->vex.pfx == vex_f3 ) /* movq xmm/m64,xmm */
+        {
+    case X86EMUL_OPC_VEX_F3(0, 0x7e): /* vmovq xmm/m64,xmm */
+    case X86EMUL_OPC_EVEX_F3(0, 0x7e): /* vmovq xmm/m64,xmm */
+            s->desc = DstImplicit | SrcMem | TwoOp;
+            s->simd_size = simd_other;
+            /* Avoid the s->desc clobbering of TwoOp below. */
+            return X86EMUL_OKAY;
+        }
+        break;
+
+    case X86EMUL_OPC_VEX(0, 0x90):    /* kmov{w,q} */
+    case X86EMUL_OPC_VEX_66(0, 0x90): /* kmov{b,d} */
+        s->desc = DstReg | SrcMem | Mov;
+        s->simd_size = simd_other;
+        break;
+
+    case X86EMUL_OPC_VEX(0, 0x91):    /* kmov{w,q} */
+    case X86EMUL_OPC_VEX_66(0, 0x91): /* kmov{b,d} */
+        s->desc = DstMem | SrcReg | Mov;
+        s->simd_size = simd_other;
+        break;
+
+    case 0xae:
+        ctxt->opcode |= MASK_INSR(s->vex.pfx, X86EMUL_OPC_PFX_MASK);
+        /* fall through */
+    case X86EMUL_OPC_VEX(0, 0xae):
+        switch ( s->modrm_reg & 7 )
+        {
+        case 2: /* {,v}ldmxcsr */
+            s->desc = DstImplicit | SrcMem | Mov;
+            s->op_bytes = 4;
+            break;
+
+        case 3: /* {,v}stmxcsr */
+            s->desc = DstMem | SrcImplicit | Mov;
+            s->op_bytes = 4;
+            break;
+        }
+        break;
+
+    case 0xb2: /* lss */
+    case 0xb4: /* lfs */
+    case 0xb5: /* lgs */
+        /* REX.W ignored on a vendor-dependent basis. */
+        if ( s->op_bytes == 8 && amd_like(ctxt) )
+            s->op_bytes = 4;
+        break;
+
+    case 0xb8: /* jmpe / popcnt */
+        if ( s->vex.pfx >= vex_f3 )
+            ctxt->opcode |= MASK_INSR(s->vex.pfx, X86EMUL_OPC_PFX_MASK);
+        break;
+
+        /* Intentionally not handling here despite being modified by F3:
+    case 0xbc: bsf / tzcnt
+    case 0xbd: bsr / lzcnt
+         * They're being dealt with in the execution phase (if at all).
+         */
+
+    case 0xc4: /* pinsrw */
+        ctxt->opcode |= MASK_INSR(s->vex.pfx, X86EMUL_OPC_PFX_MASK);
+        /* fall through */
+    case X86EMUL_OPC_VEX_66(0, 0xc4): /* vpinsrw */
+    case X86EMUL_OPC_EVEX_66(0, 0xc4): /* vpinsrw */
+        s->desc = DstImplicit | SrcMem16;
+        break;
+
+    case 0xf0:
+        ctxt->opcode |= MASK_INSR(s->vex.pfx, X86EMUL_OPC_PFX_MASK);
+        if ( s->vex.pfx == vex_f2 ) /* lddqu mem,xmm */
+        {
+        /* fall through */
+    case X86EMUL_OPC_VEX_F2(0, 0xf0): /* vlddqu mem,{x,y}mm */
+            s->desc = DstImplicit | SrcMem | TwoOp;
+            s->simd_size = simd_other;
+            /* Avoid the s->desc clobbering of TwoOp below. */
+            return X86EMUL_OKAY;
+        }
+        break;
+    }
+
+    /*
+     * Scalar forms of most VEX-/EVEX-encoded TwoOp instructions have
+     * three operands.  Those which do really have two operands
+     * should have exited earlier.
+     */
+    if ( s->simd_size && s->vex.opcx &&
+         (s->vex.pfx & VEX_PREFIX_SCALAR_MASK) )
+        s->desc &= ~TwoOp;
+
+ done:
+    return rc;
+}
+
+static int
+decode_0f38(struct x86_emulate_state *s,
+            struct x86_emulate_ctxt *ctxt,
+            const struct x86_emulate_ops *ops)
+{
+    switch ( ctxt->opcode & X86EMUL_OPC_MASK )
+    {
+    case 0x00 ... 0xef:
+    case 0xf2 ... 0xf5:
+    case 0xf7 ... 0xf8:
+    case 0xfa ... 0xff:
+        s->op_bytes = 0;
+        /* fall through */
+    case 0xf6: /* adcx / adox */
+    case 0xf9: /* movdiri */
+        ctxt->opcode |= MASK_INSR(s->vex.pfx, X86EMUL_OPC_PFX_MASK);
+        break;
+
+    case X86EMUL_OPC_EVEX_66(0, 0x2d): /* vscalefs{s,d} */
+        s->simd_size = simd_scalar_vexw;
+        break;
+
+    case X86EMUL_OPC_EVEX_66(0, 0x7a): /* vpbroadcastb */
+    case X86EMUL_OPC_EVEX_66(0, 0x7b): /* vpbroadcastw */
+    case X86EMUL_OPC_EVEX_66(0, 0x7c): /* vpbroadcast{d,q} */
+        break;
+
+    case 0xf0: /* movbe / crc32 */
+        s->desc |= s->vex.pfx == vex_f2 ? ByteOp : Mov;
+        if ( s->vex.pfx >= vex_f3 )
+            ctxt->opcode |= MASK_INSR(s->vex.pfx, X86EMUL_OPC_PFX_MASK);
+        break;
+
+    case 0xf1: /* movbe / crc32 */
+        if ( s->vex.pfx == vex_f2 )
+            s->desc = DstReg | SrcMem;
+        if ( s->vex.pfx >= vex_f3 )
+            ctxt->opcode |= MASK_INSR(s->vex.pfx, X86EMUL_OPC_PFX_MASK);
+        break;
+
+    case X86EMUL_OPC_VEX(0, 0xf2):    /* andn */
+    case X86EMUL_OPC_VEX(0, 0xf3):    /* Grp 17 */
+    case X86EMUL_OPC_VEX(0, 0xf5):    /* bzhi */
+    case X86EMUL_OPC_VEX_F3(0, 0xf5): /* pext */
+    case X86EMUL_OPC_VEX_F2(0, 0xf5): /* pdep */
+    case X86EMUL_OPC_VEX_F2(0, 0xf6): /* mulx */
+    case X86EMUL_OPC_VEX(0, 0xf7):    /* bextr */
+    case X86EMUL_OPC_VEX_66(0, 0xf7): /* shlx */
+    case X86EMUL_OPC_VEX_F3(0, 0xf7): /* sarx */
+    case X86EMUL_OPC_VEX_F2(0, 0xf7): /* shrx */
+        break;
+
+    default:
+        s->op_bytes = 0;
+        break;
+    }
+
+    return X86EMUL_OKAY;
+}
+
+static int
+decode_0f3a(struct x86_emulate_state *s,
+            struct x86_emulate_ctxt *ctxt,
+            const struct x86_emulate_ops *ops)
+{
+    if ( !s->vex.opcx )
+        ctxt->opcode |= MASK_INSR(s->vex.pfx, X86EMUL_OPC_PFX_MASK);
+
+    switch ( ctxt->opcode & X86EMUL_OPC_MASK )
+    {
+    case X86EMUL_OPC_66(0, 0x14)
+     ... X86EMUL_OPC_66(0, 0x17):     /* pextr*, extractps */
+    case X86EMUL_OPC_VEX_66(0, 0x14)
+     ... X86EMUL_OPC_VEX_66(0, 0x17): /* vpextr*, vextractps */
+    case X86EMUL_OPC_EVEX_66(0, 0x14)
+     ... X86EMUL_OPC_EVEX_66(0, 0x17): /* vpextr*, vextractps */
+    case X86EMUL_OPC_VEX_F2(0, 0xf0): /* rorx */
+        break;
+
+    case X86EMUL_OPC_66(0, 0x20):     /* pinsrb */
+    case X86EMUL_OPC_VEX_66(0, 0x20): /* vpinsrb */
+    case X86EMUL_OPC_EVEX_66(0, 0x20): /* vpinsrb */
+        s->desc = DstImplicit | SrcMem;
+        if ( s->modrm_mod != 3 )
+            s->desc |= ByteOp;
+        break;
+
+    case X86EMUL_OPC_66(0, 0x22):     /* pinsr{d,q} */
+    case X86EMUL_OPC_VEX_66(0, 0x22): /* vpinsr{d,q} */
+    case X86EMUL_OPC_EVEX_66(0, 0x22): /* vpinsr{d,q} */
+        s->desc = DstImplicit | SrcMem;
+        break;
+
+    default:
+        s->op_bytes = 0;
+        break;
+    }
+
+    return X86EMUL_OKAY;
+}
+
+#define ad_bytes (s->ad_bytes) /* for truncate_ea() */
+
+int x86emul_decode(struct x86_emulate_state *s,
+                   struct x86_emulate_ctxt *ctxt,
+                   const struct x86_emulate_ops *ops)
+{
+    uint8_t b, d;
+    unsigned int def_op_bytes, def_ad_bytes, opcode;
+    enum x86_segment override_seg = x86_seg_none;
+    bool pc_rel = false;
+    int rc = X86EMUL_OKAY;
+
+    ASSERT(ops->insn_fetch);
+
+    memset(s, 0, sizeof(*s));
+    s->ea.type = OP_NONE;
+    s->ea.mem.seg = x86_seg_ds;
+    s->ea.reg = PTR_POISON;
+    s->regs = ctxt->regs;
+    s->ip = ctxt->regs->r(ip);
+
+    s->op_bytes = def_op_bytes = ad_bytes = def_ad_bytes =
+        ctxt->addr_size / 8;
+    if ( s->op_bytes == 8 )
+    {
+        s->op_bytes = def_op_bytes = 4;
+#ifndef __x86_64__
+        return X86EMUL_UNHANDLEABLE;
+#endif
+    }
+
+    /* Prefix bytes. */
+    for ( ; ; )
+    {
+        switch ( b = insn_fetch_type(uint8_t) )
+        {
+        case 0x66: /* operand-size override */
+            s->op_bytes = def_op_bytes ^ 6;
+            if ( !s->vex.pfx )
+                s->vex.pfx = vex_66;
+            break;
+        case 0x67: /* address-size override */
+            ad_bytes = def_ad_bytes ^ (mode_64bit() ? 12 : 6);
+            break;
+        case 0x2e: /* CS override / ignored in 64-bit mode */
+            if ( !mode_64bit() )
+                override_seg = x86_seg_cs;
+            break;
+        case 0x3e: /* DS override / ignored in 64-bit mode */
+            if ( !mode_64bit() )
+                override_seg = x86_seg_ds;
+            break;
+        case 0x26: /* ES override / ignored in 64-bit mode */
+            if ( !mode_64bit() )
+                override_seg = x86_seg_es;
+            break;
+        case 0x64: /* FS override */
+            override_seg = x86_seg_fs;
+            break;
+        case 0x65: /* GS override */
+            override_seg = x86_seg_gs;
+            break;
+        case 0x36: /* SS override / ignored in 64-bit mode */
+            if ( !mode_64bit() )
+                override_seg = x86_seg_ss;
+            break;
+        case 0xf0: /* LOCK */
+            s->lock_prefix = true;
+            break;
+        case 0xf2: /* REPNE/REPNZ */
+            s->vex.pfx = vex_f2;
+            break;
+        case 0xf3: /* REP/REPE/REPZ */
+            s->vex.pfx = vex_f3;
+            break;
+        case 0x40 ... 0x4f: /* REX */
+            if ( !mode_64bit() )
+                goto done_prefixes;
+            s->rex_prefix = b;
+            continue;
+        default:
+            goto done_prefixes;
+        }
+
+        /* Any legacy prefix after a REX prefix nullifies its effect. */
+        s->rex_prefix = 0;
+    }
+ done_prefixes:
+
+    if ( s->rex_prefix & REX_W )
+        s->op_bytes = 8;
+
+    /* Opcode byte(s). */
+    d = opcode_table[b];
+    if ( d == 0 && b == 0x0f )
+    {
+        /* Two-byte opcode. */
+        b = insn_fetch_type(uint8_t);
+        d = twobyte_table[b].desc;
+        switch ( b )
+        {
+        default:
+            opcode = b | MASK_INSR(0x0f, X86EMUL_OPC_EXT_MASK);
+            s->ext = ext_0f;
+            s->simd_size = twobyte_table[b].size;
+            break;
+        case 0x38:
+            b = insn_fetch_type(uint8_t);
+            opcode = b | MASK_INSR(0x0f38, X86EMUL_OPC_EXT_MASK);
+            s->ext = ext_0f38;
+            break;
+        case 0x3a:
+            b = insn_fetch_type(uint8_t);
+            opcode = b | MASK_INSR(0x0f3a, X86EMUL_OPC_EXT_MASK);
+            s->ext = ext_0f3a;
+            break;
+        }
+    }
+    else
+        opcode = b;
+
+    /* ModRM and SIB bytes. */
+    if ( d & ModRM )
+    {
+        s->modrm = insn_fetch_type(uint8_t);
+        s->modrm_mod = (s->modrm & 0xc0) >> 6;
+
+        if ( !s->ext && ((b & ~1) == 0xc4 || (b == 0x8f && (s->modrm & 0x18)) ||
+                         b == 0x62) )
+            switch ( def_ad_bytes )
+            {
+            default:
+                BUG(); /* Shouldn't be possible. */
+            case 2:
+                if ( s->regs->eflags & X86_EFLAGS_VM )
+                    break;
+                /* fall through */
+            case 4:
+                if ( s->modrm_mod != 3 || in_realmode(ctxt, ops) )
+                    break;
+                /* fall through */
+            case 8:
+                /* VEX / XOP / EVEX */
+                generate_exception_if(s->rex_prefix || s->vex.pfx, X86_EXC_UD);
+                /*
+                 * With operand size override disallowed (see above), op_bytes
+                 * should not have changed from its default.
+                 */
+                ASSERT(s->op_bytes == def_op_bytes);
+
+                s->vex.raw[0] = s->modrm;
+                if ( b == 0xc5 )
+                {
+                    opcode = X86EMUL_OPC_VEX_;
+                    s->vex.raw[1] = s->modrm;
+                    s->vex.opcx = vex_0f;
+                    s->vex.x = 1;
+                    s->vex.b = 1;
+                    s->vex.w = 0;
+                }
+                else
+                {
+                    s->vex.raw[1] = insn_fetch_type(uint8_t);
+                    if ( mode_64bit() )
+                    {
+                        if ( !s->vex.b )
+                            s->rex_prefix |= REX_B;
+                        if ( !s->vex.x )
+                            s->rex_prefix |= REX_X;
+                        if ( s->vex.w )
+                        {
+                            s->rex_prefix |= REX_W;
+                            s->op_bytes = 8;
+                        }
+                    }
+                    else
+                    {
+                        /* Operand size fixed at 4 (no override via W bit). */
+                        s->op_bytes = 4;
+                        s->vex.b = 1;
+                    }
+                    switch ( b )
+                    {
+                    case 0x62:
+                        opcode = X86EMUL_OPC_EVEX_;
+                        s->evex.raw[0] = s->vex.raw[0];
+                        s->evex.raw[1] = s->vex.raw[1];
+                        s->evex.raw[2] = insn_fetch_type(uint8_t);
+
+                        generate_exception_if(!s->evex.mbs || s->evex.mbz, X86_EXC_UD);
+                        generate_exception_if(!s->evex.opmsk && s->evex.z, X86_EXC_UD);
+
+                        if ( !mode_64bit() )
+                            s->evex.R = 1;
+
+                        s->vex.opcx = s->evex.opcx;
+                        break;
+                    case 0xc4:
+                        opcode = X86EMUL_OPC_VEX_;
+                        break;
+                    default:
+                        opcode = 0;
+                        break;
+                    }
+                }
+                if ( !s->vex.r )
+                    s->rex_prefix |= REX_R;
+
+                s->ext = s->vex.opcx;
+                if ( b != 0x8f )
+                {
+                    b = insn_fetch_type(uint8_t);
+                    switch ( s->ext )
+                    {
+                    case vex_0f:
+                        opcode |= MASK_INSR(0x0f, X86EMUL_OPC_EXT_MASK);
+                        d = twobyte_table[b].desc;
+                        s->simd_size = twobyte_table[b].size;
+                        break;
+                    case vex_0f38:
+                        opcode |= MASK_INSR(0x0f38, X86EMUL_OPC_EXT_MASK);
+                        d = twobyte_table[0x38].desc;
+                        break;
+                    case vex_0f3a:
+                        opcode |= MASK_INSR(0x0f3a, X86EMUL_OPC_EXT_MASK);
+                        d = twobyte_table[0x3a].desc;
+                        break;
+                    default:
+                        rc = X86EMUL_UNRECOGNIZED;
+                        goto done;
+                    }
+                }
+                else if ( s->ext < ext_8f08 + ARRAY_SIZE(xop_table) )
+                {
+                    b = insn_fetch_type(uint8_t);
+                    opcode |= MASK_INSR(0x8f08 + s->ext - ext_8f08,
+                                        X86EMUL_OPC_EXT_MASK);
+                    d = array_access_nospec(xop_table, s->ext - ext_8f08);
+                }
+                else
+                {
+                    rc = X86EMUL_UNRECOGNIZED;
+                    goto done;
+                }
+
+                opcode |= b | MASK_INSR(s->vex.pfx, X86EMUL_OPC_PFX_MASK);
+
+                if ( !evex_encoded() )
+                    s->evex.lr = s->vex.l;
+
+                if ( !(d & ModRM) )
+                    break;
+
+                s->modrm = insn_fetch_type(uint8_t);
+                s->modrm_mod = (s->modrm & 0xc0) >> 6;
+
+                break;
+            }
+    }
+
+    if ( d & ModRM )
+    {
+        unsigned int disp8scale = 0;
+
+        d &= ~ModRM;
+#undef ModRM /* Only its aliases are valid to use from here on. */
+        s->modrm_reg = ((s->rex_prefix & 4) << 1) | ((s->modrm & 0x38) >> 3) |
+                       ((evex_encoded() && !s->evex.R) << 4);
+        s->modrm_rm  = s->modrm & 0x07;
+
+        /*
+         * Early operand adjustments. Only ones affecting further processing
+         * prior to the x86_decode_*() calls really belong here. That would
+         * normally be only addition/removal of SrcImm/SrcImm16, so their
+         * fetching can be taken care of by the common code below.
+         */
+        switch ( s->ext )
+        {
+        case ext_none:
+            switch ( b )
+            {
+            case 0xf6 ... 0xf7: /* Grp3 */
+                switch ( s->modrm_reg & 7 )
+                {
+                case 0 ... 1: /* test */
+                    d |= DstMem | SrcImm;
+                    break;
+                case 2: /* not */
+                case 3: /* neg */
+                    d |= DstMem;
+                    break;
+                case 4: /* mul */
+                case 5: /* imul */
+                case 6: /* div */
+                case 7: /* idiv */
+                    /*
+                     * DstEax isn't really precise for all cases; updates to
+                     * rDX get handled in an open coded manner.
+                     */
+                    d |= DstEax | SrcMem;
+                    break;
+                }
+                break;
+            }
+            break;
+
+        case ext_0f:
+            if ( evex_encoded() )
+                disp8scale = decode_disp8scale(twobyte_table[b].d8s, s);
+
+            switch ( b )
+            {
+            case 0x12: /* vmovsldup / vmovddup */
+                if ( s->evex.pfx == vex_f2 )
+                    disp8scale = s->evex.lr ? 4 + s->evex.lr : 3;
+                /* fall through */
+            case 0x16: /* vmovshdup */
+                if ( s->evex.pfx == vex_f3 )
+                    disp8scale = 4 + s->evex.lr;
+                break;
+
+            case 0x20: /* mov cr,reg */
+            case 0x21: /* mov dr,reg */
+            case 0x22: /* mov reg,cr */
+            case 0x23: /* mov reg,dr */
+                /*
+                 * Mov to/from cr/dr ignore the encoding of Mod, and behave as
+                 * if they were encoded as reg/reg instructions.  No further
+                 * disp/SIB bytes are fetched.
+                 */
+                s->modrm_mod = 3;
+                break;
+
+            case 0x78:
+            case 0x79:
+                if ( !s->evex.pfx )
+                    break;
+                /* vcvt{,t}ps2uqq need special casing */
+                if ( s->evex.pfx == vex_66 )
+                {
+                    if ( !s->evex.w && !s->evex.brs )
+                        --disp8scale;
+                    break;
+                }
+                /* vcvt{,t}s{s,d}2usi need special casing: fall through */
+            case 0x2c: /* vcvtts{s,d}2si need special casing */
+            case 0x2d: /* vcvts{s,d}2si need special casing */
+                if ( evex_encoded() )
+                    disp8scale = 2 + (s->evex.pfx & VEX_PREFIX_DOUBLE_MASK);
+                break;
+
+            case 0x5a: /* vcvtps2pd needs special casing */
+                if ( disp8scale && !s->evex.pfx && !s->evex.brs )
+                    --disp8scale;
+                break;
+
+            case 0x7a: /* vcvttps2qq and vcvtudq2pd need special casing */
+                if ( disp8scale && s->evex.pfx != vex_f2 && !s->evex.w && !s->evex.brs )
+                    --disp8scale;
+                break;
+
+            case 0x7b: /* vcvtp{s,d}2qq need special casing */
+                if ( disp8scale && s->evex.pfx == vex_66 )
+                    disp8scale = (s->evex.brs ? 2 : 3 + s->evex.lr) + s->evex.w;
+                break;
+
+            case 0x7e: /* vmovq xmm/m64,xmm needs special casing */
+                if ( disp8scale == 2 && s->evex.pfx == vex_f3 )
+                    disp8scale = 3;
+                break;
+
+            case 0xe6: /* vcvtdq2pd needs special casing */
+                if ( disp8scale && s->evex.pfx == vex_f3 && !s->evex.w && !s->evex.brs )
+                    --disp8scale;
+                break;
+            }
+            break;
+
+        case ext_0f38:
+            d = ext0f38_table[b].to_mem ? DstMem | SrcReg
+                                        : DstReg | SrcMem;
+            if ( ext0f38_table[b].two_op )
+                d |= TwoOp;
+            if ( ext0f38_table[b].vsib )
+                d |= vSIB;
+            s->simd_size = ext0f38_table[b].simd_size;
+            if ( evex_encoded() )
+            {
+                /*
+                 * VPMOVUS* are identical to VPMOVS* Disp8-scaling-wise, but
+                 * their attributes don't match those of the vex_66 encoded
+                 * insns with the same base opcodes. Rather than adding new
+                 * columns to the table, handle this here for now.
+                 */
+                if ( s->evex.pfx != vex_f3 || (b & 0xf8) != 0x10 )
+                    disp8scale = decode_disp8scale(ext0f38_table[b].d8s, s);
+                else
+                {
+                    disp8scale = decode_disp8scale(ext0f38_table[b ^ 0x30].d8s,
+                                                   s);
+                    s->simd_size = simd_other;
+                }
+
+                switch ( b )
+                {
+                /* vp4dpwssd{,s} need special casing */
+                case 0x52: case 0x53:
+                /* v4f{,n}madd{p,s}s need special casing */
+                case 0x9a: case 0x9b: case 0xaa: case 0xab:
+                    if ( s->evex.pfx == vex_f2 )
+                    {
+                        disp8scale = 4;
+                        s->simd_size = simd_128;
+                    }
+                    break;
+                }
+            }
+            break;
+
+        case ext_0f3a:
+            /*
+             * Cannot update d here yet, as the immediate operand still
+             * needs fetching.
+             */
+            s->simd_size = ext0f3a_table[b].simd_size;
+            if ( evex_encoded() )
+                disp8scale = decode_disp8scale(ext0f3a_table[b].d8s, s);
+            break;
+
+        case ext_8f09:
+            if ( ext8f09_table[b].two_op )
+                d |= TwoOp;
+            s->simd_size = ext8f09_table[b].simd_size;
+            break;
+
+        case ext_8f08:
+        case ext_8f0a:
+            /*
+             * Cannot update d here yet, as the immediate operand still
+             * needs fetching.
+             */
+            break;
+
+        default:
+            ASSERT_UNREACHABLE();
+            return X86EMUL_UNIMPLEMENTED;
+        }
+
+        if ( s->modrm_mod == 3 )
+        {
+            generate_exception_if(d & vSIB, X86_EXC_UD);
+            s->modrm_rm |= ((s->rex_prefix & 1) << 3) |
+                           ((evex_encoded() && !s->evex.x) << 4);
+            s->ea.type = OP_REG;
+        }
+        else if ( ad_bytes == 2 )
+        {
+            /* 16-bit ModR/M decode. */
+            generate_exception_if(d & vSIB, X86_EXC_UD);
+            s->ea.type = OP_MEM;
+            switch ( s->modrm_rm )
+            {
+            case 0:
+                s->ea.mem.off = s->regs->bx + s->regs->si;
+                break;
+            case 1:
+                s->ea.mem.off = s->regs->bx + s->regs->di;
+                break;
+            case 2:
+                s->ea.mem.seg = x86_seg_ss;
+                s->ea.mem.off = s->regs->bp + s->regs->si;
+                break;
+            case 3:
+                s->ea.mem.seg = x86_seg_ss;
+                s->ea.mem.off = s->regs->bp + s->regs->di;
+                break;
+            case 4:
+                s->ea.mem.off = s->regs->si;
+                break;
+            case 5:
+                s->ea.mem.off = s->regs->di;
+                break;
+            case 6:
+                if ( s->modrm_mod == 0 )
+                    break;
+                s->ea.mem.seg = x86_seg_ss;
+                s->ea.mem.off = s->regs->bp;
+                break;
+            case 7:
+                s->ea.mem.off = s->regs->bx;
+                break;
+            }
+            switch ( s->modrm_mod )
+            {
+            case 0:
+                if ( s->modrm_rm == 6 )
+                    s->ea.mem.off = insn_fetch_type(int16_t);
+                break;
+            case 1:
+                s->ea.mem.off += insn_fetch_type(int8_t) * (1 << disp8scale);
+                break;
+            case 2:
+                s->ea.mem.off += insn_fetch_type(int16_t);
+                break;
+            }
+        }
+        else
+        {
+            /* 32/64-bit ModR/M decode. */
+            s->ea.type = OP_MEM;
+            if ( s->modrm_rm == 4 )
+            {
+                uint8_t sib = insn_fetch_type(uint8_t);
+                uint8_t sib_base = (sib & 7) | ((s->rex_prefix << 3) & 8);
+
+                s->sib_index = ((sib >> 3) & 7) | ((s->rex_prefix << 2) & 8);
+                s->sib_scale = (sib >> 6) & 3;
+                if ( unlikely(d & vSIB) )
+                    s->sib_index |= (mode_64bit() && evex_encoded() &&
+                                     !s->evex.RX) << 4;
+                else if ( s->sib_index != 4 )
+                {
+                    s->ea.mem.off = *decode_gpr(s->regs, s->sib_index);
+                    s->ea.mem.off <<= s->sib_scale;
+                }
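+                /*
+                 * Mod == 0 with SIB.base == 5 means base-less disp32
+                 * (REX.B is ignored for this case, hence the & 7 below).
+                 */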
+                if ( (s->modrm_mod == 0) && ((sib_base & 7) == 5) )
+                    s->ea.mem.off += insn_fetch_type(int32_t);
+                else if ( sib_base == 4 )
+                {
+                    s->ea.mem.seg  = x86_seg_ss;
+                    s->ea.mem.off += s->regs->r(sp);
+                    if ( !s->ext && (b == 0x8f) )
+                        /* POP <rm> computes its EA post-increment. */
+                        s->ea.mem.off += ((mode_64bit() && (s->op_bytes == 4))
+                                       ? 8 : s->op_bytes);
+                }
+                else if ( sib_base == 5 )
+                {
+                    s->ea.mem.seg  = x86_seg_ss;
+                    s->ea.mem.off += s->regs->r(bp);
+                }
+                else
+                    s->ea.mem.off += *decode_gpr(s->regs, sib_base);
+            }
+            else
+            {
+                generate_exception_if(d & vSIB, X86_EXC_UD);
+                s->modrm_rm |= (s->rex_prefix & 1) << 3;
+                s->ea.mem.off = *decode_gpr(s->regs, s->modrm_rm);
+                if ( (s->modrm_rm == 5) && (s->modrm_mod != 0) )
+                    s->ea.mem.seg = x86_seg_ss;
+            }
+            switch ( s->modrm_mod )
+            {
+            case 0:
+                if ( (s->modrm_rm & 7) != 5 )
+                    break;
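+                /*
+                 * Base-less disp32 form; in 64-bit mode it is RIP-relative,
+                 * with the instruction pointer added once decoding is done.
+                 */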
+                s->ea.mem.off = insn_fetch_type(int32_t);
+                pc_rel = mode_64bit();
+                break;
+            case 1:
+                s->ea.mem.off += insn_fetch_type(int8_t) * (1 << disp8scale);
+                break;
+            case 2:
+                s->ea.mem.off += insn_fetch_type(int32_t);
+                break;
+            }
+        }
+    }
+    else
+    {
+        s->modrm_mod = 0xff;
+        s->modrm_reg = s->modrm_rm = s->modrm = 0;
+    }
+
+    if ( override_seg != x86_seg_none )
+        s->ea.mem.seg = override_seg;
+
+    /* Fetch the immediate operand, if present. */
+    switch ( d & SrcMask )
+    {
+        unsigned int bytes;
+
+    case SrcImm:
+        if ( !(d & ByteOp) )
+        {
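+            /*
+             * In 64-bit mode, Intel-like CPUs ignore a 66h prefix on near
+             * call/jmp/jcc: the immediate is 32 bits wide regardless.
+             */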
+            if ( mode_64bit() && !amd_like(ctxt) &&
+                 ((s->ext == ext_none && (b | 1) == 0xe9) /* call / jmp */ ||
+                  (s->ext == ext_0f && (b | 0xf) == 0x8f) /* jcc */ ) )
+                s->op_bytes = 4;
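+            /*
+             * Immediates are at most 32 bits wide even for 64-bit operand
+             * size; mov imm64 fetches its high half in decode_onebyte().
+             */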
+            bytes = s->op_bytes != 8 ? s->op_bytes : 4;
+        }
+        else
+        {
+    case SrcImmByte:
+            bytes = 1;
+        }
+        /* NB. Immediates are sign-extended as necessary. */
+        switch ( bytes )
+        {
+        case 1: s->imm1 = insn_fetch_type(int8_t);  break;
+        case 2: s->imm1 = insn_fetch_type(int16_t); break;
+        case 4: s->imm1 = insn_fetch_type(int32_t); break;
+        }
+        break;
+    case SrcImm16:
+        s->imm1 = insn_fetch_type(uint16_t);
+        break;
+    }
+
+    ctxt->opcode = opcode;
+    s->desc = d;
+
+    switch ( s->ext )
+    {
+    case ext_none:
+        rc = decode_onebyte(s, ctxt, ops);
+        break;
+
+    case ext_0f:
+        rc = decode_twobyte(s, ctxt, ops);
+        break;
+
+    case ext_0f38:
+        rc = decode_0f38(s, ctxt, ops);
+        break;
+
+    case ext_0f3a:
+        d = ext0f3a_table[b].to_mem ? DstMem | SrcReg : DstReg | SrcMem;
+        if ( ext0f3a_table[b].two_op )
+            d |= TwoOp;
+        else if ( ext0f3a_table[b].four_op && !mode_64bit() && s->vex.opcx )
+            s->imm1 &= 0x7f;
+        s->desc = d;
+        rc = decode_0f3a(s, ctxt, ops);
+        break;
+
+    case ext_8f08:
+        d = DstReg | SrcMem;
+        if ( ext8f08_table[b].two_op )
+            d |= TwoOp;
+        else if ( ext8f08_table[b].four_op && !mode_64bit() )
+            s->imm1 &= 0x7f;
+        s->desc = d;
+        s->simd_size = ext8f08_table[b].simd_size;
+        break;
+
+    case ext_8f09:
+    case ext_8f0a:
+        break;
+
+    default:
+        ASSERT_UNREACHABLE();
+        return X86EMUL_UNIMPLEMENTED;
+    }
+
+    if ( s->ea.type == OP_MEM )
+    {
+        if ( pc_rel )
+            s->ea.mem.off += s->ip;
+
+        s->ea.mem.off = truncate_ea(s->ea.mem.off);
+    }
+
+    /*
+     * Simple op_bytes calculations. More complicated cases produce 0
+     * here and get handled during the execution phase.
+     */
+    switch ( s->simd_size )
+    {
+    case simd_none:
+        /*
+         * When prefix 66 has a meaning different from operand-size override,
+         * operand size defaults to 4 and can't be overridden to 2.
+         */
+        if ( s->op_bytes == 2 &&
+             (ctxt->opcode & X86EMUL_OPC_PFX_MASK) == X86EMUL_OPC_66(0, 0) )
+            s->op_bytes = 4;
+        break;
+
+#ifndef X86EMUL_NO_SIMD
+    case simd_packed_int:
+        switch ( s->vex.pfx )
+        {
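+        /*
+         * Plain MMX (no VEX/EVEX, no 66h) uses 8-byte registers; the
+         * VEX/EVEX and 66h forms scale with the encoded vector length.
+         */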
+        case vex_none:
+            if ( !s->vex.opcx )
+            {
+                s->op_bytes = 8;
+                break;
+            }
+            /* fall through */
+        case vex_66:
+            s->op_bytes = 16 << s->evex.lr;
+            break;
+        default:
+            s->op_bytes = 0;
+            break;
+        }
+        break;
+
+    case simd_single_fp:
+        if ( s->vex.pfx & VEX_PREFIX_DOUBLE_MASK )
+        {
+            s->op_bytes = 0;
+            break;
+    case simd_packed_fp:
+            if ( s->vex.pfx & VEX_PREFIX_SCALAR_MASK )
+            {
+                s->op_bytes = 0;
+                break;
+            }
+        }
+        /* fall through */
+    case simd_any_fp:
+        switch ( s->vex.pfx )
+        {
+        default:
+            s->op_bytes = 16 << s->evex.lr;
+            break;
+        case vex_f3:
+            generate_exception_if(evex_encoded() && s->evex.w, X86_EXC_UD);
+            s->op_bytes = 4;
+            break;
+        case vex_f2:
+            generate_exception_if(evex_encoded() && !s->evex.w, X86_EXC_UD);
+            s->op_bytes = 8;
+            break;
+        }
+        break;
+
+    case simd_scalar_opc:
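+        /* The low opcode bit selects single (4) vs double (8) precision. */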
+        s->op_bytes = 4 << (ctxt->opcode & 1);
+        break;
+
+    case simd_scalar_vexw:
+        s->op_bytes = 4 << s->vex.w;
+        break;
+
+    case simd_128:
+        /* The special cases here are MMX shift insns. */
+        s->op_bytes = s->vex.opcx || s->vex.pfx ? 16 : 8;
+        break;
+
+    case simd_256:
+        s->op_bytes = 32;
+        break;
+#endif /* !X86EMUL_NO_SIMD */
+
+    default:
+        s->op_bytes = 0;
+        break;
+    }
+
+ done:
+    return rc;
+}
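
To illustrate what the decode above ultimately produces for memory
operands, here is a minimal standalone sketch (illustrative only, not
part of the patch; all names are made up) of the effective address
computation, including the EVEX Disp8*N scaling applied in the
mod == 1 paths:

    #include <stdint.h>

    /*
     * base + (index << scale) + disp, with an 8-bit displacement
     * pre-multiplied by 2^disp8scale for EVEX compressed Disp8*N.
     */
    static uint64_t ea_offset(uint64_t base, uint64_t index,
                              unsigned int scale, int8_t disp8,
                              unsigned int disp8scale)
    {
        return base + (index << scale) +
               (int64_t)disp8 * (1 << disp8scale);
    }

E.g. with disp8scale == 6 (a full 64-byte ZMM operand) a displacement
byte of 1 addresses the next vector, which is what the multiplications
by (1 << disp8scale) above implement.
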
--- a/xen/arch/x86/x86_emulate/private.h
+++ b/xen/arch/x86/x86_emulate/private.h
@@ -37,9 +37,11 @@
 #ifdef __i386__
 # define mode_64bit() false
 # define r(name) e ## name
+# define PTR_POISON NULL /* 32-bit builds are for user-space, so NULL is OK. */
 #else
 # define mode_64bit() (ctxt->addr_size == 64)
 # define r(name) r ## name
+# define PTR_POISON ((void *)0x8086000000008086UL) /* non-canonical */
 #endif
 
 /* Operand sizes: 8-bit operands or specified/overridden size. */
@@ -76,6 +78,23 @@
 
 typedef uint8_t opcode_desc_t;
 
+enum disp8scale {
+    /* Values 0 ... 4 are explicit sizes. */
+    d8s_bw = 5,
+    d8s_dq,
+    /* EVEX.W ignored outside of 64-bit mode */
+    d8s_dq64,
+    /*
+     * All further values must come last, in exactly the order given,
+     * so that arithmetic on the values works.
+     */
+    d8s_vl,
+    d8s_vl_by_2,
+    d8s_vl_by_4,
+    d8s_vl_by_8,
+};
+typedef uint8_t disp8scale_t;
+
 /* Type, address-of, and value of an instruction's operand. */
 struct operand {
     enum { OP_REG, OP_MEM, OP_IMM, OP_NONE } type;
@@ -182,6 +201,9 @@ enum vex_pfx {
     vex_f2
 };
 
+#define VEX_PREFIX_DOUBLE_MASK 0x1
+#define VEX_PREFIX_SCALAR_MASK 0x2
+
 union vex {
     uint8_t raw[2];
     struct {             /* SDM names */
@@ -706,6 +728,10 @@ do {
     if ( rc ) goto done;                                        \
 } while (0)
 
+int x86emul_decode(struct x86_emulate_state *s,
+                   struct x86_emulate_ctxt *ctxt,
+                   const struct x86_emulate_ops *ops);
+
 int x86emul_fpu(struct x86_emulate_state *s,
                 struct cpu_user_regs *regs,
                 struct operand *dst,
@@ -735,6 +761,13 @@ int x86emul_0fc7(struct x86_emulate_stat
                  const struct x86_emulate_ops *ops,
                  mmval_t *mmvalp);
 
+/* Initialise output state in x86_emulate_ctxt */
+static inline void init_context(struct x86_emulate_ctxt *ctxt)
+{
+    ctxt->retire.raw = 0;
+    x86_emul_reset_event(ctxt);
+}
+
 static inline bool is_aligned(enum x86_segment seg, unsigned long offs,
                               unsigned int size, struct x86_emulate_ctxt *ctxt,
                               const struct x86_emulate_ops *ops)
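
The ordering constraint spelled out in the disp8scale comment above is
load-bearing: decode_disp8scale() derives the scaling shift for the
d8s_vl group arithmetically from the enumerator. A sketch of that
relationship, given the enum above (illustrative only, not part of the
patch):

    /*
     * A full vector scales by 2^(4 + EVEX.L'L) bytes; each _by_N
     * enumerator step halves that, hence the ordering requirement.
     */
    unsigned int d8s_vl_shift(enum disp8scale scale, unsigned int lr)
    {
        return 4 + lr - (scale - d8s_vl);
    }

E.g. d8s_vl_by_2 with EVEX.L'L == 2 yields a shift of 5, i.e. 32-byte
scaling for the half-width memory operand of a 64-byte vector insn.
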
--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -22,274 +22,6 @@
 
 #include "private.h"
 
-static const opcode_desc_t opcode_table[256] = {
-    /* 0x00 - 0x07 */
-    ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM,
-    ByteOp|DstReg|SrcMem|ModRM, DstReg|SrcMem|ModRM,
-    ByteOp|DstEax|SrcImm, DstEax|SrcImm, ImplicitOps|Mov, ImplicitOps|Mov,
-    /* 0x08 - 0x0F */
-    ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM,
-    ByteOp|DstReg|SrcMem|ModRM, DstReg|SrcMem|ModRM,
-    ByteOp|DstEax|SrcImm, DstEax|SrcImm, ImplicitOps|Mov, 0,
-    /* 0x10 - 0x17 */
-    ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM,
-    ByteOp|DstReg|SrcMem|ModRM, DstReg|SrcMem|ModRM,
-    ByteOp|DstEax|SrcImm, DstEax|SrcImm, ImplicitOps|Mov, ImplicitOps|Mov,
-    /* 0x18 - 0x1F */
-    ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM,
-    ByteOp|DstReg|SrcMem|ModRM, DstReg|SrcMem|ModRM,
-    ByteOp|DstEax|SrcImm, DstEax|SrcImm, ImplicitOps|Mov, ImplicitOps|Mov,
-    /* 0x20 - 0x27 */
-    ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM,
-    ByteOp|DstReg|SrcMem|ModRM, DstReg|SrcMem|ModRM,
-    ByteOp|DstEax|SrcImm, DstEax|SrcImm, 0, ImplicitOps,
-    /* 0x28 - 0x2F */
-    ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM,
-    ByteOp|DstReg|SrcMem|ModRM, DstReg|SrcMem|ModRM,
-    ByteOp|DstEax|SrcImm, DstEax|SrcImm, 0, ImplicitOps,
-    /* 0x30 - 0x37 */
-    ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM,
-    ByteOp|DstReg|SrcMem|ModRM, DstReg|SrcMem|ModRM,
-    ByteOp|DstEax|SrcImm, DstEax|SrcImm, 0, ImplicitOps,
-    /* 0x38 - 0x3F */
-    ByteOp|DstReg|SrcMem|ModRM, DstReg|SrcMem|ModRM,
-    ByteOp|DstReg|SrcMem|ModRM, DstReg|SrcMem|ModRM,
-    ByteOp|DstEax|SrcImm, DstEax|SrcImm, 0, ImplicitOps,
-    /* 0x40 - 0x4F */
-    ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps,
-    ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps,
-    ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps,
-    ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps,
-    /* 0x50 - 0x5F */
-    ImplicitOps|Mov, ImplicitOps|Mov, ImplicitOps|Mov, ImplicitOps|Mov,
-    ImplicitOps|Mov, ImplicitOps|Mov, ImplicitOps|Mov, ImplicitOps|Mov,
-    ImplicitOps|Mov, ImplicitOps|Mov, ImplicitOps|Mov, ImplicitOps|Mov,
-    ImplicitOps|Mov, ImplicitOps|Mov, ImplicitOps|Mov, ImplicitOps|Mov,
-    /* 0x60 - 0x67 */
-    ImplicitOps, ImplicitOps, DstReg|SrcMem|ModRM, DstReg|SrcNone|ModRM|Mov,
-    0, 0, 0, 0,
-    /* 0x68 - 0x6F */
-    DstImplicit|SrcImm|Mov, DstReg|SrcImm|ModRM|Mov,
-    DstImplicit|SrcImmByte|Mov, DstReg|SrcImmByte|ModRM|Mov,
-    ImplicitOps|Mov, ImplicitOps|Mov, ImplicitOps|Mov, ImplicitOps|Mov,
-    /* 0x70 - 0x77 */
-    DstImplicit|SrcImmByte, DstImplicit|SrcImmByte,
-    DstImplicit|SrcImmByte, DstImplicit|SrcImmByte,
-    DstImplicit|SrcImmByte, DstImplicit|SrcImmByte,
-    DstImplicit|SrcImmByte, DstImplicit|SrcImmByte,
-    /* 0x78 - 0x7F */
-    DstImplicit|SrcImmByte, DstImplicit|SrcImmByte,
-    DstImplicit|SrcImmByte, DstImplicit|SrcImmByte,
-    DstImplicit|SrcImmByte, DstImplicit|SrcImmByte,
-    DstImplicit|SrcImmByte, DstImplicit|SrcImmByte,
-    /* 0x80 - 0x87 */
-    ByteOp|DstMem|SrcImm|ModRM, DstMem|SrcImm|ModRM,
-    ByteOp|DstMem|SrcImm|ModRM, DstMem|SrcImmByte|ModRM,
-    ByteOp|DstReg|SrcMem|ModRM, DstReg|SrcMem|ModRM,
-    ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM,
-    /* 0x88 - 0x8F */
-    ByteOp|DstMem|SrcReg|ModRM|Mov, DstMem|SrcReg|ModRM|Mov,
-    ByteOp|DstReg|SrcMem|ModRM|Mov, DstReg|SrcMem|ModRM|Mov,
-    DstMem|SrcReg|ModRM|Mov, DstReg|SrcNone|ModRM,
-    DstReg|SrcMem16|ModRM|Mov, DstMem|SrcNone|ModRM|Mov,
-    /* 0x90 - 0x97 */
-    DstImplicit|SrcEax, DstImplicit|SrcEax,
-    DstImplicit|SrcEax, DstImplicit|SrcEax,
-    DstImplicit|SrcEax, DstImplicit|SrcEax,
-    DstImplicit|SrcEax, DstImplicit|SrcEax,
-    /* 0x98 - 0x9F */
-    ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps,
-    ImplicitOps|Mov, ImplicitOps|Mov, ImplicitOps, ImplicitOps,
-    /* 0xA0 - 0xA7 */
-    ByteOp|DstEax|SrcMem|Mov, DstEax|SrcMem|Mov,
-    ByteOp|DstMem|SrcEax|Mov, DstMem|SrcEax|Mov,
-    ByteOp|ImplicitOps|Mov, ImplicitOps|Mov,
-    ByteOp|ImplicitOps, ImplicitOps,
-    /* 0xA8 - 0xAF */
-    ByteOp|DstEax|SrcImm, DstEax|SrcImm,
-    ByteOp|DstImplicit|SrcEax|Mov, DstImplicit|SrcEax|Mov,
-    ByteOp|DstEax|SrcImplicit|Mov, DstEax|SrcImplicit|Mov,
-    ByteOp|DstImplicit|SrcEax, DstImplicit|SrcEax,
-    /* 0xB0 - 0xB7 */
-    ByteOp|DstReg|SrcImm|Mov, ByteOp|DstReg|SrcImm|Mov,
-    ByteOp|DstReg|SrcImm|Mov, ByteOp|DstReg|SrcImm|Mov,
-    ByteOp|DstReg|SrcImm|Mov, ByteOp|DstReg|SrcImm|Mov,
-    ByteOp|DstReg|SrcImm|Mov, ByteOp|DstReg|SrcImm|Mov,
-    /* 0xB8 - 0xBF */
-    DstReg|SrcImm|Mov, DstReg|SrcImm|Mov, DstReg|SrcImm|Mov, DstReg|SrcImm|Mov,
-    DstReg|SrcImm|Mov, DstReg|SrcImm|Mov, DstReg|SrcImm|Mov, DstReg|SrcImm|Mov,
-    /* 0xC0 - 0xC7 */
-    ByteOp|DstMem|SrcImm|ModRM, DstMem|SrcImmByte|ModRM,
-    DstImplicit|SrcImm16, ImplicitOps,
-    DstReg|SrcMem|ModRM|Mov, DstReg|SrcMem|ModRM|Mov,
-    ByteOp|DstMem|SrcImm|ModRM|Mov, DstMem|SrcImm|ModRM|Mov,
-    /* 0xC8 - 0xCF */
-    DstImplicit|SrcImm16, ImplicitOps, DstImplicit|SrcImm16, ImplicitOps,
-    ImplicitOps, DstImplicit|SrcImmByte, ImplicitOps, ImplicitOps,
-    /* 0xD0 - 0xD7 */
-    ByteOp|DstMem|SrcImplicit|ModRM, DstMem|SrcImplicit|ModRM,
-    ByteOp|DstMem|SrcImplicit|ModRM, DstMem|SrcImplicit|ModRM,
-    DstImplicit|SrcImmByte, DstImplicit|SrcImmByte, ImplicitOps, ImplicitOps,
-    /* 0xD8 - 0xDF */
-    ImplicitOps|ModRM, ImplicitOps|ModRM|Mov,
-    ImplicitOps|ModRM, ImplicitOps|ModRM|Mov,
-    ImplicitOps|ModRM, ImplicitOps|ModRM|Mov,
-    DstImplicit|SrcMem16|ModRM, ImplicitOps|ModRM|Mov,
-    /* 0xE0 - 0xE7 */
-    DstImplicit|SrcImmByte, DstImplicit|SrcImmByte,
-    DstImplicit|SrcImmByte, DstImplicit|SrcImmByte,
-    DstEax|SrcImmByte, DstEax|SrcImmByte,
-    DstImplicit|SrcImmByte, DstImplicit|SrcImmByte,
-    /* 0xE8 - 0xEF */
-    DstImplicit|SrcImm|Mov, DstImplicit|SrcImm,
-    ImplicitOps, DstImplicit|SrcImmByte,
-    DstEax|SrcImplicit, DstEax|SrcImplicit, ImplicitOps, ImplicitOps,
-    /* 0xF0 - 0xF7 */
-    0, ImplicitOps, 0, 0,
-    ImplicitOps, ImplicitOps, ByteOp|ModRM, ModRM,
-    /* 0xF8 - 0xFF */
-    ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps,
-    ImplicitOps, ImplicitOps, ByteOp|DstMem|SrcNone|ModRM, DstMem|SrcNone|ModRM
-};
-
-enum disp8scale {
-    /* Values 0 ... 4 are explicit sizes. */
-    d8s_bw = 5,
-    d8s_dq,
-    /* EVEX.W ignored outside of 64-bit mode */
-    d8s_dq64,
-    /*
-     * All further values must strictly be last and in the order
-     * given so that arithmetic on the values works.
-     */
-    d8s_vl,
-    d8s_vl_by_2,
-    d8s_vl_by_4,
-    d8s_vl_by_8,
-};
-typedef uint8_t disp8scale_t;
-
-static const struct twobyte_table {
-    opcode_desc_t desc;
-    simd_opsize_t size:4;
-    disp8scale_t d8s:4;
-} twobyte_table[256] = {
-    [0x00] = { ModRM },
-    [0x01] = { ImplicitOps|ModRM },
-    [0x02] = { DstReg|SrcMem16|ModRM },
-    [0x03] = { DstReg|SrcMem16|ModRM },
-    [0x05] = { ImplicitOps },
-    [0x06] = { ImplicitOps },
-    [0x07] = { ImplicitOps },
-    [0x08] = { ImplicitOps },
-    [0x09] = { ImplicitOps },
-    [0x0b] = { ImplicitOps },
-    [0x0d] = { ImplicitOps|ModRM },
-    [0x0e] = { ImplicitOps },
-    [0x0f] = { ModRM|SrcImmByte },
-    [0x10] = { DstImplicit|SrcMem|ModRM|Mov, simd_any_fp, d8s_vl },
-    [0x11] = { DstMem|SrcImplicit|ModRM|Mov, simd_any_fp, d8s_vl },
-    [0x12] = { DstImplicit|SrcMem|ModRM|Mov, simd_other, 3 },
-    [0x13] = { DstMem|SrcImplicit|ModRM|Mov, simd_other, 3 },
-    [0x14 ... 0x15] = { DstImplicit|SrcMem|ModRM, simd_packed_fp, d8s_vl },
-    [0x16] = { DstImplicit|SrcMem|ModRM|Mov, simd_other, 3 },
-    [0x17] = { DstMem|SrcImplicit|ModRM|Mov, simd_other, 3 },
-    [0x18 ... 0x1f] = { ImplicitOps|ModRM },
-    [0x20 ... 0x21] = { DstMem|SrcImplicit|ModRM },
-    [0x22 ... 0x23] = { DstImplicit|SrcMem|ModRM },
-    [0x28] = { DstImplicit|SrcMem|ModRM|Mov, simd_packed_fp, d8s_vl },
-    [0x29] = { DstMem|SrcImplicit|ModRM|Mov, simd_packed_fp, d8s_vl },
-    [0x2a] = { DstImplicit|SrcMem|ModRM|Mov, simd_other, d8s_dq64 },
-    [0x2b] = { DstMem|SrcImplicit|ModRM|Mov, simd_any_fp, d8s_vl },
-    [0x2c ... 0x2d] = { DstImplicit|SrcMem|ModRM|Mov, simd_other },
-    [0x2e ... 0x2f] = { ImplicitOps|ModRM|TwoOp, simd_none, d8s_dq },
-    [0x30 ... 0x35] = { ImplicitOps },
-    [0x37] = { ImplicitOps },
-    [0x38] = { DstReg|SrcMem|ModRM },
-    [0x3a] = { DstReg|SrcImmByte|ModRM },
-    [0x40 ... 0x4f] = { DstReg|SrcMem|ModRM|Mov },
-    [0x50] = { DstReg|SrcImplicit|ModRM|Mov },
-    [0x51] = { DstImplicit|SrcMem|ModRM|TwoOp, simd_any_fp, d8s_vl },
-    [0x52 ... 0x53] = { DstImplicit|SrcMem|ModRM|TwoOp, simd_single_fp },
-    [0x54 ... 0x57] = { DstImplicit|SrcMem|ModRM, simd_packed_fp, d8s_vl },
-    [0x58 ... 0x59] = { DstImplicit|SrcMem|ModRM, simd_any_fp, d8s_vl },
-    [0x5a] = { DstImplicit|SrcMem|ModRM|Mov, simd_any_fp, d8s_vl },
-    [0x5b] = { DstImplicit|SrcMem|ModRM|Mov, simd_packed_fp, d8s_vl },
-    [0x5c ... 0x5f] = { DstImplicit|SrcMem|ModRM, simd_any_fp, d8s_vl },
-    [0x60 ... 0x62] = { DstImplicit|SrcMem|ModRM, simd_other, d8s_vl },
-    [0x63 ... 0x67] = { DstImplicit|SrcMem|ModRM, simd_packed_int, d8s_vl },
-    [0x68 ... 0x6a] = { DstImplicit|SrcMem|ModRM, simd_other, d8s_vl },
-    [0x6b ... 0x6d] = { DstImplicit|SrcMem|ModRM, simd_packed_int, d8s_vl },
-    [0x6e] = { DstImplicit|SrcMem|ModRM|Mov, simd_none, d8s_dq64 },
-    [0x6f] = { DstImplicit|SrcMem|ModRM|Mov, simd_packed_int, d8s_vl },
-    [0x70] = { SrcImmByte|ModRM|TwoOp, simd_other, d8s_vl },
-    [0x71 ... 0x73] = { DstImplicit|SrcImmByte|ModRM, simd_none, d8s_vl },
-    [0x74 ... 0x76] = { DstImplicit|SrcMem|ModRM, simd_packed_int, d8s_vl },
-    [0x77] = { DstImplicit|SrcNone },
-    [0x78 ... 0x79] = { DstImplicit|SrcMem|ModRM|Mov, simd_other, d8s_vl },
-    [0x7a] = { DstImplicit|SrcMem|ModRM|Mov, simd_packed_fp, d8s_vl },
-    [0x7b] = { DstImplicit|SrcMem|ModRM|Mov, simd_other, d8s_dq64 },
-    [0x7c ... 0x7d] = { DstImplicit|SrcMem|ModRM, simd_other },
-    [0x7e] = { DstMem|SrcImplicit|ModRM|Mov, simd_none, d8s_dq64 },
-    [0x7f] = { DstMem|SrcImplicit|ModRM|Mov, simd_packed_int, d8s_vl },
-    [0x80 ... 0x8f] = { DstImplicit|SrcImm },
-    [0x90 ... 0x9f] = { ByteOp|DstMem|SrcNone|ModRM|Mov },
-    [0xa0 ... 0xa1] = { ImplicitOps|Mov },
-    [0xa2] = { ImplicitOps },
-    [0xa3] = { DstBitBase|SrcReg|ModRM },
-    [0xa4] = { DstMem|SrcImmByte|ModRM },
-    [0xa5] = { DstMem|SrcReg|ModRM },
-    [0xa6 ... 0xa7] = { ModRM },
-    [0xa8 ... 0xa9] = { ImplicitOps|Mov },
-    [0xaa] = { ImplicitOps },
-    [0xab] = { DstBitBase|SrcReg|ModRM },
-    [0xac] = { DstMem|SrcImmByte|ModRM },
-    [0xad] = { DstMem|SrcReg|ModRM },
-    [0xae] = { ImplicitOps|ModRM },
-    [0xaf] = { DstReg|SrcMem|ModRM },
-    [0xb0] = { ByteOp|DstMem|SrcReg|ModRM },
-    [0xb1] = { DstMem|SrcReg|ModRM },
-    [0xb2] = { DstReg|SrcMem|ModRM|Mov },
-    [0xb3] = { DstBitBase|SrcReg|ModRM },
-    [0xb4 ... 0xb5] = { DstReg|SrcMem|ModRM|Mov },
-    [0xb6] = { ByteOp|DstReg|SrcMem|ModRM|Mov },
-    [0xb7] = { DstReg|SrcMem16|ModRM|Mov },
-    [0xb8] = { DstReg|SrcMem|ModRM },
-    [0xb9] = { ModRM },
-    [0xba] = { DstBitBase|SrcImmByte|ModRM },
-    [0xbb] = { DstBitBase|SrcReg|ModRM },
-    [0xbc ... 0xbd] = { DstReg|SrcMem|ModRM },
-    [0xbe] = { ByteOp|DstReg|SrcMem|ModRM|Mov },
-    [0xbf] = { DstReg|SrcMem16|ModRM|Mov },
-    [0xc0] = { ByteOp|DstMem|SrcReg|ModRM },
-    [0xc1] = { DstMem|SrcReg|ModRM },
-    [0xc2] = { DstImplicit|SrcImmByte|ModRM, simd_any_fp, d8s_vl },
-    [0xc3] = { DstMem|SrcReg|ModRM|Mov },
-    [0xc4] = { DstImplicit|SrcImmByte|ModRM, simd_none, 1 },
-    [0xc5] = { DstReg|SrcImmByte|ModRM|Mov },
-    [0xc6] = { DstImplicit|SrcImmByte|ModRM, simd_packed_fp, d8s_vl },
-    [0xc7] = { ImplicitOps|ModRM },
-    [0xc8 ... 0xcf] = { ImplicitOps },
-    [0xd0] = { DstImplicit|SrcMem|ModRM, simd_other },
-    [0xd1 ... 0xd3] = { DstImplicit|SrcMem|ModRM, simd_128, 4 },
-    [0xd4 ... 0xd5] = { DstImplicit|SrcMem|ModRM, simd_packed_int, d8s_vl },
-    [0xd6] = { DstMem|SrcImplicit|ModRM|Mov, simd_other, 3 },
-    [0xd7] = { DstReg|SrcImplicit|ModRM|Mov },
-    [0xd8 ... 0xdf] = { DstImplicit|SrcMem|ModRM, simd_packed_int, d8s_vl },
-    [0xe0] = { DstImplicit|SrcMem|ModRM, simd_packed_int, d8s_vl },
-    [0xe1 ... 0xe2] = { DstImplicit|SrcMem|ModRM, simd_128, 4 },
-    [0xe3 ... 0xe5] = { DstImplicit|SrcMem|ModRM, simd_packed_int, d8s_vl },
-    [0xe6] = { DstImplicit|SrcMem|ModRM|Mov, simd_packed_fp, d8s_vl },
-    [0xe7] = { DstMem|SrcImplicit|ModRM|Mov, simd_packed_int, d8s_vl },
-    [0xe8 ... 0xef] = { DstImplicit|SrcMem|ModRM, simd_packed_int, d8s_vl },
-    [0xf0] = { DstImplicit|SrcMem|ModRM|Mov, simd_other },
-    [0xf1 ... 0xf3] = { DstImplicit|SrcMem|ModRM, simd_128, 4 },
-    [0xf4 ... 0xf6] = { DstImplicit|SrcMem|ModRM, simd_packed_int, d8s_vl },
-    [0xf7] = { DstMem|SrcMem|ModRM|Mov, simd_packed_int },
-    [0xf8 ... 0xfe] = { DstImplicit|SrcMem|ModRM, simd_packed_int, d8s_vl },
-    [0xff] = { ModRM }
-};
-
 /*
  * The next two tables are indexed by high opcode extension byte (the one
  * that's encoded like an immediate) nibble, with each table element then
@@ -325,257 +57,9 @@ static const uint16_t _3dnow_ext_table[1
     [0xb] = (1 << 0xb) /* pswapd */,
 };
 
-/*
- * "two_op" and "four_op" below refer to the number of register operands
- * (one of which possibly also allowing to be a memory one). The named
- * operand counts do not include any immediate operands.
- */
-static const struct ext0f38_table {
-    uint8_t simd_size:5;
-    uint8_t to_mem:1;
-    uint8_t two_op:1;
-    uint8_t vsib:1;
-    disp8scale_t d8s:4;
-} ext0f38_table[256] = {
-    [0x00] = { .simd_size = simd_packed_int, .d8s = d8s_vl },
-    [0x01 ... 0x03] = { .simd_size = simd_packed_int },
-    [0x04] = { .simd_size = simd_packed_int, .d8s = d8s_vl },
-    [0x05 ... 0x0a] = { .simd_size = simd_packed_int },
-    [0x0b] = { .simd_size = simd_packed_int, .d8s = d8s_vl },
-    [0x0c ... 0x0d] = { .simd_size = simd_packed_fp, .d8s = d8s_vl },
-    [0x0e ... 0x0f] = { .simd_size = simd_packed_fp },
-    [0x10 ... 0x12] = { .simd_size = simd_packed_int, .d8s = d8s_vl },
-    [0x13] = { .simd_size = simd_other, .two_op = 1, .d8s = d8s_vl_by_2 },
-    [0x14 ... 0x16] = { .simd_size = simd_packed_fp, .d8s = d8s_vl },
-    [0x17] = { .simd_size = simd_packed_int, .two_op = 1 },
-    [0x18] = { .simd_size = simd_scalar_opc, .two_op = 1, .d8s = 2 },
-    [0x19] = { .simd_size = simd_scalar_opc, .two_op = 1, .d8s = 3 },
-    [0x1a] = { .simd_size = simd_128, .two_op = 1, .d8s = 4 },
-    [0x1b] = { .simd_size = simd_256, .two_op = 1, .d8s = d8s_vl_by_2 },
-    [0x1c ... 0x1f] = { .simd_size = simd_packed_int, .two_op = 1, .d8s = d8s_vl },
-    [0x20] = { .simd_size = simd_other, .two_op = 1, .d8s = d8s_vl_by_2 },
-    [0x21] = { .simd_size = simd_other, .two_op = 1, .d8s = d8s_vl_by_4 },
-    [0x22] = { .simd_size = simd_other, .two_op = 1, .d8s = d8s_vl_by_8 },
-    [0x23] = { .simd_size = simd_other, .two_op = 1, .d8s = d8s_vl_by_2 },
-    [0x24] = { .simd_size = simd_other, .two_op = 1, .d8s = d8s_vl_by_4 },
-    [0x25] = { .simd_size = simd_other, .two_op = 1, .d8s = d8s_vl_by_2 },
-    [0x26 ... 0x29] = { .simd_size = simd_packed_int, .d8s = d8s_vl },
-    [0x2a] = { .simd_size = simd_packed_int, .two_op = 1, .d8s = d8s_vl },
-    [0x2b] = { .simd_size = simd_packed_int, .d8s = d8s_vl },
-    [0x2c] = { .simd_size = simd_packed_fp, .d8s = d8s_vl },
-    [0x2d] = { .simd_size = simd_packed_fp, .d8s = d8s_dq },
-    [0x2e ... 0x2f] = { .simd_size = simd_packed_fp, .to_mem = 1 },
-    [0x30] = { .simd_size = simd_other, .two_op = 1, .d8s = d8s_vl_by_2 },
-    [0x31] = { .simd_size = simd_other, .two_op = 1, .d8s = d8s_vl_by_4 },
-    [0x32] = { .simd_size = simd_other, .two_op = 1, .d8s = d8s_vl_by_8 },
-    [0x33] = { .simd_size = simd_other, .two_op = 1, .d8s = d8s_vl_by_2 },
-    [0x34] = { .simd_size = simd_other, .two_op = 1, .d8s = d8s_vl_by_4 },
-    [0x35] = { .simd_size = simd_other, .two_op = 1, .d8s = d8s_vl_by_2 },
-    [0x36 ... 0x3f] = { .simd_size = simd_packed_int, .d8s = d8s_vl },
-    [0x40] = { .simd_size = simd_packed_int, .d8s = d8s_vl },
-    [0x41] = { .simd_size = simd_packed_int, .two_op = 1 },
-    [0x42] = { .simd_size = simd_packed_fp, .two_op = 1, .d8s = d8s_vl },
-    [0x43] = { .simd_size = simd_scalar_vexw, .d8s = d8s_dq },
-    [0x44] = { .simd_size = simd_packed_int, .two_op = 1, .d8s = d8s_vl },
-    [0x45 ... 0x47] = { .simd_size = simd_packed_int, .d8s = d8s_vl },
-    [0x4c] = { .simd_size = simd_packed_fp, .two_op = 1, .d8s = d8s_vl },
-    [0x4d] = { .simd_size = simd_scalar_vexw, .d8s = d8s_dq },
-    [0x4e] = { .simd_size = simd_packed_fp, .two_op = 1, .d8s = d8s_vl },
-    [0x4f] = { .simd_size = simd_scalar_vexw, .d8s = d8s_dq },
-    [0x50 ... 0x53] = { .simd_size = simd_packed_int, .d8s = d8s_vl },
-    [0x54 ... 0x55] = { .simd_size = simd_packed_int, .two_op = 1, .d8s = d8s_vl },
-    [0x58] = { .simd_size = simd_other, .two_op = 1, .d8s = 2 },
-    [0x59] = { .simd_size = simd_other, .two_op = 1, .d8s = 3 },
-    [0x5a] = { .simd_size = simd_128, .two_op = 1, .d8s = 4 },
-    [0x5b] = { .simd_size = simd_256, .two_op = 1, .d8s = d8s_vl_by_2 },
-    [0x62] = { .simd_size = simd_packed_int, .two_op = 1, .d8s = d8s_bw },
-    [0x63] = { .simd_size = simd_packed_int, .to_mem = 1, .two_op = 1, .d8s = d8s_bw },
-    [0x64 ... 0x66] = { .simd_size = simd_packed_int, .d8s = d8s_vl },
-    [0x68] = { .simd_size = simd_packed_int, .d8s = d8s_vl },
-    [0x70 ... 0x73] = { .simd_size = simd_packed_int, .d8s = d8s_vl },
-    [0x75 ... 0x76] = { .simd_size = simd_packed_int, .d8s = d8s_vl },
-    [0x77] = { .simd_size = simd_packed_fp, .d8s = d8s_vl },
-    [0x78] = { .simd_size = simd_other, .two_op = 1 },
-    [0x79] = { .simd_size = simd_other, .two_op = 1, .d8s = 1 },
-    [0x7a ... 0x7c] = { .simd_size = simd_none, .two_op = 1 },
-    [0x7d ... 0x7e] = { .simd_size = simd_packed_int, .d8s = d8s_vl },
-    [0x7f] = { .simd_size = simd_packed_fp, .d8s = d8s_vl },
-    [0x82] = { .simd_size = simd_other },
-    [0x83] = { .simd_size = simd_packed_int, .d8s = d8s_vl },
-    [0x88] = { .simd_size = simd_packed_fp, .two_op = 1, .d8s = d8s_dq },
-    [0x89] = { .simd_size = simd_packed_int, .two_op = 1, .d8s = d8s_dq },
-    [0x8a] = { .simd_size = simd_packed_fp, .to_mem = 1, .two_op = 1, .d8s = d8s_dq },
-    [0x8b] = { .simd_size = simd_packed_int, .to_mem = 1, .two_op = 1, .d8s = d8s_dq },
-    [0x8c] = { .simd_size = simd_packed_int },
-    [0x8d] = { .simd_size = simd_packed_int, .d8s = d8s_vl },
-    [0x8e] = { .simd_size = simd_packed_int, .to_mem = 1 },
-    [0x8f] = { .simd_size = simd_packed_int, .d8s = d8s_vl },
-    [0x90 ... 0x93] = { .simd_size = simd_other, .vsib = 1, .d8s = d8s_dq },
-    [0x96 ... 0x98] = { .simd_size = simd_packed_fp, .d8s = d8s_vl },
-    [0x99] = { .simd_size = simd_scalar_vexw, .d8s = d8s_dq },
-    [0x9a] = { .simd_size = simd_packed_fp, .d8s = d8s_vl },
-    [0x9b] = { .simd_size = simd_scalar_vexw, .d8s = d8s_dq },
-    [0x9c] = { .simd_size = simd_packed_fp, .d8s = d8s_vl },
-    [0x9d] = { .simd_size = simd_scalar_vexw, .d8s = d8s_dq },
-    [0x9e] = { .simd_size = simd_packed_fp, .d8s = d8s_vl },
-    [0x9f] = { .simd_size = simd_scalar_vexw, .d8s = d8s_dq },
-    [0xa0 ... 0xa3] = { .simd_size = simd_other, .to_mem = 1, .vsib = 1, .d8s = d8s_dq },
-    [0xa6 ... 0xa8] = { .simd_size = simd_packed_fp, .d8s = d8s_vl },
-    [0xa9] = { .simd_size = simd_scalar_vexw, .d8s = d8s_dq },
-    [0xaa] = { .simd_size = simd_packed_fp, .d8s = d8s_vl },
-    [0xab] = { .simd_size = simd_scalar_vexw, .d8s = d8s_dq },
-    [0xac] = { .simd_size = simd_packed_fp, .d8s = d8s_vl },
-    [0xad] = { .simd_size = simd_scalar_vexw, .d8s = d8s_dq },
-    [0xae] = { .simd_size = simd_packed_fp, .d8s = d8s_vl },
-    [0xaf] = { .simd_size = simd_scalar_vexw, .d8s = d8s_dq },
-    [0xb4 ... 0xb5] = { .simd_size = simd_packed_int, .d8s = d8s_vl },
-    [0xb6 ... 0xb8] = { .simd_size = simd_packed_fp, .d8s = d8s_vl },
-    [0xb9] = { .simd_size = simd_scalar_vexw, .d8s = d8s_dq },
-    [0xba] = { .simd_size = simd_packed_fp, .d8s = d8s_vl },
-    [0xbb] = { .simd_size = simd_scalar_vexw, .d8s = d8s_dq },
-    [0xbc] = { .simd_size = simd_packed_fp, .d8s = d8s_vl },
-    [0xbd] = { .simd_size = simd_scalar_vexw, .d8s = d8s_dq },
-    [0xbe] = { .simd_size = simd_packed_fp, .d8s = d8s_vl },
-    [0xbf] = { .simd_size = simd_scalar_vexw, .d8s = d8s_dq },
-    [0xc4] = { .simd_size = simd_packed_int, .two_op = 1, .d8s = d8s_vl },
-    [0xc6 ... 0xc7] = { .simd_size = simd_other, .vsib = 1, .d8s = d8s_dq },
-    [0xc8] = { .simd_size = simd_packed_fp, .two_op = 1, .d8s = d8s_vl },
-    [0xc9] = { .simd_size = simd_other },
-    [0xca] = { .simd_size = simd_packed_fp, .two_op = 1, .d8s = d8s_vl },
-    [0xcb] = { .simd_size = simd_scalar_vexw, .d8s = d8s_dq },
-    [0xcc] = { .simd_size = simd_packed_fp, .two_op = 1, .d8s = d8s_vl },
-    [0xcd] = { .simd_size = simd_scalar_vexw, .d8s = d8s_dq },
-    [0xcf] = { .simd_size = simd_packed_int, .d8s = d8s_vl },
-    [0xdb] = { .simd_size = simd_packed_int, .two_op = 1 },
-    [0xdc ... 0xdf] = { .simd_size = simd_packed_int, .d8s = d8s_vl },
-    [0xf0] = { .two_op = 1 },
-    [0xf1] = { .to_mem = 1, .two_op = 1 },
-    [0xf2 ... 0xf3] = {},
-    [0xf5 ... 0xf7] = {},
-    [0xf8] = { .simd_size = simd_other },
-    [0xf9] = { .to_mem = 1, .two_op = 1 /* Mov */ },
-};
-
 /* Shift values between src and dst sizes of pmov{s,z}x{b,w,d}{w,d,q}. */
 static const uint8_t pmov_convert_delta[] = { 1, 2, 3, 1, 2, 1 };
 
-static const struct ext0f3a_table {
-    uint8_t simd_size:5;
-    uint8_t to_mem:1;
-    uint8_t two_op:1;
-    uint8_t four_op:1;
-    disp8scale_t d8s:4;
-} ext0f3a_table[256] = {
-    [0x00] = { .simd_size = simd_packed_int, .two_op = 1, .d8s = d8s_vl },
-    [0x01] = { .simd_size = simd_packed_fp, .two_op = 1, .d8s = d8s_vl },
-    [0x02] = { .simd_size = simd_packed_int },
-    [0x03] = { .simd_size = simd_packed_int, .d8s = d8s_vl },
-    [0x04 ... 0x05] = { .simd_size = simd_packed_fp, .two_op = 1, .d8s = d8s_vl },
-    [0x06] = { .simd_size = simd_packed_fp },
-    [0x08 ... 0x09] = { .simd_size = simd_packed_fp, .two_op = 1, .d8s = d8s_vl },
-    [0x0a ... 0x0b] = { .simd_size = simd_scalar_opc, .d8s = d8s_dq },
-    [0x0c ... 0x0d] = { .simd_size = simd_packed_fp },
-    [0x0e] = { .simd_size = simd_packed_int },
-    [0x0f] = { .simd_size = simd_packed_int, .d8s = d8s_vl },
-    [0x14] = { .simd_size = simd_none, .to_mem = 1, .two_op = 1, .d8s = 0 },
-    [0x15] = { .simd_size = simd_none, .to_mem = 1, .two_op = 1, .d8s = 1 },
-    [0x16] = { .simd_size = simd_none, .to_mem = 1, .two_op = 1, .d8s = d8s_dq64 },
-    [0x17] = { .simd_size = simd_none, .to_mem = 1, .two_op = 1, .d8s = 2 },
-    [0x18] = { .simd_size = simd_128, .d8s = 4 },
-    [0x19] = { .simd_size = simd_128, .to_mem = 1, .two_op = 1, .d8s = 4 },
-    [0x1a] = { .simd_size = simd_256, .d8s = d8s_vl_by_2 },
-    [0x1b] = { .simd_size = simd_256, .to_mem = 1, .two_op = 1, .d8s = d8s_vl_by_2 },
-    [0x1d] = { .simd_size = simd_other, .to_mem = 1, .two_op = 1, .d8s = d8s_vl_by_2 },
-    [0x1e ... 0x1f] = { .simd_size = simd_packed_int, .d8s = d8s_vl },
-    [0x20] = { .simd_size = simd_none, .d8s = 0 },
-    [0x21] = { .simd_size = simd_other, .d8s = 2 },
-    [0x22] = { .simd_size = simd_none, .d8s = d8s_dq64 },
-    [0x23] = { .simd_size = simd_packed_int, .d8s = d8s_vl },
-    [0x25] = { .simd_size = simd_packed_int, .d8s = d8s_vl },
-    [0x26] = { .simd_size = simd_packed_fp, .two_op = 1, .d8s = d8s_vl },
-    [0x27] = { .simd_size = simd_scalar_vexw, .d8s = d8s_dq },
-    [0x30 ... 0x33] = { .simd_size = simd_other, .two_op = 1 },
-    [0x38] = { .simd_size = simd_128, .d8s = 4 },
-    [0x3a] = { .simd_size = simd_256, .d8s = d8s_vl_by_2 },
-    [0x39] = { .simd_size = simd_128, .to_mem = 1, .two_op = 1, .d8s = 4 },
-    [0x3b] = { .simd_size = simd_256, .to_mem = 1, .two_op = 1, .d8s = d8s_vl_by_2 },
-    [0x3e ... 0x3f] = { .simd_size = simd_packed_int, .d8s = d8s_vl },
-    [0x40 ... 0x41] = { .simd_size = simd_packed_fp },
-    [0x42 ... 0x43] = { .simd_size = simd_packed_int, .d8s = d8s_vl },
-    [0x44] = { .simd_size = simd_packed_int, .d8s = d8s_vl },
-    [0x46] = { .simd_size = simd_packed_int },
-    [0x48 ... 0x49] = { .simd_size = simd_packed_fp, .four_op = 1 },
-    [0x4a ... 0x4b] = { .simd_size = simd_packed_fp, .four_op = 1 },
-    [0x4c] = { .simd_size = simd_packed_int, .four_op = 1 },
-    [0x50] = { .simd_size = simd_packed_fp, .d8s = d8s_vl },
-    [0x51] = { .simd_size = simd_scalar_vexw, .d8s = d8s_dq },
-    [0x54] = { .simd_size = simd_packed_fp, .d8s = d8s_vl },
-    [0x55] = { .simd_size = simd_scalar_vexw, .d8s = d8s_dq },
-    [0x56] = { .simd_size = simd_packed_fp, .two_op = 1, .d8s = d8s_vl },
-    [0x57] = { .simd_size = simd_scalar_vexw, .d8s = d8s_dq },
-    [0x5c ... 0x5f] = { .simd_size = simd_packed_fp, .four_op = 1 },
-    [0x60 ... 0x63] = { .simd_size = simd_packed_int, .two_op = 1 },
-    [0x66] = { .simd_size = simd_packed_fp, .two_op = 1, .d8s = d8s_vl },
-    [0x67] = { .simd_size = simd_scalar_vexw, .two_op = 1, .d8s = d8s_dq },
-    [0x68 ... 0x69] = { .simd_size = simd_packed_fp, .four_op = 1 },
-    [0x6a ... 0x6b] = { .simd_size = simd_scalar_opc, .four_op = 1 },
-    [0x6c ... 0x6d] = { .simd_size = simd_packed_fp, .four_op = 1 },
-    [0x6e ... 0x6f] = { .simd_size = simd_scalar_opc, .four_op = 1 },
-    [0x70 ... 0x73] = { .simd_size = simd_packed_int, .d8s = d8s_vl },
-    [0x78 ... 0x79] = { .simd_size = simd_packed_fp, .four_op = 1 },
-    [0x7a ... 0x7b] = { .simd_size = simd_scalar_opc, .four_op = 1 },
-    [0x7c ... 0x7d] = { .simd_size = simd_packed_fp, .four_op = 1 },
-    [0x7e ... 0x7f] = { .simd_size = simd_scalar_opc, .four_op = 1 },
-    [0xcc] = { .simd_size = simd_other },
-    [0xce ... 0xcf] = { .simd_size = simd_packed_int, .d8s = d8s_vl },
-    [0xdf] = { .simd_size = simd_packed_int, .two_op = 1 },
-    [0xf0] = {},
-};
-
-static const opcode_desc_t xop_table[] = {
-    DstReg|SrcImmByte|ModRM,
-    DstReg|SrcMem|ModRM,
-    DstReg|SrcImm|ModRM,
-};
-
-static const struct ext8f08_table {
-    uint8_t simd_size:5;
-    uint8_t two_op:1;
-    uint8_t four_op:1;
-} ext8f08_table[256] = {
-    [0xa2] = { .simd_size = simd_packed_int, .four_op = 1 },
-    [0x85 ... 0x87] = { .simd_size = simd_packed_int, .four_op = 1 },
-    [0x8e ... 0x8f] = { .simd_size = simd_packed_int, .four_op = 1 },
-    [0x95 ... 0x97] = { .simd_size = simd_packed_int, .four_op = 1 },
-    [0x9e ... 0x9f] = { .simd_size = simd_packed_int, .four_op = 1 },
-    [0xa3] = { .simd_size = simd_packed_int, .four_op = 1 },
-    [0xa6] = { .simd_size = simd_packed_int, .four_op = 1 },
-    [0xb6] = { .simd_size = simd_packed_int, .four_op = 1 },
-    [0xc0 ... 0xc3] = { .simd_size = simd_packed_int, .two_op = 1 },
-    [0xcc ... 0xcf] = { .simd_size = simd_packed_int },
-    [0xec ... 0xef] = { .simd_size = simd_packed_int },
-};
-
-static const struct ext8f09_table {
-    uint8_t simd_size:5;
-    uint8_t two_op:1;
-} ext8f09_table[256] = {
-    [0x01 ... 0x02] = { .two_op = 1 },
-    [0x80 ... 0x81] = { .simd_size = simd_packed_fp, .two_op = 1 },
-    [0x82 ... 0x83] = { .simd_size = simd_scalar_opc, .two_op = 1 },
-    [0x90 ... 0x9b] = { .simd_size = simd_packed_int },
-    [0xc1 ... 0xc3] = { .simd_size = simd_packed_int, .two_op = 1 },
-    [0xc6 ... 0xc7] = { .simd_size = simd_packed_int, .two_op = 1 },
-    [0xcb] = { .simd_size = simd_packed_int, .two_op = 1 },
-    [0xd1 ... 0xd3] = { .simd_size = simd_packed_int, .two_op = 1 },
-    [0xd6 ... 0xd7] = { .simd_size = simd_packed_int, .two_op = 1 },
-    [0xdb] = { .simd_size = simd_packed_int, .two_op = 1 },
-    [0xe1 ... 0xe3] = { .simd_size = simd_packed_int, .two_op = 1 },
-};
-
-#define VEX_PREFIX_DOUBLE_MASK 0x1
-#define VEX_PREFIX_SCALAR_MASK 0x2
-
 static const uint8_t sse_prefix[] = { 0x66, 0xf3, 0xf2 };
 
 #ifdef __x86_64__
@@ -637,12 +121,6 @@ static const uint8_t sse_prefix[] = { 0x
 #define repe_prefix()  (vex.pfx == vex_f3)
 #define repne_prefix() (vex.pfx == vex_f2)
 
-#ifdef __x86_64__
-#define PTR_POISON ((void *)0x8086000000008086UL) /* non-canonical */
-#else
-#define PTR_POISON NULL /* 32-bit builds are for user-space, so NULL is OK. */
-#endif
-
 /*
  * While proper alignment gets specified in mmval_t, this doesn't get honored
  * by the compiler for automatic variables. Use this helper to instantiate a
@@ -831,19 +309,6 @@ do{ asm volatile (
                 : [msk] "i" (EFLAGS_MASK), ## src);                     \
 } while (0)
 
-/* Fetch next part of the instruction being emulated. */
-#define insn_fetch_bytes(_size)                                         \
-({ unsigned long _x = 0, _ip = state->ip;                               \
-   state->ip += (_size); /* real hardware doesn't truncate */           \
-   generate_exception_if((uint8_t)(state->ip -                          \
-                                   ctxt->regs->r(ip)) > MAX_INST_LEN,   \
-                         EXC_GP, 0);                                    \
-   rc = ops->insn_fetch(x86_seg_cs, _ip, &_x, (_size), ctxt);           \
-   if ( rc ) goto done;                                                 \
-   _x;                                                                  \
-})
-#define insn_fetch_type(_type) ((_type)insn_fetch_bytes(sizeof(_type)))
-
 /*
  * Given byte has even parity (even number of 1s)? SDM Vol. 1 Sec. 3.4.3.1,
  * "Status Flags": EFLAGS.PF reflects parity of least-sig. byte of result only.
@@ -1354,13 +819,6 @@ static int ioport_access_check(
     return rc;
 }
 
-/* Initialise output state in x86_emulate_ctxt */
-static void init_context(struct x86_emulate_ctxt *ctxt)
-{
-    ctxt->retire.raw = 0;
-    x86_emul_reset_event(ctxt);
-}
-
 static int
 realmode_load_seg(
     enum x86_segment seg,
@@ -1707,51 +1165,6 @@ static unsigned long *decode_vex_gpr(
     return decode_gpr(regs, ~vex_reg & (mode_64bit() ? 0xf : 7));
 }
 
-static unsigned int decode_disp8scale(enum disp8scale scale,
-                                      const struct x86_emulate_state *state)
-{
-    switch ( scale )
-    {
-    case d8s_bw:
-        return state->evex.w;
-
-    default:
-        if ( scale < d8s_vl )
-            return scale;
-        if ( state->evex.brs )
-        {
-    case d8s_dq:
-            return 2 + state->evex.w;
-        }
-        break;
-
-    case d8s_dq64:
-        return 2 + (state->op_bytes == 8);
-    }
-
-    switch ( state->simd_size )
-    {
-    case simd_any_fp:
-    case simd_single_fp:
-        if ( !(state->evex.pfx & VEX_PREFIX_SCALAR_MASK) )
-            break;
-        /* fall through */
-    case simd_scalar_opc:
-    case simd_scalar_vexw:
-        return 2 + state->evex.w;
-
-    case simd_128:
-        /* These should have an explicit size specified. */
-        ASSERT_UNREACHABLE();
-        return 4;
-
-    default:
-        break;
-    }
-
-    return 4 + state->evex.lr - (scale - d8s_vl);
-}
-
 #define avx512_vlen_check(lig) do { \
     switch ( evex.lr ) \
     { \
@@ -1833,1138 +1246,6 @@ int x86emul_unhandleable_rw(
 #define evex_encoded() (evex.mbs)
 #define ea (state->ea)
 
-static int
-x86_decode_onebyte(
-    struct x86_emulate_state *state,
-    struct x86_emulate_ctxt *ctxt,
-    const struct x86_emulate_ops *ops)
-{
-    int rc = X86EMUL_OKAY;
-
-    switch ( ctxt->opcode )
-    {
-    case 0x06: /* push %%es */
-    case 0x07: /* pop %%es */
-    case 0x0e: /* push %%cs */
-    case 0x16: /* push %%ss */
-    case 0x17: /* pop %%ss */
-    case 0x1e: /* push %%ds */
-    case 0x1f: /* pop %%ds */
-    case 0x27: /* daa */
-    case 0x2f: /* das */
-    case 0x37: /* aaa */
-    case 0x3f: /* aas */
-    case 0x60: /* pusha */
-    case 0x61: /* popa */
-    case 0x62: /* bound */
-    case 0xc4: /* les */
-    case 0xc5: /* lds */
-    case 0xce: /* into */
-    case 0xd4: /* aam */
-    case 0xd5: /* aad */
-    case 0xd6: /* salc */
-        state->not_64bit = true;
-        break;
-
-    case 0x82: /* Grp1 (x86/32 only) */
-        state->not_64bit = true;
-        /* fall through */
-    case 0x80: case 0x81: case 0x83: /* Grp1 */
-        if ( (modrm_reg & 7) == 7 ) /* cmp */
-            state->desc = (state->desc & ByteOp) | DstNone | SrcMem;
-        break;
-
-    case 0x90: /* nop / pause */
-        if ( repe_prefix() )
-            ctxt->opcode |= X86EMUL_OPC_F3(0, 0);
-        break;
-
-    case 0x9a: /* call (far, absolute) */
-    case 0xea: /* jmp (far, absolute) */
-        generate_exception_if(mode_64bit(), EXC_UD);
-
-        imm1 = insn_fetch_bytes(op_bytes);
-        imm2 = insn_fetch_type(uint16_t);
-        break;
-
-    case 0xa0: case 0xa1: /* mov mem.offs,{%al,%ax,%eax,%rax} */
-    case 0xa2: case 0xa3: /* mov {%al,%ax,%eax,%rax},mem.offs */
-        /* Source EA is not encoded via ModRM. */
-        ea.type = OP_MEM;
-        ea.mem.off = insn_fetch_bytes(ad_bytes);
-        break;
-
-    case 0xb8 ... 0xbf: /* mov imm{16,32,64},r{16,32,64} */
-        if ( op_bytes == 8 ) /* Fetch more bytes to obtain imm64. */
-            imm1 = ((uint32_t)imm1 |
-                    ((uint64_t)insn_fetch_type(uint32_t) << 32));
-        break;
-
-    case 0xc8: /* enter imm16,imm8 */
-        imm2 = insn_fetch_type(uint8_t);
-        break;
-
-    case 0xf6: case 0xf7: /* Grp3 */
-        if ( !(modrm_reg & 6) ) /* test */
-            state->desc = (state->desc & ByteOp) | DstNone | SrcMem;
-        break;
-
-    case 0xff: /* Grp5 */
-        switch ( modrm_reg & 7 )
-        {
-        case 2: /* call (near) */
-        case 4: /* jmp (near) */
-            if ( mode_64bit() && (op_bytes == 4 || !amd_like(ctxt)) )
-                op_bytes = 8;
-            state->desc = DstNone | SrcMem | Mov;
-            break;
-
-        case 3: /* call (far, absolute indirect) */
-        case 5: /* jmp (far, absolute indirect) */
-            /* REX.W ignored on a vendor-dependent basis. */
-            if ( op_bytes == 8 && amd_like(ctxt) )
-                op_bytes = 4;
-            state->desc = DstNone | SrcMem | Mov;
-            break;
-
-        case 6: /* push */
-            if ( mode_64bit() && op_bytes == 4 )
-                op_bytes = 8;
-            state->desc = DstNone | SrcMem | Mov;
-            break;
-        }
-        break;
-    }
-
- done:
-    return rc;
-}
-
-static int
-x86_decode_twobyte(
-    struct x86_emulate_state *state,
-    struct x86_emulate_ctxt *ctxt,
-    const struct x86_emulate_ops *ops)
-{
-    int rc = X86EMUL_OKAY;
-
-    switch ( ctxt->opcode & X86EMUL_OPC_MASK )
-    {
-    case 0x00: /* Grp6 */
-        switch ( modrm_reg & 6 )
-        {
-        case 0:
-            state->desc |= DstMem | SrcImplicit | Mov;
-            break;
-        case 2: case 4:
-            state->desc |= SrcMem16;
-            break;
-        }
-        break;
-
-    case 0x78:
-        state->desc = ImplicitOps;
-        state->simd_size = simd_none;
-        switch ( vex.pfx )
-        {
-        case vex_66: /* extrq $imm8, $imm8, xmm */
-        case vex_f2: /* insertq $imm8, $imm8, xmm, xmm */
-            imm1 = insn_fetch_type(uint8_t);
-            imm2 = insn_fetch_type(uint8_t);
-            break;
-        }
-        /* fall through */
-    case 0x10 ... 0x18:
-    case 0x28 ... 0x2f:
-    case 0x50 ... 0x77:
-    case 0x7a ... 0x7d:
-    case 0x7f:
-    case 0xc2 ... 0xc3:
-    case 0xc5 ... 0xc6:
-    case 0xd0 ... 0xef:
-    case 0xf1 ... 0xfe:
-        ctxt->opcode |= MASK_INSR(vex.pfx, X86EMUL_OPC_PFX_MASK);
-        break;
-
-    case 0x20: case 0x22: /* mov to/from cr */
-        if ( lock_prefix && vcpu_has_cr8_legacy() )
-        {
-            modrm_reg += 8;
-            lock_prefix = false;
-        }
-        /* fall through */
-    case 0x21: case 0x23: /* mov to/from dr */
-        ASSERT(ea.type == OP_REG); /* Early operand adjustment ensures this. */
-        generate_exception_if(lock_prefix, EXC_UD);
-        op_bytes = mode_64bit() ? 8 : 4;
-        break;
-
-    case 0x79:
-        state->desc = DstReg | SrcMem;
-        state->simd_size = simd_packed_int;
-        ctxt->opcode |= MASK_INSR(vex.pfx, X86EMUL_OPC_PFX_MASK);
-        break;
-
-    case 0x7e:
-        ctxt->opcode |= MASK_INSR(vex.pfx, X86EMUL_OPC_PFX_MASK);
-        if ( vex.pfx == vex_f3 ) /* movq xmm/m64,xmm */
-        {
-    case X86EMUL_OPC_VEX_F3(0, 0x7e): /* vmovq xmm/m64,xmm */
-    case X86EMUL_OPC_EVEX_F3(0, 0x7e): /* vmovq xmm/m64,xmm */
-            state->desc = DstImplicit | SrcMem | TwoOp;
-            state->simd_size = simd_other;
-            /* Avoid the state->desc clobbering of TwoOp below. */
-            return X86EMUL_OKAY;
-        }
-        break;
-
-    case X86EMUL_OPC_VEX(0, 0x90):    /* kmov{w,q} */
-    case X86EMUL_OPC_VEX_66(0, 0x90): /* kmov{b,d} */
-        state->desc = DstReg | SrcMem | Mov;
-        state->simd_size = simd_other;
-        break;
-
-    case X86EMUL_OPC_VEX(0, 0x91):    /* kmov{w,q} */
-    case X86EMUL_OPC_VEX_66(0, 0x91): /* kmov{b,d} */
-        state->desc = DstMem | SrcReg | Mov;
-        state->simd_size = simd_other;
-        break;
-
-    case 0xae:
-        ctxt->opcode |= MASK_INSR(vex.pfx, X86EMUL_OPC_PFX_MASK);
-        /* fall through */
-    case X86EMUL_OPC_VEX(0, 0xae):
-        switch ( modrm_reg & 7 )
-        {
-        case 2: /* {,v}ldmxcsr */
-            state->desc = DstImplicit | SrcMem | Mov;
-            op_bytes = 4;
-            break;
-
-        case 3: /* {,v}stmxcsr */
-            state->desc = DstMem | SrcImplicit | Mov;
-            op_bytes = 4;
-            break;
-        }
-        break;
-
-    case 0xb2: /* lss */
-    case 0xb4: /* lfs */
-    case 0xb5: /* lgs */
-        /* REX.W ignored on a vendor-dependent basis. */
-        if ( op_bytes == 8 && amd_like(ctxt) )
-            op_bytes = 4;
-        break;
-
-    case 0xb8: /* jmpe / popcnt */
-        if ( rep_prefix() )
-            ctxt->opcode |= MASK_INSR(vex.pfx, X86EMUL_OPC_PFX_MASK);
-        break;
-
-        /* Intentionally not handling here despite being modified by F3:
-    case 0xbc: bsf / tzcnt
-    case 0xbd: bsr / lzcnt
-         * They're being dealt with in the execution phase (if at all).
-         */
-
-    case 0xc4: /* pinsrw */
-        ctxt->opcode |= MASK_INSR(vex.pfx, X86EMUL_OPC_PFX_MASK);
-        /* fall through */
-    case X86EMUL_OPC_VEX_66(0, 0xc4): /* vpinsrw */
-    case X86EMUL_OPC_EVEX_66(0, 0xc4): /* vpinsrw */
-        state->desc = DstImplicit | SrcMem16;
-        break;
-
-    case 0xf0:
-        ctxt->opcode |= MASK_INSR(vex.pfx, X86EMUL_OPC_PFX_MASK);
-        if ( vex.pfx == vex_f2 ) /* lddqu mem,xmm */
-        {
-        /* fall through */
-    case X86EMUL_OPC_VEX_F2(0, 0xf0): /* vlddqu mem,{x,y}mm */
-            state->desc = DstImplicit | SrcMem | TwoOp;
-            state->simd_size = simd_other;
-            /* Avoid the state->desc clobbering of TwoOp below. */
-            return X86EMUL_OKAY;
-        }
-        break;
-    }
-
-    /*
-     * Scalar forms of most VEX-/EVEX-encoded TwoOp instructions have
-     * three operands.  Those which do really have two operands
-     * should have exited earlier.
-     */
-    if ( state->simd_size && vex.opcx &&
-         (vex.pfx & VEX_PREFIX_SCALAR_MASK) )
-        state->desc &= ~TwoOp;
-
- done:
-    return rc;
-}
-
-static int
-x86_decode_0f38(
-    struct x86_emulate_state *state,
-    struct x86_emulate_ctxt *ctxt,
-    const struct x86_emulate_ops *ops)
-{
-    switch ( ctxt->opcode & X86EMUL_OPC_MASK )
-    {
-    case 0x00 ... 0xef:
-    case 0xf2 ... 0xf5:
-    case 0xf7 ... 0xf8:
-    case 0xfa ... 0xff:
-        op_bytes = 0;
-        /* fall through */
-    case 0xf6: /* adcx / adox */
-    case 0xf9: /* movdiri */
-        ctxt->opcode |= MASK_INSR(vex.pfx, X86EMUL_OPC_PFX_MASK);
-        break;
-
-    case X86EMUL_OPC_EVEX_66(0, 0x2d): /* vscalefs{s,d} */
-        state->simd_size = simd_scalar_vexw;
-        break;
-
-    case X86EMUL_OPC_EVEX_66(0, 0x7a): /* vpbroadcastb */
-    case X86EMUL_OPC_EVEX_66(0, 0x7b): /* vpbroadcastw */
-    case X86EMUL_OPC_EVEX_66(0, 0x7c): /* vpbroadcast{d,q} */
-        break;
-
-    case 0xf0: /* movbe / crc32 */
-        state->desc |= repne_prefix() ? ByteOp : Mov;
-        if ( rep_prefix() )
-            ctxt->opcode |= MASK_INSR(vex.pfx, X86EMUL_OPC_PFX_MASK);
-        break;
-
-    case 0xf1: /* movbe / crc32 */
-        if ( repne_prefix() )
-            state->desc = DstReg | SrcMem;
-        if ( rep_prefix() )
-            ctxt->opcode |= MASK_INSR(vex.pfx, X86EMUL_OPC_PFX_MASK);
-        break;
-
-    case X86EMUL_OPC_VEX(0, 0xf2):    /* andn */
-    case X86EMUL_OPC_VEX(0, 0xf3):    /* Grp 17 */
-    case X86EMUL_OPC_VEX(0, 0xf5):    /* bzhi */
-    case X86EMUL_OPC_VEX_F3(0, 0xf5): /* pext */
-    case X86EMUL_OPC_VEX_F2(0, 0xf5): /* pdep */
-    case X86EMUL_OPC_VEX_F2(0, 0xf6): /* mulx */
-    case X86EMUL_OPC_VEX(0, 0xf7):    /* bextr */
-    case X86EMUL_OPC_VEX_66(0, 0xf7): /* shlx */
-    case X86EMUL_OPC_VEX_F3(0, 0xf7): /* sarx */
-    case X86EMUL_OPC_VEX_F2(0, 0xf7): /* shrx */
-        break;
-
-    default:
-        op_bytes = 0;
-        break;
-    }
-
-    return X86EMUL_OKAY;
-}
-
-static int
-x86_decode_0f3a(
-    struct x86_emulate_state *state,
-    struct x86_emulate_ctxt *ctxt,
-    const struct x86_emulate_ops *ops)
-{
-    if ( !vex.opcx )
-        ctxt->opcode |= MASK_INSR(vex.pfx, X86EMUL_OPC_PFX_MASK);
-
-    switch ( ctxt->opcode & X86EMUL_OPC_MASK )
-    {
-    case X86EMUL_OPC_66(0, 0x14)
-     ... X86EMUL_OPC_66(0, 0x17):     /* pextr*, extractps */
-    case X86EMUL_OPC_VEX_66(0, 0x14)
-     ... X86EMUL_OPC_VEX_66(0, 0x17): /* vpextr*, vextractps */
-    case X86EMUL_OPC_EVEX_66(0, 0x14)
-     ... X86EMUL_OPC_EVEX_66(0, 0x17): /* vpextr*, vextractps */
-    case X86EMUL_OPC_VEX_F2(0, 0xf0): /* rorx */
-        break;
-
-    case X86EMUL_OPC_66(0, 0x20):     /* pinsrb */
-    case X86EMUL_OPC_VEX_66(0, 0x20): /* vpinsrb */
-    case X86EMUL_OPC_EVEX_66(0, 0x20): /* vpinsrb */
-        state->desc = DstImplicit | SrcMem;
-        if ( modrm_mod != 3 )
-            state->desc |= ByteOp;
-        break;
-
-    case X86EMUL_OPC_66(0, 0x22):     /* pinsr{d,q} */
-    case X86EMUL_OPC_VEX_66(0, 0x22): /* vpinsr{d,q} */
-    case X86EMUL_OPC_EVEX_66(0, 0x22): /* vpinsr{d,q} */
-        state->desc = DstImplicit | SrcMem;
-        break;
-
-    default:
-        op_bytes = 0;
-        break;
-    }
-
-    return X86EMUL_OKAY;
-}
-
-static int
-x86_decode(
-    struct x86_emulate_state *state,
-    struct x86_emulate_ctxt *ctxt,
-    const struct x86_emulate_ops  *ops)
-{
-    uint8_t b, d;
-    unsigned int def_op_bytes, def_ad_bytes, opcode;
-    enum x86_segment override_seg = x86_seg_none;
-    bool pc_rel = false;
-    int rc = X86EMUL_OKAY;
-
-    ASSERT(ops->insn_fetch);
-
-    memset(state, 0, sizeof(*state));
-    ea.type = OP_NONE;
-    ea.mem.seg = x86_seg_ds;
-    ea.reg = PTR_POISON;
-    state->regs = ctxt->regs;
-    state->ip = ctxt->regs->r(ip);
-
-    op_bytes = def_op_bytes = ad_bytes = def_ad_bytes = ctxt->addr_size/8;
-    if ( op_bytes == 8 )
-    {
-        op_bytes = def_op_bytes = 4;
-#ifndef __x86_64__
-        return X86EMUL_UNHANDLEABLE;
-#endif
-    }
-
-    /* Prefix bytes. */
-    for ( ; ; )
-    {
-        switch ( b = insn_fetch_type(uint8_t) )
-        {
-        case 0x66: /* operand-size override */
-            op_bytes = def_op_bytes ^ 6;
-            if ( !vex.pfx )
-                vex.pfx = vex_66;
-            break;
-        case 0x67: /* address-size override */
-            ad_bytes = def_ad_bytes ^ (mode_64bit() ? 12 : 6);
-            break;
-        case 0x2e: /* CS override / ignored in 64-bit mode */
-            if ( !mode_64bit() )
-                override_seg = x86_seg_cs;
-            break;
-        case 0x3e: /* DS override / ignored in 64-bit mode */
-            if ( !mode_64bit() )
-                override_seg = x86_seg_ds;
-            break;
-        case 0x26: /* ES override / ignored in 64-bit mode */
-            if ( !mode_64bit() )
-                override_seg = x86_seg_es;
-            break;
-        case 0x64: /* FS override */
-            override_seg = x86_seg_fs;
-            break;
-        case 0x65: /* GS override */
-            override_seg = x86_seg_gs;
-            break;
-        case 0x36: /* SS override / ignored in 64-bit mode */
-            if ( !mode_64bit() )
-                override_seg = x86_seg_ss;
-            break;
-        case 0xf0: /* LOCK */
-            lock_prefix = 1;
-            break;
-        case 0xf2: /* REPNE/REPNZ */
-            vex.pfx = vex_f2;
-            break;
-        case 0xf3: /* REP/REPE/REPZ */
-            vex.pfx = vex_f3;
-            break;
-        case 0x40 ... 0x4f: /* REX */
-            if ( !mode_64bit() )
-                goto done_prefixes;
-            rex_prefix = b;
-            continue;
-        default:
-            goto done_prefixes;
-        }
-
-        /* Any legacy prefix after a REX prefix nullifies its effect. */
-        rex_prefix = 0;
-    }
- done_prefixes:
-
-    if ( rex_prefix & REX_W )
-        op_bytes = 8;
-
-    /* Opcode byte(s). */
-    d = opcode_table[b];
-    if ( d == 0 && b == 0x0f )
-    {
-        /* Two-byte opcode. */
-        b = insn_fetch_type(uint8_t);
-        d = twobyte_table[b].desc;
-        switch ( b )
-        {
-        default:
-            opcode = b | MASK_INSR(0x0f, X86EMUL_OPC_EXT_MASK);
-            ext = ext_0f;
-            state->simd_size = twobyte_table[b].size;
-            break;
-        case 0x38:
-            b = insn_fetch_type(uint8_t);
-            opcode = b | MASK_INSR(0x0f38, X86EMUL_OPC_EXT_MASK);
-            ext = ext_0f38;
-            break;
-        case 0x3a:
-            b = insn_fetch_type(uint8_t);
-            opcode = b | MASK_INSR(0x0f3a, X86EMUL_OPC_EXT_MASK);
-            ext = ext_0f3a;
-            break;
-        }
-    }
-    else
-        opcode = b;
-
-    /* ModRM and SIB bytes. */
-    if ( d & ModRM )
-    {
-        modrm = insn_fetch_type(uint8_t);
-        modrm_mod = (modrm & 0xc0) >> 6;
-
-        if ( !ext && ((b & ~1) == 0xc4 || (b == 0x8f && (modrm & 0x18)) ||
-                      b == 0x62) )
-            switch ( def_ad_bytes )
-            {
-            default:
-                BUG(); /* Shouldn't be possible. */
-            case 2:
-                if ( state->regs->eflags & X86_EFLAGS_VM )
-                    break;
-                /* fall through */
-            case 4:
-                if ( modrm_mod != 3 || in_realmode(ctxt, ops) )
-                    break;
-                /* fall through */
-            case 8:
-                /* VEX / XOP / EVEX */
-                generate_exception_if(rex_prefix || vex.pfx, EXC_UD);
-                /*
-                 * With operand size override disallowed (see above), op_bytes
-                 * should not have changed from its default.
-                 */
-                ASSERT(op_bytes == def_op_bytes);
-
-                vex.raw[0] = modrm;
-                if ( b == 0xc5 )
-                {
-                    opcode = X86EMUL_OPC_VEX_;
-                    vex.raw[1] = modrm;
-                    vex.opcx = vex_0f;
-                    vex.x = 1;
-                    vex.b = 1;
-                    vex.w = 0;
-                }
-                else
-                {
-                    vex.raw[1] = insn_fetch_type(uint8_t);
-                    if ( mode_64bit() )
-                    {
-                        if ( !vex.b )
-                            rex_prefix |= REX_B;
-                        if ( !vex.x )
-                            rex_prefix |= REX_X;
-                        if ( vex.w )
-                        {
-                            rex_prefix |= REX_W;
-                            op_bytes = 8;
-                        }
-                    }
-                    else
-                    {
-                        /* Operand size fixed at 4 (no override via W bit). */
-                        op_bytes = 4;
-                        vex.b = 1;
-                    }
-                    switch ( b )
-                    {
-                    case 0x62:
-                        opcode = X86EMUL_OPC_EVEX_;
-                        evex.raw[0] = vex.raw[0];
-                        evex.raw[1] = vex.raw[1];
-                        evex.raw[2] = insn_fetch_type(uint8_t);
-
-                        generate_exception_if(!evex.mbs || evex.mbz, EXC_UD);
-                        generate_exception_if(!evex.opmsk && evex.z, EXC_UD);
-
-                        if ( !mode_64bit() )
-                            evex.R = 1;
-
-                        vex.opcx = evex.opcx;
-                        break;
-                    case 0xc4:
-                        opcode = X86EMUL_OPC_VEX_;
-                        break;
-                    default:
-                        opcode = 0;
-                        break;
-                    }
-                }
-                if ( !vex.r )
-                    rex_prefix |= REX_R;
-
-                ext = vex.opcx;
-                if ( b != 0x8f )
-                {
-                    b = insn_fetch_type(uint8_t);
-                    switch ( ext )
-                    {
-                    case vex_0f:
-                        opcode |= MASK_INSR(0x0f, X86EMUL_OPC_EXT_MASK);
-                        d = twobyte_table[b].desc;
-                        state->simd_size = twobyte_table[b].size;
-                        break;
-                    case vex_0f38:
-                        opcode |= MASK_INSR(0x0f38, X86EMUL_OPC_EXT_MASK);
-                        d = twobyte_table[0x38].desc;
-                        break;
-                    case vex_0f3a:
-                        opcode |= MASK_INSR(0x0f3a, X86EMUL_OPC_EXT_MASK);
-                        d = twobyte_table[0x3a].desc;
-                        break;
-                    default:
-                        rc = X86EMUL_UNRECOGNIZED;
-                        goto done;
-                    }
-                }
-                else if ( ext < ext_8f08 + ARRAY_SIZE(xop_table) )
-                {
-                    b = insn_fetch_type(uint8_t);
-                    opcode |= MASK_INSR(0x8f08 + ext - ext_8f08,
-                                        X86EMUL_OPC_EXT_MASK);
-                    d = array_access_nospec(xop_table, ext - ext_8f08);
-                }
-                else
-                {
-                    rc = X86EMUL_UNRECOGNIZED;
-                    goto done;
-                }
-
-                opcode |= b | MASK_INSR(vex.pfx, X86EMUL_OPC_PFX_MASK);
-
-                if ( !evex_encoded() )
-                    evex.lr = vex.l;
-
-                if ( !(d & ModRM) )
-                    break;
-
-                modrm = insn_fetch_type(uint8_t);
-                modrm_mod = (modrm & 0xc0) >> 6;
-
-                break;
-            }
-    }
-
-    if ( d & ModRM )
-    {
-        unsigned int disp8scale = 0;
-
-        d &= ~ModRM;
-#undef ModRM /* Only its aliases are valid to use from here on. */
-        modrm_reg = ((rex_prefix & 4) << 1) | ((modrm & 0x38) >> 3) |
-                    ((evex_encoded() && !evex.R) << 4);
-        modrm_rm  = modrm & 0x07;
-
-        /*
-         * Early operand adjustments. Only ones affecting further processing
-         * prior to the x86_decode_*() calls really belong here. That would
-         * normally be only addition/removal of SrcImm/SrcImm16, so their
-         * fetching can be taken care of by the common code below.
-         */
-        switch ( ext )
-        {
-        case ext_none:
-            switch ( b )
-            {
-            case 0xf6 ... 0xf7: /* Grp3 */
-                switch ( modrm_reg & 7 )
-                {
-                case 0 ... 1: /* test */
-                    d |= DstMem | SrcImm;
-                    break;
-                case 2: /* not */
-                case 3: /* neg */
-                    d |= DstMem;
-                    break;
-                case 4: /* mul */
-                case 5: /* imul */
-                case 6: /* div */
-                case 7: /* idiv */
-                    /*
-                     * DstEax isn't really precise for all cases; updates to
-                     * rDX get handled in an open coded manner.
-                     */
-                    d |= DstEax | SrcMem;
-                    break;
-                }
-                break;
-            }
-            break;
-
-        case ext_0f:
-            if ( evex_encoded() )
-                disp8scale = decode_disp8scale(twobyte_table[b].d8s, state);
-
-            switch ( b )
-            {
-            case 0x12: /* vmovsldup / vmovddup */
-                if ( evex.pfx == vex_f2 )
-                    disp8scale = evex.lr ? 4 + evex.lr : 3;
-                /* fall through */
-            case 0x16: /* vmovshdup */
-                if ( evex.pfx == vex_f3 )
-                    disp8scale = 4 + evex.lr;
-                break;
-
-            case 0x20: /* mov cr,reg */
-            case 0x21: /* mov dr,reg */
-            case 0x22: /* mov reg,cr */
-            case 0x23: /* mov reg,dr */
-                /*
-                 * Mov to/from cr/dr ignore the encoding of Mod, and behave as
-                 * if they were encoded as reg/reg instructions.  No further
-                 * disp/SIB bytes are fetched.
-                 */
-                modrm_mod = 3;
-                break;
-
-            case 0x78:
-            case 0x79:
-                if ( !evex.pfx )
-                    break;
-                /* vcvt{,t}ps2uqq need special casing */
-                if ( evex.pfx == vex_66 )
-                {
-                    if ( !evex.w && !evex.brs )
-                        --disp8scale;
-                    break;
-                }
-                /* vcvt{,t}s{s,d}2usi need special casing: fall through */
-            case 0x2c: /* vcvtts{s,d}2si need special casing */
-            case 0x2d: /* vcvts{s,d}2si need special casing */
-                if ( evex_encoded() )
-                    disp8scale = 2 + (evex.pfx & VEX_PREFIX_DOUBLE_MASK);
-                break;
-
-            case 0x5a: /* vcvtps2pd needs special casing */
-                if ( disp8scale && !evex.pfx && !evex.brs )
-                    --disp8scale;
-                break;
-
-            case 0x7a: /* vcvttps2qq and vcvtudq2pd need special casing */
-                if ( disp8scale && evex.pfx != vex_f2 && !evex.w && !evex.brs )
-                    --disp8scale;
-                break;
-
-            case 0x7b: /* vcvtp{s,d}2qq need special casing */
-                if ( disp8scale && evex.pfx == vex_66 )
-                    disp8scale = (evex.brs ? 2 : 3 + evex.lr) + evex.w;
-                break;
-
-            case 0x7e: /* vmovq xmm/m64,xmm needs special casing */
-                if ( disp8scale == 2 && evex.pfx == vex_f3 )
-                    disp8scale = 3;
-                break;
-
-            case 0xe6: /* vcvtdq2pd needs special casing */
-                if ( disp8scale && evex.pfx == vex_f3 && !evex.w && !evex.brs )
-                    --disp8scale;
-                break;
-            }
-            break;
-
-        case ext_0f38:
-            d = ext0f38_table[b].to_mem ? DstMem | SrcReg
-                                        : DstReg | SrcMem;
-            if ( ext0f38_table[b].two_op )
-                d |= TwoOp;
-            if ( ext0f38_table[b].vsib )
-                d |= vSIB;
-            state->simd_size = ext0f38_table[b].simd_size;
-            if ( evex_encoded() )
-            {
-                /*
-                 * VPMOVUS* are identical to VPMOVS* Disp8-scaling-wise, but
-                 * their attributes don't match those of the vex_66 encoded
-                 * insns with the same base opcodes. Rather than adding new
-                 * columns to the table, handle this here for now.
-                 */
-                if ( evex.pfx != vex_f3 || (b & 0xf8) != 0x10 )
-                    disp8scale = decode_disp8scale(ext0f38_table[b].d8s, state);
-                else
-                {
-                    disp8scale = decode_disp8scale(ext0f38_table[b ^ 0x30].d8s,
-                                                   state);
-                    state->simd_size = simd_other;
-                }
-
-                switch ( b )
-                {
-                /* vp4dpwssd{,s} need special casing */
-                case 0x52: case 0x53:
-                /* v4f{,n}madd{p,s}s need special casing */
-                case 0x9a: case 0x9b: case 0xaa: case 0xab:
-                    if ( evex.pfx == vex_f2 )
-                    {
-                        disp8scale = 4;
-                        state->simd_size = simd_128;
-                    }
-                    break;
-                }
-            }
-            break;
-
-        case ext_0f3a:
-            /*
-             * Cannot update d here yet, as the immediate operand still
-             * needs fetching.
-             */
-            state->simd_size = ext0f3a_table[b].simd_size;
-            if ( evex_encoded() )
-                disp8scale = decode_disp8scale(ext0f3a_table[b].d8s, state);
-            break;
-
-        case ext_8f09:
-            if ( ext8f09_table[b].two_op )
-                d |= TwoOp;
-            state->simd_size = ext8f09_table[b].simd_size;
-            break;
-
-        case ext_8f08:
-        case ext_8f0a:
-            /*
-             * Cannot update d here yet, as the immediate operand still
-             * needs fetching.
-             */
-            break;
-
-        default:
-            ASSERT_UNREACHABLE();
-            return X86EMUL_UNIMPLEMENTED;
-        }
-
-        if ( modrm_mod == 3 )
-        {
-            generate_exception_if(d & vSIB, EXC_UD);
-            modrm_rm |= ((rex_prefix & 1) << 3) |
-                        ((evex_encoded() && !evex.x) << 4);
-            ea.type = OP_REG;
-        }
-        else if ( ad_bytes == 2 )
-        {
-            /* 16-bit ModR/M decode. */
-            generate_exception_if(d & vSIB, EXC_UD);
-            ea.type = OP_MEM;
-            switch ( modrm_rm )
-            {
-            case 0:
-                ea.mem.off = state->regs->bx + state->regs->si;
-                break;
-            case 1:
-                ea.mem.off = state->regs->bx + state->regs->di;
-                break;
-            case 2:
-                ea.mem.seg = x86_seg_ss;
-                ea.mem.off = state->regs->bp + state->regs->si;
-                break;
-            case 3:
-                ea.mem.seg = x86_seg_ss;
-                ea.mem.off = state->regs->bp + state->regs->di;
-                break;
-            case 4:
-                ea.mem.off = state->regs->si;
-                break;
-            case 5:
-                ea.mem.off = state->regs->di;
-                break;
-            case 6:
-                if ( modrm_mod == 0 )
-                    break;
-                ea.mem.seg = x86_seg_ss;
-                ea.mem.off = state->regs->bp;
-                break;
-            case 7:
-                ea.mem.off = state->regs->bx;
-                break;
-            }
-            switch ( modrm_mod )
-            {
-            case 0:
-                if ( modrm_rm == 6 )
-                    ea.mem.off = insn_fetch_type(int16_t);
-                break;
-            case 1:
-                ea.mem.off += insn_fetch_type(int8_t) * (1 << disp8scale);
-                break;
-            case 2:
-                ea.mem.off += insn_fetch_type(int16_t);
-                break;
-            }
-        }
-        else
-        {
-            /* 32/64-bit ModR/M decode. */
-            ea.type = OP_MEM;
-            if ( modrm_rm == 4 )
-            {
-                uint8_t sib = insn_fetch_type(uint8_t);
-                uint8_t sib_base = (sib & 7) | ((rex_prefix << 3) & 8);
-
-                state->sib_index = ((sib >> 3) & 7) | ((rex_prefix << 2) & 8);
-                state->sib_scale = (sib >> 6) & 3;
-                if ( unlikely(d & vSIB) )
-                    state->sib_index |= (mode_64bit() && evex_encoded() &&
-                                         !evex.RX) << 4;
-                else if ( state->sib_index != 4 )
-                {
-                    ea.mem.off = *decode_gpr(state->regs, state->sib_index);
-                    ea.mem.off <<= state->sib_scale;
-                }
-                if ( (modrm_mod == 0) && ((sib_base & 7) == 5) )
-                    ea.mem.off += insn_fetch_type(int32_t);
-                else if ( sib_base == 4 )
-                {
-                    ea.mem.seg  = x86_seg_ss;
-                    ea.mem.off += state->regs->r(sp);
-                    if ( !ext && (b == 0x8f) )
-                        /* POP <rm> computes its EA post increment. */
-                        ea.mem.off += ((mode_64bit() && (op_bytes == 4))
-                                       ? 8 : op_bytes);
-                }
-                else if ( sib_base == 5 )
-                {
-                    ea.mem.seg  = x86_seg_ss;
-                    ea.mem.off += state->regs->r(bp);
-                }
-                else
-                    ea.mem.off += *decode_gpr(state->regs, sib_base);
-            }
-            else
-            {
-                generate_exception_if(d & vSIB, EXC_UD);
-                modrm_rm |= (rex_prefix & 1) << 3;
-                ea.mem.off = *decode_gpr(state->regs, modrm_rm);
-                if ( (modrm_rm == 5) && (modrm_mod != 0) )
-                    ea.mem.seg = x86_seg_ss;
-            }
-            switch ( modrm_mod )
-            {
-            case 0:
-                if ( (modrm_rm & 7) != 5 )
-                    break;
-                ea.mem.off = insn_fetch_type(int32_t);
-                pc_rel = mode_64bit();
-                break;
-            case 1:
-                ea.mem.off += insn_fetch_type(int8_t) * (1 << disp8scale);
-                break;
-            case 2:
-                ea.mem.off += insn_fetch_type(int32_t);
-                break;
-            }
-        }
-    }
-    else
-    {
-        modrm_mod = 0xff;
-        modrm_reg = modrm_rm = modrm = 0;
-    }
-
-    if ( override_seg != x86_seg_none )
-        ea.mem.seg = override_seg;
-
-    /* Fetch the immediate operand, if present. */
-    switch ( d & SrcMask )
-    {
-        unsigned int bytes;
-
-    case SrcImm:
-        if ( !(d & ByteOp) )
-        {
-            if ( mode_64bit() && !amd_like(ctxt) &&
-                 ((ext == ext_none && (b | 1) == 0xe9) /* call / jmp */ ||
-                  (ext == ext_0f && (b | 0xf) == 0x8f) /* jcc */ ) )
-                op_bytes = 4;
-            bytes = op_bytes != 8 ? op_bytes : 4;
-        }
-        else
-        {
-    case SrcImmByte:
-            bytes = 1;
-        }
-        /* NB. Immediates are sign-extended as necessary. */
-        switch ( bytes )
-        {
-        case 1: imm1 = insn_fetch_type(int8_t);  break;
-        case 2: imm1 = insn_fetch_type(int16_t); break;
-        case 4: imm1 = insn_fetch_type(int32_t); break;
-        }
-        break;
-    case SrcImm16:
-        imm1 = insn_fetch_type(uint16_t);
-        break;
-    }
-
-    ctxt->opcode = opcode;
-    state->desc = d;
-
-    switch ( ext )
-    {
-    case ext_none:
-        rc = x86_decode_onebyte(state, ctxt, ops);
-        break;
-
-    case ext_0f:
-        rc = x86_decode_twobyte(state, ctxt, ops);
-        break;
-
-    case ext_0f38:
-        rc = x86_decode_0f38(state, ctxt, ops);
-        break;
-
-    case ext_0f3a:
-        d = ext0f3a_table[b].to_mem ? DstMem | SrcReg : DstReg | SrcMem;
-        if ( ext0f3a_table[b].two_op )
-            d |= TwoOp;
-        else if ( ext0f3a_table[b].four_op && !mode_64bit() && vex.opcx )
-            imm1 &= 0x7f;
-        state->desc = d;
-        rc = x86_decode_0f3a(state, ctxt, ops);
-        break;
-
-    case ext_8f08:
-        d = DstReg | SrcMem;
-        if ( ext8f08_table[b].two_op )
-            d |= TwoOp;
-        else if ( ext8f08_table[b].four_op && !mode_64bit() )
-            imm1 &= 0x7f;
-        state->desc = d;
-        state->simd_size = ext8f08_table[b].simd_size;
-        break;
-
-    case ext_8f09:
-    case ext_8f0a:
-        break;
-
-    default:
-        ASSERT_UNREACHABLE();
-        return X86EMUL_UNIMPLEMENTED;
-    }
-
-    if ( ea.type == OP_MEM )
-    {
-        if ( pc_rel )
-            ea.mem.off += state->ip;
-
-        ea.mem.off = truncate_ea(ea.mem.off);
-    }
-
-    /*
-     * Simple op_bytes calculations. More complicated cases produce 0
-     * and are further handled during execute.
-     */
-    switch ( state->simd_size )
-    {
-    case simd_none:
-        /*
-         * When prefix 66 has a meaning different from operand-size override,
-         * operand size defaults to 4 and can't be overridden to 2.
-         */
-        if ( op_bytes == 2 &&
-             (ctxt->opcode & X86EMUL_OPC_PFX_MASK) == X86EMUL_OPC_66(0, 0) )
-            op_bytes = 4;
-        break;
-
-#ifndef X86EMUL_NO_SIMD
-    case simd_packed_int:
-        switch ( vex.pfx )
-        {
-        case vex_none:
-            if ( !vex.opcx )
-            {
-                op_bytes = 8;
-                break;
-            }
-            /* fall through */
-        case vex_66:
-            op_bytes = 16 << evex.lr;
-            break;
-        default:
-            op_bytes = 0;
-            break;
-        }
-        break;
-
-    case simd_single_fp:
-        if ( vex.pfx & VEX_PREFIX_DOUBLE_MASK )
-        {
-            op_bytes = 0;
-            break;
-    case simd_packed_fp:
-            if ( vex.pfx & VEX_PREFIX_SCALAR_MASK )
-            {
-                op_bytes = 0;
-                break;
-            }
-        }
-        /* fall through */
-    case simd_any_fp:
-        switch ( vex.pfx )
-        {
-        default:
-            op_bytes = 16 << evex.lr;
-            break;
-        case vex_f3:
-            generate_exception_if(evex_encoded() && evex.w, EXC_UD);
-            op_bytes = 4;
-            break;
-        case vex_f2:
-            generate_exception_if(evex_encoded() && !evex.w, EXC_UD);
-            op_bytes = 8;
-            break;
-        }
-        break;
-
-    case simd_scalar_opc:
-        op_bytes = 4 << (ctxt->opcode & 1);
-        break;
-
-    case simd_scalar_vexw:
-        op_bytes = 4 << vex.w;
-        break;
-
-    case simd_128:
-        /* The special cases here are MMX shift insns. */
-        op_bytes = vex.opcx || vex.pfx ? 16 : 8;
-        break;
-
-    case simd_256:
-        op_bytes = 32;
-        break;
-#endif /* !X86EMUL_NO_SIMD */
-
-    default:
-        op_bytes = 0;
-        break;
-    }
-
- done:
-    return rc;
-}
-
-/* No insn fetching past this point. */
-#undef insn_fetch_bytes
-#undef insn_fetch_type
-
 /* Undo DEBUG wrapper. */
 #undef x86_emulate
 
@@ -3000,7 +1281,7 @@ x86_emulate(
                            (_regs.eflags & X86_EFLAGS_VIP)),
                           EXC_GP, 0);
 
-    rc = x86_decode(&state, ctxt, ops);
+    rc = x86emul_decode(&state, ctxt, ops);
     if ( rc != X86EMUL_OKAY )
         return rc;
 
@@ -10497,46 +8778,6 @@ int x86_emulate_wrapper(
 }
 #endif
 
-struct x86_emulate_state *
-x86_decode_insn(
-    struct x86_emulate_ctxt *ctxt,
-    int (*insn_fetch)(
-        enum x86_segment seg, unsigned long offset,
-        void *p_data, unsigned int bytes,
-        struct x86_emulate_ctxt *ctxt))
-{
-    static DEFINE_PER_CPU(struct x86_emulate_state, state);
-    struct x86_emulate_state *state = &this_cpu(state);
-    const struct x86_emulate_ops ops = {
-        .insn_fetch = insn_fetch,
-        .read       = x86emul_unhandleable_rw,
-    };
-    int rc;
-
-    init_context(ctxt);
-
-    rc = x86_decode(state, ctxt, &ops);
-    if ( unlikely(rc != X86EMUL_OKAY) )
-        return ERR_PTR(-rc);
-
-#if defined(__XEN__) && !defined(NDEBUG)
-    /*
-     * While we avoid memory allocation (by use of per-CPU data) above,
-     * nevertheless make sure callers properly release the state structure
-     * for forward compatibility.
-     */
-    if ( state->caller )
-    {
-        printk(XENLOG_ERR "Unreleased emulation state acquired by %ps\n",
-               state->caller);
-        dump_execution_state();
-    }
-    state->caller = __builtin_return_address(0);
-#endif
-
-    return state;
-}
-
 static inline void check_state(const struct x86_emulate_state *state)
 {
 #if defined(__XEN__) && !defined(NDEBUG)



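For readers orienting themselves in the decode logic moved above: the
ModR/M byte splits into mod/reg/rm fields, with REX.R and REX.B widening
reg and rm respectively. A standalone sketch of that extraction
(hypothetical helper, mirroring the expressions in the moved code; the
real code additionally folds in EVEX bits):

#include <stdint.h>

struct modrm_fields {
    unsigned int mod, reg, rm;
};

/*
 * Field extraction as in the moved decode code: REX.R (bit 2) extends
 * reg by one bit; REX.B (bit 0) extends rm (register forms).
 */
static struct modrm_fields decode_modrm(uint8_t modrm, uint8_t rex_prefix)
{
    struct modrm_fields f;

    f.mod = (modrm & 0xc0) >> 6;
    f.reg = ((rex_prefix & 4) << 1) | ((modrm & 0x38) >> 3);
    f.rm  = ((rex_prefix & 1) << 3) | (modrm & 0x07);

    return f;
}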
^ permalink raw reply	[flat|nested] 8+ messages in thread

* [PATCH 6/7] x86emul: move x86_emul_blk() to separate source file
  2021-08-11 12:21 [PATCH 0/7] x86emul: a few small steps towards disintegration Jan Beulich
                   ` (4 preceding siblings ...)
  2021-08-11 12:24 ` [PATCH 5/7] x86emul: split off insn decoding Jan Beulich
@ 2021-08-11 12:25 ` Jan Beulich
  2021-08-11 12:25 ` [PATCH 7/7] x86emul: move various utility functions to separate source files Jan Beulich
  6 siblings, 0 replies; 8+ messages in thread
From: Jan Beulich @ 2021-08-11 12:25 UTC (permalink / raw)
  To: xen-devel; +Cc: Andrew Cooper, Wei Liu, Roger Pau Monné

The function is already non-trivial and is expected to further grow.

Code moved gets slightly adjusted in a few places, e.g. replacing EXC_*
by X86_EXC_* (such that EXC_* don't need to move as well; we want these
to be phased out anyway).
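Concretely, the mechanical part of that adjustment, taking one instance
from the hunks below:

-        generate_exception_if(fxsr->mxcsr & ~mxcsr_mask, EXC_GP, 0);
+        generate_exception_if(fxsr->mxcsr & ~mxcsr_mask, X86_EXC_GP, 0);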

Signed-off-by: Jan Beulich <jbeulich@suse.com>
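After this change the FXSAVE scratch area used by blk.c resolves per
build environment roughly as sketched below: the hypervisor keeps using
the vCPU's FPU context, while the test harness hands out its static save
area through the new get_fpu_save_area() accessor.

/* Condensed sketch of the indirection introduced here. */
#ifdef __XEN__
# define FXSAVE_AREA current->arch.fpu_ctxt   /* hypervisor build */
#else
# define FXSAVE_AREA get_fpu_save_area()      /* test harness build */
#endif

/* Either way, blk.c's users see a struct x86_fxsr scratch buffer: */
struct x86_fxsr *fxsr = FXSAVE_AREA;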

--- a/tools/tests/x86_emulator/Makefile
+++ b/tools/tests/x86_emulator/Makefile
@@ -252,7 +252,7 @@ endif # 32-bit override
 
 OBJS := x86-emulate.o cpuid.o test_x86_emulator.o evex-disp8.o predicates.o wrappers.o
 OBJS += x86_emulate/0f01.o x86_emulate/0fae.o x86_emulate/0fc7.o
-OBJS += x86_emulate/decode.o x86_emulate/fpu.o
+OBJS += x86_emulate/blk.o x86_emulate/decode.o x86_emulate/fpu.o
 
 $(TARGET): $(OBJS)
 	$(HOSTCC) $(HOSTCFLAGS) -o $@ $^
--- a/tools/tests/x86_emulator/x86-emulate.c
+++ b/tools/tests/x86_emulator/x86-emulate.c
@@ -35,7 +35,10 @@ static bool use_xsave;
  * (When debugging the emulator, care needs to be taken when inserting
  * printf() or alike function calls into regions using this.)
  */
-#define FXSAVE_AREA ((struct x86_fxsr *)fpu_save_area)
+struct x86_fxsr *get_fpu_save_area(void)
+{
+    return (void *)fpu_save_area;
+}
 
 void emul_save_fpu_state(void)
 {
--- a/tools/tests/x86_emulator/x86-emulate.h
+++ b/tools/tests/x86_emulator/x86-emulate.h
@@ -83,6 +83,8 @@ bool emul_test_init(void);
 void emul_save_fpu_state(void);
 void emul_restore_fpu_state(void);
 
+struct x86_fxsr *get_fpu_save_area(void);
+
 /*
  * In order to reasonably use the above, wrap library calls we use and which we
  * think might access any of the FPU state into wrappers saving/restoring state
--- a/xen/arch/x86/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate.c
@@ -24,8 +24,6 @@
 #define cpu_has_amd_erratum(nr) \
         cpu_has_amd_erratum(&current_cpu_data, AMD_ERRATUM_##nr)
 
-#define FXSAVE_AREA current->arch.fpu_ctxt
-
 #include "x86_emulate/x86_emulate.c"
 
 int x86emul_read_xcr(unsigned int reg, uint64_t *val,
--- a/xen/arch/x86/x86_emulate/Makefile
+++ b/xen/arch/x86/x86_emulate/Makefile
@@ -1,5 +1,6 @@
 obj-y += 0f01.o
 obj-y += 0fae.o
 obj-y += 0fc7.o
+obj-y += blk.o
 obj-y += decode.o
 obj-$(CONFIG_HVM) += fpu.o
--- /dev/null
+++ b/xen/arch/x86/x86_emulate/blk.c
@@ -0,0 +1,396 @@
+/******************************************************************************
+ * blk.c - helper for x86_emulate.c
+ *
+ * Generic x86 (32-bit and 64-bit) instruction decoder and emulator.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "private.h"
+
+#if !defined(X86EMUL_NO_FPU) || !defined(X86EMUL_NO_MMX) || \
+    !defined(X86EMUL_NO_SIMD)
+# ifdef __XEN__
+#  include <asm/xstate.h>
+#  define FXSAVE_AREA current->arch.fpu_ctxt
+# else
+#  define FXSAVE_AREA get_fpu_save_area()
+# endif
+#endif
+
+int x86_emul_blk(
+    void *ptr,
+    void *data,
+    unsigned int bytes,
+    uint32_t *eflags,
+    struct x86_emulate_state *s,
+    struct x86_emulate_ctxt *ctxt)
+{
+    int rc = X86EMUL_OKAY;
+
+    switch ( s->blk )
+    {
+        bool zf;
+#ifndef X86EMUL_NO_FPU
+        struct {
+            struct x87_env32 env;
+            struct {
+               uint8_t bytes[10];
+            } freg[8];
+        } fpstate;
+#endif
+
+        /*
+         * Throughout this switch(), memory clobbers are used to compensate
+         * that other operands may not properly express the (full) memory
+         * ranges covered.
+         */
+    case blk_enqcmd:
+        ASSERT(bytes == 64);
+        if ( ((unsigned long)ptr & 0x3f) )
+        {
+            ASSERT_UNREACHABLE();
+            return X86EMUL_UNHANDLEABLE;
+        }
+        *eflags &= ~EFLAGS_MASK;
+#ifdef HAVE_AS_ENQCMD
+        asm ( "enqcmds (%[src]), %[dst]" ASM_FLAG_OUT(, "; setz %[zf]")
+              : [zf] ASM_FLAG_OUT("=@ccz", "=qm") (zf)
+              : [src] "r" (data), [dst] "r" (ptr) : "memory" );
+#else
+        /* enqcmds (%rsi), %rdi */
+        asm ( ".byte 0xf3, 0x0f, 0x38, 0xf8, 0x3e"
+              ASM_FLAG_OUT(, "; setz %[zf]")
+              : [zf] ASM_FLAG_OUT("=@ccz", "=qm") (zf)
+              : "S" (data), "D" (ptr) : "memory" );
+#endif
+        if ( zf )
+            *eflags |= X86_EFLAGS_ZF;
+        break;
+
+#ifndef X86EMUL_NO_FPU
+
+    case blk_fld:
+        ASSERT(!data);
+
+        /* s->rex_prefix carries CR0.PE && !EFLAGS.VM setting */
+        switch ( bytes )
+        {
+        case sizeof(fpstate.env): /* 32-bit FLDENV */
+        case sizeof(fpstate):     /* 32-bit FRSTOR */
+            memcpy(&fpstate.env, ptr, sizeof(fpstate.env));
+            if ( !s->rex_prefix )
+            {
+                /* Convert 32-bit real/vm86 to 32-bit prot format. */
+                unsigned int fip = fpstate.env.mode.real.fip_lo +
+                                   (fpstate.env.mode.real.fip_hi << 16);
+                unsigned int fdp = fpstate.env.mode.real.fdp_lo +
+                                   (fpstate.env.mode.real.fdp_hi << 16);
+                unsigned int fop = fpstate.env.mode.real.fop;
+
+                fpstate.env.mode.prot.fip = fip & 0xf;
+                fpstate.env.mode.prot.fcs = fip >> 4;
+                fpstate.env.mode.prot.fop = fop;
+                fpstate.env.mode.prot.fdp = fdp & 0xf;
+                fpstate.env.mode.prot.fds = fdp >> 4;
+            }
+
+            if ( bytes == sizeof(fpstate.env) )
+                ptr = NULL;
+            else
+                ptr += sizeof(fpstate.env);
+            break;
+
+        case sizeof(struct x87_env16):                        /* 16-bit FLDENV */
+        case sizeof(struct x87_env16) + sizeof(fpstate.freg): /* 16-bit FRSTOR */
+        {
+            const struct x87_env16 *env = ptr;
+
+            fpstate.env.fcw = env->fcw;
+            fpstate.env.fsw = env->fsw;
+            fpstate.env.ftw = env->ftw;
+
+            if ( s->rex_prefix )
+            {
+                /* Convert 16-bit prot to 32-bit prot format. */
+                fpstate.env.mode.prot.fip = env->mode.prot.fip;
+                fpstate.env.mode.prot.fcs = env->mode.prot.fcs;
+                fpstate.env.mode.prot.fdp = env->mode.prot.fdp;
+                fpstate.env.mode.prot.fds = env->mode.prot.fds;
+                fpstate.env.mode.prot.fop = 0; /* unknown */
+            }
+            else
+            {
+                /* Convert 16-bit real/vm86 to 32-bit prot format. */
+                unsigned int fip = env->mode.real.fip_lo +
+                                   (env->mode.real.fip_hi << 16);
+                unsigned int fdp = env->mode.real.fdp_lo +
+                                   (env->mode.real.fdp_hi << 16);
+                unsigned int fop = env->mode.real.fop;
+
+                fpstate.env.mode.prot.fip = fip & 0xf;
+                fpstate.env.mode.prot.fcs = fip >> 4;
+                fpstate.env.mode.prot.fop = fop;
+                fpstate.env.mode.prot.fdp = fdp & 0xf;
+                fpstate.env.mode.prot.fds = fdp >> 4;
+            }
+
+            if ( bytes == sizeof(*env) )
+                ptr = NULL;
+            else
+                ptr += sizeof(*env);
+            break;
+        }
+
+        default:
+            ASSERT_UNREACHABLE();
+            return X86EMUL_UNHANDLEABLE;
+        }
+
+        if ( ptr )
+        {
+            memcpy(fpstate.freg, ptr, sizeof(fpstate.freg));
+            asm volatile ( "frstor %0" :: "m" (fpstate) );
+        }
+        else
+            asm volatile ( "fldenv %0" :: "m" (fpstate.env) );
+        break;
+
+    case blk_fst:
+        ASSERT(!data);
+
+        /* Don't chance consuming uninitialized data. */
+        memset(&fpstate, 0, sizeof(fpstate));
+        if ( bytes > sizeof(fpstate.env) )
+            asm ( "fnsave %0" : "+m" (fpstate) );
+        else
+            asm ( "fnstenv %0" : "+m" (fpstate.env) );
+
+        /* s->rex_prefix carries CR0.PE && !EFLAGS.VM setting */
+        switch ( bytes )
+        {
+        case sizeof(fpstate.env): /* 32-bit FNSTENV */
+        case sizeof(fpstate):     /* 32-bit FNSAVE */
+            if ( !s->rex_prefix )
+            {
+                /* Convert 32-bit prot to 32-bit real/vm86 format. */
+                unsigned int fip = fpstate.env.mode.prot.fip +
+                                   (fpstate.env.mode.prot.fcs << 4);
+                unsigned int fdp = fpstate.env.mode.prot.fdp +
+                                   (fpstate.env.mode.prot.fds << 4);
+                unsigned int fop = fpstate.env.mode.prot.fop;
+
+                memset(&fpstate.env.mode, 0, sizeof(fpstate.env.mode));
+                fpstate.env.mode.real.fip_lo = fip;
+                fpstate.env.mode.real.fip_hi = fip >> 16;
+                fpstate.env.mode.real.fop = fop;
+                fpstate.env.mode.real.fdp_lo = fdp;
+                fpstate.env.mode.real.fdp_hi = fdp >> 16;
+            }
+            memcpy(ptr, &fpstate.env, sizeof(fpstate.env));
+            if ( bytes == sizeof(fpstate.env) )
+                ptr = NULL;
+            else
+                ptr += sizeof(fpstate.env);
+            break;
+
+        case sizeof(struct x87_env16):                        /* 16-bit FNSTENV */
+        case sizeof(struct x87_env16) + sizeof(fpstate.freg): /* 16-bit FNSAVE */
+            if ( s->rex_prefix )
+            {
+                /* Convert 32-bit prot to 16-bit prot format. */
+                struct x87_env16 *env = ptr;
+
+                env->fcw = fpstate.env.fcw;
+                env->fsw = fpstate.env.fsw;
+                env->ftw = fpstate.env.ftw;
+                env->mode.prot.fip = fpstate.env.mode.prot.fip;
+                env->mode.prot.fcs = fpstate.env.mode.prot.fcs;
+                env->mode.prot.fdp = fpstate.env.mode.prot.fdp;
+                env->mode.prot.fds = fpstate.env.mode.prot.fds;
+            }
+            else
+            {
+                /* Convert 32-bit prot to 16-bit real/vm86 format. */
+                unsigned int fip = fpstate.env.mode.prot.fip +
+                                   (fpstate.env.mode.prot.fcs << 4);
+                unsigned int fdp = fpstate.env.mode.prot.fdp +
+                                   (fpstate.env.mode.prot.fds << 4);
+                struct x87_env16 env = {
+                    .fcw = fpstate.env.fcw,
+                    .fsw = fpstate.env.fsw,
+                    .ftw = fpstate.env.ftw,
+                    .mode.real.fip_lo = fip,
+                    .mode.real.fip_hi = fip >> 16,
+                    .mode.real.fop = fpstate.env.mode.prot.fop,
+                    .mode.real.fdp_lo = fdp,
+                    .mode.real.fdp_hi = fdp >> 16
+                };
+
+                memcpy(ptr, &env, sizeof(env));
+            }
+            if ( bytes == sizeof(struct x87_env16) )
+                ptr = NULL;
+            else
+                ptr += sizeof(struct x87_env16);
+            break;
+
+        default:
+            ASSERT_UNREACHABLE();
+            return X86EMUL_UNHANDLEABLE;
+        }
+
+        if ( ptr )
+            memcpy(ptr, fpstate.freg, sizeof(fpstate.freg));
+        break;
+
+#endif /* X86EMUL_NO_FPU */
+
+#if !defined(X86EMUL_NO_FPU) || !defined(X86EMUL_NO_MMX) || \
+    !defined(X86EMUL_NO_SIMD)
+
+    case blk_fxrstor:
+    {
+        struct x86_fxsr *fxsr = FXSAVE_AREA;
+
+        ASSERT(!data);
+        ASSERT(bytes == sizeof(*fxsr));
+        ASSERT(s->op_bytes <= bytes);
+
+        if ( s->op_bytes < sizeof(*fxsr) )
+        {
+            if ( s->rex_prefix & REX_W )
+            {
+                /*
+                 * The only way to force fxsaveq on a wide range of gas
+                 * versions. On older versions the rex64 prefix works only if
+                 * we force an addressing mode that doesn't require extended
+                 * registers.
+                 */
+                asm volatile ( ".byte 0x48; fxsave (%1)"
+                               : "=m" (*fxsr) : "R" (fxsr) );
+            }
+            else
+                asm volatile ( "fxsave %0" : "=m" (*fxsr) );
+        }
+
+        /*
+         * Don't chance the reserved or available ranges to contain any
+         * data FXRSTOR may actually consume in some way: Copy only the
+         * defined portion, and zero the rest.
+         */
+        memcpy(fxsr, ptr, min(s->op_bytes,
+                              (unsigned int)offsetof(struct x86_fxsr, rsvd)));
+        memset(fxsr->rsvd, 0, sizeof(*fxsr) - offsetof(struct x86_fxsr, rsvd));
+
+        generate_exception_if(fxsr->mxcsr & ~mxcsr_mask, X86_EXC_GP, 0);
+
+        if ( s->rex_prefix & REX_W )
+        {
+            /* See above for why operand/constraints are this way. */
+            asm volatile ( ".byte 0x48; fxrstor (%1)"
+                           :: "m" (*fxsr), "R" (fxsr) );
+        }
+        else
+            asm volatile ( "fxrstor %0" :: "m" (*fxsr) );
+        break;
+    }
+
+    case blk_fxsave:
+    {
+        struct x86_fxsr *fxsr = FXSAVE_AREA;
+
+        ASSERT(!data);
+        ASSERT(bytes == sizeof(*fxsr));
+        ASSERT(s->op_bytes <= bytes);
+
+        if ( s->op_bytes < sizeof(*fxsr) )
+            /* Don't chance consuming uninitialized data. */
+            memset(fxsr, 0, s->op_bytes);
+        else
+            fxsr = ptr;
+
+        if ( s->rex_prefix & REX_W )
+        {
+            /* See above for why operand/constraints are this way. */
+            asm volatile ( ".byte 0x48; fxsave (%1)"
+                           : "=m" (*fxsr) : "R" (fxsr) );
+        }
+        else
+            asm volatile ( "fxsave %0" : "=m" (*fxsr) );
+
+        if ( fxsr != ptr ) /* i.e. s->op_bytes < sizeof(*fxsr) */
+            memcpy(ptr, fxsr, s->op_bytes);
+        break;
+    }
+
+#endif /* X86EMUL_NO_{FPU,MMX,SIMD} */
+
+    case blk_movdir:
+        switch ( bytes )
+        {
+#ifdef __x86_64__
+        case sizeof(uint32_t):
+# ifdef HAVE_AS_MOVDIR
+            asm ( "movdiri %0, (%1)"
+                  :: "r" (*(uint32_t *)data), "r" (ptr) : "memory" );
+# else
+            /* movdiri %esi, (%rdi) */
+            asm ( ".byte 0x0f, 0x38, 0xf9, 0x37"
+                  :: "S" (*(uint32_t *)data), "D" (ptr) : "memory" );
+# endif
+            break;
+#endif
+
+        case sizeof(unsigned long):
+#ifdef HAVE_AS_MOVDIR
+            asm ( "movdiri %0, (%1)"
+                  :: "r" (*(unsigned long *)data), "r" (ptr) : "memory" );
+#else
+            /* movdiri %rsi, (%rdi) */
+            asm ( ".byte 0x48, 0x0f, 0x38, 0xf9, 0x37"
+                  :: "S" (*(unsigned long *)data), "D" (ptr) : "memory" );
+#endif
+            break;
+
+        case 64:
+            if ( ((unsigned long)ptr & 0x3f) )
+            {
+                ASSERT_UNREACHABLE();
+                return X86EMUL_UNHANDLEABLE;
+            }
+#ifdef HAVE_AS_MOVDIR
+            asm ( "movdir64b (%0), %1" :: "r" (data), "r" (ptr) : "memory" );
+#else
+            /* movdir64b (%rsi), %rdi */
+            asm ( ".byte 0x66, 0x0f, 0x38, 0xf8, 0x3e"
+                  :: "S" (data), "D" (ptr) : "memory" );
+#endif
+            break;
+
+        default:
+            ASSERT_UNREACHABLE();
+            return X86EMUL_UNHANDLEABLE;
+        }
+        break;
+
+    default:
+        ASSERT_UNREACHABLE();
+        return X86EMUL_UNHANDLEABLE;
+    }
+
+ done: __maybe_unused;
+    return rc;
+}
--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -8342,371 +8342,6 @@ int x86_emul_rmw(
     return X86EMUL_OKAY;
 }
 
-int x86_emul_blk(
-    void *ptr,
-    void *data,
-    unsigned int bytes,
-    uint32_t *eflags,
-    struct x86_emulate_state *state,
-    struct x86_emulate_ctxt *ctxt)
-{
-    int rc = X86EMUL_OKAY;
-
-    switch ( state->blk )
-    {
-        bool zf;
-#ifndef X86EMUL_NO_FPU
-        struct {
-            struct x87_env32 env;
-            struct {
-               uint8_t bytes[10];
-            } freg[8];
-        } fpstate;
-#endif
-
-        /*
-         * Throughout this switch(), memory clobbers are used to compensate
-         * that other operands may not properly express the (full) memory
-         * ranges covered.
-         */
-    case blk_enqcmd:
-        ASSERT(bytes == 64);
-        if ( ((unsigned long)ptr & 0x3f) )
-        {
-            ASSERT_UNREACHABLE();
-            return X86EMUL_UNHANDLEABLE;
-        }
-        *eflags &= ~EFLAGS_MASK;
-#ifdef HAVE_AS_ENQCMD
-        asm ( "enqcmds (%[src]), %[dst]" ASM_FLAG_OUT(, "; setz %[zf]")
-              : [zf] ASM_FLAG_OUT("=@ccz", "=qm") (zf)
-              : [src] "r" (data), [dst] "r" (ptr) : "memory" );
-#else
-        /* enqcmds (%rsi), %rdi */
-        asm ( ".byte 0xf3, 0x0f, 0x38, 0xf8, 0x3e"
-              ASM_FLAG_OUT(, "; setz %[zf]")
-              : [zf] ASM_FLAG_OUT("=@ccz", "=qm") (zf)
-              : "S" (data), "D" (ptr) : "memory" );
-#endif
-        if ( zf )
-            *eflags |= X86_EFLAGS_ZF;
-        break;
-
-#ifndef X86EMUL_NO_FPU
-
-    case blk_fld:
-        ASSERT(!data);
-
-        /* state->rex_prefix carries CR0.PE && !EFLAGS.VM setting */
-        switch ( bytes )
-        {
-        case sizeof(fpstate.env): /* 32-bit FLDENV */
-        case sizeof(fpstate):     /* 32-bit FRSTOR */
-            memcpy(&fpstate.env, ptr, sizeof(fpstate.env));
-            if ( !state->rex_prefix )
-            {
-                /* Convert 32-bit real/vm86 to 32-bit prot format. */
-                unsigned int fip = fpstate.env.mode.real.fip_lo +
-                                   (fpstate.env.mode.real.fip_hi << 16);
-                unsigned int fdp = fpstate.env.mode.real.fdp_lo +
-                                   (fpstate.env.mode.real.fdp_hi << 16);
-                unsigned int fop = fpstate.env.mode.real.fop;
-
-                fpstate.env.mode.prot.fip = fip & 0xf;
-                fpstate.env.mode.prot.fcs = fip >> 4;
-                fpstate.env.mode.prot.fop = fop;
-                fpstate.env.mode.prot.fdp = fdp & 0xf;
-                fpstate.env.mode.prot.fds = fdp >> 4;
-            }
-
-            if ( bytes == sizeof(fpstate.env) )
-                ptr = NULL;
-            else
-                ptr += sizeof(fpstate.env);
-            break;
-
-        case sizeof(struct x87_env16):                        /* 16-bit FLDENV */
-        case sizeof(struct x87_env16) + sizeof(fpstate.freg): /* 16-bit FRSTOR */
-        {
-            const struct x87_env16 *env = ptr;
-
-            fpstate.env.fcw = env->fcw;
-            fpstate.env.fsw = env->fsw;
-            fpstate.env.ftw = env->ftw;
-
-            if ( state->rex_prefix )
-            {
-                /* Convert 16-bit prot to 32-bit prot format. */
-                fpstate.env.mode.prot.fip = env->mode.prot.fip;
-                fpstate.env.mode.prot.fcs = env->mode.prot.fcs;
-                fpstate.env.mode.prot.fdp = env->mode.prot.fdp;
-                fpstate.env.mode.prot.fds = env->mode.prot.fds;
-                fpstate.env.mode.prot.fop = 0; /* unknown */
-            }
-            else
-            {
-                /* Convert 16-bit real/vm86 to 32-bit prot format. */
-                unsigned int fip = env->mode.real.fip_lo +
-                                   (env->mode.real.fip_hi << 16);
-                unsigned int fdp = env->mode.real.fdp_lo +
-                                   (env->mode.real.fdp_hi << 16);
-                unsigned int fop = env->mode.real.fop;
-
-                fpstate.env.mode.prot.fip = fip & 0xf;
-                fpstate.env.mode.prot.fcs = fip >> 4;
-                fpstate.env.mode.prot.fop = fop;
-                fpstate.env.mode.prot.fdp = fdp & 0xf;
-                fpstate.env.mode.prot.fds = fdp >> 4;
-            }
-
-            if ( bytes == sizeof(*env) )
-                ptr = NULL;
-            else
-                ptr += sizeof(*env);
-            break;
-        }
-
-        default:
-            ASSERT_UNREACHABLE();
-            return X86EMUL_UNHANDLEABLE;
-        }
-
-        if ( ptr )
-        {
-            memcpy(fpstate.freg, ptr, sizeof(fpstate.freg));
-            asm volatile ( "frstor %0" :: "m" (fpstate) );
-        }
-        else
-            asm volatile ( "fldenv %0" :: "m" (fpstate.env) );
-        break;
-
-    case blk_fst:
-        ASSERT(!data);
-
-        /* Don't chance consuming uninitialized data. */
-        memset(&fpstate, 0, sizeof(fpstate));
-        if ( bytes > sizeof(fpstate.env) )
-            asm ( "fnsave %0" : "+m" (fpstate) );
-        else
-            asm ( "fnstenv %0" : "+m" (fpstate.env) );
-
-        /* state->rex_prefix carries CR0.PE && !EFLAGS.VM setting */
-        switch ( bytes )
-        {
-        case sizeof(fpstate.env): /* 32-bit FNSTENV */
-        case sizeof(fpstate):     /* 32-bit FNSAVE */
-            if ( !state->rex_prefix )
-            {
-                /* Convert 32-bit prot to 32-bit real/vm86 format. */
-                unsigned int fip = fpstate.env.mode.prot.fip +
-                                   (fpstate.env.mode.prot.fcs << 4);
-                unsigned int fdp = fpstate.env.mode.prot.fdp +
-                                   (fpstate.env.mode.prot.fds << 4);
-                unsigned int fop = fpstate.env.mode.prot.fop;
-
-                memset(&fpstate.env.mode, 0, sizeof(fpstate.env.mode));
-                fpstate.env.mode.real.fip_lo = fip;
-                fpstate.env.mode.real.fip_hi = fip >> 16;
-                fpstate.env.mode.real.fop = fop;
-                fpstate.env.mode.real.fdp_lo = fdp;
-                fpstate.env.mode.real.fdp_hi = fdp >> 16;
-            }
-            memcpy(ptr, &fpstate.env, sizeof(fpstate.env));
-            if ( bytes == sizeof(fpstate.env) )
-                ptr = NULL;
-            else
-                ptr += sizeof(fpstate.env);
-            break;
-
-        case sizeof(struct x87_env16):                        /* 16-bit FNSTENV */
-        case sizeof(struct x87_env16) + sizeof(fpstate.freg): /* 16-bit FNSAVE */
-            if ( state->rex_prefix )
-            {
-                /* Convert 32-bit prot to 16-bit prot format. */
-                struct x87_env16 *env = ptr;
-
-                env->fcw = fpstate.env.fcw;
-                env->fsw = fpstate.env.fsw;
-                env->ftw = fpstate.env.ftw;
-                env->mode.prot.fip = fpstate.env.mode.prot.fip;
-                env->mode.prot.fcs = fpstate.env.mode.prot.fcs;
-                env->mode.prot.fdp = fpstate.env.mode.prot.fdp;
-                env->mode.prot.fds = fpstate.env.mode.prot.fds;
-            }
-            else
-            {
-                /* Convert 32-bit prot to 16-bit real/vm86 format. */
-                unsigned int fip = fpstate.env.mode.prot.fip +
-                                   (fpstate.env.mode.prot.fcs << 4);
-                unsigned int fdp = fpstate.env.mode.prot.fdp +
-                                   (fpstate.env.mode.prot.fds << 4);
-                struct x87_env16 env = {
-                    .fcw = fpstate.env.fcw,
-                    .fsw = fpstate.env.fsw,
-                    .ftw = fpstate.env.ftw,
-                    .mode.real.fip_lo = fip,
-                    .mode.real.fip_hi = fip >> 16,
-                    .mode.real.fop = fpstate.env.mode.prot.fop,
-                    .mode.real.fdp_lo = fdp,
-                    .mode.real.fdp_hi = fdp >> 16
-                };
-
-                memcpy(ptr, &env, sizeof(env));
-            }
-            if ( bytes == sizeof(struct x87_env16) )
-                ptr = NULL;
-            else
-                ptr += sizeof(struct x87_env16);
-            break;
-
-        default:
-            ASSERT_UNREACHABLE();
-            return X86EMUL_UNHANDLEABLE;
-        }
-
-        if ( ptr )
-            memcpy(ptr, fpstate.freg, sizeof(fpstate.freg));
-        break;
-
-#endif /* X86EMUL_NO_FPU */
-
-#if !defined(X86EMUL_NO_FPU) || !defined(X86EMUL_NO_MMX) || \
-    !defined(X86EMUL_NO_SIMD)
-
-    case blk_fxrstor:
-    {
-        struct x86_fxsr *fxsr = FXSAVE_AREA;
-
-        ASSERT(!data);
-        ASSERT(bytes == sizeof(*fxsr));
-        ASSERT(state->op_bytes <= bytes);
-
-        if ( state->op_bytes < sizeof(*fxsr) )
-        {
-            if ( state->rex_prefix & REX_W )
-            {
-                /*
-                 * The only way to force fxsaveq on a wide range of gas
-                 * versions. On older versions the rex64 prefix works only if
-                 * we force an addressing mode that doesn't require extended
-                 * registers.
-                 */
-                asm volatile ( ".byte 0x48; fxsave (%1)"
-                               : "=m" (*fxsr) : "R" (fxsr) );
-            }
-            else
-                asm volatile ( "fxsave %0" : "=m" (*fxsr) );
-        }
-
-        /*
-         * Don't chance the reserved or available ranges to contain any
-         * data FXRSTOR may actually consume in some way: Copy only the
-         * defined portion, and zero the rest.
-         */
-        memcpy(fxsr, ptr, min(state->op_bytes,
-                              (unsigned int)offsetof(struct x86_fxsr, rsvd)));
-        memset(fxsr->rsvd, 0, sizeof(*fxsr) - offsetof(struct x86_fxsr, rsvd));
-
-        generate_exception_if(fxsr->mxcsr & ~mxcsr_mask, EXC_GP, 0);
-
-        if ( state->rex_prefix & REX_W )
-        {
-            /* See above for why operand/constraints are this way. */
-            asm volatile ( ".byte 0x48; fxrstor (%1)"
-                           :: "m" (*fxsr), "R" (fxsr) );
-        }
-        else
-            asm volatile ( "fxrstor %0" :: "m" (*fxsr) );
-        break;
-    }
-
-    case blk_fxsave:
-    {
-        struct x86_fxsr *fxsr = FXSAVE_AREA;
-
-        ASSERT(!data);
-        ASSERT(bytes == sizeof(*fxsr));
-        ASSERT(state->op_bytes <= bytes);
-
-        if ( state->op_bytes < sizeof(*fxsr) )
-            /* Don't chance consuming uninitialized data. */
-            memset(fxsr, 0, state->op_bytes);
-        else
-            fxsr = ptr;
-
-        if ( state->rex_prefix & REX_W )
-        {
-            /* See above for why operand/constraints are this way. */
-            asm volatile ( ".byte 0x48; fxsave (%1)"
-                           : "=m" (*fxsr) : "R" (fxsr) );
-        }
-        else
-            asm volatile ( "fxsave %0" : "=m" (*fxsr) );
-
-        if ( fxsr != ptr ) /* i.e. state->op_bytes < sizeof(*fxsr) */
-            memcpy(ptr, fxsr, state->op_bytes);
-        break;
-    }
-
-#endif /* X86EMUL_NO_{FPU,MMX,SIMD} */
-
-    case blk_movdir:
-        switch ( bytes )
-        {
-#ifdef __x86_64__
-        case sizeof(uint32_t):
-# ifdef HAVE_AS_MOVDIR
-            asm ( "movdiri %0, (%1)"
-                  :: "r" (*(uint32_t *)data), "r" (ptr) : "memory" );
-# else
-            /* movdiri %esi, (%rdi) */
-            asm ( ".byte 0x0f, 0x38, 0xf9, 0x37"
-                  :: "S" (*(uint32_t *)data), "D" (ptr) : "memory" );
-# endif
-            break;
-#endif
-
-        case sizeof(unsigned long):
-#ifdef HAVE_AS_MOVDIR
-            asm ( "movdiri %0, (%1)"
-                  :: "r" (*(unsigned long *)data), "r" (ptr) : "memory" );
-#else
-            /* movdiri %rsi, (%rdi) */
-            asm ( ".byte 0x48, 0x0f, 0x38, 0xf9, 0x37"
-                  :: "S" (*(unsigned long *)data), "D" (ptr) : "memory" );
-#endif
-            break;
-
-        case 64:
-            if ( ((unsigned long)ptr & 0x3f) )
-            {
-                ASSERT_UNREACHABLE();
-                return X86EMUL_UNHANDLEABLE;
-            }
-#ifdef HAVE_AS_MOVDIR
-            asm ( "movdir64b (%0), %1" :: "r" (data), "r" (ptr) : "memory" );
-#else
-            /* movdir64b (%rsi), %rdi */
-            asm ( ".byte 0x66, 0x0f, 0x38, 0xf8, 0x3e"
-                  :: "S" (data), "D" (ptr) : "memory" );
-#endif
-            break;
-
-        default:
-            ASSERT_UNREACHABLE();
-            return X86EMUL_UNHANDLEABLE;
-        }
-        break;
-
-    default:
-        ASSERT_UNREACHABLE();
-        return X86EMUL_UNHANDLEABLE;
-    }
-
- done:
-    return rc;
-}
-
 static void __init __maybe_unused build_assertions(void)
 {
     /* Check the values against SReg3 encoding in opcode/ModRM bytes. */
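The HAVE_AS_MOVDIR / HAVE_AS_ENQCMD fallbacks above illustrate a
recurring pattern in blk.c: when the assembler cannot be assumed to know
a mnemonic, the opcode bytes are emitted directly, with the operands
pinned to the registers the hand-assembled ModRM byte expects. A
self-contained sketch of the same pattern (hypothetical wrapper for a
64-bit build, not part of the patch):

#include <stdint.h>

/*
 * Store a 32-bit value with MOVDIRI without assembler support for the
 * mnemonic.  0x0f 0x38 0xf9 0x37 encodes "movdiri %esi, (%rdi)", hence
 * the source must live in %esi ("S") and the pointer in %rdi ("D").
 */
static inline void movdiri32(void *dst, uint32_t src)
{
    asm volatile ( ".byte 0x0f, 0x38, 0xf9, 0x37"
                   :: "S" (src), "D" (dst) : "memory" );
}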



^ permalink raw reply	[flat|nested] 8+ messages in thread

* [PATCH 7/7] x86emul: move various utility functions to separate source files
  2021-08-11 12:21 [PATCH 0/7] x86emul: a few small steps towards disintegration Jan Beulich
                   ` (5 preceding siblings ...)
  2021-08-11 12:25 ` [PATCH 6/7] x86emul: move x86_emul_blk() to separate source file Jan Beulich
@ 2021-08-11 12:25 ` Jan Beulich
  6 siblings, 0 replies; 8+ messages in thread
From: Jan Beulich @ 2021-08-11 12:25 UTC (permalink / raw)
  To: xen-devel; +Cc: Andrew Cooper, Wei Liu, Roger Pau Monné

Many are needed by the hypervisor only - have one file for this purpose.
Some are also needed by the harness (but not the fuzzer) - have another
file for these.

Code moved gets slightly adjusted in a few places, e.g. replacing
"state" by "s" (like was done for other code that has been split off).

Signed-off-by: Jan Beulich <jbeulich@suse.com>
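For orientation, callers of the moved accessors follow the usual
emulator convention: any return other than X86EMUL_OKAY means the
exception was already queued on the context. A minimal usage sketch,
assuming a ctxt pointer is in scope:

uint64_t xcr0;

/*
 * Per the code below, reading XCR0 cannot fault; XCR1 can, depending
 * on xgetbv1 support.
 */
if ( x86emul_read_xcr(0, &xcr0, ctxt) != X86EMUL_OKAY )
    return X86EMUL_EXCEPTION; /* #GP(0) was raised via ctxt */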

--- a/tools/tests/x86_emulator/Makefile
+++ b/tools/tests/x86_emulator/Makefile
@@ -252,7 +252,7 @@ endif # 32-bit override
 
 OBJS := x86-emulate.o cpuid.o test_x86_emulator.o evex-disp8.o predicates.o wrappers.o
 OBJS += x86_emulate/0f01.o x86_emulate/0fae.o x86_emulate/0fc7.o
-OBJS += x86_emulate/blk.o x86_emulate/decode.o x86_emulate/fpu.o
+OBJS += x86_emulate/blk.o x86_emulate/decode.o x86_emulate/fpu.o x86_emulate/util.o
 
 $(TARGET): $(OBJS)
 	$(HOSTCC) $(HOSTCFLAGS) -o $@ $^
--- a/xen/arch/x86/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate.c
@@ -14,7 +14,6 @@
 #include <asm/processor.h> /* current_cpu_info */
 #include <asm/xstate.h>
 #include <asm/amd.h> /* cpu_has_amd_erratum() */
-#include <asm/debugreg.h>
 
 /* Avoid namespace pollution. */
 #undef cmpxchg
@@ -26,128 +25,6 @@
 
 #include "x86_emulate/x86_emulate.c"
 
-int x86emul_read_xcr(unsigned int reg, uint64_t *val,
-                     struct x86_emulate_ctxt *ctxt)
-{
-    switch ( reg )
-    {
-    case 0:
-        *val = current->arch.xcr0;
-        return X86EMUL_OKAY;
-
-    case 1:
-        if ( current->domain->arch.cpuid->xstate.xgetbv1 )
-            break;
-        /* fall through */
-    default:
-        x86_emul_hw_exception(TRAP_gp_fault, 0, ctxt);
-        return X86EMUL_EXCEPTION;
-    }
-
-    *val = xgetbv(reg);
-
-    return X86EMUL_OKAY;
-}
-
-/* Note: May be called with ctxt=NULL. */
-int x86emul_write_xcr(unsigned int reg, uint64_t val,
-                      struct x86_emulate_ctxt *ctxt)
-{
-    switch ( reg )
-    {
-    case 0:
-        break;
-
-    default:
-    gp_fault:
-        if ( ctxt )
-            x86_emul_hw_exception(TRAP_gp_fault, 0, ctxt);
-        return X86EMUL_EXCEPTION;
-    }
-
-    if ( unlikely(handle_xsetbv(reg, val) != 0) )
-        goto gp_fault;
-
-    return X86EMUL_OKAY;
-}
-
-#ifdef CONFIG_PV
-/* Called with NULL ctxt in hypercall context. */
-int x86emul_read_dr(unsigned int reg, unsigned long *val,
-                    struct x86_emulate_ctxt *ctxt)
-{
-    struct vcpu *curr = current;
-
-    /* HVM support requires a bit more plumbing before it will work. */
-    ASSERT(is_pv_vcpu(curr));
-
-    switch ( reg )
-    {
-    case 0 ... 3:
-        *val = array_access_nospec(curr->arch.dr, reg);
-        break;
-
-    case 4:
-        if ( curr->arch.pv.ctrlreg[4] & X86_CR4_DE )
-            goto ud_fault;
-
-        /* Fallthrough */
-    case 6:
-        *val = curr->arch.dr6;
-        break;
-
-    case 5:
-        if ( curr->arch.pv.ctrlreg[4] & X86_CR4_DE )
-            goto ud_fault;
-
-        /* Fallthrough */
-    case 7:
-        *val = curr->arch.dr7 | curr->arch.pv.dr7_emul;
-        break;
-
-    ud_fault:
-    default:
-        if ( ctxt )
-            x86_emul_hw_exception(TRAP_invalid_op, X86_EVENT_NO_EC, ctxt);
-
-        return X86EMUL_EXCEPTION;
-    }
-
-    return X86EMUL_OKAY;
-}
-
-int x86emul_write_dr(unsigned int reg, unsigned long val,
-                     struct x86_emulate_ctxt *ctxt)
-{
-    struct vcpu *curr = current;
-
-    /* HVM support requires a bit more plumbing before it will work. */
-    ASSERT(is_pv_vcpu(curr));
-
-    switch ( set_debugreg(curr, reg, val) )
-    {
-    case 0:
-        return X86EMUL_OKAY;
-
-    case -ENODEV:
-        x86_emul_hw_exception(TRAP_invalid_op, X86_EVENT_NO_EC, ctxt);
-        return X86EMUL_EXCEPTION;
-
-    default:
-        x86_emul_hw_exception(TRAP_gp_fault, 0, ctxt);
-        return X86EMUL_EXCEPTION;
-    }
-}
-#endif /* CONFIG_PV */
-
-int x86emul_cpuid(uint32_t leaf, uint32_t subleaf,
-                  struct cpuid_leaf *res, struct x86_emulate_ctxt *ctxt)
-{
-    guest_cpuid(current, leaf, subleaf, res);
-
-    return X86EMUL_OKAY;
-}
-
 /*
  * Local variables:
  * mode: C
--- a/xen/arch/x86/x86_emulate/Makefile
+++ b/xen/arch/x86/x86_emulate/Makefile
@@ -4,3 +4,5 @@ obj-y += 0fc7.o
 obj-y += blk.o
 obj-y += decode.o
 obj-$(CONFIG_HVM) += fpu.o
+obj-y += util.o
+obj-y += util-xen.o
--- a/xen/arch/x86/x86_emulate/private.h
+++ b/xen/arch/x86/x86_emulate/private.h
@@ -330,6 +330,13 @@ struct x86_emulate_state {
 #endif
 };
 
+static inline void check_state(const struct x86_emulate_state *s)
+{
+#if defined(__XEN__) && !defined(NDEBUG)
+    ASSERT(s->caller);
+#endif
+}
+
 typedef union {
     uint64_t mmx;
     uint64_t __attribute__ ((aligned(16))) xmm[2];
--- /dev/null
+++ b/xen/arch/x86/x86_emulate/util.c
@@ -0,0 +1,298 @@
+/******************************************************************************
+ * util.c
+ *
+ * Generic x86 (32-bit and 64-bit) instruction decoder and emulator utility
+ * functions.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "private.h"
+
+unsigned int x86_insn_length(const struct x86_emulate_state *s,
+                             const struct x86_emulate_ctxt *ctxt)
+{
+    check_state(s);
+
+    return s->ip - ctxt->regs->r(ip);
+}
+
+/*
+ * This function means to return 'true' for all supported insns with explicit
+ * accesses to memory.  This means also insns which don't have an explicit
+ * memory operand (like POP), but it does not mean e.g. segment selector
+ * loads, where the descriptor table access is considered an implicit one.
+ */
+bool x86_insn_is_mem_access(const struct x86_emulate_state *s,
+                            const struct x86_emulate_ctxt *ctxt)
+{
+    if ( mode_64bit() && s->not_64bit )
+        return false;
+
+    if ( s->ea.type == OP_MEM )
+    {
+        switch ( ctxt->opcode )
+        {
+        case 0x8d: /* LEA */
+        case X86EMUL_OPC(0x0f, 0x0d): /* PREFETCH */
+        case X86EMUL_OPC(0x0f, 0x18)
+         ... X86EMUL_OPC(0x0f, 0x1f): /* NOP space */
+        case X86EMUL_OPC_66(0x0f, 0x18)
+         ... X86EMUL_OPC_66(0x0f, 0x1f): /* NOP space */
+        case X86EMUL_OPC_F3(0x0f, 0x18)
+         ... X86EMUL_OPC_F3(0x0f, 0x1f): /* NOP space */
+        case X86EMUL_OPC_F2(0x0f, 0x18)
+         ... X86EMUL_OPC_F2(0x0f, 0x1f): /* NOP space */
+        case X86EMUL_OPC(0x0f, 0xb9): /* UD1 */
+        case X86EMUL_OPC(0x0f, 0xff): /* UD0 */
+        case X86EMUL_OPC_EVEX_66(0x0f38, 0xc6): /* V{GATH,SCATT}ERPF*D* */
+        case X86EMUL_OPC_EVEX_66(0x0f38, 0xc7): /* V{GATH,SCATT}ERPF*Q* */
+            return false;
+
+        case X86EMUL_OPC(0x0f, 0x01):
+            return (s->modrm_reg & 7) != 7; /* INVLPG */
+
+        case X86EMUL_OPC(0x0f, 0xae):
+            return (s->modrm_reg & 7) != 7; /* CLFLUSH */
+
+        case X86EMUL_OPC_66(0x0f, 0xae):
+            return (s->modrm_reg & 7) < 6; /* CLWB, CLFLUSHOPT */
+        }
+
+        return true;
+    }
+
+    switch ( ctxt->opcode )
+    {
+    case 0x06 ... 0x07:                  /* PUSH / POP %es */
+    case 0x0e:                           /* PUSH %cs */
+    case 0x16 ... 0x17:                  /* PUSH / POP %ss */
+    case 0x1e ... 0x1f:                  /* PUSH / POP %ds */
+    case 0x50 ... 0x5f:                  /* PUSH / POP reg */
+    case 0x60 ... 0x61:                  /* PUSHA / POPA */
+    case 0x68: case 0x6a:                /* PUSH imm */
+    case 0x6c ... 0x6f:                  /* INS / OUTS */
+    case 0x8f:                           /* POP r/m */
+    case 0x9a:                           /* CALL (far, direct) */
+    case 0x9c ... 0x9d:                  /* PUSHF / POPF */
+    case 0xa4 ... 0xa7:                  /* MOVS / CMPS */
+    case 0xaa ... 0xaf:                  /* STOS / LODS / SCAS */
+    case 0xc2 ... 0xc3:                  /* RET (near) */
+    case 0xc8 ... 0xc9:                  /* ENTER / LEAVE */
+    case 0xca ... 0xcb:                  /* RET (far) */
+    case 0xd7:                           /* XLAT */
+    case 0xe8:                           /* CALL (near, direct) */
+    case X86EMUL_OPC(0x0f, 0xa0):        /* PUSH %fs */
+    case X86EMUL_OPC(0x0f, 0xa1):        /* POP %fs */
+    case X86EMUL_OPC(0x0f, 0xa8):        /* PUSH %gs */
+    case X86EMUL_OPC(0x0f, 0xa9):        /* POP %gs */
+    case X86EMUL_OPC(0x0f, 0xf7):        /* MASKMOVQ */
+    case X86EMUL_OPC_66(0x0f, 0xf7):     /* MASKMOVDQU */
+    case X86EMUL_OPC_VEX_66(0x0f, 0xf7): /* VMASKMOVDQU */
+        return true;
+
+    case 0xff:
+        switch ( s->modrm_reg & 7 )
+        {
+        case 2: /* CALL (near, indirect) */
+        case 6: /* PUSH r/m */
+            return true;
+        }
+        break;
+
+    case X86EMUL_OPC(0x0f, 0x01):
+        /* Cover CLZERO. */
+        return (s->modrm_rm & 7) == 4 && (s->modrm_reg & 7) == 7;
+    }
+
+    return false;
+}
+
+/*
+ * This function means to return 'true' for all supported insns with explicit
+ * writes to memory.  This means also insns which don't have an explicit
+ * memory operand (like PUSH), but it does not mean e.g. segment selector
+ * loads, where the (possible) descriptor table write is considered an
+ * implicit access.
+ */
+bool x86_insn_is_mem_write(const struct x86_emulate_state *s,
+                           const struct x86_emulate_ctxt *ctxt)
+{
+    if ( mode_64bit() && s->not_64bit )
+        return false;
+
+    switch ( s->desc & DstMask )
+    {
+    case DstMem:
+        /* The SrcMem check is to cover {,V}MASKMOV{Q,DQU}. */
+        return s->modrm_mod != 3 || (s->desc & SrcMask) == SrcMem;
+
+    case DstBitBase:
+    case DstImplicit:
+        break;
+
+    default:
+        switch ( ctxt->opcode )
+        {
+        case 0x63:                         /* ARPL */
+            return !mode_64bit();
+
+        case X86EMUL_OPC_66(0x0f38, 0xf8): /* MOVDIR64B */
+        case X86EMUL_OPC_F2(0x0f38, 0xf8): /* ENQCMD */
+        case X86EMUL_OPC_F3(0x0f38, 0xf8): /* ENQCMDS */
+            return true;
+
+        case X86EMUL_OPC_EVEX_F3(0x0f38, 0x10) ...
+             X86EMUL_OPC_EVEX_F3(0x0f38, 0x15): /* VPMOVUS* */
+        case X86EMUL_OPC_EVEX_F3(0x0f38, 0x20) ...
+             X86EMUL_OPC_EVEX_F3(0x0f38, 0x25): /* VPMOVS* */
+        case X86EMUL_OPC_EVEX_F3(0x0f38, 0x30) ...
+             X86EMUL_OPC_EVEX_F3(0x0f38, 0x35): /* VPMOV{D,Q,W}* */
+            return s->modrm_mod != 3;
+        }
+
+        return false;
+    }
+
+    if ( s->modrm_mod == 3 )
+    {
+        switch ( ctxt->opcode )
+        {
+        case 0xff: /* Grp5 */
+            break;
+
+        case X86EMUL_OPC(0x0f, 0x01): /* CLZERO is the odd one. */
+            return (s->modrm_rm & 7) == 4 && (s->modrm_reg & 7) == 7;
+
+        default:
+            return false;
+        }
+    }
+
+    switch ( ctxt->opcode )
+    {
+    case 0x06:                           /* PUSH %es */
+    case 0x0e:                           /* PUSH %cs */
+    case 0x16:                           /* PUSH %ss */
+    case 0x1e:                           /* PUSH %ds */
+    case 0x50 ... 0x57:                  /* PUSH reg */
+    case 0x60:                           /* PUSHA */
+    case 0x68: case 0x6a:                /* PUSH imm */
+    case 0x6c: case 0x6d:                /* INS */
+    case 0x9a:                           /* CALL (far, direct) */
+    case 0x9c:                           /* PUSHF */
+    case 0xa4: case 0xa5:                /* MOVS */
+    case 0xaa: case 0xab:                /* STOS */
+    case 0xc8:                           /* ENTER */
+    case 0xe8:                           /* CALL (near, direct) */
+    case X86EMUL_OPC(0x0f, 0xa0):        /* PUSH %fs */
+    case X86EMUL_OPC(0x0f, 0xa8):        /* PUSH %gs */
+    case X86EMUL_OPC(0x0f, 0xab):        /* BTS */
+    case X86EMUL_OPC(0x0f, 0xb3):        /* BTR */
+    case X86EMUL_OPC(0x0f, 0xbb):        /* BTC */
+        return true;
+
+    case 0xd9:
+        switch ( s->modrm_reg & 7 )
+        {
+        case 2: /* FST m32fp */
+        case 3: /* FSTP m32fp */
+        case 6: /* FNSTENV */
+        case 7: /* FNSTCW */
+            return true;
+        }
+        break;
+
+    case 0xdb:
+        switch ( s->modrm_reg & 7 )
+        {
+        case 1: /* FISTTP m32i */
+        case 2: /* FIST m32i */
+        case 3: /* FISTP m32i */
+        case 7: /* FSTP m80fp */
+            return true;
+        }
+        break;
+
+    case 0xdd:
+        switch ( s->modrm_reg & 7 )
+        {
+        case 1: /* FISTTP m64i */
+        case 2: /* FST m64fp */
+        case 3: /* FSTP m64fp */
+        case 6: /* FNSAVE */
+        case 7: /* FNSTSW */
+            return true;
+        }
+        break;
+
+    case 0xdf:
+        switch ( s->modrm_reg & 7 )
+        {
+        case 1: /* FISTTP m16i */
+        case 2: /* FIST m16i */
+        case 3: /* FISTP m16i */
+        case 6: /* FBSTP */
+        case 7: /* FISTP m64i */
+            return true;
+        }
+        break;
+
+    case 0xff:
+        switch ( s->modrm_reg & 7 )
+        {
+        case 2: /* CALL (near, indirect) */
+        case 3: /* CALL (far, indirect) */
+        case 6: /* PUSH r/m */
+            return true;
+        }
+        break;
+
+    case X86EMUL_OPC(0x0f, 0x01):
+        switch ( s->modrm_reg & 7 )
+        {
+        case 0: /* SGDT */
+        case 1: /* SIDT */
+        case 4: /* SMSW */
+            return true;
+        }
+        break;
+
+    case X86EMUL_OPC(0x0f, 0xae):
+        switch ( s->modrm_reg & 7 )
+        {
+        case 0: /* FXSAVE */
+        /* case 3: STMXCSR - handled above */
+        case 4: /* XSAVE */
+        case 6: /* XSAVEOPT */
+            return true;
+        }
+        break;
+
+    case X86EMUL_OPC(0x0f, 0xba):
+        return (s->modrm_reg & 7) > 4; /* BTS / BTR / BTC */
+
+    case X86EMUL_OPC(0x0f, 0xc7):
+        switch ( s->modrm_reg & 7 )
+        {
+        case 1: /* CMPXCHG{8,16}B */
+        case 4: /* XSAVEC */
+        case 5: /* XSAVES */
+            return true;
+        }
+        break;
+    }
+
+    return false;
+}
--- /dev/null
+++ b/xen/arch/x86/x86_emulate/util-xen.c
@@ -0,0 +1,249 @@
+/******************************************************************************
+ * util-xen.c
+ *
+ * Generic x86 (32-bit and 64-bit) instruction decoder and emulator hypervisor-
+ * only utility functions.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "private.h"
+
+#include <xen/nospec.h>
+#include <xen/sched.h>
+#include <asm/debugreg.h>
+#include <asm/xstate.h>
+
+#ifndef NDEBUG
+void x86_emulate_free_state(struct x86_emulate_state *s)
+{
+    check_state(s);
+    s->caller = NULL;
+}
+#endif
+
+unsigned int x86_insn_opsize(const struct x86_emulate_state *s)
+{
+    check_state(s);
+
+    return s->op_bytes << 3;
+}
+
+int x86_insn_modrm(const struct x86_emulate_state *s,
+                   unsigned int *rm, unsigned int *reg)
+{
+    check_state(s);
+
+    if ( unlikely(s->modrm_mod > 3) )
+    {
+        if ( rm )
+            *rm = ~0U;
+        if ( reg )
+            *reg = ~0U;
+        return -EINVAL;
+    }
+
+    if ( rm )
+        *rm = s->modrm_rm;
+    if ( reg )
+        *reg = s->modrm_reg;
+
+    return s->modrm_mod;
+}
+
+unsigned long x86_insn_operand_ea(const struct x86_emulate_state *s,
+                                  enum x86_segment *seg)
+{
+    *seg = s->ea.type == OP_MEM ? s->ea.mem.seg : x86_seg_none;
+
+    check_state(s);
+
+    return s->ea.mem.off;
+}
+
+bool x86_insn_is_portio(const struct x86_emulate_state *s,
+                        const struct x86_emulate_ctxt *ctxt)
+{
+    switch ( ctxt->opcode )
+    {
+    case 0x6c ... 0x6f: /* INS / OUTS */
+    case 0xe4 ... 0xe7: /* IN / OUT imm8 */
+    case 0xec ... 0xef: /* IN / OUT %dx */
+        return true;
+    }
+
+    return false;
+}
+
+bool x86_insn_is_cr_access(const struct x86_emulate_state *s,
+                           const struct x86_emulate_ctxt *ctxt)
+{
+    switch ( ctxt->opcode )
+    {
+        unsigned int ext;
+
+    case X86EMUL_OPC(0x0f, 0x01):
+        if ( x86_insn_modrm(s, NULL, &ext) >= 0
+             && (ext & 5) == 4 ) /* SMSW / LMSW */
+            return true;
+        break;
+
+    case X86EMUL_OPC(0x0f, 0x06): /* CLTS */
+    case X86EMUL_OPC(0x0f, 0x20): /* MOV from CRn */
+    case X86EMUL_OPC(0x0f, 0x22): /* MOV to CRn */
+        return true;
+    }
+
+    return false;
+}
+
+unsigned long x86_insn_immediate(const struct x86_emulate_state *s,
+                                 unsigned int nr)
+{
+    check_state(s);
+
+    switch ( nr )
+    {
+    case 0:
+        return s->imm1;
+    case 1:
+        return s->imm2;
+    }
+
+    return 0;
+}
+
+int x86emul_read_xcr(unsigned int reg, uint64_t *val,
+                     struct x86_emulate_ctxt *ctxt)
+{
+    switch ( reg )
+    {
+    case 0:
+        *val = current->arch.xcr0;
+        return X86EMUL_OKAY;
+
+    case 1:
+        if ( current->domain->arch.cpuid->xstate.xgetbv1 )
+            break;
+        /* fall through */
+    default:
+        x86_emul_hw_exception(TRAP_gp_fault, 0, ctxt);
+        return X86EMUL_EXCEPTION;
+    }
+
+    *val = xgetbv(reg);
+
+    return X86EMUL_OKAY;
+}
+
+/* Note: May be called with ctxt=NULL. */
+int x86emul_write_xcr(unsigned int reg, uint64_t val,
+                      struct x86_emulate_ctxt *ctxt)
+{
+    switch ( reg )
+    {
+    case 0:
+        break;
+
+    default:
+    gp_fault:
+        if ( ctxt )
+            x86_emul_hw_exception(TRAP_gp_fault, 0, ctxt);
+        return X86EMUL_EXCEPTION;
+    }
+
+    if ( unlikely(handle_xsetbv(reg, val) != 0) )
+        goto gp_fault;
+
+    return X86EMUL_OKAY;
+}
+
+#ifdef CONFIG_PV
+
+/* Called with NULL ctxt in hypercall context. */
+int x86emul_read_dr(unsigned int reg, unsigned long *val,
+                    struct x86_emulate_ctxt *ctxt)
+{
+    struct vcpu *curr = current;
+
+    /* HVM support requires a bit more plumbing before it will work. */
+    ASSERT(is_pv_vcpu(curr));
+
+    switch ( reg )
+    {
+    case 0 ... 3:
+        *val = array_access_nospec(curr->arch.dr, reg);
+        break;
+
+    case 4:
+        if ( curr->arch.pv.ctrlreg[4] & X86_CR4_DE )
+            goto ud_fault;
+
+        /* Fallthrough */
+    case 6:
+        *val = curr->arch.dr6;
+        break;
+
+    case 5:
+        if ( curr->arch.pv.ctrlreg[4] & X86_CR4_DE )
+            goto ud_fault;
+
+        /* Fallthrough */
+    case 7:
+        *val = curr->arch.dr7 | curr->arch.pv.dr7_emul;
+        break;
+
+    ud_fault:
+    default:
+        if ( ctxt )
+            x86_emul_hw_exception(TRAP_invalid_op, X86_EVENT_NO_EC, ctxt);
+
+        return X86EMUL_EXCEPTION;
+    }
+
+    return X86EMUL_OKAY;
+}
+
+int x86emul_write_dr(unsigned int reg, unsigned long val,
+                     struct x86_emulate_ctxt *ctxt)
+{
+    struct vcpu *curr = current;
+
+    /* HVM support requires a bit more plumbing before it will work. */
+    ASSERT(is_pv_vcpu(curr));
+
+    switch ( set_debugreg(curr, reg, val) )
+    {
+    case 0:
+        return X86EMUL_OKAY;
+
+    case -ENODEV:
+        x86_emul_hw_exception(TRAP_invalid_op, X86_EVENT_NO_EC, ctxt);
+        return X86EMUL_EXCEPTION;
+
+    default:
+        x86_emul_hw_exception(TRAP_gp_fault, 0, ctxt);
+        return X86EMUL_EXCEPTION;
+    }
+}
+
+#endif /* CONFIG_PV */
+
+int x86emul_cpuid(uint32_t leaf, uint32_t subleaf,
+                  struct cpuid_leaf *res, struct x86_emulate_ctxt *ctxt)
+{
+    guest_cpuid(current, leaf, subleaf, res);
+
+    return X86EMUL_OKAY;
+}
--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -8412,393 +8412,3 @@ int x86_emulate_wrapper(
     return rc;
 }
 #endif
-
-static inline void check_state(const struct x86_emulate_state *state)
-{
-#if defined(__XEN__) && !defined(NDEBUG)
-    ASSERT(state->caller);
-#endif
-}
-
-#if defined(__XEN__) && !defined(NDEBUG)
-void x86_emulate_free_state(struct x86_emulate_state *state)
-{
-    check_state(state);
-    state->caller = NULL;
-}
-#endif
-
-unsigned int
-x86_insn_opsize(const struct x86_emulate_state *state)
-{
-    check_state(state);
-
-    return state->op_bytes << 3;
-}
-
-int
-x86_insn_modrm(const struct x86_emulate_state *state,
-               unsigned int *rm, unsigned int *reg)
-{
-    check_state(state);
-
-    if ( unlikely(state->modrm_mod > 3) )
-    {
-        if ( rm )
-            *rm = ~0U;
-        if ( reg )
-            *reg = ~0U;
-        return -EINVAL;
-    }
-
-    if ( rm )
-        *rm = state->modrm_rm;
-    if ( reg )
-        *reg = state->modrm_reg;
-
-    return state->modrm_mod;
-}
-
-unsigned long
-x86_insn_operand_ea(const struct x86_emulate_state *state,
-                    enum x86_segment *seg)
-{
-    *seg = state->ea.type == OP_MEM ? state->ea.mem.seg : x86_seg_none;
-
-    check_state(state);
-
-    return state->ea.mem.off;
-}
-
-/*
- * This function means to return 'true' for all supported insns with explicit
- * accesses to memory.  This means also insns which don't have an explicit
- * memory operand (like POP), but it does not mean e.g. segment selector
- * loads, where the descriptor table access is considered an implicit one.
- */
-bool
-x86_insn_is_mem_access(const struct x86_emulate_state *state,
-                       const struct x86_emulate_ctxt *ctxt)
-{
-    if ( mode_64bit() && state->not_64bit )
-        return false;
-
-    if ( state->ea.type == OP_MEM )
-    {
-        switch ( ctxt->opcode )
-        {
-        case 0x8d: /* LEA */
-        case X86EMUL_OPC(0x0f, 0x0d): /* PREFETCH */
-        case X86EMUL_OPC(0x0f, 0x18)
-         ... X86EMUL_OPC(0x0f, 0x1f): /* NOP space */
-        case X86EMUL_OPC_66(0x0f, 0x18)
-         ... X86EMUL_OPC_66(0x0f, 0x1f): /* NOP space */
-        case X86EMUL_OPC_F3(0x0f, 0x18)
-         ... X86EMUL_OPC_F3(0x0f, 0x1f): /* NOP space */
-        case X86EMUL_OPC_F2(0x0f, 0x18)
-         ... X86EMUL_OPC_F2(0x0f, 0x1f): /* NOP space */
-        case X86EMUL_OPC(0x0f, 0xb9): /* UD1 */
-        case X86EMUL_OPC(0x0f, 0xff): /* UD0 */
-        case X86EMUL_OPC_EVEX_66(0x0f38, 0xc6): /* V{GATH,SCATT}ERPF*D* */
-        case X86EMUL_OPC_EVEX_66(0x0f38, 0xc7): /* V{GATH,SCATT}ERPF*Q* */
-            return false;
-
-        case X86EMUL_OPC(0x0f, 0x01):
-            return (state->modrm_reg & 7) != 7; /* INVLPG */
-
-        case X86EMUL_OPC(0x0f, 0xae):
-            return (state->modrm_reg & 7) != 7; /* CLFLUSH */
-
-        case X86EMUL_OPC_66(0x0f, 0xae):
-            return (state->modrm_reg & 7) < 6; /* CLWB, CLFLUSHOPT */
-        }
-
-        return true;
-    }
-
-    switch ( ctxt->opcode )
-    {
-    case 0x06 ... 0x07: /* PUSH / POP %es */
-    case 0x0e:          /* PUSH %cs */
-    case 0x16 ... 0x17: /* PUSH / POP %ss */
-    case 0x1e ... 0x1f: /* PUSH / POP %ds */
-    case 0x50 ... 0x5f: /* PUSH / POP reg */
-    case 0x60 ... 0x61: /* PUSHA / POPA */
-    case 0x68: case 0x6a: /* PUSH imm */
-    case 0x6c ... 0x6f: /* INS / OUTS */
-    case 0x8f:          /* POP r/m */
-    case 0x9a:          /* CALL (far, direct) */
-    case 0x9c ... 0x9d: /* PUSHF / POPF */
-    case 0xa4 ... 0xa7: /* MOVS / CMPS */
-    case 0xaa ... 0xaf: /* STOS / LODS / SCAS */
-    case 0xc2 ... 0xc3: /* RET (near) */
-    case 0xc8 ... 0xc9: /* ENTER / LEAVE */
-    case 0xca ... 0xcb: /* RET (far) */
-    case 0xd7:          /* XLAT */
-    case 0xe8:          /* CALL (near, direct) */
-    case X86EMUL_OPC(0x0f, 0xa0):         /* PUSH %fs */
-    case X86EMUL_OPC(0x0f, 0xa1):         /* POP %fs */
-    case X86EMUL_OPC(0x0f, 0xa8):         /* PUSH %gs */
-    case X86EMUL_OPC(0x0f, 0xa9):         /* POP %gs */
-    CASE_SIMD_PACKED_INT_VEX(0x0f, 0xf7): /* MASKMOV{Q,DQU} */
-                                          /* VMASKMOVDQU */
-        return true;
-
-    case 0xff:
-        switch ( state->modrm_reg & 7 )
-        {
-        case 2: /* CALL (near, indirect) */
-        case 6: /* PUSH r/m */
-            return true;
-        }
-        break;
-
-    case X86EMUL_OPC(0x0f, 0x01):
-        /* Cover CLZERO. */
-        return (state->modrm_rm & 7) == 4 && (state->modrm_reg & 7) == 7;
-    }
-
-    return false;
-}
-
-/*
- * This function means to return 'true' for all supported insns with explicit
- * writes to memory.  This means also insns which don't have an explicit
- * memory operand (like PUSH), but it does not mean e.g. segment selector
- * loads, where the (possible) descriptor table write is considered an
- * implicit access.
- */
-bool
-x86_insn_is_mem_write(const struct x86_emulate_state *state,
-                      const struct x86_emulate_ctxt *ctxt)
-{
-    if ( mode_64bit() && state->not_64bit )
-        return false;
-
-    switch ( state->desc & DstMask )
-    {
-    case DstMem:
-        /* The SrcMem check is to cover {,V}MASKMOV{Q,DQU}. */
-        return state->modrm_mod != 3 || (state->desc & SrcMask) == SrcMem;
-
-    case DstBitBase:
-    case DstImplicit:
-        break;
-
-    default:
-        switch ( ctxt->opcode )
-        {
-        case 0x63:                         /* ARPL */
-            return !mode_64bit();
-
-        case X86EMUL_OPC_66(0x0f38, 0xf8): /* MOVDIR64B */
-        case X86EMUL_OPC_F2(0x0f38, 0xf8): /* ENQCMD */
-        case X86EMUL_OPC_F3(0x0f38, 0xf8): /* ENQCMDS */
-            return true;
-
-        case X86EMUL_OPC_EVEX_F3(0x0f38, 0x10) ...
-             X86EMUL_OPC_EVEX_F3(0x0f38, 0x15): /* VPMOVUS* */
-        case X86EMUL_OPC_EVEX_F3(0x0f38, 0x20) ...
-             X86EMUL_OPC_EVEX_F3(0x0f38, 0x25): /* VPMOVS* */
-        case X86EMUL_OPC_EVEX_F3(0x0f38, 0x30) ...
-             X86EMUL_OPC_EVEX_F3(0x0f38, 0x35): /* VPMOV{D,Q,W}* */
-            return state->modrm_mod != 3;
-        }
-
-        return false;
-    }
-
-    if ( state->modrm_mod == 3 )
-    {
-        switch ( ctxt->opcode )
-        {
-        case 0xff: /* Grp5 */
-            break;
-
-        case X86EMUL_OPC(0x0f, 0x01): /* CLZERO is the odd one. */
-            return (state->modrm_rm & 7) == 4 && (state->modrm_reg & 7) == 7;
-
-        default:
-            return false;
-        }
-    }
-
-    switch ( ctxt->opcode )
-    {
-    case 0x06:                           /* PUSH %es */
-    case 0x0e:                           /* PUSH %cs */
-    case 0x16:                           /* PUSH %ss */
-    case 0x1e:                           /* PUSH %ds */
-    case 0x50 ... 0x57:                  /* PUSH reg */
-    case 0x60:                           /* PUSHA */
-    case 0x68: case 0x6a:                /* PUSH imm */
-    case 0x6c: case 0x6d:                /* INS */
-    case 0x9a:                           /* CALL (far, direct) */
-    case 0x9c:                           /* PUSHF */
-    case 0xa4: case 0xa5:                /* MOVS */
-    case 0xaa: case 0xab:                /* STOS */
-    case 0xc8:                           /* ENTER */
-    case 0xe8:                           /* CALL (near, direct) */
-    case X86EMUL_OPC(0x0f, 0xa0):        /* PUSH %fs */
-    case X86EMUL_OPC(0x0f, 0xa8):        /* PUSH %gs */
-    case X86EMUL_OPC(0x0f, 0xab):        /* BTS */
-    case X86EMUL_OPC(0x0f, 0xb3):        /* BTR */
-    case X86EMUL_OPC(0x0f, 0xbb):        /* BTC */
-        return true;
-
-    case 0xd9:
-        switch ( state->modrm_reg & 7 )
-        {
-        case 2: /* FST m32fp */
-        case 3: /* FSTP m32fp */
-        case 6: /* FNSTENV */
-        case 7: /* FNSTCW */
-            return true;
-        }
-        break;
-
-    case 0xdb:
-        switch ( state->modrm_reg & 7 )
-        {
-        case 1: /* FISTTP m32i */
-        case 2: /* FIST m32i */
-        case 3: /* FISTP m32i */
-        case 7: /* FSTP m80fp */
-            return true;
-        }
-        break;
-
-    case 0xdd:
-        switch ( state->modrm_reg & 7 )
-        {
-        case 1: /* FISTTP m64i */
-        case 2: /* FST m64fp */
-        case 3: /* FSTP m64fp */
-        case 6: /* FNSAVE */
-        case 7: /* FNSTSW */
-            return true;
-        }
-        break;
-
-    case 0xdf:
-        switch ( state->modrm_reg & 7 )
-        {
-        case 1: /* FISTTP m16i */
-        case 2: /* FIST m16i */
-        case 3: /* FISTP m16i */
-        case 6: /* FBSTP */
-        case 7: /* FISTP m64i */
-            return true;
-        }
-        break;
-
-    case 0xff:
-        switch ( state->modrm_reg & 7 )
-        {
-        case 2: /* CALL (near, indirect) */
-        case 3: /* CALL (far, indirect) */
-        case 6: /* PUSH r/m */
-            return true;
-        }
-        break;
-
-    case X86EMUL_OPC(0x0f, 0x01):
-        switch ( state->modrm_reg & 7 )
-        {
-        case 0: /* SGDT */
-        case 1: /* SIDT */
-        case 4: /* SMSW */
-            return true;
-        }
-        break;
-
-    case X86EMUL_OPC(0x0f, 0xae):
-        switch ( state->modrm_reg & 7 )
-        {
-        case 0: /* FXSAVE */
-        /* case 3: STMXCSR - handled above */
-        case 4: /* XSAVE */
-        case 6: /* XSAVEOPT */
-            return true;
-        }
-        break;
-
-    case X86EMUL_OPC(0x0f, 0xba):
-        return (state->modrm_reg & 7) > 4; /* BTS / BTR / BTC */
-
-    case X86EMUL_OPC(0x0f, 0xc7):
-        switch ( state->modrm_reg & 7 )
-        {
-        case 1: /* CMPXCHG{8,16}B */
-        case 4: /* XSAVEC */
-        case 5: /* XSAVES */
-            return true;
-        }
-        break;
-    }
-
-    return false;
-}
-
-bool
-x86_insn_is_portio(const struct x86_emulate_state *state,
-                   const struct x86_emulate_ctxt *ctxt)
-{
-    switch ( ctxt->opcode )
-    {
-    case 0x6c ... 0x6f: /* INS / OUTS */
-    case 0xe4 ... 0xe7: /* IN / OUT imm8 */
-    case 0xec ... 0xef: /* IN / OUT %dx */
-        return true;
-    }
-
-    return false;
-}
-
-bool
-x86_insn_is_cr_access(const struct x86_emulate_state *state,
-                      const struct x86_emulate_ctxt *ctxt)
-{
-    switch ( ctxt->opcode )
-    {
-        unsigned int ext;
-
-    case X86EMUL_OPC(0x0f, 0x01):
-        if ( x86_insn_modrm(state, NULL, &ext) >= 0
-             && (ext & 5) == 4 ) /* SMSW / LMSW */
-            return true;
-        break;
-
-    case X86EMUL_OPC(0x0f, 0x06): /* CLTS */
-    case X86EMUL_OPC(0x0f, 0x20): /* MOV from CRn */
-    case X86EMUL_OPC(0x0f, 0x22): /* MOV to CRn */
-        return true;
-    }
-
-    return false;
-}
-
-unsigned long
-x86_insn_immediate(const struct x86_emulate_state *state, unsigned int nr)
-{
-    check_state(state);
-
-    switch ( nr )
-    {
-    case 0:
-        return state->imm1;
-    case 1:
-        return state->imm2;
-    }
-
-    return 0;
-}
-
-unsigned int
-x86_insn_length(const struct x86_emulate_state *state,
-                const struct x86_emulate_ctxt *ctxt)
-{
-    check_state(state);
-
-    return state->ip - ctxt->regs->r(ip);
-}
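
As an illustrative aside (not part of the patch): a hedged sketch of how
the relocated x86_insn_modrm() is meant to be consumed, mirroring the
INVLPG special case in x86_insn_is_mem_access() above; it assumes an
already-decoded state:

    /* Sketch only: recognize INVLPG (0f 01 /7 with a memory operand). */
    static bool insn_is_invlpg(const struct x86_emulate_state *s,
                               const struct x86_emulate_ctxt *ctxt)
    {
        unsigned int reg;
        int mod = x86_insn_modrm(s, NULL, &reg);

        /*
         * mod < 0 means there was no ModRM byte at all, while mod == 3 is
         * the register form, where /7 encodes SWAPGS/RDTSCP instead.
         */
        return ctxt->opcode == X86EMUL_OPC(0x0f, 0x01) &&
               mod >= 0 && mod != 3 && (reg & 7) == 7;
    }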


