From mboxrd@z Thu Jan 1 00:00:00 1970 From: "Jan Beulich" Subject: [PATCH 14/17] x86emul: abstract out XCRn accesses Date: Wed, 21 Jun 2017 06:07:19 -0600 Message-ID: <594A7D9702000078001653AE@prv-mh.provo.novell.com> References: <594A733B020000780016527C@prv-mh.provo.novell.com> <594A733B020000780016527C@prv-mh.provo.novell.com> Mime-Version: 1.0 Content-Type: multipart/mixed; boundary="=__Part7D456367.1__=" Return-path: Received: from mail6.bemta6.messagelabs.com ([193.109.254.103]) by lists.xenproject.org with esmtp (Exim 4.84_2) (envelope-from ) id 1dNePr-0000NL-Qg for xen-devel@lists.xenproject.org; Wed, 21 Jun 2017 12:07:24 +0000 In-Reply-To: <594A733B020000780016527C@prv-mh.provo.novell.com> List-Unsubscribe: , List-Post: List-Help: List-Subscribe: , Errors-To: xen-devel-bounces@lists.xen.org Sender: "Xen-devel" To: xen-devel Cc: Andrew Cooper List-Id: xen-devel@lists.xenproject.org This is a MIME message. If you are reading this text, you may want to consider changing to a mail reader or gateway that understands how to properly handle MIME multipart messages. --=__Part7D456367.1__= Content-Type: text/plain; charset=US-ASCII Content-Transfer-Encoding: quoted-printable Content-Disposition: inline Use hooks, just like done for other special purpose registers. This includes moving XCR0 checks from hvmemul_get_fpu() to the emulator itself as well as adding support for XGETBV emulation. For now fuzzer reads will obtain the real values (minus the fuzzing of the hook pointer itself). Signed-off-by: Jan Beulich --- a/tools/fuzz/x86_instruction_emulator/fuzz-emul.c +++ b/tools/fuzz/x86_instruction_emulator/fuzz-emul.c @@ -409,6 +409,8 @@ static int fuzz_write_cr( return X86EMUL_OKAY; } =20 +#define fuzz_read_xcr emul_test_read_xcr + enum { MSRI_IA32_SYSENTER_CS, MSRI_IA32_SYSENTER_ESP, @@ -527,6 +529,7 @@ static const struct x86_emulate_ops all_ SET(write_io), SET(read_cr), SET(write_cr), + SET(read_xcr), SET(read_msr), SET(write_msr), SET(wbinvd), @@ -635,6 +638,7 @@ enum { HOOK_write_cr, HOOK_read_dr, HOOK_write_dr, + HOOK_read_xcr, HOOK_read_msr, HOOK_write_msr, HOOK_wbinvd, @@ -679,6 +683,7 @@ static void disable_hooks(struct x86_emu MAYBE_DISABLE_HOOK(write_io); MAYBE_DISABLE_HOOK(read_cr); MAYBE_DISABLE_HOOK(write_cr); + MAYBE_DISABLE_HOOK(read_xcr); MAYBE_DISABLE_HOOK(read_msr); MAYBE_DISABLE_HOOK(write_msr); MAYBE_DISABLE_HOOK(wbinvd); --- a/tools/tests/x86_emulator/test_x86_emulator.c +++ b/tools/tests/x86_emulator/test_x86_emulator.c @@ -368,6 +368,7 @@ static struct x86_emulate_ops emulops =3D .read_segment =3D read_segment, .cpuid =3D emul_test_cpuid, .read_cr =3D emul_test_read_cr, + .read_xcr =3D emul_test_read_xcr, .read_msr =3D read_msr, .get_fpu =3D emul_test_get_fpu, .put_fpu =3D emul_test_put_fpu, --- a/tools/tests/x86_emulator/x86_emulate.c +++ b/tools/tests/x86_emulator/x86_emulate.c @@ -120,6 +120,19 @@ int emul_test_read_cr( return X86EMUL_UNHANDLEABLE; } =20 +int emul_test_read_xcr( + unsigned int reg, + uint64_t *val, + struct x86_emulate_ctxt *ctxt) +{ + uint32_t lo, hi; + + asm ( "xgetbv" : "=3Da" (lo), "=3Dd" (hi) : "c" (reg) ); + *val =3D lo | ((uint64_t)hi << 32); + + return X86EMUL_OKAY; +} + int emul_test_get_fpu( void (*exception_callback)(void *, struct cpu_user_regs *), void *exception_callback_arg, --- a/tools/tests/x86_emulator/x86_emulate.h +++ b/tools/tests/x86_emulator/x86_emulate.h @@ -215,6 +215,11 @@ int emul_test_read_cr( unsigned long *val, struct x86_emulate_ctxt *ctxt); =20 +int emul_test_read_xcr( + unsigned int reg, + uint64_t *val, + struct x86_emulate_ctxt *ctxt); + int emul_test_get_fpu( void (*exception_callback)(void *, struct cpu_user_regs *), void *exception_callback_arg, --- a/xen/arch/x86/hvm/emulate.c +++ b/xen/arch/x86/hvm/emulate.c @@ -1643,6 +1643,49 @@ static int hvmemul_write_cr( return rc; } =20 +static int hvmemul_read_xcr( + unsigned int reg, + uint64_t *val, + struct x86_emulate_ctxt *ctxt) +{ + uint32_t lo, hi; + + switch ( reg ) + { + case 0: + *val =3D current->arch.xcr0; + return X86EMUL_OKAY; + + case 1: + if ( !cpu_has_xgetbv1 ) + return X86EMUL_UNHANDLEABLE; + break; + + default: + return X86EMUL_UNHANDLEABLE; + } + + asm ( ".byte 0x0f,0x01,0xd0" /* xgetbv */ + : "=3Da" (lo), "=3Dd" (hi) : "c" (reg) ); + *val =3D lo | ((uint64_t)hi << 32); + HVMTRACE_LONG_2D(XCR_READ, reg, TRC_PAR_LONG(*val)); + + return X86EMUL_OKAY; +} + +static int hvmemul_write_xcr( + unsigned int reg, + uint64_t val, + struct x86_emulate_ctxt *ctxt) +{ + HVMTRACE_LONG_2D(XCR_WRITE, reg, TRC_PAR_LONG(val)); + if ( likely(handle_xsetbv(reg, val) =3D=3D 0) ) + return X86EMUL_OKAY; + + x86_emul_hw_exception(TRAP_gp_fault, 0, ctxt); + return X86EMUL_EXCEPTION; +} + static int hvmemul_read_msr( unsigned int reg, uint64_t *val, @@ -1691,22 +1734,6 @@ static int hvmemul_get_fpu( { struct vcpu *curr =3D current; =20 - switch ( type ) - { - case X86EMUL_FPU_fpu: - case X86EMUL_FPU_wait: - case X86EMUL_FPU_mmx: - case X86EMUL_FPU_xmm: - break; - case X86EMUL_FPU_ymm: - if ( !(curr->arch.xcr0 & XSTATE_SSE) || - !(curr->arch.xcr0 & XSTATE_YMM) ) - return X86EMUL_UNHANDLEABLE; - break; - default: - return X86EMUL_UNHANDLEABLE; - } - if ( !curr->fpu_dirtied ) hvm_funcs.fpu_dirty_intercept(); else if ( type =3D=3D X86EMUL_FPU_fpu ) @@ -1890,6 +1917,8 @@ static const struct x86_emulate_ops hvm_ .write_io =3D hvmemul_write_io, .read_cr =3D hvmemul_read_cr, .write_cr =3D hvmemul_write_cr, + .read_xcr =3D hvmemul_read_xcr, + .write_xcr =3D hvmemul_write_xcr, .read_msr =3D hvmemul_read_msr, .write_msr =3D hvmemul_write_msr, .wbinvd =3D hvmemul_wbinvd, @@ -1915,6 +1944,8 @@ static const struct x86_emulate_ops hvm_ .write_io =3D hvmemul_write_io_discard, .read_cr =3D hvmemul_read_cr, .write_cr =3D hvmemul_write_cr, + .read_xcr =3D hvmemul_read_xcr, + .write_xcr =3D hvmemul_write_xcr, .read_msr =3D hvmemul_read_msr, .write_msr =3D hvmemul_write_msr_discard, .wbinvd =3D hvmemul_wbinvd_discard, --- a/xen/arch/x86/traps.c +++ b/xen/arch/x86/traps.c @@ -2493,6 +2493,16 @@ static int priv_op_write_dr(unsigned int ? X86EMUL_OKAY : X86EMUL_UNHANDLEABLE; } =20 +static int priv_op_write_xcr(unsigned int reg, uint64_t val, + struct x86_emulate_ctxt *ctxt) +{ + if ( likely(handle_xsetbv(reg, val) =3D=3D 0) ) + return X86EMUL_OKAY; + + x86_emul_hw_exception(TRAP_gp_fault, 0, ctxt); + return X86EMUL_EXCEPTION; +} + static inline uint64_t guest_misc_enable(uint64_t val) { val &=3D ~(MSR_IA32_MISC_ENABLE_PERF_AVAIL | @@ -2969,6 +2979,7 @@ static const struct x86_emulate_ops priv .write_cr =3D priv_op_write_cr, .read_dr =3D priv_op_read_dr, .write_dr =3D priv_op_write_dr, + .write_xcr =3D priv_op_write_xcr, .read_msr =3D priv_op_read_msr, .write_msr =3D priv_op_write_msr, .cpuid =3D pv_emul_cpuid, --- a/xen/arch/x86/x86_emulate/x86_emulate.c +++ b/xen/arch/x86/x86_emulate/x86_emulate.c @@ -1117,10 +1117,27 @@ static int _get_fpu( struct x86_emulate_ctxt *ctxt, const struct x86_emulate_ops *ops) { + uint64_t xcr0; int rc; =20 fail_if(!ops->get_fpu); ASSERT(type !=3D X86EMUL_FPU_none); + + if ( type < X86EMUL_FPU_ymm || !ops->read_xcr || + ops->read_xcr(0, &xcr0, ctxt) !=3D X86EMUL_OKAY ) + xcr0 =3D 0; + + switch ( type ) + { + case X86EMUL_FPU_ymm: + if ( !(xcr0 & XSTATE_SSE) || !(xcr0 & XSTATE_YMM) ) + return X86EMUL_UNHANDLEABLE; + break; + + default: + break; + } + rc =3D ops->get_fpu(fpu_handle_exception, fic, type, ctxt); =20 if ( rc =3D=3D X86EMUL_OKAY ) @@ -1648,7 +1665,8 @@ in_protmode( #define EBX 3 =20 static bool vcpu_has( - unsigned int eax, + unsigned int leaf, + unsigned int subleaf, unsigned int reg, unsigned int bit, struct x86_emulate_ctxt *ctxt, @@ -1658,7 +1676,7 @@ static bool vcpu_has( int rc =3D X86EMUL_OKAY; =20 fail_if(!ops->cpuid); - rc =3D ops->cpuid(eax, 0, &res, ctxt); + rc =3D ops->cpuid(leaf, subleaf, &res, ctxt); if ( rc =3D=3D X86EMUL_OKAY ) { switch ( reg ) @@ -1677,53 +1695,56 @@ static bool vcpu_has( return rc =3D=3D X86EMUL_OKAY; } =20 -#define vcpu_has_fpu() vcpu_has( 1, EDX, 0, ctxt, ops) -#define vcpu_has_sep() vcpu_has( 1, EDX, 11, ctxt, ops) -#define vcpu_has_cx8() vcpu_has( 1, EDX, 8, ctxt, ops) -#define vcpu_has_cmov() vcpu_has( 1, EDX, 15, ctxt, ops) -#define vcpu_has_clflush() vcpu_has( 1, EDX, 19, ctxt, ops) -#define vcpu_has_mmx() vcpu_has( 1, EDX, 23, ctxt, ops) -#define vcpu_has_sse() vcpu_has( 1, EDX, 25, ctxt, ops) -#define vcpu_has_sse2() vcpu_has( 1, EDX, 26, ctxt, ops) -#define vcpu_has_sse3() vcpu_has( 1, ECX, 0, ctxt, ops) -#define vcpu_has_pclmulqdq() vcpu_has( 1, ECX, 1, ctxt, ops) -#define vcpu_has_ssse3() vcpu_has( 1, ECX, 9, ctxt, ops) -#define vcpu_has_fma() vcpu_has( 1, ECX, 12, ctxt, ops) -#define vcpu_has_cx16() vcpu_has( 1, ECX, 13, ctxt, ops) -#define vcpu_has_sse4_1() vcpu_has( 1, ECX, 19, ctxt, ops) -#define vcpu_has_sse4_2() vcpu_has( 1, ECX, 20, ctxt, ops) -#define vcpu_has_movbe() vcpu_has( 1, ECX, 22, ctxt, ops) -#define vcpu_has_popcnt() vcpu_has( 1, ECX, 23, ctxt, ops) -#define vcpu_has_aesni() vcpu_has( 1, ECX, 25, ctxt, ops) -#define vcpu_has_avx() vcpu_has( 1, ECX, 28, ctxt, ops) -#define vcpu_has_f16c() vcpu_has( 1, ECX, 29, ctxt, ops) -#define vcpu_has_rdrand() vcpu_has( 1, ECX, 30, ctxt, ops) -#define vcpu_has_mmxext() (vcpu_has(0x80000001, EDX, 22, ctxt, ops) = || \ +#define X 0 /* Just for documentation purposes. */ + +#define vcpu_has_fpu() vcpu_has( 1, X, EDX, 0, ctxt, = ops) +#define vcpu_has_sep() vcpu_has( 1, X, EDX, 11, ctxt, = ops) +#define vcpu_has_cx8() vcpu_has( 1, X, EDX, 8, ctxt, = ops) +#define vcpu_has_cmov() vcpu_has( 1, X, EDX, 15, ctxt, = ops) +#define vcpu_has_clflush() vcpu_has( 1, X, EDX, 19, ctxt, = ops) +#define vcpu_has_mmx() vcpu_has( 1, X, EDX, 23, ctxt, = ops) +#define vcpu_has_sse() vcpu_has( 1, X, EDX, 25, ctxt, = ops) +#define vcpu_has_sse2() vcpu_has( 1, X, EDX, 26, ctxt, = ops) +#define vcpu_has_sse3() vcpu_has( 1, X, ECX, 0, ctxt, = ops) +#define vcpu_has_pclmulqdq() vcpu_has( 1, X, ECX, 1, ctxt, = ops) +#define vcpu_has_ssse3() vcpu_has( 1, X, ECX, 9, ctxt, = ops) +#define vcpu_has_fma() vcpu_has( 1, X, ECX, 12, ctxt, = ops) +#define vcpu_has_cx16() vcpu_has( 1, X, ECX, 13, ctxt, = ops) +#define vcpu_has_sse4_1() vcpu_has( 1, X, ECX, 19, ctxt, = ops) +#define vcpu_has_sse4_2() vcpu_has( 1, X, ECX, 20, ctxt, = ops) +#define vcpu_has_movbe() vcpu_has( 1, X, ECX, 22, ctxt, = ops) +#define vcpu_has_popcnt() vcpu_has( 1, X, ECX, 23, ctxt, = ops) +#define vcpu_has_aesni() vcpu_has( 1, X, ECX, 25, ctxt, = ops) +#define vcpu_has_avx() vcpu_has( 1, X, ECX, 28, ctxt, = ops) +#define vcpu_has_f16c() vcpu_has( 1, X, ECX, 29, ctxt, = ops) +#define vcpu_has_rdrand() vcpu_has( 1, X, ECX, 30, ctxt, = ops) +#define vcpu_has_mmxext() (vcpu_has(0x80000001, X, EDX, 22, ctxt, = ops) || \ vcpu_has_sse()) -#define vcpu_has_3dnow_ext() vcpu_has(0x80000001, EDX, 30, ctxt, ops) -#define vcpu_has_3dnow() vcpu_has(0x80000001, EDX, 31, ctxt, ops) -#define vcpu_has_lahf_lm() vcpu_has(0x80000001, ECX, 0, ctxt, ops) -#define vcpu_has_cr8_legacy() vcpu_has(0x80000001, ECX, 4, ctxt, ops) -#define vcpu_has_lzcnt() vcpu_has(0x80000001, ECX, 5, ctxt, ops) -#define vcpu_has_sse4a() vcpu_has(0x80000001, ECX, 6, ctxt, ops) -#define vcpu_has_misalignsse() vcpu_has(0x80000001, ECX, 7, ctxt, ops) -#define vcpu_has_xop() vcpu_has(0x80000001, ECX, 12, ctxt, ops) -#define vcpu_has_fma4() vcpu_has(0x80000001, ECX, 16, ctxt, ops) -#define vcpu_has_tbm() vcpu_has(0x80000001, ECX, 21, ctxt, ops) -#define vcpu_has_bmi1() vcpu_has( 7, EBX, 3, ctxt, ops) -#define vcpu_has_hle() vcpu_has( 7, EBX, 4, ctxt, ops) -#define vcpu_has_avx2() vcpu_has( 7, EBX, 5, ctxt, ops) -#define vcpu_has_bmi2() vcpu_has( 7, EBX, 8, ctxt, ops) -#define vcpu_has_rtm() vcpu_has( 7, EBX, 11, ctxt, ops) -#define vcpu_has_mpx() vcpu_has( 7, EBX, 14, ctxt, ops) -#define vcpu_has_rdseed() vcpu_has( 7, EBX, 18, ctxt, ops) -#define vcpu_has_adx() vcpu_has( 7, EBX, 19, ctxt, ops) -#define vcpu_has_smap() vcpu_has( 7, EBX, 20, ctxt, ops) -#define vcpu_has_clflushopt() vcpu_has( 7, EBX, 23, ctxt, ops) -#define vcpu_has_clwb() vcpu_has( 7, EBX, 24, ctxt, ops) -#define vcpu_has_sha() vcpu_has( 7, EBX, 29, ctxt, ops) -#define vcpu_has_rdpid() vcpu_has( 7, ECX, 22, ctxt, ops) -#define vcpu_has_clzero() vcpu_has(0x80000008, EBX, 0, ctxt, ops) +#define vcpu_has_3dnow_ext() vcpu_has(0x80000001, X, EDX, 30, ctxt, = ops) +#define vcpu_has_3dnow() vcpu_has(0x80000001, X, EDX, 31, ctxt, = ops) +#define vcpu_has_lahf_lm() vcpu_has(0x80000001, X, ECX, 0, ctxt, = ops) +#define vcpu_has_cr8_legacy() vcpu_has(0x80000001, X, ECX, 4, ctxt, = ops) +#define vcpu_has_lzcnt() vcpu_has(0x80000001, X, ECX, 5, ctxt, = ops) +#define vcpu_has_sse4a() vcpu_has(0x80000001, X, ECX, 6, ctxt, = ops) +#define vcpu_has_misalignsse() vcpu_has(0x80000001, X, ECX, 7, ctxt, = ops) +#define vcpu_has_xop() vcpu_has(0x80000001, X, ECX, 12, ctxt, = ops) +#define vcpu_has_fma4() vcpu_has(0x80000001, X, ECX, 16, ctxt, = ops) +#define vcpu_has_tbm() vcpu_has(0x80000001, X, ECX, 21, ctxt, = ops) +#define vcpu_has_bmi1() vcpu_has( 7, 0, EBX, 3, ctxt, = ops) +#define vcpu_has_hle() vcpu_has( 7, 0, EBX, 4, ctxt, = ops) +#define vcpu_has_avx2() vcpu_has( 7, 0, EBX, 5, ctxt, = ops) +#define vcpu_has_bmi2() vcpu_has( 7, 0, EBX, 8, ctxt, = ops) +#define vcpu_has_rtm() vcpu_has( 7, 0, EBX, 11, ctxt, = ops) +#define vcpu_has_mpx() vcpu_has( 7, 0, EBX, 14, ctxt, = ops) +#define vcpu_has_rdseed() vcpu_has( 7, 0, EBX, 18, ctxt, = ops) +#define vcpu_has_adx() vcpu_has( 7, 0, EBX, 19, ctxt, = ops) +#define vcpu_has_smap() vcpu_has( 7, 0, EBX, 20, ctxt, = ops) +#define vcpu_has_clflushopt() vcpu_has( 7, 0, EBX, 23, ctxt, = ops) +#define vcpu_has_clwb() vcpu_has( 7, 0, EBX, 24, ctxt, = ops) +#define vcpu_has_sha() vcpu_has( 7, 0, EBX, 29, ctxt, = ops) +#define vcpu_has_rdpid() vcpu_has( 7, 0, ECX, 22, ctxt, = ops) +#define vcpu_has_xgetbv1() vcpu_has( 0xd, 1, EAX, 2, ctxt, = ops) +#define vcpu_has_clzero() vcpu_has(0x80000008, X, EBX, 0, ctxt, = ops) =20 #define vcpu_must_have(feat) \ generate_exception_if(!vcpu_has_##feat(), EXC_UD) @@ -5144,18 +5165,33 @@ x86_emulate( _regs.eflags |=3D X86_EFLAGS_AC; goto complete_insn; =20 -#ifdef __XEN__ - case 0xd1: /* xsetbv */ + case 0xd0: /* xgetbv */ generate_exception_if(vex.pfx, EXC_UD); - if ( !ops->read_cr || ops->read_cr(4, &cr4, ctxt) !=3D = X86EMUL_OKAY ) + if ( !ops->read_cr || !ops->read_xcr || + ops->read_cr(4, &cr4, ctxt) !=3D X86EMUL_OKAY ) cr4 =3D 0; generate_exception_if(!(cr4 & X86_CR4_OSXSAVE), EXC_UD); - generate_exception_if(!mode_ring0() || - handle_xsetbv(_regs.ecx, - _regs.eax | (_regs.rdx << = 32)), + generate_exception_if(_regs.ecx > (vcpu_has_xgetbv1() ? 1 : = 0), EXC_GP, 0); + rc =3D ops->read_xcr(_regs.ecx, &msr_val, ctxt); + if ( rc !=3D X86EMUL_OKAY ) + goto done; + _regs.r(ax) =3D (uint32_t)msr_val; + _regs.r(dx) =3D msr_val >> 32; + goto complete_insn; + + case 0xd1: /* xsetbv */ + generate_exception_if(vex.pfx, EXC_UD); + if ( !ops->read_cr || !ops->write_xcr || + ops->read_cr(4, &cr4, ctxt) !=3D X86EMUL_OKAY ) + cr4 =3D 0; + generate_exception_if(!(cr4 & X86_CR4_OSXSAVE), EXC_UD); + generate_exception_if(!mode_ring0() || _regs.ecx, EXC_GP, 0); + rc =3D ops->write_xcr(_regs.ecx, + _regs.eax | ((uint64_t)_regs.edx << 32), = ctxt); + if ( rc !=3D X86EMUL_OKAY ) + goto done; goto complete_insn; -#endif =20 case 0xd4: /* vmfunc */ generate_exception_if(vex.pfx, EXC_UD); --- a/xen/arch/x86/x86_emulate/x86_emulate.h +++ b/xen/arch/x86/x86_emulate/x86_emulate.h @@ -380,6 +380,24 @@ struct x86_emulate_ops struct x86_emulate_ctxt *ctxt); =20 /* + * read_xcr: Read from extended control register. + * @reg: [IN ] Register to read. + */ + int (*read_xcr)( + unsigned int reg, + uint64_t *val, + struct x86_emulate_ctxt *ctxt); + + /* + * write_xcr: Write to extended control register. + * @reg: [IN ] Register to write. + */ + int (*write_xcr)( + unsigned int reg, + uint64_t val, + struct x86_emulate_ctxt *ctxt); + + /* * read_msr: Read from model-specific register. * @reg: [IN ] Register to read. */ --- a/xen/include/asm-x86/hvm/trace.h +++ b/xen/include/asm-x86/hvm/trace.h @@ -33,6 +33,8 @@ #define DO_TRC_HVM_CR_WRITE64 DEFAULT_HVM_REGACCESS #define DO_TRC_HVM_DR_READ DEFAULT_HVM_REGACCESS #define DO_TRC_HVM_DR_WRITE DEFAULT_HVM_REGACCESS +#define DO_TRC_HVM_XCR_READ64 DEFAULT_HVM_REGACCESS +#define DO_TRC_HVM_XCR_WRITE64 DEFAULT_HVM_REGACCESS #define DO_TRC_HVM_MSR_READ DEFAULT_HVM_REGACCESS #define DO_TRC_HVM_MSR_WRITE DEFAULT_HVM_REGACCESS #define DO_TRC_HVM_RDTSC DEFAULT_HVM_REGACCESS --- a/xen/include/asm-x86/x86-defns.h +++ b/xen/include/asm-x86/x86-defns.h @@ -66,4 +66,28 @@ #define X86_CR4_SMAP 0x00200000 /* enable SMAP */ #define X86_CR4_PKE 0x00400000 /* enable PKE */ =20 +/* + * XSTATE component flags in XCR0 + */ +#define _XSTATE_FP 0 +#define XSTATE_FP (1ULL << _XSTATE_FP) +#define _XSTATE_SSE 1 +#define XSTATE_SSE (1ULL << _XSTATE_SSE) +#define _XSTATE_YMM 2 +#define XSTATE_YMM (1ULL << _XSTATE_YMM) +#define _XSTATE_BNDREGS 3 +#define XSTATE_BNDREGS (1ULL << _XSTATE_BNDREGS) +#define _XSTATE_BNDCSR 4 +#define XSTATE_BNDCSR (1ULL << _XSTATE_BNDCSR) +#define _XSTATE_OPMASK 5 +#define XSTATE_OPMASK (1ULL << _XSTATE_OPMASK) +#define _XSTATE_ZMM 6 +#define XSTATE_ZMM (1ULL << _XSTATE_ZMM) +#define _XSTATE_HI_ZMM 7 +#define XSTATE_HI_ZMM (1ULL << _XSTATE_HI_ZMM) +#define _XSTATE_PKRU 9 +#define XSTATE_PKRU (1ULL << _XSTATE_PKRU) +#define _XSTATE_LWP 62 +#define XSTATE_LWP (1ULL << _XSTATE_LWP) + #endif /* __XEN_X86_DEFNS_H__ */ --- a/xen/include/asm-x86/xstate.h +++ b/xen/include/asm-x86/xstate.h @@ -10,6 +10,7 @@ =20 #include #include +#include =20 #define FCW_DEFAULT 0x037f #define FCW_RESET 0x0040 @@ -28,27 +29,6 @@ extern uint32_t mxcsr_mask; #define XSAVE_HDR_OFFSET FXSAVE_SIZE #define XSTATE_AREA_MIN_SIZE (FXSAVE_SIZE + XSAVE_HDR_SIZE) =20 -#define _XSTATE_FP 0 -#define XSTATE_FP (1ULL << _XSTATE_FP) -#define _XSTATE_SSE 1 -#define XSTATE_SSE (1ULL << _XSTATE_SSE) -#define _XSTATE_YMM 2 -#define XSTATE_YMM (1ULL << _XSTATE_YMM) -#define _XSTATE_BNDREGS 3 -#define XSTATE_BNDREGS (1ULL << _XSTATE_BNDREGS) -#define _XSTATE_BNDCSR 4 -#define XSTATE_BNDCSR (1ULL << _XSTATE_BNDCSR) -#define _XSTATE_OPMASK 5 -#define XSTATE_OPMASK (1ULL << _XSTATE_OPMASK) -#define _XSTATE_ZMM 6 -#define XSTATE_ZMM (1ULL << _XSTATE_ZMM) -#define _XSTATE_HI_ZMM 7 -#define XSTATE_HI_ZMM (1ULL << _XSTATE_HI_ZMM) -#define _XSTATE_PKRU 9 -#define XSTATE_PKRU (1ULL << _XSTATE_PKRU) -#define _XSTATE_LWP 62 -#define XSTATE_LWP (1ULL << _XSTATE_LWP) - #define XSTATE_FP_SSE (XSTATE_FP | XSTATE_SSE) #define XCNTXT_MASK (XSTATE_FP | XSTATE_SSE | XSTATE_YMM | XSTATE_OPMAS= K | \ XSTATE_ZMM | XSTATE_HI_ZMM | XSTATE_NONLAZY) --- a/xen/include/public/trace.h +++ b/xen/include/public/trace.h @@ -234,6 +234,8 @@ #define TRC_HVM_TRAP (TRC_HVM_HANDLER + 0x23) #define TRC_HVM_TRAP_DEBUG (TRC_HVM_HANDLER + 0x24) #define TRC_HVM_VLAPIC (TRC_HVM_HANDLER + 0x25) +#define TRC_HVM_XCR_READ64 (TRC_HVM_HANDLER + TRC_64_FLAG + 0x26) +#define TRC_HVM_XCR_WRITE64 (TRC_HVM_HANDLER + TRC_64_FLAG + 0x27) =20 #define TRC_HVM_IOPORT_WRITE (TRC_HVM_HANDLER + 0x216) #define TRC_HVM_IOMEM_WRITE (TRC_HVM_HANDLER + 0x217) --=__Part7D456367.1__= Content-Type: text/plain; name="x86emul-XCR-accesses.patch" Content-Transfer-Encoding: quoted-printable Content-Disposition: attachment; filename="x86emul-XCR-accesses.patch" x86emul: abstract out XCRn accesses=0A=0AUse hooks, just like done for = other special purpose registers.=0A=0AThis includes moving XCR0 checks = from hvmemul_get_fpu() to the emulator=0Aitself as well as adding support = for XGETBV emulation.=0A=0AFor now fuzzer reads will obtain the real = values (minus the fuzzing of=0Athe hook pointer itself).=0A=0ASigned-off-by= : Jan Beulich =0A=0A--- a/tools/fuzz/x86_instruction_emu= lator/fuzz-emul.c=0A+++ b/tools/fuzz/x86_instruction_emulator/fuzz-emul.c= =0A@@ -409,6 +409,8 @@ static int fuzz_write_cr(=0A return X86EMUL_OKAY= ;=0A }=0A =0A+#define fuzz_read_xcr emul_test_read_xcr=0A+=0A enum {=0A = MSRI_IA32_SYSENTER_CS,=0A MSRI_IA32_SYSENTER_ESP,=0A@@ -527,6 +529,7 = @@ static const struct x86_emulate_ops all_=0A SET(write_io),=0A = SET(read_cr),=0A SET(write_cr),=0A+ SET(read_xcr),=0A SET(read_m= sr),=0A SET(write_msr),=0A SET(wbinvd),=0A@@ -635,6 +638,7 @@ enum = {=0A HOOK_write_cr,=0A HOOK_read_dr,=0A HOOK_write_dr,=0A+ = HOOK_read_xcr,=0A HOOK_read_msr,=0A HOOK_write_msr,=0A = HOOK_wbinvd,=0A@@ -679,6 +683,7 @@ static void disable_hooks(struct = x86_emu=0A MAYBE_DISABLE_HOOK(write_io);=0A MAYBE_DISABLE_HOOK(read= _cr);=0A MAYBE_DISABLE_HOOK(write_cr);=0A+ MAYBE_DISABLE_HOOK(read_x= cr);=0A MAYBE_DISABLE_HOOK(read_msr);=0A MAYBE_DISABLE_HOOK(write_m= sr);=0A MAYBE_DISABLE_HOOK(wbinvd);=0A--- a/tools/tests/x86_emulator/te= st_x86_emulator.c=0A+++ b/tools/tests/x86_emulator/test_x86_emulator.c=0A@@= -368,6 +368,7 @@ static struct x86_emulate_ops emulops =3D=0A = .read_segment =3D read_segment,=0A .cpuid =3D emul_test_cpuid,=0A = .read_cr =3D emul_test_read_cr,=0A+ .read_xcr =3D emul_test_rea= d_xcr,=0A .read_msr =3D read_msr,=0A .get_fpu =3D emul_test_ge= t_fpu,=0A .put_fpu =3D emul_test_put_fpu,=0A--- a/tools/tests/x86_em= ulator/x86_emulate.c=0A+++ b/tools/tests/x86_emulator/x86_emulate.c=0A@@ = -120,6 +120,19 @@ int emul_test_read_cr(=0A return X86EMUL_UNHANDLEABLE= ;=0A }=0A =0A+int emul_test_read_xcr(=0A+ unsigned int reg,=0A+ = uint64_t *val,=0A+ struct x86_emulate_ctxt *ctxt)=0A+{=0A+ uint32_t = lo, hi;=0A+=0A+ asm ( "xgetbv" : "=3Da" (lo), "=3Dd" (hi) : "c" (reg) = );=0A+ *val =3D lo | ((uint64_t)hi << 32);=0A+=0A+ return X86EMUL_OKA= Y;=0A+}=0A+=0A int emul_test_get_fpu(=0A void (*exception_callback)(voi= d *, struct cpu_user_regs *),=0A void *exception_callback_arg,=0A--- = a/tools/tests/x86_emulator/x86_emulate.h=0A+++ b/tools/tests/x86_emulator/x= 86_emulate.h=0A@@ -215,6 +215,11 @@ int emul_test_read_cr(=0A unsigned = long *val,=0A struct x86_emulate_ctxt *ctxt);=0A =0A+int emul_test_read= _xcr(=0A+ unsigned int reg,=0A+ uint64_t *val,=0A+ struct = x86_emulate_ctxt *ctxt);=0A+=0A int emul_test_get_fpu(=0A void = (*exception_callback)(void *, struct cpu_user_regs *),=0A void = *exception_callback_arg,=0A--- a/xen/arch/x86/hvm/emulate.c=0A+++ = b/xen/arch/x86/hvm/emulate.c=0A@@ -1643,6 +1643,49 @@ static int hvmemul_wr= ite_cr(=0A return rc;=0A }=0A =0A+static int hvmemul_read_xcr(=0A+ = unsigned int reg,=0A+ uint64_t *val,=0A+ struct x86_emulate_ctxt = *ctxt)=0A+{=0A+ uint32_t lo, hi;=0A+=0A+ switch ( reg )=0A+ {=0A+ = case 0:=0A+ *val =3D current->arch.xcr0;=0A+ return = X86EMUL_OKAY;=0A+=0A+ case 1:=0A+ if ( !cpu_has_xgetbv1 )=0A+ = return X86EMUL_UNHANDLEABLE;=0A+ break;=0A+=0A+ = default:=0A+ return X86EMUL_UNHANDLEABLE;=0A+ }=0A+=0A+ asm ( = ".byte 0x0f,0x01,0xd0" /* xgetbv */=0A+ : "=3Da" (lo), "=3Dd" = (hi) : "c" (reg) );=0A+ *val =3D lo | ((uint64_t)hi << 32);=0A+ = HVMTRACE_LONG_2D(XCR_READ, reg, TRC_PAR_LONG(*val));=0A+=0A+ return = X86EMUL_OKAY;=0A+}=0A+=0A+static int hvmemul_write_xcr(=0A+ unsigned = int reg,=0A+ uint64_t val,=0A+ struct x86_emulate_ctxt *ctxt)=0A+{=0A= + HVMTRACE_LONG_2D(XCR_WRITE, reg, TRC_PAR_LONG(val));=0A+ if ( = likely(handle_xsetbv(reg, val) =3D=3D 0) )=0A+ return X86EMUL_OKAY;= =0A+=0A+ x86_emul_hw_exception(TRAP_gp_fault, 0, ctxt);=0A+ return = X86EMUL_EXCEPTION;=0A+}=0A+=0A static int hvmemul_read_msr(=0A = unsigned int reg,=0A uint64_t *val,=0A@@ -1691,22 +1734,6 @@ static = int hvmemul_get_fpu(=0A {=0A struct vcpu *curr =3D current;=0A =0A- = switch ( type )=0A- {=0A- case X86EMUL_FPU_fpu:=0A- case = X86EMUL_FPU_wait:=0A- case X86EMUL_FPU_mmx:=0A- case X86EMUL_FPU_xmm:= =0A- break;=0A- case X86EMUL_FPU_ymm:=0A- if ( !(curr->arc= h.xcr0 & XSTATE_SSE) ||=0A- !(curr->arch.xcr0 & XSTATE_YMM) = )=0A- return X86EMUL_UNHANDLEABLE;=0A- break;=0A- = default:=0A- return X86EMUL_UNHANDLEABLE;=0A- }=0A-=0A if ( = !curr->fpu_dirtied )=0A hvm_funcs.fpu_dirty_intercept();=0A = else if ( type =3D=3D X86EMUL_FPU_fpu )=0A@@ -1890,6 +1917,8 @@ static = const struct x86_emulate_ops hvm_=0A .write_io =3D hvmemul_write_i= o,=0A .read_cr =3D hvmemul_read_cr,=0A .write_cr =3D = hvmemul_write_cr,=0A+ .read_xcr =3D hvmemul_read_xcr,=0A+ = .write_xcr =3D hvmemul_write_xcr,=0A .read_msr =3D hvmemul_rea= d_msr,=0A .write_msr =3D hvmemul_write_msr,=0A .wbinvd = =3D hvmemul_wbinvd,=0A@@ -1915,6 +1944,8 @@ static const struct x86_emulate= _ops hvm_=0A .write_io =3D hvmemul_write_io_discard,=0A = .read_cr =3D hvmemul_read_cr,=0A .write_cr =3D hvmemul_write= _cr,=0A+ .read_xcr =3D hvmemul_read_xcr,=0A+ .write_xcr =3D = hvmemul_write_xcr,=0A .read_msr =3D hvmemul_read_msr,=0A = .write_msr =3D hvmemul_write_msr_discard,=0A .wbinvd =3D = hvmemul_wbinvd_discard,=0A--- a/xen/arch/x86/traps.c=0A+++ b/xen/arch/x86/t= raps.c=0A@@ -2493,6 +2493,16 @@ static int priv_op_write_dr(unsigned = int=0A ? X86EMUL_OKAY : X86EMUL_UNHANDLEABLE;=0A }=0A =0A+static= int priv_op_write_xcr(unsigned int reg, uint64_t val,=0A+ = struct x86_emulate_ctxt *ctxt)=0A+{=0A+ if ( likely(handle_x= setbv(reg, val) =3D=3D 0) )=0A+ return X86EMUL_OKAY;=0A+=0A+ = x86_emul_hw_exception(TRAP_gp_fault, 0, ctxt);=0A+ return X86EMUL_EXCEPT= ION;=0A+}=0A+=0A static inline uint64_t guest_misc_enable(uint64_t val)=0A = {=0A val &=3D ~(MSR_IA32_MISC_ENABLE_PERF_AVAIL |=0A@@ -2969,6 +2979,7 = @@ static const struct x86_emulate_ops priv=0A .write_cr = =3D priv_op_write_cr,=0A .read_dr =3D priv_op_read_dr,=0A = .write_dr =3D priv_op_write_dr,=0A+ .write_xcr = =3D priv_op_write_xcr,=0A .read_msr =3D priv_op_read_msr,=0A= .write_msr =3D priv_op_write_msr,=0A .cpuid = =3D pv_emul_cpuid,=0A--- a/xen/arch/x86/x86_emulate/x86_emulate.c=0A+++ = b/xen/arch/x86/x86_emulate/x86_emulate.c=0A@@ -1117,10 +1117,27 @@ static = int _get_fpu(=0A struct x86_emulate_ctxt *ctxt,=0A const struct = x86_emulate_ops *ops)=0A {=0A+ uint64_t xcr0;=0A int rc;=0A =0A = fail_if(!ops->get_fpu);=0A ASSERT(type !=3D X86EMUL_FPU_none);=0A+=0A+ = if ( type < X86EMUL_FPU_ymm || !ops->read_xcr ||=0A+ ops->read_x= cr(0, &xcr0, ctxt) !=3D X86EMUL_OKAY )=0A+ xcr0 =3D 0;=0A+=0A+ = switch ( type )=0A+ {=0A+ case X86EMUL_FPU_ymm:=0A+ if ( = !(xcr0 & XSTATE_SSE) || !(xcr0 & XSTATE_YMM) )=0A+ return = X86EMUL_UNHANDLEABLE;=0A+ break;=0A+=0A+ default:=0A+ = break;=0A+ }=0A+=0A rc =3D ops->get_fpu(fpu_handle_exception, fic, = type, ctxt);=0A =0A if ( rc =3D=3D X86EMUL_OKAY )=0A@@ -1648,7 +1665,8 = @@ in_protmode(=0A #define EBX 3=0A =0A static bool vcpu_has(=0A- = unsigned int eax,=0A+ unsigned int leaf,=0A+ unsigned int subleaf,=0A= unsigned int reg,=0A unsigned int bit,=0A struct x86_emulate_c= txt *ctxt,=0A@@ -1658,7 +1676,7 @@ static bool vcpu_has(=0A int rc =3D = X86EMUL_OKAY;=0A =0A fail_if(!ops->cpuid);=0A- rc =3D ops->cpuid(eax= , 0, &res, ctxt);=0A+ rc =3D ops->cpuid(leaf, subleaf, &res, ctxt);=0A = if ( rc =3D=3D X86EMUL_OKAY )=0A {=0A switch ( reg )=0A@@ = -1677,53 +1695,56 @@ static bool vcpu_has(=0A return rc =3D=3D = X86EMUL_OKAY;=0A }=0A =0A-#define vcpu_has_fpu() vcpu_has( = 1, EDX, 0, ctxt, ops)=0A-#define vcpu_has_sep() vcpu_has( = 1, EDX, 11, ctxt, ops)=0A-#define vcpu_has_cx8() vcpu_has( = 1, EDX, 8, ctxt, ops)=0A-#define vcpu_has_cmov() vcpu_has( = 1, EDX, 15, ctxt, ops)=0A-#define vcpu_has_clflush() vcpu_has( = 1, EDX, 19, ctxt, ops)=0A-#define vcpu_has_mmx() vcpu_has( = 1, EDX, 23, ctxt, ops)=0A-#define vcpu_has_sse() vcpu_has( = 1, EDX, 25, ctxt, ops)=0A-#define vcpu_has_sse2() vcpu_has( = 1, EDX, 26, ctxt, ops)=0A-#define vcpu_has_sse3() vcpu_has( = 1, ECX, 0, ctxt, ops)=0A-#define vcpu_has_pclmulqdq() vcpu_has( = 1, ECX, 1, ctxt, ops)=0A-#define vcpu_has_ssse3() vcpu_has( = 1, ECX, 9, ctxt, ops)=0A-#define vcpu_has_fma() vcpu_has( = 1, ECX, 12, ctxt, ops)=0A-#define vcpu_has_cx16() vcpu_has( = 1, ECX, 13, ctxt, ops)=0A-#define vcpu_has_sse4_1() vcpu_has( = 1, ECX, 19, ctxt, ops)=0A-#define vcpu_has_sse4_2() vcpu_has( = 1, ECX, 20, ctxt, ops)=0A-#define vcpu_has_movbe() vcpu_has( = 1, ECX, 22, ctxt, ops)=0A-#define vcpu_has_popcnt() vcpu_has( = 1, ECX, 23, ctxt, ops)=0A-#define vcpu_has_aesni() vcpu_has( = 1, ECX, 25, ctxt, ops)=0A-#define vcpu_has_avx() vcpu_has( = 1, ECX, 28, ctxt, ops)=0A-#define vcpu_has_f16c() vcpu_has( = 1, ECX, 29, ctxt, ops)=0A-#define vcpu_has_rdrand() vcpu_has( = 1, ECX, 30, ctxt, ops)=0A-#define vcpu_has_mmxext() (vcpu_has(0x8000000= 1, EDX, 22, ctxt, ops) || \=0A+#define X 0 /* Just for documentation = purposes. */=0A+=0A+#define vcpu_has_fpu() vcpu_has( 1, X, = EDX, 0, ctxt, ops)=0A+#define vcpu_has_sep() vcpu_has( 1, = X, EDX, 11, ctxt, ops)=0A+#define vcpu_has_cx8() vcpu_has( = 1, X, EDX, 8, ctxt, ops)=0A+#define vcpu_has_cmov() vcpu_has( = 1, X, EDX, 15, ctxt, ops)=0A+#define vcpu_has_clflush() vcpu_has( = 1, X, EDX, 19, ctxt, ops)=0A+#define vcpu_has_mmx() = vcpu_has( 1, X, EDX, 23, ctxt, ops)=0A+#define vcpu_has_sse() = vcpu_has( 1, X, EDX, 25, ctxt, ops)=0A+#define vcpu_has_sse2() = vcpu_has( 1, X, EDX, 26, ctxt, ops)=0A+#define vcpu_has_sse3(= ) vcpu_has( 1, X, ECX, 0, ctxt, ops)=0A+#define vcpu_has_pc= lmulqdq() vcpu_has( 1, X, ECX, 1, ctxt, ops)=0A+#define = vcpu_has_ssse3() vcpu_has( 1, X, ECX, 9, ctxt, ops)=0A+#defi= ne vcpu_has_fma() vcpu_has( 1, X, ECX, 12, ctxt, ops)=0A+#d= efine vcpu_has_cx16() vcpu_has( 1, X, ECX, 13, ctxt, = ops)=0A+#define vcpu_has_sse4_1() vcpu_has( 1, X, ECX, 19, = ctxt, ops)=0A+#define vcpu_has_sse4_2() vcpu_has( 1, X, ECX, = 20, ctxt, ops)=0A+#define vcpu_has_movbe() vcpu_has( 1, X, = ECX, 22, ctxt, ops)=0A+#define vcpu_has_popcnt() vcpu_has( 1, = X, ECX, 23, ctxt, ops)=0A+#define vcpu_has_aesni() vcpu_has( = 1, X, ECX, 25, ctxt, ops)=0A+#define vcpu_has_avx() vcpu_has( = 1, X, ECX, 28, ctxt, ops)=0A+#define vcpu_has_f16c() vcpu_has( = 1, X, ECX, 29, ctxt, ops)=0A+#define vcpu_has_rdrand() = vcpu_has( 1, X, ECX, 30, ctxt, ops)=0A+#define vcpu_has_mmxext() = (vcpu_has(0x80000001, X, EDX, 22, ctxt, ops) || \=0A = vcpu_has_sse())=0A-#define vcpu_has_3dnow_ext() vcpu_has(0x800= 00001, EDX, 30, ctxt, ops)=0A-#define vcpu_has_3dnow() vcpu_has(0x800= 00001, EDX, 31, ctxt, ops)=0A-#define vcpu_has_lahf_lm() vcpu_has(0x800= 00001, ECX, 0, ctxt, ops)=0A-#define vcpu_has_cr8_legacy() vcpu_has(0x800= 00001, ECX, 4, ctxt, ops)=0A-#define vcpu_has_lzcnt() vcpu_has(0x800= 00001, ECX, 5, ctxt, ops)=0A-#define vcpu_has_sse4a() vcpu_has(0x800= 00001, ECX, 6, ctxt, ops)=0A-#define vcpu_has_misalignsse() vcpu_has(0x800= 00001, ECX, 7, ctxt, ops)=0A-#define vcpu_has_xop() vcpu_has(0x800= 00001, ECX, 12, ctxt, ops)=0A-#define vcpu_has_fma4() vcpu_has(0x800= 00001, ECX, 16, ctxt, ops)=0A-#define vcpu_has_tbm() vcpu_has(0x800= 00001, ECX, 21, ctxt, ops)=0A-#define vcpu_has_bmi1() vcpu_has( = 7, EBX, 3, ctxt, ops)=0A-#define vcpu_has_hle() vcpu_has( = 7, EBX, 4, ctxt, ops)=0A-#define vcpu_has_avx2() vcpu_has( = 7, EBX, 5, ctxt, ops)=0A-#define vcpu_has_bmi2() vcpu_has( = 7, EBX, 8, ctxt, ops)=0A-#define vcpu_has_rtm() vcpu_has( = 7, EBX, 11, ctxt, ops)=0A-#define vcpu_has_mpx() vcpu_has( = 7, EBX, 14, ctxt, ops)=0A-#define vcpu_has_rdseed() vcpu_has( = 7, EBX, 18, ctxt, ops)=0A-#define vcpu_has_adx() vcpu_has( = 7, EBX, 19, ctxt, ops)=0A-#define vcpu_has_smap() vcpu_has( = 7, EBX, 20, ctxt, ops)=0A-#define vcpu_has_clflushopt() vcpu_has( = 7, EBX, 23, ctxt, ops)=0A-#define vcpu_has_clwb() vcpu_has( = 7, EBX, 24, ctxt, ops)=0A-#define vcpu_has_sha() vcpu_has( = 7, EBX, 29, ctxt, ops)=0A-#define vcpu_has_rdpid() vcpu_has( = 7, ECX, 22, ctxt, ops)=0A-#define vcpu_has_clzero() vcpu_has(0x800= 00008, EBX, 0, ctxt, ops)=0A+#define vcpu_has_3dnow_ext() vcpu_has(0x800= 00001, X, EDX, 30, ctxt, ops)=0A+#define vcpu_has_3dnow() vcpu_has(0x= 80000001, X, EDX, 31, ctxt, ops)=0A+#define vcpu_has_lahf_lm() = vcpu_has(0x80000001, X, ECX, 0, ctxt, ops)=0A+#define vcpu_has_cr8_legacy(= ) vcpu_has(0x80000001, X, ECX, 4, ctxt, ops)=0A+#define vcpu_has_lzcnt() = vcpu_has(0x80000001, X, ECX, 5, ctxt, ops)=0A+#define vcpu_has_sse4a= () vcpu_has(0x80000001, X, ECX, 6, ctxt, ops)=0A+#define vcpu_has_mi= salignsse() vcpu_has(0x80000001, X, ECX, 7, ctxt, ops)=0A+#define = vcpu_has_xop() vcpu_has(0x80000001, X, ECX, 12, ctxt, ops)=0A+#defi= ne vcpu_has_fma4() vcpu_has(0x80000001, X, ECX, 16, ctxt, ops)=0A+#d= efine vcpu_has_tbm() vcpu_has(0x80000001, X, ECX, 21, ctxt, = ops)=0A+#define vcpu_has_bmi1() vcpu_has( 7, 0, EBX, 3, = ctxt, ops)=0A+#define vcpu_has_hle() vcpu_has( 7, 0, EBX, = 4, ctxt, ops)=0A+#define vcpu_has_avx2() vcpu_has( 7, 0, = EBX, 5, ctxt, ops)=0A+#define vcpu_has_bmi2() vcpu_has( 7, = 0, EBX, 8, ctxt, ops)=0A+#define vcpu_has_rtm() vcpu_has( = 7, 0, EBX, 11, ctxt, ops)=0A+#define vcpu_has_mpx() vcpu_has( = 7, 0, EBX, 14, ctxt, ops)=0A+#define vcpu_has_rdseed() vcpu_has( = 7, 0, EBX, 18, ctxt, ops)=0A+#define vcpu_has_adx() = vcpu_has( 7, 0, EBX, 19, ctxt, ops)=0A+#define vcpu_has_smap() = vcpu_has( 7, 0, EBX, 20, ctxt, ops)=0A+#define vcpu_has_clflusho= pt() vcpu_has( 7, 0, EBX, 23, ctxt, ops)=0A+#define vcpu_has_clwb(= ) vcpu_has( 7, 0, EBX, 24, ctxt, ops)=0A+#define vcpu_has_sh= a() vcpu_has( 7, 0, EBX, 29, ctxt, ops)=0A+#define = vcpu_has_rdpid() vcpu_has( 7, 0, ECX, 22, ctxt, ops)=0A+#defi= ne vcpu_has_xgetbv1() vcpu_has( 0xd, 1, EAX, 2, ctxt, ops)=0A+#d= efine vcpu_has_clzero() vcpu_has(0x80000008, X, EBX, 0, ctxt, = ops)=0A =0A #define vcpu_must_have(feat) \=0A generate_exception_if(!vc= pu_has_##feat(), EXC_UD)=0A@@ -5144,18 +5165,33 @@ x86_emulate(=0A = _regs.eflags |=3D X86_EFLAGS_AC;=0A goto complete_insn;= =0A =0A-#ifdef __XEN__=0A- case 0xd1: /* xsetbv */=0A+ case = 0xd0: /* xgetbv */=0A generate_exception_if(vex.pfx, = EXC_UD);=0A- if ( !ops->read_cr || ops->read_cr(4, &cr4, ctxt) = !=3D X86EMUL_OKAY )=0A+ if ( !ops->read_cr || !ops->read_xcr = ||=0A+ ops->read_cr(4, &cr4, ctxt) !=3D X86EMUL_OKAY )=0A = cr4 =3D 0;=0A generate_exception_if(!(cr4 & = X86_CR4_OSXSAVE), EXC_UD);=0A- generate_exception_if(!mode_ring0= () ||=0A- handle_xsetbv(_regs.ecx,=0A- = _regs.eax | (_regs.rdx << = 32)),=0A+ generate_exception_if(_regs.ecx > (vcpu_has_xgetbv1() = ? 1 : 0),=0A EXC_GP, 0);=0A+ = rc =3D ops->read_xcr(_regs.ecx, &msr_val, ctxt);=0A+ if ( rc = !=3D X86EMUL_OKAY )=0A+ goto done;=0A+ _regs.r(ax= ) =3D (uint32_t)msr_val;=0A+ _regs.r(dx) =3D msr_val >> 32;=0A+ = goto complete_insn;=0A+=0A+ case 0xd1: /* xsetbv */=0A+ = generate_exception_if(vex.pfx, EXC_UD);=0A+ if ( = !ops->read_cr || !ops->write_xcr ||=0A+ ops->read_cr(4, = &cr4, ctxt) !=3D X86EMUL_OKAY )=0A+ cr4 =3D 0;=0A+ = generate_exception_if(!(cr4 & X86_CR4_OSXSAVE), EXC_UD);=0A+ = generate_exception_if(!mode_ring0() || _regs.ecx, EXC_GP, 0);=0A+ = rc =3D ops->write_xcr(_regs.ecx,=0A+ = _regs.eax | ((uint64_t)_regs.edx << 32), ctxt);=0A+ if ( rc = !=3D X86EMUL_OKAY )=0A+ goto done;=0A goto = complete_insn;=0A-#endif=0A =0A case 0xd4: /* vmfunc */=0A = generate_exception_if(vex.pfx, EXC_UD);=0A--- a/xen/arch/x86/x86_emulat= e/x86_emulate.h=0A+++ b/xen/arch/x86/x86_emulate/x86_emulate.h=0A@@ -380,6 = +380,24 @@ struct x86_emulate_ops=0A struct x86_emulate_ctxt = *ctxt);=0A =0A /*=0A+ * read_xcr: Read from extended control = register.=0A+ * @reg: [IN ] Register to read.=0A+ */=0A+ int = (*read_xcr)(=0A+ unsigned int reg,=0A+ uint64_t *val,=0A+ = struct x86_emulate_ctxt *ctxt);=0A+=0A+ /*=0A+ * write_xcr: = Write to extended control register.=0A+ * @reg: [IN ] Register to = write.=0A+ */=0A+ int (*write_xcr)(=0A+ unsigned int = reg,=0A+ uint64_t val,=0A+ struct x86_emulate_ctxt *ctxt);=0A= +=0A+ /*=0A * read_msr: Read from model-specific register.=0A = * @reg: [IN ] Register to read.=0A */=0A--- a/xen/include/asm-x86/h= vm/trace.h=0A+++ b/xen/include/asm-x86/hvm/trace.h=0A@@ -33,6 +33,8 @@=0A = #define DO_TRC_HVM_CR_WRITE64 DEFAULT_HVM_REGACCESS=0A #define DO_TRC_HVM_= DR_READ DEFAULT_HVM_REGACCESS=0A #define DO_TRC_HVM_DR_WRITE = DEFAULT_HVM_REGACCESS=0A+#define DO_TRC_HVM_XCR_READ64 DEFAULT_HVM_REGACCE= SS=0A+#define DO_TRC_HVM_XCR_WRITE64 DEFAULT_HVM_REGACCESS=0A #define = DO_TRC_HVM_MSR_READ DEFAULT_HVM_REGACCESS=0A #define DO_TRC_HVM_MSR_WRIT= E DEFAULT_HVM_REGACCESS=0A #define DO_TRC_HVM_RDTSC DEFAULT_HVM_REG= ACCESS=0A--- a/xen/include/asm-x86/x86-defns.h=0A+++ b/xen/include/asm-x86/= x86-defns.h=0A@@ -66,4 +66,28 @@=0A #define X86_CR4_SMAP 0x00200000 = /* enable SMAP */=0A #define X86_CR4_PKE 0x00400000 /* enable PKE = */=0A =0A+/*=0A+ * XSTATE component flags in XCR0=0A+ */=0A+#define = _XSTATE_FP 0=0A+#define XSTATE_FP (1ULL << = _XSTATE_FP)=0A+#define _XSTATE_SSE 1=0A+#define XSTATE_SSE = (1ULL << _XSTATE_SSE)=0A+#define _XSTATE_YMM = 2=0A+#define XSTATE_YMM (1ULL << _XSTATE_YMM)=0A+#define = _XSTATE_BNDREGS 3=0A+#define XSTATE_BNDREGS (1ULL << = _XSTATE_BNDREGS)=0A+#define _XSTATE_BNDCSR 4=0A+#define = XSTATE_BNDCSR (1ULL << _XSTATE_BNDCSR)=0A+#define _XSTATE_OPMAS= K 5=0A+#define XSTATE_OPMASK (1ULL << _XSTATE_OPMASK= )=0A+#define _XSTATE_ZMM 6=0A+#define XSTATE_ZMM = (1ULL << _XSTATE_ZMM)=0A+#define _XSTATE_HI_ZMM 7=0A+#define = XSTATE_HI_ZMM (1ULL << _XSTATE_HI_ZMM)=0A+#define _XSTATE_PKRU = 9=0A+#define XSTATE_PKRU (1ULL << _XSTATE_PKRU)= =0A+#define _XSTATE_LWP 62=0A+#define XSTATE_LWP = (1ULL << _XSTATE_LWP)=0A+=0A #endif /* __XEN_X86_DEFNS_H__ */=0A--- = a/xen/include/asm-x86/xstate.h=0A+++ b/xen/include/asm-x86/xstate.h=0A@@ = -10,6 +10,7 @@=0A =0A #include =0A #include = =0A+#include =0A =0A #define FCW_DEFAULT = 0x037f=0A #define FCW_RESET 0x0040=0A@@ -28,27 +29,6 @@ = extern uint32_t mxcsr_mask;=0A #define XSAVE_HDR_OFFSET FXSAVE_SIZ= E=0A #define XSTATE_AREA_MIN_SIZE (FXSAVE_SIZE + XSAVE_HDR_SIZE)=0A = =0A-#define _XSTATE_FP 0=0A-#define XSTATE_FP = (1ULL << _XSTATE_FP)=0A-#define _XSTATE_SSE 1=0A-#define = XSTATE_SSE (1ULL << _XSTATE_SSE)=0A-#define _XSTATE_YMM = 2=0A-#define XSTATE_YMM (1ULL << _XSTATE_YMM)=0A-#= define _XSTATE_BNDREGS 3=0A-#define XSTATE_BNDREGS = (1ULL << _XSTATE_BNDREGS)=0A-#define _XSTATE_BNDCSR 4=0A-#define= XSTATE_BNDCSR (1ULL << _XSTATE_BNDCSR)=0A-#define _XSTATE_OPMA= SK 5=0A-#define XSTATE_OPMASK (1ULL << _XSTATE_OPMAS= K)=0A-#define _XSTATE_ZMM 6=0A-#define XSTATE_ZMM = (1ULL << _XSTATE_ZMM)=0A-#define _XSTATE_HI_ZMM 7=0A-#define= XSTATE_HI_ZMM (1ULL << _XSTATE_HI_ZMM)=0A-#define _XSTATE_PKRU= 9=0A-#define XSTATE_PKRU (1ULL << _XSTATE_PKRU)= =0A-#define _XSTATE_LWP 62=0A-#define XSTATE_LWP = (1ULL << _XSTATE_LWP)=0A-=0A #define XSTATE_FP_SSE (XSTATE_FP | = XSTATE_SSE)=0A #define XCNTXT_MASK (XSTATE_FP | XSTATE_SSE | XSTATE_YMM = | XSTATE_OPMASK | \=0A XSTATE_ZMM | XSTATE_HI_ZMM = | XSTATE_NONLAZY)=0A--- a/xen/include/public/trace.h=0A+++ b/xen/include/pu= blic/trace.h=0A@@ -234,6 +234,8 @@=0A #define TRC_HVM_TRAP = (TRC_HVM_HANDLER + 0x23)=0A #define TRC_HVM_TRAP_DEBUG (TRC_HVM_HANDL= ER + 0x24)=0A #define TRC_HVM_VLAPIC (TRC_HVM_HANDLER + = 0x25)=0A+#define TRC_HVM_XCR_READ64 (TRC_HVM_HANDLER + TRC_64_FLAG + = 0x26)=0A+#define TRC_HVM_XCR_WRITE64 (TRC_HVM_HANDLER + TRC_64_FLAG + = 0x27)=0A =0A #define TRC_HVM_IOPORT_WRITE (TRC_HVM_HANDLER + 0x216)=0A = #define TRC_HVM_IOMEM_WRITE (TRC_HVM_HANDLER + 0x217)=0A --=__Part7D456367.1__= Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: base64 Content-Disposition: inline X19fX19fX19fX19fX19fX19fX19fX19fX19fX19fX19fX19fX19fX19fX19fX18KWGVuLWRldmVs IG1haWxpbmcgbGlzdApYZW4tZGV2ZWxAbGlzdHMueGVuLm9yZwpodHRwczovL2xpc3RzLnhlbi5v cmcveGVuLWRldmVsCg== --=__Part7D456367.1__=--