From mboxrd@z Thu Jan 1 00:00:00 1970 From: Shuai Ruan Subject: [V8 2/4] x86/xsaves: enable xsaves/xrstors/xsavec in xen Date: Fri, 23 Oct 2015 17:48:19 +0800 Message-ID: <1445593701-5300-3-git-send-email-shuai.ruan@linux.intel.com> References: <1445593701-5300-1-git-send-email-shuai.ruan@linux.intel.com> Mime-Version: 1.0 Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: 7bit Return-path: In-Reply-To: <1445593701-5300-1-git-send-email-shuai.ruan@linux.intel.com> List-Unsubscribe: , List-Post: List-Help: List-Subscribe: , Sender: xen-devel-bounces@lists.xen.org Errors-To: xen-devel-bounces@lists.xen.org To: xen-devel@lists.xen.org Cc: kevin.tian@intel.com, wei.liu2@citrix.com, Ian.Campbell@citrix.com, stefano.stabellini@eu.citrix.com, jun.nakajima@intel.com, andrew.cooper3@citrix.com, ian.jackson@eu.citrix.com, jbeulich@suse.com, keir@xen.org List-Id: xen-devel@lists.xenproject.org This patch uses xsaves/xrstors/xsavec instead of xsaveopt/xrstor to perform the xsave_area switching so that xen itself can benefit from them when available. For xsaves/xrstors/xsavec only use compact format. Add format conversion support when perform guest os migration. Also, pv guest will not support xsaves/xrstors. Signed-off-by: Shuai Ruan Reviewed-by: Andrew Cooper --- xen/arch/x86/domain.c | 7 + xen/arch/x86/domctl.c | 31 ++++- xen/arch/x86/hvm/hvm.c | 24 +++- xen/arch/x86/i387.c | 4 + xen/arch/x86/traps.c | 7 +- xen/arch/x86/xstate.c | 248 ++++++++++++++++++++++++++++----- xen/include/asm-x86/xstate.h | 2 + xen/include/public/arch-x86/hvm/save.h | 1 + 8 files changed, 279 insertions(+), 45 deletions(-) diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c index fe3be30..108d4f8 100644 --- a/xen/arch/x86/domain.c +++ b/xen/arch/x86/domain.c @@ -883,7 +883,12 @@ int arch_set_info_guest( { memcpy(v->arch.fpu_ctxt, &c.nat->fpu_ctxt, sizeof(c.nat->fpu_ctxt)); if ( v->arch.xsave_area ) + { v->arch.xsave_area->xsave_hdr.xstate_bv = XSTATE_FP_SSE; + if ( cpu_has_xsaves || cpu_has_xsavec ) + v->arch.xsave_area->xsave_hdr.xcomp_bv = XSTATE_FP_SSE | + XSTATE_COMPACTION_ENABLED; + } } if ( !compat ) @@ -1568,6 +1573,8 @@ static void __context_switch(void) if ( xcr0 != get_xcr0() && !set_xcr0(xcr0) ) BUG(); } + if ( cpu_has_xsaves && has_hvm_container_vcpu(n) ) + set_msr_xss(n->arch.hvm_vcpu.msr_xss); vcpu_restore_fpu_eager(n); n->arch.ctxt_switch_to(n); } diff --git a/xen/arch/x86/domctl.c b/xen/arch/x86/domctl.c index 0f6fdb9..551dde2 100644 --- a/xen/arch/x86/domctl.c +++ b/xen/arch/x86/domctl.c @@ -897,9 +897,30 @@ long arch_do_domctl( ret = -EFAULT; offset += sizeof(v->arch.xcr0_accum); - if ( !ret && copy_to_guest_offset(evc->buffer, offset, - (void *)v->arch.xsave_area, - size - 2 * sizeof(uint64_t)) ) + + if ( !ret && (cpu_has_xsaves || cpu_has_xsavec) ) + { + void *xsave_area; + + xsave_area = xmalloc_bytes(size); + if ( !xsave_area ) + { + ret = -ENOMEM; + vcpu_unpause(v); + goto vcpuextstate_out; + } + + expand_xsave_states(v, xsave_area, + size - 2 * sizeof(uint64_t)); + + if ( copy_to_guest_offset(evc->buffer, offset, xsave_area, + size - 2 * sizeof(uint64_t)) ) + ret = -EFAULT; + xfree(xsave_area); + } + else if ( !ret && copy_to_guest_offset(evc->buffer, offset, + (void *)v->arch.xsave_area, + size - 2 * sizeof(uint64_t)) ) ret = -EFAULT; vcpu_unpause(v); @@ -955,8 +976,8 @@ long arch_do_domctl( v->arch.xcr0_accum = _xcr0_accum; if ( _xcr0_accum & XSTATE_NONLAZY ) v->arch.nonlazy_xstate_used = 1; - memcpy(v->arch.xsave_area, _xsave_area, - evc->size - 2 * sizeof(uint64_t)); + 
compress_xsave_states(v, _xsave_area, + evc->size - 2 * sizeof(uint64_t)); vcpu_unpause(v); } else diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c index 3fa2280..0140d34 100644 --- a/xen/arch/x86/hvm/hvm.c +++ b/xen/arch/x86/hvm/hvm.c @@ -1735,6 +1735,7 @@ static int hvm_save_cpu_ctxt(struct domain *d, hvm_domain_context_t *h) hvm_funcs.save_cpu_ctxt(v, &ctxt); ctxt.msr_tsc_aux = hvm_msr_tsc_aux(v); + ctxt.msr_xss = v->arch.hvm_vcpu.msr_xss; hvm_get_segment_register(v, x86_seg_idtr, &seg); ctxt.idtr_limit = seg.limit; @@ -2025,6 +2026,11 @@ static int hvm_load_cpu_ctxt(struct domain *d, hvm_domain_context_t *h) v->arch.hvm_vcpu.msr_tsc_aux = ctxt.msr_tsc_aux; + if ( cpu_has_xsaves ) + v->arch.hvm_vcpu.msr_xss = ctxt.msr_xss; + else + v->arch.hvm_vcpu.msr_xss = 0; + seg.limit = ctxt.idtr_limit; seg.base = ctxt.idtr_base; hvm_set_segment_register(v, x86_seg_idtr, &seg); @@ -2088,6 +2094,9 @@ static int hvm_load_cpu_ctxt(struct domain *d, hvm_domain_context_t *h) memcpy(v->arch.xsave_area, ctxt.fpu_regs, sizeof(ctxt.fpu_regs)); xsave_area->xsave_hdr.xstate_bv = XSTATE_FP_SSE; + if ( cpu_has_xsaves || cpu_has_xsavec ) + xsave_area->xsave_hdr.xcomp_bv = XSTATE_FP_SSE | + XSTATE_COMPACTION_ENABLED; } else memcpy(v->arch.fpu_ctxt, ctxt.fpu_regs, sizeof(ctxt.fpu_regs)); @@ -2157,8 +2166,8 @@ static int hvm_save_cpu_xsave_states(struct domain *d, hvm_domain_context_t *h) ctxt->xfeature_mask = xfeature_mask; ctxt->xcr0 = v->arch.xcr0; ctxt->xcr0_accum = v->arch.xcr0_accum; - memcpy(&ctxt->save_area, v->arch.xsave_area, - size - offsetof(struct hvm_hw_cpu_xsave, save_area)); + expand_xsave_states(v, &ctxt->save_area, + size - offsetof(typeof(*ctxt), save_area)); } return 0; @@ -2257,10 +2266,10 @@ static int hvm_load_cpu_xsave_states(struct domain *d, hvm_domain_context_t *h) v->arch.xcr0_accum = ctxt->xcr0_accum; if ( ctxt->xcr0_accum & XSTATE_NONLAZY ) v->arch.nonlazy_xstate_used = 1; - memcpy(v->arch.xsave_area, &ctxt->save_area, - min(desc->length, size) - offsetof(struct hvm_hw_cpu_xsave, - save_area)); + compress_xsave_states(v, &ctxt->save_area, + min(desc->length, size) - + offsetof(struct hvm_hw_cpu_xsave,save_area)); return 0; } @@ -5409,7 +5418,12 @@ void hvm_vcpu_reset_state(struct vcpu *v, uint16_t cs, uint16_t ip) fpu_ctxt->fcw = FCW_RESET; fpu_ctxt->mxcsr = MXCSR_DEFAULT; if ( v->arch.xsave_area ) + { v->arch.xsave_area->xsave_hdr.xstate_bv = XSTATE_FP; + if ( cpu_has_xsaves || cpu_has_xsavec ) + v->arch.xsave_area->xsave_hdr.xcomp_bv = XSTATE_FP | + XSTATE_COMPACTION_ENABLED; + } v->arch.vgc_flags = VGCF_online; memset(&v->arch.user_regs, 0, sizeof(v->arch.user_regs)); diff --git a/xen/arch/x86/i387.c b/xen/arch/x86/i387.c index 66b51cb..b661d39 100644 --- a/xen/arch/x86/i387.c +++ b/xen/arch/x86/i387.c @@ -282,7 +282,11 @@ int vcpu_init_fpu(struct vcpu *v) return rc; if ( v->arch.xsave_area ) + { v->arch.fpu_ctxt = &v->arch.xsave_area->fpu_sse; + if ( cpu_has_xsaves || cpu_has_xsavec ) + v->arch.xsave_area->xsave_hdr.xcomp_bv = XSTATE_COMPACTION_ENABLED; + } else { v->arch.fpu_ctxt = _xzalloc(sizeof(v->arch.xsave_area->fpu_sse), 16); diff --git a/xen/arch/x86/traps.c b/xen/arch/x86/traps.c index 8093535..42449b1 100644 --- a/xen/arch/x86/traps.c +++ b/xen/arch/x86/traps.c @@ -935,9 +935,10 @@ void pv_cpuid(struct cpu_user_regs *regs) goto unsupported; if ( regs->_ecx == 1 ) { - a &= boot_cpu_data.x86_capability[X86_FEATURE_XSAVEOPT / 32]; - if ( !cpu_has_xsaves ) - b = c = d = 0; + a &= cpufeat_mask(X86_FEATURE_XSAVEOPT) | + cpufeat_mask(X86_FEATURE_XSAVEC) | + 
(cpu_has_xgetbv1 ? cpufeat_mask(X86_FEATURE_XGETBV1) : 0); + b = c = d = 0; } break; diff --git a/xen/arch/x86/xstate.c b/xen/arch/x86/xstate.c index add8c55..ce65c11 100644 --- a/xen/arch/x86/xstate.c +++ b/xen/arch/x86/xstate.c @@ -23,6 +23,10 @@ static u32 __read_mostly xsave_cntxt_size; /* A 64-bit bitmask of the XSAVE/XRSTOR features supported by processor. */ u64 __read_mostly xfeature_mask; +static unsigned int * __read_mostly xstate_offsets; +static unsigned int * __read_mostly xstate_sizes; +static unsigned int __read_mostly xstate_features; +static unsigned int __read_mostly xstate_comp_offsets[sizeof(xfeature_mask)*8]; /* Cached xss for fast read */ static DEFINE_PER_CPU(uint64_t, xss); @@ -79,6 +83,164 @@ uint64_t get_msr_xss(void) return this_cpu(xss); } +static bool_t xsave_area_compressed(const struct xsave_struct *xsave_area) +{ + return xsave_area && (xsave_area->xsave_hdr.xcomp_bv + & XSTATE_COMPACTION_ENABLED); +} + +static int setup_xstate_features(bool_t bsp) +{ + unsigned int leaf, tmp, eax, ebx; + + if ( bsp ) + { + xstate_features = fls(xfeature_mask); + xstate_offsets = xzalloc_array(unsigned int, xstate_features); + if ( !xstate_offsets ) + return -ENOMEM; + + xstate_sizes = xzalloc_array(unsigned int, xstate_features); + if ( !xstate_sizes ) + return -ENOMEM; + } + + for ( leaf = 2; leaf < xstate_features; leaf++ ) + { + if ( bsp ) + cpuid_count(XSTATE_CPUID, leaf, &xstate_sizes[leaf], + &xstate_offsets[leaf], &tmp, &tmp); + else + { + cpuid_count(XSTATE_CPUID, leaf, &eax, + &ebx, &tmp, &tmp); + BUG_ON(eax != xstate_sizes[leaf]); + BUG_ON(ebx != xstate_offsets[leaf]); + } + } + + return 0; +} + +static void __init setup_xstate_comp(void) +{ + unsigned int i; + + /* + * The FP xstates and SSE xstates are legacy states. They are always + * in the fixed offsets in the xsave area in either compacted form + * or standard form. + */ + xstate_comp_offsets[0] = 0; + xstate_comp_offsets[1] = XSAVE_SSE_OFFSET; + + xstate_comp_offsets[2] = FXSAVE_SIZE + XSAVE_HDR_SIZE; + + for ( i = 3; i < xstate_features; i++ ) + { + xstate_comp_offsets[i] = xstate_comp_offsets[i - 1] + + (((1ul << i) & xfeature_mask) + ? xstate_sizes[i - 1] : 0); + ASSERT(xstate_comp_offsets[i] + xstate_sizes[i] <= xsave_cntxt_size); + } +} + +static void *get_xsave_addr(void *xsave, unsigned int xfeature_idx) +{ + if ( !((1ul << xfeature_idx) & xfeature_mask) ) + return NULL; + + return xsave + xstate_comp_offsets[xfeature_idx]; +} + +void expand_xsave_states(struct vcpu *v, void *dest, unsigned int size) +{ + struct xsave_struct *xsave = v->arch.xsave_area; + u64 xstate_bv = xsave->xsave_hdr.xstate_bv; + u64 valid; + + if ( !cpu_has_xsaves && !cpu_has_xsavec ) + { + memcpy(dest, xsave, size); + return; + } + + ASSERT(xsave_area_compressed(xsave)); + /* + * Copy legacy XSAVE area, to avoid complications with CPUID + * leaves 0 and 1 in the loop below. + */ + memcpy(dest, xsave, FXSAVE_SIZE); + + ((struct xsave_struct *)dest)->xsave_hdr.xstate_bv = xstate_bv; + ((struct xsave_struct *)dest)->xsave_hdr.xcomp_bv = 0; + + /* + * Copy each region from the possibly compacted offset to the + * non-compacted offset. 
+ */ + valid = xstate_bv & ~XSTATE_FP_SSE; + while ( valid ) + { + u64 feature = valid & -valid; + unsigned int index = fls(feature) - 1; + const void *src = get_xsave_addr(xsave, index); + + if ( src ) + { + ASSERT((xstate_offsets[index] + xstate_sizes[index]) <= size); + memcpy(dest + xstate_offsets[index], src, xstate_sizes[index]); + } + + valid &= ~feature; + } + +} + +void compress_xsave_states(struct vcpu *v, const void *src, unsigned int size) +{ + struct xsave_struct *xsave = v->arch.xsave_area; + u64 xstate_bv = ((const struct xsave_struct *)src)->xsave_hdr.xstate_bv; + u64 valid; + + if ( !cpu_has_xsaves && !cpu_has_xsavec ) + { + memcpy(xsave, src, size); + return; + } + + ASSERT(!xsave_area_compressed(src)); + /* + * Copy legacy XSAVE area, to avoid complications with CPUID + * leaves 0 and 1 in the loop below. + */ + memcpy(xsave, src, FXSAVE_SIZE); + + /* Set XSTATE_BV and XCOMP_BV. */ + xsave->xsave_hdr.xstate_bv = xstate_bv; + xsave->xsave_hdr.xcomp_bv = v->arch.xcr0_accum | XSTATE_COMPACTION_ENABLED; + + /* + * Copy each region from the non-compacted offset to the + * possibly compacted offset. + */ + valid = xstate_bv & ~XSTATE_FP_SSE; + while ( valid ) + { + u64 feature = valid & -valid; + unsigned int index = fls(feature) - 1; + void *dest = get_xsave_addr(xsave, index); + + if ( dest ) + { + ASSERT((xstate_offsets[index] + xstate_sizes[index]) <= size); + memcpy(dest, src + xstate_offsets[index], xstate_sizes[index]); + } + + valid &= ~feature; + } +} + void xsave(struct vcpu *v, uint64_t mask) { struct xsave_struct *ptr = v->arch.xsave_area; @@ -91,7 +253,15 @@ void xsave(struct vcpu *v, uint64_t mask) typeof(ptr->fpu_sse.fip.sel) fcs = ptr->fpu_sse.fip.sel; typeof(ptr->fpu_sse.fdp.sel) fds = ptr->fpu_sse.fdp.sel; - if ( cpu_has_xsaveopt ) + if ( cpu_has_xsaves ) + asm volatile ( ".byte 0x48,0x0f,0xc7,0x2f" + : "=m" (*ptr) + : "a" (lmask), "d" (hmask), "D" (ptr) ); + else if ( cpu_has_xsavec ) + asm volatile ( ".byte 0x48,0x0f,0xc7,0x27" + : "=m" (*ptr) + : "a" (lmask), "d" (hmask), "D" (ptr) ); + else if ( cpu_has_xsaveopt ) { /* * xsaveopt may not write the FPU portion even when the respective @@ -144,7 +314,15 @@ void xsave(struct vcpu *v, uint64_t mask) } else { - if ( cpu_has_xsaveopt ) + if ( cpu_has_xsaves ) + asm volatile ( ".byte 0x0f,0xc7,0x2f" + : "=m" (*ptr) + : "a" (lmask), "d" (hmask), "D" (ptr) ); + else if ( cpu_has_xsavec ) + asm volatile ( ".byte 0x0f,0xc7,0x27" + : "=m" (*ptr) + : "a" (lmask), "d" (hmask), "D" (ptr) ); + else if ( cpu_has_xsaveopt ) asm volatile ( ".byte 0x0f,0xae,0x37" : "=m" (*ptr) : "a" (lmask), "d" (hmask), "D" (ptr) ); @@ -158,6 +336,20 @@ void xsave(struct vcpu *v, uint64_t mask) ptr->fpu_sse.x[FPU_WORD_SIZE_OFFSET] = word_size; } +#define XSTATE_FIXUP ".section .fixup,\"ax\" \n" \ + "2: mov %5,%%ecx \n" \ + " xor %1,%1 \n" \ + " rep stosb \n" \ + " lea %2,%0 \n" \ + " mov %3,%1 \n" \ + " jmp 1b \n" \ + ".previous \n" \ + _ASM_EXTABLE(1b, 2b) \ + : "+&D" (ptr), "+&a" (lmask) \ + : "m" (*ptr), "g" (lmask), "d" (hmask), \ + "m" (xsave_cntxt_size) \ + : "ecx" + void xrstor(struct vcpu *v, uint64_t mask) { uint32_t hmask = mask >> 32; @@ -187,39 +379,24 @@ void xrstor(struct vcpu *v, uint64_t mask) switch ( __builtin_expect(ptr->fpu_sse.x[FPU_WORD_SIZE_OFFSET], 8) ) { default: - asm volatile ( "1: .byte 0x48,0x0f,0xae,0x2f\n" - ".section .fixup,\"ax\" \n" - "2: mov %5,%%ecx \n" - " xor %1,%1 \n" - " rep stosb \n" - " lea %2,%0 \n" - " mov %3,%1 \n" - " jmp 1b \n" - ".previous \n" - _ASM_EXTABLE(1b, 2b) - : "+&D" (ptr), "+&a" 
(lmask) - : "m" (*ptr), "g" (lmask), "d" (hmask), - "m" (xsave_cntxt_size) - : "ecx" ); - break; + if ( cpu_has_xsaves ) + asm volatile ( "1: .byte 0x48,0x0f,0xc7,0x1f\n" + XSTATE_FIXUP ); + else + asm volatile ( "1: .byte 0x48,0x0f,0xae,0x2f\n" + XSTATE_FIXUP ); + break; case 4: case 2: - asm volatile ( "1: .byte 0x0f,0xae,0x2f\n" - ".section .fixup,\"ax\" \n" - "2: mov %5,%%ecx \n" - " xor %1,%1 \n" - " rep stosb \n" - " lea %2,%0 \n" - " mov %3,%1 \n" - " jmp 1b \n" - ".previous \n" - _ASM_EXTABLE(1b, 2b) - : "+&D" (ptr), "+&a" (lmask) - : "m" (*ptr), "g" (lmask), "d" (hmask), - "m" (xsave_cntxt_size) - : "ecx" ); + if ( cpu_has_xsaves ) + asm volatile ( "1: .byte 0x0f,0xc7,0x1f\n" + XSTATE_FIXUP ); + else + asm volatile ( "1: .byte 0x0f,0xae,0x2f\n" + XSTATE_FIXUP ); break; } } +#undef XSTATE_FIXUP bool_t xsave_enabled(const struct vcpu *v) { @@ -343,11 +520,18 @@ void xstate_init(struct cpuinfo_x86 *c) /* Mask out features not currently understood by Xen. */ eax &= (cpufeat_mask(X86_FEATURE_XSAVEOPT) | - cpufeat_mask(X86_FEATURE_XSAVEC)); + cpufeat_mask(X86_FEATURE_XSAVEC) | + cpufeat_mask(X86_FEATURE_XGETBV1) | + cpufeat_mask(X86_FEATURE_XSAVES)); c->x86_capability[X86_FEATURE_XSAVEOPT / 32] = eax; BUG_ON(eax != boot_cpu_data.x86_capability[X86_FEATURE_XSAVEOPT / 32]); + + if ( setup_xstate_features(bsp) ) + BUG(); + if ( bsp && (cpu_has_xsaves || cpu_has_xsavec) ) + setup_xstate_comp(); } static bool_t valid_xcr0(u64 xcr0) diff --git a/xen/include/asm-x86/xstate.h b/xen/include/asm-x86/xstate.h index b95a5b5..414cc99 100644 --- a/xen/include/asm-x86/xstate.h +++ b/xen/include/asm-x86/xstate.h @@ -91,6 +91,8 @@ void xrstor(struct vcpu *v, uint64_t mask); bool_t xsave_enabled(const struct vcpu *v); int __must_check validate_xstate(u64 xcr0, u64 xcr0_accum, u64 xstate_bv); int __must_check handle_xsetbv(u32 index, u64 new_bv); +void expand_xsave_states(struct vcpu *v, void *dest, unsigned int size); +void compress_xsave_states(struct vcpu *v, const void *src, unsigned int size); /* extended state init and cleanup functions */ void xstate_free_save_area(struct vcpu *v); diff --git a/xen/include/public/arch-x86/hvm/save.h b/xen/include/public/arch-x86/hvm/save.h index efb0b62..baff602 100644 --- a/xen/include/public/arch-x86/hvm/save.h +++ b/xen/include/public/arch-x86/hvm/save.h @@ -140,6 +140,7 @@ struct hvm_hw_cpu { uint64_t msr_syscall_mask; uint64_t msr_efer; uint64_t msr_tsc_aux; + uint64_t msr_xss; /* guest's idea of what rdtsc() would return */ uint64_t tsc; -- 1.9.1
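
For reviewers unfamiliar with the two XSAVE layouts, the conversion that expand_xsave_states()/compress_xsave_states() perform can be summarised with a small, self-contained sketch. This is illustrative only (hypothetical helper names, no handling of the optional 64-byte alignment flag in CPUID.(EAX=0xD,ECX=i).ECX[1], and none of Xen's bound checks); it is not the code in this patch:

/*
 * Minimal sketch of the compacted -> standard XSAVE layout conversion,
 * assuming the per-component sizes and standard-format offsets have
 * already been read from the CPUID leaf 0xD sub-leaves (as
 * setup_xstate_features() does above).  Helper names are hypothetical.
 */
#include <stdint.h>
#include <string.h>

#define XSTATE_COMPONENTS   63
#define LEGACY_HDR_SIZE     (512 + 64)      /* FXSAVE region + XSAVE header */

/* Filled from CPUID.(EAX=0xD,ECX=i).EAX (size) and .EBX (standard offset). */
static unsigned int xstate_size[XSTATE_COMPONENTS];
static unsigned int xstate_std_offset[XSTATE_COMPONENTS];

/*
 * In the compacted format a component i >= 2 that is present in XCOMP_BV
 * is stored immediately after all present lower-numbered components,
 * starting right after the legacy area and the XSAVE header.
 */
static unsigned int compacted_offset(uint64_t xcomp_bv, unsigned int idx)
{
    unsigned int i, offset = LEGACY_HDR_SIZE;

    for ( i = 2; i < idx; i++ )
        if ( xcomp_bv & (1ULL << i) )
            offset += xstate_size[i];

    return offset;
}

/* Expand a compacted save area 'comp' into a standard-format buffer 'std'. */
static void expand_area(const uint8_t *comp, uint8_t *std)
{
    uint64_t xstate_bv, xcomp_bv, zero = 0;
    unsigned int i;

    memcpy(&xstate_bv, comp + 512, sizeof(xstate_bv));   /* XSTATE_BV */
    memcpy(&xcomp_bv,  comp + 520, sizeof(xcomp_bv));    /* XCOMP_BV  */

    /* x87/SSE state and the header sit at fixed offsets in both formats. */
    memcpy(std, comp, LEGACY_HDR_SIZE);
    memcpy(std + 520, &zero, sizeof(zero));   /* standard form: XCOMP_BV = 0 */

    /* Move every component with valid data to its fixed standard offset. */
    for ( i = 2; i < XSTATE_COMPONENTS; i++ )
        if ( xstate_bv & (1ULL << i) )
            memcpy(std + xstate_std_offset[i],
                   comp + compacted_offset(xcomp_bv, i),
                   xstate_size[i]);
}

The compress direction is the mirror image: each component enabled in the source's XSTATE_BV is copied from its fixed standard offset into the next free slot of the compacted area, and XCOMP_BV is set to the accumulated XCR0 value with bit 63 (XSTATE_COMPACTION_ENABLED) set, as compress_xsave_states() does above.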