From mboxrd@z Thu Jan 1 00:00:00 1970 From: Mukesh Rathor Subject: [PATCH 09/18] PVH xen: domain creation code changes Date: Fri, 24 May 2013 18:25:28 -0700 Message-ID: <1369445137-19755-10-git-send-email-mukesh.rathor@oracle.com> References: <1369445137-19755-1-git-send-email-mukesh.rathor@oracle.com> Mime-Version: 1.0 Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: 7bit Return-path: In-Reply-To: <1369445137-19755-1-git-send-email-mukesh.rathor@oracle.com> List-Unsubscribe: , List-Post: List-Help: List-Subscribe: , Sender: xen-devel-bounces@lists.xen.org Errors-To: xen-devel-bounces@lists.xen.org To: Xen-devel@lists.xensource.com List-Id: xen-devel@lists.xenproject.org This patch contains changes to arch/x86/domain.c to allow for a PVH domain. Changes in V2: - changes to read_segment_register() moved to this patch. - The other comment was to create NULL functions for pvh_set_vcpu_info and pvh_read_descriptor which are implemented in later patch, but since I disable PVH creation until all patches are checked in, it is not needed. But it helps breaking down of patches. Changes in V3: - Fix read_segment_register() macro to make sure args are evaluated once, and use # instead of STR for name in the macro. Changes in V4: - Remove pvh substruct in the hvm substruct, as the vcpu_info_mfn has been moved out of pv_vcpu struct. - rename hvm_pvh_* functions to hvm_*. Changes in V5: - remove pvh_read_descriptor(). Signed-off-by: Mukesh Rathor --- xen/arch/x86/domain.c | 61 +++++++++++++++++++++++++++------------- xen/arch/x86/mm.c | 3 ++ xen/arch/x86/mm/hap/hap.c | 4 ++- xen/include/asm-x86/hvm/hvm.h | 8 +++++ xen/include/asm-x86/system.h | 18 +++++++++--- 5 files changed, 69 insertions(+), 25 deletions(-) diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c index 31a8a50..9953f80 100644 --- a/xen/arch/x86/domain.c +++ b/xen/arch/x86/domain.c @@ -385,7 +385,7 @@ int vcpu_initialise(struct vcpu *v) vmce_init_vcpu(v); - if ( is_hvm_domain(d) ) + if ( !is_pv_domain(d) ) { rc = hvm_vcpu_initialise(v); goto done; @@ -452,7 +452,7 @@ void vcpu_destroy(struct vcpu *v) vcpu_destroy_fpu(v); - if ( is_hvm_vcpu(v) ) + if ( !is_pv_vcpu(v) ) hvm_vcpu_destroy(v); else xfree(v->arch.pv_vcpu.trap_ctxt); @@ -464,7 +464,7 @@ int arch_domain_create(struct domain *d, unsigned int domcr_flags) int rc = -ENOMEM; d->arch.hvm_domain.hap_enabled = - is_hvm_domain(d) && + !is_pv_domain(d) && hvm_funcs.hap_supported && (domcr_flags & DOMCRF_hap); d->arch.hvm_domain.mem_sharing_enabled = 0; @@ -512,7 +512,7 @@ int arch_domain_create(struct domain *d, unsigned int domcr_flags) mapcache_domain_init(d); HYPERVISOR_COMPAT_VIRT_START(d) = - is_hvm_domain(d) ? ~0u : __HYPERVISOR_COMPAT_VIRT_START; + is_pv_domain(d) ? __HYPERVISOR_COMPAT_VIRT_START : ~0u; if ( (rc = paging_domain_init(d, domcr_flags)) != 0 ) goto fail; @@ -555,7 +555,7 @@ int arch_domain_create(struct domain *d, unsigned int domcr_flags) } spin_lock_init(&d->arch.e820_lock); - if ( is_hvm_domain(d) ) + if ( !is_pv_domain(d) ) { if ( (rc = hvm_domain_initialise(d)) != 0 ) { @@ -658,7 +658,7 @@ int arch_set_info_guest( #define c(fld) (compat ? (c.cmp->fld) : (c.nat->fld)) flags = c(flags); - if ( !is_hvm_vcpu(v) ) + if ( is_pv_vcpu(v) ) { if ( !compat ) { @@ -711,7 +711,7 @@ int arch_set_info_guest( v->fpu_initialised = !!(flags & VGCF_I387_VALID); v->arch.flags &= ~TF_kernel_mode; - if ( (flags & VGCF_in_kernel) || is_hvm_vcpu(v)/*???*/ ) + if ( (flags & VGCF_in_kernel) || !is_pv_vcpu(v)/*???*/ ) v->arch.flags |= TF_kernel_mode; v->arch.vgc_flags = flags; @@ -722,7 +722,7 @@ int arch_set_info_guest( if ( !compat ) { memcpy(&v->arch.user_regs, &c.nat->user_regs, sizeof(c.nat->user_regs)); - if ( !is_hvm_vcpu(v) ) + if ( is_pv_vcpu(v) ) memcpy(v->arch.pv_vcpu.trap_ctxt, c.nat->trap_ctxt, sizeof(c.nat->trap_ctxt)); } @@ -738,10 +738,13 @@ int arch_set_info_guest( v->arch.user_regs.eflags |= 2; - if ( is_hvm_vcpu(v) ) + if ( !is_pv_vcpu(v) ) { hvm_set_info_guest(v); - goto out; + if ( is_hvm_vcpu(v) || v->is_initialised ) + goto out; + else + goto pvh_skip_pv_stuff; } init_int80_direct_trap(v); @@ -750,7 +753,10 @@ int arch_set_info_guest( v->arch.pv_vcpu.iopl = (v->arch.user_regs.eflags >> 12) & 3; v->arch.user_regs.eflags &= ~X86_EFLAGS_IOPL; - /* Ensure real hardware interrupts are enabled. */ + /* + * Ensure real hardware interrupts are enabled. Note: PVH may not have + * IDT set on all vcpus so we don't enable IF for it yet. + */ v->arch.user_regs.eflags |= X86_EFLAGS_IF; if ( !v->is_initialised ) @@ -852,6 +858,7 @@ int arch_set_info_guest( set_bit(_VPF_in_reset, &v->pause_flags); +pvh_skip_pv_stuff: if ( !compat ) cr3_gfn = xen_cr3_to_pfn(c.nat->ctrlreg[3]); else @@ -860,7 +867,7 @@ int arch_set_info_guest( if ( !cr3_page ) rc = -EINVAL; - else if ( paging_mode_refcounts(d) ) + else if ( paging_mode_refcounts(d) || is_pvh_vcpu(v) ) /* nothing */; else if ( cr3_page == v->arch.old_guest_table ) { @@ -886,8 +893,15 @@ int arch_set_info_guest( /* handled below */; else if ( !compat ) { + /* PVH 32bitfixme */ + if ( is_pvh_vcpu(v) ) + { + v->arch.cr3 = page_to_mfn(cr3_page); + v->arch.hvm_vcpu.guest_cr[3] = c.nat->ctrlreg[3]; + } + v->arch.guest_table = pagetable_from_page(cr3_page); - if ( c.nat->ctrlreg[1] ) + if ( c.nat->ctrlreg[1] && !is_pvh_vcpu(v) ) { cr3_gfn = xen_cr3_to_pfn(c.nat->ctrlreg[1]); cr3_page = get_page_from_gfn(d, cr3_gfn, NULL, P2M_ALLOC); @@ -942,6 +956,13 @@ int arch_set_info_guest( update_cr3(v); + if ( is_pvh_vcpu(v) ) + { + /* guest is bringing up non-boot SMP vcpu */ + if ( (rc=hvm_set_vcpu_info(v, c.nat)) != 0 ) + return rc; + } + out: if ( flags & VGCF_online ) clear_bit(_VPF_down, &v->pause_flags); @@ -1309,7 +1330,7 @@ static void update_runstate_area(struct vcpu *v) static inline int need_full_gdt(struct vcpu *v) { - return (!is_hvm_vcpu(v) && !is_idle_vcpu(v)); + return (is_pv_vcpu(v) && !is_idle_vcpu(v)); } static void __context_switch(void) @@ -1443,7 +1464,7 @@ void context_switch(struct vcpu *prev, struct vcpu *next) /* Re-enable interrupts before restoring state which may fault. */ local_irq_enable(); - if ( !is_hvm_vcpu(next) ) + if ( is_pv_vcpu(next) ) { load_LDT(next); load_segments(next); @@ -1566,12 +1587,12 @@ unsigned long hypercall_create_continuation( regs->eax = op; /* Ensure the hypercall trap instruction is re-executed. */ - if ( !is_hvm_vcpu(current) ) + if ( is_pv_vcpu(current) ) regs->eip -= 2; /* re-execute 'syscall' / 'int $xx' */ else current->arch.hvm_vcpu.hcall_preempted = 1; - if ( !is_hvm_vcpu(current) ? + if ( is_pv_vcpu(current) ? !is_pv_32on64_vcpu(current) : (hvm_guest_x86_mode(current) == 8) ) { @@ -1839,7 +1860,7 @@ int domain_relinquish_resources(struct domain *d) return ret; } - if ( !is_hvm_domain(d) ) + if ( is_pv_domain(d) ) { for_each_vcpu ( d, v ) { @@ -1912,7 +1933,7 @@ int domain_relinquish_resources(struct domain *d) BUG(); } - if ( is_hvm_domain(d) ) + if ( !is_pv_domain(d) ) hvm_domain_relinquish_resources(d); return 0; @@ -1996,7 +2017,7 @@ void vcpu_mark_events_pending(struct vcpu *v) if ( already_pending ) return; - if ( is_hvm_vcpu(v) ) + if ( !is_pv_vcpu(v) ) hvm_assert_evtchn_irq(v); else vcpu_kick(v); diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c index bd1402e..b190ad9 100644 --- a/xen/arch/x86/mm.c +++ b/xen/arch/x86/mm.c @@ -4330,6 +4330,9 @@ void destroy_gdt(struct vcpu *v) int i; unsigned long pfn; + if ( is_pvh_vcpu(v) ) + return; + v->arch.pv_vcpu.gdt_ents = 0; pl1e = gdt_ldt_ptes(v->domain, v); for ( i = 0; i < FIRST_RESERVED_GDT_PAGE; i++ ) diff --git a/xen/arch/x86/mm/hap/hap.c b/xen/arch/x86/mm/hap/hap.c index bff05d9..5aa0852 100644 --- a/xen/arch/x86/mm/hap/hap.c +++ b/xen/arch/x86/mm/hap/hap.c @@ -639,7 +639,9 @@ static void hap_update_cr3(struct vcpu *v, int do_locking) const struct paging_mode * hap_paging_get_mode(struct vcpu *v) { - return !hvm_paging_enabled(v) ? &hap_paging_real_mode : + /* PVH 32bitfixme */ + return is_pvh_vcpu(v) ? &hap_paging_long_mode : + !hvm_paging_enabled(v) ? &hap_paging_real_mode : hvm_long_mode_enabled(v) ? &hap_paging_long_mode : hvm_pae_enabled(v) ? &hap_paging_pae_mode : &hap_paging_protected_mode; diff --git a/xen/include/asm-x86/hvm/hvm.h b/xen/include/asm-x86/hvm/hvm.h index 8408420..7e21ee1 100644 --- a/xen/include/asm-x86/hvm/hvm.h +++ b/xen/include/asm-x86/hvm/hvm.h @@ -192,6 +192,8 @@ struct hvm_function_table { paddr_t *L1_gpa, unsigned int *page_order, uint8_t *p2m_acc, bool_t access_r, bool_t access_w, bool_t access_x); + /* PVH functions */ + int (*pvh_set_vcpu_info)(struct vcpu *v, struct vcpu_guest_context *ctxtp); }; extern struct hvm_function_table hvm_funcs; @@ -325,6 +327,12 @@ static inline unsigned long hvm_get_shadow_gs_base(struct vcpu *v) return hvm_funcs.get_shadow_gs_base(v); } +static inline int hvm_set_vcpu_info(struct vcpu *v, + struct vcpu_guest_context *ctxtp) +{ + return hvm_funcs.pvh_set_vcpu_info(v, ctxtp); +} + #define is_viridian_domain(_d) \ (is_hvm_domain(_d) && ((_d)->arch.hvm_domain.params[HVM_PARAM_VIRIDIAN])) diff --git a/xen/include/asm-x86/system.h b/xen/include/asm-x86/system.h index 9bb22cb..955983b 100644 --- a/xen/include/asm-x86/system.h +++ b/xen/include/asm-x86/system.h @@ -4,10 +4,20 @@ #include #include -#define read_segment_register(vcpu, regs, name) \ -({ u16 __sel; \ - asm volatile ( "movw %%" STR(name) ",%0" : "=r" (__sel) ); \ - __sel; \ +/* + * We need vcpu because during context switch, going from pure PV to PVH, + * in save_segments(), current has been updated to next, and no longer pointing + * to the pure PV. Note: for PVH, we update regs->selectors on each vmexit. + */ +#define read_segment_register(vcpu, regs, name) \ +({ u16 __sel; \ + struct cpu_user_regs *_regs = (regs); \ + \ + if ( is_pvh_vcpu(vcpu) ) \ + __sel = _regs->name; \ + else \ + asm volatile ( "movw %%" #name ",%0" : "=r" (__sel) ); \ + __sel; \ }) #define wbinvd() \ -- 1.7.2.3