From mboxrd@z Thu Jan 1 00:00:00 1970 From: Mukesh Rathor Subject: [PATCH 09/20] PVH xen: domain creation code changes Date: Tue, 14 May 2013 17:52:37 -0700 Message-ID: <1368579168-30829-10-git-send-email-mukesh.rathor@oracle.com> References: <1368579168-30829-1-git-send-email-mukesh.rathor@oracle.com> Mime-Version: 1.0 Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: 7bit Return-path: In-Reply-To: <1368579168-30829-1-git-send-email-mukesh.rathor@oracle.com> List-Unsubscribe: , List-Post: List-Help: List-Subscribe: , Sender: xen-devel-bounces@lists.xen.org Errors-To: xen-devel-bounces@lists.xen.org To: Xen-devel@lists.xensource.com List-Id: xen-devel@lists.xenproject.org This patch contains changes to arch/x86/domain.c to allow for a PVH domain. Changes in V2: - Changes to read_segment_register() moved to this patch. - The other comment was to create NULL functions for pvh_set_vcpu_info and pvh_read_descriptor, which are implemented in a later patch, but since I disable PVH creation until all patches are checked in, it is not needed. But it helps with breaking down the patches. Changes in V3: - Fix read_segment_register() macro to make sure args are evaluated once, and use # instead of STR for name in the macro. Changes in V4: - Remove pvh substruct in the hvm substruct, as the vcpu_info_mfn has been moved out of pv_vcpu struct. - Rename hvm_pvh_* functions to hvm_*. Changes in V5: - Remove pvh_read_descriptor(). 
Signed-off-by: Mukesh Rathor --- xen/arch/x86/domain.c | 69 ++++++++++++++++++++++++++-------------- xen/arch/x86/mm.c | 3 ++ xen/arch/x86/mm/hap/hap.c | 4 ++- xen/include/asm-x86/hvm/hvm.h | 8 +++++ xen/include/asm-x86/system.h | 18 ++++++++-- 5 files changed, 73 insertions(+), 29 deletions(-) diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c index 80ff4a3..4883fd1 100644 --- a/xen/arch/x86/domain.c +++ b/xen/arch/x86/domain.c @@ -387,7 +387,7 @@ int vcpu_initialise(struct vcpu *v) v->arch.vcpu_info_mfn = INVALID_MFN; - if ( is_hvm_domain(d) ) + if ( !is_pv_domain(d) ) { rc = hvm_vcpu_initialise(v); goto done; @@ -454,7 +454,7 @@ void vcpu_destroy(struct vcpu *v) vcpu_destroy_fpu(v); - if ( is_hvm_vcpu(v) ) + if ( !is_pv_vcpu(v) ) hvm_vcpu_destroy(v); else xfree(v->arch.pv_vcpu.trap_ctxt); @@ -466,7 +466,7 @@ int arch_domain_create(struct domain *d, unsigned int domcr_flags) int rc = -ENOMEM; d->arch.hvm_domain.hap_enabled = - is_hvm_domain(d) && + !is_pv_domain(d) && hvm_funcs.hap_supported && (domcr_flags & DOMCRF_hap); d->arch.hvm_domain.mem_sharing_enabled = 0; @@ -514,7 +514,7 @@ int arch_domain_create(struct domain *d, unsigned int domcr_flags) mapcache_domain_init(d); HYPERVISOR_COMPAT_VIRT_START(d) = - is_hvm_domain(d) ? ~0u : __HYPERVISOR_COMPAT_VIRT_START; + is_pv_domain(d) ? __HYPERVISOR_COMPAT_VIRT_START : ~0u; if ( (rc = paging_domain_init(d, domcr_flags)) != 0 ) goto fail; @@ -556,7 +556,7 @@ int arch_domain_create(struct domain *d, unsigned int domcr_flags) goto fail; } - if ( is_hvm_domain(d) ) + if ( !is_pv_domain(d) ) { if ( (rc = hvm_domain_initialise(d)) != 0 ) { @@ -565,12 +565,11 @@ int arch_domain_create(struct domain *d, unsigned int domcr_flags) } } else - { /* 64-bit PV guest by default. 
*/ d->arch.is_32bit_pv = d->arch.has_32bit_shinfo = 0; + if ( !is_hvm_domain(d) ) spin_lock_init(&d->arch.e820_lock); - } /* initialize default tsc behavior in case tools don't */ tsc_set_info(d, TSC_MODE_DEFAULT, 0UL, 0, 0); @@ -592,9 +591,10 @@ int arch_domain_create(struct domain *d, unsigned int domcr_flags) void arch_domain_destroy(struct domain *d) { - if ( is_hvm_domain(d) ) + if ( !is_pv_domain(d) ) hvm_domain_destroy(d); - else + + if ( !is_hvm_domain(d) ) xfree(d->arch.e820); free_domain_pirqs(d); @@ -662,7 +662,7 @@ int arch_set_info_guest( #define c(fld) (compat ? (c.cmp->fld) : (c.nat->fld)) flags = c(flags); - if ( !is_hvm_vcpu(v) ) + if ( is_pv_vcpu(v) ) { if ( !compat ) { @@ -715,7 +715,7 @@ int arch_set_info_guest( v->fpu_initialised = !!(flags & VGCF_I387_VALID); v->arch.flags &= ~TF_kernel_mode; - if ( (flags & VGCF_in_kernel) || is_hvm_vcpu(v)/*???*/ ) + if ( (flags & VGCF_in_kernel) || !is_pv_vcpu(v)/*???*/ ) v->arch.flags |= TF_kernel_mode; v->arch.vgc_flags = flags; @@ -726,7 +726,7 @@ int arch_set_info_guest( if ( !compat ) { memcpy(&v->arch.user_regs, &c.nat->user_regs, sizeof(c.nat->user_regs)); - if ( !is_hvm_vcpu(v) ) + if ( is_pv_vcpu(v) ) memcpy(v->arch.pv_vcpu.trap_ctxt, c.nat->trap_ctxt, sizeof(c.nat->trap_ctxt)); } @@ -742,10 +742,13 @@ int arch_set_info_guest( v->arch.user_regs.eflags |= 2; - if ( is_hvm_vcpu(v) ) + if ( !is_pv_vcpu(v) ) { hvm_set_info_guest(v); - goto out; + if ( is_hvm_vcpu(v) || v->is_initialised ) + goto out; + else + goto pvh_skip_pv_stuff; } init_int80_direct_trap(v); @@ -754,7 +757,10 @@ int arch_set_info_guest( v->arch.pv_vcpu.iopl = (v->arch.user_regs.eflags >> 12) & 3; v->arch.user_regs.eflags &= ~X86_EFLAGS_IOPL; - /* Ensure real hardware interrupts are enabled. */ + /* + * Ensure real hardware interrupts are enabled. Note: PVH may not have + * IDT set on all vcpus so we don't enable IF for it yet. 
+ */ v->arch.user_regs.eflags |= X86_EFLAGS_IF; if ( !v->is_initialised ) @@ -856,6 +862,7 @@ int arch_set_info_guest( set_bit(_VPF_in_reset, &v->pause_flags); +pvh_skip_pv_stuff: if ( !compat ) cr3_gfn = xen_cr3_to_pfn(c.nat->ctrlreg[3]); else @@ -864,7 +871,7 @@ int arch_set_info_guest( if ( !cr3_page ) rc = -EINVAL; - else if ( paging_mode_refcounts(d) ) + else if ( paging_mode_refcounts(d) || is_pvh_vcpu(v) ) /* nothing */; else if ( cr3_page == v->arch.old_guest_table ) { @@ -890,8 +897,15 @@ int arch_set_info_guest( /* handled below */; else if ( !compat ) { + /* PVH 32bitfixme */ + if ( is_pvh_vcpu(v) ) + { + v->arch.cr3 = page_to_mfn(cr3_page); + v->arch.hvm_vcpu.guest_cr[3] = c.nat->ctrlreg[3]; + } + v->arch.guest_table = pagetable_from_page(cr3_page); - if ( c.nat->ctrlreg[1] ) + if ( c.nat->ctrlreg[1] && !is_pvh_vcpu(v) ) { cr3_gfn = xen_cr3_to_pfn(c.nat->ctrlreg[1]); cr3_page = get_page_from_gfn(d, cr3_gfn, NULL, P2M_ALLOC); @@ -946,6 +960,13 @@ int arch_set_info_guest( update_cr3(v); + if ( is_pvh_vcpu(v) ) + { + /* guest is bringing up non-boot SMP vcpu */ + if ( (rc=hvm_set_vcpu_info(v, c.nat)) != 0 ) + return rc; + } + out: if ( flags & VGCF_online ) clear_bit(_VPF_down, &v->pause_flags); @@ -1450,7 +1471,7 @@ static void update_runstate_area(struct vcpu *v) static inline int need_full_gdt(struct vcpu *v) { - return (!is_hvm_vcpu(v) && !is_idle_vcpu(v)); + return (is_pv_vcpu(v) && !is_idle_vcpu(v)); } static void __context_switch(void) @@ -1584,7 +1605,7 @@ void context_switch(struct vcpu *prev, struct vcpu *next) /* Re-enable interrupts before restoring state which may fault. */ local_irq_enable(); - if ( !is_hvm_vcpu(next) ) + if ( is_pv_vcpu(next) ) { load_LDT(next); load_segments(next); @@ -1707,12 +1728,12 @@ unsigned long hypercall_create_continuation( regs->eax = op; /* Ensure the hypercall trap instruction is re-executed. 
*/ - if ( !is_hvm_vcpu(current) ) + if ( is_pv_vcpu(current) ) regs->eip -= 2; /* re-execute 'syscall' / 'int $xx' */ else current->arch.hvm_vcpu.hcall_preempted = 1; - if ( !is_hvm_vcpu(current) ? + if ( is_pv_vcpu(current) ? !is_pv_32on64_vcpu(current) : (hvm_guest_x86_mode(current) == 8) ) { @@ -1982,7 +2003,7 @@ int domain_relinquish_resources(struct domain *d) unmap_vcpu_info(v); } - if ( !is_hvm_domain(d) ) + if ( is_pv_domain(d) ) { for_each_vcpu ( d, v ) { @@ -2055,7 +2076,7 @@ int domain_relinquish_resources(struct domain *d) BUG(); } - if ( is_hvm_domain(d) ) + if ( !is_pv_domain(d) ) hvm_domain_relinquish_resources(d); return 0; @@ -2139,7 +2160,7 @@ void vcpu_mark_events_pending(struct vcpu *v) if ( already_pending ) return; - if ( is_hvm_vcpu(v) ) + if ( !is_pv_vcpu(v) ) hvm_assert_evtchn_irq(v); else vcpu_kick(v); diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c index 60f1a4f..ef37053 100644 --- a/xen/arch/x86/mm.c +++ b/xen/arch/x86/mm.c @@ -4330,6 +4330,9 @@ void destroy_gdt(struct vcpu *v) int i; unsigned long pfn; + if ( is_pvh_vcpu(v) ) + return; + v->arch.pv_vcpu.gdt_ents = 0; pl1e = gdt_ldt_ptes(v->domain, v); for ( i = 0; i < FIRST_RESERVED_GDT_PAGE; i++ ) diff --git a/xen/arch/x86/mm/hap/hap.c b/xen/arch/x86/mm/hap/hap.c index bff05d9..5aa0852 100644 --- a/xen/arch/x86/mm/hap/hap.c +++ b/xen/arch/x86/mm/hap/hap.c @@ -639,7 +639,9 @@ static void hap_update_cr3(struct vcpu *v, int do_locking) const struct paging_mode * hap_paging_get_mode(struct vcpu *v) { - return !hvm_paging_enabled(v) ? &hap_paging_real_mode : + /* PVH 32bitfixme */ + return is_pvh_vcpu(v) ? &hap_paging_long_mode : + !hvm_paging_enabled(v) ? &hap_paging_real_mode : hvm_long_mode_enabled(v) ? &hap_paging_long_mode : hvm_pae_enabled(v) ? 
&hap_paging_pae_mode : &hap_paging_protected_mode; diff --git a/xen/include/asm-x86/hvm/hvm.h b/xen/include/asm-x86/hvm/hvm.h index 8408420..9b5fa5b 100644 --- a/xen/include/asm-x86/hvm/hvm.h +++ b/xen/include/asm-x86/hvm/hvm.h @@ -192,6 +192,8 @@ struct hvm_function_table { paddr_t *L1_gpa, unsigned int *page_order, uint8_t *p2m_acc, bool_t access_r, bool_t access_w, bool_t access_x); + /* PVH functions */ + int (*pvh_set_vcpu_info)(struct vcpu *v, struct vcpu_guest_context *ctxtp); }; extern struct hvm_function_table hvm_funcs; @@ -325,6 +327,12 @@ static inline unsigned long hvm_get_shadow_gs_base(struct vcpu *v) return hvm_funcs.get_shadow_gs_base(v); } +static inline int hvm_set_vcpu_info(struct vcpu *v, + struct vcpu_guest_context *ctxtp) +{ + return hvm_funcs.pvh_set_vcpu_info(v, ctxtp); +} + #define is_viridian_domain(_d) \ (is_hvm_domain(_d) && ((_d)->arch.hvm_domain.params[HVM_PARAM_VIRIDIAN])) diff --git a/xen/include/asm-x86/system.h b/xen/include/asm-x86/system.h index d8dc6f2..7780c16 100644 --- a/xen/include/asm-x86/system.h +++ b/xen/include/asm-x86/system.h @@ -4,10 +4,20 @@ #include #include -#define read_segment_register(vcpu, regs, name) \ -({ u16 __sel; \ - asm volatile ( "movw %%" STR(name) ",%0" : "=r" (__sel) ); \ - __sel; \ +/* + * We need vcpu because during context switch, going from pure PV to PVH, + * in save_segments(), current has been updated to next, and no longer pointing + * to the pure PV. Note: for PVH, we update regs->selectors on each vmexit. + */ +#define read_segment_register(vcpu, regs, name) \ +({ u16 __sel; \ + struct cpu_user_regs *_regs = (regs); \ + \ + if ( is_pvh_vcpu(vcpu) ) \ + __sel = _regs->name; \ + else \ + asm volatile ( "movw %%" #name ",%0" : "=r" (__sel) ); \ + __sel; \ }) #define wbinvd() \ -- 1.7.2.3