From mboxrd@z Thu Jan  1 00:00:00 1970
From: Mukesh Rathor <mukesh.rathor@oracle.com>
Subject: [PATCH 10/20] PVH xen: create PVH vmcs, and also initialization
Date: Tue, 14 May 2013 17:52:38 -0700
Message-ID: <1368579168-30829-11-git-send-email-mukesh.rathor@oracle.com>
References: <1368579168-30829-1-git-send-email-mukesh.rathor@oracle.com>
Mime-Version: 1.0
Content-Type: text/plain; charset="us-ascii"
Content-Transfer-Encoding: 7bit
In-Reply-To: <1368579168-30829-1-git-send-email-mukesh.rathor@oracle.com>
Sender: xen-devel-bounces@lists.xen.org
Errors-To: xen-devel-bounces@lists.xen.org
To: Xen-devel@lists.xensource.com
List-Id: xen-devel@lists.xenproject.org

This patch mainly contains code to create a VMCS for a PVH guest, and
HVM-specific vcpu/domain creation code.

Changes in V2:
  - Avoid the call to hvm_do_resume() at the call site rather than returning
    early in it.
  - Return for PVH in vmx_do_resume prior to the Intel debugger code.

Changes in V3:
  - Clean up pvh_construct_vmcs().
  - Fix formatting in a few places, adding XENLOG_G_ERR to printing.
  - Do not load the CS selector for PVH here, but try to do that in Linux.

Changes in V4:
  - Remove VM_ENTRY_LOAD_DEBUG_CTLS clearing.
  - Add markers ("PVH 32bitfixme") for pending 32-bit kernel changes.
  - Verify the pit_init call for PVH.

Changes in V5:
  - Formatting; remove the unnecessary variable guest_pat.

Signed-off-by: Mukesh Rathor <mukesh.rathor@oracle.com>
---
 xen/arch/x86/hvm/hvm.c      |   94 ++++++++++++-
 xen/arch/x86/hvm/vmx/vmcs.c |  312 ++++++++++++++++++++++++++++++++++++++----
 xen/arch/x86/hvm/vmx/vmx.c  |   40 ++++++
 3 files changed, 410 insertions(+), 36 deletions(-)

diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c
index 7c3cb15..e103c70 100644
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -510,6 +510,30 @@ static int hvm_print_line(
     return X86EMUL_OKAY;
 }
 
+static int pvh_dom_initialise(struct domain *d)
+{
+    int rc;
+
+    if ( !d->arch.hvm_domain.hap_enabled )
+        return -EINVAL;
+
+    spin_lock_init(&d->arch.hvm_domain.irq_lock);
+
+    hvm_init_cacheattr_region_list(d);
+
+    if ( (rc = paging_enable(d, PG_refcounts|PG_translate|PG_external)) != 0 )
+        goto fail1;
+
+    if ( (rc = hvm_funcs.domain_initialise(d)) != 0 )
+        goto fail1;
+
+    return 0;
+
+fail1:
+    hvm_destroy_cacheattr_region_list(d);
+    return rc;
+}
+
 int hvm_domain_initialise(struct domain *d)
 {
     int rc;
@@ -520,6 +544,8 @@ int hvm_domain_initialise(struct domain *d)
                  "on a non-VT/AMDV platform.\n");
         return -EINVAL;
     }
+    if ( is_pvh_domain(d) )
+        return pvh_dom_initialise(d);
 
     spin_lock_init(&d->arch.hvm_domain.pbuf_lock);
     spin_lock_init(&d->arch.hvm_domain.irq_lock);
@@ -584,6 +610,11 @@ int hvm_domain_initialise(struct domain *d)
 
 void hvm_domain_relinquish_resources(struct domain *d)
 {
+    if ( is_pvh_domain(d) )
+    {
+        pit_deinit(d);
+        return;
+    }
     if ( hvm_funcs.nhvm_domain_relinquish_resources )
         hvm_funcs.nhvm_domain_relinquish_resources(d);
 
@@ -609,10 +640,14 @@ void hvm_domain_relinquish_resources(struct domain *d)
 void hvm_domain_destroy(struct domain *d)
 {
     hvm_funcs.domain_destroy(d);
+    hvm_destroy_cacheattr_region_list(d);
+
+    if ( is_pvh_domain(d) )
+        return;
+
     rtc_deinit(d);
     stdvga_deinit(d);
     vioapic_deinit(d);
-    hvm_destroy_cacheattr_region_list(d);
 }
 
 static int hvm_save_tsc_adjust(struct domain *d, hvm_domain_context_t *h)
@@ -1066,14 +1101,46 @@ static int __init __hvm_register_CPU_XSAVE_save_and_restore(void)
 }
 __initcall(__hvm_register_CPU_XSAVE_save_and_restore);
 
+static int pvh_vcpu_initialise(struct vcpu *v)
+{
+    int rc;
+
+    if ( (rc = hvm_funcs.vcpu_initialise(v)) != 0 )
+        return rc;
+
+    softirq_tasklet_init(&v->arch.hvm_vcpu.assert_evtchn_irq_tasklet,
+                         (void(*)(unsigned long))hvm_assert_evtchn_irq,
+                         (unsigned long)v);
+
+    v->arch.hvm_vcpu.hcall_64bit = 1;    /* PVH 32bitfixme */
+    v->arch.user_regs.eflags = 2;
+    v->arch.hvm_vcpu.inject_trap.vector = -1;
+
+    if ( (rc = hvm_vcpu_cacheattr_init(v)) != 0 )
+    {
+        hvm_funcs.vcpu_destroy(v);
+        return rc;
+    }
+    if ( v->vcpu_id == 0 )
+        pit_init(v, cpu_khz);
+
+    return 0;
+}
+
 int hvm_vcpu_initialise(struct vcpu *v)
 {
     int rc;
     struct domain *d = v->domain;
-    domid_t dm_domid = d->arch.hvm_domain.params[HVM_PARAM_DM_DOMAIN];
+    domid_t dm_domid;
 
     hvm_asid_flush_vcpu(v);
 
+    spin_lock_init(&v->arch.hvm_vcpu.tm_lock);
+    INIT_LIST_HEAD(&v->arch.hvm_vcpu.tm_list);
+
+    if ( is_pvh_vcpu(v) )
+        return pvh_vcpu_initialise(v);
+
     if ( (rc = vlapic_init(v)) != 0 )
         goto fail1;
 
@@ -1084,6 +1151,8 @@ int hvm_vcpu_initialise(struct vcpu *v)
          && (rc = nestedhvm_vcpu_initialise(v)) < 0 )
         goto fail3;
 
+    dm_domid = d->arch.hvm_domain.params[HVM_PARAM_DM_DOMAIN];
+
     /* Create ioreq event channel. */
     rc = alloc_unbound_xen_event_channel(v, dm_domid, NULL);
     if ( rc < 0 )
@@ -1106,9 +1175,6 @@ int hvm_vcpu_initialise(struct vcpu *v)
         get_ioreq(v)->vp_eport = v->arch.hvm_vcpu.xen_port;
     spin_unlock(&d->arch.hvm_domain.ioreq.lock);
 
-    spin_lock_init(&v->arch.hvm_vcpu.tm_lock);
-    INIT_LIST_HEAD(&v->arch.hvm_vcpu.tm_list);
-
     v->arch.hvm_vcpu.inject_trap.vector = -1;
 
     rc = setup_compat_arg_xlat(v);
@@ -1163,7 +1229,10 @@ void hvm_vcpu_destroy(struct vcpu *v)
 
     tasklet_kill(&v->arch.hvm_vcpu.assert_evtchn_irq_tasklet);
     hvm_vcpu_cacheattr_destroy(v);
-    vlapic_destroy(v);
+
+    if ( !is_pvh_vcpu(v) )
+        vlapic_destroy(v);
+
     hvm_funcs.vcpu_destroy(v);
 
     /* Event channel is already freed by evtchn_destroy(). */
@@ -4511,8 +4580,11 @@ static int hvm_memory_event_traps(long p, uint32_t reason,
     return 1;
 }
 
+/* PVH fixme: add support for monitoring guest behaviour in below functions */
 void hvm_memory_event_cr0(unsigned long value, unsigned long old)
 {
+    if ( is_pvh_vcpu(current) )
+        return;
     hvm_memory_event_traps(current->domain->arch.hvm_domain
                            .params[HVM_PARAM_MEMORY_EVENT_CR0],
                            MEM_EVENT_REASON_CR0,
@@ -4521,6 +4593,8 @@ void hvm_memory_event_cr0(unsigned long value, unsigned long old)
 
 void hvm_memory_event_cr3(unsigned long value, unsigned long old)
 {
+    if ( is_pvh_vcpu(current) )
+        return;
     hvm_memory_event_traps(current->domain->arch.hvm_domain
                            .params[HVM_PARAM_MEMORY_EVENT_CR3],
                            MEM_EVENT_REASON_CR3,
@@ -4529,6 +4603,8 @@ void hvm_memory_event_cr3(unsigned long value, unsigned long old)
 
 void hvm_memory_event_cr4(unsigned long value, unsigned long old)
 {
+    if ( is_pvh_vcpu(current) )
+        return;
     hvm_memory_event_traps(current->domain->arch.hvm_domain
                            .params[HVM_PARAM_MEMORY_EVENT_CR4],
                            MEM_EVENT_REASON_CR4,
@@ -4537,6 +4613,8 @@ void hvm_memory_event_cr4(unsigned long value, unsigned long old)
 
 void hvm_memory_event_msr(unsigned long msr, unsigned long value)
 {
+    if ( is_pvh_vcpu(current) )
+        return;
     hvm_memory_event_traps(current->domain->arch.hvm_domain
                            .params[HVM_PARAM_MEMORY_EVENT_MSR],
                            MEM_EVENT_REASON_MSR,
@@ -4549,6 +4627,8 @@ int hvm_memory_event_int3(unsigned long gla)
     unsigned long gfn;
     gfn = paging_gva_to_gfn(current, gla, &pfec);
 
+    if ( is_pvh_vcpu(current) )
+        return 0;
     return hvm_memory_event_traps(current->domain->arch.hvm_domain
                                   .params[HVM_PARAM_MEMORY_EVENT_INT3],
                                   MEM_EVENT_REASON_INT3,
@@ -4561,6 +4641,8 @@ int hvm_memory_event_single_step(unsigned long gla)
     unsigned long gfn;
     gfn = paging_gva_to_gfn(current, gla, &pfec);
 
+    if ( is_pvh_vcpu(current) )
+        return 0;
     return hvm_memory_event_traps(current->domain->arch.hvm_domain
                                   .params[HVM_PARAM_MEMORY_EVENT_SINGLE_STEP],
                                   MEM_EVENT_REASON_SINGLESTEP,
diff --git a/xen/arch/x86/hvm/vmx/vmcs.c b/xen/arch/x86/hvm/vmx/vmcs.c
index ef0ee7f..2ad07fd 100644
--- a/xen/arch/x86/hvm/vmx/vmcs.c
+++ b/xen/arch/x86/hvm/vmx/vmcs.c
@@ -634,7 +634,7 @@ void vmx_vmcs_exit(struct vcpu *v)
     {
         /* Don't confuse vmx_do_resume (for @v or @current!) */
         vmx_clear_vmcs(v);
-        if ( is_hvm_vcpu(current) )
+        if ( !is_pv_vcpu(current) )
             vmx_load_vmcs(current);
 
         spin_unlock(&v->arch.hvm_vmx.vmcs_lock);
@@ -825,16 +825,285 @@ void virtual_vmcs_vmwrite(void *vvmcs, u32 vmcs_encoding, u64 val)
     virtual_vmcs_exit(vvmcs);
 }
 
-static int construct_vmcs(struct vcpu *v)
+static void vmx_set_common_host_vmcs_fields(struct vcpu *v)
 {
-    struct domain *d = v->domain;
     uint16_t sysenter_cs;
     unsigned long sysenter_eip;
+
+    /* Host data selectors. */
+    __vmwrite(HOST_SS_SELECTOR, __HYPERVISOR_DS);
+    __vmwrite(HOST_DS_SELECTOR, __HYPERVISOR_DS);
+    __vmwrite(HOST_ES_SELECTOR, __HYPERVISOR_DS);
+    __vmwrite(HOST_FS_SELECTOR, 0);
+    __vmwrite(HOST_GS_SELECTOR, 0);
+    __vmwrite(HOST_FS_BASE, 0);
+    __vmwrite(HOST_GS_BASE, 0);
+
+    /* Host control registers. */
+    v->arch.hvm_vmx.host_cr0 = read_cr0() | X86_CR0_TS;
+    __vmwrite(HOST_CR0, v->arch.hvm_vmx.host_cr0);
+    __vmwrite(HOST_CR4,
+              mmu_cr4_features | (xsave_enabled(v) ? X86_CR4_OSXSAVE : 0));
+
+    /* Host CS:RIP. */
+    __vmwrite(HOST_CS_SELECTOR, __HYPERVISOR_CS);
+    __vmwrite(HOST_RIP, (unsigned long)vmx_asm_vmexit_handler);
+
+    /* Host SYSENTER CS:RIP. */
+    rdmsrl(MSR_IA32_SYSENTER_CS, sysenter_cs);
+    __vmwrite(HOST_SYSENTER_CS, sysenter_cs);
+    rdmsrl(MSR_IA32_SYSENTER_EIP, sysenter_eip);
+    __vmwrite(HOST_SYSENTER_EIP, sysenter_eip);
+}
+
+static int pvh_check_requirements(struct vcpu *v)
+{
+    u64 required, tmpval = real_cr4_to_pv_guest_cr4(mmu_cr4_features);
+
+    if ( !paging_mode_hap(v->domain) )
+    {
+        printk(XENLOG_G_INFO "HAP is required for PVH guest.\n");
+        return -EINVAL;
+    }
+    if ( !cpu_has_vmx_pat )
+    {
+        printk(XENLOG_G_INFO "PVH: CPU does not have PAT support\n");
+        return -ENOSYS;
+    }
+    if ( !cpu_has_vmx_msr_bitmap )
+    {
+        printk(XENLOG_G_INFO "PVH: CPU does not have msr bitmap\n");
+        return -ENOSYS;
+    }
+    if ( !cpu_has_vmx_vpid )
+    {
+        printk(XENLOG_G_INFO "PVH: CPU doesn't have VPID support\n");
+        return -ENOSYS;
+    }
+    if ( !cpu_has_vmx_secondary_exec_control )
+    {
+        printk(XENLOG_G_INFO "CPU Secondary exec is required to run PVH\n");
+        return -ENOSYS;
+    }
+
+    if ( v->domain->arch.vtsc )
+    {
+        printk(XENLOG_G_INFO
+               "At present PVH only supports the default timer mode\n");
+        return -ENOSYS;
+    }
+
+    required = X86_CR4_PAE | X86_CR4_VMXE | X86_CR4_OSFXSR;
+    if ( (tmpval & required) != required )
+    {
+        printk(XENLOG_G_INFO "PVH: required CR4 features not available:%lx\n",
+               required);
+        return -ENOSYS;
+    }
+
+    return 0;
+}
+
+static int pvh_construct_vmcs(struct vcpu *v)
+{
+    int rc, msr_type;
+    unsigned long *msr_bitmap;
+    struct domain *d = v->domain;
+    struct p2m_domain *p2m = p2m_get_hostp2m(d);
+    struct ept_data *ept = &p2m->ept;
+    u32 vmexit_ctl = vmx_vmexit_control;
+    u32 vmentry_ctl = vmx_vmentry_control;
+    u64 host_pat, tmpval = -1;
+
+    if ( (rc = pvh_check_requirements(v)) )
+        return rc;
+
+    msr_bitmap = alloc_xenheap_page();
+    if ( msr_bitmap == NULL )
+        return -ENOMEM;
+
+    /* 1. Pin-Based Controls */
+    __vmwrite(PIN_BASED_VM_EXEC_CONTROL, vmx_pin_based_exec_control);
+
+    v->arch.hvm_vmx.exec_control = vmx_cpu_based_exec_control;
+
+    /* 2. Primary Processor-based controls */
+    /*
+     * If rdtsc exiting is turned on and it goes thru emulate_privileged_op,
+     * then pv_vcpu.ctrlreg must be added to the pvh struct.
+     */
+    v->arch.hvm_vmx.exec_control &= ~CPU_BASED_RDTSC_EXITING;
+    v->arch.hvm_vmx.exec_control &= ~CPU_BASED_USE_TSC_OFFSETING;
+
+    v->arch.hvm_vmx.exec_control &= ~(CPU_BASED_INVLPG_EXITING |
+                                      CPU_BASED_CR3_LOAD_EXITING |
+                                      CPU_BASED_CR3_STORE_EXITING);
+    v->arch.hvm_vmx.exec_control |= CPU_BASED_ACTIVATE_SECONDARY_CONTROLS;
+    v->arch.hvm_vmx.exec_control &= ~CPU_BASED_MONITOR_TRAP_FLAG;
+    v->arch.hvm_vmx.exec_control |= CPU_BASED_ACTIVATE_MSR_BITMAP;
+    v->arch.hvm_vmx.exec_control &= ~CPU_BASED_TPR_SHADOW;
+    v->arch.hvm_vmx.exec_control &= ~CPU_BASED_VIRTUAL_NMI_PENDING;
+
+    __vmwrite(CPU_BASED_VM_EXEC_CONTROL, v->arch.hvm_vmx.exec_control);
+
+    /* 3. Secondary Processor-based controls. Intel SDM: all resvd bits are 0 */
+    v->arch.hvm_vmx.secondary_exec_control = SECONDARY_EXEC_ENABLE_EPT;
+    v->arch.hvm_vmx.secondary_exec_control |= SECONDARY_EXEC_ENABLE_VPID;
+    v->arch.hvm_vmx.secondary_exec_control |= SECONDARY_EXEC_PAUSE_LOOP_EXITING;
+
+    __vmwrite(SECONDARY_VM_EXEC_CONTROL,
+              v->arch.hvm_vmx.secondary_exec_control);
+
+    __vmwrite(IO_BITMAP_A, virt_to_maddr((char *)hvm_io_bitmap + 0));
+    __vmwrite(IO_BITMAP_B, virt_to_maddr((char *)hvm_io_bitmap + PAGE_SIZE));
+
+    /* MSR bitmap for intercepts */
+    memset(msr_bitmap, ~0, PAGE_SIZE);
+    v->arch.hvm_vmx.msr_bitmap = msr_bitmap;
+    __vmwrite(MSR_BITMAP, virt_to_maddr(msr_bitmap));
+
+    msr_type = MSR_TYPE_R | MSR_TYPE_W;
+    /* Disable intercepts for MSRs that have corresponding VMCS fields */
+    vmx_disable_intercept_for_msr(v, MSR_FS_BASE, msr_type);
+    vmx_disable_intercept_for_msr(v, MSR_GS_BASE, msr_type);
+    vmx_disable_intercept_for_msr(v, MSR_IA32_SYSENTER_CS, msr_type);
+    vmx_disable_intercept_for_msr(v, MSR_IA32_SYSENTER_ESP, msr_type);
+    vmx_disable_intercept_for_msr(v, MSR_IA32_SYSENTER_EIP, msr_type);
+    vmx_disable_intercept_for_msr(v, MSR_SHADOW_GS_BASE, msr_type);
+    vmx_disable_intercept_for_msr(v, MSR_IA32_CR_PAT, msr_type);
+
+    /*
+     * We don't disable intercepts for MSRs: MSR_STAR, MSR_LSTAR, MSR_CSTAR,
+     * and MSR_SYSCALL_MASK because we need to specify a save/restore area to
+     * save/restore at every VM exit and entry. Instead, let the intercept
+     * functions save them into vmx_msr_state fields. See comment in
+     * vmx_restore_host_msrs(). See also vmx_restore_guest_msrs().
+     */
+    __vmwrite(VM_ENTRY_MSR_LOAD_COUNT, 0);
+    __vmwrite(VM_EXIT_MSR_LOAD_COUNT, 0);
+    __vmwrite(VM_EXIT_MSR_STORE_COUNT, 0);
+
+    __vmwrite(VM_EXIT_CONTROLS, vmexit_ctl);
+
+    /*
+     * Note: we run with the default VM_ENTRY_LOAD_DEBUG_CTLS of 1, which means
+     * upon vmentry the cpu reads/loads VMCS.DR7 and VMCS.DEBUGCTLS and does
+     * not use the host values. 0 would cause it to not use the VMCS values.
+     */
+    vmentry_ctl &= ~VM_ENTRY_LOAD_GUEST_EFER;
+    vmentry_ctl &= ~VM_ENTRY_SMM;
+    vmentry_ctl &= ~VM_ENTRY_DEACT_DUAL_MONITOR;
+    /* PVH 32bitfixme */
+    vmentry_ctl |= VM_ENTRY_IA32E_MODE;   /* GUEST_EFER.LME/LMA ignored */
+
+    __vmwrite(VM_ENTRY_CONTROLS, vmentry_ctl);
+
+    vmx_set_common_host_vmcs_fields(v);
+
+    __vmwrite(VM_ENTRY_INTR_INFO, 0);
+    __vmwrite(CR3_TARGET_COUNT, 0);
+    __vmwrite(GUEST_ACTIVITY_STATE, 0);
+
+    /* These are sorta irrelevant as we load the descriptors directly. */
+    __vmwrite(GUEST_CS_SELECTOR, 0);
+    __vmwrite(GUEST_DS_SELECTOR, 0);
+    __vmwrite(GUEST_SS_SELECTOR, 0);
+    __vmwrite(GUEST_ES_SELECTOR, 0);
+    __vmwrite(GUEST_FS_SELECTOR, 0);
+    __vmwrite(GUEST_GS_SELECTOR, 0);
+
+    __vmwrite(GUEST_CS_BASE, 0);
+    __vmwrite(GUEST_CS_LIMIT, ~0u);
+    /* CS.L == 1, exec, read/write, accessed. PVH 32bitfixme */
+    __vmwrite(GUEST_CS_AR_BYTES, 0xa09b);
+
+    __vmwrite(GUEST_DS_BASE, 0);
+    __vmwrite(GUEST_DS_LIMIT, ~0u);
+    __vmwrite(GUEST_DS_AR_BYTES, 0xc093); /* read/write, accessed */
+
+    __vmwrite(GUEST_SS_BASE, 0);
+    __vmwrite(GUEST_SS_LIMIT, ~0u);
+    __vmwrite(GUEST_SS_AR_BYTES, 0xc093); /* read/write, accessed */
+
+    __vmwrite(GUEST_ES_BASE, 0);
+    __vmwrite(GUEST_ES_LIMIT, ~0u);
+    __vmwrite(GUEST_ES_AR_BYTES, 0xc093); /* read/write, accessed */
+
+    __vmwrite(GUEST_FS_BASE, 0);
+    __vmwrite(GUEST_FS_LIMIT, ~0u);
+    __vmwrite(GUEST_FS_AR_BYTES, 0xc093); /* read/write, accessed */
+
+    __vmwrite(GUEST_GS_BASE, 0);
+    __vmwrite(GUEST_GS_LIMIT, ~0u);
+    __vmwrite(GUEST_GS_AR_BYTES, 0xc093); /* read/write, accessed */
+
+    __vmwrite(GUEST_GDTR_BASE, 0);
+    __vmwrite(GUEST_GDTR_LIMIT, 0);
+
+    __vmwrite(GUEST_LDTR_BASE, 0);
+    __vmwrite(GUEST_LDTR_LIMIT, 0);
+    __vmwrite(GUEST_LDTR_AR_BYTES, 0x82); /* LDT */
+    __vmwrite(GUEST_LDTR_SELECTOR, 0);
+
+    /* Guest TSS. */
+    __vmwrite(GUEST_TR_BASE, 0);
+    __vmwrite(GUEST_TR_LIMIT, 0xff);
+    __vmwrite(GUEST_TR_AR_BYTES, 0x8b); /* 32-bit TSS (busy) */
+
+    __vmwrite(GUEST_INTERRUPTIBILITY_INFO, 0);
+    __vmwrite(GUEST_DR7, 0);
+    __vmwrite(VMCS_LINK_POINTER, ~0UL);
+
+    __vmwrite(PAGE_FAULT_ERROR_CODE_MASK, 0);
+    __vmwrite(PAGE_FAULT_ERROR_CODE_MATCH, 0);
+
+    v->arch.hvm_vmx.exception_bitmap = HVM_TRAP_MASK | (1U << TRAP_debug) |
+                                       (1U << TRAP_int3) | (1U << TRAP_no_device);
+    __vmwrite(EXCEPTION_BITMAP, v->arch.hvm_vmx.exception_bitmap);
+
+    /* Set the WP bit so rdonly pages are not written from CPL 0. */
+    tmpval = X86_CR0_PG | X86_CR0_NE | X86_CR0_PE | X86_CR0_WP;
+    __vmwrite(GUEST_CR0, tmpval);
+    __vmwrite(CR0_READ_SHADOW, tmpval);
+    v->arch.hvm_vcpu.hw_cr[0] = v->arch.hvm_vcpu.guest_cr[0] = tmpval;
+
+    tmpval = real_cr4_to_pv_guest_cr4(mmu_cr4_features);
+    __vmwrite(GUEST_CR4, tmpval);
+    __vmwrite(CR4_READ_SHADOW, tmpval);
+    v->arch.hvm_vcpu.guest_cr[4] = tmpval;
+
+    __vmwrite(CR0_GUEST_HOST_MASK, ~0UL);
+    __vmwrite(CR4_GUEST_HOST_MASK, ~0UL);
+
+    v->arch.hvm_vmx.vmx_realmode = 0;
+
+    ept->asr = pagetable_get_pfn(p2m_get_pagetable(p2m));
+    __vmwrite(EPT_POINTER, ept_get_eptp(ept));
+
+    rdmsrl(MSR_IA32_CR_PAT, host_pat);
+    __vmwrite(HOST_PAT, host_pat);
+    __vmwrite(GUEST_PAT, MSR_IA32_CR_PAT_RESET);
+
+    /* The paging mode is updated for PVH by arch_set_info_guest(). */
+
+    return 0;
+}
+
+static int construct_vmcs(struct vcpu *v)
+{
+    struct domain *d = v->domain;
     u32 vmexit_ctl = vmx_vmexit_control;
     u32 vmentry_ctl = vmx_vmentry_control;
 
     vmx_vmcs_enter(v);
 
+    if ( is_pvh_vcpu(v) )
+    {
+        int rc = pvh_construct_vmcs(v);
+        vmx_vmcs_exit(v);
+        return rc;
+    }
+
     /* VMCS controls. */
     __vmwrite(PIN_BASED_VM_EXEC_CONTROL, vmx_pin_based_exec_control);
@@ -932,30 +1201,7 @@ static int construct_vmcs(struct vcpu *v)
         __vmwrite(POSTED_INTR_NOTIFICATION_VECTOR, posted_intr_vector);
     }
 
-    /* Host data selectors. */
-    __vmwrite(HOST_SS_SELECTOR, __HYPERVISOR_DS);
-    __vmwrite(HOST_DS_SELECTOR, __HYPERVISOR_DS);
-    __vmwrite(HOST_ES_SELECTOR, __HYPERVISOR_DS);
-    __vmwrite(HOST_FS_SELECTOR, 0);
-    __vmwrite(HOST_GS_SELECTOR, 0);
-    __vmwrite(HOST_FS_BASE, 0);
-    __vmwrite(HOST_GS_BASE, 0);
-
-    /* Host control registers. */
-    v->arch.hvm_vmx.host_cr0 = read_cr0() | X86_CR0_TS;
-    __vmwrite(HOST_CR0, v->arch.hvm_vmx.host_cr0);
-    __vmwrite(HOST_CR4,
-              mmu_cr4_features | (xsave_enabled(v) ? X86_CR4_OSXSAVE : 0));
-
-    /* Host CS:RIP. */
-    __vmwrite(HOST_CS_SELECTOR, __HYPERVISOR_CS);
-    __vmwrite(HOST_RIP, (unsigned long)vmx_asm_vmexit_handler);
-
-    /* Host SYSENTER CS:RIP. */
-    rdmsrl(MSR_IA32_SYSENTER_CS, sysenter_cs);
-    __vmwrite(HOST_SYSENTER_CS, sysenter_cs);
-    rdmsrl(MSR_IA32_SYSENTER_EIP, sysenter_eip);
-    __vmwrite(HOST_SYSENTER_EIP, sysenter_eip);
+    vmx_set_common_host_vmcs_fields(v);
 
     /* MSR intercepts. */
     __vmwrite(VM_EXIT_MSR_LOAD_COUNT, 0);
@@ -1275,8 +1521,11 @@ void vmx_do_resume(struct vcpu *v)
 
         vmx_clear_vmcs(v);
         vmx_load_vmcs(v);
-        hvm_migrate_timers(v);
-        hvm_migrate_pirqs(v);
+        if ( !is_pvh_vcpu(v) )
+        {
+            hvm_migrate_timers(v);
+            hvm_migrate_pirqs(v);
+        }
         vmx_set_host_env(v);
         /*
          * Both n1 VMCS and n2 VMCS need to update the host environment after
@@ -1288,6 +1537,9 @@ void vmx_do_resume(struct vcpu *v)
         hvm_asid_flush_vcpu(v);
     }
 
+    if ( is_pvh_vcpu(v) )
+        reset_stack_and_jump(vmx_asm_do_vmentry);
+
     debug_state = v->domain->debugger_attached
                   || v->domain->arch.hvm_domain.params[HVM_PARAM_MEMORY_EVENT_INT3]
                   || v->domain->arch.hvm_domain.params[HVM_PARAM_MEMORY_EVENT_SINGLE_STEP];
@@ -1471,7 +1723,7 @@ static void vmcs_dump(unsigned char ch)
 
     for_each_domain ( d )
    {
-        if ( !is_hvm_domain(d) )
+        if ( is_pv_domain(d) )
            continue;
        printk("\n>>> Domain %d <<<\n", d->domain_id);
        for_each_vcpu ( d, v )
diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
index 7e5dba8..bd4c8bd 100644
--- a/xen/arch/x86/hvm/vmx/vmx.c
+++ b/xen/arch/x86/hvm/vmx/vmx.c
@@ -82,6 +82,9 @@ static int vmx_domain_initialise(struct domain *d)
 {
     int rc;
 
+    if ( is_pvh_domain(d) )
+        return 0;
+
     if ( (rc = vmx_alloc_vlapic_mapping(d)) != 0 )
         return rc;
 
@@ -90,6 +93,9 @@ static int vmx_domain_initialise(struct domain *d)
 
 static void vmx_domain_destroy(struct domain *d)
 {
+    if ( is_pvh_domain(d) )
+        return;
+
     vmx_free_vlapic_mapping(d);
 }
 
@@ -113,6 +119,12 @@ static int vmx_vcpu_initialise(struct vcpu *v)
 
     vpmu_initialise(v);
 
+    if ( is_pvh_vcpu(v) )
+    {
+        /* This is for hvm_long_mode_enabled(v). */
+        v->arch.hvm_vcpu.guest_efer = EFER_SCE | EFER_LMA | EFER_LME;
+        return 0;
+    }
     vmx_install_vlapic_mapping(v);
 
     /* %eax == 1 signals full real-mode support to the guest loader. */
@@ -1034,6 +1046,28 @@ static void vmx_update_host_cr3(struct vcpu *v)
     vmx_vmcs_exit(v);
 }
 
+/*
+ * A PVH guest never causes a CR3 write vmexit. This is called during
+ * guest setup.
+ */
+static void vmx_update_pvh_cr(struct vcpu *v, unsigned int cr)
+{
+    vmx_vmcs_enter(v);
+    switch ( cr )
+    {
+    case 3:
+        __vmwrite(GUEST_CR3, v->arch.hvm_vcpu.guest_cr[3]);
+        hvm_asid_flush_vcpu(v);
+        break;
+
+    default:
+        printk(XENLOG_ERR
+               "PVH: d%d v%d unexpected cr%d update at rip:%lx\n",
+               v->domain->domain_id, v->vcpu_id, cr, __vmread(GUEST_RIP));
+    }
+    vmx_vmcs_exit(v);
+}
+
 void vmx_update_debug_state(struct vcpu *v)
 {
     unsigned long mask;
@@ -1053,6 +1087,12 @@ void vmx_update_debug_state(struct vcpu *v)
 
 static void vmx_update_guest_cr(struct vcpu *v, unsigned int cr)
 {
+    if ( is_pvh_vcpu(v) )
+    {
+        vmx_update_pvh_cr(v, cr);
+        return;
+    }
+
     vmx_vmcs_enter(v);
 
     switch ( cr )
-- 
1.7.2.3