* [PATCH 9/18 V2]: PVH xen: create PVH vmcs, and initialization
@ 2013-03-16  0:39 Mukesh Rathor
  2013-03-18 12:03 ` Jan Beulich
  2013-03-18 15:28 ` Konrad Rzeszutek Wilk
  0 siblings, 2 replies; 7+ messages in thread
From: Mukesh Rathor @ 2013-03-16  0:39 UTC (permalink / raw)
  To: Xen-devel

This patch mainly contains code to create a VMCS for PVH guest, and HVM
 specific vcpu/domain creation code.

Changes in V2:
  - Avoid call to hvm_do_resume() at call site rather than return in it.
  - Return for PVH vmx_do_resume prior to intel debugger stuff.

Signed-off-by: Mukesh Rathor <mukesh.rathor@oracle.com>
---
 xen/arch/x86/hvm/hvm.c      |   90 ++++++++++++++-
 xen/arch/x86/hvm/vmx/vmcs.c |  266 ++++++++++++++++++++++++++++++++++++++++++-
 xen/arch/x86/hvm/vmx/vmx.c  |   34 ++++++
 3 files changed, 383 insertions(+), 7 deletions(-)

diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c
index ea7adf6..18889ad 100644
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -510,6 +510,29 @@ static int hvm_print_line(
     return X86EMUL_OKAY;
 }
 
+static int hvm_pvh_dom_initialise(struct domain *d)
+{
+    int rc;
+
+    if (!d->arch.hvm_domain.hap_enabled)
+        return -EINVAL;
+
+    spin_lock_init(&d->arch.hvm_domain.irq_lock);
+    hvm_init_guest_time(d);
+
+    hvm_init_cacheattr_region_list(d);
+
+    if ( (rc=paging_enable(d, PG_refcounts|PG_translate|PG_external)) != 0 )
+        goto fail1;
+
+    if ( (rc = hvm_funcs.domain_initialise(d)) == 0 )
+        return 0;
+
+fail1:
+    hvm_destroy_cacheattr_region_list(d);
+    return rc;
+}
+
 int hvm_domain_initialise(struct domain *d)
 {
     int rc;
@@ -520,6 +543,8 @@ int hvm_domain_initialise(struct domain *d)
                  "on a non-VT/AMDV platform.\n");
         return -EINVAL;
     }
+    if ( is_pvh_domain(d) )
+        return hvm_pvh_dom_initialise(d);
 
     spin_lock_init(&d->arch.hvm_domain.pbuf_lock);
     spin_lock_init(&d->arch.hvm_domain.irq_lock);
@@ -584,6 +609,11 @@ int hvm_domain_initialise(struct domain *d)
 
 void hvm_domain_relinquish_resources(struct domain *d)
 {
+    if ( is_pvh_domain(d) ) 
+    {
+        pit_deinit(d);
+        return;
+    }
     if ( hvm_funcs.nhvm_domain_relinquish_resources )
         hvm_funcs.nhvm_domain_relinquish_resources(d);
 
@@ -609,10 +639,14 @@ void hvm_domain_relinquish_resources(struct domain *d)
 void hvm_domain_destroy(struct domain *d)
 {
     hvm_funcs.domain_destroy(d);
+    hvm_destroy_cacheattr_region_list(d);
+
+    if ( is_pvh_domain(d) )
+        return;
+
     rtc_deinit(d);
     stdvga_deinit(d);
     vioapic_deinit(d);
-    hvm_destroy_cacheattr_region_list(d);
 }
 
 static int hvm_save_tsc_adjust(struct domain *d, hvm_domain_context_t *h)
@@ -1066,14 +1100,47 @@ static int __init __hvm_register_CPU_XSAVE_save_and_restore(void)
 }
 __initcall(__hvm_register_CPU_XSAVE_save_and_restore);
 
+static int hvm_pvh_vcpu_initialise(struct vcpu *v)
+{
+    int rc;
+
+    if ( (rc = hvm_funcs.vcpu_initialise(v)) != 0 )
+        return rc;
+
+    softirq_tasklet_init( &v->arch.hvm_vcpu.assert_evtchn_irq_tasklet,
+                          (void(*)(unsigned long))hvm_assert_evtchn_irq,
+                          (unsigned long)v );
+
+    v->arch.hvm_vcpu.hcall_64bit = 1;
+    v->arch.hvm_vcpu.hvm_pvh.vcpu_info_mfn = INVALID_MFN;
+    v->arch.user_regs.eflags = 2;
+    v->arch.hvm_vcpu.inject_trap.vector = -1;
+
+    if ( (rc=hvm_vcpu_cacheattr_init(v)) != 0 ) {
+        hvm_funcs.vcpu_destroy(v);
+        return rc;
+    }
+
+    /* during domain shutdown: pvh_vmx_vmexit_handler->emulate_privileged_op
+     * -> guest_io_read -> pv_pit_handler -> handle_speaker_io -> _spin_lock
+     *  so we call pit_init to initialize the spin lock */
+    if ( v->vcpu_id == 0 )
+        pit_init(v, cpu_khz);
+
+    return 0;
+}
+
 int hvm_vcpu_initialise(struct vcpu *v)
 {
     int rc;
     struct domain *d = v->domain;
-    domid_t dm_domid = d->arch.hvm_domain.params[HVM_PARAM_DM_DOMAIN];
+    domid_t dm_domid; 
 
     hvm_asid_flush_vcpu(v);
 
+    if ( is_pvh_vcpu(v) )
+        return hvm_pvh_vcpu_initialise(v);
+
     if ( (rc = vlapic_init(v)) != 0 )
         goto fail1;
 
@@ -1084,6 +1151,8 @@ int hvm_vcpu_initialise(struct vcpu *v)
          && (rc = nestedhvm_vcpu_initialise(v)) < 0 ) 
         goto fail3;
 
+    dm_domid = d->arch.hvm_domain.params[HVM_PARAM_DM_DOMAIN];
+
     /* Create ioreq event channel. */
     rc = alloc_unbound_xen_event_channel(v, dm_domid, NULL);
     if ( rc < 0 )
@@ -1163,7 +1232,10 @@ void hvm_vcpu_destroy(struct vcpu *v)
 
     tasklet_kill(&v->arch.hvm_vcpu.assert_evtchn_irq_tasklet);
     hvm_vcpu_cacheattr_destroy(v);
-    vlapic_destroy(v);
+
+    if ( !is_pvh_vcpu(v) )
+        vlapic_destroy(v);
+
     hvm_funcs.vcpu_destroy(v);
 
     /* Event channel is already freed by evtchn_destroy(). */
@@ -4514,6 +4586,8 @@ static int hvm_memory_event_traps(long p, uint32_t reason,
 
 void hvm_memory_event_cr0(unsigned long value, unsigned long old) 
 {
+    if ( is_pvh_vcpu(current) )
+        return;
     hvm_memory_event_traps(current->domain->arch.hvm_domain
                              .params[HVM_PARAM_MEMORY_EVENT_CR0],
                            MEM_EVENT_REASON_CR0,
@@ -4522,6 +4596,8 @@ void hvm_memory_event_cr0(unsigned long value, unsigned long old)
 
 void hvm_memory_event_cr3(unsigned long value, unsigned long old) 
 {
+    if ( is_pvh_vcpu(current) )
+        return;
     hvm_memory_event_traps(current->domain->arch.hvm_domain
                              .params[HVM_PARAM_MEMORY_EVENT_CR3],
                            MEM_EVENT_REASON_CR3,
@@ -4530,6 +4606,8 @@ void hvm_memory_event_cr3(unsigned long value, unsigned long old)
 
 void hvm_memory_event_cr4(unsigned long value, unsigned long old) 
 {
+    if ( is_pvh_vcpu(current) )
+        return;
     hvm_memory_event_traps(current->domain->arch.hvm_domain
                              .params[HVM_PARAM_MEMORY_EVENT_CR4],
                            MEM_EVENT_REASON_CR4,
@@ -4538,6 +4616,8 @@ void hvm_memory_event_cr4(unsigned long value, unsigned long old)
 
 void hvm_memory_event_msr(unsigned long msr, unsigned long value)
 {
+    if ( is_pvh_vcpu(current) )
+        return;
     hvm_memory_event_traps(current->domain->arch.hvm_domain
                              .params[HVM_PARAM_MEMORY_EVENT_MSR],
                            MEM_EVENT_REASON_MSR,
@@ -4550,6 +4630,8 @@ int hvm_memory_event_int3(unsigned long gla)
     unsigned long gfn;
     gfn = paging_gva_to_gfn(current, gla, &pfec);
 
+    if ( is_pvh_vcpu(current) )
+        return 0;
     return hvm_memory_event_traps(current->domain->arch.hvm_domain
                                     .params[HVM_PARAM_MEMORY_EVENT_INT3],
                                   MEM_EVENT_REASON_INT3,
@@ -4562,6 +4644,8 @@ int hvm_memory_event_single_step(unsigned long gla)
     unsigned long gfn;
     gfn = paging_gva_to_gfn(current, gla, &pfec);
 
+    if ( is_pvh_vcpu(current) )
+        return 0;
     return hvm_memory_event_traps(current->domain->arch.hvm_domain
             .params[HVM_PARAM_MEMORY_EVENT_SINGLE_STEP],
             MEM_EVENT_REASON_SINGLESTEP,
diff --git a/xen/arch/x86/hvm/vmx/vmcs.c b/xen/arch/x86/hvm/vmx/vmcs.c
index 9926ffb..b0bea9c 100644
--- a/xen/arch/x86/hvm/vmx/vmcs.c
+++ b/xen/arch/x86/hvm/vmx/vmcs.c
@@ -624,7 +624,7 @@ void vmx_vmcs_exit(struct vcpu *v)
     {
         /* Don't confuse vmx_do_resume (for @v or @current!) */
         vmx_clear_vmcs(v);
-        if ( is_hvm_vcpu(current) )
+        if ( is_hvm_or_pvh_vcpu(current) )
             vmx_load_vmcs(current);
 
         spin_unlock(&v->arch.hvm_vmx.vmcs_lock);
@@ -815,6 +815,253 @@ void virtual_vmcs_vmwrite(void *vvmcs, u32 vmcs_encoding, u64 val)
     virtual_vmcs_exit(vvmcs);
 }
 
+static int pvh_construct_vmcs(struct vcpu *v)
+{
+    uint16_t sysenter_cs;
+    unsigned long sysenter_eip;
+    struct domain *d = v->domain;
+    struct p2m_domain *p2m = p2m_get_hostp2m(d);
+    struct ept_data *ept = &p2m->ept;
+    u32 vmexit_ctl = vmx_vmexit_control;
+    u32 vmentry_ctl = vmx_vmentry_control;
+    u64 required, tmpval = -1;
+
+    if ( !paging_mode_hap(d) )
+    {
+        printk("ERROR: HAP is required to run PV in HVM container\n");
+        return -EINVAL;
+    }
+
+    /* VMCS controls. */
+    vmx_pin_based_exec_control &= ~PIN_BASED_VIRTUAL_NMIS;
+    __vmwrite(PIN_BASED_VM_EXEC_CONTROL, vmx_pin_based_exec_control);
+
+    v->arch.hvm_vmx.exec_control = vmx_cpu_based_exec_control;
+
+    /* if rdtsc exiting is turned on and it goes thru emulate_privileged_op,
+     * then pv_vcpu.ctrlreg must be added to pvh struct */
+    v->arch.hvm_vmx.exec_control &= ~CPU_BASED_RDTSC_EXITING;
+    v->arch.hvm_vmx.exec_control &= ~CPU_BASED_USE_TSC_OFFSETING;
+
+    v->arch.hvm_vmx.exec_control &= ~(CPU_BASED_INVLPG_EXITING |
+                                      CPU_BASED_CR3_LOAD_EXITING |
+                                      CPU_BASED_CR3_STORE_EXITING);
+    v->arch.hvm_vmx.exec_control |= CPU_BASED_ACTIVATE_SECONDARY_CONTROLS;
+    v->arch.hvm_vmx.exec_control &= ~CPU_BASED_MONITOR_TRAP_FLAG;
+    v->arch.hvm_vmx.exec_control |= CPU_BASED_ACTIVATE_MSR_BITMAP;
+    v->arch.hvm_vmx.exec_control &= ~CPU_BASED_TPR_SHADOW;
+    v->arch.hvm_vmx.exec_control &= ~CPU_BASED_VIRTUAL_NMI_PENDING;
+
+    __vmwrite(CPU_BASED_VM_EXEC_CONTROL, v->arch.hvm_vmx.exec_control);
+
+    /* I/O access bitmap. */
+    __vmwrite(IO_BITMAP_A, virt_to_maddr((char *)hvm_io_bitmap + 0));
+    __vmwrite(IO_BITMAP_B, virt_to_maddr((char *)hvm_io_bitmap + PAGE_SIZE));
+
+    /* MSR access bitmap. */
+    if ( cpu_has_vmx_msr_bitmap )
+    {
+        unsigned long *msr_bitmap = alloc_xenheap_page();
+        int msr_type = MSR_TYPE_R | MSR_TYPE_W;
+
+        if ( msr_bitmap == NULL )
+            return -ENOMEM;
+
+        memset(msr_bitmap, ~0, PAGE_SIZE);
+        v->arch.hvm_vmx.msr_bitmap = msr_bitmap;
+        __vmwrite(MSR_BITMAP, virt_to_maddr(msr_bitmap));
+
+        vmx_disable_intercept_for_msr(v, MSR_FS_BASE, msr_type);
+        vmx_disable_intercept_for_msr(v, MSR_GS_BASE, msr_type);
+        vmx_disable_intercept_for_msr(v, MSR_IA32_SYSENTER_CS, msr_type);
+        vmx_disable_intercept_for_msr(v, MSR_IA32_SYSENTER_ESP, msr_type);
+        vmx_disable_intercept_for_msr(v, MSR_IA32_SYSENTER_EIP, msr_type);
+        vmx_disable_intercept_for_msr(v, MSR_SHADOW_GS_BASE, msr_type);
+
+        /* pure hvm doesn't do this. safe? see: long_mode_do_msr_write() */
+#if 0
+        vmx_disable_intercept_for_msr(v, MSR_STAR);
+        vmx_disable_intercept_for_msr(v, MSR_LSTAR);
+        vmx_disable_intercept_for_msr(v, MSR_CSTAR);
+        vmx_disable_intercept_for_msr(v, MSR_SYSCALL_MASK);
+#endif
+    } else {
+        printk("PVH: CPU does NOT have msr bitmap\n");
+        return -EINVAL;
+    }
+
+    if ( !cpu_has_vmx_vpid ) {
+        printk("PVH: At present VPID support is required to run PVH\n");
+        return -EINVAL;
+    }
+
+    v->arch.hvm_vmx.secondary_exec_control = vmx_secondary_exec_control;
+
+    if ( cpu_has_vmx_secondary_exec_control ) {
+        v->arch.hvm_vmx.secondary_exec_control &= ~0x4FF; /* turn off all */
+        v->arch.hvm_vmx.secondary_exec_control |= 
+                                              SECONDARY_EXEC_PAUSE_LOOP_EXITING;
+        v->arch.hvm_vmx.secondary_exec_control |= SECONDARY_EXEC_ENABLE_VPID;
+
+        v->arch.hvm_vmx.secondary_exec_control |= SECONDARY_EXEC_ENABLE_EPT;
+        __vmwrite(SECONDARY_VM_EXEC_CONTROL,
+                  v->arch.hvm_vmx.secondary_exec_control);
+    } else {
+        printk("PVH: NO Secondary Exec control\n");
+        return -EINVAL;
+    }
+
+    __vmwrite(VM_EXIT_CONTROLS, vmexit_ctl);
+
+    #define VM_ENTRY_LOAD_DEBUG_CTLS 0x4
+    #define VM_ENTRY_LOAD_EFER 0x8000
+    vmentry_ctl &= ~VM_ENTRY_LOAD_DEBUG_CTLS;
+    vmentry_ctl &= ~VM_ENTRY_LOAD_EFER;
+    vmentry_ctl &= ~VM_ENTRY_SMM;
+    vmentry_ctl &= ~VM_ENTRY_DEACT_DUAL_MONITOR;
+    vmentry_ctl |= VM_ENTRY_IA32E_MODE;
+    __vmwrite(VM_ENTRY_CONTROLS, vmentry_ctl);
+
+    /* MSR intercepts. */
+    __vmwrite(VM_ENTRY_MSR_LOAD_COUNT, 0);
+    __vmwrite(VM_EXIT_MSR_LOAD_COUNT, 0);
+    __vmwrite(VM_EXIT_MSR_STORE_COUNT, 0);
+
+    /* Host data selectors. */
+    __vmwrite(HOST_SS_SELECTOR, __HYPERVISOR_DS);
+    __vmwrite(HOST_DS_SELECTOR, __HYPERVISOR_DS);
+    __vmwrite(HOST_ES_SELECTOR, __HYPERVISOR_DS);
+    __vmwrite(HOST_FS_SELECTOR, 0);
+    __vmwrite(HOST_GS_SELECTOR, 0);
+    __vmwrite(HOST_FS_BASE, 0);
+    __vmwrite(HOST_GS_BASE, 0);
+
+    vmx_set_host_env(v);
+
+    /* Host control registers. */
+    v->arch.hvm_vmx.host_cr0 = read_cr0() | X86_CR0_TS;
+    __vmwrite(HOST_CR0, v->arch.hvm_vmx.host_cr0);
+    __vmwrite(HOST_CR4, mmu_cr4_features|(cpu_has_xsave ? X86_CR4_OSXSAVE : 0));
+
+    /* Host CS:RIP. */
+    __vmwrite(HOST_CS_SELECTOR, __HYPERVISOR_CS);
+    __vmwrite(HOST_RIP, (unsigned long)vmx_asm_vmexit_handler);
+
+    /* Host SYSENTER CS:RIP. */
+    rdmsrl(MSR_IA32_SYSENTER_CS, sysenter_cs);
+    __vmwrite(HOST_SYSENTER_CS, sysenter_cs);
+    rdmsrl(MSR_IA32_SYSENTER_EIP, sysenter_eip);
+    __vmwrite(HOST_SYSENTER_EIP, sysenter_eip);
+
+    __vmwrite(VM_ENTRY_INTR_INFO, 0);
+
+    __vmwrite(CR3_TARGET_COUNT, 0);
+
+    __vmwrite(GUEST_ACTIVITY_STATE, 0);
+
+    /* Set default guest context values here. Some of these are then overwritten
+     * in vmx_pvh_set_vcpu_info() by guest itself during vcpu bringup */
+    __vmwrite(GUEST_CS_BASE, 0);
+    __vmwrite(GUEST_CS_LIMIT, ~0u);
+    __vmwrite(GUEST_CS_AR_BYTES, 0xa09b); /* CS.L == 1 */
+    __vmwrite(GUEST_CS_SELECTOR, 0x10);
+
+    __vmwrite(GUEST_DS_BASE, 0);
+    __vmwrite(GUEST_DS_LIMIT, ~0u);
+    __vmwrite(GUEST_DS_AR_BYTES, 0xc093);
+    __vmwrite(GUEST_DS_SELECTOR, 0x18);
+
+    __vmwrite(GUEST_SS_BASE, 0);         /* use same seg as DS */
+    __vmwrite(GUEST_SS_LIMIT, ~0u);
+    __vmwrite(GUEST_SS_AR_BYTES, 0xc093);
+    __vmwrite(GUEST_SS_SELECTOR, 0x18);
+
+    __vmwrite(GUEST_ES_SELECTOR, 0);
+    __vmwrite(GUEST_FS_SELECTOR, 0);
+    __vmwrite(GUEST_GS_SELECTOR, 0);
+
+    /* Guest segment bases. */
+    __vmwrite(GUEST_ES_BASE, 0);
+    __vmwrite(GUEST_FS_BASE, 0);
+    __vmwrite(GUEST_GS_BASE, 0);
+
+    /* Guest segment limits. */
+    __vmwrite(GUEST_ES_LIMIT, ~0u);
+    __vmwrite(GUEST_FS_LIMIT, ~0u);
+    __vmwrite(GUEST_GS_LIMIT, ~0u);
+
+    /* Guest segment AR bytes. */
+    __vmwrite(GUEST_ES_AR_BYTES, 0xc093); /* read/write, accessed */
+    __vmwrite(GUEST_FS_AR_BYTES, 0xc093);
+    __vmwrite(GUEST_GS_AR_BYTES, 0xc093);
+
+    /* Guest IDT. */
+    __vmwrite(GUEST_GDTR_BASE, 0);
+    __vmwrite(GUEST_GDTR_LIMIT, 0);
+
+    /* Guest LDT. */
+    __vmwrite(GUEST_LDTR_AR_BYTES, 0x82); /* LDT */
+    __vmwrite(GUEST_LDTR_SELECTOR, 0);
+    __vmwrite(GUEST_LDTR_BASE, 0);
+    __vmwrite(GUEST_LDTR_LIMIT, 0);
+
+    /* Guest TSS. */
+    __vmwrite(GUEST_TR_AR_BYTES, 0x8b); /* 32-bit TSS (busy) */
+    __vmwrite(GUEST_TR_BASE, 0);
+    __vmwrite(GUEST_TR_LIMIT, 0xff);
+
+    __vmwrite(GUEST_INTERRUPTIBILITY_INFO, 0);
+    __vmwrite(GUEST_DR7, 0);
+    __vmwrite(VMCS_LINK_POINTER, ~0UL);
+
+    __vmwrite(PAGE_FAULT_ERROR_CODE_MASK, 0);
+    __vmwrite(PAGE_FAULT_ERROR_CODE_MATCH, 0);
+
+    v->arch.hvm_vmx.exception_bitmap = 
+                                   HVM_TRAP_MASK     | (1 << TRAP_debug) | 
+                                   (1U << TRAP_int3) | (1U << TRAP_no_device);
+    __vmwrite(EXCEPTION_BITMAP, v->arch.hvm_vmx.exception_bitmap);
+
+    __vmwrite(TSC_OFFSET, 0);
+
+    /* Set WP bit so rdonly pages are not written from CPL 0 */
+    tmpval = X86_CR0_PG | X86_CR0_NE | X86_CR0_PE | X86_CR0_WP;
+    __vmwrite(GUEST_CR0, tmpval);
+    __vmwrite(CR0_READ_SHADOW, tmpval);
+    v->arch.hvm_vcpu.hw_cr[0] = v->arch.hvm_vcpu.guest_cr[0] = tmpval;
+
+    tmpval = real_cr4_to_pv_guest_cr4(mmu_cr4_features);
+    required = X86_CR4_PAE | X86_CR4_VMXE | X86_CR4_OSFXSR;
+    if ( (tmpval & required) != required )
+    {
+        printk("PVH: required CR4 features not available:%lx\n", required);
+        return -EINVAL;
+    }
+    __vmwrite(GUEST_CR4, tmpval);
+    __vmwrite(CR4_READ_SHADOW, tmpval);
+    v->arch.hvm_vcpu.guest_cr[4] = tmpval;
+
+    __vmwrite(CR0_GUEST_HOST_MASK, ~0UL);
+    __vmwrite(CR4_GUEST_HOST_MASK, ~0UL);
+
+     v->arch.hvm_vmx.vmx_realmode = 0;
+
+    ept->asr  = pagetable_get_pfn(p2m_get_pagetable(p2m));
+    __vmwrite(EPT_POINTER, ept_get_eptp(ept));
+
+    if ( cpu_has_vmx_pat )
+    {
+        u64 host_pat, guest_pat;
+
+        rdmsrl(MSR_IA32_CR_PAT, host_pat);
+        guest_pat = MSR_IA32_CR_PAT_RESET;
+
+        __vmwrite(HOST_PAT, host_pat);
+        __vmwrite(GUEST_PAT, guest_pat);
+    }
+    return 0;
+}
+
 static int construct_vmcs(struct vcpu *v)
 {
     struct domain *d = v->domain;
@@ -825,6 +1072,12 @@ static int construct_vmcs(struct vcpu *v)
 
     vmx_vmcs_enter(v);
 
+    if ( is_pvh_vcpu(v) ) {
+        int rc = pvh_construct_vmcs(v);
+        vmx_vmcs_exit(v);
+        return rc;
+    }
+
     /* VMCS controls. */
     __vmwrite(PIN_BASED_VM_EXEC_CONTROL, vmx_pin_based_exec_control);
 
@@ -1259,8 +1512,10 @@ void vmx_do_resume(struct vcpu *v)
 
         vmx_clear_vmcs(v);
         vmx_load_vmcs(v);
-        hvm_migrate_timers(v);
-        hvm_migrate_pirqs(v);
+        if ( !is_pvh_vcpu(v) ) {
+            hvm_migrate_timers(v);
+            hvm_migrate_pirqs(v);
+        }
         vmx_set_host_env(v);
         /*
          * Both n1 VMCS and n2 VMCS need to update the host environment after 
@@ -1272,6 +1527,9 @@ void vmx_do_resume(struct vcpu *v)
         hvm_asid_flush_vcpu(v);
     }
 
+    if ( is_pvh_vcpu(v) )
+        reset_stack_and_jump(vmx_asm_do_vmentry);
+
     debug_state = v->domain->debugger_attached
                   || v->domain->arch.hvm_domain.params[HVM_PARAM_MEMORY_EVENT_INT3]
                   || v->domain->arch.hvm_domain.params[HVM_PARAM_MEMORY_EVENT_SINGLE_STEP];
@@ -1455,7 +1713,7 @@ static void vmcs_dump(unsigned char ch)
 
     for_each_domain ( d )
     {
-        if ( !is_hvm_domain(d) )
+        if ( !is_hvm_or_pvh_domain(d) )
             continue;
         printk("\n>>> Domain %d <<<\n", d->domain_id);
         for_each_vcpu ( d, v )
diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
index e64980f..194c87b 100644
--- a/xen/arch/x86/hvm/vmx/vmx.c
+++ b/xen/arch/x86/hvm/vmx/vmx.c
@@ -79,6 +79,9 @@ static int vmx_domain_initialise(struct domain *d)
 {
     int rc;
 
+    if ( is_pvh_domain(d) )
+        return 0;
+
     if ( (rc = vmx_alloc_vlapic_mapping(d)) != 0 )
         return rc;
 
@@ -87,6 +90,9 @@ static int vmx_domain_initialise(struct domain *d)
 
 static void vmx_domain_destroy(struct domain *d)
 {
+    if ( is_pvh_domain(d) )
+        return;
+
     vmx_free_vlapic_mapping(d);
 }
 
@@ -110,6 +116,12 @@ static int vmx_vcpu_initialise(struct vcpu *v)
 
     vpmu_initialise(v);
 
+    if (is_pvh_vcpu(v) ) 
+    {
+        /* this for hvm_long_mode_enabled(v) */
+        v->arch.hvm_vcpu.guest_efer = EFER_SCE | EFER_LMA | EFER_LME;
+        return 0;
+    }
     vmx_install_vlapic_mapping(v);
 
     /* %eax == 1 signals full real-mode support to the guest loader. */
@@ -1033,6 +1045,23 @@ static void vmx_update_host_cr3(struct vcpu *v)
     vmx_vmcs_exit(v);
 }
 
+static void vmx_update_pvh_cr(struct vcpu *v, unsigned int cr)
+{
+    vmx_vmcs_enter(v);
+    switch ( cr )
+    {
+        case 3:
+            __vmwrite(GUEST_CR3, v->arch.hvm_vcpu.guest_cr[3]);
+            hvm_asid_flush_vcpu(v);
+            break;
+
+        default:
+            printk("PVH: d%d v%d unexpected cr%d update at rip:%lx\n", 
+                   v->domain->domain_id, v->vcpu_id, cr, __vmread(GUEST_RIP));
+    }
+    vmx_vmcs_exit(v);
+}
+
 void vmx_update_debug_state(struct vcpu *v)
 {
     unsigned long mask;
@@ -1052,6 +1081,11 @@ void vmx_update_debug_state(struct vcpu *v)
 
 static void vmx_update_guest_cr(struct vcpu *v, unsigned int cr)
 {
+    if ( is_pvh_vcpu(v) ) {
+        vmx_update_pvh_cr(v, cr);
+        return;
+    }
+
     vmx_vmcs_enter(v);
 
     switch ( cr )
-- 
1.7.2.3


* Re: [PATCH 9/18 V2]: PVH xen: create PVH vmcs, and initialization
  2013-03-16  0:39 [PATCH 9/18 V2]: PVH xen: create PVH vmcs, and initialization Mukesh Rathor
@ 2013-03-18 12:03 ` Jan Beulich
  2013-03-18 15:28 ` Konrad Rzeszutek Wilk
  1 sibling, 0 replies; 7+ messages in thread
From: Jan Beulich @ 2013-03-18 12:03 UTC (permalink / raw)
  To: Mukesh Rathor; +Cc: xen-devel

>>> On 16.03.13 at 01:39, Mukesh Rathor <mukesh.rathor@oracle.com> wrote:
> +        /* pure hvm doesn't do this. safe? see: long_mode_do_msr_write() */
> +#if 0
> +        vmx_disable_intercept_for_msr(v, MSR_STAR);
> +        vmx_disable_intercept_for_msr(v, MSR_LSTAR);
> +        vmx_disable_intercept_for_msr(v, MSR_CSTAR);
> +        vmx_disable_intercept_for_msr(v, MSR_SYSCALL_MASK);
> +#endif

This is the sort of comment/code that I think should never be in a
non-RFC patch. Either you're convinced not intercepting these
MSRs is correct (and secure) for PVH guests (in which case no more
than a comment saying so is needed), or the code is needed and
hence the conditional should be dropped.

> +    } else {

Formatting (not just here).

> +        printk("PVH: CPU does NOT have msr bitmap\n");
> +        return -EINVAL;
> +    }
> +
> +    if ( !cpu_has_vmx_vpid ) {
> +        printk("PVH: At present VPID support is required to run PVH\n");
> +        return -EINVAL;
> +    }
> +
> +    v->arch.hvm_vmx.secondary_exec_control = vmx_secondary_exec_control;
> +
> +    if ( cpu_has_vmx_secondary_exec_control ) {
> +        v->arch.hvm_vmx.secondary_exec_control &= ~0x4FF; /* turn off all */
> +        v->arch.hvm_vmx.secondary_exec_control |= 
> +                                              SECONDARY_EXEC_PAUSE_LOOP_EXITING;
> +        v->arch.hvm_vmx.secondary_exec_control |= SECONDARY_EXEC_ENABLE_VPID;
> +
> +        v->arch.hvm_vmx.secondary_exec_control |= SECONDARY_EXEC_ENABLE_EPT;
> +        __vmwrite(SECONDARY_VM_EXEC_CONTROL,
> +                  v->arch.hvm_vmx.secondary_exec_control);
> +    } else {
> +        printk("PVH: NO Secondary Exec control\n");
> +        return -EINVAL;
> +    }
> +
> +    __vmwrite(VM_EXIT_CONTROLS, vmexit_ctl);
> +
> +    #define VM_ENTRY_LOAD_DEBUG_CTLS 0x4
> +    #define VM_ENTRY_LOAD_EFER 0x8000

Please don't indent #defines (at least not the #).
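
That is, the '#' belongs in the first column; if indentation is wanted
for grouping, it goes after the '#', e.g. (illustration only):

#define VM_ENTRY_LOAD_DEBUG_CTLS 0x4
#    define VM_ENTRY_LOAD_EFER 0x8000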

Jan


* Re: [PATCH 9/18 V2]: PVH xen: create PVH vmcs, and initialization
  2013-03-16  0:39 [PATCH 9/18 V2]: PVH xen: create PVH vmcs, and initialization Mukesh Rathor
  2013-03-18 12:03 ` Jan Beulich
@ 2013-03-18 15:28 ` Konrad Rzeszutek Wilk
  2013-03-19  1:00   ` Mukesh Rathor
  1 sibling, 1 reply; 7+ messages in thread
From: Konrad Rzeszutek Wilk @ 2013-03-18 15:28 UTC (permalink / raw)
  To: Mukesh Rathor; +Cc: Xen-devel

On Fri, Mar 15, 2013 at 05:39:25PM -0700, Mukesh Rathor wrote:
> This patch mainly contains code to create a VMCS for PVH guest, and HVM
>  specific vcpu/domain creation code.
> 
> Changes in V2:
>   - Avoid call to hvm_do_resume() at call site rather than return in it.
>   - Return for PVH vmx_do_resume prior to intel debugger stuff.
> 
> Signed-off-by: Mukesh Rathor <mukesh.rathor@oracle.com>
> ---
>  xen/arch/x86/hvm/hvm.c      |   90 ++++++++++++++-
>  xen/arch/x86/hvm/vmx/vmcs.c |  266 ++++++++++++++++++++++++++++++++++++++++++-
>  xen/arch/x86/hvm/vmx/vmx.c  |   34 ++++++
>  3 files changed, 383 insertions(+), 7 deletions(-)
> 
> diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c
> index ea7adf6..18889ad 100644
> --- a/xen/arch/x86/hvm/hvm.c
> +++ b/xen/arch/x86/hvm/hvm.c
> @@ -510,6 +510,29 @@ static int hvm_print_line(
>      return X86EMUL_OKAY;
>  }
>  
> +static int hvm_pvh_dom_initialise(struct domain *d)
> +{
> +    int rc;
> +
> +    if (!d->arch.hvm_domain.hap_enabled)
> +        return -EINVAL;
> +
> +    spin_lock_init(&d->arch.hvm_domain.irq_lock);
> +    hvm_init_guest_time(d);
> +
> +    hvm_init_cacheattr_region_list(d);
> +
> +    if ( (rc=paging_enable(d, PG_refcounts|PG_translate|PG_external)) != 0 )
> +        goto fail1;
> +
> +    if ( (rc = hvm_funcs.domain_initialise(d)) == 0 )
> +        return 0;
> +
> +fail1:

I don't think you need the label here? You are not doing a goto.

> +    hvm_destroy_cacheattr_region_list(d);
> +    return rc;
> +}
> +
>  int hvm_domain_initialise(struct domain *d)
>  {
>      int rc;
> @@ -520,6 +543,8 @@ int hvm_domain_initialise(struct domain *d)
>                   "on a non-VT/AMDV platform.\n");
>          return -EINVAL;
>      }
> +    if ( is_pvh_domain(d) )
> +        return hvm_pvh_dom_initialise(d);
>  
>      spin_lock_init(&d->arch.hvm_domain.pbuf_lock);
>      spin_lock_init(&d->arch.hvm_domain.irq_lock);
> @@ -584,6 +609,11 @@ int hvm_domain_initialise(struct domain *d)
>  
>  void hvm_domain_relinquish_resources(struct domain *d)
>  {
> +    if ( is_pvh_domain(d) ) 
> +    {
> +        pit_deinit(d);
> +        return;
> +    }
>      if ( hvm_funcs.nhvm_domain_relinquish_resources )
>          hvm_funcs.nhvm_domain_relinquish_resources(d);
>  
> @@ -609,10 +639,14 @@ void hvm_domain_relinquish_resources(struct domain *d)
>  void hvm_domain_destroy(struct domain *d)
>  {
>      hvm_funcs.domain_destroy(d);
> +    hvm_destroy_cacheattr_region_list(d);
> +
> +    if ( is_pvh_domain(d) )
> +        return;
> +
>      rtc_deinit(d);
>      stdvga_deinit(d);
>      vioapic_deinit(d);
> -    hvm_destroy_cacheattr_region_list(d);
>  }
>  
>  static int hvm_save_tsc_adjust(struct domain *d, hvm_domain_context_t *h)
> @@ -1066,14 +1100,47 @@ static int __init __hvm_register_CPU_XSAVE_save_and_restore(void)
>  }
>  __initcall(__hvm_register_CPU_XSAVE_save_and_restore);
>  
> +static int hvm_pvh_vcpu_initialise(struct vcpu *v)
> +{
> +    int rc;
> +
> +    if ( (rc = hvm_funcs.vcpu_initialise(v)) != 0 )
> +        return rc;
> +
> +    softirq_tasklet_init( &v->arch.hvm_vcpu.assert_evtchn_irq_tasklet,
> +                          (void(*)(unsigned long))hvm_assert_evtchn_irq,
> +                          (unsigned long)v );
> +
> +    v->arch.hvm_vcpu.hcall_64bit = 1;
> +    v->arch.hvm_vcpu.hvm_pvh.vcpu_info_mfn = INVALID_MFN;
> +    v->arch.user_regs.eflags = 2;

So that sets the Reserved flag. Could you include a comment explaining why..
Ah, is it b/c we later on bit-shift it and use it to figure out whether IOPL
needs to be virtualized in arch_set_info_guest? Or is it just b/c
this function is based off hvm_vcpu_initialise? If so, since you are
being called by it, can you skip it?


> +    v->arch.hvm_vcpu.inject_trap.vector = -1;
> +
> +    if ( (rc=hvm_vcpu_cacheattr_init(v)) != 0 ) {

The syntax here is off.

> +        hvm_funcs.vcpu_destroy(v);
> +        return rc;
> +    }
> +
> +    /* during domain shutdown: pvh_vmx_vmexit_handler->emulate_privileged_op
> +     * -> guest_io_read -> pv_pit_handler -> handle_speaker_io -> _spin_lock
> +     *  so we call pit_init to initialize the spin lock */
> +    if ( v->vcpu_id == 0 )
> +        pit_init(v, cpu_khz);
> +
> +    return 0;
> +}
> +
>  int hvm_vcpu_initialise(struct vcpu *v)
>  {
>      int rc;
>      struct domain *d = v->domain;
> -    domid_t dm_domid = d->arch.hvm_domain.params[HVM_PARAM_DM_DOMAIN];
> +    domid_t dm_domid; 

Not sure I follow, why the move of it further down?
>  
>      hvm_asid_flush_vcpu(v);
>  
> +    if ( is_pvh_vcpu(v) )
> +        return hvm_pvh_vcpu_initialise(v);
> +
>      if ( (rc = vlapic_init(v)) != 0 )
>          goto fail1;
>  
> @@ -1084,6 +1151,8 @@ int hvm_vcpu_initialise(struct vcpu *v)
>           && (rc = nestedhvm_vcpu_initialise(v)) < 0 ) 
>          goto fail3;
>  
> +    dm_domid = d->arch.hvm_domain.params[HVM_PARAM_DM_DOMAIN];
> +

?

>      /* Create ioreq event channel. */
>      rc = alloc_unbound_xen_event_channel(v, dm_domid, NULL);
>      if ( rc < 0 )
> @@ -1163,7 +1232,10 @@ void hvm_vcpu_destroy(struct vcpu *v)
>  
>      tasklet_kill(&v->arch.hvm_vcpu.assert_evtchn_irq_tasklet);
>      hvm_vcpu_cacheattr_destroy(v);
> -    vlapic_destroy(v);
> +
> +    if ( !is_pvh_vcpu(v) )
> +        vlapic_destroy(v);
> +
>      hvm_funcs.vcpu_destroy(v);
>  
>      /* Event channel is already freed by evtchn_destroy(). */
> @@ -4514,6 +4586,8 @@ static int hvm_memory_event_traps(long p, uint32_t reason,
>  
>  void hvm_memory_event_cr0(unsigned long value, unsigned long old) 
>  {
> +    if ( is_pvh_vcpu(current) )
> +        return;
>      hvm_memory_event_traps(current->domain->arch.hvm_domain
>                               .params[HVM_PARAM_MEMORY_EVENT_CR0],
>                             MEM_EVENT_REASON_CR0,
> @@ -4522,6 +4596,8 @@ void hvm_memory_event_cr0(unsigned long value, unsigned long old)
>  
>  void hvm_memory_event_cr3(unsigned long value, unsigned long old) 
>  {
> +    if ( is_pvh_vcpu(current) )
> +        return;
>      hvm_memory_event_traps(current->domain->arch.hvm_domain
>                               .params[HVM_PARAM_MEMORY_EVENT_CR3],
>                             MEM_EVENT_REASON_CR3,
> @@ -4530,6 +4606,8 @@ void hvm_memory_event_cr3(unsigned long value, unsigned long old)
>  
>  void hvm_memory_event_cr4(unsigned long value, unsigned long old) 
>  {
> +    if ( is_pvh_vcpu(current) )
> +        return;
>      hvm_memory_event_traps(current->domain->arch.hvm_domain
>                               .params[HVM_PARAM_MEMORY_EVENT_CR4],
>                             MEM_EVENT_REASON_CR4,
> @@ -4538,6 +4616,8 @@ void hvm_memory_event_cr4(unsigned long value, unsigned long old)
>  
>  void hvm_memory_event_msr(unsigned long msr, unsigned long value)
>  {
> +    if ( is_pvh_vcpu(current) )
> +        return;
>      hvm_memory_event_traps(current->domain->arch.hvm_domain
>                               .params[HVM_PARAM_MEMORY_EVENT_MSR],
>                             MEM_EVENT_REASON_MSR,
> @@ -4550,6 +4630,8 @@ int hvm_memory_event_int3(unsigned long gla)
>      unsigned long gfn;
>      gfn = paging_gva_to_gfn(current, gla, &pfec);
>  
> +    if ( is_pvh_vcpu(current) )
> +        return 0;
>      return hvm_memory_event_traps(current->domain->arch.hvm_domain
>                                      .params[HVM_PARAM_MEMORY_EVENT_INT3],
>                                    MEM_EVENT_REASON_INT3,
> @@ -4562,6 +4644,8 @@ int hvm_memory_event_single_step(unsigned long gla)
>      unsigned long gfn;
>      gfn = paging_gva_to_gfn(current, gla, &pfec);
>  
> +    if ( is_pvh_vcpu(current) )
> +        return 0;
>      return hvm_memory_event_traps(current->domain->arch.hvm_domain
>              .params[HVM_PARAM_MEMORY_EVENT_SINGLE_STEP],
>              MEM_EVENT_REASON_SINGLESTEP,
> diff --git a/xen/arch/x86/hvm/vmx/vmcs.c b/xen/arch/x86/hvm/vmx/vmcs.c
> index 9926ffb..b0bea9c 100644
> --- a/xen/arch/x86/hvm/vmx/vmcs.c
> +++ b/xen/arch/x86/hvm/vmx/vmcs.c
> @@ -624,7 +624,7 @@ void vmx_vmcs_exit(struct vcpu *v)
>      {
>          /* Don't confuse vmx_do_resume (for @v or @current!) */
>          vmx_clear_vmcs(v);
> -        if ( is_hvm_vcpu(current) )
> +        if ( is_hvm_or_pvh_vcpu(current) )
>              vmx_load_vmcs(current);
>  
>          spin_unlock(&v->arch.hvm_vmx.vmcs_lock);
> @@ -815,6 +815,253 @@ void virtual_vmcs_vmwrite(void *vvmcs, u32 vmcs_encoding, u64 val)
>      virtual_vmcs_exit(vvmcs);
>  }
>  
> +static int pvh_construct_vmcs(struct vcpu *v)
> +{
> +    uint16_t sysenter_cs;
> +    unsigned long sysenter_eip;
> +    struct domain *d = v->domain;
> +    struct p2m_domain *p2m = p2m_get_hostp2m(d);
> +    struct ept_data *ept = &p2m->ept;
> +    u32 vmexit_ctl = vmx_vmexit_control;
> +    u32 vmentry_ctl = vmx_vmentry_control;
> +    u64 required, tmpval = -1;
> +
> +    if ( !paging_mode_hap(d) )
> +    {
> +        printk("ERROR: HAP is required to run PV in HVM container\n");
> +        return -EINVAL;
> +    }
> +
> +    /* VMCS controls. */
> +    vmx_pin_based_exec_control &= ~PIN_BASED_VIRTUAL_NMIS;
> +    __vmwrite(PIN_BASED_VM_EXEC_CONTROL, vmx_pin_based_exec_control);
> +
> +    v->arch.hvm_vmx.exec_control = vmx_cpu_based_exec_control;
> +
> +    /* if rdtsc exiting is turned on and it goes thru emulate_privileged_op,
> +     * then pv_vcpu.ctrlreg must be added to pvh struct */

That would be the 'timer_mode' syntax in the guest config right? Perhaps then
a check at the top of the function to see which timer_mode is used and
exit out with -ENOSYS?


> +    v->arch.hvm_vmx.exec_control &= ~CPU_BASED_RDTSC_EXITING;
> +    v->arch.hvm_vmx.exec_control &= ~CPU_BASED_USE_TSC_OFFSETING;
> +
> +    v->arch.hvm_vmx.exec_control &= ~(CPU_BASED_INVLPG_EXITING |
> +                                      CPU_BASED_CR3_LOAD_EXITING |
> +                                      CPU_BASED_CR3_STORE_EXITING);
> +    v->arch.hvm_vmx.exec_control |= CPU_BASED_ACTIVATE_SECONDARY_CONTROLS;
> +    v->arch.hvm_vmx.exec_control &= ~CPU_BASED_MONITOR_TRAP_FLAG;

Is that b/c the PV code ends up making the SCHED_yield_op hypercall and
we don't need the monitor/mwait capability? If so, could you add that
comment in please?

> +    v->arch.hvm_vmx.exec_control |= CPU_BASED_ACTIVATE_MSR_BITMAP;
> +    v->arch.hvm_vmx.exec_control &= ~CPU_BASED_TPR_SHADOW;
> +    v->arch.hvm_vmx.exec_control &= ~CPU_BASED_VIRTUAL_NMI_PENDING;
> +
> +    __vmwrite(CPU_BASED_VM_EXEC_CONTROL, v->arch.hvm_vmx.exec_control);
> +
> +    /* I/O access bitmap. */
> +    __vmwrite(IO_BITMAP_A, virt_to_maddr((char *)hvm_io_bitmap + 0));
> +    __vmwrite(IO_BITMAP_B, virt_to_maddr((char *)hvm_io_bitmap + PAGE_SIZE));
> +
> +    /* MSR access bitmap. */
> +    if ( cpu_has_vmx_msr_bitmap )
> +    {
> +        unsigned long *msr_bitmap = alloc_xenheap_page();
> +        int msr_type = MSR_TYPE_R | MSR_TYPE_W;
> +
> +        if ( msr_bitmap == NULL )
> +            return -ENOMEM;
> +
> +        memset(msr_bitmap, ~0, PAGE_SIZE);
> +        v->arch.hvm_vmx.msr_bitmap = msr_bitmap;
> +        __vmwrite(MSR_BITMAP, virt_to_maddr(msr_bitmap));
> +
> +        vmx_disable_intercept_for_msr(v, MSR_FS_BASE, msr_type);
> +        vmx_disable_intercept_for_msr(v, MSR_GS_BASE, msr_type);
> +        vmx_disable_intercept_for_msr(v, MSR_IA32_SYSENTER_CS, msr_type);
> +        vmx_disable_intercept_for_msr(v, MSR_IA32_SYSENTER_ESP, msr_type);
> +        vmx_disable_intercept_for_msr(v, MSR_IA32_SYSENTER_EIP, msr_type);
> +        vmx_disable_intercept_for_msr(v, MSR_SHADOW_GS_BASE, msr_type);

So this looks like the one vmcs.c except that one has this extra:

 895         if ( cpu_has_vmx_pat && paging_mode_hap(d) )
 896             vmx_disable_intercept_for_msr(v, MSR_IA32_CR_PAT, MSR_TYPE_R | MSR_TYPE_W);
 897     }

Did you miss that?

> +
> +        /* pure hvm doesn't do this. safe? see: long_mode_do_msr_write() */
> +#if 0
> +        vmx_disable_intercept_for_msr(v, MSR_STAR);
> +        vmx_disable_intercept_for_msr(v, MSR_LSTAR);
> +        vmx_disable_intercept_for_msr(v, MSR_CSTAR);
> +        vmx_disable_intercept_for_msr(v, MSR_SYSCALL_MASK);
> +#endif

I would just provide a comment saying:

	/*
	* long_mode_do_msr_write() takes care of MSR_[STAR|LSTAR|CSTAR|SYSCALL_MASK]
	*/

> +    } else {
> +        printk("PVH: CPU does NOT have msr bitmap\n");

Perhaps:

	printk(XENLOG_G_ERR "%s: ..\n", __func__);
?
> +        return -EINVAL;
> +    }
> +
> +    if ( !cpu_has_vmx_vpid ) {
> +        printk("PVH: At present VPID support is required to run PVH\n");

Should you de-allocate msr_bitmap at this point?

Or perhaps move this check (and the one below) to the start of the
function? So you have:

	if ( !cpu_has_vmx_vpid )
		gdprintk ("%s: VPID required for PVH mode.\n", __func__);

	if ( !cpu_has_vmx_secondary_exec_control )
		.. bla bla


> +        return -EINVAL;
> +    }
> +
> +    v->arch.hvm_vmx.secondary_exec_control = vmx_secondary_exec_control;
> +
> +    if ( cpu_has_vmx_secondary_exec_control ) {
> +        v->arch.hvm_vmx.secondary_exec_control &= ~0x4FF; /* turn off all */
> +        v->arch.hvm_vmx.secondary_exec_control |= 
> +                                              SECONDARY_EXEC_PAUSE_LOOP_EXITING;
> +        v->arch.hvm_vmx.secondary_exec_control |= SECONDARY_EXEC_ENABLE_VPID;
> +
> +        v->arch.hvm_vmx.secondary_exec_control |= SECONDARY_EXEC_ENABLE_EPT;
> +        __vmwrite(SECONDARY_VM_EXEC_CONTROL,
> +                  v->arch.hvm_vmx.secondary_exec_control);
> +    } else {
> +        printk("PVH: NO Secondary Exec control\n");
> +        return -EINVAL;

Ditto - should you de-allocate msr_bitmap ? Or if you are going to
move the check for cpu_has_vmx_secondary_exec_control, then there is no
need for this if (.. ) else ..


> +    }
> +
> +    __vmwrite(VM_EXIT_CONTROLS, vmexit_ctl);
> +
> +    #define VM_ENTRY_LOAD_DEBUG_CTLS 0x4
> +    #define VM_ENTRY_LOAD_EFER 0x8000
> +    vmentry_ctl &= ~VM_ENTRY_LOAD_DEBUG_CTLS;
> +    vmentry_ctl &= ~VM_ENTRY_LOAD_EFER;
> +    vmentry_ctl &= ~VM_ENTRY_SMM;
> +    vmentry_ctl &= ~VM_ENTRY_DEACT_DUAL_MONITOR;
> +    vmentry_ctl |= VM_ENTRY_IA32E_MODE;
> +    __vmwrite(VM_ENTRY_CONTROLS, vmentry_ctl);
> +

From here on, it looks mostly the same as construct_vmcs right?

Perhaps you can add a comment saying so - so when a cleanup effort
is done later on - these can be candidates for it?

> +    /* MSR intercepts. */
> +    __vmwrite(VM_ENTRY_MSR_LOAD_COUNT, 0);
> +    __vmwrite(VM_EXIT_MSR_LOAD_COUNT, 0);
> +    __vmwrite(VM_EXIT_MSR_STORE_COUNT, 0);
> +
> +    /* Host data selectors. */
> +    __vmwrite(HOST_SS_SELECTOR, __HYPERVISOR_DS);
> +    __vmwrite(HOST_DS_SELECTOR, __HYPERVISOR_DS);
> +    __vmwrite(HOST_ES_SELECTOR, __HYPERVISOR_DS);
> +    __vmwrite(HOST_FS_SELECTOR, 0);
> +    __vmwrite(HOST_GS_SELECTOR, 0);
> +    __vmwrite(HOST_FS_BASE, 0);
> +    __vmwrite(HOST_GS_BASE, 0);
> +
> +    vmx_set_host_env(v);
> +
> +    /* Host control registers. */
> +    v->arch.hvm_vmx.host_cr0 = read_cr0() | X86_CR0_TS;
> +    __vmwrite(HOST_CR0, v->arch.hvm_vmx.host_cr0);
> +    __vmwrite(HOST_CR4, mmu_cr4_features|(cpu_has_xsave ? X86_CR4_OSXSAVE : 0));

That formatting looks odd. 

> +
> +    /* Host CS:RIP. */
> +    __vmwrite(HOST_CS_SELECTOR, __HYPERVISOR_CS);
> +    __vmwrite(HOST_RIP, (unsigned long)vmx_asm_vmexit_handler);
> +
> +    /* Host SYSENTER CS:RIP. */
> +    rdmsrl(MSR_IA32_SYSENTER_CS, sysenter_cs);
> +    __vmwrite(HOST_SYSENTER_CS, sysenter_cs);
> +    rdmsrl(MSR_IA32_SYSENTER_EIP, sysenter_eip);
> +    __vmwrite(HOST_SYSENTER_EIP, sysenter_eip);
> +
> +    __vmwrite(VM_ENTRY_INTR_INFO, 0);
> +
> +    __vmwrite(CR3_TARGET_COUNT, 0);
> +
> +    __vmwrite(GUEST_ACTIVITY_STATE, 0);
> +
> +    /* Set default guest context values here. Some of these are then overwritten
> +     * in vmx_pvh_set_vcpu_info() by guest itself during vcpu bringup */
> +    __vmwrite(GUEST_CS_BASE, 0);
> +    __vmwrite(GUEST_CS_LIMIT, ~0u);
> +    __vmwrite(GUEST_CS_AR_BYTES, 0xa09b); /* CS.L == 1 */
> +    __vmwrite(GUEST_CS_SELECTOR, 0x10);

0x10. Could you use a #define for it? Somehow I thought it would
be running in FLAT_KERNEL_CS but that would be odd. And of course
since we are booting in the Linux kernel without the PV MMU we would
be using its native segments. So this would correspond to
GDT_ENTRY_KERNEL_CS right? Might want to mention that
so if the Linux kernel alters its GDT page we don't blow up?

Though I guess it does not matter - this is just the initial bootstrap
segments. Presumably the load_gdt in the Linux kernel later on resets
it to whatever the "new" GDT is.

> +
> +    __vmwrite(GUEST_DS_BASE, 0);
> +    __vmwrite(GUEST_DS_LIMIT, ~0u);
> +    __vmwrite(GUEST_DS_AR_BYTES, 0xc093);
> +    __vmwrite(GUEST_DS_SELECTOR, 0x18);
> +
> +    __vmwrite(GUEST_SS_BASE, 0);         /* use same seg as DS */
> +    __vmwrite(GUEST_SS_LIMIT, ~0u);
> +    __vmwrite(GUEST_SS_AR_BYTES, 0xc093);
> +    __vmwrite(GUEST_SS_SELECTOR, 0x18);
> +
> +    __vmwrite(GUEST_ES_SELECTOR, 0);
> +    __vmwrite(GUEST_FS_SELECTOR, 0);
> +    __vmwrite(GUEST_GS_SELECTOR, 0);
> +
> +    /* Guest segment bases. */
> +    __vmwrite(GUEST_ES_BASE, 0);
> +    __vmwrite(GUEST_FS_BASE, 0);
> +    __vmwrite(GUEST_GS_BASE, 0);
> +
> +    /* Guest segment limits. */
> +    __vmwrite(GUEST_ES_LIMIT, ~0u);
> +    __vmwrite(GUEST_FS_LIMIT, ~0u);
> +    __vmwrite(GUEST_GS_LIMIT, ~0u);
> +
> +    /* Guest segment AR bytes. */
> +    __vmwrite(GUEST_ES_AR_BYTES, 0xc093); /* read/write, accessed */
> +    __vmwrite(GUEST_FS_AR_BYTES, 0xc093);
> +    __vmwrite(GUEST_GS_AR_BYTES, 0xc093);
> +
> +    /* Guest IDT. */
> +    __vmwrite(GUEST_GDTR_BASE, 0);
> +    __vmwrite(GUEST_GDTR_LIMIT, 0);
> +
> +    /* Guest LDT. */
> +    __vmwrite(GUEST_LDTR_AR_BYTES, 0x82); /* LDT */
> +    __vmwrite(GUEST_LDTR_SELECTOR, 0);
> +    __vmwrite(GUEST_LDTR_BASE, 0);
> +    __vmwrite(GUEST_LDTR_LIMIT, 0);
> +
> +    /* Guest TSS. */
> +    __vmwrite(GUEST_TR_AR_BYTES, 0x8b); /* 32-bit TSS (busy) */
> +    __vmwrite(GUEST_TR_BASE, 0);
> +    __vmwrite(GUEST_TR_LIMIT, 0xff);
> +
> +    __vmwrite(GUEST_INTERRUPTIBILITY_INFO, 0);
> +    __vmwrite(GUEST_DR7, 0);
> +    __vmwrite(VMCS_LINK_POINTER, ~0UL);
> +
> +    __vmwrite(PAGE_FAULT_ERROR_CODE_MASK, 0);
> +    __vmwrite(PAGE_FAULT_ERROR_CODE_MATCH, 0);

Weird. In the vmcs.c file these are somewhat higher in the code.

> +
> +    v->arch.hvm_vmx.exception_bitmap = 
> +                                   HVM_TRAP_MASK     | (1 << TRAP_debug) | 
> +                                   (1U << TRAP_int3) | (1U << TRAP_no_device);

Odd syntax.

> +    __vmwrite(EXCEPTION_BITMAP, v->arch.hvm_vmx.exception_bitmap);
> +
> +    __vmwrite(TSC_OFFSET, 0);

Hm, so you did earlier:

v->arch.hvm_vmx.exec_control &= ~CPU_BASED_USE_TSC_OFFSETING;

so is this necessary? Or is it just that you want it to be set
to default baseline state?

> +
> +    /* Set WP bit so rdonly pages are not written from CPL 0 */
> +    tmpval = X86_CR0_PG | X86_CR0_NE | X86_CR0_PE | X86_CR0_WP;
> +    __vmwrite(GUEST_CR0, tmpval);
> +    __vmwrite(CR0_READ_SHADOW, tmpval);
> +    v->arch.hvm_vcpu.hw_cr[0] = v->arch.hvm_vcpu.guest_cr[0] = tmpval;
> +
> +    tmpval = real_cr4_to_pv_guest_cr4(mmu_cr4_features);
> +    required = X86_CR4_PAE | X86_CR4_VMXE | X86_CR4_OSFXSR;
> +    if ( (tmpval & required) != required )
> +    {
> +        printk("PVH: required CR4 features not available:%lx\n", required);
> +        return -EINVAL;

You might want to move that to the top of the code. Or if you want
it here, then at least free the msr_bitmap

> +    }
> +    __vmwrite(GUEST_CR4, tmpval);
> +    __vmwrite(CR4_READ_SHADOW, tmpval);
> +    v->arch.hvm_vcpu.guest_cr[4] = tmpval;
> +
> +    __vmwrite(CR0_GUEST_HOST_MASK, ~0UL);
> +    __vmwrite(CR4_GUEST_HOST_MASK, ~0UL);
> +
> +     v->arch.hvm_vmx.vmx_realmode = 0;
> +
> +    ept->asr  = pagetable_get_pfn(p2m_get_pagetable(p2m));
> +    __vmwrite(EPT_POINTER, ept_get_eptp(ept));
> +
> +    if ( cpu_has_vmx_pat )
> +    {
> +        u64 host_pat, guest_pat;
> +
> +        rdmsrl(MSR_IA32_CR_PAT, host_pat);
> +        guest_pat = MSR_IA32_CR_PAT_RESET;
> +
> +        __vmwrite(HOST_PAT, host_pat);
> +        __vmwrite(GUEST_PAT, guest_pat);
> +    }
> +    return 0;
> +}
> +
>  static int construct_vmcs(struct vcpu *v)
>  {
>      struct domain *d = v->domain;
> @@ -825,6 +1072,12 @@ static int construct_vmcs(struct vcpu *v)
>  
>      vmx_vmcs_enter(v);
>  
> +    if ( is_pvh_vcpu(v) ) {
> +        int rc = pvh_construct_vmcs(v);
> +        vmx_vmcs_exit(v);

Do you need to call paging_update_paging_modes as construct_vmcs() does?

> +        return rc;
> +    }
> +
>      /* VMCS controls. */
>      __vmwrite(PIN_BASED_VM_EXEC_CONTROL, vmx_pin_based_exec_control);
>  
> @@ -1259,8 +1512,10 @@ void vmx_do_resume(struct vcpu *v)
>  
>          vmx_clear_vmcs(v);
>          vmx_load_vmcs(v);
> -        hvm_migrate_timers(v);
> -        hvm_migrate_pirqs(v);
> +        if ( !is_pvh_vcpu(v) ) {
> +            hvm_migrate_timers(v);
> +            hvm_migrate_pirqs(v);
> +        }
>          vmx_set_host_env(v);
>          /*
>           * Both n1 VMCS and n2 VMCS need to update the host environment after 
> @@ -1272,6 +1527,9 @@ void vmx_do_resume(struct vcpu *v)
>          hvm_asid_flush_vcpu(v);
>      }
>  
> +    if ( is_pvh_vcpu(v) )
> +        reset_stack_and_jump(vmx_asm_do_vmentry);
> +
>      debug_state = v->domain->debugger_attached
>                    || v->domain->arch.hvm_domain.params[HVM_PARAM_MEMORY_EVENT_INT3]
>                    || v->domain->arch.hvm_domain.params[HVM_PARAM_MEMORY_EVENT_SINGLE_STEP];
> @@ -1455,7 +1713,7 @@ static void vmcs_dump(unsigned char ch)
>  
>      for_each_domain ( d )
>      {
> -        if ( !is_hvm_domain(d) )
> +        if ( !is_hvm_or_pvh_domain(d) )
>              continue;
>          printk("\n>>> Domain %d <<<\n", d->domain_id);
>          for_each_vcpu ( d, v )
> diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
> index e64980f..194c87b 100644
> --- a/xen/arch/x86/hvm/vmx/vmx.c
> +++ b/xen/arch/x86/hvm/vmx/vmx.c
> @@ -79,6 +79,9 @@ static int vmx_domain_initialise(struct domain *d)
>  {
>      int rc;
>  
> +    if ( is_pvh_domain(d) )
> +        return 0;
> +
>      if ( (rc = vmx_alloc_vlapic_mapping(d)) != 0 )
>          return rc;
>  
> @@ -87,6 +90,9 @@ static int vmx_domain_initialise(struct domain *d)
>  
>  static void vmx_domain_destroy(struct domain *d)
>  {
> +    if ( is_pvh_domain(d) )
> +        return;
> +
>      vmx_free_vlapic_mapping(d);
>  }
>  
> @@ -110,6 +116,12 @@ static int vmx_vcpu_initialise(struct vcpu *v)
>  
>      vpmu_initialise(v);
>  
> +    if (is_pvh_vcpu(v) ) 
> +    {
> +        /* this for hvm_long_mode_enabled(v) */
> +        v->arch.hvm_vcpu.guest_efer = EFER_SCE | EFER_LMA | EFER_LME;
> +        return 0;
> +    }
>      vmx_install_vlapic_mapping(v);
>  
>      /* %eax == 1 signals full real-mode support to the guest loader. */
> @@ -1033,6 +1045,23 @@ static void vmx_update_host_cr3(struct vcpu *v)
>      vmx_vmcs_exit(v);
>  }
>  
> +static void vmx_update_pvh_cr(struct vcpu *v, unsigned int cr)
> +{
> +    vmx_vmcs_enter(v);
> +    switch ( cr )
> +    {
> +        case 3:
> +            __vmwrite(GUEST_CR3, v->arch.hvm_vcpu.guest_cr[3]);
> +            hvm_asid_flush_vcpu(v);
> +            break;
> +
> +        default:
> +            printk("PVH: d%d v%d unexpected cr%d update at rip:%lx\n", 

You are missing the XENLOG_ERR part.

> +                   v->domain->domain_id, v->vcpu_id, cr, __vmread(GUEST_RIP));
> +    }
> +    vmx_vmcs_exit(v);
> +}
> +
>  void vmx_update_debug_state(struct vcpu *v)
>  {
>      unsigned long mask;
> @@ -1052,6 +1081,11 @@ void vmx_update_debug_state(struct vcpu *v)
>  
>  static void vmx_update_guest_cr(struct vcpu *v, unsigned int cr)
>  {
> +    if ( is_pvh_vcpu(v) ) {
> +        vmx_update_pvh_cr(v, cr);
> +        return;
> +    }
> +
>      vmx_vmcs_enter(v);
>  
>      switch ( cr )
> -- 
> 1.7.2.3
> 
> 
> _______________________________________________
> Xen-devel mailing list
> Xen-devel@lists.xen.org
> http://lists.xen.org/xen-devel
> 


* Re: [PATCH 9/18 V2]: PVH xen: create PVH vmcs, and initialization
  2013-03-18 15:28 ` Konrad Rzeszutek Wilk
@ 2013-03-19  1:00   ` Mukesh Rathor
  2013-03-19  9:19     ` Jan Beulich
  2013-03-19 13:23     ` Konrad Rzeszutek Wilk
  0 siblings, 2 replies; 7+ messages in thread
From: Mukesh Rathor @ 2013-03-19  1:00 UTC (permalink / raw)
  To: Konrad Rzeszutek Wilk; +Cc: Xen-devel

On Mon, 18 Mar 2013 11:28:43 -0400
Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> wrote:

> On Fri, Mar 15, 2013 at 05:39:25PM -0700, Mukesh Rathor wrote:
> > This patch mainly contains code to create a VMCS for PVH guest, and
> > HVM specific vcpu/domain creation code.
> > 
> > Changes in V2:
> >   - Avoid call to hvm_do_resume() at call site rather than return
> > in it.
> >   - Return for PVH vmx_do_resume prior to intel debugger stuff.
> > 
> > Signed-off-by: Mukesh Rathor <mukesh.rathor@oracle.com>
> > ---
> >  xen/arch/x86/hvm/hvm.c      |   90 ++++++++++++++-
> >  xen/arch/x86/hvm/vmx/vmcs.c |  266
> > ++++++++++++++++++++++++++++++++++++++++++-
> > xen/arch/x86/hvm/vmx/vmx.c  |   34 ++++++ 3 files changed, 383
> > insertions(+), 7 deletions(-)
> > 
> > diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c
> > index ea7adf6..18889ad 100644
> > --- a/xen/arch/x86/hvm/hvm.c
> > +++ b/xen/arch/x86/hvm/hvm.c
> > @@ -510,6 +510,29 @@ static int hvm_print_line(
> >      return X86EMUL_OKAY;
> >  }
> >  
> > +static int hvm_pvh_dom_initialise(struct domain *d)
> > +{
> > +    int rc;
> > +
> > +    if (!d->arch.hvm_domain.hap_enabled)
> > +        return -EINVAL;
> > +
> > +    spin_lock_init(&d->arch.hvm_domain.irq_lock);
> > +    hvm_init_guest_time(d);
> > +
> > +    hvm_init_cacheattr_region_list(d);
> > +
> > +    if ( (rc=paging_enable(d,
> > PG_refcounts|PG_translate|PG_external)) != 0 )
> > +        goto fail1;        <=================================== GOTO
> > +
> > +    if ( (rc = hvm_funcs.domain_initialise(d)) == 0 )
> > +        return 0;
> > +
> > +fail1:
> 
> I don't think you need the label here? You are not doing a goto.

Right above.

> > long))hvm_assert_evtchn_irq,
> > +                          (unsigned long)v );
> > +
> > +    v->arch.hvm_vcpu.hcall_64bit = 1;
> > +    v->arch.hvm_vcpu.hvm_pvh.vcpu_info_mfn = INVALID_MFN;
> > +    v->arch.user_regs.eflags = 2;
> 
> So that sets the Reserved flag. Could you include a comment
> explaining why.. Ah, is it b/c we later on bit-shift it and use it to
> figure out whether IOPL needs to be virtualized in
> arch_set_info_guest? Or is it just b/c this function is based off
> hvm_vcpu_initialise? If so, since you are being called by it, can you
> skip it?

That resvd bit is required to be set for bootstrap. Set in other places
also, like arch_set_info_guest():

v->arch.user_regs.eflags |= 2;

> 
> > +    v->arch.hvm_vcpu.inject_trap.vector = -1;
> > +
> > +    if ( (rc=hvm_vcpu_cacheattr_init(v)) != 0 ) {
> 
> The syntax here is off.

Hmm... space surrounding "=" in rc=hvm* ?

> >  int hvm_vcpu_initialise(struct vcpu *v)
> >  {
> >      int rc;
> >      struct domain *d = v->domain;
> > -    domid_t dm_domid =
> > d->arch.hvm_domain.params[HVM_PARAM_DM_DOMAIN];
> > +    domid_t dm_domid; 
> 
> Not sure I follow, why the move of it further down?

params is not defined/allocated for PVH.

> > +    /* VMCS controls. */
> > +    vmx_pin_based_exec_control &= ~PIN_BASED_VIRTUAL_NMIS;
> > +    __vmwrite(PIN_BASED_VM_EXEC_CONTROL,
> > vmx_pin_based_exec_control); +
> > +    v->arch.hvm_vmx.exec_control = vmx_cpu_based_exec_control;
> > +
> > +    /* if rdtsc exiting is turned on and it goes thru
> > emulate_privileged_op,
> > +     * then pv_vcpu.ctrlreg must be added to pvh struct */
> 
> That would be the 'timer_mode' syntax in the guest config right?
> Perhaps then a check at the top of the function to see which
> timer_mode is used and exit out with -ENOSYS?

The vtsc setting. We set it to 0 for PVH guests.
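
If we do want the explicit guard you suggest, a minimal sketch at the
top of the function would be something like the below (untested, and
assuming the existing d->arch.vtsc flag is the right thing to key off):

    /* PVH: only native rdtsc supported for now; refuse emulated TSC modes. */
    if ( d->arch.vtsc )
        return -ENOSYS;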

> 
> > +    v->arch.hvm_vmx.exec_control &= ~CPU_BASED_RDTSC_EXITING;
> > +    v->arch.hvm_vmx.exec_control &= ~CPU_BASED_USE_TSC_OFFSETING;
> > +
> > +    v->arch.hvm_vmx.exec_control &= ~(CPU_BASED_INVLPG_EXITING |
> > +                                      CPU_BASED_CR3_LOAD_EXITING |
> > +                                      CPU_BASED_CR3_STORE_EXITING);
> > +    v->arch.hvm_vmx.exec_control |=
> > CPU_BASED_ACTIVATE_SECONDARY_CONTROLS;
> > +    v->arch.hvm_vmx.exec_control &= ~CPU_BASED_MONITOR_TRAP_FLAG;
> 
> Is that b/c the PV code ends up making the SCHED_yield_op hypercall
> and we don't need the monitor/mwait capability? If so, could you add
> that comment in please?

No, MTF is a debugging feature used mostly for single step.

> > +        vmx_disable_intercept_for_msr(v, MSR_IA32_SYSENTER_CS,
> > msr_type);
> > +        vmx_disable_intercept_for_msr(v, MSR_IA32_SYSENTER_ESP,
> > msr_type);
> > +        vmx_disable_intercept_for_msr(v, MSR_IA32_SYSENTER_EIP,
> > msr_type);
> > +        vmx_disable_intercept_for_msr(v, MSR_SHADOW_GS_BASE,
> > msr_type);
> 
> So this looks like the one vmcs.c except that one has this extra:
> 
>  895         if ( cpu_has_vmx_pat && paging_mode_hap(d) )
>  896             vmx_disable_intercept_for_msr(v, MSR_IA32_CR_PAT,
> MSR_TYPE_R | MSR_TYPE_W); 897     }
> 
> Did you miss that?

I'll add it. I guess default must be disabled.

> > +
> > +        /* pure hvm doesn't do this. safe? see:
> > long_mode_do_msr_write() */ +#if 0
> > +        vmx_disable_intercept_for_msr(v, MSR_STAR);
> > +        vmx_disable_intercept_for_msr(v, MSR_LSTAR);
> > +        vmx_disable_intercept_for_msr(v, MSR_CSTAR);
> > +        vmx_disable_intercept_for_msr(v, MSR_SYSCALL_MASK);
> > +#endif
> 
> I would just provide a comment saying:
> 
> 	/*
> 	* long_mode_do_msr_write() takes care of
> MSR_[STAR|LSTAR|CSTAR|SYSCALL_MASK] */

Good Idea. I left the "#if 0" there for suggestion.


> > +    } else {
> > +        printk("PVH: CPU does NOT have msr bitmap\n");
> 
> Perhaps:
> 
> 	printk(XENLOG_G_ERR "%s: ..\n", __func__);
> ?
> > +        return -EINVAL;
> > +    }
> > +
> > +    if ( !cpu_has_vmx_vpid ) {
> > +        printk("PVH: At present VPID support is required to run
> > PVH\n");
> 
> Should you de-allocate msr_bitmap at this point?
> 
> Or perhaps move this check (and the one below) to the start of the
> function? So you have:
> 
> 	if ( !cpu_has_vmx_vpid )
> 		gdprintk ("%s: VPID required for PVH mode.\n",
> __func__);
> 
> 	if ( !cpu_has_vmx_secondary_exec_control )
> 		.. bla bla
> 
> 
> > +        return -EINVAL;
> > +    }
> > +
> > +    v->arch.hvm_vmx.secondary_exec_control =
> > vmx_secondary_exec_control; +
> > +    if ( cpu_has_vmx_secondary_exec_control ) {
> > +        v->arch.hvm_vmx.secondary_exec_control &= ~0x4FF; /* turn
> > off all */
> > +        v->arch.hvm_vmx.secondary_exec_control |= 
> > +
> > SECONDARY_EXEC_PAUSE_LOOP_EXITING;
> > +        v->arch.hvm_vmx.secondary_exec_control |=
> > SECONDARY_EXEC_ENABLE_VPID; +
> > +        v->arch.hvm_vmx.secondary_exec_control |=
> > SECONDARY_EXEC_ENABLE_EPT;
> > +        __vmwrite(SECONDARY_VM_EXEC_CONTROL,
> > +                  v->arch.hvm_vmx.secondary_exec_control);
> > +    } else {
> > +        printk("PVH: NO Secondary Exec control\n");
> > +        return -EINVAL;
> 
> Ditto - should you de-allocate msr_bitmap ? Or if you are going to
> move the check for cpu_has_vmx_secondary_exec_control, then there is
> no need for this if (.. ) else ..
> 
> 
> > +    }
> > +
> > +    __vmwrite(VM_EXIT_CONTROLS, vmexit_ctl);
> > +
> > +    #define VM_ENTRY_LOAD_DEBUG_CTLS 0x4
> > +    #define VM_ENTRY_LOAD_EFER 0x8000
> > +    vmentry_ctl &= ~VM_ENTRY_LOAD_DEBUG_CTLS;
> > +    vmentry_ctl &= ~VM_ENTRY_LOAD_EFER;
> > +    vmentry_ctl &= ~VM_ENTRY_SMM;
> > +    vmentry_ctl &= ~VM_ENTRY_DEACT_DUAL_MONITOR;
> > +    vmentry_ctl |= VM_ENTRY_IA32E_MODE;
> > +    __vmwrite(VM_ENTRY_CONTROLS, vmentry_ctl);
> > +
> 
> From here on, it looks mostly the same as construct_vmcs right?
> 
> Perhaps you can add a comment saying so - so when a cleanup effort
> is done later on - these can be candidates for it?
> 
> > +    /* MSR intercepts. */
> > +    __vmwrite(VM_ENTRY_MSR_LOAD_COUNT, 0);
> > +    __vmwrite(VM_EXIT_MSR_LOAD_COUNT, 0);
> > +    __vmwrite(VM_EXIT_MSR_STORE_COUNT, 0);
> > +
> > +    /* Host data selectors. */
> > +    __vmwrite(HOST_SS_SELECTOR, __HYPERVISOR_DS);
> > +    __vmwrite(HOST_DS_SELECTOR, __HYPERVISOR_DS);
> > +    __vmwrite(HOST_ES_SELECTOR, __HYPERVISOR_DS);
> > +    __vmwrite(HOST_FS_SELECTOR, 0);
> > +    __vmwrite(HOST_GS_SELECTOR, 0);
> > +    __vmwrite(HOST_FS_BASE, 0);
> > +    __vmwrite(HOST_GS_BASE, 0);
> > +
> > +    vmx_set_host_env(v);
> > +
> > +    /* Host control registers. */
> > +    v->arch.hvm_vmx.host_cr0 = read_cr0() | X86_CR0_TS;
> > +    __vmwrite(HOST_CR0, v->arch.hvm_vmx.host_cr0);
> > +    __vmwrite(HOST_CR4, mmu_cr4_features|(cpu_has_xsave ?
> > X86_CR4_OSXSAVE : 0));
> 
> That formatting looks odd. 

Copied from hvm code. What's wrong?

> > +    /* Set default guest context values here. Some of these are
> > then overwritten
> > +     * in vmx_pvh_set_vcpu_info() by guest itself during vcpu
> > bringup */
> > +    __vmwrite(GUEST_CS_BASE, 0);
> > +    __vmwrite(GUEST_CS_LIMIT, ~0u);
> > +    __vmwrite(GUEST_CS_AR_BYTES, 0xa09b); /* CS.L == 1 */
> > +    __vmwrite(GUEST_CS_SELECTOR, 0x10);
> 
> 0x10. Could you use a #define for it? Somehow I thought it would
> be running in FLAT_KERNEL_CS but that would be odd. And of course
> since we are booting in the Linux kernel without the PV MMU we would
> be using its native segments. So this would correspond to
> GDT_ENTRY_KERNEL_CS right? Might want to mention that
> so if the Linux kernel alters its GDT page we don't blow up?
> 
> Though I guess it does not matter - these are just the initial
> bootstrap segments. Presumably the load_gdt in the Linux kernel
> later on resets it to whatever the "new" GDT is.

Correct:
#define __KERNEL_CS     (GDT_ENTRY_KERNEL_CS*8)

And load_gdt loads a new GDT natively.


> > +    __vmwrite(GUEST_INTERRUPTIBILITY_INFO, 0);
> > +    __vmwrite(GUEST_DR7, 0);
> > +    __vmwrite(VMCS_LINK_POINTER, ~0UL);
> > +
> > +    __vmwrite(PAGE_FAULT_ERROR_CODE_MASK, 0);
> > +    __vmwrite(PAGE_FAULT_ERROR_CODE_MATCH, 0);
> 
> Weird. In the vmcs.c file these are somewhat higher in the code.

Yes. I just didn't copy the existing function, but created a separate PVH
function to keep things simpler for PVH.

> > +
> > +    v->arch.hvm_vmx.exception_bitmap = 
> > +                                   HVM_TRAP_MASK     | (1 <<
> > TRAP_debug) | 
> > +                                   (1U << TRAP_int3) | (1U <<
> > TRAP_no_device);
> 
> Odd syntax.

Similar to the existing HVM code; what's wrong with it?


> > +    __vmwrite(EXCEPTION_BITMAP, v->arch.hvm_vmx.exception_bitmap);
> > +
> > +    __vmwrite(TSC_OFFSET, 0);
> 
> Hm, so you did earlier:
> 
> v->arch.hvm_vmx.exec_control &= ~CPU_BASED_USE_TSC_OFFSETING;
> 
> so is this necessary? Or is it just that you want it to be set
> to default baseline state?

Not necessary, but it doesn't hurt either. I can remove it.

> > +
> > +    /* Set WP bit so rdonly pages are not written from CPL 0 */
> > +    tmpval = X86_CR0_PG | X86_CR0_NE | X86_CR0_PE | X86_CR0_WP;
> > +    __vmwrite(GUEST_CR0, tmpval);
> > +    __vmwrite(CR0_READ_SHADOW, tmpval);
> > +    v->arch.hvm_vcpu.hw_cr[0] = v->arch.hvm_vcpu.guest_cr[0] =
> > tmpval; +
> > +    tmpval = real_cr4_to_pv_guest_cr4(mmu_cr4_features);
> > +    required = X86_CR4_PAE | X86_CR4_VMXE | X86_CR4_OSFXSR;
> > +    if ( (tmpval & required) != required )
> > +    {
> > +        printk("PVH: required CR4 features not available:%lx\n",
> > required);
> > +        return -EINVAL;
> 
> You might want to move that to the top of the code. Or if you want
> it here, then at least free the msr_bitmap

I think I'll just move all the checks to the top of the code.
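
Something along these lines, as a rough, non-compile-tested sketch of that
reordering (the helper name and the exact cpu_has_* predicates are my
assumptions; the individual checks are just the ones already scattered
through the patch):

static int pvh_check_requirements(struct vcpu *v)
{
    unsigned long required = X86_CR4_PAE | X86_CR4_VMXE | X86_CR4_OSFXSR;
    unsigned long tmpval = real_cr4_to_pv_guest_cr4(mmu_cr4_features);

    if ( !cpu_has_vmx_vpid )
    {
        printk(XENLOG_G_ERR "%s: VPID is required for PVH mode.\n", __func__);
        return -EINVAL;
    }
    if ( !cpu_has_vmx_secondary_exec_control )
    {
        printk(XENLOG_G_ERR "%s: secondary exec controls are required for PVH.\n",
               __func__);
        return -EINVAL;
    }
    if ( !cpu_has_vmx_msr_bitmap )
    {
        printk(XENLOG_G_ERR "%s: an MSR bitmap is required for PVH.\n", __func__);
        return -EINVAL;
    }
    if ( (tmpval & required) != required )
    {
        printk(XENLOG_G_ERR "%s: required CR4 features not available: %lx\n",
               __func__, required);
        return -EINVAL;
    }
    return 0;
}

Calling this at the very top of the PVH VMCS construction, before the MSR
bitmap or anything else is allocated, means nothing needs unwinding on an
early -EINVAL return.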

> >  {
> >      struct domain *d = v->domain;
> > @@ -825,6 +1072,12 @@ static int construct_vmcs(struct vcpu *v)
> >  
> >      vmx_vmcs_enter(v);
> >  
> > +    if ( is_pvh_vcpu(v) ) {
> > +        int rc = pvh_construct_vmcs(v);
> > +        vmx_vmcs_exit(v);
> 
> Do you need to call paging_update_paging_modes as construct_vmcs()
> does?

Nope. We don't need to, as arch_set_info_guest() does it for PVH.


Thanks Konrad.
Mukesh

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH 9/18 V2]: PVH xen: create PVH vmcs, and initialization
  2013-03-19  1:00   ` Mukesh Rathor
@ 2013-03-19  9:19     ` Jan Beulich
  2013-03-19 13:23     ` Konrad Rzeszutek Wilk
  1 sibling, 0 replies; 7+ messages in thread
From: Jan Beulich @ 2013-03-19  9:19 UTC (permalink / raw)
  To: Konrad Rzeszutek Wilk, Mukesh Rathor; +Cc: xen-devel

>>> On 19.03.13 at 02:00, Mukesh Rathor <mukesh.rathor@oracle.com> wrote:
> On Mon, 18 Mar 2013 11:28:43 -0400 Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> wrote:
>> On Fri, Mar 15, 2013 at 05:39:25PM -0700, Mukesh Rathor wrote:
>> > +    __vmwrite(GUEST_CS_SELECTOR, 0x10);
>> 
>> 0x10. Could you use a #define for it? Somehow I thought it would
>> be running in FLAT_KERNEL_CS but that would be odd. And of course
>> since we are booting the Linux kernel without the PV MMU we would
>> be using its native segments. So this would correspond to
>> GDT_ENTRY_KERNEL_CS right? Might want to mention that
>> so if the Linux kernel alters its GDT page we don't blow up?
>> 
>> Though I guess it does not matter - these are just the initial
>> bootstrap segments. Presumably the load_gdt in the Linux kernel
>> later on resets it to whatever the "new" GDT is.
> 
> Correct:
> #define __KERNEL_CS     (GDT_ENTRY_KERNEL_CS*8)

What Linux does, or what selector values it uses doesn't matter
at all here. Whatever value you want to put there, it needs an
explanation if it's not one of the Xen defined ones (in particular
if the value was arbitrarily chosen, as that would leave future
readers completely clueless).

Perhaps to some degree the same also goes for the descriptor
attributes that you store, albeit one can certainly reconstruct
why the value was chosen the way it is.
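
For illustration, a hedged sketch of what a self-documenting value could
look like -- the macro name below is invented for this example, not an
existing Xen definition:

/* Initial bootstrap code selector for the PVH guest; the guest kernel
 * reloads its own GDT and selectors later (via load_gdt), so this only
 * serves as the initial bootstrap CS value. */
#define PVH_GUEST_BOOT_CS_SEL 0x10

    __vmwrite(GUEST_CS_SELECTOR, PVH_GUEST_BOOT_CS_SEL);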

Jan

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH 9/18 V2]: PVH xen: create PVH vmcs, and initialization
  2013-03-19  1:00   ` Mukesh Rathor
  2013-03-19  9:19     ` Jan Beulich
@ 2013-03-19 13:23     ` Konrad Rzeszutek Wilk
  2013-03-26 22:30       ` Mukesh Rathor
  1 sibling, 1 reply; 7+ messages in thread
From: Konrad Rzeszutek Wilk @ 2013-03-19 13:23 UTC (permalink / raw)
  To: Mukesh Rathor; +Cc: Xen-devel

On Mon, Mar 18, 2013 at 06:00:36PM -0700, Mukesh Rathor wrote:
> On Mon, 18 Mar 2013 11:28:43 -0400
> Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> wrote:
> 
> > On Fri, Mar 15, 2013 at 05:39:25PM -0700, Mukesh Rathor wrote:
> > > This patch mainly contains code to create a VMCS for PVH guest, and
> > > HVM specific vcpu/domain creation code.
> > > 
> > > Changes in V2:
> > >   - Avoid call to hvm_do_resume() at call site rather than return
> > > in it.
> > >   - Return for PVH vmx_do_resume prior to intel debugger stuff.
> > > 
> > > Signed-off-by: Mukesh Rathor <mukesh.rathor@oracle.com>
> > > ---
> > >  xen/arch/x86/hvm/hvm.c      |   90 ++++++++++++++-
> > >  xen/arch/x86/hvm/vmx/vmcs.c |  266
> > > ++++++++++++++++++++++++++++++++++++++++++-
> > > xen/arch/x86/hvm/vmx/vmx.c  |   34 ++++++ 3 files changed, 383
> > > insertions(+), 7 deletions(-)
> > > 
> > > diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c
> > > index ea7adf6..18889ad 100644
> > > --- a/xen/arch/x86/hvm/hvm.c
> > > +++ b/xen/arch/x86/hvm/hvm.c
> > > @@ -510,6 +510,29 @@ static int hvm_print_line(
> > >      return X86EMUL_OKAY;
> > >  }
> > >  
> > > +static int hvm_pvh_dom_initialise(struct domain *d)
> > > +{
> > > +    int rc;
> > > +
> > > +    if (!d->arch.hvm_domain.hap_enabled)
> > > +        return -EINVAL;
> > > +
> > > +    spin_lock_init(&d->arch.hvm_domain.irq_lock);
> > > +    hvm_init_guest_time(d);
> > > +
> > > +    hvm_init_cacheattr_region_list(d);
> > > +
> > > +    if ( (rc=paging_enable(d,
> > > PG_refcounts|PG_translate|PG_external)) != 0 )
> > > +        goto fail1;        <=================================== GOTO
> > > +
> > > +    if ( (rc = hvm_funcs.domain_initialise(d)) == 0 )
> > > +        return 0;
> > > +
> > > +fail1:
> > 
> > I don't think you need the label here? You are not doing an goto.
> 
> Right above.

Duh!
> 
> > > long))hvm_assert_evtchn_irq,
> > > +                          (unsigned long)v );
> > > +
> > > +    v->arch.hvm_vcpu.hcall_64bit = 1;
> > > +    v->arch.hvm_vcpu.hvm_pvh.vcpu_info_mfn = INVALID_MFN;
> > > +    v->arch.user_regs.eflags = 2;
> > 
> > So that sets the Reserved flag. Could you include a comment
> > explaining why.. Ah, is it b/c we later on bit-shift it and use it to
> > figure out whether IOPL needs to be virtualized in
> > arch_set_info_guest? Or is it just b/c this function is based off
> > hvm_vcpu_initialise? If so, since you are being called by it, can you
> > skip it?
> 
> That resvd bit is required to be set for bootstrap. Set in other places
> also, like arch_set_info_guest():
> 
> v->arch.user_regs.eflags |= 2;

OK, but why? I am not seeing that bit defined anywhere. Or was it just
needed to get the machine to boot up?
> 
> > 
> > > +    v->arch.hvm_vcpu.inject_trap.vector = -1;
> > > +
> > > +    if ( (rc=hvm_vcpu_cacheattr_init(v)) != 0 ) {
> > 
> > The syntax here is off.
> 
> Hmm... space surrounding "=" in rc=hvm* ?

Yes, like this:
 385     if ( (rc = vcpu_init_fpu(v)) != 0 )

> 
> > >  int hvm_vcpu_initialise(struct vcpu *v)
> > >  {
> > >      int rc;
> > >      struct domain *d = v->domain;
> > > -    domid_t dm_domid =
> > > d->arch.hvm_domain.params[HVM_PARAM_DM_DOMAIN];
> > > +    domid_t dm_domid; 
> > 
> > Not sure I follow, why the move of it further down?
> 
> params is not defined/allocated for PVH.

But you are still using it in the code. As in, you are still
fetching it (just later).

> 
> > > +    /* VMCS controls. */
> > > +    vmx_pin_based_exec_control &= ~PIN_BASED_VIRTUAL_NMIS;
> > > +    __vmwrite(PIN_BASED_VM_EXEC_CONTROL,
> > > vmx_pin_based_exec_control); +
> > > +    v->arch.hvm_vmx.exec_control = vmx_cpu_based_exec_control;
> > > +
> > > +    /* if rdtsc exiting is turned on and it goes thru
> > > emulate_privileged_op,
> > > +     * then pv_vcpu.ctrlreg must be added to pvh struct */
> > 
> > That would be the 'timer_mode' syntax in the guest config right?
> > Perhaps then a check at the top of the function to see which
> > timer_mode is used and exit out with -ENOSYS?
> 
> The vtsc setting. We set it to 0 for PVH guests.
> 

OK, so that is the 'timer_mode' always set to '2', or rather
timer_mode="native" (in the guest config), which then does
the xc_domain_set_tsc_info hypercall to set the vtsc.

You need to document that please.

> > 
> > > +    v->arch.hvm_vmx.exec_control &= ~CPU_BASED_RDTSC_EXITING;
> > > +    v->arch.hvm_vmx.exec_control &= ~CPU_BASED_USE_TSC_OFFSETING;
> > > +
> > > +    v->arch.hvm_vmx.exec_control &= ~(CPU_BASED_INVLPG_EXITING |
> > > +                                      CPU_BASED_CR3_LOAD_EXITING |
> > > +                                      CPU_BASED_CR3_STORE_EXITING);
> > > +    v->arch.hvm_vmx.exec_control |=
> > > CPU_BASED_ACTIVATE_SECONDARY_CONTROLS;
> > > +    v->arch.hvm_vmx.exec_control &= ~CPU_BASED_MONITOR_TRAP_FLAG;
> > 
> > Is that b/c the PV code ends up making the SCHED_yield_op hypercall
> > and we don't need the monitor/mwait capability? If so, could you add
> > that comment in please?
> 
> No, MTF is a debugging feature used mostly for single-stepping.
> 
> > > +        vmx_disable_intercept_for_msr(v, MSR_IA32_SYSENTER_CS,
> > > msr_type);
> > > +        vmx_disable_intercept_for_msr(v, MSR_IA32_SYSENTER_ESP,
> > > msr_type);
> > > +        vmx_disable_intercept_for_msr(v, MSR_IA32_SYSENTER_EIP,
> > > msr_type);
> > > +        vmx_disable_intercept_for_msr(v, MSR_SHADOW_GS_BASE,
> > > msr_type);
> > 
> > So this looks like the one vmcs.c except that one has this extra:
> > 
> >  895         if ( cpu_has_vmx_pat && paging_mode_hap(d) )
> >  896             vmx_disable_intercept_for_msr(v, MSR_IA32_CR_PAT,
> > MSR_TYPE_R | MSR_TYPE_W); 897     }
> > 
> > Did you miss that?
> 
> I'll add it. I guess the default must be disabled.
> 
> > > +
> > > +        /* pure hvm doesn't do this. safe? see:
> > > long_mode_do_msr_write() */ +#if 0
> > > +        vmx_disable_intercept_for_msr(v, MSR_STAR);
> > > +        vmx_disable_intercept_for_msr(v, MSR_LSTAR);
> > > +        vmx_disable_intercept_for_msr(v, MSR_CSTAR);
> > > +        vmx_disable_intercept_for_msr(v, MSR_SYSCALL_MASK);
> > > +#endif
> > 
> > I would just provide a comment saying:
> > 
> > 	/*
> > 	* long_mode_do_msr_write() takes care of
> > MSR_[STAR|LSTAR|CSTAR|SYSCALL_MASK] */
> 
> Good idea. I left the "#if 0" there as a suggestion.
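
A minimal sketch of what that replacement could look like -- drop the
disabled calls and keep only the explanation (wording adapted from the
suggestion above):

        /*
         * Unlike plain HVM, don't disable intercepts for MSR_STAR,
         * MSR_LSTAR, MSR_CSTAR and MSR_SYSCALL_MASK here;
         * long_mode_do_msr_write() takes care of them.
         */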
> 
> 
> > > +    } else {
> > > +        printk("PVH: CPU does NOT have msr bitmap\n");
> > 
> > Perhaps:
> > 
> > 	printk(XENLOG_G_ERR "%s: ..\n", __func__);
> > ?
> > > +        return -EINVAL;
> > > +    }
> > > +
> > > +    if ( !cpu_has_vmx_vpid ) {
> > > +        printk("PVH: At present VPID support is required to run
> > > PVH\n");
> > 
> > Should you de-allocate msr_bitmap at this point?
> > 
> > Or perhaps move this check (and the one below) to the start of the
> > function? So you have:
> > 
> > 	if ( !cpu_has_vmx_vpid )
> > 		gdprintk ("%s: VPID required for PVH mode.\n",
> > __func__);
> > 
> > 	if ( !cpu_has_vmx_secondary_exec_control )
> > 		.. bla bla
> > 
> > 
> > > +        return -EINVAL;
> > > +    }
> > > +
> > > +    v->arch.hvm_vmx.secondary_exec_control =
> > > vmx_secondary_exec_control; +
> > > +    if ( cpu_has_vmx_secondary_exec_control ) {
> > > +        v->arch.hvm_vmx.secondary_exec_control &= ~0x4FF; /* turn
> > > off all */
> > > +        v->arch.hvm_vmx.secondary_exec_control |= 
> > > +
> > > SECONDARY_EXEC_PAUSE_LOOP_EXITING;
> > > +        v->arch.hvm_vmx.secondary_exec_control |=
> > > SECONDARY_EXEC_ENABLE_VPID; +
> > > +        v->arch.hvm_vmx.secondary_exec_control |=
> > > SECONDARY_EXEC_ENABLE_EPT;
> > > +        __vmwrite(SECONDARY_VM_EXEC_CONTROL,
> > > +                  v->arch.hvm_vmx.secondary_exec_control);
> > > +    } else {
> > > +        printk("PVH: NO Secondary Exec control\n");
> > > +        return -EINVAL;
> > 
> > Ditto - should you de-allocate msr_bitmap ? Or if you are going to
> > move the check for cpu_has_vmx_secondary_exec_control, then there is
> > no need for this if (.. ) else ..
> > 
> > 
> > > +    }
> > > +
> > > +    __vmwrite(VM_EXIT_CONTROLS, vmexit_ctl);
> > > +
> > > +    #define VM_ENTRY_LOAD_DEBUG_CTLS 0x4
> > > +    #define VM_ENTRY_LOAD_EFER 0x8000
> > > +    vmentry_ctl &= ~VM_ENTRY_LOAD_DEBUG_CTLS;
> > > +    vmentry_ctl &= ~VM_ENTRY_LOAD_EFER;
> > > +    vmentry_ctl &= ~VM_ENTRY_SMM;
> > > +    vmentry_ctl &= ~VM_ENTRY_DEACT_DUAL_MONITOR;
> > > +    vmentry_ctl |= VM_ENTRY_IA32E_MODE;
> > > +    __vmwrite(VM_ENTRY_CONTROLS, vmentry_ctl);
> > > +
> > 
> > From here on, it looks mostly the same as construct_vmcs right?
> > 
> > Perhaps you can add a comment saying so - so when a cleanup effort
> > is done later on - these can be candidates for it?
> > 
> > > +    /* MSR intercepts. */
> > > +    __vmwrite(VM_ENTRY_MSR_LOAD_COUNT, 0);
> > > +    __vmwrite(VM_EXIT_MSR_LOAD_COUNT, 0);
> > > +    __vmwrite(VM_EXIT_MSR_STORE_COUNT, 0);
> > > +
> > > +    /* Host data selectors. */
> > > +    __vmwrite(HOST_SS_SELECTOR, __HYPERVISOR_DS);
> > > +    __vmwrite(HOST_DS_SELECTOR, __HYPERVISOR_DS);
> > > +    __vmwrite(HOST_ES_SELECTOR, __HYPERVISOR_DS);
> > > +    __vmwrite(HOST_FS_SELECTOR, 0);
> > > +    __vmwrite(HOST_GS_SELECTOR, 0);
> > > +    __vmwrite(HOST_FS_BASE, 0);
> > > +    __vmwrite(HOST_GS_BASE, 0);
> > > +
> > > +    vmx_set_host_env(v);
> > > +
> > > +    /* Host control registers. */
> > > +    v->arch.hvm_vmx.host_cr0 = read_cr0() | X86_CR0_TS;
> > > +    __vmwrite(HOST_CR0, v->arch.hvm_vmx.host_cr0);
> > > +    __vmwrite(HOST_CR4, mmu_cr4_features|(cpu_has_xsave ?
> > > X86_CR4_OSXSAVE : 0));
> > 
> > That formatting looks odd. 
> 
> Copied from the HVM code. What's wrong with it?

The HVM code has some extra spaces.
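
Presumably something along the lines of (the same expression, just with
the operator spaced out):

    __vmwrite(HOST_CR4, mmu_cr4_features | (cpu_has_xsave ? X86_CR4_OSXSAVE : 0));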

> 
> > > +    /* Set default guest context values here. Some of these are
> > > then overwritten
> > > +     * in vmx_pvh_set_vcpu_info() by guest itself during vcpu
> > > bringup */
> > > +    __vmwrite(GUEST_CS_BASE, 0);
> > > +    __vmwrite(GUEST_CS_LIMIT, ~0u);
> > > +    __vmwrite(GUEST_CS_AR_BYTES, 0xa09b); /* CS.L == 1 */
> > > +    __vmwrite(GUEST_CS_SELECTOR, 0x10);
> > 
> > 0x10. Could you use a #define for it? Somehow I thought it would
> > be running in FLAT_KERNEL_CS but that would be odd. And of course
> > since we are booting the Linux kernel without the PV MMU we would
> > be using its native segments. So this would correspond to
> > GDT_ENTRY_KERNEL_CS right? Might want to mention that
> > so if the Linux kernel alters its GDT page we don't blow up?
> > 
> > Though I guess it does not matter - these are just the initial
> > bootstrap segments. Presumably the load_gdt in the Linux kernel
> > later on resets it to whatever the "new" GDT is.
> 
> Correct:
> #define __KERNEL_CS     (GDT_ENTRY_KERNEL_CS*8)
> 
> And load_gdt loads a new GDT natively.
> 
> 
> > > +    __vmwrite(GUEST_INTERRUPTIBILITY_INFO, 0);
> > > +    __vmwrite(GUEST_DR7, 0);
> > > +    __vmwrite(VMCS_LINK_POINTER, ~0UL);
> > > +
> > > +    __vmwrite(PAGE_FAULT_ERROR_CODE_MASK, 0);
> > > +    __vmwrite(PAGE_FAULT_ERROR_CODE_MATCH, 0);
> > 
> > Weird. In the vmcs.c file these are somewhat higher in the code.
> 
> Yes. I just didn't copy the existing function, but created a separate PVH
> function to keep things simpler for PVH.

Sure, but the next step (not these patches) will be to collapse some
of the redundant logic.

> 
> > > +
> > > +    v->arch.hvm_vmx.exception_bitmap = 
> > > +                                   HVM_TRAP_MASK     | (1 <<
> > > TRAP_debug) | 
> > > +                                   (1U << TRAP_int3) | (1U <<
> > > TRAP_no_device);
> > 
> > Odd syntax.
> 
> Similar to the existing HVM code; what's wrong with it?

Tabs. The HVM looks different (I think it uses spaces?)
> 
> 
> > > +    __vmwrite(EXCEPTION_BITMAP, v->arch.hvm_vmx.exception_bitmap);
> > > +
> > > +    __vmwrite(TSC_OFFSET, 0);
> > 
> > Hm, so you did earlier:
> > 
> > v->arch.hvm_vmx.exec_control &= ~CPU_BASED_USE_TSC_OFFSETING;
> > 
> > so is this necessary? Or is it just that you want it to be set
> > to default baseline state?
> 
> Not necessary, but it doesn't hurt either. I can remove it.
> 
> > > +
> > > +    /* Set WP bit so rdonly pages are not written from CPL 0 */
> > > +    tmpval = X86_CR0_PG | X86_CR0_NE | X86_CR0_PE | X86_CR0_WP;
> > > +    __vmwrite(GUEST_CR0, tmpval);
> > > +    __vmwrite(CR0_READ_SHADOW, tmpval);
> > > +    v->arch.hvm_vcpu.hw_cr[0] = v->arch.hvm_vcpu.guest_cr[0] =
> > > tmpval; +
> > > +    tmpval = real_cr4_to_pv_guest_cr4(mmu_cr4_features);
> > > +    required = X86_CR4_PAE | X86_CR4_VMXE | X86_CR4_OSFXSR;
> > > +    if ( (tmpval & required) != required )
> > > +    {
> > > +        printk("PVH: required CR4 features not available:%lx\n",
> > > required);
> > > +        return -EINVAL;
> > 
> > You might want to move that to the top of the code. Or if you want
> > it here, then at least free the msr_bitmap
> 
> I think I'll just move all the checks to the top of the code.
> 
> > >  {
> > >      struct domain *d = v->domain;
> > > @@ -825,6 +1072,12 @@ static int construct_vmcs(struct vcpu *v)
> > >  
> > >      vmx_vmcs_enter(v);
> > >  
> > > +    if ( is_pvh_vcpu(v) ) {
> > > +        int rc = pvh_construct_vmcs(v);
> > > +        vmx_vmcs_exit(v);
> > 
> > Do you need to call paging_update_paging_modes as construct_vmcs()
> > does?
> 
> Nope. We don't need to, as arch_set_info_guest() does it for PVH.

OK, could you provide a comment, please? That way, when one looks at this
code and the HVM one, it will be clear.
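
A sketch of the hunk with such a comment folded in (the comment wording
and the trailing return are illustrative, not taken from the patch):

    if ( is_pvh_vcpu(v) )
    {
        int rc = pvh_construct_vmcs(v);

        /*
         * Note: no paging_update_paging_modes() here, unlike the HVM
         * path below; for PVH that is done later by
         * arch_set_info_guest().
         */
        vmx_vmcs_exit(v);
        return rc;
    }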
> 
> 
> Thanks Konrad.

Of course. Thank you for bearing with this tiring review process.
> Mukesh
> 
> 
> _______________________________________________
> Xen-devel mailing list
> Xen-devel@lists.xen.org
> http://lists.xen.org/xen-devel
> 

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH 9/18 V2]: PVH xen: create PVH vmcs, and initialization
  2013-03-19 13:23     ` Konrad Rzeszutek Wilk
@ 2013-03-26 22:30       ` Mukesh Rathor
  0 siblings, 0 replies; 7+ messages in thread
From: Mukesh Rathor @ 2013-03-26 22:30 UTC (permalink / raw)
  To: Konrad Rzeszutek Wilk; +Cc: Xen-devel

On Tue, 19 Mar 2013 09:23:30 -0400
Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> wrote:

> On Mon, Mar 18, 2013 at 06:00:36PM -0700, Mukesh Rathor wrote:
> > On Mon, 18 Mar 2013 11:28:43 -0400
> > Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> wrote:
> > 
> > > So that sets the Reserved flag. Could you include a comment
> > > explaining why.. Ah, is it b/c we later on bit-shift it and use
> > > it to figure out whether IOPL needs to be virtualized in
> > > arch_set_info_guest? Or is it just b/c this function is based off
> > > hvm_vcpu_initialise? If so, since you are being called by it, can
> > > you skip it?
> > 
> > That resvd bit is required to be set for bootstrap. Set in other
> > places also, like arch_set_info_guest():
> > 
> > v->arch.user_regs.eflags |= 2;
> 
> OK, but why? I am not seeing that bit defined anywhere. Or was it just
> needed to get the machine to boot up?

It's needed to get the machine to boot up. That's the EFLAGS default in
the CPU upon reset or power-up. It's a reserved bit.
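
A sketch of the kind of comment that could accompany the assignment
(wording is illustrative, not from the patch):

    /*
     * Bit 1 of EFLAGS is a reserved bit that always reads as 1, and 0x2
     * is the architectural EFLAGS value after CPU reset/power-up, so the
     * vcpu starts with just that bit set.
     */
    v->arch.user_regs.eflags = 2;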

> > > Not sure I follow, why the move of it further down?
> > 
> > params is not defined/allocated for PVH.
> 
> But you are still using it in the code. As in, you are still
> fetching it (just later).

No, not really. The function returns early for PVH before params is
used. params is NULL for PVH:

 int hvm_vcpu_initialise(struct vcpu *v)
 {
     int rc;
     struct domain *d = v->domain;
-    domid_t dm_domid = d->arch.hvm_domain.params[HVM_PARAM_DM_DOMAIN];
+    domid_t dm_domid; 
 
     hvm_asid_flush_vcpu(v);
 
+    if ( is_pvh_vcpu(v) )
+        return hvm_pvh_vcpu_initialise(v);    <===============
+
  ...........

> > > > +    /* VMCS controls. */
> > > > +    vmx_pin_based_exec_control &= ~PIN_BASED_VIRTUAL_NMIS;
> > > > +    __vmwrite(PIN_BASED_VM_EXEC_CONTROL,
> > > > vmx_pin_based_exec_control); +
> > > > +    v->arch.hvm_vmx.exec_control = vmx_cpu_based_exec_control;
> > > > +
> > > > +    /* if rdtsc exiting is turned on and it goes thru
> > > > emulate_privileged_op,
> > > > +     * then pv_vcpu.ctrlreg must be added to pvh struct */
> > > 
> > > That would be the 'timer_mode' syntax in the guest config right?
> > > Perhaps then a check at the top of the function to see which
> > > timer_mode is used and exit out with -ENOSYS?
> > 
> > The vtsc setting. We set it to 0 for PVH guests.
> > 
> 
> OK, so that is the 'timer_mode' always set to '2', or rather
> timer_mode="native" (in the guest config), which then does
> the xc_domain_set_tsc_info hypercall to set the vtsc.
> You need to document that please.

Right. I set vtsc to 0 in tsc_set_info(), as suggested in a prior review,
but maybe I should print a warning there that it's defaulted to that.
I had also noted in the cover letter that this will be expanded in Phase II or III or IV, btw.
I'll document this in tsc_set_info() also.
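
A rough, non-compile-tested sketch of what that could look like inside
tsc_set_info() (the mode constant and message wording are my assumptions):

    /* PVH currently requires a native TSC; override any other requested
     * mode and warn about it. */
    if ( is_pvh_domain(d) && tsc_mode != TSC_MODE_NEVER_EMULATE )
    {
        printk(XENLOG_WARNING
               "PVH: TSC emulation not supported; forcing native TSC.\n");
        tsc_mode = TSC_MODE_NEVER_EMULATE;
    }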

thanks,
Mukesh

^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2013-03-26 22:30 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2013-03-16  0:39 [PATCH 9/18 V2]: PVH xen: create PVH vmcs, and initialization Mukesh Rathor
2013-03-18 12:03 ` Jan Beulich
2013-03-18 15:28 ` Konrad Rzeszutek Wilk
2013-03-19  1:00   ` Mukesh Rathor
2013-03-19  9:19     ` Jan Beulich
2013-03-19 13:23     ` Konrad Rzeszutek Wilk
2013-03-26 22:30       ` Mukesh Rathor
