All of lore.kernel.org
 help / color / mirror / Atom feed
From: Mukesh Rathor <mukesh.rathor@oracle.com>
To: Xen-devel@lists.xensource.com
Subject: [PATCH 16/18] PVH xen: vmcs related changes
Date: Mon, 24 Jun 2013 17:01:45 -0700	[thread overview]
Message-ID: <1372118507-16864-17-git-send-email-mukesh.rathor@oracle.com> (raw)
In-Reply-To: <1372118507-16864-1-git-send-email-mukesh.rathor@oracle.com>

This patch contains the VMCS changes related to PVH, mainly creating a VMCS
for a PVH guest.

Signed-off-by: Mukesh Rathor <mukesh.rathor@oracle.com>
---
 xen/arch/x86/hvm/vmx/vmcs.c |  254 ++++++++++++++++++++++++++++++++++++++++++-
 1 files changed, 250 insertions(+), 4 deletions(-)

diff --git a/xen/arch/x86/hvm/vmx/vmcs.c b/xen/arch/x86/hvm/vmx/vmcs.c
index 43539a6..f21571c 100644
--- a/xen/arch/x86/hvm/vmx/vmcs.c
+++ b/xen/arch/x86/hvm/vmx/vmcs.c
@@ -634,7 +634,7 @@ void vmx_vmcs_exit(struct vcpu *v)
     {
         /* Don't confuse vmx_do_resume (for @v or @current!) */
         vmx_clear_vmcs(v);
-        if ( is_hvm_vcpu(current) )
+        if ( !is_pv_vcpu(current) )
             vmx_load_vmcs(current);
 
         spin_unlock(&v->arch.hvm_vmx.vmcs_lock);
@@ -856,6 +856,239 @@ static void vmx_set_common_host_vmcs_fields(struct vcpu *v)
     __vmwrite(HOST_SYSENTER_EIP, sysenter_eip);
 }
 
+static int pvh_check_requirements(struct vcpu *v)
+{
+    u64 required, tmpval = real_cr4_to_pv_guest_cr4(mmu_cr4_features);
+    /* Returns 0 when all PVH prerequisites hold, else -EINVAL/-ENOSYS. */
+    if ( !paging_mode_hap(v->domain) )
+    {
+        printk(XENLOG_G_INFO "HAP is required for PVH guest.\n");
+        return -EINVAL;
+    }
+    if ( !cpu_has_vmx_pat )
+    {
+        printk(XENLOG_G_INFO "PVH: CPU does not have PAT support\n");
+        return -ENOSYS;
+    }
+    if ( !cpu_has_vmx_msr_bitmap )
+    {
+        printk(XENLOG_G_INFO "PVH: CPU does not have msr bitmap\n");
+        return -ENOSYS;
+    }
+    if ( !cpu_has_vmx_vpid )
+    {
+        printk(XENLOG_G_INFO "PVH: CPU doesn't have VPID support\n");
+        return -ENOSYS;
+    }
+    if ( !cpu_has_vmx_secondary_exec_control )
+    {
+        printk(XENLOG_G_INFO "CPU Secondary exec is required to run PVH\n");
+        return -ENOSYS;
+    }
+
+    if ( v->domain->arch.vtsc )
+    {
+        printk(XENLOG_G_INFO
+                "At present PVH only supports the default timer mode\n");
+        return -ENOSYS;
+    }
+    /* These CR4 bits must all be set in tmpval; missing ones are logged. */
+    required = X86_CR4_PAE | X86_CR4_VMXE | X86_CR4_OSFXSR;
+    if ( (tmpval & required) != required )
+    {
+        printk(XENLOG_G_INFO "PVH: required CR4 features not available:%lx\n",
+                required & ~tmpval);
+        return -ENOSYS;
+    }
+
+    return 0;
+}
+
+static int pvh_construct_vmcs(struct vcpu *v)
+{
+    int rc, msr_type;
+    unsigned long *msr_bitmap;
+    struct domain *d = v->domain;
+    struct p2m_domain *p2m = p2m_get_hostp2m(d);
+    struct ept_data *ept = &p2m->ept;
+    u32 vmexit_ctl = vmx_vmexit_control;
+    u32 vmentry_ctl = vmx_vmentry_control;
+    u64 host_pat, tmpval = -1;
+    /* Set up the VMCS for PVH vcpu @v; returns 0 or a negative error code. */
+    if ( (rc = pvh_check_requirements(v)) )
+        return rc;
+
+    msr_bitmap = alloc_xenheap_page();
+    if ( msr_bitmap == NULL )
+        return -ENOMEM;
+
+    /* 1. Pin-Based Controls: */
+    __vmwrite(PIN_BASED_VM_EXEC_CONTROL, vmx_pin_based_exec_control);
+
+    v->arch.hvm_vmx.exec_control = vmx_cpu_based_exec_control;
+
+    /* 2. Primary Processor-based controls: */
+    /*
+     * If rdtsc exiting is turned on and it goes thru emulate_privileged_op,
+     * then pv_vcpu.ctrlreg must be added to the pvh struct.
+     */
+    v->arch.hvm_vmx.exec_control &= ~CPU_BASED_RDTSC_EXITING;
+    v->arch.hvm_vmx.exec_control &= ~CPU_BASED_USE_TSC_OFFSETING;
+
+    v->arch.hvm_vmx.exec_control &= ~(CPU_BASED_INVLPG_EXITING |
+                                      CPU_BASED_CR3_LOAD_EXITING |
+                                      CPU_BASED_CR3_STORE_EXITING);
+    v->arch.hvm_vmx.exec_control |= CPU_BASED_ACTIVATE_SECONDARY_CONTROLS;
+    v->arch.hvm_vmx.exec_control &= ~CPU_BASED_MONITOR_TRAP_FLAG;
+    v->arch.hvm_vmx.exec_control |= CPU_BASED_ACTIVATE_MSR_BITMAP;
+    v->arch.hvm_vmx.exec_control &= ~CPU_BASED_TPR_SHADOW;
+    v->arch.hvm_vmx.exec_control &= ~CPU_BASED_VIRTUAL_NMI_PENDING;
+
+    __vmwrite(CPU_BASED_VM_EXEC_CONTROL, v->arch.hvm_vmx.exec_control);
+
+    /* 3. Secondary Processor-based controls (Intel SDM: resvd bits are 0): */
+    v->arch.hvm_vmx.secondary_exec_control = SECONDARY_EXEC_ENABLE_EPT;
+    v->arch.hvm_vmx.secondary_exec_control |= SECONDARY_EXEC_ENABLE_VPID;
+    v->arch.hvm_vmx.secondary_exec_control |= SECONDARY_EXEC_PAUSE_LOOP_EXITING;
+    /* NOTE(review): PLE is enabled without a capability check -- confirm. */
+    __vmwrite(SECONDARY_VM_EXEC_CONTROL,
+              v->arch.hvm_vmx.secondary_exec_control);
+
+    __vmwrite(IO_BITMAP_A, virt_to_maddr((char *)hvm_io_bitmap + 0));
+    __vmwrite(IO_BITMAP_B, virt_to_maddr((char *)hvm_io_bitmap + PAGE_SIZE));
+
+    /* MSR bitmap: start with every bit set, then open up selected MSRs. */
+    memset(msr_bitmap, ~0, PAGE_SIZE);
+    v->arch.hvm_vmx.msr_bitmap = msr_bitmap;
+    __vmwrite(MSR_BITMAP, virt_to_maddr(msr_bitmap));
+
+    msr_type = MSR_TYPE_R | MSR_TYPE_W;
+    /* Disable intercepts for MSRs that have corresponding VMCS fields. */
+    vmx_disable_intercept_for_msr(v, MSR_FS_BASE, msr_type);
+    vmx_disable_intercept_for_msr(v, MSR_GS_BASE, msr_type);
+    vmx_disable_intercept_for_msr(v, MSR_IA32_SYSENTER_CS, msr_type);
+    vmx_disable_intercept_for_msr(v, MSR_IA32_SYSENTER_ESP, msr_type);
+    vmx_disable_intercept_for_msr(v, MSR_IA32_SYSENTER_EIP, msr_type);
+    vmx_disable_intercept_for_msr(v, MSR_SHADOW_GS_BASE, msr_type);
+    vmx_disable_intercept_for_msr(v, MSR_IA32_CR_PAT, msr_type);
+
+    /*
+     * Intercepts stay enabled for MSR_STAR, MSR_LSTAR, MSR_CSTAR and
+     * MSR_SYSCALL_MASK: passing them through would require an MSR save/restore
+     * area processed at every VM exit and entry. Instead, let the intercept
+     * functions save them into vmx_msr_state fields. See comment in
+     * vmx_restore_host_msrs(). See also vmx_restore_guest_msrs().
+     */
+    __vmwrite(VM_ENTRY_MSR_LOAD_COUNT, 0);
+    __vmwrite(VM_EXIT_MSR_LOAD_COUNT, 0);
+    __vmwrite(VM_EXIT_MSR_STORE_COUNT, 0);
+
+    __vmwrite(VM_EXIT_CONTROLS, vmexit_ctl);
+
+    /*
+     * Note: we run with default VM_ENTRY_LOAD_DEBUG_CTLS of 1, which means
+     * upon vmentry, the cpu loads VMCS.DR7 and VMCS.DEBUGCTLS rather than
+     * using the host values. 0 would cause it to not use the VMCS values.
+     */
+    vmentry_ctl &= ~VM_ENTRY_LOAD_GUEST_EFER;
+    vmentry_ctl &= ~VM_ENTRY_SMM;
+    vmentry_ctl &= ~VM_ENTRY_DEACT_DUAL_MONITOR;
+    /* PVH 32bitfixme. */
+    vmentry_ctl |= VM_ENTRY_IA32E_MODE;       /* GUEST_EFER.LME/LMA ignored */
+
+    __vmwrite(VM_ENTRY_CONTROLS, vmentry_ctl);
+
+    vmx_set_common_host_vmcs_fields(v);
+
+    __vmwrite(VM_ENTRY_INTR_INFO, 0);
+    __vmwrite(CR3_TARGET_COUNT, 0);
+    __vmwrite(GUEST_ACTIVITY_STATE, 0);
+
+    /* These are largely irrelevant as we load the descriptors directly. */
+    __vmwrite(GUEST_CS_SELECTOR, 0);
+    __vmwrite(GUEST_DS_SELECTOR, 0);
+    __vmwrite(GUEST_SS_SELECTOR, 0);
+    __vmwrite(GUEST_ES_SELECTOR, 0);
+    __vmwrite(GUEST_FS_SELECTOR, 0);
+    __vmwrite(GUEST_GS_SELECTOR, 0);
+
+    __vmwrite(GUEST_CS_BASE, 0);
+    __vmwrite(GUEST_CS_LIMIT, ~0u);
+    /* CS.L == 1, exec/read, accessed. PVH 32bitfixme. */
+    __vmwrite(GUEST_CS_AR_BYTES, 0xa09b);
+
+    __vmwrite(GUEST_DS_BASE, 0);
+    __vmwrite(GUEST_DS_LIMIT, ~0u);
+    __vmwrite(GUEST_DS_AR_BYTES, 0xc093); /* read/write, accessed */
+
+    __vmwrite(GUEST_SS_BASE, 0);
+    __vmwrite(GUEST_SS_LIMIT, ~0u);
+    __vmwrite(GUEST_SS_AR_BYTES, 0xc093); /* read/write, accessed */
+
+    __vmwrite(GUEST_ES_BASE, 0);
+    __vmwrite(GUEST_ES_LIMIT, ~0u);
+    __vmwrite(GUEST_ES_AR_BYTES, 0xc093); /* read/write, accessed */
+
+    __vmwrite(GUEST_FS_BASE, 0);
+    __vmwrite(GUEST_FS_LIMIT, ~0u);
+    __vmwrite(GUEST_FS_AR_BYTES, 0xc093); /* read/write, accessed */
+
+    __vmwrite(GUEST_GS_BASE, 0);
+    __vmwrite(GUEST_GS_LIMIT, ~0u);
+    __vmwrite(GUEST_GS_AR_BYTES, 0xc093); /* read/write, accessed */
+
+    __vmwrite(GUEST_GDTR_BASE, 0);
+    __vmwrite(GUEST_GDTR_LIMIT, 0);
+
+    __vmwrite(GUEST_LDTR_BASE, 0);
+    __vmwrite(GUEST_LDTR_LIMIT, 0);
+    __vmwrite(GUEST_LDTR_AR_BYTES, 0x82); /* LDT */
+    __vmwrite(GUEST_LDTR_SELECTOR, 0);
+
+    /* Guest TSS. */
+    __vmwrite(GUEST_TR_BASE, 0);
+    __vmwrite(GUEST_TR_LIMIT, 0xff);
+    __vmwrite(GUEST_TR_AR_BYTES, 0x8b); /* 32-bit TSS (busy) */
+
+    __vmwrite(GUEST_INTERRUPTIBILITY_INFO, 0);
+    __vmwrite(GUEST_DR7, 0);
+    __vmwrite(VMCS_LINK_POINTER, ~0UL);
+
+    __vmwrite(PAGE_FAULT_ERROR_CODE_MASK, 0);
+    __vmwrite(PAGE_FAULT_ERROR_CODE_MATCH, 0);
+
+    v->arch.hvm_vmx.exception_bitmap = HVM_TRAP_MASK | (1U << TRAP_debug) |
+                                   (1U << TRAP_int3) | (1U << TRAP_no_device);
+    __vmwrite(EXCEPTION_BITMAP, v->arch.hvm_vmx.exception_bitmap);
+
+    /* Set WP bit so rdonly pages are not written from CPL 0. */
+    tmpval = X86_CR0_PG | X86_CR0_NE | X86_CR0_PE | X86_CR0_WP;
+    __vmwrite(GUEST_CR0, tmpval);
+    __vmwrite(CR0_READ_SHADOW, tmpval);
+    v->arch.hvm_vcpu.hw_cr[0] = v->arch.hvm_vcpu.guest_cr[0] = tmpval;
+
+    tmpval = real_cr4_to_pv_guest_cr4(mmu_cr4_features);
+    __vmwrite(GUEST_CR4, tmpval);
+    __vmwrite(CR4_READ_SHADOW, tmpval);
+    v->arch.hvm_vcpu.guest_cr[4] = tmpval;
+
+    __vmwrite(CR0_GUEST_HOST_MASK, ~0UL);
+    __vmwrite(CR4_GUEST_HOST_MASK, ~0UL);
+
+     v->arch.hvm_vmx.vmx_realmode = 0;
+
+    ept->asr  = pagetable_get_pfn(p2m_get_pagetable(p2m));
+    __vmwrite(EPT_POINTER, ept_get_eptp(ept));
+
+    rdmsrl(MSR_IA32_CR_PAT, host_pat);
+    __vmwrite(HOST_PAT, host_pat);
+    __vmwrite(GUEST_PAT, MSR_IA32_CR_PAT_RESET);
+
+    /* The paging mode is updated for PVH by arch_set_info_guest(). */
+
+    return 0;
+}
+
 static int construct_vmcs(struct vcpu *v)
 {
     struct domain *d = v->domain;
@@ -864,6 +1097,13 @@ static int construct_vmcs(struct vcpu *v)
 
     vmx_vmcs_enter(v);
 
+    if ( is_pvh_vcpu(v) )
+    {
+        int rc = pvh_construct_vmcs(v);
+        vmx_vmcs_exit(v);
+        return rc;
+    }
+
     /* VMCS controls. */
     __vmwrite(PIN_BASED_VM_EXEC_CONTROL, vmx_pin_based_exec_control);
 
@@ -1281,8 +1521,11 @@ void vmx_do_resume(struct vcpu *v)
 
         vmx_clear_vmcs(v);
         vmx_load_vmcs(v);
-        hvm_migrate_timers(v);
-        hvm_migrate_pirqs(v);
+        if ( !is_pvh_vcpu(v) )
+        {
+            hvm_migrate_timers(v);
+            hvm_migrate_pirqs(v);
+        }
         vmx_set_host_env(v);
         /*
          * Both n1 VMCS and n2 VMCS need to update the host environment after 
@@ -1294,6 +1537,9 @@ void vmx_do_resume(struct vcpu *v)
         hvm_asid_flush_vcpu(v);
     }
 
+    if ( is_pvh_vcpu(v) )
+        reset_stack_and_jump(vmx_asm_do_vmentry);
+
     debug_state = v->domain->debugger_attached
                   || v->domain->arch.hvm_domain.params[HVM_PARAM_MEMORY_EVENT_INT3]
                   || v->domain->arch.hvm_domain.params[HVM_PARAM_MEMORY_EVENT_SINGLE_STEP];
@@ -1477,7 +1723,7 @@ static void vmcs_dump(unsigned char ch)
 
     for_each_domain ( d )
     {
-        if ( !is_hvm_domain(d) )
+        if ( is_pv_domain(d) )
             continue;
         printk("\n>>> Domain %d <<<\n", d->domain_id);
         for_each_vcpu ( d, v )
-- 
1.7.2.3

  parent reply	other threads:[~2013-06-25  0:01 UTC|newest]

Thread overview: 80+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2013-06-25  0:01 [PATCH 00/18][V7]: PVH xen: Phase I, Version 7 patches Mukesh Rathor
2013-06-25  0:01 ` [PATCH 01/18] PVH xen: turn gdb_frames/gdt_ents into union Mukesh Rathor
2013-06-25  8:40   ` Jan Beulich
2013-06-25  0:01 ` [PATCH 02/18] PVH xen: add params to read_segment_register Mukesh Rathor
2013-06-25  0:01 ` [PATCH 03/18] PVH xen: Move e820 fields out of pv_domain struct Mukesh Rathor
2013-06-25  8:44   ` Jan Beulich
2013-06-25  0:01 ` [PATCH 04/18] PVH xen: vmx related preparatory changes for PVH Mukesh Rathor
2013-06-25  8:48   ` Jan Beulich
2013-06-25  0:01 ` [PATCH 05/18] PVH xen: hvm/vmcs " Mukesh Rathor
2013-06-25  8:51   ` Jan Beulich
2013-06-25  0:01 ` [PATCH 06/18] PVH xen: Introduce PVH guest type and some basic changes Mukesh Rathor
2013-06-25  9:01   ` Jan Beulich
2013-06-26  1:14     ` Mukesh Rathor
2013-06-26  8:18       ` Jan Beulich
2013-06-25  0:01 ` [PATCH 07/18] PVH xen: domain create, schedular related code changes Mukesh Rathor
2013-06-25  9:13   ` Jan Beulich
2013-06-25  0:01 ` [PATCH 08/18] PVH xen: support invalid op emulation for PVH Mukesh Rathor
2013-06-25  9:16   ` Jan Beulich
2013-06-25  0:01 ` [PATCH 09/18] PVH xen: Support privileged " Mukesh Rathor
2013-06-25  9:36   ` Jan Beulich
2013-06-26 22:41     ` Mukesh Rathor
2013-06-27  7:22       ` Jan Beulich
2013-06-27 23:43         ` Mukesh Rathor
2013-06-28  9:20           ` Jan Beulich
2013-07-03  1:38             ` Mukesh Rathor
2013-07-03 10:21               ` Jan Beulich
2013-07-04  2:00                 ` Mukesh Rathor
2013-07-04  8:04                   ` Jan Beulich
2013-07-06  1:43                     ` Mukesh Rathor
2013-06-25  0:01 ` [PATCH 10/18] PVH xen: interrupt/event-channel delivery to PVH Mukesh Rathor
2013-06-25 14:29   ` Konrad Rzeszutek Wilk
2013-07-12  0:29     ` Mukesh Rathor
2013-06-25  0:01 ` [PATCH 11/18] PVH xen: additional changes to support PVH guest creation and execution Mukesh Rathor
2013-06-25  0:01 ` [PATCH 12/18] PVH xen: mapcache and show registers Mukesh Rathor
2013-06-25  9:45   ` Jan Beulich
2013-06-25  0:01 ` [PATCH 13/18] PVH xen: mtrr, tsc, grant changes Mukesh Rathor
2013-06-25 14:30   ` Konrad Rzeszutek Wilk
2013-06-25  0:01 ` [PATCH 14/18] PVH xen: Checks, asserts, and limitations for PVH Mukesh Rathor
2013-06-25  9:54   ` Jan Beulich
2013-06-27  2:43     ` Mukesh Rathor
2013-06-27  7:25       ` Jan Beulich
2013-06-25  0:01 ` [PATCH 15/18] PVH xen: add hypercall support " Mukesh Rathor
2013-06-25 10:12   ` Jan Beulich
2013-06-27  3:09     ` Mukesh Rathor
2013-06-27  7:29       ` Jan Beulich
2013-06-25  0:01 ` Mukesh Rathor [this message]
2013-06-25 10:17   ` [PATCH 16/18] PVH xen: vmcs related changes Jan Beulich
2013-06-25  0:01 ` [PATCH 17/18] PVH xen: HVM support of PVH guest creation/destruction Mukesh Rathor
2013-06-25  0:01 ` [PATCH 18/18] PVH xen: introduce vmx_pvh.c Mukesh Rathor
2013-06-25 10:49   ` Jan Beulich
2013-06-27  3:30     ` Mukesh Rathor
2013-06-27  7:41       ` Jan Beulich
2013-06-28  1:28         ` Mukesh Rathor
2013-06-28  9:26           ` Jan Beulich
2013-06-28  1:35     ` Mukesh Rathor
2013-06-28  9:31       ` Jan Beulich
2013-06-29  3:03         ` Mukesh Rathor
2013-07-01  8:49           ` Jan Beulich
2013-07-06  1:31         ` Mukesh Rathor
2013-07-08  8:31           ` Jan Beulich
2013-07-08 23:09             ` Mukesh Rathor
2013-07-09  0:01               ` Mukesh Rathor
2013-07-09  7:31                 ` Jan Beulich
2013-07-10  0:33                   ` Mukesh Rathor
2013-07-10  7:20                     ` Jan Beulich
2013-06-28  2:28     ` Mukesh Rathor
2013-06-28  9:44       ` Jan Beulich
2013-06-29  3:04         ` Mukesh Rathor
2013-07-01  8:54           ` Jan Beulich
2013-07-02  2:01             ` Mukesh Rathor
2013-07-03  1:40         ` Mukesh Rathor
2013-07-03 10:25           ` Jan Beulich
2013-07-04  2:02             ` Mukesh Rathor
2013-07-04  8:07               ` Jan Beulich
2013-07-16  2:00         ` Mukesh Rathor
2013-07-16  6:50           ` Jan Beulich
2013-07-17  0:47             ` Mukesh Rathor
2013-07-17  6:36               ` Jan Beulich
2013-06-25 10:17 ` [PATCH 00/18][V7]: PVH xen: Phase I, Version 7 patches George Dunlap
2013-06-26  0:04   ` Mukesh Rathor

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1372118507-16864-17-git-send-email-mukesh.rathor@oracle.com \
    --to=mukesh.rathor@oracle.com \
    --cc=Xen-devel@lists.xensource.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.