* [PATCH 0/3] VMX: misc adjustments
@ 2015-10-19 15:10 Jan Beulich
  2015-10-19 15:21 ` [PATCH 1/3] VMX: re-order definitions Jan Beulich
                   ` (2 more replies)
  0 siblings, 3 replies; 26+ messages in thread
From: Jan Beulich @ 2015-10-19 15:10 UTC (permalink / raw)
  To: xen-devel; +Cc: Kevin Tian, Jun Nakajima

1: VMX: re-order definitions
2: VMX: allocate VMCS pages from domain heap
3: vVMX: use latched VMCS machine address

Signed-off-by: Jan Beulich <jbeulich@suse.com>

* [PATCH 1/3] VMX: re-order definitions
  2015-10-19 15:10 [PATCH 0/3] VMX: misc adjustments Jan Beulich
@ 2015-10-19 15:21 ` Jan Beulich
  2015-10-21  3:04   ` Tian, Kevin
  2015-10-19 15:22 ` [PATCH 2/3] VMX: allocate VMCS pages from domain heap Jan Beulich
  2015-10-19 15:23 ` [PATCH 3/3] vVMX: use latched VMCS machine address Jan Beulich
  2 siblings, 1 reply; 26+ messages in thread
From: Jan Beulich @ 2015-10-19 15:21 UTC (permalink / raw)
  To: xen-devel; +Cc: Kevin Tian, Jun Nakajima

[-- Attachment #1: Type: text/plain, Size: 4363 bytes --]

... so they end up reasonably sorted, easing lookup.

Signed-off-by: Jan Beulich <jbeulich@suse.com>

--- a/xen/include/asm-x86/hvm/vmx/vmcs.h
+++ b/xen/include/asm-x86/hvm/vmx/vmcs.h
@@ -227,27 +227,26 @@ extern u32 vmx_vmentry_control;
 #define SECONDARY_EXEC_ENABLE_VIRT_EXCEPTIONS   0x00040000
 extern u32 vmx_secondary_exec_control;
 
-#define VMX_EPT_EXEC_ONLY_SUPPORTED             0x00000001
-#define VMX_EPT_WALK_LENGTH_4_SUPPORTED         0x00000040
-#define VMX_EPT_MEMORY_TYPE_UC                  0x00000100
-#define VMX_EPT_MEMORY_TYPE_WB                  0x00004000
-#define VMX_EPT_SUPERPAGE_2MB                   0x00010000
-#define VMX_EPT_SUPERPAGE_1GB                   0x00020000
-#define VMX_EPT_INVEPT_INSTRUCTION              0x00100000
-#define VMX_EPT_INVEPT_SINGLE_CONTEXT           0x02000000
-#define VMX_EPT_INVEPT_ALL_CONTEXT              0x04000000
-#define VMX_EPT_AD_BIT                          0x00200000
+#define VMX_EPT_EXEC_ONLY_SUPPORTED                         0x00000001
+#define VMX_EPT_WALK_LENGTH_4_SUPPORTED                     0x00000040
+#define VMX_EPT_MEMORY_TYPE_UC                              0x00000100
+#define VMX_EPT_MEMORY_TYPE_WB                              0x00004000
+#define VMX_EPT_SUPERPAGE_2MB                               0x00010000
+#define VMX_EPT_SUPERPAGE_1GB                               0x00020000
+#define VMX_EPT_INVEPT_INSTRUCTION                          0x00100000
+#define VMX_EPT_AD_BIT                                      0x00200000
+#define VMX_EPT_INVEPT_SINGLE_CONTEXT                       0x02000000
+#define VMX_EPT_INVEPT_ALL_CONTEXT                          0x04000000
+#define VMX_VPID_INVVPID_INSTRUCTION                     0x00100000000ULL
+#define VMX_VPID_INVVPID_INDIVIDUAL_ADDR                 0x10000000000ULL
+#define VMX_VPID_INVVPID_SINGLE_CONTEXT                  0x20000000000ULL
+#define VMX_VPID_INVVPID_ALL_CONTEXT                     0x40000000000ULL
+#define VMX_VPID_INVVPID_SINGLE_CONTEXT_RETAINING_GLOBAL 0x80000000000ULL
+extern u64 vmx_ept_vpid_cap;
 
+#define VMX_MISC_CR3_TARGET                     0x01ff0000
 #define VMX_MISC_VMWRITE_ALL                    0x20000000
 
-#define VMX_VPID_INVVPID_INSTRUCTION                        0x100000000ULL
-#define VMX_VPID_INVVPID_INDIVIDUAL_ADDR                    0x10000000000ULL
-#define VMX_VPID_INVVPID_SINGLE_CONTEXT                     0x20000000000ULL
-#define VMX_VPID_INVVPID_ALL_CONTEXT                        0x40000000000ULL
-#define VMX_VPID_INVVPID_SINGLE_CONTEXT_RETAINING_GLOBAL    0x80000000000ULL
-
-#define VMX_MISC_CR3_TARGET             0x1ff0000
-
 #define cpu_has_wbinvd_exiting \
     (vmx_secondary_exec_control & SECONDARY_EXEC_WBINVD_EXITING)
 #define cpu_has_vmx_virtualize_apic_accesses \
--- a/xen/include/asm-x86/hvm/vmx/vmx.h
+++ b/xen/include/asm-x86/hvm/vmx/vmx.h
@@ -246,7 +246,6 @@ static inline unsigned long pi_get_pir(s
 #define MODRM_EAX_07    ".byte 0x38\n" /* [EAX], with reg/opcode: /7 */
 #define MODRM_EAX_ECX   ".byte 0xc1\n" /* EAX, ECX */
 
-extern u64 vmx_ept_vpid_cap;
 extern uint8_t posted_intr_vector;
 
 #define cpu_has_vmx_ept_exec_only_supported        \
@@ -254,15 +253,11 @@ extern uint8_t posted_intr_vector;
 
 #define cpu_has_vmx_ept_wl4_supported           \
     (vmx_ept_vpid_cap & VMX_EPT_WALK_LENGTH_4_SUPPORTED)
-#define cpu_has_vmx_ept_mt_uc                   \
-    (vmx_ept_vpid_cap & VMX_EPT_MEMORY_TYPE_UC)
-#define cpu_has_vmx_ept_mt_wb                   \
-    (vmx_ept_vpid_cap & VMX_EPT_MEMORY_TYPE_WB)
-#define cpu_has_vmx_ept_1gb                     \
-    (vmx_ept_vpid_cap & VMX_EPT_SUPERPAGE_1GB)
-#define cpu_has_vmx_ept_2mb                     \
-    (vmx_ept_vpid_cap & VMX_EPT_SUPERPAGE_2MB)
-#define cpu_has_vmx_ept_ad (vmx_ept_vpid_cap & VMX_EPT_AD_BIT)
+#define cpu_has_vmx_ept_mt_uc (vmx_ept_vpid_cap & VMX_EPT_MEMORY_TYPE_UC)
+#define cpu_has_vmx_ept_mt_wb (vmx_ept_vpid_cap & VMX_EPT_MEMORY_TYPE_WB)
+#define cpu_has_vmx_ept_2mb   (vmx_ept_vpid_cap & VMX_EPT_SUPERPAGE_2MB)
+#define cpu_has_vmx_ept_1gb   (vmx_ept_vpid_cap & VMX_EPT_SUPERPAGE_1GB)
+#define cpu_has_vmx_ept_ad    (vmx_ept_vpid_cap & VMX_EPT_AD_BIT)
 #define cpu_has_vmx_ept_invept_single_context   \
     (vmx_ept_vpid_cap & VMX_EPT_INVEPT_SINGLE_CONTEXT)

* [PATCH 2/3] VMX: allocate VMCS pages from domain heap
  2015-10-19 15:10 [PATCH 0/3] VMX: misc adjustments Jan Beulich
  2015-10-19 15:21 ` [PATCH 1/3] VMX: re-order definitions Jan Beulich
@ 2015-10-19 15:22 ` Jan Beulich
  2015-10-20 10:12   ` Andrew Cooper
  2015-10-19 15:23 ` [PATCH 3/3] vVMX: use latched VMCS machine address Jan Beulich
  2 siblings, 1 reply; 26+ messages in thread
From: Jan Beulich @ 2015-10-19 15:22 UTC (permalink / raw)
  To: xen-devel; +Cc: Kevin Tian, Jun Nakajima

[-- Attachment #1: Type: text/plain, Size: 11851 bytes --]

There being only very few uses of the virtual address of a VMCS,
convert these cases to establish a mapping and lift the Xen heap
restriction from the VMCS allocation.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
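
For illustration, a minimal sketch of the pattern the patch applies in the
few places where the VMCS contents (rather than just its machine address)
still need to be touched: map the domain-heap page transiently, access the
field, and unmap again.  The helper name below is hypothetical; the
map_domain_page()/unmap_domain_page() usage mirrors the hunks that follow.

static uint32_t vmcs_read_revision_id(paddr_t vmcs_pa)
{
    /* Transient mapping instead of a permanent Xen-heap virtual address. */
    const struct vmcs_struct *vmcs =
        map_domain_page(_mfn(PFN_DOWN(vmcs_pa)));
    uint32_t id = vmcs->vmcs_revision_id;

    unmap_domain_page(vmcs);
    return id;
}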

--- a/xen/arch/x86/hvm/vmx/vmcs.c
+++ b/xen/arch/x86/hvm/vmx/vmcs.c
@@ -119,8 +119,8 @@ const u32 vmx_introspection_force_enable
 const unsigned int vmx_introspection_force_enabled_msrs_size =
     ARRAY_SIZE(vmx_introspection_force_enabled_msrs);
 
-static DEFINE_PER_CPU_READ_MOSTLY(struct vmcs_struct *, vmxon_region);
-static DEFINE_PER_CPU(struct vmcs_struct *, current_vmcs);
+static DEFINE_PER_CPU_READ_MOSTLY(paddr_t, vmxon_region);
+static DEFINE_PER_CPU(paddr_t, current_vmcs);
 static DEFINE_PER_CPU(struct list_head, active_vmcs_list);
 DEFINE_PER_CPU(bool_t, vmxon);
 
@@ -483,25 +483,28 @@ static int vmx_init_vmcs_config(void)
     return 0;
 }
 
-static struct vmcs_struct *vmx_alloc_vmcs(void)
+static paddr_t vmx_alloc_vmcs(void)
 {
+    struct page_info *pg;
     struct vmcs_struct *vmcs;
 
-    if ( (vmcs = alloc_xenheap_page()) == NULL )
+    if ( (pg = alloc_domheap_page(NULL, 0)) == NULL )
     {
         gdprintk(XENLOG_WARNING, "Failed to allocate VMCS.\n");
-        return NULL;
+        return 0;
     }
 
+    vmcs = __map_domain_page(pg);
     clear_page(vmcs);
     vmcs->vmcs_revision_id = vmcs_revision_id;
+    unmap_domain_page(vmcs);
 
-    return vmcs;
+    return page_to_maddr(pg);
 }
 
-static void vmx_free_vmcs(struct vmcs_struct *vmcs)
+static void vmx_free_vmcs(paddr_t pa)
 {
-    free_xenheap_page(vmcs);
+    free_domheap_page(maddr_to_page(pa));
 }
 
 static void __vmx_clear_vmcs(void *info)
@@ -514,7 +517,7 @@ static void __vmx_clear_vmcs(void *info)
 
     if ( arch_vmx->active_cpu == smp_processor_id() )
     {
-        __vmpclear(virt_to_maddr(arch_vmx->vmcs));
+        __vmpclear(arch_vmx->vmcs_pa);
         if ( arch_vmx->vmcs_shadow_maddr )
             __vmpclear(arch_vmx->vmcs_shadow_maddr);
 
@@ -523,8 +526,8 @@ static void __vmx_clear_vmcs(void *info)
 
         list_del(&arch_vmx->active_list);
 
-        if ( arch_vmx->vmcs == this_cpu(current_vmcs) )
-            this_cpu(current_vmcs) = NULL;
+        if ( arch_vmx->vmcs_pa == this_cpu(current_vmcs) )
+            this_cpu(current_vmcs) = 0;
     }
 }
 
@@ -550,8 +553,8 @@ static void vmx_load_vmcs(struct vcpu *v
 
     ASSERT(v->arch.hvm_vmx.active_cpu == smp_processor_id());
 
-    __vmptrld(virt_to_maddr(v->arch.hvm_vmx.vmcs));
-    this_cpu(current_vmcs) = v->arch.hvm_vmx.vmcs;
+    __vmptrld(v->arch.hvm_vmx.vmcs_pa);
+    this_cpu(current_vmcs) = v->arch.hvm_vmx.vmcs_pa;
 
     local_irq_restore(flags);
 }
@@ -565,11 +568,11 @@ int vmx_cpu_up_prepare(unsigned int cpu)
     if ( nvmx_cpu_up_prepare(cpu) != 0 )
         printk("CPU%d: Could not allocate virtual VMCS buffer.\n", cpu);
 
-    if ( per_cpu(vmxon_region, cpu) != NULL )
+    if ( per_cpu(vmxon_region, cpu) )
         return 0;
 
     per_cpu(vmxon_region, cpu) = vmx_alloc_vmcs();
-    if ( per_cpu(vmxon_region, cpu) != NULL )
+    if ( per_cpu(vmxon_region, cpu) )
         return 0;
 
     printk("CPU%d: Could not allocate host VMCS\n", cpu);
@@ -580,7 +583,7 @@ int vmx_cpu_up_prepare(unsigned int cpu)
 void vmx_cpu_dead(unsigned int cpu)
 {
     vmx_free_vmcs(per_cpu(vmxon_region, cpu));
-    per_cpu(vmxon_region, cpu) = NULL;
+    per_cpu(vmxon_region, cpu) = 0;
     nvmx_cpu_dead(cpu);
 }
 
@@ -638,7 +641,7 @@ int vmx_cpu_up(void)
     if ( (rc = vmx_cpu_up_prepare(cpu)) != 0 )
         return rc;
 
-    switch ( __vmxon(virt_to_maddr(this_cpu(vmxon_region))) )
+    switch ( __vmxon(this_cpu(vmxon_region)) )
     {
     case -2: /* #UD or #GP */
         if ( bios_locked &&
@@ -710,7 +713,7 @@ bool_t vmx_vmcs_try_enter(struct vcpu *v
      * vmx_vmcs_enter/exit and scheduling tail critical regions.
      */
     if ( likely(v == current) )
-        return v->arch.hvm_vmx.vmcs == this_cpu(current_vmcs);
+        return v->arch.hvm_vmx.vmcs_pa == this_cpu(current_vmcs);
 
     fv = &this_cpu(foreign_vmcs);
 
@@ -906,17 +909,17 @@ int vmx_check_msr_bitmap(unsigned long *
 /*
  * Switch VMCS between layer 1 & 2 guest
  */
-void vmx_vmcs_switch(struct vmcs_struct *from, struct vmcs_struct *to)
+void vmx_vmcs_switch(paddr_t from, paddr_t to)
 {
     struct arch_vmx_struct *vmx = &current->arch.hvm_vmx;
     spin_lock(&vmx->vmcs_lock);
 
-    __vmpclear(virt_to_maddr(from));
+    __vmpclear(from);
     if ( vmx->vmcs_shadow_maddr )
         __vmpclear(vmx->vmcs_shadow_maddr);
-    __vmptrld(virt_to_maddr(to));
+    __vmptrld(to);
 
-    vmx->vmcs = to;
+    vmx->vmcs_pa = to;
     vmx->launched = 0;
     this_cpu(current_vmcs) = to;
 
@@ -936,11 +939,11 @@ void virtual_vmcs_enter(void *vvmcs)
 
 void virtual_vmcs_exit(void *vvmcs)
 {
-    struct vmcs_struct *cur = this_cpu(current_vmcs);
+    paddr_t cur = this_cpu(current_vmcs);
 
     __vmpclear(pfn_to_paddr(domain_page_map_to_mfn(vvmcs)));
     if ( cur )
-        __vmptrld(virt_to_maddr(cur));
+        __vmptrld(cur);
 
 }
 
@@ -1558,17 +1561,17 @@ int vmx_create_vmcs(struct vcpu *v)
     struct arch_vmx_struct *arch_vmx = &v->arch.hvm_vmx;
     int rc;
 
-    if ( (arch_vmx->vmcs = vmx_alloc_vmcs()) == NULL )
+    if ( (arch_vmx->vmcs_pa = vmx_alloc_vmcs()) == 0 )
         return -ENOMEM;
 
     INIT_LIST_HEAD(&arch_vmx->active_list);
-    __vmpclear(virt_to_maddr(arch_vmx->vmcs));
+    __vmpclear(arch_vmx->vmcs_pa);
     arch_vmx->active_cpu = -1;
     arch_vmx->launched   = 0;
 
     if ( (rc = construct_vmcs(v)) != 0 )
     {
-        vmx_free_vmcs(arch_vmx->vmcs);
+        vmx_free_vmcs(arch_vmx->vmcs_pa);
         return rc;
     }
 
@@ -1581,7 +1584,7 @@ void vmx_destroy_vmcs(struct vcpu *v)
 
     vmx_clear_vmcs(v);
 
-    vmx_free_vmcs(arch_vmx->vmcs);
+    vmx_free_vmcs(arch_vmx->vmcs_pa);
 
     free_xenheap_page(v->arch.hvm_vmx.host_msr_area);
     free_xenheap_page(v->arch.hvm_vmx.msr_area);
@@ -1612,7 +1615,7 @@ void vmx_do_resume(struct vcpu *v)
 
     if ( v->arch.hvm_vmx.active_cpu == smp_processor_id() )
     {
-        if ( v->arch.hvm_vmx.vmcs != this_cpu(current_vmcs) )
+        if ( v->arch.hvm_vmx.vmcs_pa != this_cpu(current_vmcs) )
             vmx_load_vmcs(v);
     }
     else
--- a/xen/arch/x86/hvm/vmx/vvmx.c
+++ b/xen/arch/x86/hvm/vmx/vvmx.c
@@ -56,13 +56,14 @@ int nvmx_vcpu_initialise(struct vcpu *v)
 {
     struct nestedvmx *nvmx = &vcpu_2_nvmx(v);
     struct nestedvcpu *nvcpu = &vcpu_nestedhvm(v);
+    struct page_info *pg = alloc_domheap_page(NULL, 0);
 
-    nvcpu->nv_n2vmcx = alloc_xenheap_page();
-    if ( !nvcpu->nv_n2vmcx )
+    if ( !pg )
     {
         gdprintk(XENLOG_ERR, "nest: allocation for shadow vmcs failed\n");
         return -ENOMEM;
     }
+    nvcpu->nv_n2vmcx_pa = page_to_maddr(pg);
 
     /* non-root VMREAD/VMWRITE bitmap. */
     if ( cpu_has_vmx_vmcs_shadowing )
@@ -129,13 +130,14 @@ void nvmx_vcpu_destroy(struct vcpu *v)
      * in order to avoid double free of L2 VMCS and the possible memory
      * leak of L1 VMCS page.
      */
-    if ( nvcpu->nv_n1vmcx )
-        v->arch.hvm_vmx.vmcs = nvcpu->nv_n1vmcx;
+    if ( nvcpu->nv_n1vmcx_pa )
+        v->arch.hvm_vmx.vmcs_pa = nvcpu->nv_n1vmcx_pa;
 
-    if ( nvcpu->nv_n2vmcx ) {
-        __vmpclear(virt_to_maddr(nvcpu->nv_n2vmcx));
-        free_xenheap_page(nvcpu->nv_n2vmcx);
-        nvcpu->nv_n2vmcx = NULL;
+    if ( nvcpu->nv_n2vmcx_pa )
+    {
+        __vmpclear(nvcpu->nv_n2vmcx_pa);
+        free_domheap_page(maddr_to_page(nvcpu->nv_n2vmcx_pa));
+        nvcpu->nv_n2vmcx_pa = 0;
     }
 
     /* Must also cope with nvmx_vcpu_initialise() not having got called. */
@@ -726,8 +728,8 @@ static void __clear_current_vvmcs(struct
 {
     struct nestedvcpu *nvcpu = &vcpu_nestedhvm(v);
     
-    if ( nvcpu->nv_n2vmcx )
-        __vmpclear(virt_to_maddr(nvcpu->nv_n2vmcx));
+    if ( nvcpu->nv_n2vmcx_pa )
+        __vmpclear(nvcpu->nv_n2vmcx_pa);
 }
 
 static bool_t __must_check _map_msr_bitmap(struct vcpu *v)
@@ -1125,7 +1127,7 @@ static void virtual_vmentry(struct cpu_u
     void *vvmcs = nvcpu->nv_vvmcx;
     unsigned long lm_l1, lm_l2;
 
-    vmx_vmcs_switch(v->arch.hvm_vmx.vmcs, nvcpu->nv_n2vmcx);
+    vmx_vmcs_switch(v->arch.hvm_vmx.vmcs_pa, nvcpu->nv_n2vmcx_pa);
 
     nestedhvm_vcpu_enter_guestmode(v);
     nvcpu->nv_vmentry_pending = 0;
@@ -1335,7 +1337,7 @@ static void virtual_vmexit(struct cpu_us
          !(v->arch.hvm_vcpu.guest_efer & EFER_LMA) )
         shadow_to_vvmcs_bulk(v, ARRAY_SIZE(gpdpte_fields), gpdpte_fields);
 
-    vmx_vmcs_switch(v->arch.hvm_vmx.vmcs, nvcpu->nv_n1vmcx);
+    vmx_vmcs_switch(v->arch.hvm_vmx.vmcs_pa, nvcpu->nv_n1vmcx_pa);
 
     nestedhvm_vcpu_exit_guestmode(v);
     nvcpu->nv_vmexit_pending = 0;
@@ -1433,10 +1435,11 @@ int nvmx_handle_vmxon(struct cpu_user_re
      * `fork' the host vmcs to shadow_vmcs
      * vmcs_lock is not needed since we are on current
      */
-    nvcpu->nv_n1vmcx = v->arch.hvm_vmx.vmcs;
-    __vmpclear(virt_to_maddr(v->arch.hvm_vmx.vmcs));
-    memcpy(nvcpu->nv_n2vmcx, v->arch.hvm_vmx.vmcs, PAGE_SIZE);
-    __vmptrld(virt_to_maddr(v->arch.hvm_vmx.vmcs));
+    nvcpu->nv_n1vmcx_pa = v->arch.hvm_vmx.vmcs_pa;
+    __vmpclear(v->arch.hvm_vmx.vmcs_pa);
+    copy_domain_page(_mfn(PFN_DOWN(nvcpu->nv_n2vmcx_pa)),
+                     _mfn(PFN_DOWN(v->arch.hvm_vmx.vmcs_pa)));
+    __vmptrld(v->arch.hvm_vmx.vmcs_pa);
     v->arch.hvm_vmx.launched = 0;
     vmreturn(regs, VMSUCCEED);
 
@@ -1888,9 +1891,15 @@ int nvmx_msr_read_intercept(unsigned int
      */
     switch (msr) {
     case MSR_IA32_VMX_BASIC:
+    {
+        const struct vmcs_struct *vmcs =
+            map_domain_page(_mfn(PFN_DOWN(v->arch.hvm_vmx.vmcs_pa)));
+
         data = (host_data & (~0ul << 32)) |
-               (v->arch.hvm_vmx.vmcs->vmcs_revision_id & 0x7fffffff);
+               (vmcs->vmcs_revision_id & 0x7fffffff);
+        unmap_domain_page(vmcs);
         break;
+    }
     case MSR_IA32_VMX_PINBASED_CTLS:
     case MSR_IA32_VMX_TRUE_PINBASED_CTLS:
         /* 1-seetings */
--- a/xen/include/asm-x86/hvm/vcpu.h
+++ b/xen/include/asm-x86/hvm/vcpu.h
@@ -104,8 +104,8 @@ struct nestedvcpu {
     void *nv_n2vmcx; /* shadow VMCB/VMCS used to run l2 guest */
 
     uint64_t nv_vvmcxaddr; /* l1 guest physical address of nv_vvmcx */
-    uint64_t nv_n1vmcx_pa; /* host physical address of nv_n1vmcx */
-    uint64_t nv_n2vmcx_pa; /* host physical address of nv_n2vmcx */
+    paddr_t nv_n1vmcx_pa; /* host physical address of nv_n1vmcx */
+    paddr_t nv_n2vmcx_pa; /* host physical address of nv_n2vmcx */
 
     /* SVM/VMX arch specific */
     union {
--- a/xen/include/asm-x86/hvm/vmx/vmcs.h
+++ b/xen/include/asm-x86/hvm/vmx/vmcs.h
@@ -92,8 +92,8 @@ struct pi_desc {
 #define NR_PML_ENTRIES   512
 
 struct arch_vmx_struct {
-    /* Virtual address of VMCS. */
-    struct vmcs_struct  *vmcs;
+    /* Physical address of VMCS. */
+    paddr_t              vmcs_pa;
     /* VMCS shadow machine address. */
     paddr_t             vmcs_shadow_maddr;
 
@@ -488,7 +488,7 @@ void vmx_enable_intercept_for_msr(struct
 int vmx_read_guest_msr(u32 msr, u64 *val);
 int vmx_write_guest_msr(u32 msr, u64 val);
 int vmx_add_msr(u32 msr, int type);
-void vmx_vmcs_switch(struct vmcs_struct *from, struct vmcs_struct *to);
+void vmx_vmcs_switch(paddr_t from, paddr_t to);
 void vmx_set_eoi_exit_bitmap(struct vcpu *v, u8 vector);
 void vmx_clear_eoi_exit_bitmap(struct vcpu *v, u8 vector);
 int vmx_check_msr_bitmap(unsigned long *msr_bitmap, u32 msr, int access_type);

* [PATCH 3/3] vVMX: use latched VMCS machine address
  2015-10-19 15:10 [PATCH 0/3] VMX: misc adjustments Jan Beulich
  2015-10-19 15:21 ` [PATCH 1/3] VMX: re-order definitions Jan Beulich
  2015-10-19 15:22 ` [PATCH 2/3] VMX: allocate VMCS pages from domain heap Jan Beulich
@ 2015-10-19 15:23 ` Jan Beulich
  2015-10-21  5:44   ` Tian, Kevin
  2016-02-23  8:34   ` Li, Liang Z
  2 siblings, 2 replies; 26+ messages in thread
From: Jan Beulich @ 2015-10-19 15:23 UTC (permalink / raw)
  To: xen-devel; +Cc: Kevin Tian, Jun Nakajima

[-- Attachment #1: Type: text/plain, Size: 40533 bytes --]

Instead of calling domain_page_map_to_mfn() over and over, latch the
guest VMCS machine address unconditionally (i.e. independent of whether
VMCS shadowing is supported by the hardware).

Since this requires altering the parameters of __[gs]et_vvmcs{,_real}()
(and hence all their callers) anyway, take the opportunity to also drop
the bogus double underscores from their names (and from
__[gs]et_vvmcs_virtual() as well).

Signed-off-by: Jan Beulich <jbeulich@suse.com>
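
As a sketch of how the renamed accessors plausibly fit together (the
dispatching wrapper lives in a header outside this excerpt, so its exact
form here is an assumption), callers now use get_vvmcs()/set_vvmcs() and
the wrapper selects the shadow-VMCS path only when the hardware supports
VMCS shadowing:

/* Assumed shape of the dispatch wrappers; the helpers are defined below. */
#define get_vvmcs(vcpu, encoding) \
    (cpu_has_vmx_vmcs_shadowing ? \
     get_vvmcs_real(vcpu, encoding) : \
     get_vvmcs_virtual(vcpu_nestedhvm(vcpu).nv_vvmcx, encoding))

#define set_vvmcs(vcpu, encoding, val) \
    (cpu_has_vmx_vmcs_shadowing ? \
     set_vvmcs_real(vcpu, encoding, val) : \
     set_vvmcs_virtual(vcpu_nestedhvm(vcpu).nv_vvmcx, encoding, val))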

--- a/xen/arch/x86/hvm/vmx/intr.c
+++ b/xen/arch/x86/hvm/vmx/intr.c
@@ -191,13 +191,13 @@ static int nvmx_intr_intercept(struct vc
         if ( intack.source == hvm_intsrc_pic ||
                  intack.source == hvm_intsrc_lapic )
         {
-            ctrl = __get_vvmcs(vcpu_nestedhvm(v).nv_vvmcx, PIN_BASED_VM_EXEC_CONTROL);
+            ctrl = get_vvmcs(v, PIN_BASED_VM_EXEC_CONTROL);
             if ( !(ctrl & PIN_BASED_EXT_INTR_MASK) )
                 return 0;
 
             vmx_inject_extint(intack.vector, intack.source);
 
-            ctrl = __get_vvmcs(vcpu_nestedhvm(v).nv_vvmcx, VM_EXIT_CONTROLS);
+            ctrl = get_vvmcs(v, VM_EXIT_CONTROLS);
             if ( ctrl & VM_EXIT_ACK_INTR_ON_EXIT )
             {
                 /* for now, duplicate the ack path in vmx_intr_assist */
--- a/xen/arch/x86/hvm/vmx/vmcs.c
+++ b/xen/arch/x86/hvm/vmx/vmcs.c
@@ -932,37 +932,36 @@ void vmx_vmcs_switch(paddr_t from, paddr
     spin_unlock(&vmx->vmcs_lock);
 }
 
-void virtual_vmcs_enter(void *vvmcs)
+void virtual_vmcs_enter(const struct vcpu *v)
 {
-    __vmptrld(pfn_to_paddr(domain_page_map_to_mfn(vvmcs)));
+    __vmptrld(v->arch.hvm_vmx.vmcs_shadow_maddr);
 }
 
-void virtual_vmcs_exit(void *vvmcs)
+void virtual_vmcs_exit(const struct vcpu *v)
 {
     paddr_t cur = this_cpu(current_vmcs);
 
-    __vmpclear(pfn_to_paddr(domain_page_map_to_mfn(vvmcs)));
+    __vmpclear(v->arch.hvm_vmx.vmcs_shadow_maddr);
     if ( cur )
         __vmptrld(cur);
-
 }
 
-u64 virtual_vmcs_vmread(void *vvmcs, u32 vmcs_encoding)
+u64 virtual_vmcs_vmread(const struct vcpu *v, u32 vmcs_encoding)
 {
     u64 res;
 
-    virtual_vmcs_enter(vvmcs);
+    virtual_vmcs_enter(v);
     __vmread(vmcs_encoding, &res);
-    virtual_vmcs_exit(vvmcs);
+    virtual_vmcs_exit(v);
 
     return res;
 }
 
-void virtual_vmcs_vmwrite(void *vvmcs, u32 vmcs_encoding, u64 val)
+void virtual_vmcs_vmwrite(const struct vcpu *v, u32 vmcs_encoding, u64 val)
 {
-    virtual_vmcs_enter(vvmcs);
+    virtual_vmcs_enter(v);
     __vmwrite(vmcs_encoding, val);
-    virtual_vmcs_exit(vvmcs);
+    virtual_vmcs_exit(v);
 }
 
 static int construct_vmcs(struct vcpu *v)
--- a/xen/arch/x86/hvm/vmx/vmx.c
+++ b/xen/arch/x86/hvm/vmx/vmx.c
@@ -1464,8 +1464,7 @@ void vmx_inject_extint(int trap, uint8_t
     u32    pin_based_cntrl;
 
     if ( nestedhvm_vcpu_in_guestmode(v) ) {
-        pin_based_cntrl = __get_vvmcs(vcpu_nestedhvm(v).nv_vvmcx, 
-                                     PIN_BASED_VM_EXEC_CONTROL);
+        pin_based_cntrl = get_vvmcs(v, PIN_BASED_VM_EXEC_CONTROL);
         if ( pin_based_cntrl & PIN_BASED_EXT_INTR_MASK ) {
             nvmx_enqueue_n2_exceptions (v, 
                INTR_INFO_VALID_MASK |
@@ -1485,8 +1484,7 @@ void vmx_inject_nmi(void)
     u32    pin_based_cntrl;
 
     if ( nestedhvm_vcpu_in_guestmode(v) ) {
-        pin_based_cntrl = __get_vvmcs(vcpu_nestedhvm(v).nv_vvmcx, 
-                                     PIN_BASED_VM_EXEC_CONTROL);
+        pin_based_cntrl = get_vvmcs(v, PIN_BASED_VM_EXEC_CONTROL);
         if ( pin_based_cntrl & PIN_BASED_NMI_EXITING ) {
             nvmx_enqueue_n2_exceptions (v, 
                INTR_INFO_VALID_MASK |
--- a/xen/arch/x86/hvm/vmx/vvmx.c
+++ b/xen/arch/x86/hvm/vmx/vvmx.c
@@ -175,11 +175,7 @@ int nvmx_vcpu_reset(struct vcpu *v)
 
 uint64_t nvmx_vcpu_eptp_base(struct vcpu *v)
 {
-    uint64_t eptp_base;
-    struct nestedvcpu *nvcpu = &vcpu_nestedhvm(v);
-
-    eptp_base = __get_vvmcs(nvcpu->nv_vvmcx, EPT_POINTER);
-    return eptp_base & PAGE_MASK;
+    return get_vvmcs(v, EPT_POINTER) & PAGE_MASK;
 }
 
 bool_t nvmx_ept_enabled(struct vcpu *v)
@@ -236,7 +232,7 @@ static int vvmcs_offset(u32 width, u32 t
     return offset;
 }
 
-u64 __get_vvmcs_virtual(void *vvmcs, u32 vmcs_encoding)
+u64 get_vvmcs_virtual(void *vvmcs, u32 vmcs_encoding)
 {
     union vmcs_encoding enc;
     u64 *content = (u64 *) vvmcs;
@@ -266,12 +262,12 @@ u64 __get_vvmcs_virtual(void *vvmcs, u32
     return res;
 }
 
-u64 __get_vvmcs_real(void *vvmcs, u32 vmcs_encoding)
+u64 get_vvmcs_real(const struct vcpu *v, u32 encoding)
 {
-    return virtual_vmcs_vmread(vvmcs, vmcs_encoding);
+    return virtual_vmcs_vmread(v, encoding);
 }
 
-void __set_vvmcs_virtual(void *vvmcs, u32 vmcs_encoding, u64 val)
+void set_vvmcs_virtual(void *vvmcs, u32 vmcs_encoding, u64 val)
 {
     union vmcs_encoding enc;
     u64 *content = (u64 *) vvmcs;
@@ -307,9 +303,9 @@ void __set_vvmcs_virtual(void *vvmcs, u3
     content[offset] = res;
 }
 
-void __set_vvmcs_real(void *vvmcs, u32 vmcs_encoding, u64 val)
+void set_vvmcs_real(const struct vcpu *v, u32 encoding, u64 val)
 {
-    virtual_vmcs_vmwrite(vvmcs, vmcs_encoding, val);
+    virtual_vmcs_vmwrite(v, encoding, val);
 }
 
 static unsigned long reg_read(struct cpu_user_regs *regs,
@@ -331,25 +327,20 @@ static void reg_write(struct cpu_user_re
 
 static inline u32 __n2_pin_exec_control(struct vcpu *v)
 {
-    struct nestedvcpu *nvcpu = &vcpu_nestedhvm(v);
-
-    return __get_vvmcs(nvcpu->nv_vvmcx, PIN_BASED_VM_EXEC_CONTROL);
+    return get_vvmcs(v, PIN_BASED_VM_EXEC_CONTROL);
 }
 
 static inline u32 __n2_exec_control(struct vcpu *v)
 {
-    struct nestedvcpu *nvcpu = &vcpu_nestedhvm(v);
-
-    return __get_vvmcs(nvcpu->nv_vvmcx, CPU_BASED_VM_EXEC_CONTROL);
+    return get_vvmcs(v, CPU_BASED_VM_EXEC_CONTROL);
 }
 
 static inline u32 __n2_secondary_exec_control(struct vcpu *v)
 {
-    struct nestedvcpu *nvcpu = &vcpu_nestedhvm(v);
     u64 second_ctrl = 0;
 
     if ( __n2_exec_control(v) & CPU_BASED_ACTIVATE_SECONDARY_CONTROLS )
-        second_ctrl = __get_vvmcs(nvcpu->nv_vvmcx, SECONDARY_VM_EXEC_CONTROL);
+        second_ctrl = get_vvmcs(v, SECONDARY_VM_EXEC_CONTROL);
 
     return second_ctrl;
 }
@@ -502,18 +493,17 @@ static void vmreturn(struct cpu_user_reg
 bool_t nvmx_intercepts_exception(struct vcpu *v, unsigned int trap,
                                  int error_code)
 {
-    struct nestedvcpu *nvcpu = &vcpu_nestedhvm(v);
     u32 exception_bitmap, pfec_match=0, pfec_mask=0;
     int r;
 
     ASSERT ( trap < 32 );
 
-    exception_bitmap = __get_vvmcs(nvcpu->nv_vvmcx, EXCEPTION_BITMAP);
+    exception_bitmap = get_vvmcs(v, EXCEPTION_BITMAP);
     r = exception_bitmap & (1 << trap) ? 1: 0;
 
     if ( trap == TRAP_page_fault ) {
-        pfec_match = __get_vvmcs(nvcpu->nv_vvmcx, PAGE_FAULT_ERROR_CODE_MATCH);
-        pfec_mask  = __get_vvmcs(nvcpu->nv_vvmcx, PAGE_FAULT_ERROR_CODE_MASK);
+        pfec_match = get_vvmcs(v, PAGE_FAULT_ERROR_CODE_MATCH);
+        pfec_mask  = get_vvmcs(v, PAGE_FAULT_ERROR_CODE_MASK);
         if ( (error_code & pfec_mask) != pfec_match )
             r = !r;
     }
@@ -528,9 +518,7 @@ static inline u32 __shadow_control(struc
                                  unsigned int field,
                                  u32 host_value)
 {
-    struct nestedvcpu *nvcpu = &vcpu_nestedhvm(v);
-
-    return (u32) __get_vvmcs(nvcpu->nv_vvmcx, field) | host_value;
+    return get_vvmcs(v, field) | host_value;
 }
 
 static void set_shadow_control(struct vcpu *v,
@@ -597,13 +585,12 @@ void nvmx_update_secondary_exec_control(
                                         unsigned long host_cntrl)
 {
     u32 shadow_cntrl;
-    struct nestedvcpu *nvcpu = &vcpu_nestedhvm(v);
     struct nestedvmx *nvmx = &vcpu_2_nvmx(v);
     u32 apicv_bit = SECONDARY_EXEC_APIC_REGISTER_VIRT |
                     SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY;
 
     host_cntrl &= ~apicv_bit;
-    shadow_cntrl = __get_vvmcs(nvcpu->nv_vvmcx, SECONDARY_VM_EXEC_CONTROL);
+    shadow_cntrl = get_vvmcs(v, SECONDARY_VM_EXEC_CONTROL);
 
     /* No vAPIC-v support, so it shouldn't be set in vmcs12. */
     ASSERT(!(shadow_cntrl & apicv_bit));
@@ -616,10 +603,9 @@ void nvmx_update_secondary_exec_control(
 static void nvmx_update_pin_control(struct vcpu *v, unsigned long host_cntrl)
 {
     u32 shadow_cntrl;
-    struct nestedvcpu *nvcpu = &vcpu_nestedhvm(v);
 
     host_cntrl &= ~PIN_BASED_POSTED_INTERRUPT;
-    shadow_cntrl = __get_vvmcs(nvcpu->nv_vvmcx, PIN_BASED_VM_EXEC_CONTROL);
+    shadow_cntrl = get_vvmcs(v, PIN_BASED_VM_EXEC_CONTROL);
 
     /* No vAPIC-v support, so it shouldn't be set in vmcs12. */
     ASSERT(!(shadow_cntrl & PIN_BASED_POSTED_INTERRUPT));
@@ -631,9 +617,8 @@ static void nvmx_update_pin_control(stru
 static void nvmx_update_exit_control(struct vcpu *v, unsigned long host_cntrl)
 {
     u32 shadow_cntrl;
-    struct nestedvcpu *nvcpu = &vcpu_nestedhvm(v);
 
-    shadow_cntrl = __get_vvmcs(nvcpu->nv_vvmcx, VM_EXIT_CONTROLS);
+    shadow_cntrl = get_vvmcs(v, VM_EXIT_CONTROLS);
     shadow_cntrl &= ~(VM_EXIT_SAVE_DEBUG_CNTRLS 
                       | VM_EXIT_LOAD_HOST_PAT
                       | VM_EXIT_LOAD_HOST_EFER
@@ -645,9 +630,8 @@ static void nvmx_update_exit_control(str
 static void nvmx_update_entry_control(struct vcpu *v)
 {
     u32 shadow_cntrl;
-    struct nestedvcpu *nvcpu = &vcpu_nestedhvm(v);
 
-    shadow_cntrl = __get_vvmcs(nvcpu->nv_vvmcx, VM_ENTRY_CONTROLS);
+    shadow_cntrl = get_vvmcs(v, VM_ENTRY_CONTROLS);
     shadow_cntrl &= ~(VM_ENTRY_LOAD_GUEST_PAT
                       | VM_ENTRY_LOAD_GUEST_EFER
                       | VM_ENTRY_LOAD_PERF_GLOBAL_CTRL);
@@ -661,7 +645,6 @@ void nvmx_update_exception_bitmap(struct
 
 static void nvmx_update_apic_access_address(struct vcpu *v)
 {
-    struct nestedvcpu *nvcpu = &vcpu_nestedhvm(v);
     u32 ctrl;
 
     ctrl = __n2_secondary_exec_control(v);
@@ -671,7 +654,7 @@ static void nvmx_update_apic_access_addr
         unsigned long apic_gpfn;
         struct page_info *apic_pg;
 
-        apic_gpfn = __get_vvmcs(nvcpu->nv_vvmcx, APIC_ACCESS_ADDR) >> PAGE_SHIFT;
+        apic_gpfn = get_vvmcs(v, APIC_ACCESS_ADDR) >> PAGE_SHIFT;
         apic_pg = get_page_from_gfn(v->domain, apic_gpfn, &p2mt, P2M_ALLOC);
         ASSERT(apic_pg && !p2m_is_paging(p2mt));
         __vmwrite(APIC_ACCESS_ADDR, page_to_maddr(apic_pg));
@@ -683,7 +666,6 @@ static void nvmx_update_apic_access_addr
 
 static void nvmx_update_virtual_apic_address(struct vcpu *v)
 {
-    struct nestedvcpu *nvcpu = &vcpu_nestedhvm(v);
     u32 ctrl;
 
     ctrl = __n2_exec_control(v);
@@ -693,7 +675,7 @@ static void nvmx_update_virtual_apic_add
         unsigned long vapic_gpfn;
         struct page_info *vapic_pg;
 
-        vapic_gpfn = __get_vvmcs(nvcpu->nv_vvmcx, VIRTUAL_APIC_PAGE_ADDR) >> PAGE_SHIFT;
+        vapic_gpfn = get_vvmcs(v, VIRTUAL_APIC_PAGE_ADDR) >> PAGE_SHIFT;
         vapic_pg = get_page_from_gfn(v->domain, vapic_gpfn, &p2mt, P2M_ALLOC);
         ASSERT(vapic_pg && !p2m_is_paging(p2mt));
         __vmwrite(VIRTUAL_APIC_PAGE_ADDR, page_to_maddr(vapic_pg));
@@ -705,23 +687,20 @@ static void nvmx_update_virtual_apic_add
 
 static void nvmx_update_tpr_threshold(struct vcpu *v)
 {
-    struct nestedvcpu *nvcpu = &vcpu_nestedhvm(v);
     u32 ctrl = __n2_exec_control(v);
+
     if ( ctrl & CPU_BASED_TPR_SHADOW )
-        __vmwrite(TPR_THRESHOLD, __get_vvmcs(nvcpu->nv_vvmcx, TPR_THRESHOLD));
+        __vmwrite(TPR_THRESHOLD, get_vvmcs(v, TPR_THRESHOLD));
     else
         __vmwrite(TPR_THRESHOLD, 0);
 }
 
 static void nvmx_update_pfec(struct vcpu *v)
 {
-    struct nestedvcpu *nvcpu = &vcpu_nestedhvm(v);
-    void *vvmcs = nvcpu->nv_vvmcx;
-
     __vmwrite(PAGE_FAULT_ERROR_CODE_MASK,
-        __get_vvmcs(vvmcs, PAGE_FAULT_ERROR_CODE_MASK));
+              get_vvmcs(v, PAGE_FAULT_ERROR_CODE_MASK));
     __vmwrite(PAGE_FAULT_ERROR_CODE_MATCH,
-        __get_vvmcs(vvmcs, PAGE_FAULT_ERROR_CODE_MATCH));
+              get_vvmcs(v, PAGE_FAULT_ERROR_CODE_MATCH));
 }
 
 static void __clear_current_vvmcs(struct vcpu *v)
@@ -739,7 +718,7 @@ static bool_t __must_check _map_msr_bitm
 
     if ( nvmx->msrbitmap )
         hvm_unmap_guest_frame(nvmx->msrbitmap, 1);
-    gpa = __get_vvmcs(vcpu_nestedhvm(v).nv_vvmcx, MSR_BITMAP);
+    gpa = get_vvmcs(v, MSR_BITMAP);
     nvmx->msrbitmap = hvm_map_guest_frame_ro(gpa >> PAGE_SHIFT, 1);
 
     return nvmx->msrbitmap != NULL;
@@ -754,7 +733,7 @@ static bool_t __must_check _map_io_bitma
     index = vmcs_reg == IO_BITMAP_A ? 0 : 1;
     if (nvmx->iobitmap[index])
         hvm_unmap_guest_frame(nvmx->iobitmap[index], 1);
-    gpa = __get_vvmcs(vcpu_nestedhvm(v).nv_vvmcx, vmcs_reg);
+    gpa = get_vvmcs(v, vmcs_reg);
     nvmx->iobitmap[index] = hvm_map_guest_frame_ro(gpa >> PAGE_SHIFT, 1);
 
     return nvmx->iobitmap[index] != NULL;
@@ -777,6 +756,7 @@ static void nvmx_purge_vvmcs(struct vcpu
         hvm_unmap_guest_frame(nvcpu->nv_vvmcx, 1);
     nvcpu->nv_vvmcx = NULL;
     nvcpu->nv_vvmcxaddr = VMCX_EADDR;
+    v->arch.hvm_vmx.vmcs_shadow_maddr = 0;
     for (i=0; i<2; i++) {
         if ( nvmx->iobitmap[i] ) {
             hvm_unmap_guest_frame(nvmx->iobitmap[i], 1);
@@ -792,11 +772,10 @@ static void nvmx_purge_vvmcs(struct vcpu
 u64 nvmx_get_tsc_offset(struct vcpu *v)
 {
     u64 offset = 0;
-    struct nestedvcpu *nvcpu = &vcpu_nestedhvm(v);
 
-    if ( __get_vvmcs(nvcpu->nv_vvmcx, CPU_BASED_VM_EXEC_CONTROL) &
+    if ( get_vvmcs(v, CPU_BASED_VM_EXEC_CONTROL) &
          CPU_BASED_USE_TSC_OFFSETING )
-        offset = __get_vvmcs(nvcpu->nv_vvmcx, TSC_OFFSET);
+        offset = get_vvmcs(v, TSC_OFFSET);
 
     return offset;
 }
@@ -911,12 +890,9 @@ static struct vmcs_host_to_guest {
     {HOST_SYSENTER_EIP, GUEST_SYSENTER_EIP},
 };
 
-static void vvmcs_to_shadow(void *vvmcs, unsigned int field)
+static void vvmcs_to_shadow(const struct vcpu *v, unsigned int field)
 {
-    u64 value;
-
-    value = __get_vvmcs(vvmcs, field);
-    __vmwrite(field, value);
+    __vmwrite(field, get_vvmcs(v, field));
 }
 
 static void vvmcs_to_shadow_bulk(struct vcpu *v, unsigned int n,
@@ -950,15 +926,15 @@ static void vvmcs_to_shadow_bulk(struct 
 
 fallback:
     for ( i = 0; i < n; i++ )
-        vvmcs_to_shadow(vvmcs, field[i]);
+        vvmcs_to_shadow(v, field[i]);
 }
 
-static inline void shadow_to_vvmcs(void *vvmcs, unsigned int field)
+static inline void shadow_to_vvmcs(const struct vcpu *v, unsigned int field)
 {
     unsigned long value;
 
     if ( __vmread_safe(field, &value) )
-        __set_vvmcs(vvmcs, field, value);
+        set_vvmcs(v, field, value);
 }
 
 static void shadow_to_vvmcs_bulk(struct vcpu *v, unsigned int n,
@@ -992,7 +968,7 @@ static void shadow_to_vvmcs_bulk(struct 
 
 fallback:
     for ( i = 0; i < n; i++ )
-        shadow_to_vvmcs(vvmcs, field[i]);
+        shadow_to_vvmcs(v, field[i]);
 }
 
 static void load_shadow_control(struct vcpu *v)
@@ -1017,7 +993,6 @@ static void load_shadow_control(struct v
 static void load_shadow_guest_state(struct vcpu *v)
 {
     struct nestedvcpu *nvcpu = &vcpu_nestedhvm(v);
-    void *vvmcs = nvcpu->nv_vvmcx;
     u32 control;
     u64 cr_gh_mask, cr_read_shadow;
 
@@ -1031,18 +1006,18 @@ static void load_shadow_guest_state(stru
     vvmcs_to_shadow_bulk(v, ARRAY_SIZE(vmcs_gstate_field),
                          vmcs_gstate_field);
 
-    nvcpu->guest_cr[0] = __get_vvmcs(vvmcs, CR0_READ_SHADOW);
-    nvcpu->guest_cr[4] = __get_vvmcs(vvmcs, CR4_READ_SHADOW);
-    hvm_set_cr0(__get_vvmcs(vvmcs, GUEST_CR0), 1);
-    hvm_set_cr4(__get_vvmcs(vvmcs, GUEST_CR4), 1);
-    hvm_set_cr3(__get_vvmcs(vvmcs, GUEST_CR3), 1);
+    nvcpu->guest_cr[0] = get_vvmcs(v, CR0_READ_SHADOW);
+    nvcpu->guest_cr[4] = get_vvmcs(v, CR4_READ_SHADOW);
+    hvm_set_cr0(get_vvmcs(v, GUEST_CR0), 1);
+    hvm_set_cr4(get_vvmcs(v, GUEST_CR4), 1);
+    hvm_set_cr3(get_vvmcs(v, GUEST_CR3), 1);
 
-    control = __get_vvmcs(vvmcs, VM_ENTRY_CONTROLS);
+    control = get_vvmcs(v, VM_ENTRY_CONTROLS);
     if ( control & VM_ENTRY_LOAD_GUEST_PAT )
-        hvm_set_guest_pat(v, __get_vvmcs(vvmcs, GUEST_PAT));
+        hvm_set_guest_pat(v, get_vvmcs(v, GUEST_PAT));
     if ( control & VM_ENTRY_LOAD_PERF_GLOBAL_CTRL )
         hvm_msr_write_intercept(MSR_CORE_PERF_GLOBAL_CTRL,
-                                __get_vvmcs(vvmcs, GUEST_PERF_GLOBAL_CTRL), 0);
+                                get_vvmcs(v, GUEST_PERF_GLOBAL_CTRL), 0);
 
     hvm_funcs.set_tsc_offset(v, v->arch.hvm_vcpu.cache_tsc_offset, 0);
 
@@ -1053,14 +1028,14 @@ static void load_shadow_guest_state(stru
      * guest host mask to 0xffffffff in shadow VMCS (follow the host L1 VMCS),
      * then calculate the corresponding read shadow separately for CR0 and CR4.
      */
-    cr_gh_mask = __get_vvmcs(vvmcs, CR0_GUEST_HOST_MASK);
-    cr_read_shadow = (__get_vvmcs(vvmcs, GUEST_CR0) & ~cr_gh_mask) |
-                     (__get_vvmcs(vvmcs, CR0_READ_SHADOW) & cr_gh_mask);
+    cr_gh_mask = get_vvmcs(v, CR0_GUEST_HOST_MASK);
+    cr_read_shadow = (get_vvmcs(v, GUEST_CR0) & ~cr_gh_mask) |
+                     (get_vvmcs(v, CR0_READ_SHADOW) & cr_gh_mask);
     __vmwrite(CR0_READ_SHADOW, cr_read_shadow);
 
-    cr_gh_mask = __get_vvmcs(vvmcs, CR4_GUEST_HOST_MASK);
-    cr_read_shadow = (__get_vvmcs(vvmcs, GUEST_CR4) & ~cr_gh_mask) |
-                     (__get_vvmcs(vvmcs, CR4_READ_SHADOW) & cr_gh_mask);
+    cr_gh_mask = get_vvmcs(v, CR4_GUEST_HOST_MASK);
+    cr_read_shadow = (get_vvmcs(v, GUEST_CR4) & ~cr_gh_mask) |
+                     (get_vvmcs(v, CR4_READ_SHADOW) & cr_gh_mask);
     __vmwrite(CR4_READ_SHADOW, cr_read_shadow);
 
     /* TODO: CR3 target control */
@@ -1084,11 +1059,11 @@ static uint64_t get_host_eptp(struct vcp
     return ept_get_eptp(ept_data);
 }
 
-static bool_t nvmx_vpid_enabled(struct nestedvcpu *nvcpu)
+static bool_t nvmx_vpid_enabled(const struct vcpu *v)
 {
     uint32_t second_cntl;
 
-    second_cntl = __get_vvmcs(nvcpu->nv_vvmcx, SECONDARY_VM_EXEC_CONTROL);
+    second_cntl = get_vvmcs(v, SECONDARY_VM_EXEC_CONTROL);
     if ( second_cntl & SECONDARY_EXEC_ENABLE_VPID )
         return 1;
     return 0;
@@ -1099,32 +1074,38 @@ static void nvmx_set_vmcs_pointer(struct
     unsigned long vvmcs_mfn = domain_page_map_to_mfn(vvmcs);
     paddr_t vvmcs_maddr = vvmcs_mfn << PAGE_SHIFT;
 
-    __vmpclear(vvmcs_maddr);
-    vvmcs->vmcs_revision_id |= VMCS_RID_TYPE_MASK;
+    if ( cpu_has_vmx_vmcs_shadowing )
+    {
+        __vmpclear(vvmcs_maddr);
+        vvmcs->vmcs_revision_id |= VMCS_RID_TYPE_MASK;
+        __vmwrite(VMCS_LINK_POINTER, vvmcs_maddr);
+        __vmwrite(VMREAD_BITMAP, page_to_maddr(v->arch.hvm_vmx.vmread_bitmap));
+        __vmwrite(VMWRITE_BITMAP, page_to_maddr(v->arch.hvm_vmx.vmwrite_bitmap));
+    }
     v->arch.hvm_vmx.vmcs_shadow_maddr = vvmcs_maddr;
-    __vmwrite(VMCS_LINK_POINTER, vvmcs_maddr);
-    __vmwrite(VMREAD_BITMAP, page_to_maddr(v->arch.hvm_vmx.vmread_bitmap));
-    __vmwrite(VMWRITE_BITMAP, page_to_maddr(v->arch.hvm_vmx.vmwrite_bitmap));
 }
 
 static void nvmx_clear_vmcs_pointer(struct vcpu *v, struct vmcs_struct *vvmcs)
 {
-    unsigned long vvmcs_mfn = domain_page_map_to_mfn(vvmcs);
-    paddr_t vvmcs_maddr = vvmcs_mfn << PAGE_SHIFT;
-
-    __vmpclear(vvmcs_maddr);
-    vvmcs->vmcs_revision_id &= ~VMCS_RID_TYPE_MASK;
     v->arch.hvm_vmx.vmcs_shadow_maddr = 0;
-    __vmwrite(VMCS_LINK_POINTER, ~0ul);
-    __vmwrite(VMREAD_BITMAP, 0);
-    __vmwrite(VMWRITE_BITMAP, 0);
+
+    if ( cpu_has_vmx_vmcs_shadowing )
+    {
+        unsigned long vvmcs_mfn = domain_page_map_to_mfn(vvmcs);
+        paddr_t vvmcs_maddr = vvmcs_mfn << PAGE_SHIFT;
+
+        __vmpclear(vvmcs_maddr);
+        vvmcs->vmcs_revision_id &= ~VMCS_RID_TYPE_MASK;
+        __vmwrite(VMCS_LINK_POINTER, ~0ul);
+        __vmwrite(VMREAD_BITMAP, 0);
+        __vmwrite(VMWRITE_BITMAP, 0);
+    }
 }
 
 static void virtual_vmentry(struct cpu_user_regs *regs)
 {
     struct vcpu *v = current;
     struct nestedvcpu *nvcpu = &vcpu_nestedhvm(v);
-    void *vvmcs = nvcpu->nv_vvmcx;
     unsigned long lm_l1, lm_l2;
 
     vmx_vmcs_switch(v->arch.hvm_vmx.vmcs_pa, nvcpu->nv_n2vmcx_pa);
@@ -1143,8 +1124,7 @@ static void virtual_vmentry(struct cpu_u
      * L1 exit_controls
      */
     lm_l1 = !!hvm_long_mode_enabled(v);
-    lm_l2 = !!(__get_vvmcs(vvmcs, VM_ENTRY_CONTROLS) &
-                           VM_ENTRY_IA32E_MODE);
+    lm_l2 = !!(get_vvmcs(v, VM_ENTRY_CONTROLS) & VM_ENTRY_IA32E_MODE);
 
     if ( lm_l2 )
         v->arch.hvm_vcpu.guest_efer |= EFER_LMA | EFER_LME;
@@ -1161,9 +1141,9 @@ static void virtual_vmentry(struct cpu_u
          !(v->arch.hvm_vcpu.guest_efer & EFER_LMA) )
         vvmcs_to_shadow_bulk(v, ARRAY_SIZE(gpdpte_fields), gpdpte_fields);
 
-    regs->eip = __get_vvmcs(vvmcs, GUEST_RIP);
-    regs->esp = __get_vvmcs(vvmcs, GUEST_RSP);
-    regs->eflags = __get_vvmcs(vvmcs, GUEST_RFLAGS);
+    regs->eip = get_vvmcs(v, GUEST_RIP);
+    regs->esp = get_vvmcs(v, GUEST_RSP);
+    regs->eflags = get_vvmcs(v, GUEST_RFLAGS);
 
     /* updating host cr0 to sync TS bit */
     __vmwrite(HOST_CR0, v->arch.hvm_vmx.host_cr0);
@@ -1175,10 +1155,10 @@ static void virtual_vmentry(struct cpu_u
         __vmwrite(EPT_POINTER, get_host_eptp(v));
 
     /* nested VPID support! */
-    if ( cpu_has_vmx_vpid && nvmx_vpid_enabled(nvcpu) )
+    if ( cpu_has_vmx_vpid && nvmx_vpid_enabled(v) )
     {
         struct nestedvmx *nvmx = &vcpu_2_nvmx(v);
-        uint32_t new_vpid =  __get_vvmcs(vvmcs, VIRTUAL_PROCESSOR_ID);
+        uint32_t new_vpid = get_vvmcs(v, VIRTUAL_PROCESSOR_ID);
 
         if ( nvmx->guest_vpid != new_vpid )
         {
@@ -1191,34 +1171,29 @@ static void virtual_vmentry(struct cpu_u
 
 static void sync_vvmcs_guest_state(struct vcpu *v, struct cpu_user_regs *regs)
 {
-    struct nestedvcpu *nvcpu = &vcpu_nestedhvm(v);
-    void *vvmcs = nvcpu->nv_vvmcx;
-
     /* copy shadow vmcs.gstate back to vvmcs.gstate */
     shadow_to_vvmcs_bulk(v, ARRAY_SIZE(vmcs_gstate_field),
                          vmcs_gstate_field);
     /* RIP, RSP are in user regs */
-    __set_vvmcs(vvmcs, GUEST_RIP, regs->eip);
-    __set_vvmcs(vvmcs, GUEST_RSP, regs->esp);
+    set_vvmcs(v, GUEST_RIP, regs->eip);
+    set_vvmcs(v, GUEST_RSP, regs->esp);
 
     /* CR3 sync if exec doesn't want cr3 load exiting: i.e. nested EPT */
     if ( !(__n2_exec_control(v) & CPU_BASED_CR3_LOAD_EXITING) )
-        shadow_to_vvmcs(vvmcs, GUEST_CR3);
+        shadow_to_vvmcs(v, GUEST_CR3);
 }
 
 static void sync_vvmcs_ro(struct vcpu *v)
 {
-    struct nestedvcpu *nvcpu = &vcpu_nestedhvm(v);
     struct nestedvmx *nvmx = &vcpu_2_nvmx(v);
-    void *vvmcs = nvcpu->nv_vvmcx;
 
     shadow_to_vvmcs_bulk(v, ARRAY_SIZE(vmcs_ro_field), vmcs_ro_field);
 
     /* Adjust exit_reason/exit_qualifciation for violation case */
-    if ( __get_vvmcs(vvmcs, VM_EXIT_REASON) == EXIT_REASON_EPT_VIOLATION )
+    if ( get_vvmcs(v, VM_EXIT_REASON) == EXIT_REASON_EPT_VIOLATION )
     {
-        __set_vvmcs(vvmcs, EXIT_QUALIFICATION, nvmx->ept.exit_qual);
-        __set_vvmcs(vvmcs, VM_EXIT_REASON, nvmx->ept.exit_reason);
+        set_vvmcs(v, EXIT_QUALIFICATION, nvmx->ept.exit_qual);
+        set_vvmcs(v, VM_EXIT_REASON, nvmx->ept.exit_reason);
     }
 }
 
@@ -1226,34 +1201,32 @@ static void load_vvmcs_host_state(struct
 {
     int i;
     u64 r;
-    void *vvmcs = vcpu_nestedhvm(v).nv_vvmcx;
     u32 control;
 
     for ( i = 0; i < ARRAY_SIZE(vmcs_h2g_field); i++ )
     {
-        r = __get_vvmcs(vvmcs, vmcs_h2g_field[i].host_field);
+        r = get_vvmcs(v, vmcs_h2g_field[i].host_field);
         __vmwrite(vmcs_h2g_field[i].guest_field, r);
     }
 
-    hvm_set_cr0(__get_vvmcs(vvmcs, HOST_CR0), 1);
-    hvm_set_cr4(__get_vvmcs(vvmcs, HOST_CR4), 1);
-    hvm_set_cr3(__get_vvmcs(vvmcs, HOST_CR3), 1);
+    hvm_set_cr0(get_vvmcs(v, HOST_CR0), 1);
+    hvm_set_cr4(get_vvmcs(v, HOST_CR4), 1);
+    hvm_set_cr3(get_vvmcs(v, HOST_CR3), 1);
 
-    control = __get_vvmcs(vvmcs, VM_EXIT_CONTROLS);
+    control = get_vvmcs(v, VM_EXIT_CONTROLS);
     if ( control & VM_EXIT_LOAD_HOST_PAT )
-        hvm_set_guest_pat(v, __get_vvmcs(vvmcs, HOST_PAT));
+        hvm_set_guest_pat(v, get_vvmcs(v, HOST_PAT));
     if ( control & VM_EXIT_LOAD_PERF_GLOBAL_CTRL )
         hvm_msr_write_intercept(MSR_CORE_PERF_GLOBAL_CTRL,
-                                __get_vvmcs(vvmcs, HOST_PERF_GLOBAL_CTRL), 1);
+                                get_vvmcs(v, HOST_PERF_GLOBAL_CTRL), 1);
 
     hvm_funcs.set_tsc_offset(v, v->arch.hvm_vcpu.cache_tsc_offset, 0);
 
-    __set_vvmcs(vvmcs, VM_ENTRY_INTR_INFO, 0);
+    set_vvmcs(v, VM_ENTRY_INTR_INFO, 0);
 }
 
 static void sync_exception_state(struct vcpu *v)
 {
-    struct nestedvcpu *nvcpu = &vcpu_nestedhvm(v);
     struct nestedvmx *nvmx = &vcpu_2_nvmx(v);
 
     if ( !(nvmx->intr.intr_info & INTR_INFO_VALID_MASK) )
@@ -1263,10 +1236,9 @@ static void sync_exception_state(struct 
     {
     case X86_EVENTTYPE_EXT_INTR:
         /* rename exit_reason to EXTERNAL_INTERRUPT */
-        __set_vvmcs(nvcpu->nv_vvmcx, VM_EXIT_REASON,
-                    EXIT_REASON_EXTERNAL_INTERRUPT);
-        __set_vvmcs(nvcpu->nv_vvmcx, EXIT_QUALIFICATION, 0);
-        __set_vvmcs(nvcpu->nv_vvmcx, VM_EXIT_INTR_INFO,
+        set_vvmcs(v, VM_EXIT_REASON, EXIT_REASON_EXTERNAL_INTERRUPT);
+        set_vvmcs(v, EXIT_QUALIFICATION, 0);
+        set_vvmcs(v, VM_EXIT_INTR_INFO,
                     nvmx->intr.intr_info);
         break;
 
@@ -1274,17 +1246,13 @@ static void sync_exception_state(struct 
     case X86_EVENTTYPE_SW_INTERRUPT:
     case X86_EVENTTYPE_SW_EXCEPTION:
         /* throw to L1 */
-        __set_vvmcs(nvcpu->nv_vvmcx, VM_EXIT_INTR_INFO,
-                    nvmx->intr.intr_info);
-        __set_vvmcs(nvcpu->nv_vvmcx, VM_EXIT_INTR_ERROR_CODE,
-                    nvmx->intr.error_code);
+        set_vvmcs(v, VM_EXIT_INTR_INFO, nvmx->intr.intr_info);
+        set_vvmcs(v, VM_EXIT_INTR_ERROR_CODE, nvmx->intr.error_code);
         break;
     case X86_EVENTTYPE_NMI:
-        __set_vvmcs(nvcpu->nv_vvmcx, VM_EXIT_REASON,
-                    EXIT_REASON_EXCEPTION_NMI);
-        __set_vvmcs(nvcpu->nv_vvmcx, EXIT_QUALIFICATION, 0);
-        __set_vvmcs(nvcpu->nv_vvmcx, VM_EXIT_INTR_INFO,
-                    nvmx->intr.intr_info);
+        set_vvmcs(v, VM_EXIT_REASON, EXIT_REASON_EXCEPTION_NMI);
+        set_vvmcs(v, EXIT_QUALIFICATION, 0);
+        set_vvmcs(v, VM_EXIT_INTR_INFO, nvmx->intr.intr_info);
         break;
     default:
         gdprintk(XENLOG_ERR, "Exception state %lx not handled\n",
@@ -1296,9 +1264,8 @@ static void sync_exception_state(struct 
 static void nvmx_update_apicv(struct vcpu *v)
 {
     struct nestedvmx *nvmx = &vcpu_2_nvmx(v);
-    struct nestedvcpu *nvcpu = &vcpu_nestedhvm(v);
-    unsigned long reason = __get_vvmcs(nvcpu->nv_vvmcx, VM_EXIT_REASON);
-    uint32_t intr_info = __get_vvmcs(nvcpu->nv_vvmcx, VM_EXIT_INTR_INFO);
+    unsigned long reason = get_vvmcs(v, VM_EXIT_REASON);
+    uint32_t intr_info = get_vvmcs(v, VM_EXIT_INTR_INFO);
 
     if ( reason == EXIT_REASON_EXTERNAL_INTERRUPT &&
          nvmx->intr.source == hvm_intsrc_lapic &&
@@ -1344,8 +1311,7 @@ static void virtual_vmexit(struct cpu_us
     nvcpu->nv_vmswitch_in_progress = 1;
 
     lm_l2 = !!hvm_long_mode_enabled(v);
-    lm_l1 = !!(__get_vvmcs(nvcpu->nv_vvmcx, VM_EXIT_CONTROLS) &
-                           VM_EXIT_IA32E_MODE);
+    lm_l1 = !!(get_vvmcs(v, VM_EXIT_CONTROLS) & VM_EXIT_IA32E_MODE);
 
     if ( lm_l1 )
         v->arch.hvm_vcpu.guest_efer |= EFER_LMA | EFER_LME;
@@ -1361,8 +1327,8 @@ static void virtual_vmexit(struct cpu_us
     if ( lm_l1 != lm_l2 )
         paging_update_paging_modes(v);
 
-    regs->eip = __get_vvmcs(nvcpu->nv_vvmcx, HOST_RIP);
-    regs->esp = __get_vvmcs(nvcpu->nv_vvmcx, HOST_RSP);
+    regs->eip = get_vvmcs(v, HOST_RIP);
+    regs->esp = get_vvmcs(v, HOST_RSP);
     /* VM exit clears all bits except bit 1 */
     regs->eflags = 0x2;
 
@@ -1539,7 +1505,6 @@ int nvmx_handle_vmresume(struct cpu_user
 {
     bool_t launched;
     struct vcpu *v = current;
-    struct nestedvcpu *nvcpu = &vcpu_nestedhvm(v);
     struct nestedvmx *nvmx = &vcpu_2_nvmx(v);
     int rc = vmx_inst_check_privilege(regs, 0);
 
@@ -1553,7 +1518,7 @@ int nvmx_handle_vmresume(struct cpu_user
     }
 
     launched = vvmcs_launched(&nvmx->launched_list,
-                   domain_page_map_to_mfn(nvcpu->nv_vvmcx));
+                              PFN_DOWN(v->arch.hvm_vmx.vmcs_shadow_maddr));
     if ( !launched ) {
        vmreturn (regs, VMFAIL_VALID);
        return X86EMUL_OKAY;
@@ -1565,7 +1530,6 @@ int nvmx_handle_vmlaunch(struct cpu_user
 {
     bool_t launched;
     struct vcpu *v = current;
-    struct nestedvcpu *nvcpu = &vcpu_nestedhvm(v);
     struct nestedvmx *nvmx = &vcpu_2_nvmx(v);
     int rc = vmx_inst_check_privilege(regs, 0);
 
@@ -1579,7 +1543,7 @@ int nvmx_handle_vmlaunch(struct cpu_user
     }
 
     launched = vvmcs_launched(&nvmx->launched_list,
-                   domain_page_map_to_mfn(nvcpu->nv_vvmcx));
+                              PFN_DOWN(v->arch.hvm_vmx.vmcs_shadow_maddr));
     if ( launched ) {
        vmreturn (regs, VMFAIL_VALID);
        return X86EMUL_OKAY;
@@ -1589,7 +1553,7 @@ int nvmx_handle_vmlaunch(struct cpu_user
         if ( rc == X86EMUL_OKAY )
         {
             if ( set_vvmcs_launched(&nvmx->launched_list,
-                    domain_page_map_to_mfn(nvcpu->nv_vvmcx)) < 0 )
+                                    PFN_DOWN(v->arch.hvm_vmx.vmcs_shadow_maddr)) < 0 )
                 return X86EMUL_UNHANDLEABLE;
         }
     }
@@ -1644,8 +1608,7 @@ int nvmx_handle_vmptrld(struct cpu_user_
         }
     }
 
-    if ( cpu_has_vmx_vmcs_shadowing )
-        nvmx_set_vmcs_pointer(v, nvcpu->nv_vvmcx);
+    nvmx_set_vmcs_pointer(v, nvcpu->nv_vvmcx);
 
     vmreturn(regs, VMSUCCEED);
 
@@ -1694,10 +1657,10 @@ int nvmx_handle_vmclear(struct cpu_user_
         rc = VMFAIL_INVALID;
     else if ( gpa == nvcpu->nv_vvmcxaddr )
     {
-        if ( cpu_has_vmx_vmcs_shadowing )
-            nvmx_clear_vmcs_pointer(v, nvcpu->nv_vvmcx);
-        clear_vvmcs_launched(&nvmx->launched_list,
-            domain_page_map_to_mfn(nvcpu->nv_vvmcx));
+        unsigned long mfn = PFN_DOWN(v->arch.hvm_vmx.vmcs_shadow_maddr);
+
+        nvmx_clear_vmcs_pointer(v, nvcpu->nv_vvmcx);
+        clear_vvmcs_launched(&nvmx->launched_list, mfn);
         nvmx_purge_vvmcs(v);
     }
     else 
@@ -1726,7 +1689,6 @@ int nvmx_handle_vmread(struct cpu_user_r
 {
     struct vcpu *v = current;
     struct vmx_inst_decoded decode;
-    struct nestedvcpu *nvcpu = &vcpu_nestedhvm(v);
     u64 value = 0;
     int rc;
 
@@ -1734,7 +1696,7 @@ int nvmx_handle_vmread(struct cpu_user_r
     if ( rc != X86EMUL_OKAY )
         return rc;
 
-    value = __get_vvmcs(nvcpu->nv_vvmcx, reg_read(regs, decode.reg2));
+    value = get_vvmcs(v, reg_read(regs, decode.reg2));
 
     switch ( decode.type ) {
     case VMX_INST_MEMREG_TYPE_MEMORY:
@@ -1755,7 +1717,6 @@ int nvmx_handle_vmwrite(struct cpu_user_
 {
     struct vcpu *v = current;
     struct vmx_inst_decoded decode;
-    struct nestedvcpu *nvcpu = &vcpu_nestedhvm(v);
     unsigned long operand; 
     u64 vmcs_encoding;
     bool_t okay = 1;
@@ -1765,7 +1726,7 @@ int nvmx_handle_vmwrite(struct cpu_user_
         return X86EMUL_EXCEPTION;
 
     vmcs_encoding = reg_read(regs, decode.reg2);
-    __set_vvmcs(nvcpu->nv_vvmcx, vmcs_encoding, operand);
+    set_vvmcs(v, vmcs_encoding, operand);
 
     switch ( vmcs_encoding & ~VMCS_HIGH(0) )
     {
@@ -2197,7 +2158,7 @@ int nvmx_n2_vmexit_handler(struct cpu_us
         }
         else if ( (intr_info & valid_mask) == valid_mask )
         {
-            exec_bitmap =__get_vvmcs(nvcpu->nv_vvmcx, EXCEPTION_BITMAP);
+            exec_bitmap = get_vvmcs(v, EXCEPTION_BITMAP);
 
             if ( exec_bitmap & (1 << vector) )
                 nvcpu->nv_vmexit_pending = 1;
@@ -2316,8 +2277,7 @@ int nvmx_n2_vmexit_handler(struct cpu_us
              * special handler is needed if L1 doesn't intercept rdtsc,
              * avoiding changing guest_tsc and messing up timekeeping in L1
              */
-            tsc = hvm_get_guest_tsc(v);
-            tsc += __get_vvmcs(nvcpu->nv_vvmcx, TSC_OFFSET);
+            tsc = hvm_get_guest_tsc(v) + get_vvmcs(v, TSC_OFFSET);
             regs->eax = (uint32_t)tsc;
             regs->edx = (uint32_t)(tsc >> 32);
             update_guest_eip();
@@ -2406,7 +2366,7 @@ int nvmx_n2_vmexit_handler(struct cpu_us
                 val = *reg;
                 if ( cr == 0 )
                 {
-                    u64 cr0_gh_mask = __get_vvmcs(nvcpu->nv_vvmcx, CR0_GUEST_HOST_MASK);
+                    u64 cr0_gh_mask = get_vvmcs(v, CR0_GUEST_HOST_MASK);
 
                     __vmread(CR0_READ_SHADOW, &old_val);
                     changed_bits = old_val ^ val;
@@ -2414,14 +2374,15 @@ int nvmx_n2_vmexit_handler(struct cpu_us
                         nvcpu->nv_vmexit_pending = 1;
                     else
                     {
-                        u64 guest_cr0 = __get_vvmcs(nvcpu->nv_vvmcx, GUEST_CR0);
-                        __set_vvmcs(nvcpu->nv_vvmcx, GUEST_CR0,
-                                    (guest_cr0 & cr0_gh_mask) | (val & ~cr0_gh_mask));
+                        u64 guest_cr0 = get_vvmcs(v, GUEST_CR0);
+
+                        set_vvmcs(v, GUEST_CR0,
+                                  (guest_cr0 & cr0_gh_mask) | (val & ~cr0_gh_mask));
                     }
                 }
                 else if ( cr == 4 )
                 {
-                    u64 cr4_gh_mask = __get_vvmcs(nvcpu->nv_vvmcx, CR4_GUEST_HOST_MASK);
+                    u64 cr4_gh_mask = get_vvmcs(v, CR4_GUEST_HOST_MASK);
 
                     __vmread(CR4_READ_SHADOW, &old_val);
                     changed_bits = old_val ^ val;
@@ -2429,9 +2390,10 @@ int nvmx_n2_vmexit_handler(struct cpu_us
                         nvcpu->nv_vmexit_pending = 1;
                     else
                     {
-                        u64 guest_cr4 = __get_vvmcs(nvcpu->nv_vvmcx, GUEST_CR4);
-                        __set_vvmcs(nvcpu->nv_vvmcx, GUEST_CR4,
-                                    (guest_cr4 & cr4_gh_mask) | (val & ~cr4_gh_mask));
+                        u64 guest_cr4 = get_vvmcs(v, GUEST_CR4);
+
+                        set_vvmcs(v, GUEST_CR4,
+                                  (guest_cr4 & cr4_gh_mask) | (val & ~cr4_gh_mask));
                     }
                 }
                 else
@@ -2440,20 +2402,21 @@ int nvmx_n2_vmexit_handler(struct cpu_us
             }
             case VMX_CONTROL_REG_ACCESS_TYPE_CLTS:
             {
-                u64 cr0_gh_mask = __get_vvmcs(nvcpu->nv_vvmcx, CR0_GUEST_HOST_MASK);
+                u64 cr0_gh_mask = get_vvmcs(v, CR0_GUEST_HOST_MASK);
 
                 if ( cr0_gh_mask & X86_CR0_TS )
                     nvcpu->nv_vmexit_pending = 1;
                 else
                 {
-                    u64 guest_cr0 = __get_vvmcs(nvcpu->nv_vvmcx, GUEST_CR0);
-                    __set_vvmcs(nvcpu->nv_vvmcx, GUEST_CR0, (guest_cr0 & ~X86_CR0_TS));
+                    u64 guest_cr0 = get_vvmcs(v, GUEST_CR0);
+
+                    set_vvmcs(v, GUEST_CR0, (guest_cr0 & ~X86_CR0_TS));
                 }
                 break;
             }
             case VMX_CONTROL_REG_ACCESS_TYPE_LMSW:
             {
-                u64 cr0_gh_mask = __get_vvmcs(nvcpu->nv_vvmcx, CR0_GUEST_HOST_MASK);
+                u64 cr0_gh_mask = get_vvmcs(v, CR0_GUEST_HOST_MASK);
 
                 __vmread(CR0_READ_SHADOW, &old_val);
                 old_val &= X86_CR0_PE|X86_CR0_MP|X86_CR0_EM|X86_CR0_TS;
@@ -2464,8 +2427,9 @@ int nvmx_n2_vmexit_handler(struct cpu_us
                     nvcpu->nv_vmexit_pending = 1;
                 else
                 {
-                    u64 guest_cr0 = __get_vvmcs(nvcpu->nv_vvmcx, GUEST_CR0);
-                    __set_vvmcs(nvcpu->nv_vvmcx, GUEST_CR0, (guest_cr0 & cr0_gh_mask) | (val & ~cr0_gh_mask));
+                    u64 guest_cr0 = get_vvmcs(v, GUEST_CR0);
+
+                    set_vvmcs(v, GUEST_CR0, (guest_cr0 & cr0_gh_mask) | (val & ~cr0_gh_mask));
                 }
                 break;
             }
@@ -2517,7 +2481,7 @@ void nvmx_set_cr_read_shadow(struct vcpu
     if ( !nestedhvm_vmswitch_in_progress(v) )
     {
         unsigned long virtual_cr_mask = 
-            __get_vvmcs(vcpu_nestedhvm(v).nv_vvmcx, mask_field);
+            get_vvmcs(v, mask_field);
 
         /*
          * We get here when L2 changed cr in a way that did not change
@@ -2529,7 +2493,7 @@ void nvmx_set_cr_read_shadow(struct vcpu
          */
         v->arch.hvm_vcpu.guest_cr[cr] &= ~virtual_cr_mask;
         v->arch.hvm_vcpu.guest_cr[cr] |= virtual_cr_mask &
-            __get_vvmcs(vcpu_nestedhvm(v).nv_vvmcx, cr_field);
+            get_vvmcs(v, cr_field);
     }
 
     /* nvcpu.guest_cr is what L2 write to cr actually. */
--- a/xen/include/asm-x86/hvm/vmx/vmcs.h
+++ b/xen/include/asm-x86/hvm/vmx/vmcs.h
@@ -95,7 +95,7 @@ struct arch_vmx_struct {
     /* Physical address of VMCS. */
     paddr_t              vmcs_pa;
     /* VMCS shadow machine address. */
-    paddr_t             vmcs_shadow_maddr;
+    paddr_t              vmcs_shadow_maddr;
 
     /* Protects remote usage of VMCS (VMPTRLD/VMCLEAR). */
     spinlock_t           vmcs_lock;
@@ -492,10 +492,10 @@ void vmx_vmcs_switch(paddr_t from, paddr
 void vmx_set_eoi_exit_bitmap(struct vcpu *v, u8 vector);
 void vmx_clear_eoi_exit_bitmap(struct vcpu *v, u8 vector);
 int vmx_check_msr_bitmap(unsigned long *msr_bitmap, u32 msr, int access_type);
-void virtual_vmcs_enter(void *vvmcs);
-void virtual_vmcs_exit(void *vvmcs);
-u64 virtual_vmcs_vmread(void *vvmcs, u32 vmcs_encoding);
-void virtual_vmcs_vmwrite(void *vvmcs, u32 vmcs_encoding, u64 val);
+void virtual_vmcs_enter(const struct vcpu *);
+void virtual_vmcs_exit(const struct vcpu *);
+u64 virtual_vmcs_vmread(const struct vcpu *, u32 encoding);
+void virtual_vmcs_vmwrite(const struct vcpu *, u32 encoding, u64 val);
 
 static inline int vmx_add_guest_msr(u32 msr)
 {
--- a/xen/include/asm-x86/hvm/vmx/vvmx.h
+++ b/xen/include/asm-x86/hvm/vmx/vvmx.h
@@ -181,18 +181,20 @@ enum vvmcs_encoding_type {
     VVMCS_TYPE_HSTATE,
 };
 
-u64 __get_vvmcs_virtual(void *vvmcs, u32 vmcs_encoding);
-u64 __get_vvmcs_real(void *vvmcs, u32 vmcs_encoding);
-void __set_vvmcs_virtual(void *vvmcs, u32 vmcs_encoding, u64 val);
-void __set_vvmcs_real(void *vvmcs, u32 vmcs_encoding, u64 val);
-
-#define __get_vvmcs(_vvmcs, _vmcs_encoding) \
-  (cpu_has_vmx_vmcs_shadowing ? __get_vvmcs_real(_vvmcs, _vmcs_encoding) \
-                              : __get_vvmcs_virtual(_vvmcs, _vmcs_encoding))
-
-#define __set_vvmcs(_vvmcs, _vmcs_encoding, _val) \
-  (cpu_has_vmx_vmcs_shadowing ? __set_vvmcs_real(_vvmcs, _vmcs_encoding, _val) \
-                              : __set_vvmcs_virtual(_vvmcs, _vmcs_encoding, _val))
+u64 get_vvmcs_virtual(void *vvmcs, u32 encoding);
+u64 get_vvmcs_real(const struct vcpu *, u32 encoding);
+void set_vvmcs_virtual(void *vvmcs, u32 encoding, u64 val);
+void set_vvmcs_real(const struct vcpu *, u32 encoding, u64 val);
+
+#define get_vvmcs(vcpu, encoding) \
+  (cpu_has_vmx_vmcs_shadowing ? \
+   get_vvmcs_real(vcpu, encoding) : \
+   get_vvmcs_virtual(vcpu_nestedhvm(vcpu).nv_vvmcx, encoding))
+
+#define set_vvmcs(vcpu, encoding, val) \
+  (cpu_has_vmx_vmcs_shadowing ? \
+   set_vvmcs_real(vcpu, encoding, val) : \
+   set_vvmcs_virtual(vcpu_nestedhvm(vcpu).nv_vvmcx, encoding, val))
 
 uint64_t get_shadow_eptp(struct vcpu *v);
 



[-- Attachment #2: VMX-VMCS-shadow-addr.patch --]
[-- Type: text/plain, Size: 40571 bytes --]

vVMX: use latched VMCS machine address

Instead of calling domain_page_map_to_mfn() over and over, latch the
guest VMCS machine address unconditionally (i.e. independent of whether
VMCS shadowing is supported by the hardware).

Since this requires altering the parameters of __[gs]et_vvmcs{,_real}()
(and hence all their callers) anyway, take the opportunity to also drop
the bogus double underscores from their names (and from
__[gs]et_vvmcs_virtual() as well).

Signed-off-by: Jan Beulich <jbeulich@suse.com>
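
For illustration only (this fragment is not part of the patch; it merely
contrasts a typical call site before and after the conversion done
below):

    /* Before: callers had to map the vVMCS and pass the pointer around. */
    void *vvmcs = vcpu_nestedhvm(v).nv_vvmcx;

    regs->eip = __get_vvmcs(vvmcs, GUEST_RIP);

    /*
     * After: the vCPU alone suffices; on the VMCS shadowing path the
     * accessors use the machine address latched in
     * v->arch.hvm_vmx.vmcs_shadow_maddr instead of re-deriving it via
     * domain_page_map_to_mfn() on every access.
     */
    regs->eip = get_vvmcs(v, GUEST_RIP);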

--- a/xen/arch/x86/hvm/vmx/intr.c
+++ b/xen/arch/x86/hvm/vmx/intr.c
@@ -191,13 +191,13 @@ static int nvmx_intr_intercept(struct vc
         if ( intack.source == hvm_intsrc_pic ||
                  intack.source == hvm_intsrc_lapic )
         {
-            ctrl = __get_vvmcs(vcpu_nestedhvm(v).nv_vvmcx, PIN_BASED_VM_EXEC_CONTROL);
+            ctrl = get_vvmcs(v, PIN_BASED_VM_EXEC_CONTROL);
             if ( !(ctrl & PIN_BASED_EXT_INTR_MASK) )
                 return 0;
 
             vmx_inject_extint(intack.vector, intack.source);
 
-            ctrl = __get_vvmcs(vcpu_nestedhvm(v).nv_vvmcx, VM_EXIT_CONTROLS);
+            ctrl = get_vvmcs(v, VM_EXIT_CONTROLS);
             if ( ctrl & VM_EXIT_ACK_INTR_ON_EXIT )
             {
                 /* for now, duplicate the ack path in vmx_intr_assist */
--- a/xen/arch/x86/hvm/vmx/vmcs.c
+++ b/xen/arch/x86/hvm/vmx/vmcs.c
@@ -932,37 +932,36 @@ void vmx_vmcs_switch(paddr_t from, paddr
     spin_unlock(&vmx->vmcs_lock);
 }
 
-void virtual_vmcs_enter(void *vvmcs)
+void virtual_vmcs_enter(const struct vcpu *v)
 {
-    __vmptrld(pfn_to_paddr(domain_page_map_to_mfn(vvmcs)));
+    __vmptrld(v->arch.hvm_vmx.vmcs_shadow_maddr);
 }
 
-void virtual_vmcs_exit(void *vvmcs)
+void virtual_vmcs_exit(const struct vcpu *v)
 {
     paddr_t cur = this_cpu(current_vmcs);
 
-    __vmpclear(pfn_to_paddr(domain_page_map_to_mfn(vvmcs)));
+    __vmpclear(v->arch.hvm_vmx.vmcs_shadow_maddr);
     if ( cur )
         __vmptrld(cur);
-
 }
 
-u64 virtual_vmcs_vmread(void *vvmcs, u32 vmcs_encoding)
+u64 virtual_vmcs_vmread(const struct vcpu *v, u32 vmcs_encoding)
 {
     u64 res;
 
-    virtual_vmcs_enter(vvmcs);
+    virtual_vmcs_enter(v);
     __vmread(vmcs_encoding, &res);
-    virtual_vmcs_exit(vvmcs);
+    virtual_vmcs_exit(v);
 
     return res;
 }
 
-void virtual_vmcs_vmwrite(void *vvmcs, u32 vmcs_encoding, u64 val)
+void virtual_vmcs_vmwrite(const struct vcpu *v, u32 vmcs_encoding, u64 val)
 {
-    virtual_vmcs_enter(vvmcs);
+    virtual_vmcs_enter(v);
     __vmwrite(vmcs_encoding, val);
-    virtual_vmcs_exit(vvmcs);
+    virtual_vmcs_exit(v);
 }
 
 static int construct_vmcs(struct vcpu *v)
--- a/xen/arch/x86/hvm/vmx/vmx.c
+++ b/xen/arch/x86/hvm/vmx/vmx.c
@@ -1464,8 +1464,7 @@ void vmx_inject_extint(int trap, uint8_t
     u32    pin_based_cntrl;
 
     if ( nestedhvm_vcpu_in_guestmode(v) ) {
-        pin_based_cntrl = __get_vvmcs(vcpu_nestedhvm(v).nv_vvmcx, 
-                                     PIN_BASED_VM_EXEC_CONTROL);
+        pin_based_cntrl = get_vvmcs(v, PIN_BASED_VM_EXEC_CONTROL);
         if ( pin_based_cntrl & PIN_BASED_EXT_INTR_MASK ) {
             nvmx_enqueue_n2_exceptions (v, 
                INTR_INFO_VALID_MASK |
@@ -1485,8 +1484,7 @@ void vmx_inject_nmi(void)
     u32    pin_based_cntrl;
 
     if ( nestedhvm_vcpu_in_guestmode(v) ) {
-        pin_based_cntrl = __get_vvmcs(vcpu_nestedhvm(v).nv_vvmcx, 
-                                     PIN_BASED_VM_EXEC_CONTROL);
+        pin_based_cntrl = get_vvmcs(v, PIN_BASED_VM_EXEC_CONTROL);
         if ( pin_based_cntrl & PIN_BASED_NMI_EXITING ) {
             nvmx_enqueue_n2_exceptions (v, 
                INTR_INFO_VALID_MASK |
--- a/xen/arch/x86/hvm/vmx/vvmx.c
+++ b/xen/arch/x86/hvm/vmx/vvmx.c
@@ -175,11 +175,7 @@ int nvmx_vcpu_reset(struct vcpu *v)
 
 uint64_t nvmx_vcpu_eptp_base(struct vcpu *v)
 {
-    uint64_t eptp_base;
-    struct nestedvcpu *nvcpu = &vcpu_nestedhvm(v);
-
-    eptp_base = __get_vvmcs(nvcpu->nv_vvmcx, EPT_POINTER);
-    return eptp_base & PAGE_MASK;
+    return get_vvmcs(v, EPT_POINTER) & PAGE_MASK;
 }
 
 bool_t nvmx_ept_enabled(struct vcpu *v)
@@ -236,7 +232,7 @@ static int vvmcs_offset(u32 width, u32 t
     return offset;
 }
 
-u64 __get_vvmcs_virtual(void *vvmcs, u32 vmcs_encoding)
+u64 get_vvmcs_virtual(void *vvmcs, u32 vmcs_encoding)
 {
     union vmcs_encoding enc;
     u64 *content = (u64 *) vvmcs;
@@ -266,12 +262,12 @@ u64 __get_vvmcs_virtual(void *vvmcs, u32
     return res;
 }
 
-u64 __get_vvmcs_real(void *vvmcs, u32 vmcs_encoding)
+u64 get_vvmcs_real(const struct vcpu *v, u32 encoding)
 {
-    return virtual_vmcs_vmread(vvmcs, vmcs_encoding);
+    return virtual_vmcs_vmread(v, encoding);
 }
 
-void __set_vvmcs_virtual(void *vvmcs, u32 vmcs_encoding, u64 val)
+void set_vvmcs_virtual(void *vvmcs, u32 vmcs_encoding, u64 val)
 {
     union vmcs_encoding enc;
     u64 *content = (u64 *) vvmcs;
@@ -307,9 +303,9 @@ void __set_vvmcs_virtual(void *vvmcs, u3
     content[offset] = res;
 }
 
-void __set_vvmcs_real(void *vvmcs, u32 vmcs_encoding, u64 val)
+void set_vvmcs_real(const struct vcpu *v, u32 encoding, u64 val)
 {
-    virtual_vmcs_vmwrite(vvmcs, vmcs_encoding, val);
+    virtual_vmcs_vmwrite(v, encoding, val);
 }
 
 static unsigned long reg_read(struct cpu_user_regs *regs,
@@ -331,25 +327,20 @@ static void reg_write(struct cpu_user_re
 
 static inline u32 __n2_pin_exec_control(struct vcpu *v)
 {
-    struct nestedvcpu *nvcpu = &vcpu_nestedhvm(v);
-
-    return __get_vvmcs(nvcpu->nv_vvmcx, PIN_BASED_VM_EXEC_CONTROL);
+    return get_vvmcs(v, PIN_BASED_VM_EXEC_CONTROL);
 }
 
 static inline u32 __n2_exec_control(struct vcpu *v)
 {
-    struct nestedvcpu *nvcpu = &vcpu_nestedhvm(v);
-
-    return __get_vvmcs(nvcpu->nv_vvmcx, CPU_BASED_VM_EXEC_CONTROL);
+    return get_vvmcs(v, CPU_BASED_VM_EXEC_CONTROL);
 }
 
 static inline u32 __n2_secondary_exec_control(struct vcpu *v)
 {
-    struct nestedvcpu *nvcpu = &vcpu_nestedhvm(v);
     u64 second_ctrl = 0;
 
     if ( __n2_exec_control(v) & CPU_BASED_ACTIVATE_SECONDARY_CONTROLS )
-        second_ctrl = __get_vvmcs(nvcpu->nv_vvmcx, SECONDARY_VM_EXEC_CONTROL);
+        second_ctrl = get_vvmcs(v, SECONDARY_VM_EXEC_CONTROL);
 
     return second_ctrl;
 }
@@ -502,18 +493,17 @@ static void vmreturn(struct cpu_user_reg
 bool_t nvmx_intercepts_exception(struct vcpu *v, unsigned int trap,
                                  int error_code)
 {
-    struct nestedvcpu *nvcpu = &vcpu_nestedhvm(v);
     u32 exception_bitmap, pfec_match=0, pfec_mask=0;
     int r;
 
     ASSERT ( trap < 32 );
 
-    exception_bitmap = __get_vvmcs(nvcpu->nv_vvmcx, EXCEPTION_BITMAP);
+    exception_bitmap = get_vvmcs(v, EXCEPTION_BITMAP);
     r = exception_bitmap & (1 << trap) ? 1: 0;
 
     if ( trap == TRAP_page_fault ) {
-        pfec_match = __get_vvmcs(nvcpu->nv_vvmcx, PAGE_FAULT_ERROR_CODE_MATCH);
-        pfec_mask  = __get_vvmcs(nvcpu->nv_vvmcx, PAGE_FAULT_ERROR_CODE_MASK);
+        pfec_match = get_vvmcs(v, PAGE_FAULT_ERROR_CODE_MATCH);
+        pfec_mask  = get_vvmcs(v, PAGE_FAULT_ERROR_CODE_MASK);
         if ( (error_code & pfec_mask) != pfec_match )
             r = !r;
     }
@@ -528,9 +518,7 @@ static inline u32 __shadow_control(struc
                                  unsigned int field,
                                  u32 host_value)
 {
-    struct nestedvcpu *nvcpu = &vcpu_nestedhvm(v);
-
-    return (u32) __get_vvmcs(nvcpu->nv_vvmcx, field) | host_value;
+    return get_vvmcs(v, field) | host_value;
 }
 
 static void set_shadow_control(struct vcpu *v,
@@ -597,13 +585,12 @@ void nvmx_update_secondary_exec_control(
                                         unsigned long host_cntrl)
 {
     u32 shadow_cntrl;
-    struct nestedvcpu *nvcpu = &vcpu_nestedhvm(v);
     struct nestedvmx *nvmx = &vcpu_2_nvmx(v);
     u32 apicv_bit = SECONDARY_EXEC_APIC_REGISTER_VIRT |
                     SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY;
 
     host_cntrl &= ~apicv_bit;
-    shadow_cntrl = __get_vvmcs(nvcpu->nv_vvmcx, SECONDARY_VM_EXEC_CONTROL);
+    shadow_cntrl = get_vvmcs(v, SECONDARY_VM_EXEC_CONTROL);
 
     /* No vAPIC-v support, so it shouldn't be set in vmcs12. */
     ASSERT(!(shadow_cntrl & apicv_bit));
@@ -616,10 +603,9 @@ void nvmx_update_secondary_exec_control(
 static void nvmx_update_pin_control(struct vcpu *v, unsigned long host_cntrl)
 {
     u32 shadow_cntrl;
-    struct nestedvcpu *nvcpu = &vcpu_nestedhvm(v);
 
     host_cntrl &= ~PIN_BASED_POSTED_INTERRUPT;
-    shadow_cntrl = __get_vvmcs(nvcpu->nv_vvmcx, PIN_BASED_VM_EXEC_CONTROL);
+    shadow_cntrl = get_vvmcs(v, PIN_BASED_VM_EXEC_CONTROL);
 
     /* No vAPIC-v support, so it shouldn't be set in vmcs12. */
     ASSERT(!(shadow_cntrl & PIN_BASED_POSTED_INTERRUPT));
@@ -631,9 +617,8 @@ static void nvmx_update_pin_control(stru
 static void nvmx_update_exit_control(struct vcpu *v, unsigned long host_cntrl)
 {
     u32 shadow_cntrl;
-    struct nestedvcpu *nvcpu = &vcpu_nestedhvm(v);
 
-    shadow_cntrl = __get_vvmcs(nvcpu->nv_vvmcx, VM_EXIT_CONTROLS);
+    shadow_cntrl = get_vvmcs(v, VM_EXIT_CONTROLS);
     shadow_cntrl &= ~(VM_EXIT_SAVE_DEBUG_CNTRLS 
                       | VM_EXIT_LOAD_HOST_PAT
                       | VM_EXIT_LOAD_HOST_EFER
@@ -645,9 +630,8 @@ static void nvmx_update_exit_control(str
 static void nvmx_update_entry_control(struct vcpu *v)
 {
     u32 shadow_cntrl;
-    struct nestedvcpu *nvcpu = &vcpu_nestedhvm(v);
 
-    shadow_cntrl = __get_vvmcs(nvcpu->nv_vvmcx, VM_ENTRY_CONTROLS);
+    shadow_cntrl = get_vvmcs(v, VM_ENTRY_CONTROLS);
     shadow_cntrl &= ~(VM_ENTRY_LOAD_GUEST_PAT
                       | VM_ENTRY_LOAD_GUEST_EFER
                       | VM_ENTRY_LOAD_PERF_GLOBAL_CTRL);
@@ -661,7 +645,6 @@ void nvmx_update_exception_bitmap(struct
 
 static void nvmx_update_apic_access_address(struct vcpu *v)
 {
-    struct nestedvcpu *nvcpu = &vcpu_nestedhvm(v);
     u32 ctrl;
 
     ctrl = __n2_secondary_exec_control(v);
@@ -671,7 +654,7 @@ static void nvmx_update_apic_access_addr
         unsigned long apic_gpfn;
         struct page_info *apic_pg;
 
-        apic_gpfn = __get_vvmcs(nvcpu->nv_vvmcx, APIC_ACCESS_ADDR) >> PAGE_SHIFT;
+        apic_gpfn = get_vvmcs(v, APIC_ACCESS_ADDR) >> PAGE_SHIFT;
         apic_pg = get_page_from_gfn(v->domain, apic_gpfn, &p2mt, P2M_ALLOC);
         ASSERT(apic_pg && !p2m_is_paging(p2mt));
         __vmwrite(APIC_ACCESS_ADDR, page_to_maddr(apic_pg));
@@ -683,7 +666,6 @@ static void nvmx_update_apic_access_addr
 
 static void nvmx_update_virtual_apic_address(struct vcpu *v)
 {
-    struct nestedvcpu *nvcpu = &vcpu_nestedhvm(v);
     u32 ctrl;
 
     ctrl = __n2_exec_control(v);
@@ -693,7 +675,7 @@ static void nvmx_update_virtual_apic_add
         unsigned long vapic_gpfn;
         struct page_info *vapic_pg;
 
-        vapic_gpfn = __get_vvmcs(nvcpu->nv_vvmcx, VIRTUAL_APIC_PAGE_ADDR) >> PAGE_SHIFT;
+        vapic_gpfn = get_vvmcs(v, VIRTUAL_APIC_PAGE_ADDR) >> PAGE_SHIFT;
         vapic_pg = get_page_from_gfn(v->domain, vapic_gpfn, &p2mt, P2M_ALLOC);
         ASSERT(vapic_pg && !p2m_is_paging(p2mt));
         __vmwrite(VIRTUAL_APIC_PAGE_ADDR, page_to_maddr(vapic_pg));
@@ -705,23 +687,20 @@ static void nvmx_update_virtual_apic_add
 
 static void nvmx_update_tpr_threshold(struct vcpu *v)
 {
-    struct nestedvcpu *nvcpu = &vcpu_nestedhvm(v);
     u32 ctrl = __n2_exec_control(v);
+
     if ( ctrl & CPU_BASED_TPR_SHADOW )
-        __vmwrite(TPR_THRESHOLD, __get_vvmcs(nvcpu->nv_vvmcx, TPR_THRESHOLD));
+        __vmwrite(TPR_THRESHOLD, get_vvmcs(v, TPR_THRESHOLD));
     else
         __vmwrite(TPR_THRESHOLD, 0);
 }
 
 static void nvmx_update_pfec(struct vcpu *v)
 {
-    struct nestedvcpu *nvcpu = &vcpu_nestedhvm(v);
-    void *vvmcs = nvcpu->nv_vvmcx;
-
     __vmwrite(PAGE_FAULT_ERROR_CODE_MASK,
-        __get_vvmcs(vvmcs, PAGE_FAULT_ERROR_CODE_MASK));
+              get_vvmcs(v, PAGE_FAULT_ERROR_CODE_MASK));
     __vmwrite(PAGE_FAULT_ERROR_CODE_MATCH,
-        __get_vvmcs(vvmcs, PAGE_FAULT_ERROR_CODE_MATCH));
+              get_vvmcs(v, PAGE_FAULT_ERROR_CODE_MATCH));
 }
 
 static void __clear_current_vvmcs(struct vcpu *v)
@@ -739,7 +718,7 @@ static bool_t __must_check _map_msr_bitm
 
     if ( nvmx->msrbitmap )
         hvm_unmap_guest_frame(nvmx->msrbitmap, 1);
-    gpa = __get_vvmcs(vcpu_nestedhvm(v).nv_vvmcx, MSR_BITMAP);
+    gpa = get_vvmcs(v, MSR_BITMAP);
     nvmx->msrbitmap = hvm_map_guest_frame_ro(gpa >> PAGE_SHIFT, 1);
 
     return nvmx->msrbitmap != NULL;
@@ -754,7 +733,7 @@ static bool_t __must_check _map_io_bitma
     index = vmcs_reg == IO_BITMAP_A ? 0 : 1;
     if (nvmx->iobitmap[index])
         hvm_unmap_guest_frame(nvmx->iobitmap[index], 1);
-    gpa = __get_vvmcs(vcpu_nestedhvm(v).nv_vvmcx, vmcs_reg);
+    gpa = get_vvmcs(v, vmcs_reg);
     nvmx->iobitmap[index] = hvm_map_guest_frame_ro(gpa >> PAGE_SHIFT, 1);
 
     return nvmx->iobitmap[index] != NULL;
@@ -777,6 +756,7 @@ static void nvmx_purge_vvmcs(struct vcpu
         hvm_unmap_guest_frame(nvcpu->nv_vvmcx, 1);
     nvcpu->nv_vvmcx = NULL;
     nvcpu->nv_vvmcxaddr = VMCX_EADDR;
+    v->arch.hvm_vmx.vmcs_shadow_maddr = 0;
     for (i=0; i<2; i++) {
         if ( nvmx->iobitmap[i] ) {
             hvm_unmap_guest_frame(nvmx->iobitmap[i], 1);
@@ -792,11 +772,10 @@ static void nvmx_purge_vvmcs(struct vcpu
 u64 nvmx_get_tsc_offset(struct vcpu *v)
 {
     u64 offset = 0;
-    struct nestedvcpu *nvcpu = &vcpu_nestedhvm(v);
 
-    if ( __get_vvmcs(nvcpu->nv_vvmcx, CPU_BASED_VM_EXEC_CONTROL) &
+    if ( get_vvmcs(v, CPU_BASED_VM_EXEC_CONTROL) &
          CPU_BASED_USE_TSC_OFFSETING )
-        offset = __get_vvmcs(nvcpu->nv_vvmcx, TSC_OFFSET);
+        offset = get_vvmcs(v, TSC_OFFSET);
 
     return offset;
 }
@@ -911,12 +890,9 @@ static struct vmcs_host_to_guest {
     {HOST_SYSENTER_EIP, GUEST_SYSENTER_EIP},
 };
 
-static void vvmcs_to_shadow(void *vvmcs, unsigned int field)
+static void vvmcs_to_shadow(const struct vcpu *v, unsigned int field)
 {
-    u64 value;
-
-    value = __get_vvmcs(vvmcs, field);
-    __vmwrite(field, value);
+    __vmwrite(field, get_vvmcs(v, field));
 }
 
 static void vvmcs_to_shadow_bulk(struct vcpu *v, unsigned int n,
@@ -950,15 +926,15 @@ static void vvmcs_to_shadow_bulk(struct 
 
 fallback:
     for ( i = 0; i < n; i++ )
-        vvmcs_to_shadow(vvmcs, field[i]);
+        vvmcs_to_shadow(v, field[i]);
 }
 
-static inline void shadow_to_vvmcs(void *vvmcs, unsigned int field)
+static inline void shadow_to_vvmcs(const struct vcpu *v, unsigned int field)
 {
     unsigned long value;
 
     if ( __vmread_safe(field, &value) )
-        __set_vvmcs(vvmcs, field, value);
+        set_vvmcs(v, field, value);
 }
 
 static void shadow_to_vvmcs_bulk(struct vcpu *v, unsigned int n,
@@ -992,7 +968,7 @@ static void shadow_to_vvmcs_bulk(struct 
 
 fallback:
     for ( i = 0; i < n; i++ )
-        shadow_to_vvmcs(vvmcs, field[i]);
+        shadow_to_vvmcs(v, field[i]);
 }
 
 static void load_shadow_control(struct vcpu *v)
@@ -1017,7 +993,6 @@ static void load_shadow_control(struct v
 static void load_shadow_guest_state(struct vcpu *v)
 {
     struct nestedvcpu *nvcpu = &vcpu_nestedhvm(v);
-    void *vvmcs = nvcpu->nv_vvmcx;
     u32 control;
     u64 cr_gh_mask, cr_read_shadow;
 
@@ -1031,18 +1006,18 @@ static void load_shadow_guest_state(stru
     vvmcs_to_shadow_bulk(v, ARRAY_SIZE(vmcs_gstate_field),
                          vmcs_gstate_field);
 
-    nvcpu->guest_cr[0] = __get_vvmcs(vvmcs, CR0_READ_SHADOW);
-    nvcpu->guest_cr[4] = __get_vvmcs(vvmcs, CR4_READ_SHADOW);
-    hvm_set_cr0(__get_vvmcs(vvmcs, GUEST_CR0), 1);
-    hvm_set_cr4(__get_vvmcs(vvmcs, GUEST_CR4), 1);
-    hvm_set_cr3(__get_vvmcs(vvmcs, GUEST_CR3), 1);
+    nvcpu->guest_cr[0] = get_vvmcs(v, CR0_READ_SHADOW);
+    nvcpu->guest_cr[4] = get_vvmcs(v, CR4_READ_SHADOW);
+    hvm_set_cr0(get_vvmcs(v, GUEST_CR0), 1);
+    hvm_set_cr4(get_vvmcs(v, GUEST_CR4), 1);
+    hvm_set_cr3(get_vvmcs(v, GUEST_CR3), 1);
 
-    control = __get_vvmcs(vvmcs, VM_ENTRY_CONTROLS);
+    control = get_vvmcs(v, VM_ENTRY_CONTROLS);
     if ( control & VM_ENTRY_LOAD_GUEST_PAT )
-        hvm_set_guest_pat(v, __get_vvmcs(vvmcs, GUEST_PAT));
+        hvm_set_guest_pat(v, get_vvmcs(v, GUEST_PAT));
     if ( control & VM_ENTRY_LOAD_PERF_GLOBAL_CTRL )
         hvm_msr_write_intercept(MSR_CORE_PERF_GLOBAL_CTRL,
-                                __get_vvmcs(vvmcs, GUEST_PERF_GLOBAL_CTRL), 0);
+                                get_vvmcs(v, GUEST_PERF_GLOBAL_CTRL), 0);
 
     hvm_funcs.set_tsc_offset(v, v->arch.hvm_vcpu.cache_tsc_offset, 0);
 
@@ -1053,14 +1028,14 @@ static void load_shadow_guest_state(stru
      * guest host mask to 0xffffffff in shadow VMCS (follow the host L1 VMCS),
      * then calculate the corresponding read shadow separately for CR0 and CR4.
      */
-    cr_gh_mask = __get_vvmcs(vvmcs, CR0_GUEST_HOST_MASK);
-    cr_read_shadow = (__get_vvmcs(vvmcs, GUEST_CR0) & ~cr_gh_mask) |
-                     (__get_vvmcs(vvmcs, CR0_READ_SHADOW) & cr_gh_mask);
+    cr_gh_mask = get_vvmcs(v, CR0_GUEST_HOST_MASK);
+    cr_read_shadow = (get_vvmcs(v, GUEST_CR0) & ~cr_gh_mask) |
+                     (get_vvmcs(v, CR0_READ_SHADOW) & cr_gh_mask);
     __vmwrite(CR0_READ_SHADOW, cr_read_shadow);
 
-    cr_gh_mask = __get_vvmcs(vvmcs, CR4_GUEST_HOST_MASK);
-    cr_read_shadow = (__get_vvmcs(vvmcs, GUEST_CR4) & ~cr_gh_mask) |
-                     (__get_vvmcs(vvmcs, CR4_READ_SHADOW) & cr_gh_mask);
+    cr_gh_mask = get_vvmcs(v, CR4_GUEST_HOST_MASK);
+    cr_read_shadow = (get_vvmcs(v, GUEST_CR4) & ~cr_gh_mask) |
+                     (get_vvmcs(v, CR4_READ_SHADOW) & cr_gh_mask);
     __vmwrite(CR4_READ_SHADOW, cr_read_shadow);
 
     /* TODO: CR3 target control */
@@ -1084,11 +1059,11 @@ static uint64_t get_host_eptp(struct vcp
     return ept_get_eptp(ept_data);
 }
 
-static bool_t nvmx_vpid_enabled(struct nestedvcpu *nvcpu)
+static bool_t nvmx_vpid_enabled(const struct vcpu *v)
 {
     uint32_t second_cntl;
 
-    second_cntl = __get_vvmcs(nvcpu->nv_vvmcx, SECONDARY_VM_EXEC_CONTROL);
+    second_cntl = get_vvmcs(v, SECONDARY_VM_EXEC_CONTROL);
     if ( second_cntl & SECONDARY_EXEC_ENABLE_VPID )
         return 1;
     return 0;
@@ -1099,32 +1074,38 @@ static void nvmx_set_vmcs_pointer(struct
     unsigned long vvmcs_mfn = domain_page_map_to_mfn(vvmcs);
     paddr_t vvmcs_maddr = vvmcs_mfn << PAGE_SHIFT;
 
-    __vmpclear(vvmcs_maddr);
-    vvmcs->vmcs_revision_id |= VMCS_RID_TYPE_MASK;
+    if ( cpu_has_vmx_vmcs_shadowing )
+    {
+        __vmpclear(vvmcs_maddr);
+        vvmcs->vmcs_revision_id |= VMCS_RID_TYPE_MASK;
+        __vmwrite(VMCS_LINK_POINTER, vvmcs_maddr);
+        __vmwrite(VMREAD_BITMAP, page_to_maddr(v->arch.hvm_vmx.vmread_bitmap));
+        __vmwrite(VMWRITE_BITMAP, page_to_maddr(v->arch.hvm_vmx.vmwrite_bitmap));
+    }
     v->arch.hvm_vmx.vmcs_shadow_maddr = vvmcs_maddr;
-    __vmwrite(VMCS_LINK_POINTER, vvmcs_maddr);
-    __vmwrite(VMREAD_BITMAP, page_to_maddr(v->arch.hvm_vmx.vmread_bitmap));
-    __vmwrite(VMWRITE_BITMAP, page_to_maddr(v->arch.hvm_vmx.vmwrite_bitmap));
 }
 
 static void nvmx_clear_vmcs_pointer(struct vcpu *v, struct vmcs_struct *vvmcs)
 {
-    unsigned long vvmcs_mfn = domain_page_map_to_mfn(vvmcs);
-    paddr_t vvmcs_maddr = vvmcs_mfn << PAGE_SHIFT;
-
-    __vmpclear(vvmcs_maddr);
-    vvmcs->vmcs_revision_id &= ~VMCS_RID_TYPE_MASK;
     v->arch.hvm_vmx.vmcs_shadow_maddr = 0;
-    __vmwrite(VMCS_LINK_POINTER, ~0ul);
-    __vmwrite(VMREAD_BITMAP, 0);
-    __vmwrite(VMWRITE_BITMAP, 0);
+
+    if ( cpu_has_vmx_vmcs_shadowing )
+    {
+        unsigned long vvmcs_mfn = domain_page_map_to_mfn(vvmcs);
+        paddr_t vvmcs_maddr = vvmcs_mfn << PAGE_SHIFT;
+
+        __vmpclear(vvmcs_maddr);
+        vvmcs->vmcs_revision_id &= ~VMCS_RID_TYPE_MASK;
+        __vmwrite(VMCS_LINK_POINTER, ~0ul);
+        __vmwrite(VMREAD_BITMAP, 0);
+        __vmwrite(VMWRITE_BITMAP, 0);
+    }
 }
 
 static void virtual_vmentry(struct cpu_user_regs *regs)
 {
     struct vcpu *v = current;
     struct nestedvcpu *nvcpu = &vcpu_nestedhvm(v);
-    void *vvmcs = nvcpu->nv_vvmcx;
     unsigned long lm_l1, lm_l2;
 
     vmx_vmcs_switch(v->arch.hvm_vmx.vmcs_pa, nvcpu->nv_n2vmcx_pa);
@@ -1143,8 +1124,7 @@ static void virtual_vmentry(struct cpu_u
      * L1 exit_controls
      */
     lm_l1 = !!hvm_long_mode_enabled(v);
-    lm_l2 = !!(__get_vvmcs(vvmcs, VM_ENTRY_CONTROLS) &
-                           VM_ENTRY_IA32E_MODE);
+    lm_l2 = !!(get_vvmcs(v, VM_ENTRY_CONTROLS) & VM_ENTRY_IA32E_MODE);
 
     if ( lm_l2 )
         v->arch.hvm_vcpu.guest_efer |= EFER_LMA | EFER_LME;
@@ -1161,9 +1141,9 @@ static void virtual_vmentry(struct cpu_u
          !(v->arch.hvm_vcpu.guest_efer & EFER_LMA) )
         vvmcs_to_shadow_bulk(v, ARRAY_SIZE(gpdpte_fields), gpdpte_fields);
 
-    regs->eip = __get_vvmcs(vvmcs, GUEST_RIP);
-    regs->esp = __get_vvmcs(vvmcs, GUEST_RSP);
-    regs->eflags = __get_vvmcs(vvmcs, GUEST_RFLAGS);
+    regs->eip = get_vvmcs(v, GUEST_RIP);
+    regs->esp = get_vvmcs(v, GUEST_RSP);
+    regs->eflags = get_vvmcs(v, GUEST_RFLAGS);
 
     /* updating host cr0 to sync TS bit */
     __vmwrite(HOST_CR0, v->arch.hvm_vmx.host_cr0);
@@ -1175,10 +1155,10 @@ static void virtual_vmentry(struct cpu_u
         __vmwrite(EPT_POINTER, get_host_eptp(v));
 
     /* nested VPID support! */
-    if ( cpu_has_vmx_vpid && nvmx_vpid_enabled(nvcpu) )
+    if ( cpu_has_vmx_vpid && nvmx_vpid_enabled(v) )
     {
         struct nestedvmx *nvmx = &vcpu_2_nvmx(v);
-        uint32_t new_vpid =  __get_vvmcs(vvmcs, VIRTUAL_PROCESSOR_ID);
+        uint32_t new_vpid = get_vvmcs(v, VIRTUAL_PROCESSOR_ID);
 
         if ( nvmx->guest_vpid != new_vpid )
         {
@@ -1191,34 +1171,29 @@ static void virtual_vmentry(struct cpu_u
 
 static void sync_vvmcs_guest_state(struct vcpu *v, struct cpu_user_regs *regs)
 {
-    struct nestedvcpu *nvcpu = &vcpu_nestedhvm(v);
-    void *vvmcs = nvcpu->nv_vvmcx;
-
     /* copy shadow vmcs.gstate back to vvmcs.gstate */
     shadow_to_vvmcs_bulk(v, ARRAY_SIZE(vmcs_gstate_field),
                          vmcs_gstate_field);
     /* RIP, RSP are in user regs */
-    __set_vvmcs(vvmcs, GUEST_RIP, regs->eip);
-    __set_vvmcs(vvmcs, GUEST_RSP, regs->esp);
+    set_vvmcs(v, GUEST_RIP, regs->eip);
+    set_vvmcs(v, GUEST_RSP, regs->esp);
 
     /* CR3 sync if exec doesn't want cr3 load exiting: i.e. nested EPT */
     if ( !(__n2_exec_control(v) & CPU_BASED_CR3_LOAD_EXITING) )
-        shadow_to_vvmcs(vvmcs, GUEST_CR3);
+        shadow_to_vvmcs(v, GUEST_CR3);
 }
 
 static void sync_vvmcs_ro(struct vcpu *v)
 {
-    struct nestedvcpu *nvcpu = &vcpu_nestedhvm(v);
     struct nestedvmx *nvmx = &vcpu_2_nvmx(v);
-    void *vvmcs = nvcpu->nv_vvmcx;
 
     shadow_to_vvmcs_bulk(v, ARRAY_SIZE(vmcs_ro_field), vmcs_ro_field);
 
     /* Adjust exit_reason/exit_qualifciation for violation case */
-    if ( __get_vvmcs(vvmcs, VM_EXIT_REASON) == EXIT_REASON_EPT_VIOLATION )
+    if ( get_vvmcs(v, VM_EXIT_REASON) == EXIT_REASON_EPT_VIOLATION )
     {
-        __set_vvmcs(vvmcs, EXIT_QUALIFICATION, nvmx->ept.exit_qual);
-        __set_vvmcs(vvmcs, VM_EXIT_REASON, nvmx->ept.exit_reason);
+        set_vvmcs(v, EXIT_QUALIFICATION, nvmx->ept.exit_qual);
+        set_vvmcs(v, VM_EXIT_REASON, nvmx->ept.exit_reason);
     }
 }
 
@@ -1226,34 +1201,32 @@ static void load_vvmcs_host_state(struct
 {
     int i;
     u64 r;
-    void *vvmcs = vcpu_nestedhvm(v).nv_vvmcx;
     u32 control;
 
     for ( i = 0; i < ARRAY_SIZE(vmcs_h2g_field); i++ )
     {
-        r = __get_vvmcs(vvmcs, vmcs_h2g_field[i].host_field);
+        r = get_vvmcs(v, vmcs_h2g_field[i].host_field);
         __vmwrite(vmcs_h2g_field[i].guest_field, r);
     }
 
-    hvm_set_cr0(__get_vvmcs(vvmcs, HOST_CR0), 1);
-    hvm_set_cr4(__get_vvmcs(vvmcs, HOST_CR4), 1);
-    hvm_set_cr3(__get_vvmcs(vvmcs, HOST_CR3), 1);
+    hvm_set_cr0(get_vvmcs(v, HOST_CR0), 1);
+    hvm_set_cr4(get_vvmcs(v, HOST_CR4), 1);
+    hvm_set_cr3(get_vvmcs(v, HOST_CR3), 1);
 
-    control = __get_vvmcs(vvmcs, VM_EXIT_CONTROLS);
+    control = get_vvmcs(v, VM_EXIT_CONTROLS);
     if ( control & VM_EXIT_LOAD_HOST_PAT )
-        hvm_set_guest_pat(v, __get_vvmcs(vvmcs, HOST_PAT));
+        hvm_set_guest_pat(v, get_vvmcs(v, HOST_PAT));
     if ( control & VM_EXIT_LOAD_PERF_GLOBAL_CTRL )
         hvm_msr_write_intercept(MSR_CORE_PERF_GLOBAL_CTRL,
-                                __get_vvmcs(vvmcs, HOST_PERF_GLOBAL_CTRL), 1);
+                                get_vvmcs(v, HOST_PERF_GLOBAL_CTRL), 1);
 
     hvm_funcs.set_tsc_offset(v, v->arch.hvm_vcpu.cache_tsc_offset, 0);
 
-    __set_vvmcs(vvmcs, VM_ENTRY_INTR_INFO, 0);
+    set_vvmcs(v, VM_ENTRY_INTR_INFO, 0);
 }
 
 static void sync_exception_state(struct vcpu *v)
 {
-    struct nestedvcpu *nvcpu = &vcpu_nestedhvm(v);
     struct nestedvmx *nvmx = &vcpu_2_nvmx(v);
 
     if ( !(nvmx->intr.intr_info & INTR_INFO_VALID_MASK) )
@@ -1263,10 +1236,9 @@ static void sync_exception_state(struct 
     {
     case X86_EVENTTYPE_EXT_INTR:
         /* rename exit_reason to EXTERNAL_INTERRUPT */
-        __set_vvmcs(nvcpu->nv_vvmcx, VM_EXIT_REASON,
-                    EXIT_REASON_EXTERNAL_INTERRUPT);
-        __set_vvmcs(nvcpu->nv_vvmcx, EXIT_QUALIFICATION, 0);
-        __set_vvmcs(nvcpu->nv_vvmcx, VM_EXIT_INTR_INFO,
+        set_vvmcs(v, VM_EXIT_REASON, EXIT_REASON_EXTERNAL_INTERRUPT);
+        set_vvmcs(v, EXIT_QUALIFICATION, 0);
+        set_vvmcs(v, VM_EXIT_INTR_INFO,
                     nvmx->intr.intr_info);
         break;
 
@@ -1274,17 +1246,13 @@ static void sync_exception_state(struct 
     case X86_EVENTTYPE_SW_INTERRUPT:
     case X86_EVENTTYPE_SW_EXCEPTION:
         /* throw to L1 */
-        __set_vvmcs(nvcpu->nv_vvmcx, VM_EXIT_INTR_INFO,
-                    nvmx->intr.intr_info);
-        __set_vvmcs(nvcpu->nv_vvmcx, VM_EXIT_INTR_ERROR_CODE,
-                    nvmx->intr.error_code);
+        set_vvmcs(v, VM_EXIT_INTR_INFO, nvmx->intr.intr_info);
+        set_vvmcs(v, VM_EXIT_INTR_ERROR_CODE, nvmx->intr.error_code);
         break;
     case X86_EVENTTYPE_NMI:
-        __set_vvmcs(nvcpu->nv_vvmcx, VM_EXIT_REASON,
-                    EXIT_REASON_EXCEPTION_NMI);
-        __set_vvmcs(nvcpu->nv_vvmcx, EXIT_QUALIFICATION, 0);
-        __set_vvmcs(nvcpu->nv_vvmcx, VM_EXIT_INTR_INFO,
-                    nvmx->intr.intr_info);
+        set_vvmcs(v, VM_EXIT_REASON, EXIT_REASON_EXCEPTION_NMI);
+        set_vvmcs(v, EXIT_QUALIFICATION, 0);
+        set_vvmcs(v, VM_EXIT_INTR_INFO, nvmx->intr.intr_info);
         break;
     default:
         gdprintk(XENLOG_ERR, "Exception state %lx not handled\n",
@@ -1296,9 +1264,8 @@ static void sync_exception_state(struct 
 static void nvmx_update_apicv(struct vcpu *v)
 {
     struct nestedvmx *nvmx = &vcpu_2_nvmx(v);
-    struct nestedvcpu *nvcpu = &vcpu_nestedhvm(v);
-    unsigned long reason = __get_vvmcs(nvcpu->nv_vvmcx, VM_EXIT_REASON);
-    uint32_t intr_info = __get_vvmcs(nvcpu->nv_vvmcx, VM_EXIT_INTR_INFO);
+    unsigned long reason = get_vvmcs(v, VM_EXIT_REASON);
+    uint32_t intr_info = get_vvmcs(v, VM_EXIT_INTR_INFO);
 
     if ( reason == EXIT_REASON_EXTERNAL_INTERRUPT &&
          nvmx->intr.source == hvm_intsrc_lapic &&
@@ -1344,8 +1311,7 @@ static void virtual_vmexit(struct cpu_us
     nvcpu->nv_vmswitch_in_progress = 1;
 
     lm_l2 = !!hvm_long_mode_enabled(v);
-    lm_l1 = !!(__get_vvmcs(nvcpu->nv_vvmcx, VM_EXIT_CONTROLS) &
-                           VM_EXIT_IA32E_MODE);
+    lm_l1 = !!(get_vvmcs(v, VM_EXIT_CONTROLS) & VM_EXIT_IA32E_MODE);
 
     if ( lm_l1 )
         v->arch.hvm_vcpu.guest_efer |= EFER_LMA | EFER_LME;
@@ -1361,8 +1327,8 @@ static void virtual_vmexit(struct cpu_us
     if ( lm_l1 != lm_l2 )
         paging_update_paging_modes(v);
 
-    regs->eip = __get_vvmcs(nvcpu->nv_vvmcx, HOST_RIP);
-    regs->esp = __get_vvmcs(nvcpu->nv_vvmcx, HOST_RSP);
+    regs->eip = get_vvmcs(v, HOST_RIP);
+    regs->esp = get_vvmcs(v, HOST_RSP);
     /* VM exit clears all bits except bit 1 */
     regs->eflags = 0x2;
 
@@ -1539,7 +1505,6 @@ int nvmx_handle_vmresume(struct cpu_user
 {
     bool_t launched;
     struct vcpu *v = current;
-    struct nestedvcpu *nvcpu = &vcpu_nestedhvm(v);
     struct nestedvmx *nvmx = &vcpu_2_nvmx(v);
     int rc = vmx_inst_check_privilege(regs, 0);
 
@@ -1553,7 +1518,7 @@ int nvmx_handle_vmresume(struct cpu_user
     }
 
     launched = vvmcs_launched(&nvmx->launched_list,
-                   domain_page_map_to_mfn(nvcpu->nv_vvmcx));
+                              PFN_DOWN(v->arch.hvm_vmx.vmcs_shadow_maddr));
     if ( !launched ) {
        vmreturn (regs, VMFAIL_VALID);
        return X86EMUL_OKAY;
@@ -1565,7 +1530,6 @@ int nvmx_handle_vmlaunch(struct cpu_user
 {
     bool_t launched;
     struct vcpu *v = current;
-    struct nestedvcpu *nvcpu = &vcpu_nestedhvm(v);
     struct nestedvmx *nvmx = &vcpu_2_nvmx(v);
     int rc = vmx_inst_check_privilege(regs, 0);
 
@@ -1579,7 +1543,7 @@ int nvmx_handle_vmlaunch(struct cpu_user
     }
 
     launched = vvmcs_launched(&nvmx->launched_list,
-                   domain_page_map_to_mfn(nvcpu->nv_vvmcx));
+                              PFN_DOWN(v->arch.hvm_vmx.vmcs_shadow_maddr));
     if ( launched ) {
        vmreturn (regs, VMFAIL_VALID);
        return X86EMUL_OKAY;
@@ -1589,7 +1553,7 @@ int nvmx_handle_vmlaunch(struct cpu_user
         if ( rc == X86EMUL_OKAY )
         {
             if ( set_vvmcs_launched(&nvmx->launched_list,
-                    domain_page_map_to_mfn(nvcpu->nv_vvmcx)) < 0 )
+                                    PFN_DOWN(v->arch.hvm_vmx.vmcs_shadow_maddr)) < 0 )
                 return X86EMUL_UNHANDLEABLE;
         }
     }
@@ -1644,8 +1608,7 @@ int nvmx_handle_vmptrld(struct cpu_user_
         }
     }
 
-    if ( cpu_has_vmx_vmcs_shadowing )
-        nvmx_set_vmcs_pointer(v, nvcpu->nv_vvmcx);
+    nvmx_set_vmcs_pointer(v, nvcpu->nv_vvmcx);
 
     vmreturn(regs, VMSUCCEED);
 
@@ -1694,10 +1657,10 @@ int nvmx_handle_vmclear(struct cpu_user_
         rc = VMFAIL_INVALID;
     else if ( gpa == nvcpu->nv_vvmcxaddr )
     {
-        if ( cpu_has_vmx_vmcs_shadowing )
-            nvmx_clear_vmcs_pointer(v, nvcpu->nv_vvmcx);
-        clear_vvmcs_launched(&nvmx->launched_list,
-            domain_page_map_to_mfn(nvcpu->nv_vvmcx));
+        unsigned long mfn = PFN_DOWN(v->arch.hvm_vmx.vmcs_shadow_maddr);
+
+        nvmx_clear_vmcs_pointer(v, nvcpu->nv_vvmcx);
+        clear_vvmcs_launched(&nvmx->launched_list, mfn);
         nvmx_purge_vvmcs(v);
     }
     else 
@@ -1726,7 +1689,6 @@ int nvmx_handle_vmread(struct cpu_user_r
 {
     struct vcpu *v = current;
     struct vmx_inst_decoded decode;
-    struct nestedvcpu *nvcpu = &vcpu_nestedhvm(v);
     u64 value = 0;
     int rc;
 
@@ -1734,7 +1696,7 @@ int nvmx_handle_vmread(struct cpu_user_r
     if ( rc != X86EMUL_OKAY )
         return rc;
 
-    value = __get_vvmcs(nvcpu->nv_vvmcx, reg_read(regs, decode.reg2));
+    value = get_vvmcs(v, reg_read(regs, decode.reg2));
 
     switch ( decode.type ) {
     case VMX_INST_MEMREG_TYPE_MEMORY:
@@ -1755,7 +1717,6 @@ int nvmx_handle_vmwrite(struct cpu_user_
 {
     struct vcpu *v = current;
     struct vmx_inst_decoded decode;
-    struct nestedvcpu *nvcpu = &vcpu_nestedhvm(v);
     unsigned long operand; 
     u64 vmcs_encoding;
     bool_t okay = 1;
@@ -1765,7 +1726,7 @@ int nvmx_handle_vmwrite(struct cpu_user_
         return X86EMUL_EXCEPTION;
 
     vmcs_encoding = reg_read(regs, decode.reg2);
-    __set_vvmcs(nvcpu->nv_vvmcx, vmcs_encoding, operand);
+    set_vvmcs(v, vmcs_encoding, operand);
 
     switch ( vmcs_encoding & ~VMCS_HIGH(0) )
     {
@@ -2197,7 +2158,7 @@ int nvmx_n2_vmexit_handler(struct cpu_us
         }
         else if ( (intr_info & valid_mask) == valid_mask )
         {
-            exec_bitmap =__get_vvmcs(nvcpu->nv_vvmcx, EXCEPTION_BITMAP);
+            exec_bitmap = get_vvmcs(v, EXCEPTION_BITMAP);
 
             if ( exec_bitmap & (1 << vector) )
                 nvcpu->nv_vmexit_pending = 1;
@@ -2316,8 +2277,7 @@ int nvmx_n2_vmexit_handler(struct cpu_us
              * special handler is needed if L1 doesn't intercept rdtsc,
              * avoiding changing guest_tsc and messing up timekeeping in L1
              */
-            tsc = hvm_get_guest_tsc(v);
-            tsc += __get_vvmcs(nvcpu->nv_vvmcx, TSC_OFFSET);
+            tsc = hvm_get_guest_tsc(v) + get_vvmcs(v, TSC_OFFSET);
             regs->eax = (uint32_t)tsc;
             regs->edx = (uint32_t)(tsc >> 32);
             update_guest_eip();
@@ -2406,7 +2366,7 @@ int nvmx_n2_vmexit_handler(struct cpu_us
                 val = *reg;
                 if ( cr == 0 )
                 {
-                    u64 cr0_gh_mask = __get_vvmcs(nvcpu->nv_vvmcx, CR0_GUEST_HOST_MASK);
+                    u64 cr0_gh_mask = get_vvmcs(v, CR0_GUEST_HOST_MASK);
 
                     __vmread(CR0_READ_SHADOW, &old_val);
                     changed_bits = old_val ^ val;
@@ -2414,14 +2374,15 @@ int nvmx_n2_vmexit_handler(struct cpu_us
                         nvcpu->nv_vmexit_pending = 1;
                     else
                     {
-                        u64 guest_cr0 = __get_vvmcs(nvcpu->nv_vvmcx, GUEST_CR0);
-                        __set_vvmcs(nvcpu->nv_vvmcx, GUEST_CR0,
-                                    (guest_cr0 & cr0_gh_mask) | (val & ~cr0_gh_mask));
+                        u64 guest_cr0 = get_vvmcs(v, GUEST_CR0);
+
+                        set_vvmcs(v, GUEST_CR0,
+                                  (guest_cr0 & cr0_gh_mask) | (val & ~cr0_gh_mask));
                     }
                 }
                 else if ( cr == 4 )
                 {
-                    u64 cr4_gh_mask = __get_vvmcs(nvcpu->nv_vvmcx, CR4_GUEST_HOST_MASK);
+                    u64 cr4_gh_mask = get_vvmcs(v, CR4_GUEST_HOST_MASK);
 
                     __vmread(CR4_READ_SHADOW, &old_val);
                     changed_bits = old_val ^ val;
@@ -2429,9 +2390,10 @@ int nvmx_n2_vmexit_handler(struct cpu_us
                         nvcpu->nv_vmexit_pending = 1;
                     else
                     {
-                        u64 guest_cr4 = __get_vvmcs(nvcpu->nv_vvmcx, GUEST_CR4);
-                        __set_vvmcs(nvcpu->nv_vvmcx, GUEST_CR4,
-                                    (guest_cr4 & cr4_gh_mask) | (val & ~cr4_gh_mask));
+                        u64 guest_cr4 = get_vvmcs(v, GUEST_CR4);
+
+                        set_vvmcs(v, GUEST_CR4,
+                                  (guest_cr4 & cr4_gh_mask) | (val & ~cr4_gh_mask));
                     }
                 }
                 else
@@ -2440,20 +2402,21 @@ int nvmx_n2_vmexit_handler(struct cpu_us
             }
             case VMX_CONTROL_REG_ACCESS_TYPE_CLTS:
             {
-                u64 cr0_gh_mask = __get_vvmcs(nvcpu->nv_vvmcx, CR0_GUEST_HOST_MASK);
+                u64 cr0_gh_mask = get_vvmcs(v, CR0_GUEST_HOST_MASK);
 
                 if ( cr0_gh_mask & X86_CR0_TS )
                     nvcpu->nv_vmexit_pending = 1;
                 else
                 {
-                    u64 guest_cr0 = __get_vvmcs(nvcpu->nv_vvmcx, GUEST_CR0);
-                    __set_vvmcs(nvcpu->nv_vvmcx, GUEST_CR0, (guest_cr0 & ~X86_CR0_TS));
+                    u64 guest_cr0 = get_vvmcs(v, GUEST_CR0);
+
+                    set_vvmcs(v, GUEST_CR0, (guest_cr0 & ~X86_CR0_TS));
                 }
                 break;
             }
             case VMX_CONTROL_REG_ACCESS_TYPE_LMSW:
             {
-                u64 cr0_gh_mask = __get_vvmcs(nvcpu->nv_vvmcx, CR0_GUEST_HOST_MASK);
+                u64 cr0_gh_mask = get_vvmcs(v, CR0_GUEST_HOST_MASK);
 
                 __vmread(CR0_READ_SHADOW, &old_val);
                 old_val &= X86_CR0_PE|X86_CR0_MP|X86_CR0_EM|X86_CR0_TS;
@@ -2464,8 +2427,9 @@ int nvmx_n2_vmexit_handler(struct cpu_us
                     nvcpu->nv_vmexit_pending = 1;
                 else
                 {
-                    u64 guest_cr0 = __get_vvmcs(nvcpu->nv_vvmcx, GUEST_CR0);
-                    __set_vvmcs(nvcpu->nv_vvmcx, GUEST_CR0, (guest_cr0 & cr0_gh_mask) | (val & ~cr0_gh_mask));
+                    u64 guest_cr0 = get_vvmcs(v, GUEST_CR0);
+
+                    set_vvmcs(v, GUEST_CR0, (guest_cr0 & cr0_gh_mask) | (val & ~cr0_gh_mask));
                 }
                 break;
             }
@@ -2517,7 +2481,7 @@ void nvmx_set_cr_read_shadow(struct vcpu
     if ( !nestedhvm_vmswitch_in_progress(v) )
     {
         unsigned long virtual_cr_mask = 
-            __get_vvmcs(vcpu_nestedhvm(v).nv_vvmcx, mask_field);
+            get_vvmcs(v, mask_field);
 
         /*
          * We get here when L2 changed cr in a way that did not change
@@ -2529,7 +2493,7 @@ void nvmx_set_cr_read_shadow(struct vcpu
          */
         v->arch.hvm_vcpu.guest_cr[cr] &= ~virtual_cr_mask;
         v->arch.hvm_vcpu.guest_cr[cr] |= virtual_cr_mask &
-            __get_vvmcs(vcpu_nestedhvm(v).nv_vvmcx, cr_field);
+            get_vvmcs(v, cr_field);
     }
 
     /* nvcpu.guest_cr is what L2 write to cr actually. */
--- a/xen/include/asm-x86/hvm/vmx/vmcs.h
+++ b/xen/include/asm-x86/hvm/vmx/vmcs.h
@@ -95,7 +95,7 @@ struct arch_vmx_struct {
     /* Physical address of VMCS. */
     paddr_t              vmcs_pa;
     /* VMCS shadow machine address. */
-    paddr_t             vmcs_shadow_maddr;
+    paddr_t              vmcs_shadow_maddr;
 
     /* Protects remote usage of VMCS (VMPTRLD/VMCLEAR). */
     spinlock_t           vmcs_lock;
@@ -492,10 +492,10 @@ void vmx_vmcs_switch(paddr_t from, paddr
 void vmx_set_eoi_exit_bitmap(struct vcpu *v, u8 vector);
 void vmx_clear_eoi_exit_bitmap(struct vcpu *v, u8 vector);
 int vmx_check_msr_bitmap(unsigned long *msr_bitmap, u32 msr, int access_type);
-void virtual_vmcs_enter(void *vvmcs);
-void virtual_vmcs_exit(void *vvmcs);
-u64 virtual_vmcs_vmread(void *vvmcs, u32 vmcs_encoding);
-void virtual_vmcs_vmwrite(void *vvmcs, u32 vmcs_encoding, u64 val);
+void virtual_vmcs_enter(const struct vcpu *);
+void virtual_vmcs_exit(const struct vcpu *);
+u64 virtual_vmcs_vmread(const struct vcpu *, u32 encoding);
+void virtual_vmcs_vmwrite(const struct vcpu *, u32 encoding, u64 val);
 
 static inline int vmx_add_guest_msr(u32 msr)
 {
--- a/xen/include/asm-x86/hvm/vmx/vvmx.h
+++ b/xen/include/asm-x86/hvm/vmx/vvmx.h
@@ -181,18 +181,20 @@ enum vvmcs_encoding_type {
     VVMCS_TYPE_HSTATE,
 };
 
-u64 __get_vvmcs_virtual(void *vvmcs, u32 vmcs_encoding);
-u64 __get_vvmcs_real(void *vvmcs, u32 vmcs_encoding);
-void __set_vvmcs_virtual(void *vvmcs, u32 vmcs_encoding, u64 val);
-void __set_vvmcs_real(void *vvmcs, u32 vmcs_encoding, u64 val);
-
-#define __get_vvmcs(_vvmcs, _vmcs_encoding) \
-  (cpu_has_vmx_vmcs_shadowing ? __get_vvmcs_real(_vvmcs, _vmcs_encoding) \
-                              : __get_vvmcs_virtual(_vvmcs, _vmcs_encoding))
-
-#define __set_vvmcs(_vvmcs, _vmcs_encoding, _val) \
-  (cpu_has_vmx_vmcs_shadowing ? __set_vvmcs_real(_vvmcs, _vmcs_encoding, _val) \
-                              : __set_vvmcs_virtual(_vvmcs, _vmcs_encoding, _val))
+u64 get_vvmcs_virtual(void *vvmcs, u32 encoding);
+u64 get_vvmcs_real(const struct vcpu *, u32 encoding);
+void set_vvmcs_virtual(void *vvmcs, u32 encoding, u64 val);
+void set_vvmcs_real(const struct vcpu *, u32 encoding, u64 val);
+
+#define get_vvmcs(vcpu, encoding) \
+  (cpu_has_vmx_vmcs_shadowing ? \
+   get_vvmcs_real(vcpu, encoding) : \
+   get_vvmcs_virtual(vcpu_nestedhvm(vcpu).nv_vvmcx, encoding))
+
+#define set_vvmcs(vcpu, encoding, val) \
+  (cpu_has_vmx_vmcs_shadowing ? \
+   set_vvmcs_real(vcpu, encoding, val) : \
+   set_vvmcs_virtual(vcpu_nestedhvm(vcpu).nv_vvmcx, encoding, val))
 
 uint64_t get_shadow_eptp(struct vcpu *v);
 

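For reference, a minimal sketch of how the reworked accessors end up being
used at call sites; both examples are lifted from elsewhere in this series
and are not additional changes:

    /* Read a virtual VMCS field, independent of whether shadowing is in use. */
    u32 ctrl = get_vvmcs(v, PIN_BASED_VM_EXEC_CONTROL);

    /* Write a virtual VMCS field back the same way. */
    set_vvmcs(v, GUEST_CR0, (guest_cr0 & cr0_gh_mask) | (val & ~cr0_gh_mask));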
[-- Attachment #3: Type: text/plain, Size: 126 bytes --]

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 2/3] VMX: allocate VMCS pages from domain heap
  2015-10-19 15:22 ` [PATCH 2/3] VMX: allocate VMCS pages from domain heap Jan Beulich
@ 2015-10-20 10:12   ` Andrew Cooper
  2015-10-20 10:35     ` Jan Beulich
  0 siblings, 1 reply; 26+ messages in thread
From: Andrew Cooper @ 2015-10-20 10:12 UTC (permalink / raw)
  To: Jan Beulich, xen-devel; +Cc: Kevin Tian, Jun Nakajima

On 19/10/15 16:22, Jan Beulich wrote:
> -static struct vmcs_struct *vmx_alloc_vmcs(void)
> +static paddr_t vmx_alloc_vmcs(void)
>  {
> +    struct page_info *pg;
>      struct vmcs_struct *vmcs;
>  
> -    if ( (vmcs = alloc_xenheap_page()) == NULL )
> +    if ( (pg = alloc_domheap_page(NULL, 0)) == NULL )

As an observation, it would be good to pass v from the caller, and NUMA
allocate against v->domain here.

> @@ -580,7 +583,7 @@ int vmx_cpu_up_prepare(unsigned int cpu)
>  void vmx_cpu_dead(unsigned int cpu)
>  {
>      vmx_free_vmcs(per_cpu(vmxon_region, cpu));
> -    per_cpu(vmxon_region, cpu) = NULL;
> +    per_cpu(vmxon_region, cpu) = 0;

While this is currently safe (as pa 0 is not part of the available heap
allocation range), might it be worth introducing a named sentinel?  I
can foresee a DMLite nested Xen scenario where we definitely don't need
to treat the low 1MB magically.
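A minimal sketch of what such a named sentinel might look like (the name is
purely illustrative and does not exist in the tree; pa 0 only works today
because the heap allocator never hands out page 0):

    /* Hypothetical marker for "no VMXON region allocated on this CPU". */
    #define VMXON_PA_INVALID ((paddr_t)0)

    /* vmx_cpu_dead() would then read: */
    vmx_free_vmcs(per_cpu(vmxon_region, cpu));
    per_cpu(vmxon_region, cpu) = VMXON_PA_INVALID;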

> --- a/xen/arch/x86/hvm/vmx/vvmx.c
> +++ b/xen/arch/x86/hvm/vmx/vvmx.c
> @@ -56,13 +56,14 @@ int nvmx_vcpu_initialise(struct vcpu *v)
>  {
>      struct nestedvmx *nvmx = &vcpu_2_nvmx(v);
>      struct nestedvcpu *nvcpu = &vcpu_nestedhvm(v);
> +    struct page_info *pg = alloc_domheap_page(NULL, 0);

Again - this can be NUMA allocated with v->domain.

~Andrew

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 2/3] VMX: allocate VMCS pages from domain heap
  2015-10-20 10:12   ` Andrew Cooper
@ 2015-10-20 10:35     ` Jan Beulich
  2015-10-21  3:16       ` Tian, Kevin
  0 siblings, 1 reply; 26+ messages in thread
From: Jan Beulich @ 2015-10-20 10:35 UTC (permalink / raw)
  To: Andrew Cooper; +Cc: xen-devel, Kevin Tian, Jun Nakajima

>>> On 20.10.15 at 12:12, <andrew.cooper3@citrix.com> wrote:
> On 19/10/15 16:22, Jan Beulich wrote:
>> -static struct vmcs_struct *vmx_alloc_vmcs(void)
>> +static paddr_t vmx_alloc_vmcs(void)
>>  {
>> +    struct page_info *pg;
>>      struct vmcs_struct *vmcs;
>>  
>> -    if ( (vmcs = alloc_xenheap_page()) == NULL )
>> +    if ( (pg = alloc_domheap_page(NULL, 0)) == NULL )
> 
> As an observation, it would be good to pass v from the caller, and NUMA
> allocate against v->domain here.

Yes, in another patch.

>> @@ -580,7 +583,7 @@ int vmx_cpu_up_prepare(unsigned int cpu)
>>  void vmx_cpu_dead(unsigned int cpu)
>>  {
>>      vmx_free_vmcs(per_cpu(vmxon_region, cpu));
>> -    per_cpu(vmxon_region, cpu) = NULL;
>> +    per_cpu(vmxon_region, cpu) = 0;
> 
> While this is currently safe (as pa 0 is not part of the available heap
> allocation range), might it be worth introducing a named sentential?  I
> can forsee a DMLite nested Xen scenario where we definitely don't need
> to treat the low 1MB magically.

I guess there are more things to adjust if we ever cared to recover
the few hundred kb below 1Mb. And then I don't see why nested
Xen would matter here: one major reason for reserving that
space is that we want to put the trampoline there. Do you think
DMlite would allow us to get away without? But even if so, this
would again fall under what I've said in the first sentence.

>> --- a/xen/arch/x86/hvm/vmx/vvmx.c
>> +++ b/xen/arch/x86/hvm/vmx/vvmx.c
>> @@ -56,13 +56,14 @@ int nvmx_vcpu_initialise(struct vcpu *v)
>>  {
>>      struct nestedvmx *nvmx = &vcpu_2_nvmx(v);
>>      struct nestedvcpu *nvcpu = &vcpu_nestedhvm(v);
>> +    struct page_info *pg = alloc_domheap_page(NULL, 0);
> 
> Again - this can be NUMA allocated with v->domain.

In that same other patch I would say.

Jan

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 1/3] VMX: re-order definitions
  2015-10-19 15:21 ` [PATCH 1/3] VMX: re-order definitions Jan Beulich
@ 2015-10-21  3:04   ` Tian, Kevin
  0 siblings, 0 replies; 26+ messages in thread
From: Tian, Kevin @ 2015-10-21  3:04 UTC (permalink / raw)
  To: Jan Beulich, xen-devel; +Cc: Nakajima, Jun

> From: Jan Beulich [mailto:JBeulich@suse.com]
> Sent: Monday, October 19, 2015 11:22 PM
> To: xen-devel
> Cc: Nakajima, Jun; Tian, Kevin
> Subject: [PATCH 1/3] VMX: re-order definitions
> 
> ... so they end up reasonably sorted, easing lookup.
> 
> Signed-off-by: Jan Beulich <jbeulich@suse.com>

Acked-by: Kevin Tian <kevin.tian@intel.com>

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 2/3] VMX: allocate VMCS pages from domain heap
  2015-10-20 10:35     ` Jan Beulich
@ 2015-10-21  3:16       ` Tian, Kevin
  2015-10-21  5:54         ` Jan Beulich
  2015-11-23 14:28         ` Jan Beulich
  0 siblings, 2 replies; 26+ messages in thread
From: Tian, Kevin @ 2015-10-21  3:16 UTC (permalink / raw)
  To: Jan Beulich, Andrew Cooper; +Cc: xen-devel, Nakajima, Jun

> From: Jan Beulich [mailto:JBeulich@suse.com]
> Sent: Tuesday, October 20, 2015 6:36 PM
> 
> >>> On 20.10.15 at 12:12, <andrew.cooper3@citrix.com> wrote:
> > On 19/10/15 16:22, Jan Beulich wrote:
> >> -static struct vmcs_struct *vmx_alloc_vmcs(void)
> >> +static paddr_t vmx_alloc_vmcs(void)
> >>  {
> >> +    struct page_info *pg;
> >>      struct vmcs_struct *vmcs;
> >>
> >> -    if ( (vmcs = alloc_xenheap_page()) == NULL )
> >> +    if ( (pg = alloc_domheap_page(NULL, 0)) == NULL )
> >
> > As an observation, it would be good to pass v from the caller, and NUMA
> > allocate against v->domain here.
> 
> Yes, in another patch.

Which 'another patch'? I suppose not PATCH 3/3, since I didn't see a
related change there.

> 
> >> @@ -580,7 +583,7 @@ int vmx_cpu_up_prepare(unsigned int cpu)
> >>  void vmx_cpu_dead(unsigned int cpu)
> >>  {
> >>      vmx_free_vmcs(per_cpu(vmxon_region, cpu));
> >> -    per_cpu(vmxon_region, cpu) = NULL;
> >> +    per_cpu(vmxon_region, cpu) = 0;
> >
> > While this is currently safe (as pa 0 is not part of the available heap
> > allocation range), might it be worth introducing a named sentential?  I
> > can forsee a DMLite nested Xen scenario where we definitely don't need
> > to treat the low 1MB magically.
> 
> I guess there are more things to adjust if we ever cared to recover
> the few hundred kb below 1Mb. And then I don't see why nested
> Xen would matter here: One major main reason for reserving that
> space is that we want to put the trampoline there. Do you think
> DMlite would allow us to get away without? But even if so, this
> would again fall under what I've said in the first sentence.
> 

Could you at least introduce a macro first? Regardless of how much
there is to adjust, that would make future changes simpler.

Thanks
Kevin

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 3/3] vVMX: use latched VMCS machine address
  2015-10-19 15:23 ` [PATCH 3/3] vVMX: use latched VMCS machine address Jan Beulich
@ 2015-10-21  5:44   ` Tian, Kevin
  2016-02-23  8:34   ` Li, Liang Z
  1 sibling, 0 replies; 26+ messages in thread
From: Tian, Kevin @ 2015-10-21  5:44 UTC (permalink / raw)
  To: Jan Beulich, xen-devel; +Cc: Nakajima, Jun

> From: Jan Beulich [mailto:JBeulich@suse.com]
> Sent: Monday, October 19, 2015 11:23 PM
> 
> Instead of calling domain_page_map_to_mfn() over and over, latch the
> guest VMCS machine address unconditionally (i.e. independent of whether
> VMCS shadowing is supported by the hardware).
> 
> Since this requires altering the parameters of __[gs]et_vmcs{,_real}()
> (and hence all their callers) anyway, take the opportunity to also drop
> the bogus double underscores from their names (and from
> __[gs]et_vmcs_virtual() as well).
> 
> Signed-off-by: Jan Beulich <jbeulich@suse.com>

Acked-by: Kevin Tian <kevin.tian@intel.com>

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 2/3] VMX: allocate VMCS pages from domain heap
  2015-10-21  3:16       ` Tian, Kevin
@ 2015-10-21  5:54         ` Jan Beulich
  2015-11-23 14:28         ` Jan Beulich
  1 sibling, 0 replies; 26+ messages in thread
From: Jan Beulich @ 2015-10-21  5:54 UTC (permalink / raw)
  To: andrew.cooper3, kevin.tian; +Cc: xen-devel, jun.nakajima

>>> "Tian, Kevin" <kevin.tian@intel.com> 10/21/15 5:16 AM >>>
>> From: Jan Beulich [mailto:JBeulich@suse.com]
>> Sent: Tuesday, October 20, 2015 6:36 PM
>> >>> On 20.10.15 at 12:12, <andrew.cooper3@citrix.com> wrote:
>> > On 19/10/15 16:22, Jan Beulich wrote:
>> >> -static struct vmcs_struct *vmx_alloc_vmcs(void)
>> >> +static paddr_t vmx_alloc_vmcs(void)
>> >>  {
>> >> +    struct page_info *pg;
>> >>      struct vmcs_struct *vmcs;
>> >>
>> >> -    if ( (vmcs = alloc_xenheap_page()) == NULL )
>> >> +    if ( (pg = alloc_domheap_page(NULL, 0)) == NULL )
>> >
>> > As an observation, it would be good to pass v from the caller, and NUMA
>> > allocate against v->domain here.
>> 
>> Yes, in another patch.
>
>which 'another patch'? suppose not PATCH 3/3 since I didn't' see 
>related change there.

A yet to be written one.

>> >> @@ -580,7 +583,7 @@ int vmx_cpu_up_prepare(unsigned int cpu)
>> >>  void vmx_cpu_dead(unsigned int cpu)
>> >>  {
>> >>      vmx_free_vmcs(per_cpu(vmxon_region, cpu));
>> >> -    per_cpu(vmxon_region, cpu) = NULL;
>> >> +    per_cpu(vmxon_region, cpu) = 0;
>> >
>> > While this is currently safe (as pa 0 is not part of the available heap
>> > allocation range), might it be worth introducing a named sentential?  I
>> > can forsee a DMLite nested Xen scenario where we definitely don't need
>> > to treat the low 1MB magically.
>> 
>> I guess there are more things to adjust if we ever cared to recover
>> the few hundred kb below 1Mb. And then I don't see why nested
>> Xen would matter here: One major main reason for reserving that
>> space is that we want to put the trampoline there. Do you think
>> DMlite would allow us to get away without? But even if so, this
>> would again fall under what I've said in the first sentence.
>
>Could you at least introduce a macro first? Regardless of how much
>things to adjust, this way makes future change simple.

I can, it's just that this won't catch any of the other MFN-0-is-not-used cases
in the code base.

Jan

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 2/3] VMX: allocate VMCS pages from domain heap
  2015-10-21  3:16       ` Tian, Kevin
  2015-10-21  5:54         ` Jan Beulich
@ 2015-11-23 14:28         ` Jan Beulich
  2015-11-24  5:04           ` Tian, Kevin
  1 sibling, 1 reply; 26+ messages in thread
From: Jan Beulich @ 2015-11-23 14:28 UTC (permalink / raw)
  To: Andrew Cooper, Kevin Tian; +Cc: xen-devel, Jun Nakajima

>>> On 21.10.15 at 05:16, <kevin.tian@intel.com> wrote:
>>  From: Jan Beulich [mailto:JBeulich@suse.com]
>> Sent: Tuesday, October 20, 2015 6:36 PM
>> >>> On 20.10.15 at 12:12, <andrew.cooper3@citrix.com> wrote:
>> > On 19/10/15 16:22, Jan Beulich wrote:
>> >> @@ -580,7 +583,7 @@ int vmx_cpu_up_prepare(unsigned int cpu)
>> >>  void vmx_cpu_dead(unsigned int cpu)
>> >>  {
>> >>      vmx_free_vmcs(per_cpu(vmxon_region, cpu));
>> >> -    per_cpu(vmxon_region, cpu) = NULL;
>> >> +    per_cpu(vmxon_region, cpu) = 0;
>> >
>> > While this is currently safe (as pa 0 is not part of the available heap
>> > allocation range), might it be worth introducing a named sentential?  I
>> > can forsee a DMLite nested Xen scenario where we definitely don't need
>> > to treat the low 1MB magically.
>> 
>> I guess there are more things to adjust if we ever cared to recover
>> the few hundred kb below 1Mb. And then I don't see why nested
>> Xen would matter here: One major main reason for reserving that
>> space is that we want to put the trampoline there. Do you think
>> DMlite would allow us to get away without? But even if so, this
>> would again fall under what I've said in the first sentence.
> 
> Could you at least introduce a macro first? Regardless of how much
> things to adjust, this way makes future change simple.

So I've made an attempt, but this is really getting unwieldy: Setting
per-CPU data to non-zero initial values is not possible; making sure
cleanup code avoids assuming such variables got initialized is quite
error prone. Same goes at least to a certain extent for struct vcpu
members (see e.g. nvmx_vcpu_destroy(), which currently is
correct no matter whether nvmx_vcpu_initialise() ran at all, or to
completion).

I also don't see what a macro would help here, or how/where it
should be used. paddr_valid()? Yes, I could do this, but it wouldn't
simplify much when later wanting to convert to a non-zero value
for above reasons (it would instead give the wrong impression that
changing the value is all it takes).
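For illustration, the kind of helper mentioned above might look like this,
with 0 as the sentinel the code currently relies on; as noted, merely
changing the constant later would not be enough on its own:

    /* Hypothetical helper; not part of this series. */
    #define paddr_valid(pa) ((pa) != 0)

    /* e.g. in a cleanup path: */
    if ( paddr_valid(per_cpu(vmxon_region, cpu)) )
        vmx_free_vmcs(per_cpu(vmxon_region, cpu));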

Jan

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 2/3] VMX: allocate VMCS pages from domain heap
  2015-11-23 14:28         ` Jan Beulich
@ 2015-11-24  5:04           ` Tian, Kevin
  2015-11-24  7:50             ` Jan Beulich
  0 siblings, 1 reply; 26+ messages in thread
From: Tian, Kevin @ 2015-11-24  5:04 UTC (permalink / raw)
  To: Jan Beulich, Andrew Cooper; +Cc: xen-devel, Nakajima, Jun

> From: Jan Beulich [mailto:JBeulich@suse.com]
> Sent: Monday, November 23, 2015 10:28 PM
> 
> >>> On 21.10.15 at 05:16, <kevin.tian@intel.com> wrote:
> >>  From: Jan Beulich [mailto:JBeulich@suse.com]
> >> Sent: Tuesday, October 20, 2015 6:36 PM
> >> >>> On 20.10.15 at 12:12, <andrew.cooper3@citrix.com> wrote:
> >> > On 19/10/15 16:22, Jan Beulich wrote:
> >> >> @@ -580,7 +583,7 @@ int vmx_cpu_up_prepare(unsigned int cpu)
> >> >>  void vmx_cpu_dead(unsigned int cpu)
> >> >>  {
> >> >>      vmx_free_vmcs(per_cpu(vmxon_region, cpu));
> >> >> -    per_cpu(vmxon_region, cpu) = NULL;
> >> >> +    per_cpu(vmxon_region, cpu) = 0;
> >> >
> >> > While this is currently safe (as pa 0 is not part of the available heap
> >> > allocation range), might it be worth introducing a named sentential?  I
> >> > can forsee a DMLite nested Xen scenario where we definitely don't need
> >> > to treat the low 1MB magically.
> >>
> >> I guess there are more things to adjust if we ever cared to recover
> >> the few hundred kb below 1Mb. And then I don't see why nested
> >> Xen would matter here: One major main reason for reserving that
> >> space is that we want to put the trampoline there. Do you think
> >> DMlite would allow us to get away without? But even if so, this
> >> would again fall under what I've said in the first sentence.
> >
> > Could you at least introduce a macro first? Regardless of how much
> > things to adjust, this way makes future change simple.
> 
> So I've made an attempt, but this is really getting unwieldy: Setting
> per-CPU data to non-zero initial values is not possible; making sure
> cleanup code avoids assuming such variables got initialized is quite
> error prone. Same goes at least to a certain extent for struct vcpu
> members (see e.g. nvmx_vcpu_destroy(), which currently is
> correct no matter whether nvmx_vcpu_initialise() ran at all, or to
> completion).
> 
> I also don't see what a macro would help here, or how/where it
> should be used. paddr_valid()? Yes, I could do this, but it wouldn't
> simplify much when later wanting to convert to a non-zero value
> for above reasons (it would instead give the wrong impression that
> changing the value is all it takes).
> 

Thanks for looking into this. Based on your explanation
I think your original code is fine to go in as is. Here is my ack:

Acked-by: Kevin Tian <kevin.tian@intel.com>

Thanks
Kevin

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 2/3] VMX: allocate VMCS pages from domain heap
  2015-11-24  5:04           ` Tian, Kevin
@ 2015-11-24  7:50             ` Jan Beulich
  2015-11-24 10:55               ` Andrew Cooper
  0 siblings, 1 reply; 26+ messages in thread
From: Jan Beulich @ 2015-11-24  7:50 UTC (permalink / raw)
  To: Andrew Cooper, Kevin Tian; +Cc: xen-devel, Jun Nakajima

>>> On 24.11.15 at 06:04, <kevin.tian@intel.com> wrote:
>>  From: Jan Beulich [mailto:JBeulich@suse.com]
>> Sent: Monday, November 23, 2015 10:28 PM
>> 
>> >>> On 21.10.15 at 05:16, <kevin.tian@intel.com> wrote:
>> >>  From: Jan Beulich [mailto:JBeulich@suse.com]
>> >> Sent: Tuesday, October 20, 2015 6:36 PM
>> >> >>> On 20.10.15 at 12:12, <andrew.cooper3@citrix.com> wrote:
>> >> > On 19/10/15 16:22, Jan Beulich wrote:
>> >> >> @@ -580,7 +583,7 @@ int vmx_cpu_up_prepare(unsigned int cpu)
>> >> >>  void vmx_cpu_dead(unsigned int cpu)
>> >> >>  {
>> >> >>      vmx_free_vmcs(per_cpu(vmxon_region, cpu));
>> >> >> -    per_cpu(vmxon_region, cpu) = NULL;
>> >> >> +    per_cpu(vmxon_region, cpu) = 0;
>> >> >
>> >> > While this is currently safe (as pa 0 is not part of the available heap
>> >> > allocation range), might it be worth introducing a named sentential?  I
>> >> > can forsee a DMLite nested Xen scenario where we definitely don't need
>> >> > to treat the low 1MB magically.
>> >>
>> >> I guess there are more things to adjust if we ever cared to recover
>> >> the few hundred kb below 1Mb. And then I don't see why nested
>> >> Xen would matter here: One major main reason for reserving that
>> >> space is that we want to put the trampoline there. Do you think
>> >> DMlite would allow us to get away without? But even if so, this
>> >> would again fall under what I've said in the first sentence.
>> >
>> > Could you at least introduce a macro first? Regardless of how much
>> > things to adjust, this way makes future change simple.
>> 
>> So I've made an attempt, but this is really getting unwieldy: Setting
>> per-CPU data to non-zero initial values is not possible; making sure
>> cleanup code avoids assuming such variables got initialized is quite
>> error prone. Same goes at least to a certain extent for struct vcpu
>> members (see e.g. nvmx_vcpu_destroy(), which currently is
>> correct no matter whether nvmx_vcpu_initialise() ran at all, or to
>> completion).
>> 
>> I also don't see what a macro would help here, or how/where it
>> should be used. paddr_valid()? Yes, I could do this, but it wouldn't
>> simplify much when later wanting to convert to a non-zero value
>> for above reasons (it would instead give the wrong impression that
>> changing the value is all it takes).
>> 
> 
> Thanks for looking into this attempt. Based on your explanation
> I think your original code is reasonable to go. Here is my ack:
> 
> Acked-by: Kevin Tian <kevin.tian@intel.com>

Thanks Kevin. Andrew - please indicate whether your previous
comment is to be considered a NAK, or "just a comment".

Jan

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 2/3] VMX: allocate VMCS pages from domain heap
  2015-11-24  7:50             ` Jan Beulich
@ 2015-11-24 10:55               ` Andrew Cooper
  2015-11-24 10:59                 ` Andrew Cooper
  0 siblings, 1 reply; 26+ messages in thread
From: Andrew Cooper @ 2015-11-24 10:55 UTC (permalink / raw)
  To: Jan Beulich, Kevin Tian; +Cc: xen-devel, Jun Nakajima

On 24/11/15 07:50, Jan Beulich wrote:
>>>> On 24.11.15 at 06:04, <kevin.tian@intel.com> wrote:
>>>  From: Jan Beulich [mailto:JBeulich@suse.com]
>>> Sent: Monday, November 23, 2015 10:28 PM
>>>
>>>>>> On 21.10.15 at 05:16, <kevin.tian@intel.com> wrote:
>>>>>  From: Jan Beulich [mailto:JBeulich@suse.com]
>>>>> Sent: Tuesday, October 20, 2015 6:36 PM
>>>>>>>> On 20.10.15 at 12:12, <andrew.cooper3@citrix.com> wrote:
>>>>>> On 19/10/15 16:22, Jan Beulich wrote:
>>>>>>> @@ -580,7 +583,7 @@ int vmx_cpu_up_prepare(unsigned int cpu)
>>>>>>>  void vmx_cpu_dead(unsigned int cpu)
>>>>>>>  {
>>>>>>>      vmx_free_vmcs(per_cpu(vmxon_region, cpu));
>>>>>>> -    per_cpu(vmxon_region, cpu) = NULL;
>>>>>>> +    per_cpu(vmxon_region, cpu) = 0;
>>>>>> While this is currently safe (as pa 0 is not part of the available heap
>>>>>> allocation range), might it be worth introducing a named sentential?  I
>>>>>> can forsee a DMLite nested Xen scenario where we definitely don't need
>>>>>> to treat the low 1MB magically.
>>>>> I guess there are more things to adjust if we ever cared to recover
>>>>> the few hundred kb below 1Mb. And then I don't see why nested
>>>>> Xen would matter here: One major main reason for reserving that
>>>>> space is that we want to put the trampoline there. Do you think
>>>>> DMlite would allow us to get away without? But even if so, this
>>>>> would again fall under what I've said in the first sentence.
>>>> Could you at least introduce a macro first? Regardless of how much
>>>> things to adjust, this way makes future change simple.
>>> So I've made an attempt, but this is really getting unwieldy: Setting
>>> per-CPU data to non-zero initial values is not possible; making sure
>>> cleanup code avoids assuming such variables got initialized is quite
>>> error prone. Same goes at least to a certain extent for struct vcpu
>>> members (see e.g. nvmx_vcpu_destroy(), which currently is
>>> correct no matter whether nvmx_vcpu_initialise() ran at all, or to
>>> completion).
>>>
>>> I also don't see what a macro would help here, or how/where it
>>> should be used. paddr_valid()? Yes, I could do this, but it wouldn't
>>> simplify much when later wanting to convert to a non-zero value
>>> for above reasons (it would instead give the wrong impression that
>>> changing the value is all it takes).
>>>
>> Thanks for looking into this attempt. Based on your explanation
>> I think your original code is reasonable to go. Here is my ack:
>>
>> Acked-by: Kevin Tian <kevin.tian@intel.com>
> Thanks Kevin. Andrew - please indicate whether your previous
> comment is to be considered a NAK, or "just a comment".

I would prefer a sentinel value to be introduced, but can live without
it being changed.  It is definitely not the only area which uses 0 as a
sentinel, and cleanup will have to happen one way or another.

~Andrew

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 2/3] VMX: allocate VMCS pages from domain heap
  2015-11-24 10:55               ` Andrew Cooper
@ 2015-11-24 10:59                 ` Andrew Cooper
  2015-11-24 11:04                   ` Jan Beulich
  0 siblings, 1 reply; 26+ messages in thread
From: Andrew Cooper @ 2015-11-24 10:59 UTC (permalink / raw)
  To: Jan Beulich, Kevin Tian; +Cc: xen-devel, Jun Nakajima

On 24/11/15 10:55, Andrew Cooper wrote:
> On 24/11/15 07:50, Jan Beulich wrote:
>>>>> On 24.11.15 at 06:04, <kevin.tian@intel.com> wrote:
>>>>  From: Jan Beulich [mailto:JBeulich@suse.com]
>>>> Sent: Monday, November 23, 2015 10:28 PM
>>>>
>>>>>>> On 21.10.15 at 05:16, <kevin.tian@intel.com> wrote:
>>>>>>  From: Jan Beulich [mailto:JBeulich@suse.com]
>>>>>> Sent: Tuesday, October 20, 2015 6:36 PM
>>>>>>>>> On 20.10.15 at 12:12, <andrew.cooper3@citrix.com> wrote:
>>>>>>> On 19/10/15 16:22, Jan Beulich wrote:
>>>>>>>> @@ -580,7 +583,7 @@ int vmx_cpu_up_prepare(unsigned int cpu)
>>>>>>>>  void vmx_cpu_dead(unsigned int cpu)
>>>>>>>>  {
>>>>>>>>      vmx_free_vmcs(per_cpu(vmxon_region, cpu));
>>>>>>>> -    per_cpu(vmxon_region, cpu) = NULL;
>>>>>>>> +    per_cpu(vmxon_region, cpu) = 0;
>>>>>>> While this is currently safe (as pa 0 is not part of the available heap
>>>>>>> allocation range), might it be worth introducing a named sentential?  I
>>>>>>> can forsee a DMLite nested Xen scenario where we definitely don't need
>>>>>>> to treat the low 1MB magically.
>>>>>> I guess there are more things to adjust if we ever cared to recover
>>>>>> the few hundred kb below 1Mb. And then I don't see why nested
>>>>>> Xen would matter here: One major main reason for reserving that
>>>>>> space is that we want to put the trampoline there. Do you think
>>>>>> DMlite would allow us to get away without? But even if so, this
>>>>>> would again fall under what I've said in the first sentence.
>>>>> Could you at least introduce a macro first? Regardless of how much
>>>>> things to adjust, this way makes future change simple.
>>>> So I've made an attempt, but this is really getting unwieldy: Setting
>>>> per-CPU data to non-zero initial values is not possible; making sure
>>>> cleanup code avoids assuming such variables got initialized is quite
>>>> error prone. Same goes at least to a certain extent for struct vcpu
>>>> members (see e.g. nvmx_vcpu_destroy(), which currently is
>>>> correct no matter whether nvmx_vcpu_initialise() ran at all, or to
>>>> completion).
>>>>
>>>> I also don't see what a macro would help here, or how/where it
>>>> should be used. paddr_valid()? Yes, I could do this, but it wouldn't
>>>> simplify much when later wanting to convert to a non-zero value
>>>> for above reasons (it would instead give the wrong impression that
>>>> changing the value is all it takes).
>>>>
>>> Thanks for looking into this attempt. Based on your explanation
>>> I think your original code is reasonable to go. Here is my ack:
>>>
>>> Acked-by: Kevin Tian <kevin.tian@intel.com>
>> Thanks Kevin. Andrew - please indicate whether your previous
>> comment is to be considered a NAK, or "just a comment".
> I would prefer a sentinel value being introduced, but can live without
> it being changed.  It is definitely not the only area which uses 0 as a
> sentinel and cleanup will have to happen, one way or another.

Actually it turns out that we already have an appropriate sentinel.

include/asm-x86/types.h:34:#define INVALID_PADDR (~0UL)

~Andrew

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 2/3] VMX: allocate VMCS pages from domain heap
  2015-11-24 10:59                 ` Andrew Cooper
@ 2015-11-24 11:04                   ` Jan Beulich
  2015-11-24 11:09                     ` Andrew Cooper
  0 siblings, 1 reply; 26+ messages in thread
From: Jan Beulich @ 2015-11-24 11:04 UTC (permalink / raw)
  To: Andrew Cooper; +Cc: xen-devel, Kevin Tian, Jun Nakajima

>>> On 24.11.15 at 11:59, <andrew.cooper3@citrix.com> wrote:
> On 24/11/15 10:55, Andrew Cooper wrote:
>> On 24/11/15 07:50, Jan Beulich wrote:
>>>>>> On 24.11.15 at 06:04, <kevin.tian@intel.com> wrote:
>>>>>  From: Jan Beulich [mailto:JBeulich@suse.com]
>>>>> Sent: Monday, November 23, 2015 10:28 PM
>>>>>
>>>>>>>> On 21.10.15 at 05:16, <kevin.tian@intel.com> wrote:
>>>>>>>  From: Jan Beulich [mailto:JBeulich@suse.com]
>>>>>>> Sent: Tuesday, October 20, 2015 6:36 PM
>>>>>>>>>> On 20.10.15 at 12:12, <andrew.cooper3@citrix.com> wrote:
>>>>>>>> On 19/10/15 16:22, Jan Beulich wrote:
>>>>>>>>> @@ -580,7 +583,7 @@ int vmx_cpu_up_prepare(unsigned int cpu)
>>>>>>>>>  void vmx_cpu_dead(unsigned int cpu)
>>>>>>>>>  {
>>>>>>>>>      vmx_free_vmcs(per_cpu(vmxon_region, cpu));
>>>>>>>>> -    per_cpu(vmxon_region, cpu) = NULL;
>>>>>>>>> +    per_cpu(vmxon_region, cpu) = 0;
>>>>>>>> While this is currently safe (as pa 0 is not part of the available heap
>>>>>>>> allocation range), might it be worth introducing a named sentential?  I
>>>>>>>> can forsee a DMLite nested Xen scenario where we definitely don't need
>>>>>>>> to treat the low 1MB magically.
>>>>>>> I guess there are more things to adjust if we ever cared to recover
>>>>>>> the few hundred kb below 1Mb. And then I don't see why nested
>>>>>>> Xen would matter here: One major main reason for reserving that
>>>>>>> space is that we want to put the trampoline there. Do you think
>>>>>>> DMlite would allow us to get away without? But even if so, this
>>>>>>> would again fall under what I've said in the first sentence.
>>>>>> Could you at least introduce a macro first? Regardless of how much
>>>>>> things to adjust, this way makes future change simple.
>>>>> So I've made an attempt, but this is really getting unwieldy: Setting
>>>>> per-CPU data to non-zero initial values is not possible; making sure
>>>>> cleanup code avoids assuming such variables got initialized is quite
>>>>> error prone. Same goes at least to a certain extent for struct vcpu
>>>>> members (see e.g. nvmx_vcpu_destroy(), which currently is
>>>>> correct no matter whether nvmx_vcpu_initialise() ran at all, or to
>>>>> completion).
>>>>>
>>>>> I also don't see what a macro would help here, or how/where it
>>>>> should be used. paddr_valid()? Yes, I could do this, but it wouldn't
>>>>> simplify much when later wanting to convert to a non-zero value
>>>>> for above reasons (it would instead give the wrong impression that
>>>>> changing the value is all it takes).
>>>>>
>>>> Thanks for looking into this attempt. Based on your explanation
>>>> I think your original code is reasonable to go. Here is my ack:
>>>>
>>>> Acked-by: Kevin Tian <kevin.tian@intel.com>
>>> Thanks Kevin. Andrew - please indicate whether your previous
>>> comment is to be considered a NAK, or "just a comment".
>> I would prefer a sentinel value being introduced, but can live without
>> it being changed.  It is definitely not the only area which uses 0 as a
>> sentinel and cleanup will have to happen, one way or another.
> 
> Actually it turns out that we already have an appropriate sentinel.
> 
> include/asm-x86/types.h:34:#define INVALID_PADDR (~0UL)

Yes, that's what I had tried to use in above mentioned attempt.

Jan

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 2/3] VMX: allocate VMCS pages from domain heap
  2015-11-24 11:04                   ` Jan Beulich
@ 2015-11-24 11:09                     ` Andrew Cooper
  0 siblings, 0 replies; 26+ messages in thread
From: Andrew Cooper @ 2015-11-24 11:09 UTC (permalink / raw)
  To: Jan Beulich; +Cc: xen-devel, Kevin Tian, Jun Nakajima

On 24/11/15 11:04, Jan Beulich wrote:
>>>> On 24.11.15 at 11:59, <andrew.cooper3@citrix.com> wrote:
>> On 24/11/15 10:55, Andrew Cooper wrote:
>>> On 24/11/15 07:50, Jan Beulich wrote:
>>>>>>> On 24.11.15 at 06:04, <kevin.tian@intel.com> wrote:
>>>>>>  From: Jan Beulich [mailto:JBeulich@suse.com]
>>>>>> Sent: Monday, November 23, 2015 10:28 PM
>>>>>>
>>>>>>>>> On 21.10.15 at 05:16, <kevin.tian@intel.com> wrote:
>>>>>>>>  From: Jan Beulich [mailto:JBeulich@suse.com]
>>>>>>>> Sent: Tuesday, October 20, 2015 6:36 PM
>>>>>>>>>>> On 20.10.15 at 12:12, <andrew.cooper3@citrix.com> wrote:
>>>>>>>>> On 19/10/15 16:22, Jan Beulich wrote:
>>>>>>>>>> @@ -580,7 +583,7 @@ int vmx_cpu_up_prepare(unsigned int cpu)
>>>>>>>>>>  void vmx_cpu_dead(unsigned int cpu)
>>>>>>>>>>  {
>>>>>>>>>>      vmx_free_vmcs(per_cpu(vmxon_region, cpu));
>>>>>>>>>> -    per_cpu(vmxon_region, cpu) = NULL;
>>>>>>>>>> +    per_cpu(vmxon_region, cpu) = 0;
>>>>>>>>> While this is currently safe (as pa 0 is not part of the available heap
>>>>>>>>> allocation range), might it be worth introducing a named sentential?  I
>>>>>>>>> can forsee a DMLite nested Xen scenario where we definitely don't need
>>>>>>>>> to treat the low 1MB magically.
>>>>>>>> I guess there are more things to adjust if we ever cared to recover
>>>>>>>> the few hundred kb below 1Mb. And then I don't see why nested
>>>>>>>> Xen would matter here: One major main reason for reserving that
>>>>>>>> space is that we want to put the trampoline there. Do you think
>>>>>>>> DMlite would allow us to get away without? But even if so, this
>>>>>>>> would again fall under what I've said in the first sentence.
>>>>>>> Could you at least introduce a macro first? Regardless of how much
>>>>>>> things to adjust, this way makes future change simple.
>>>>>> So I've made an attempt, but this is really getting unwieldy: Setting
>>>>>> per-CPU data to non-zero initial values is not possible; making sure
>>>>>> cleanup code avoids assuming such variables got initialized is quite
>>>>>> error prone. Same goes at least to a certain extent for struct vcpu
>>>>>> members (see e.g. nvmx_vcpu_destroy(), which currently is
>>>>>> correct no matter whether nvmx_vcpu_initialise() ran at all, or to
>>>>>> completion).
>>>>>>
>>>>>> I also don't see what a macro would help here, or how/where it
>>>>>> should be used. paddr_valid()? Yes, I could do this, but it wouldn't
>>>>>> simplify much when later wanting to convert to a non-zero value
>>>>>> for above reasons (it would instead give the wrong impression that
>>>>>> changing the value is all it takes).
>>>>>>
>>>>> Thanks for looking into this attempt. Based on your explanation
>>>>> I think your original code is reasonable to go. Here is my ack:
>>>>>
>>>>> Acked-by: Kevin Tian <kevin.tian@intel.com>
>>>> Thanks Kevin. Andrew - please indicate whether your previous
>>>> comment is to be considered a NAK, or "just a comment".
>>> I would prefer a sentinel value being introduced, but can live without
>>> it being changed.  It is definitely not the only area which uses 0 as a
>>> sentinel and cleanup will have to happen, one way or another.
>> Actually it turns out that we already have an appropriate sentinel.
>>
>> include/asm-x86/types.h:34:#define INVALID_PADDR (~0UL)
> Yes, that's what I had tried to use in above mentioned attempt.

Right.  Let's not block the change on another infrastructure improvement.

Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 3/3] vVMX: use latched VMCS machine address
  2015-10-19 15:23 ` [PATCH 3/3] vVMX: use latched VMCS machine address Jan Beulich
  2015-10-21  5:44   ` Tian, Kevin
@ 2016-02-23  8:34   ` Li, Liang Z
  2016-02-23 10:31     ` Jan Beulich
  1 sibling, 1 reply; 26+ messages in thread
From: Li, Liang Z @ 2016-02-23  8:34 UTC (permalink / raw)
  To: Jan Beulich, xen-devel
  Cc: Tian, Kevin, xen-devel, Ian Campbell, Andrew Cooper, Ian Jackson,
	osstest service owner, Wang, Yong Y, Nakajima, Jun, Jin, Gordon,
	Hu, Robert

Hi Jan,

I found some issues in your patch; see the comments below.



> -----Original Message-----
> From: xen-devel-bounces@lists.xen.org [mailto:xen-devel-
> bounces@lists.xen.org] On Behalf Of Jan Beulich
> Sent: Monday, October 19, 2015 11:23 PM
> To: xen-devel
> Cc: Tian, Kevin; Nakajima, Jun
> Subject: [Xen-devel] [PATCH 3/3] vVMX: use latched VMCS machine address
> 
> Instead of calling domain_page_map_to_mfn() over and over, latch the
> guest VMCS machine address unconditionally (i.e. independent of whether
> VMCS shadowing is supported by the hardware).
> 
> Since this requires altering the parameters of __[gs]et_vmcs{,_real}() (and
> hence all their callers) anyway, take the opportunity to also drop the bogus
> double underscores from their names (and from
> __[gs]et_vmcs_virtual() as well).
> 
> Signed-off-by: Jan Beulich <jbeulich@suse.com>
> 
> --- a/xen/arch/x86/hvm/vmx/intr.c
> +++ b/xen/arch/x86/hvm/vmx/intr.c
> @@ -191,13 +191,13 @@ static int nvmx_intr_intercept(struct vc
>          if ( intack.source == hvm_intsrc_pic ||
>                   intack.source == hvm_intsrc_lapic )
>          {
> -            ctrl = __get_vvmcs(vcpu_nestedhvm(v).nv_vvmcx,
> PIN_BASED_VM_EXEC_CONTROL);
> +            ctrl = get_vvmcs(v, PIN_BASED_VM_EXEC_CONTROL);
>              if ( !(ctrl & PIN_BASED_EXT_INTR_MASK) )
>                  return 0;
> 
>              vmx_inject_extint(intack.vector, intack.source);
> 
> -            ctrl = __get_vvmcs(vcpu_nestedhvm(v).nv_vvmcx,
> VM_EXIT_CONTROLS);
> +            ctrl = get_vvmcs(v, VM_EXIT_CONTROLS);
>              if ( ctrl & VM_EXIT_ACK_INTR_ON_EXIT )
>              {
>                  /* for now, duplicate the ack path in vmx_intr_assist */
> --- a/xen/arch/x86/hvm/vmx/vmcs.c
> +++ b/xen/arch/x86/hvm/vmx/vmcs.c
> @@ -932,37 +932,36 @@ void vmx_vmcs_switch(paddr_t from, paddr
>      spin_unlock(&vmx->vmcs_lock);
>  }
> 
> -void virtual_vmcs_enter(void *vvmcs)
> +void virtual_vmcs_enter(const struct vcpu *v)
>  {
> -    __vmptrld(pfn_to_paddr(domain_page_map_to_mfn(vvmcs)));
> +    __vmptrld(v->arch.hvm_vmx.vmcs_shadow_maddr);

Debugging shows v->arch.hvm_vmx.vmcs_shadow_maddr is 0 at this point, which will crash the system.

>  }
> 
> -void virtual_vmcs_exit(void *vvmcs)
> +void virtual_vmcs_exit(const struct vcpu *v)
>  {
>      paddr_t cur = this_cpu(current_vmcs);
> 
> -    __vmpclear(pfn_to_paddr(domain_page_map_to_mfn(vvmcs)));
> +    __vmpclear(v->arch.hvm_vmx.vmcs_shadow_maddr);

Debugging shows v->arch.hvm_vmx.vmcs_shadow_maddr is 0 at this point as well, which will crash the system.
Maybe we should use pfn_to_paddr(domain_page_map_to_mfn(vvmcs)) here.

>      if ( cur )
>          __vmptrld(cur);
> -
>  }

> --- a/xen/arch/x86/hvm/vmx/vvmx.c
> +++ b/xen/arch/x86/hvm/vmx/vvmx.c

>  static void vvmcs_to_shadow_bulk(struct vcpu *v, unsigned int n, @@ -
> 950,15 +926,15 @@ static void vvmcs_to_shadow_bulk(struct
> 
>  fallback:
>      for ( i = 0; i < n; i++ )
> -        vvmcs_to_shadow(vvmcs, field[i]);
> +        vvmcs_to_shadow(v, field[i]);
>  }

You omitted something in function vvmcs_to_shadow_bulk()
=====================================
static void vvmcs_to_shadow_bulk(struct vcpu *v, unsigned int n,
                                 const u16 *field)
{
    struct nestedvcpu *nvcpu = &vcpu_nestedhvm(v);
    void *vvmcs = nvcpu->nv_vvmcx;
    u64 *value = this_cpu(vvmcs_buf);
    unsigned int i;
 
    if ( !cpu_has_vmx_vmcs_shadowing )
        goto fallback;
 
    if ( !value || n > VMCS_BUF_SIZE )
    {
        gdprintk(XENLOG_DEBUG, "vmcs sync fall back to non-bulk mode, \
                 buffer: %p, buffer size: %d, fields number: %d.\n",
                 value, VMCS_BUF_SIZE, n);
        goto fallback;
    }
 
    virtual_vmcs_enter(vvmcs);
    for ( i = 0; i < n; i++ )
        __vmread(field[i], &value[i]);
    virtual_vmcs_exit(vvmcs);
 
    for ( i = 0; i < n; i++ )
        __vmwrite(field[i], value[i]);
 
    return;
 
fallback:
    for ( i = 0; i < n; i++ )
        vvmcs_to_shadow(v, field[i]);
}
================================
You should use virtual_vmcs_enter(v) in this function, not virtual_vmcs_enter(vvmcs).
The same applies to shadow_to_vvmcs_bulk() below; a corrected sketch follows that listing.
==================================
static void shadow_to_vvmcs_bulk(struct vcpu *v, unsigned int n,
                                 const u16 *field)
{
    struct nestedvcpu *nvcpu = &vcpu_nestedhvm(v);
    void *vvmcs = nvcpu->nv_vvmcx;
    u64 *value = this_cpu(vvmcs_buf);
    unsigned int i;
 
    if ( !cpu_has_vmx_vmcs_shadowing )
        goto fallback;
 
    if ( !value || n > VMCS_BUF_SIZE )
    {
        gdprintk(XENLOG_DEBUG, "vmcs sync fall back to non-bulk mode, \
                 buffer: %p, buffer size: %d, fields number: %d.\n",
                 value, VMCS_BUF_SIZE, n);
        goto fallback;
    }
 
    for ( i = 0; i < n; i++ )
        __vmread(field[i], &value[i]);
 
    virtual_vmcs_enter(vvmcs);
    for ( i = 0; i < n; i++ )
        __vmwrite(field[i], value[i]);
    virtual_vmcs_exit(vvmcs);
 
    return;
 
fallback:
    for ( i = 0; i < n; i++ )
        shadow_to_vvmcs(v, field[i]);
}
=====================================
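For clarity, a sketch of the correction being asked for, shown for
vvmcs_to_shadow_bulk(); shadow_to_vvmcs_bulk() would wrap its __vmwrite()
loop the same way (the v2 patch further down makes exactly this change):

    /* Enter/exit the virtual VMCS via the vcpu, not via the mapping pointer. */
    virtual_vmcs_enter(v);
    for ( i = 0; i < n; i++ )
        __vmread(field[i], &value[i]);
    virtual_vmcs_exit(v);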

> @@ -1694,10 +1657,10 @@ int nvmx_handle_vmclear(struct cpu_user_
>          rc = VMFAIL_INVALID;
>      else if ( gpa == nvcpu->nv_vvmcxaddr )
>      {
> -        if ( cpu_has_vmx_vmcs_shadowing )
> -            nvmx_clear_vmcs_pointer(v, nvcpu->nv_vvmcx);
> -        clear_vvmcs_launched(&nvmx->launched_list,
> -            domain_page_map_to_mfn(nvcpu->nv_vvmcx));
> +        unsigned long mfn =
> + PFN_DOWN(v->arch.hvm_vmx.vmcs_shadow_maddr);
> +
> +        nvmx_clear_vmcs_pointer(v, nvcpu->nv_vvmcx);
> +        clear_vvmcs_launched(&nvmx->launched_list, mfn);

v->arch.hvm_vmx.vmcs_shadow_maddr will be set to 0 in nvmx_clear_vmcs_pointer(),
so mfn will be 0 at this point, which is incorrect.


Liang

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 3/3] vVMX: use latched VMCS machine address
  2016-02-23  8:34   ` Li, Liang Z
@ 2016-02-23 10:31     ` Jan Beulich
  2016-02-23 10:48       ` Li, Liang Z
  2016-02-24  7:04       ` Li, Liang Z
  0 siblings, 2 replies; 26+ messages in thread
From: Jan Beulich @ 2016-02-23 10:31 UTC (permalink / raw)
  To: Liang Z Li
  Cc: Kevin Tian, xen-devel, Ian Campbell, AndrewCooper, Ian Jackson,
	Yong Y Wang, osstest service owner, xen-devel, Jun Nakajima,
	Gordon Jin, Robert Hu

>>> On 23.02.16 at 09:34, <liang.z.li@intel.com> wrote:
> I found some issues in your patch, see the comments below.

Thanks for getting back on this.

>> --- a/xen/arch/x86/hvm/vmx/vmcs.c
>> +++ b/xen/arch/x86/hvm/vmx/vmcs.c
>> @@ -932,37 +932,36 @@ void vmx_vmcs_switch(paddr_t from, paddr
>>      spin_unlock(&vmx->vmcs_lock);
>>  }
>> 
>> -void virtual_vmcs_enter(void *vvmcs)
>> +void virtual_vmcs_enter(const struct vcpu *v)
>>  {
>> -    __vmptrld(pfn_to_paddr(domain_page_map_to_mfn(vvmcs)));
>> +    __vmptrld(v->arch.hvm_vmx.vmcs_shadow_maddr);
> 
> Debug shows  v->arch.hvm_vmx.vmcs_shadow_maddr  will equal to 0 at this point, 
> this will crash the system.
> 
>>  }
>> 
>> -void virtual_vmcs_exit(void *vvmcs)
>> +void virtual_vmcs_exit(const struct vcpu *v)
>>  {
>>      paddr_t cur = this_cpu(current_vmcs);
>> 
>> -    __vmpclear(pfn_to_paddr(domain_page_map_to_mfn(vvmcs)));
>> +    __vmpclear(v->arch.hvm_vmx.vmcs_shadow_maddr);
> 
> Debug shows  v->arch.hvm_vmx.vmcs_shadow_maddr  will equal to 0 at this point, 
> this will crash the system.

For both of these you provide too little context. In particular ...

> Maybe we should use pfn_to_paddr(domain_page_map_to_mfn(vvmcs))) here.

... this shouldn't be necessary, since the whole purpose of the
patch is to avoid this, making sure
v->arch.hvm_vmx.vmcs_shadow_maddr always represents
domain_page_map_to_mfn(vvmcs). Hence if you find the latched
field to be zero, we'll need to understand _why_ this is so, i.e.
what code path cleared the field (perhaps prematurely).
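As an aside, the invariant described here could be written down as a purely
illustrative debug check (not part of the patch) wherever the mapping is at
hand:

    /* The latched address must always track the currently mapped vvmcs. */
    ASSERT(v->arch.hvm_vmx.vmcs_shadow_maddr ==
           pfn_to_paddr(domain_page_map_to_mfn(nvcpu->nv_vvmcx)));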

>> --- a/xen/arch/x86/hvm/vmx/vvmx.c
>> +++ b/xen/arch/x86/hvm/vmx/vvmx.c
> 
>>  static void vvmcs_to_shadow_bulk(struct vcpu *v, unsigned int n, @@ -
>> 950,15 +926,15 @@ static void vvmcs_to_shadow_bulk(struct
>> 
>>  fallback:
>>      for ( i = 0; i < n; i++ )
>> -        vvmcs_to_shadow(vvmcs, field[i]);
>> +        vvmcs_to_shadow(v, field[i]);
>>  }
> 
> You omitted something in function vvmcs_to_shadow_bulk()
> =====================================
> static void vvmcs_to_shadow_bulk(struct vcpu *v, unsigned int n,
>                                  const u16 *field)
> {
>     struct nestedvcpu *nvcpu = &vcpu_nestedhvm(v);
>     void *vvmcs = nvcpu->nv_vvmcx;
>     u64 *value = this_cpu(vvmcs_buf);
>     unsigned int i;
>  
>     if ( !cpu_has_vmx_vmcs_shadowing )
>         goto fallback;
>  
>     if ( !value || n > VMCS_BUF_SIZE )
>     {
>         gdprintk(XENLOG_DEBUG, "vmcs sync fall back to non-bulk mode, \
>                  buffer: %p, buffer size: %d, fields number: %d.\n",
>                  value, VMCS_BUF_SIZE, n);
>         goto fallback;
>     }
>  
>     virtual_vmcs_enter(vvmcs);
>     for ( i = 0; i < n; i++ )
>         __vmread(field[i], &value[i]);
>     virtual_vmcs_exit(vvmcs);
>  
>     for ( i = 0; i < n; i++ )
>         __vmwrite(field[i], value[i]);
>  
>     return;
>  
> fallback:
>     for ( i = 0; i < n; i++ )
>         vvmcs_to_shadow(v, field[i]);
> }
> ================================
> You should use virtual_vmcs_enter(v) in this function, not 
> virtual_vmcs_enter(vvmcs).
> the same thing happened in the function shadow_to_vvmcs_bulk().

Oh, indeed. It's not helpful that vvmcs is of type void *.

>> @@ -1694,10 +1657,10 @@ int nvmx_handle_vmclear(struct cpu_user_
>>          rc = VMFAIL_INVALID;
>>      else if ( gpa == nvcpu->nv_vvmcxaddr )
>>      {
>> -        if ( cpu_has_vmx_vmcs_shadowing )
>> -            nvmx_clear_vmcs_pointer(v, nvcpu->nv_vvmcx);
>> -        clear_vvmcs_launched(&nvmx->launched_list,
>> -            domain_page_map_to_mfn(nvcpu->nv_vvmcx));
>> +        unsigned long mfn =
>> + PFN_DOWN(v->arch.hvm_vmx.vmcs_shadow_maddr);
>> +
>> +        nvmx_clear_vmcs_pointer(v, nvcpu->nv_vvmcx);
>> +        clear_vvmcs_launched(&nvmx->launched_list, mfn);
> 
> v->arch.hvm_vmx.vmcs_shadow_maddr will be set to 0 in 
> nvmx_clear_vmcs_pointer()
> so mfn will be 0 at this point, it's incorrect.

How so? mfn gets latched before calling nvmx_clear_vmcs_pointer(),
precisely because that function would clear
v->arch.hvm_vmx.vmcs_shadow_maddr. If mfn was zero here,
v->arch.hvm_vmx.vmcs_shadow_maddr would need to have been
zero already before the call.

Jan

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 3/3] vVMX: use latched VMCS machine address
  2016-02-23 10:31     ` Jan Beulich
@ 2016-02-23 10:48       ` Li, Liang Z
  2016-02-24  7:04       ` Li, Liang Z
  1 sibling, 0 replies; 26+ messages in thread
From: Li, Liang Z @ 2016-02-23 10:48 UTC (permalink / raw)
  To: Jan Beulich
  Cc: Tian, Kevin, xen-devel, Ian Campbell, AndrewCooper, Ian Jackson,
	Wang, Yong Y, osstest service owner, xen-devel, Nakajima, Jun,
	Jin, Gordon, Hu, Robert

> Thanks for getting back on this.
> 
> >> --- a/xen/arch/x86/hvm/vmx/vmcs.c
> >> +++ b/xen/arch/x86/hvm/vmx/vmcs.c
> >> @@ -932,37 +932,36 @@ void vmx_vmcs_switch(paddr_t from, paddr
> >>      spin_unlock(&vmx->vmcs_lock);
> >>  }
> >>
> >> -void virtual_vmcs_enter(void *vvmcs)
> >> +void virtual_vmcs_enter(const struct vcpu *v)
> >>  {
> >> -    __vmptrld(pfn_to_paddr(domain_page_map_to_mfn(vvmcs)));
> >> +    __vmptrld(v->arch.hvm_vmx.vmcs_shadow_maddr);
> >
> > Debug shows  v->arch.hvm_vmx.vmcs_shadow_maddr  will equal to 0 at
> > this point, this will crash the system.
> >
> >>  }
> >>
> >> -void virtual_vmcs_exit(void *vvmcs)
> >> +void virtual_vmcs_exit(const struct vcpu *v)
> >>  {
> >>      paddr_t cur = this_cpu(current_vmcs);
> >>
> >> -    __vmpclear(pfn_to_paddr(domain_page_map_to_mfn(vvmcs)));
> >> +    __vmpclear(v->arch.hvm_vmx.vmcs_shadow_maddr);
> >
> > Debug shows  v->arch.hvm_vmx.vmcs_shadow_maddr  will equal to 0 at
> > this point, this will crash the system.
> 
> For both of these you provide too little context. In particular ...
> 
> > Maybe we should use pfn_to_paddr(domain_page_map_to_mfn(vvmcs)))
> here.
> 
> ... this shouldn't be necessary, since the whole purpose of the patch is to
> avoid this, making sure
> v->arch.hvm_vmx.vmcs_shadow_maddr always represents
> domain_page_map_to_mfn(vvmcs). Hence if you find the latched field to be
> zero, we'll need to understand _why_ this is so, i.e.
> what code path cleared the field (perhaps prematurely).

Yes, it's better to find out the reason for this.

> >> @@ -1694,10 +1657,10 @@ int nvmx_handle_vmclear(struct cpu_user_
> >>          rc = VMFAIL_INVALID;
> >>      else if ( gpa == nvcpu->nv_vvmcxaddr )
> >>      {
> >> -        if ( cpu_has_vmx_vmcs_shadowing )
> >> -            nvmx_clear_vmcs_pointer(v, nvcpu->nv_vvmcx);
> >> -        clear_vvmcs_launched(&nvmx->launched_list,
> >> -            domain_page_map_to_mfn(nvcpu->nv_vvmcx));
> >> +        unsigned long mfn =
> >> + PFN_DOWN(v->arch.hvm_vmx.vmcs_shadow_maddr);
> >> +
> >> +        nvmx_clear_vmcs_pointer(v, nvcpu->nv_vvmcx);
> >> +        clear_vvmcs_launched(&nvmx->launched_list, mfn);
> >
> > v->arch.hvm_vmx.vmcs_shadow_maddr will be set to 0 in
> > nvmx_clear_vmcs_pointer()
> > so mfn will be 0 at this point, it's incorrect.
> 
> How that? mfn gets latched before calling nvmx_clear_vmcs_pointer(),
> precisely because that function would clear
> v->arch.hvm_vmx.vmcs_shadow_maddr. If mfn was zero here,
> v->arch.hvm_vmx.vmcs_shadow_maddr would need to have been
> zero already before the call.
> 
> Jan

You are right, I misread the code; mfn is set before nvmx_clear_vmcs_pointer().
Still, v->arch.hvm_vmx.vmcs_shadow_maddr may be 0 at this point, which will
cause clear_vvmcs_launched() to fail to remove the vvmcs from the list.

Liang

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 3/3] vVMX: use latched VMCS machine address
  2016-02-23 10:31     ` Jan Beulich
  2016-02-23 10:48       ` Li, Liang Z
@ 2016-02-24  7:04       ` Li, Liang Z
  2016-02-24 10:32         ` Jan Beulich
  1 sibling, 1 reply; 26+ messages in thread
From: Li, Liang Z @ 2016-02-24  7:04 UTC (permalink / raw)
  To: Jan Beulich
  Cc: Tian, Kevin, xen-devel, Ian Campbell, AndrewCooper, Ian Jackson,
	Wang, Yong Y, osstest service owner, xen-devel, Nakajima, Jun,
	Jin, Gordon, Hu, Robert

> >> -void virtual_vmcs_enter(void *vvmcs)
> >> +void virtual_vmcs_enter(const struct vcpu *v)
> >>  {
> >> -    __vmptrld(pfn_to_paddr(domain_page_map_to_mfn(vvmcs)));
> >> +    __vmptrld(v->arch.hvm_vmx.vmcs_shadow_maddr);
> >
> > Debug shows  v->arch.hvm_vmx.vmcs_shadow_maddr  will equal to 0 at
> > this point, this will crash the system.
> >
> >>  }
> >>
> >> -void virtual_vmcs_exit(void *vvmcs)
> >> +void virtual_vmcs_exit(const struct vcpu *v)
> >>  {
> >>      paddr_t cur = this_cpu(current_vmcs);
> >>
> >> -    __vmpclear(pfn_to_paddr(domain_page_map_to_mfn(vvmcs)));
> >> +    __vmpclear(v->arch.hvm_vmx.vmcs_shadow_maddr);
> >
> > Debug shows  v->arch.hvm_vmx.vmcs_shadow_maddr  will equal to 0 at
> > this point, this will crash the system.
> 
> For both of these you provide too little context. In particular ...
> 
> > Maybe we should use pfn_to_paddr(domain_page_map_to_mfn(vvmcs)))
> here.
> 
> ... this shouldn't be necessary, since the whole purpose of the patch is to
> avoid this, making sure
> v->arch.hvm_vmx.vmcs_shadow_maddr always represents
> domain_page_map_to_mfn(vvmcs). Hence if you find the latched field to be
> zero, we'll need to understand _why_ this is so, i.e.
> what code path cleared the field (perhaps prematurely).

Hi Jan,

I found the code path that triggers this when creating the L2 guest:
....
(XEN)nvmx_handle_vmclear
(XEN)nvmx_handle_vmptrld
(XEN)map_io_bitmap_all
(XEN)_map_io_bitmap
(XEN)virtual_vmcs_enter
(XEN)_map_io_bitmap
(XEN)virtual_vmcs_enter
(XEN)_map_msr_bitmap
(XEN)virtual_vmcs_enter
(XEN)nvmx_set_vmcs_pointer
(XEN)nvmx_handle_vmwrite
....

so virtual_vmcs_enter() is called before nvmx_set_vmcs_pointer(), and at
that point 'v->arch.hvm_vmx.vmcs_shadow_maddr' still equals 0.

Maybe 'v->arch.hvm_vmx.vmcs_shadow_maddr' should be set when
'nvcpu->nv_vvmcx' is set in nvmx_handle_vmptrld().
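A rough sketch of that idea, assuming it sits right after nvcpu->nv_vvmcx has
been established in nvmx_handle_vmptrld() (the v2 patch below is authoritative
for the actual placement):

    /* Latch the machine address of the just-mapped guest VMCS. */
    v->arch.hvm_vmx.vmcs_shadow_maddr =
        pfn_to_paddr(domain_page_map_to_mfn(nvcpu->nv_vvmcx));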

Liang

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 3/3] vVMX: use latched VMCS machine address
  2016-02-24  7:04       ` Li, Liang Z
@ 2016-02-24 10:32         ` Jan Beulich
  2016-02-24 15:08           ` Li, Liang Z
  2016-02-25  6:50           ` Li, Liang Z
  0 siblings, 2 replies; 26+ messages in thread
From: Jan Beulich @ 2016-02-24 10:32 UTC (permalink / raw)
  To: Liang Z Li
  Cc: Kevin Tian, xen-devel, Ian Campbell, AndrewCooper, Ian Jackson,
	Yong Y Wang, osstest service owner, xen-devel, Jun Nakajima,
	Gordon Jin, Robert Hu

[-- Attachment #1: Type: text/plain, Size: 1361 bytes --]

Liang,

>>> On 24.02.16 at 08:04, <liang.z.li@intel.com> wrote:
>  I found the code path when creating the L2 guest:

thanks for the analysis!

> (XEN)nvmx_handle_vmclear
> (XEN)nvmx_handle_vmptrld
> (XEN)map_io_bitmap_all
> (XEN)_map_io_bitmap
> (XEN)virtual_vmcs_enter
> (XEN)_map_io_bitmap
> (XEN)virtual_vmcs_enter
> (XEN)_map_msr_bitmap
> (XEN)virtual_vmcs_enter
> (XEN)nvmx_set_vmcs_pointer
> (XEN)nvmx_handle_vmwrite
> ....
> 
> so the virtual_vmcs_enter() will be called before the 
> nvmx_set_vmcs_pointer(),
> and at this time   'v->arch.hvm_vmx.vmcs_shadow_maddr' still equal to 0.

So this finally explains the difference in behavior between different
hardware - without VMCS shadowing we wouldn't reach
virtual_vmcs_enter() here.

> Maybe  ' v->arch.hvm_vmx.vmcs_shadow_maddr' should be set when setting the 
> 'nvcpu->nv_vvmcx'  in nvmx_handle_vmptrld().

Right, this looks to be the only viable option. In particular,
map_io_bitmap_all() and _map_msr_bitmap() cannot reasonably be
deferred past nvmx_set_vmcs_pointer(), since failure of either of them
would then require further unrolling, which seems undesirable. Plus
doing it this way allows undoing some of the changes done before.

Attached is the updated patch - could you please give it another
try (on top of current staging or master)?

Jan


[-- Attachment #2: VMX-VMCS-shadow-addr.patch --]
[-- Type: text/plain, Size: 41415 bytes --]

vVMX: use latched VMCS machine address

Instead of calling domain_page_map_to_mfn() over and over, latch the
guest VMCS machine address unconditionally (i.e. independent of whether
VMCS shadowing is supported by the hardware).

Since this requires altering the parameters of __[gs]et_vmcs{,_real}()
(and hence all their callers) anyway, take the opportunity to also drop
the bogus double underscores from their names (and from
__[gs]et_vmcs_virtual() as well).

Signed-off-by: Jan Beulich <jbeulich@suse.com>
---
v2: Correct arguments passed to virtual_vmcs_{enter,exit}() (two
    instances each). Move setting/clearing of the latched address out
    of nvmx_{set,clear}_vmcs_pointer(), allowing to otherwise restore
    them to the way they were before.
    Thanks to Liang Z Li <liang.z.li@intel.com> for the debugging.

--- unstable.orig/xen/arch/x86/hvm/vmx/intr.c
+++ unstable/xen/arch/x86/hvm/vmx/intr.c
@@ -191,13 +191,13 @@ static int nvmx_intr_intercept(struct vc
         if ( intack.source == hvm_intsrc_pic ||
                  intack.source == hvm_intsrc_lapic )
         {
-            ctrl = __get_vvmcs(vcpu_nestedhvm(v).nv_vvmcx, PIN_BASED_VM_EXEC_CONTROL);
+            ctrl = get_vvmcs(v, PIN_BASED_VM_EXEC_CONTROL);
             if ( !(ctrl & PIN_BASED_EXT_INTR_MASK) )
                 return 0;
 
             vmx_inject_extint(intack.vector, intack.source);
 
-            ctrl = __get_vvmcs(vcpu_nestedhvm(v).nv_vvmcx, VM_EXIT_CONTROLS);
+            ctrl = get_vvmcs(v, VM_EXIT_CONTROLS);
             if ( ctrl & VM_EXIT_ACK_INTR_ON_EXIT )
             {
                 /* for now, duplicate the ack path in vmx_intr_assist */
--- unstable.orig/xen/arch/x86/hvm/vmx/vmcs.c
+++ unstable/xen/arch/x86/hvm/vmx/vmcs.c
@@ -935,37 +935,36 @@ void vmx_vmcs_switch(paddr_t from, paddr
     spin_unlock(&vmx->vmcs_lock);
 }
 
-void virtual_vmcs_enter(void *vvmcs)
+void virtual_vmcs_enter(const struct vcpu *v)
 {
-    __vmptrld(pfn_to_paddr(domain_page_map_to_mfn(vvmcs)));
+    __vmptrld(v->arch.hvm_vmx.vmcs_shadow_maddr);
 }
 
-void virtual_vmcs_exit(void *vvmcs)
+void virtual_vmcs_exit(const struct vcpu *v)
 {
     paddr_t cur = this_cpu(current_vmcs);
 
-    __vmpclear(pfn_to_paddr(domain_page_map_to_mfn(vvmcs)));
+    __vmpclear(v->arch.hvm_vmx.vmcs_shadow_maddr);
     if ( cur )
         __vmptrld(cur);
-
 }
 
-u64 virtual_vmcs_vmread(void *vvmcs, u32 vmcs_encoding)
+u64 virtual_vmcs_vmread(const struct vcpu *v, u32 vmcs_encoding)
 {
     u64 res;
 
-    virtual_vmcs_enter(vvmcs);
+    virtual_vmcs_enter(v);
     __vmread(vmcs_encoding, &res);
-    virtual_vmcs_exit(vvmcs);
+    virtual_vmcs_exit(v);
 
     return res;
 }
 
-void virtual_vmcs_vmwrite(void *vvmcs, u32 vmcs_encoding, u64 val)
+void virtual_vmcs_vmwrite(const struct vcpu *v, u32 vmcs_encoding, u64 val)
 {
-    virtual_vmcs_enter(vvmcs);
+    virtual_vmcs_enter(v);
     __vmwrite(vmcs_encoding, val);
-    virtual_vmcs_exit(vvmcs);
+    virtual_vmcs_exit(v);
 }
 
 /*
--- unstable.orig/xen/arch/x86/hvm/vmx/vmx.c
+++ unstable/xen/arch/x86/hvm/vmx/vmx.c
@@ -1477,8 +1477,7 @@ void vmx_inject_extint(int trap, uint8_t
     u32    pin_based_cntrl;
 
     if ( nestedhvm_vcpu_in_guestmode(v) ) {
-        pin_based_cntrl = __get_vvmcs(vcpu_nestedhvm(v).nv_vvmcx, 
-                                     PIN_BASED_VM_EXEC_CONTROL);
+        pin_based_cntrl = get_vvmcs(v, PIN_BASED_VM_EXEC_CONTROL);
         if ( pin_based_cntrl & PIN_BASED_EXT_INTR_MASK ) {
             nvmx_enqueue_n2_exceptions (v, 
                INTR_INFO_VALID_MASK |
@@ -1498,8 +1497,7 @@ void vmx_inject_nmi(void)
     u32    pin_based_cntrl;
 
     if ( nestedhvm_vcpu_in_guestmode(v) ) {
-        pin_based_cntrl = __get_vvmcs(vcpu_nestedhvm(v).nv_vvmcx, 
-                                     PIN_BASED_VM_EXEC_CONTROL);
+        pin_based_cntrl = get_vvmcs(v, PIN_BASED_VM_EXEC_CONTROL);
         if ( pin_based_cntrl & PIN_BASED_NMI_EXITING ) {
             nvmx_enqueue_n2_exceptions (v, 
                INTR_INFO_VALID_MASK |
--- unstable.orig/xen/arch/x86/hvm/vmx/vvmx.c
+++ unstable/xen/arch/x86/hvm/vmx/vvmx.c
@@ -175,11 +175,7 @@ int nvmx_vcpu_reset(struct vcpu *v)
 
 uint64_t nvmx_vcpu_eptp_base(struct vcpu *v)
 {
-    uint64_t eptp_base;
-    struct nestedvcpu *nvcpu = &vcpu_nestedhvm(v);
-
-    eptp_base = __get_vvmcs(nvcpu->nv_vvmcx, EPT_POINTER);
-    return eptp_base & PAGE_MASK;
+    return get_vvmcs(v, EPT_POINTER) & PAGE_MASK;
 }
 
 bool_t nvmx_ept_enabled(struct vcpu *v)
@@ -236,7 +232,7 @@ static int vvmcs_offset(u32 width, u32 t
     return offset;
 }
 
-u64 __get_vvmcs_virtual(void *vvmcs, u32 vmcs_encoding)
+u64 get_vvmcs_virtual(void *vvmcs, u32 vmcs_encoding)
 {
     union vmcs_encoding enc;
     u64 *content = (u64 *) vvmcs;
@@ -266,12 +262,12 @@ u64 __get_vvmcs_virtual(void *vvmcs, u32
     return res;
 }
 
-u64 __get_vvmcs_real(void *vvmcs, u32 vmcs_encoding)
+u64 get_vvmcs_real(const struct vcpu *v, u32 encoding)
 {
-    return virtual_vmcs_vmread(vvmcs, vmcs_encoding);
+    return virtual_vmcs_vmread(v, encoding);
 }
 
-void __set_vvmcs_virtual(void *vvmcs, u32 vmcs_encoding, u64 val)
+void set_vvmcs_virtual(void *vvmcs, u32 vmcs_encoding, u64 val)
 {
     union vmcs_encoding enc;
     u64 *content = (u64 *) vvmcs;
@@ -307,9 +303,9 @@ void __set_vvmcs_virtual(void *vvmcs, u3
     content[offset] = res;
 }
 
-void __set_vvmcs_real(void *vvmcs, u32 vmcs_encoding, u64 val)
+void set_vvmcs_real(const struct vcpu *v, u32 encoding, u64 val)
 {
-    virtual_vmcs_vmwrite(vvmcs, vmcs_encoding, val);
+    virtual_vmcs_vmwrite(v, encoding, val);
 }
 
 static unsigned long reg_read(struct cpu_user_regs *regs,
@@ -331,25 +327,20 @@ static void reg_write(struct cpu_user_re
 
 static inline u32 __n2_pin_exec_control(struct vcpu *v)
 {
-    struct nestedvcpu *nvcpu = &vcpu_nestedhvm(v);
-
-    return __get_vvmcs(nvcpu->nv_vvmcx, PIN_BASED_VM_EXEC_CONTROL);
+    return get_vvmcs(v, PIN_BASED_VM_EXEC_CONTROL);
 }
 
 static inline u32 __n2_exec_control(struct vcpu *v)
 {
-    struct nestedvcpu *nvcpu = &vcpu_nestedhvm(v);
-
-    return __get_vvmcs(nvcpu->nv_vvmcx, CPU_BASED_VM_EXEC_CONTROL);
+    return get_vvmcs(v, CPU_BASED_VM_EXEC_CONTROL);
 }
 
 static inline u32 __n2_secondary_exec_control(struct vcpu *v)
 {
-    struct nestedvcpu *nvcpu = &vcpu_nestedhvm(v);
     u64 second_ctrl = 0;
 
     if ( __n2_exec_control(v) & CPU_BASED_ACTIVATE_SECONDARY_CONTROLS )
-        second_ctrl = __get_vvmcs(nvcpu->nv_vvmcx, SECONDARY_VM_EXEC_CONTROL);
+        second_ctrl = get_vvmcs(v, SECONDARY_VM_EXEC_CONTROL);
 
     return second_ctrl;
 }
@@ -502,18 +493,17 @@ static void vmreturn(struct cpu_user_reg
 bool_t nvmx_intercepts_exception(struct vcpu *v, unsigned int trap,
                                  int error_code)
 {
-    struct nestedvcpu *nvcpu = &vcpu_nestedhvm(v);
     u32 exception_bitmap, pfec_match=0, pfec_mask=0;
     int r;
 
     ASSERT ( trap < 32 );
 
-    exception_bitmap = __get_vvmcs(nvcpu->nv_vvmcx, EXCEPTION_BITMAP);
+    exception_bitmap = get_vvmcs(v, EXCEPTION_BITMAP);
     r = exception_bitmap & (1 << trap) ? 1: 0;
 
     if ( trap == TRAP_page_fault ) {
-        pfec_match = __get_vvmcs(nvcpu->nv_vvmcx, PAGE_FAULT_ERROR_CODE_MATCH);
-        pfec_mask  = __get_vvmcs(nvcpu->nv_vvmcx, PAGE_FAULT_ERROR_CODE_MASK);
+        pfec_match = get_vvmcs(v, PAGE_FAULT_ERROR_CODE_MATCH);
+        pfec_mask  = get_vvmcs(v, PAGE_FAULT_ERROR_CODE_MASK);
         if ( (error_code & pfec_mask) != pfec_match )
             r = !r;
     }
@@ -528,9 +518,7 @@ static inline u32 __shadow_control(struc
                                  unsigned int field,
                                  u32 host_value)
 {
-    struct nestedvcpu *nvcpu = &vcpu_nestedhvm(v);
-
-    return (u32) __get_vvmcs(nvcpu->nv_vvmcx, field) | host_value;
+    return get_vvmcs(v, field) | host_value;
 }
 
 static void set_shadow_control(struct vcpu *v,
@@ -597,13 +585,12 @@ void nvmx_update_secondary_exec_control(
                                         unsigned long host_cntrl)
 {
     u32 shadow_cntrl;
-    struct nestedvcpu *nvcpu = &vcpu_nestedhvm(v);
     struct nestedvmx *nvmx = &vcpu_2_nvmx(v);
     u32 apicv_bit = SECONDARY_EXEC_APIC_REGISTER_VIRT |
                     SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY;
 
     host_cntrl &= ~apicv_bit;
-    shadow_cntrl = __get_vvmcs(nvcpu->nv_vvmcx, SECONDARY_VM_EXEC_CONTROL);
+    shadow_cntrl = get_vvmcs(v, SECONDARY_VM_EXEC_CONTROL);
 
     /* No vAPIC-v support, so it shouldn't be set in vmcs12. */
     ASSERT(!(shadow_cntrl & apicv_bit));
@@ -616,10 +603,9 @@ void nvmx_update_secondary_exec_control(
 static void nvmx_update_pin_control(struct vcpu *v, unsigned long host_cntrl)
 {
     u32 shadow_cntrl;
-    struct nestedvcpu *nvcpu = &vcpu_nestedhvm(v);
 
     host_cntrl &= ~PIN_BASED_POSTED_INTERRUPT;
-    shadow_cntrl = __get_vvmcs(nvcpu->nv_vvmcx, PIN_BASED_VM_EXEC_CONTROL);
+    shadow_cntrl = get_vvmcs(v, PIN_BASED_VM_EXEC_CONTROL);
 
     /* No vAPIC-v support, so it shouldn't be set in vmcs12. */
     ASSERT(!(shadow_cntrl & PIN_BASED_POSTED_INTERRUPT));
@@ -631,9 +617,8 @@ static void nvmx_update_pin_control(stru
 static void nvmx_update_exit_control(struct vcpu *v, unsigned long host_cntrl)
 {
     u32 shadow_cntrl;
-    struct nestedvcpu *nvcpu = &vcpu_nestedhvm(v);
 
-    shadow_cntrl = __get_vvmcs(nvcpu->nv_vvmcx, VM_EXIT_CONTROLS);
+    shadow_cntrl = get_vvmcs(v, VM_EXIT_CONTROLS);
     shadow_cntrl &= ~(VM_EXIT_SAVE_DEBUG_CNTRLS 
                       | VM_EXIT_LOAD_HOST_PAT
                       | VM_EXIT_LOAD_HOST_EFER
@@ -645,9 +630,8 @@ static void nvmx_update_exit_control(str
 static void nvmx_update_entry_control(struct vcpu *v)
 {
     u32 shadow_cntrl;
-    struct nestedvcpu *nvcpu = &vcpu_nestedhvm(v);
 
-    shadow_cntrl = __get_vvmcs(nvcpu->nv_vvmcx, VM_ENTRY_CONTROLS);
+    shadow_cntrl = get_vvmcs(v, VM_ENTRY_CONTROLS);
     shadow_cntrl &= ~(VM_ENTRY_LOAD_GUEST_PAT
                       | VM_ENTRY_LOAD_GUEST_EFER
                       | VM_ENTRY_LOAD_PERF_GLOBAL_CTRL);
@@ -661,7 +645,6 @@ void nvmx_update_exception_bitmap(struct
 
 static void nvmx_update_apic_access_address(struct vcpu *v)
 {
-    struct nestedvcpu *nvcpu = &vcpu_nestedhvm(v);
     u32 ctrl;
 
     ctrl = __n2_secondary_exec_control(v);
@@ -671,7 +654,7 @@ static void nvmx_update_apic_access_addr
         unsigned long apic_gpfn;
         struct page_info *apic_pg;
 
-        apic_gpfn = __get_vvmcs(nvcpu->nv_vvmcx, APIC_ACCESS_ADDR) >> PAGE_SHIFT;
+        apic_gpfn = get_vvmcs(v, APIC_ACCESS_ADDR) >> PAGE_SHIFT;
         apic_pg = get_page_from_gfn(v->domain, apic_gpfn, &p2mt, P2M_ALLOC);
         ASSERT(apic_pg && !p2m_is_paging(p2mt));
         __vmwrite(APIC_ACCESS_ADDR, page_to_maddr(apic_pg));
@@ -683,7 +666,6 @@ static void nvmx_update_apic_access_addr
 
 static void nvmx_update_virtual_apic_address(struct vcpu *v)
 {
-    struct nestedvcpu *nvcpu = &vcpu_nestedhvm(v);
     u32 ctrl;
 
     ctrl = __n2_exec_control(v);
@@ -693,7 +675,7 @@ static void nvmx_update_virtual_apic_add
         unsigned long vapic_gpfn;
         struct page_info *vapic_pg;
 
-        vapic_gpfn = __get_vvmcs(nvcpu->nv_vvmcx, VIRTUAL_APIC_PAGE_ADDR) >> PAGE_SHIFT;
+        vapic_gpfn = get_vvmcs(v, VIRTUAL_APIC_PAGE_ADDR) >> PAGE_SHIFT;
         vapic_pg = get_page_from_gfn(v->domain, vapic_gpfn, &p2mt, P2M_ALLOC);
         ASSERT(vapic_pg && !p2m_is_paging(p2mt));
         __vmwrite(VIRTUAL_APIC_PAGE_ADDR, page_to_maddr(vapic_pg));
@@ -705,23 +687,20 @@ static void nvmx_update_virtual_apic_add
 
 static void nvmx_update_tpr_threshold(struct vcpu *v)
 {
-    struct nestedvcpu *nvcpu = &vcpu_nestedhvm(v);
     u32 ctrl = __n2_exec_control(v);
+
     if ( ctrl & CPU_BASED_TPR_SHADOW )
-        __vmwrite(TPR_THRESHOLD, __get_vvmcs(nvcpu->nv_vvmcx, TPR_THRESHOLD));
+        __vmwrite(TPR_THRESHOLD, get_vvmcs(v, TPR_THRESHOLD));
     else
         __vmwrite(TPR_THRESHOLD, 0);
 }
 
 static void nvmx_update_pfec(struct vcpu *v)
 {
-    struct nestedvcpu *nvcpu = &vcpu_nestedhvm(v);
-    void *vvmcs = nvcpu->nv_vvmcx;
-
     __vmwrite(PAGE_FAULT_ERROR_CODE_MASK,
-        __get_vvmcs(vvmcs, PAGE_FAULT_ERROR_CODE_MASK));
+              get_vvmcs(v, PAGE_FAULT_ERROR_CODE_MASK));
     __vmwrite(PAGE_FAULT_ERROR_CODE_MATCH,
-        __get_vvmcs(vvmcs, PAGE_FAULT_ERROR_CODE_MATCH));
+              get_vvmcs(v, PAGE_FAULT_ERROR_CODE_MATCH));
 }
 
 static void __clear_current_vvmcs(struct vcpu *v)
@@ -739,7 +718,7 @@ static bool_t __must_check _map_msr_bitm
 
     if ( nvmx->msrbitmap )
         hvm_unmap_guest_frame(nvmx->msrbitmap, 1);
-    gpa = __get_vvmcs(vcpu_nestedhvm(v).nv_vvmcx, MSR_BITMAP);
+    gpa = get_vvmcs(v, MSR_BITMAP);
     nvmx->msrbitmap = hvm_map_guest_frame_ro(gpa >> PAGE_SHIFT, 1);
 
     return nvmx->msrbitmap != NULL;
@@ -754,7 +733,7 @@ static bool_t __must_check _map_io_bitma
     index = vmcs_reg == IO_BITMAP_A ? 0 : 1;
     if (nvmx->iobitmap[index])
         hvm_unmap_guest_frame(nvmx->iobitmap[index], 1);
-    gpa = __get_vvmcs(vcpu_nestedhvm(v).nv_vvmcx, vmcs_reg);
+    gpa = get_vvmcs(v, vmcs_reg);
     nvmx->iobitmap[index] = hvm_map_guest_frame_ro(gpa >> PAGE_SHIFT, 1);
 
     return nvmx->iobitmap[index] != NULL;
@@ -777,6 +756,7 @@ static void nvmx_purge_vvmcs(struct vcpu
         hvm_unmap_guest_frame(nvcpu->nv_vvmcx, 1);
     nvcpu->nv_vvmcx = NULL;
     nvcpu->nv_vvmcxaddr = VMCX_EADDR;
+    v->arch.hvm_vmx.vmcs_shadow_maddr = 0;
     for (i=0; i<2; i++) {
         if ( nvmx->iobitmap[i] ) {
             hvm_unmap_guest_frame(nvmx->iobitmap[i], 1);
@@ -792,11 +772,10 @@ static void nvmx_purge_vvmcs(struct vcpu
 u64 nvmx_get_tsc_offset(struct vcpu *v)
 {
     u64 offset = 0;
-    struct nestedvcpu *nvcpu = &vcpu_nestedhvm(v);
 
-    if ( __get_vvmcs(nvcpu->nv_vvmcx, CPU_BASED_VM_EXEC_CONTROL) &
+    if ( get_vvmcs(v, CPU_BASED_VM_EXEC_CONTROL) &
          CPU_BASED_USE_TSC_OFFSETING )
-        offset = __get_vvmcs(nvcpu->nv_vvmcx, TSC_OFFSET);
+        offset = get_vvmcs(v, TSC_OFFSET);
 
     return offset;
 }
@@ -911,19 +890,14 @@ static struct vmcs_host_to_guest {
     {HOST_SYSENTER_EIP, GUEST_SYSENTER_EIP},
 };
 
-static void vvmcs_to_shadow(void *vvmcs, unsigned int field)
+static void vvmcs_to_shadow(const struct vcpu *v, unsigned int field)
 {
-    u64 value;
-
-    value = __get_vvmcs(vvmcs, field);
-    __vmwrite(field, value);
+    __vmwrite(field, get_vvmcs(v, field));
 }
 
 static void vvmcs_to_shadow_bulk(struct vcpu *v, unsigned int n,
                                  const u16 *field)
 {
-    struct nestedvcpu *nvcpu = &vcpu_nestedhvm(v);
-    void *vvmcs = nvcpu->nv_vvmcx;
     u64 *value = this_cpu(vvmcs_buf);
     unsigned int i;
 
@@ -938,10 +912,10 @@ static void vvmcs_to_shadow_bulk(struct
         goto fallback;
     }
 
-    virtual_vmcs_enter(vvmcs);
+    virtual_vmcs_enter(v);
     for ( i = 0; i < n; i++ )
         __vmread(field[i], &value[i]);
-    virtual_vmcs_exit(vvmcs);
+    virtual_vmcs_exit(v);
 
     for ( i = 0; i < n; i++ )
         __vmwrite(field[i], value[i]);
@@ -950,22 +924,20 @@ static void vvmcs_to_shadow_bulk(struct
 
 fallback:
     for ( i = 0; i < n; i++ )
-        vvmcs_to_shadow(vvmcs, field[i]);
+        vvmcs_to_shadow(v, field[i]);
 }
 
-static inline void shadow_to_vvmcs(void *vvmcs, unsigned int field)
+static inline void shadow_to_vvmcs(const struct vcpu *v, unsigned int field)
 {
     unsigned long value;
 
     if ( __vmread_safe(field, &value) )
-        __set_vvmcs(vvmcs, field, value);
+        set_vvmcs(v, field, value);
 }
 
 static void shadow_to_vvmcs_bulk(struct vcpu *v, unsigned int n,
                                  const u16 *field)
 {
-    struct nestedvcpu *nvcpu = &vcpu_nestedhvm(v);
-    void *vvmcs = nvcpu->nv_vvmcx;
     u64 *value = this_cpu(vvmcs_buf);
     unsigned int i;
 
@@ -983,16 +955,16 @@ static void shadow_to_vvmcs_bulk(struct
     for ( i = 0; i < n; i++ )
         __vmread(field[i], &value[i]);
 
-    virtual_vmcs_enter(vvmcs);
+    virtual_vmcs_enter(v);
     for ( i = 0; i < n; i++ )
         __vmwrite(field[i], value[i]);
-    virtual_vmcs_exit(vvmcs);
+    virtual_vmcs_exit(v);
 
     return;
 
 fallback:
     for ( i = 0; i < n; i++ )
-        shadow_to_vvmcs(vvmcs, field[i]);
+        shadow_to_vvmcs(v, field[i]);
 }
 
 static void load_shadow_control(struct vcpu *v)
@@ -1017,7 +989,6 @@ static void load_shadow_control(struct v
 static void load_shadow_guest_state(struct vcpu *v)
 {
     struct nestedvcpu *nvcpu = &vcpu_nestedhvm(v);
-    void *vvmcs = nvcpu->nv_vvmcx;
     u32 control;
     u64 cr_gh_mask, cr_read_shadow;
 
@@ -1031,18 +1002,18 @@ static void load_shadow_guest_state(stru
     vvmcs_to_shadow_bulk(v, ARRAY_SIZE(vmcs_gstate_field),
                          vmcs_gstate_field);
 
-    nvcpu->guest_cr[0] = __get_vvmcs(vvmcs, CR0_READ_SHADOW);
-    nvcpu->guest_cr[4] = __get_vvmcs(vvmcs, CR4_READ_SHADOW);
-    hvm_set_cr0(__get_vvmcs(vvmcs, GUEST_CR0), 1);
-    hvm_set_cr4(__get_vvmcs(vvmcs, GUEST_CR4), 1);
-    hvm_set_cr3(__get_vvmcs(vvmcs, GUEST_CR3), 1);
+    nvcpu->guest_cr[0] = get_vvmcs(v, CR0_READ_SHADOW);
+    nvcpu->guest_cr[4] = get_vvmcs(v, CR4_READ_SHADOW);
+    hvm_set_cr0(get_vvmcs(v, GUEST_CR0), 1);
+    hvm_set_cr4(get_vvmcs(v, GUEST_CR4), 1);
+    hvm_set_cr3(get_vvmcs(v, GUEST_CR3), 1);
 
-    control = __get_vvmcs(vvmcs, VM_ENTRY_CONTROLS);
+    control = get_vvmcs(v, VM_ENTRY_CONTROLS);
     if ( control & VM_ENTRY_LOAD_GUEST_PAT )
-        hvm_set_guest_pat(v, __get_vvmcs(vvmcs, GUEST_PAT));
+        hvm_set_guest_pat(v, get_vvmcs(v, GUEST_PAT));
     if ( control & VM_ENTRY_LOAD_PERF_GLOBAL_CTRL )
         hvm_msr_write_intercept(MSR_CORE_PERF_GLOBAL_CTRL,
-                                __get_vvmcs(vvmcs, GUEST_PERF_GLOBAL_CTRL), 0);
+                                get_vvmcs(v, GUEST_PERF_GLOBAL_CTRL), 0);
 
     hvm_funcs.set_tsc_offset(v, v->arch.hvm_vcpu.cache_tsc_offset, 0);
 
@@ -1053,14 +1024,14 @@ static void load_shadow_guest_state(stru
      * guest host mask to 0xffffffff in shadow VMCS (follow the host L1 VMCS),
      * then calculate the corresponding read shadow separately for CR0 and CR4.
      */
-    cr_gh_mask = __get_vvmcs(vvmcs, CR0_GUEST_HOST_MASK);
-    cr_read_shadow = (__get_vvmcs(vvmcs, GUEST_CR0) & ~cr_gh_mask) |
-                     (__get_vvmcs(vvmcs, CR0_READ_SHADOW) & cr_gh_mask);
+    cr_gh_mask = get_vvmcs(v, CR0_GUEST_HOST_MASK);
+    cr_read_shadow = (get_vvmcs(v, GUEST_CR0) & ~cr_gh_mask) |
+                     (get_vvmcs(v, CR0_READ_SHADOW) & cr_gh_mask);
     __vmwrite(CR0_READ_SHADOW, cr_read_shadow);
 
-    cr_gh_mask = __get_vvmcs(vvmcs, CR4_GUEST_HOST_MASK);
-    cr_read_shadow = (__get_vvmcs(vvmcs, GUEST_CR4) & ~cr_gh_mask) |
-                     (__get_vvmcs(vvmcs, CR4_READ_SHADOW) & cr_gh_mask);
+    cr_gh_mask = get_vvmcs(v, CR4_GUEST_HOST_MASK);
+    cr_read_shadow = (get_vvmcs(v, GUEST_CR4) & ~cr_gh_mask) |
+                     (get_vvmcs(v, CR4_READ_SHADOW) & cr_gh_mask);
     __vmwrite(CR4_READ_SHADOW, cr_read_shadow);
 
     /* TODO: CR3 target control */
@@ -1084,11 +1055,11 @@ static uint64_t get_host_eptp(struct vcp
     return ept_get_eptp(ept_data);
 }
 
-static bool_t nvmx_vpid_enabled(struct nestedvcpu *nvcpu)
+static bool_t nvmx_vpid_enabled(const struct vcpu *v)
 {
     uint32_t second_cntl;
 
-    second_cntl = __get_vvmcs(nvcpu->nv_vvmcx, SECONDARY_VM_EXEC_CONTROL);
+    second_cntl = get_vvmcs(v, SECONDARY_VM_EXEC_CONTROL);
     if ( second_cntl & SECONDARY_EXEC_ENABLE_VPID )
         return 1;
     return 0;
@@ -1096,12 +1067,10 @@ static bool_t nvmx_vpid_enabled(struct n
 
 static void nvmx_set_vmcs_pointer(struct vcpu *v, struct vmcs_struct *vvmcs)
 {
-    unsigned long vvmcs_mfn = domain_page_map_to_mfn(vvmcs);
-    paddr_t vvmcs_maddr = vvmcs_mfn << PAGE_SHIFT;
+    paddr_t vvmcs_maddr = v->arch.hvm_vmx.vmcs_shadow_maddr;
 
     __vmpclear(vvmcs_maddr);
     vvmcs->vmcs_revision_id |= VMCS_RID_TYPE_MASK;
-    v->arch.hvm_vmx.vmcs_shadow_maddr = vvmcs_maddr;
     __vmwrite(VMCS_LINK_POINTER, vvmcs_maddr);
     __vmwrite(VMREAD_BITMAP, page_to_maddr(v->arch.hvm_vmx.vmread_bitmap));
     __vmwrite(VMWRITE_BITMAP, page_to_maddr(v->arch.hvm_vmx.vmwrite_bitmap));
@@ -1109,12 +1078,10 @@ static void nvmx_set_vmcs_pointer(struct
 
 static void nvmx_clear_vmcs_pointer(struct vcpu *v, struct vmcs_struct *vvmcs)
 {
-    unsigned long vvmcs_mfn = domain_page_map_to_mfn(vvmcs);
-    paddr_t vvmcs_maddr = vvmcs_mfn << PAGE_SHIFT;
+    paddr_t vvmcs_maddr = v->arch.hvm_vmx.vmcs_shadow_maddr;
 
     __vmpclear(vvmcs_maddr);
     vvmcs->vmcs_revision_id &= ~VMCS_RID_TYPE_MASK;
-    v->arch.hvm_vmx.vmcs_shadow_maddr = 0;
     __vmwrite(VMCS_LINK_POINTER, ~0ul);
     __vmwrite(VMREAD_BITMAP, 0);
     __vmwrite(VMWRITE_BITMAP, 0);
@@ -1124,7 +1091,6 @@ static void virtual_vmentry(struct cpu_u
 {
     struct vcpu *v = current;
     struct nestedvcpu *nvcpu = &vcpu_nestedhvm(v);
-    void *vvmcs = nvcpu->nv_vvmcx;
     unsigned long lm_l1, lm_l2;
 
     vmx_vmcs_switch(v->arch.hvm_vmx.vmcs_pa, nvcpu->nv_n2vmcx_pa);
@@ -1143,8 +1109,7 @@ static void virtual_vmentry(struct cpu_u
      * L1 exit_controls
      */
     lm_l1 = !!hvm_long_mode_enabled(v);
-    lm_l2 = !!(__get_vvmcs(vvmcs, VM_ENTRY_CONTROLS) &
-                           VM_ENTRY_IA32E_MODE);
+    lm_l2 = !!(get_vvmcs(v, VM_ENTRY_CONTROLS) & VM_ENTRY_IA32E_MODE);
 
     if ( lm_l2 )
         v->arch.hvm_vcpu.guest_efer |= EFER_LMA | EFER_LME;
@@ -1161,9 +1126,9 @@ static void virtual_vmentry(struct cpu_u
          !(v->arch.hvm_vcpu.guest_efer & EFER_LMA) )
         vvmcs_to_shadow_bulk(v, ARRAY_SIZE(gpdpte_fields), gpdpte_fields);
 
-    regs->eip = __get_vvmcs(vvmcs, GUEST_RIP);
-    regs->esp = __get_vvmcs(vvmcs, GUEST_RSP);
-    regs->eflags = __get_vvmcs(vvmcs, GUEST_RFLAGS);
+    regs->eip = get_vvmcs(v, GUEST_RIP);
+    regs->esp = get_vvmcs(v, GUEST_RSP);
+    regs->eflags = get_vvmcs(v, GUEST_RFLAGS);
 
     /* updating host cr0 to sync TS bit */
     __vmwrite(HOST_CR0, v->arch.hvm_vmx.host_cr0);
@@ -1175,10 +1140,10 @@ static void virtual_vmentry(struct cpu_u
         __vmwrite(EPT_POINTER, get_host_eptp(v));
 
     /* nested VPID support! */
-    if ( cpu_has_vmx_vpid && nvmx_vpid_enabled(nvcpu) )
+    if ( cpu_has_vmx_vpid && nvmx_vpid_enabled(v) )
     {
         struct nestedvmx *nvmx = &vcpu_2_nvmx(v);
-        uint32_t new_vpid =  __get_vvmcs(vvmcs, VIRTUAL_PROCESSOR_ID);
+        uint32_t new_vpid = get_vvmcs(v, VIRTUAL_PROCESSOR_ID);
 
         if ( nvmx->guest_vpid != new_vpid )
         {
@@ -1191,34 +1156,29 @@ static void virtual_vmentry(struct cpu_u
 
 static void sync_vvmcs_guest_state(struct vcpu *v, struct cpu_user_regs *regs)
 {
-    struct nestedvcpu *nvcpu = &vcpu_nestedhvm(v);
-    void *vvmcs = nvcpu->nv_vvmcx;
-
     /* copy shadow vmcs.gstate back to vvmcs.gstate */
     shadow_to_vvmcs_bulk(v, ARRAY_SIZE(vmcs_gstate_field),
                          vmcs_gstate_field);
     /* RIP, RSP are in user regs */
-    __set_vvmcs(vvmcs, GUEST_RIP, regs->eip);
-    __set_vvmcs(vvmcs, GUEST_RSP, regs->esp);
+    set_vvmcs(v, GUEST_RIP, regs->eip);
+    set_vvmcs(v, GUEST_RSP, regs->esp);
 
     /* CR3 sync if exec doesn't want cr3 load exiting: i.e. nested EPT */
     if ( !(__n2_exec_control(v) & CPU_BASED_CR3_LOAD_EXITING) )
-        shadow_to_vvmcs(vvmcs, GUEST_CR3);
+        shadow_to_vvmcs(v, GUEST_CR3);
 }
 
 static void sync_vvmcs_ro(struct vcpu *v)
 {
-    struct nestedvcpu *nvcpu = &vcpu_nestedhvm(v);
     struct nestedvmx *nvmx = &vcpu_2_nvmx(v);
-    void *vvmcs = nvcpu->nv_vvmcx;
 
     shadow_to_vvmcs_bulk(v, ARRAY_SIZE(vmcs_ro_field), vmcs_ro_field);
 
     /* Adjust exit_reason/exit_qualifciation for violation case */
-    if ( __get_vvmcs(vvmcs, VM_EXIT_REASON) == EXIT_REASON_EPT_VIOLATION )
+    if ( get_vvmcs(v, VM_EXIT_REASON) == EXIT_REASON_EPT_VIOLATION )
     {
-        __set_vvmcs(vvmcs, EXIT_QUALIFICATION, nvmx->ept.exit_qual);
-        __set_vvmcs(vvmcs, VM_EXIT_REASON, nvmx->ept.exit_reason);
+        set_vvmcs(v, EXIT_QUALIFICATION, nvmx->ept.exit_qual);
+        set_vvmcs(v, VM_EXIT_REASON, nvmx->ept.exit_reason);
     }
 }
 
@@ -1226,34 +1186,32 @@ static void load_vvmcs_host_state(struct
 {
     int i;
     u64 r;
-    void *vvmcs = vcpu_nestedhvm(v).nv_vvmcx;
     u32 control;
 
     for ( i = 0; i < ARRAY_SIZE(vmcs_h2g_field); i++ )
     {
-        r = __get_vvmcs(vvmcs, vmcs_h2g_field[i].host_field);
+        r = get_vvmcs(v, vmcs_h2g_field[i].host_field);
         __vmwrite(vmcs_h2g_field[i].guest_field, r);
     }
 
-    hvm_set_cr0(__get_vvmcs(vvmcs, HOST_CR0), 1);
-    hvm_set_cr4(__get_vvmcs(vvmcs, HOST_CR4), 1);
-    hvm_set_cr3(__get_vvmcs(vvmcs, HOST_CR3), 1);
+    hvm_set_cr0(get_vvmcs(v, HOST_CR0), 1);
+    hvm_set_cr4(get_vvmcs(v, HOST_CR4), 1);
+    hvm_set_cr3(get_vvmcs(v, HOST_CR3), 1);
 
-    control = __get_vvmcs(vvmcs, VM_EXIT_CONTROLS);
+    control = get_vvmcs(v, VM_EXIT_CONTROLS);
     if ( control & VM_EXIT_LOAD_HOST_PAT )
-        hvm_set_guest_pat(v, __get_vvmcs(vvmcs, HOST_PAT));
+        hvm_set_guest_pat(v, get_vvmcs(v, HOST_PAT));
     if ( control & VM_EXIT_LOAD_PERF_GLOBAL_CTRL )
         hvm_msr_write_intercept(MSR_CORE_PERF_GLOBAL_CTRL,
-                                __get_vvmcs(vvmcs, HOST_PERF_GLOBAL_CTRL), 1);
+                                get_vvmcs(v, HOST_PERF_GLOBAL_CTRL), 1);
 
     hvm_funcs.set_tsc_offset(v, v->arch.hvm_vcpu.cache_tsc_offset, 0);
 
-    __set_vvmcs(vvmcs, VM_ENTRY_INTR_INFO, 0);
+    set_vvmcs(v, VM_ENTRY_INTR_INFO, 0);
 }
 
 static void sync_exception_state(struct vcpu *v)
 {
-    struct nestedvcpu *nvcpu = &vcpu_nestedhvm(v);
     struct nestedvmx *nvmx = &vcpu_2_nvmx(v);
 
     if ( !(nvmx->intr.intr_info & INTR_INFO_VALID_MASK) )
@@ -1263,10 +1221,9 @@ static void sync_exception_state(struct
     {
     case X86_EVENTTYPE_EXT_INTR:
         /* rename exit_reason to EXTERNAL_INTERRUPT */
-        __set_vvmcs(nvcpu->nv_vvmcx, VM_EXIT_REASON,
-                    EXIT_REASON_EXTERNAL_INTERRUPT);
-        __set_vvmcs(nvcpu->nv_vvmcx, EXIT_QUALIFICATION, 0);
-        __set_vvmcs(nvcpu->nv_vvmcx, VM_EXIT_INTR_INFO,
+        set_vvmcs(v, VM_EXIT_REASON, EXIT_REASON_EXTERNAL_INTERRUPT);
+        set_vvmcs(v, EXIT_QUALIFICATION, 0);
+        set_vvmcs(v, VM_EXIT_INTR_INFO,
                     nvmx->intr.intr_info);
         break;
 
@@ -1274,17 +1231,13 @@ static void sync_exception_state(struct
     case X86_EVENTTYPE_SW_INTERRUPT:
     case X86_EVENTTYPE_SW_EXCEPTION:
         /* throw to L1 */
-        __set_vvmcs(nvcpu->nv_vvmcx, VM_EXIT_INTR_INFO,
-                    nvmx->intr.intr_info);
-        __set_vvmcs(nvcpu->nv_vvmcx, VM_EXIT_INTR_ERROR_CODE,
-                    nvmx->intr.error_code);
+        set_vvmcs(v, VM_EXIT_INTR_INFO, nvmx->intr.intr_info);
+        set_vvmcs(v, VM_EXIT_INTR_ERROR_CODE, nvmx->intr.error_code);
         break;
     case X86_EVENTTYPE_NMI:
-        __set_vvmcs(nvcpu->nv_vvmcx, VM_EXIT_REASON,
-                    EXIT_REASON_EXCEPTION_NMI);
-        __set_vvmcs(nvcpu->nv_vvmcx, EXIT_QUALIFICATION, 0);
-        __set_vvmcs(nvcpu->nv_vvmcx, VM_EXIT_INTR_INFO,
-                    nvmx->intr.intr_info);
+        set_vvmcs(v, VM_EXIT_REASON, EXIT_REASON_EXCEPTION_NMI);
+        set_vvmcs(v, EXIT_QUALIFICATION, 0);
+        set_vvmcs(v, VM_EXIT_INTR_INFO, nvmx->intr.intr_info);
         break;
     default:
         gdprintk(XENLOG_ERR, "Exception state %lx not handled\n",
@@ -1296,9 +1249,8 @@ static void sync_exception_state(struct
 static void nvmx_update_apicv(struct vcpu *v)
 {
     struct nestedvmx *nvmx = &vcpu_2_nvmx(v);
-    struct nestedvcpu *nvcpu = &vcpu_nestedhvm(v);
-    unsigned long reason = __get_vvmcs(nvcpu->nv_vvmcx, VM_EXIT_REASON);
-    uint32_t intr_info = __get_vvmcs(nvcpu->nv_vvmcx, VM_EXIT_INTR_INFO);
+    unsigned long reason = get_vvmcs(v, VM_EXIT_REASON);
+    uint32_t intr_info = get_vvmcs(v, VM_EXIT_INTR_INFO);
 
     if ( reason == EXIT_REASON_EXTERNAL_INTERRUPT &&
          nvmx->intr.source == hvm_intsrc_lapic &&
@@ -1344,8 +1296,7 @@ static void virtual_vmexit(struct cpu_us
     nvcpu->nv_vmswitch_in_progress = 1;
 
     lm_l2 = !!hvm_long_mode_enabled(v);
-    lm_l1 = !!(__get_vvmcs(nvcpu->nv_vvmcx, VM_EXIT_CONTROLS) &
-                           VM_EXIT_IA32E_MODE);
+    lm_l1 = !!(get_vvmcs(v, VM_EXIT_CONTROLS) & VM_EXIT_IA32E_MODE);
 
     if ( lm_l1 )
         v->arch.hvm_vcpu.guest_efer |= EFER_LMA | EFER_LME;
@@ -1361,8 +1312,8 @@ static void virtual_vmexit(struct cpu_us
     if ( lm_l1 != lm_l2 )
         paging_update_paging_modes(v);
 
-    regs->eip = __get_vvmcs(nvcpu->nv_vvmcx, HOST_RIP);
-    regs->esp = __get_vvmcs(nvcpu->nv_vvmcx, HOST_RSP);
+    regs->eip = get_vvmcs(v, HOST_RIP);
+    regs->esp = get_vvmcs(v, HOST_RSP);
     /* VM exit clears all bits except bit 1 */
     regs->eflags = 0x2;
 
@@ -1539,7 +1490,6 @@ int nvmx_handle_vmresume(struct cpu_user
 {
     bool_t launched;
     struct vcpu *v = current;
-    struct nestedvcpu *nvcpu = &vcpu_nestedhvm(v);
     struct nestedvmx *nvmx = &vcpu_2_nvmx(v);
     int rc = vmx_inst_check_privilege(regs, 0);
 
@@ -1553,7 +1503,7 @@ int nvmx_handle_vmresume(struct cpu_user
     }
 
     launched = vvmcs_launched(&nvmx->launched_list,
-                   domain_page_map_to_mfn(nvcpu->nv_vvmcx));
+                              PFN_DOWN(v->arch.hvm_vmx.vmcs_shadow_maddr));
     if ( !launched ) {
        vmreturn (regs, VMFAIL_VALID);
        return X86EMUL_OKAY;
@@ -1565,7 +1515,6 @@ int nvmx_handle_vmlaunch(struct cpu_user
 {
     bool_t launched;
     struct vcpu *v = current;
-    struct nestedvcpu *nvcpu = &vcpu_nestedhvm(v);
     struct nestedvmx *nvmx = &vcpu_2_nvmx(v);
     int rc = vmx_inst_check_privilege(regs, 0);
 
@@ -1579,7 +1528,7 @@ int nvmx_handle_vmlaunch(struct cpu_user
     }
 
     launched = vvmcs_launched(&nvmx->launched_list,
-                   domain_page_map_to_mfn(nvcpu->nv_vvmcx));
+                              PFN_DOWN(v->arch.hvm_vmx.vmcs_shadow_maddr));
     if ( launched ) {
        vmreturn (regs, VMFAIL_VALID);
        return X86EMUL_OKAY;
@@ -1589,7 +1538,7 @@ int nvmx_handle_vmlaunch(struct cpu_user
         if ( rc == X86EMUL_OKAY )
         {
             if ( set_vvmcs_launched(&nvmx->launched_list,
-                    domain_page_map_to_mfn(nvcpu->nv_vvmcx)) < 0 )
+                                    PFN_DOWN(v->arch.hvm_vmx.vmcs_shadow_maddr)) < 0 )
                 return X86EMUL_UNHANDLEABLE;
         }
     }
@@ -1628,6 +1577,8 @@ int nvmx_handle_vmptrld(struct cpu_user_
             {
                 nvcpu->nv_vvmcx = vvmcx;
                 nvcpu->nv_vvmcxaddr = gpa;
+                v->arch.hvm_vmx.vmcs_shadow_maddr =
+                    pfn_to_paddr(domain_page_map_to_mfn(vvmcx));
             }
             else
             {
@@ -1697,7 +1648,7 @@ int nvmx_handle_vmclear(struct cpu_user_
         if ( cpu_has_vmx_vmcs_shadowing )
             nvmx_clear_vmcs_pointer(v, nvcpu->nv_vvmcx);
         clear_vvmcs_launched(&nvmx->launched_list,
-            domain_page_map_to_mfn(nvcpu->nv_vvmcx));
+                             PFN_DOWN(v->arch.hvm_vmx.vmcs_shadow_maddr));
         nvmx_purge_vvmcs(v);
     }
     else 
@@ -1726,7 +1677,6 @@ int nvmx_handle_vmread(struct cpu_user_r
 {
     struct vcpu *v = current;
     struct vmx_inst_decoded decode;
-    struct nestedvcpu *nvcpu = &vcpu_nestedhvm(v);
     u64 value = 0;
     int rc;
 
@@ -1734,7 +1684,7 @@ int nvmx_handle_vmread(struct cpu_user_r
     if ( rc != X86EMUL_OKAY )
         return rc;
 
-    value = __get_vvmcs(nvcpu->nv_vvmcx, reg_read(regs, decode.reg2));
+    value = get_vvmcs(v, reg_read(regs, decode.reg2));
 
     switch ( decode.type ) {
     case VMX_INST_MEMREG_TYPE_MEMORY:
@@ -1755,7 +1705,6 @@ int nvmx_handle_vmwrite(struct cpu_user_
 {
     struct vcpu *v = current;
     struct vmx_inst_decoded decode;
-    struct nestedvcpu *nvcpu = &vcpu_nestedhvm(v);
     unsigned long operand; 
     u64 vmcs_encoding;
     bool_t okay = 1;
@@ -1765,7 +1714,7 @@ int nvmx_handle_vmwrite(struct cpu_user_
         return X86EMUL_EXCEPTION;
 
     vmcs_encoding = reg_read(regs, decode.reg2);
-    __set_vvmcs(nvcpu->nv_vvmcx, vmcs_encoding, operand);
+    set_vvmcs(v, vmcs_encoding, operand);
 
     switch ( vmcs_encoding & ~VMCS_HIGH(0) )
     {
@@ -2199,7 +2148,7 @@ int nvmx_n2_vmexit_handler(struct cpu_us
         }
         else if ( (intr_info & valid_mask) == valid_mask )
         {
-            exec_bitmap =__get_vvmcs(nvcpu->nv_vvmcx, EXCEPTION_BITMAP);
+            exec_bitmap = get_vvmcs(v, EXCEPTION_BITMAP);
 
             if ( exec_bitmap & (1 << vector) )
                 nvcpu->nv_vmexit_pending = 1;
@@ -2319,8 +2268,7 @@ int nvmx_n2_vmexit_handler(struct cpu_us
              * special handler is needed if L1 doesn't intercept rdtsc,
              * avoiding changing guest_tsc and messing up timekeeping in L1
              */
-            tsc = hvm_get_guest_tsc(v);
-            tsc += __get_vvmcs(nvcpu->nv_vvmcx, TSC_OFFSET);
+            tsc = hvm_get_guest_tsc(v) + get_vvmcs(v, TSC_OFFSET);
             regs->eax = (uint32_t)tsc;
             regs->edx = (uint32_t)(tsc >> 32);
             update_guest_eip();
@@ -2409,7 +2357,7 @@ int nvmx_n2_vmexit_handler(struct cpu_us
                 val = *reg;
                 if ( cr == 0 )
                 {
-                    u64 cr0_gh_mask = __get_vvmcs(nvcpu->nv_vvmcx, CR0_GUEST_HOST_MASK);
+                    u64 cr0_gh_mask = get_vvmcs(v, CR0_GUEST_HOST_MASK);
 
                     __vmread(CR0_READ_SHADOW, &old_val);
                     changed_bits = old_val ^ val;
@@ -2417,14 +2365,15 @@ int nvmx_n2_vmexit_handler(struct cpu_us
                         nvcpu->nv_vmexit_pending = 1;
                     else
                     {
-                        u64 guest_cr0 = __get_vvmcs(nvcpu->nv_vvmcx, GUEST_CR0);
-                        __set_vvmcs(nvcpu->nv_vvmcx, GUEST_CR0,
-                                    (guest_cr0 & cr0_gh_mask) | (val & ~cr0_gh_mask));
+                        u64 guest_cr0 = get_vvmcs(v, GUEST_CR0);
+
+                        set_vvmcs(v, GUEST_CR0,
+                                  (guest_cr0 & cr0_gh_mask) | (val & ~cr0_gh_mask));
                     }
                 }
                 else if ( cr == 4 )
                 {
-                    u64 cr4_gh_mask = __get_vvmcs(nvcpu->nv_vvmcx, CR4_GUEST_HOST_MASK);
+                    u64 cr4_gh_mask = get_vvmcs(v, CR4_GUEST_HOST_MASK);
 
                     __vmread(CR4_READ_SHADOW, &old_val);
                     changed_bits = old_val ^ val;
@@ -2432,9 +2381,10 @@ int nvmx_n2_vmexit_handler(struct cpu_us
                         nvcpu->nv_vmexit_pending = 1;
                     else
                     {
-                        u64 guest_cr4 = __get_vvmcs(nvcpu->nv_vvmcx, GUEST_CR4);
-                        __set_vvmcs(nvcpu->nv_vvmcx, GUEST_CR4,
-                                    (guest_cr4 & cr4_gh_mask) | (val & ~cr4_gh_mask));
+                        u64 guest_cr4 = get_vvmcs(v, GUEST_CR4);
+
+                        set_vvmcs(v, GUEST_CR4,
+                                  (guest_cr4 & cr4_gh_mask) | (val & ~cr4_gh_mask));
                     }
                 }
                 else
@@ -2443,20 +2393,21 @@ int nvmx_n2_vmexit_handler(struct cpu_us
             }
             case VMX_CONTROL_REG_ACCESS_TYPE_CLTS:
             {
-                u64 cr0_gh_mask = __get_vvmcs(nvcpu->nv_vvmcx, CR0_GUEST_HOST_MASK);
+                u64 cr0_gh_mask = get_vvmcs(v, CR0_GUEST_HOST_MASK);
 
                 if ( cr0_gh_mask & X86_CR0_TS )
                     nvcpu->nv_vmexit_pending = 1;
                 else
                 {
-                    u64 guest_cr0 = __get_vvmcs(nvcpu->nv_vvmcx, GUEST_CR0);
-                    __set_vvmcs(nvcpu->nv_vvmcx, GUEST_CR0, (guest_cr0 & ~X86_CR0_TS));
+                    u64 guest_cr0 = get_vvmcs(v, GUEST_CR0);
+
+                    set_vvmcs(v, GUEST_CR0, (guest_cr0 & ~X86_CR0_TS));
                 }
                 break;
             }
             case VMX_CONTROL_REG_ACCESS_TYPE_LMSW:
             {
-                u64 cr0_gh_mask = __get_vvmcs(nvcpu->nv_vvmcx, CR0_GUEST_HOST_MASK);
+                u64 cr0_gh_mask = get_vvmcs(v, CR0_GUEST_HOST_MASK);
 
                 __vmread(CR0_READ_SHADOW, &old_val);
                 old_val &= X86_CR0_PE|X86_CR0_MP|X86_CR0_EM|X86_CR0_TS;
@@ -2467,8 +2418,9 @@ int nvmx_n2_vmexit_handler(struct cpu_us
                     nvcpu->nv_vmexit_pending = 1;
                 else
                 {
-                    u64 guest_cr0 = __get_vvmcs(nvcpu->nv_vvmcx, GUEST_CR0);
-                    __set_vvmcs(nvcpu->nv_vvmcx, GUEST_CR0, (guest_cr0 & cr0_gh_mask) | (val & ~cr0_gh_mask));
+                    u64 guest_cr0 = get_vvmcs(v, GUEST_CR0);
+
+                    set_vvmcs(v, GUEST_CR0, (guest_cr0 & cr0_gh_mask) | (val & ~cr0_gh_mask));
                 }
                 break;
             }
@@ -2520,7 +2472,7 @@ void nvmx_set_cr_read_shadow(struct vcpu
     if ( !nestedhvm_vmswitch_in_progress(v) )
     {
         unsigned long virtual_cr_mask = 
-            __get_vvmcs(vcpu_nestedhvm(v).nv_vvmcx, mask_field);
+            get_vvmcs(v, mask_field);
 
         /*
          * We get here when L2 changed cr in a way that did not change
@@ -2532,7 +2484,7 @@ void nvmx_set_cr_read_shadow(struct vcpu
          */
         v->arch.hvm_vcpu.guest_cr[cr] &= ~virtual_cr_mask;
         v->arch.hvm_vcpu.guest_cr[cr] |= virtual_cr_mask &
-            __get_vvmcs(vcpu_nestedhvm(v).nv_vvmcx, cr_field);
+            get_vvmcs(v, cr_field);
     }
 
     /* nvcpu.guest_cr is what L2 write to cr actually. */
--- unstable.orig/xen/include/asm-x86/hvm/vmx/vmcs.h
+++ unstable/xen/include/asm-x86/hvm/vmx/vmcs.h
@@ -105,7 +105,7 @@ struct arch_vmx_struct {
     /* Physical address of VMCS. */
     paddr_t              vmcs_pa;
     /* VMCS shadow machine address. */
-    paddr_t             vmcs_shadow_maddr;
+    paddr_t              vmcs_shadow_maddr;
 
     /* Protects remote usage of VMCS (VMPTRLD/VMCLEAR). */
     spinlock_t           vmcs_lock;
@@ -508,10 +508,10 @@ void vmx_vmcs_switch(paddr_t from, paddr
 void vmx_set_eoi_exit_bitmap(struct vcpu *v, u8 vector);
 void vmx_clear_eoi_exit_bitmap(struct vcpu *v, u8 vector);
 int vmx_check_msr_bitmap(unsigned long *msr_bitmap, u32 msr, int access_type);
-void virtual_vmcs_enter(void *vvmcs);
-void virtual_vmcs_exit(void *vvmcs);
-u64 virtual_vmcs_vmread(void *vvmcs, u32 vmcs_encoding);
-void virtual_vmcs_vmwrite(void *vvmcs, u32 vmcs_encoding, u64 val);
+void virtual_vmcs_enter(const struct vcpu *);
+void virtual_vmcs_exit(const struct vcpu *);
+u64 virtual_vmcs_vmread(const struct vcpu *, u32 encoding);
+void virtual_vmcs_vmwrite(const struct vcpu *, u32 encoding, u64 val);
 
 static inline int vmx_add_guest_msr(u32 msr)
 {
--- unstable.orig/xen/include/asm-x86/hvm/vmx/vvmx.h
+++ unstable/xen/include/asm-x86/hvm/vmx/vvmx.h
@@ -181,18 +181,20 @@ enum vvmcs_encoding_type {
     VVMCS_TYPE_HSTATE,
 };
 
-u64 __get_vvmcs_virtual(void *vvmcs, u32 vmcs_encoding);
-u64 __get_vvmcs_real(void *vvmcs, u32 vmcs_encoding);
-void __set_vvmcs_virtual(void *vvmcs, u32 vmcs_encoding, u64 val);
-void __set_vvmcs_real(void *vvmcs, u32 vmcs_encoding, u64 val);
+u64 get_vvmcs_virtual(void *vvmcs, u32 encoding);
+u64 get_vvmcs_real(const struct vcpu *, u32 encoding);
+void set_vvmcs_virtual(void *vvmcs, u32 encoding, u64 val);
+void set_vvmcs_real(const struct vcpu *, u32 encoding, u64 val);
 
-#define __get_vvmcs(_vvmcs, _vmcs_encoding) \
-  (cpu_has_vmx_vmcs_shadowing ? __get_vvmcs_real(_vvmcs, _vmcs_encoding) \
-                              : __get_vvmcs_virtual(_vvmcs, _vmcs_encoding))
+#define get_vvmcs(vcpu, encoding) \
+  (cpu_has_vmx_vmcs_shadowing ? \
+   get_vvmcs_real(vcpu, encoding) : \
+   get_vvmcs_virtual(vcpu_nestedhvm(vcpu).nv_vvmcx, encoding))
 
-#define __set_vvmcs(_vvmcs, _vmcs_encoding, _val) \
-  (cpu_has_vmx_vmcs_shadowing ? __set_vvmcs_real(_vvmcs, _vmcs_encoding, _val) \
-                              : __set_vvmcs_virtual(_vvmcs, _vmcs_encoding, _val))
+#define set_vvmcs(vcpu, encoding, val) \
+  (cpu_has_vmx_vmcs_shadowing ? \
+   set_vvmcs_real(vcpu, encoding, val) : \
+   set_vvmcs_virtual(vcpu_nestedhvm(vcpu).nv_vvmcx, encoding, val))
 
 uint64_t get_shadow_eptp(struct vcpu *v);
 


^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 3/3] vVMX: use latched VMCS machine address
  2016-02-24 10:32         ` Jan Beulich
@ 2016-02-24 15:08           ` Li, Liang Z
  2016-02-25  6:50           ` Li, Liang Z
  1 sibling, 0 replies; 26+ messages in thread
From: Li, Liang Z @ 2016-02-24 15:08 UTC (permalink / raw)
  To: Jan Beulich
  Cc: Tian, Kevin, xen-devel, Ian Campbell, AndrewCooper, Ian Jackson,
	Wang, Yong Y, osstest service owner, xen-devel, Nakajima, Jun,
	Jin, Gordon, Hu, Robert

> >>> On 24.02.16 at 08:04, <liang.z.li@intel.com> wrote:
> >  I found the code path when creating the L2 guest:
> 
> thanks for the analysis!
> 
> > (XEN)nvmx_handle_vmclear
> > (XEN)nvmx_handle_vmptrld
> > (XEN)map_io_bitmap_all
> > (XEN)_map_io_bitmap
> > (XEN)virtual_vmcs_enter
> > (XEN)_map_io_bitmap
> > (XEN)virtual_vmcs_enter
> > (XEN)_map_msr_bitmap
> > (XEN)virtual_vmcs_enter
> > (XEN)nvmx_set_vmcs_pointer
> > (XEN)nvmx_handle_vmwrite
> > ....
> >
> > so the virtual_vmcs_enter() will be called before the
> > nvmx_set_vmcs_pointer(),
> > and at this time 'v->arch.hvm_vmx.vmcs_shadow_maddr' is still equal to 0.
> 
> So this finally explains the difference in behavior between different
> hardware - without VMCS shadowing we wouldn't reach
> virtual_vmcs_enter() here.
> 
> > Maybe 'v->arch.hvm_vmx.vmcs_shadow_maddr' should be set when setting
> > the 'nvcpu->nv_vvmcx' in nvmx_handle_vmptrld().
> 
> Right, this looks to be the only viable option. In particular, the calls to
> map_io_bitmap_all() and _map_msr_bitmap() cannot reasonably be deferred
> until after nvmx_set_vmcs_pointer(), since failure of any of them would
> then require further unrolling, which seems undesirable. Plus doing it
> this way allows undoing some of the changes done before.
> 
> Attached the updated patch - could you please give this another try (on top
> of current staging or master)?
> 
> Jan

No problem, I will tell you the result later.

Liang
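
A minimal sketch of the fix being discussed -- latching the shadow VMCS
machine address at the same time nv_vvmcx is set during VMPTRLD emulation,
so that map_io_bitmap_all() and _map_msr_bitmap(), which reach
virtual_vmcs_enter() on VMCS-shadowing hardware, never see a zero
vmcs_shadow_maddr. The helper name vmptrld_latch_sketch() is made up for
illustration, and hvm_map_guest_frame_rw() is assumed to mirror the
hvm_map_guest_frame_ro() call visible in the patch; error handling and the
bitmap mapping itself are elided. This is not the attached v2 patch.

    /* Illustrative sketch only -- not the posted patch. */
    static int vmptrld_latch_sketch(struct vcpu *v, uint64_t gpa)
    {
        struct nestedvcpu *nvcpu = &vcpu_nestedhvm(v);
        void *vvmcx = hvm_map_guest_frame_rw(gpa >> PAGE_SHIFT, 1);

        if ( !vvmcx )
            return -1;

        nvcpu->nv_vvmcx = vvmcx;
        nvcpu->nv_vvmcxaddr = gpa;
        /*
         * Latch the machine address together with nv_vvmcx, before any
         * helper (e.g. _map_msr_bitmap()) ends up in virtual_vmcs_enter().
         */
        v->arch.hvm_vmx.vmcs_shadow_maddr =
            pfn_to_paddr(domain_page_map_to_mfn(vvmcx));

        return 0;
    }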

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 3/3] vVMX: use latched VMCS machine address
  2016-02-24 10:32         ` Jan Beulich
  2016-02-24 15:08           ` Li, Liang Z
@ 2016-02-25  6:50           ` Li, Liang Z
  2016-02-25  9:01             ` Jan Beulich
  1 sibling, 1 reply; 26+ messages in thread
From: Li, Liang Z @ 2016-02-25  6:50 UTC (permalink / raw)
  To: Jan Beulich
  Cc: Tian, Kevin, xen-devel, Ian Campbell, AndrewCooper, Ian Jackson,
	Wang, Yong Y, osstest service owner, xen-devel, Nakajima, Jun,
	Jin, Gordon, Hu, Robert

> thanks for the analysis!
> 
> > (XEN)nvmx_handle_vmclear
> > (XEN)nvmx_handle_vmptrld
> > (XEN)map_io_bitmap_all
> > (XEN)_map_io_bitmap
> > (XEN)virtual_vmcs_enter
> > (XEN)_map_io_bitmap
> > (XEN)virtual_vmcs_enter
> > (XEN)_map_msr_bitmap
> > (XEN)virtual_vmcs_enter
> > (XEN)nvmx_set_vmcs_pointer
> > (XEN)nvmx_handle_vmwrite
> > ....
> >
> > so the virtual_vmcs_enter() will be called before the
> > nvmx_set_vmcs_pointer(),
> > and at this time 'v->arch.hvm_vmx.vmcs_shadow_maddr' is still equal to 0.
> 
> So this finally explains the difference in behavior between different
> hardware - without VMCS shadowing we wouldn't reach
> virtual_vmcs_enter() here.
> 
> > Maybe 'v->arch.hvm_vmx.vmcs_shadow_maddr' should be set when setting
> > the 'nvcpu->nv_vvmcx' in nvmx_handle_vmptrld().
> 
> Right, this looks to be the only viable option. In particular, the calls to
> map_io_bitmap_all() and _map_msr_bitmap() cannot reasonably be deferred
> until after nvmx_set_vmcs_pointer(), since failure of any of them would
> then require further unrolling, which seems undesirable. Plus doing it
> this way allows undoing some of the changes done before.
> 
> Attached the updated patch - could you please give this another try (on top
> of current staging or master)?
> 
> Jan

Hi Jan,

This version works well on HSW-EP on top of the current master; no obvious issues were found.

Liang
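
A related, purely illustrative hardening idea (not part of either patch
version): with the machine address now latched in vmcs_shadow_maddr, a
cheap assertion in virtual_vmcs_enter() would have turned the zero-address
VMPTRLD described above into an immediate, obvious failure. The function
body is the one from the patch; only the ASSERT() is added here as an
assumption of how such a check could look.

    void virtual_vmcs_enter(const struct vcpu *v)
    {
        /* Sketch: catch a not-yet-latched (zero) shadow VMCS address. */
        ASSERT(v->arch.hvm_vmx.vmcs_shadow_maddr);
        __vmptrld(v->arch.hvm_vmx.vmcs_shadow_maddr);
    }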

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 3/3] vVMX: use latched VMCS machine address
  2016-02-25  6:50           ` Li, Liang Z
@ 2016-02-25  9:01             ` Jan Beulich
  2016-02-25  9:42               ` Li, Liang Z
  0 siblings, 1 reply; 26+ messages in thread
From: Jan Beulich @ 2016-02-25  9:01 UTC (permalink / raw)
  To: Liang Z Li
  Cc: Kevin Tian, xen-devel, Ian Campbell, AndrewCooper, Ian Jackson,
	Yong Y Wang, osstest service owner, xen-devel, Jun Nakajima,
	Gordon Jin, Robert Hu

>>> On 25.02.16 at 07:50, <liang.z.li@intel.com> wrote:
>>  thanks for the analysis!
>> 
>> > (XEN)nvmx_handle_vmclear
>> > (XEN)nvmx_handle_vmptrld
>> > (XEN)map_io_bitmap_all
>> > (XEN)_map_io_bitmap
>> > (XEN)virtual_vmcs_enter
>> > (XEN)_map_io_bitmap
>> > (XEN)virtual_vmcs_enter
>> > (XEN)_map_msr_bitmap
>> > (XEN)virtual_vmcs_enter
>> > (XEN)nvmx_set_vmcs_pointer
>> > (XEN)nvmx_handle_vmwrite
>> > ....
>> >
>> > so the virtual_vmcs_enter() will be called before the
>> > nvmx_set_vmcs_pointer(),
>> > and at this time 'v->arch.hvm_vmx.vmcs_shadow_maddr' is still equal to 0.
>> 
>> So this finally explains the difference in behavior between different
>> hardware - without VMCS shadowing we wouldn't reach
>> virtual_vmcs_enter() here.
>> 
>> > Maybe 'v->arch.hvm_vmx.vmcs_shadow_maddr' should be set when setting
>> > the 'nvcpu->nv_vvmcx' in nvmx_handle_vmptrld().
>> 
>> Right, this looks to be the only viable option. In particular, the calls to
>> map_io_bitmap_all() and _map_msr_bitmap() cannot reasonably be deferred
>> until after nvmx_set_vmcs_pointer(), since failure of any of them would
>> then require further unrolling, which seems undesirable. Plus doing it
>> this way allows undoing some of the changes done before.
>> 
>> Attached the updated patch - could you please give this another try (on top
>> of current staging or master)?
> 
> This version works well on HSW-EP on top of the current master; no obvious
> issues were found.

And that was also where things didn't work before?

Thanks for your help with this!

Jan

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 3/3] vVMX: use latched VMCS machine address
  2016-02-25  9:01             ` Jan Beulich
@ 2016-02-25  9:42               ` Li, Liang Z
  0 siblings, 0 replies; 26+ messages in thread
From: Li, Liang Z @ 2016-02-25  9:42 UTC (permalink / raw)
  To: Jan Beulich
  Cc: Tian, Kevin, xen-devel, Ian Campbell, AndrewCooper, Ian Jackson,
	Wang, Yong Y, osstest service owner, xen-devel, Nakajima, Jun,
	Jin, Gordon, Hu, Robert

> >> > (XEN)nvmx_handle_vmclear
> >> > (XEN)nvmx_handle_vmptrld
> >> > (XEN)map_io_bitmap_all
> >> > (XEN)_map_io_bitmap
> >> > (XEN)virtual_vmcs_enter
> >> > (XEN)_map_io_bitmap
> >> > (XEN)virtual_vmcs_enter
> >> > (XEN)_map_msr_bitmap
> >> > (XEN)virtual_vmcs_enter
> >> > (XEN)nvmx_set_vmcs_pointer
> >> > (XEN)nvmx_handle_vmwrite
> >> > ....
> >> >
> >> > so the virtual_vmcs_enter() will be called before the
> >> > nvmx_set_vmcs_pointer(),
> >> > and at this time   'v->arch.hvm_vmx.vmcs_shadow_maddr' still equal to
> 0.
> >>
> >> So this finally explains the difference in behavior between different
> >> hardware - without VMCS shadowing we wouldn't reach
> >> virtual_vmcs_enter() here.
> >>
> >> > Maybe  ' v->arch.hvm_vmx.vmcs_shadow_maddr' should be set when
> >> setting
> >> > the 'nvcpu->nv_vvmcx'  in nvmx_handle_vmptrld().
> >>
> >> Right, this looks to be the only viable option. In particular,
> >> deferring
> >> map_io_bitmap_all() and _map_msr_bitmap() cannot reasonably be
> moved
> >> past nvmx_set_vmcs_pointer(), since failure of any of them would then
> >> require further unrolling, which seems undesirable. Plus doing it
> >> this way allows undoing some of the changes done before.
> >>
> >> Attached the updated patch - could you please give this another try
> >> (on top of current staging or master)?
> >
> > This version work well on HSW-EP on top of the current master, no
> > obvious issue was found.
> 
> And that was also where things didn't work before?
> 

Yes. I tested the v2 patch based on commit ID 3b474316, and it worked.
The v1 patch didn't work at that point.

> Thanks for your help with this!

You are welcome!
> 
> Jan

Liang


^ permalink raw reply	[flat|nested] 26+ messages in thread

end of thread, other threads:[~2016-02-25 10:02 UTC | newest]

Thread overview: 26+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-10-19 15:10 [PATCH 0/3] VMX: misc adjustments Jan Beulich
2015-10-19 15:21 ` [PATCH 1/3] VMX: re-order definitions Jan Beulich
2015-10-21  3:04   ` Tian, Kevin
2015-10-19 15:22 ` [PATCH 2/3] VMX: allocate VMCS pages from domain heap Jan Beulich
2015-10-20 10:12   ` Andrew Cooper
2015-10-20 10:35     ` Jan Beulich
2015-10-21  3:16       ` Tian, Kevin
2015-10-21  5:54         ` Jan Beulich
2015-11-23 14:28         ` Jan Beulich
2015-11-24  5:04           ` Tian, Kevin
2015-11-24  7:50             ` Jan Beulich
2015-11-24 10:55               ` Andrew Cooper
2015-11-24 10:59                 ` Andrew Cooper
2015-11-24 11:04                   ` Jan Beulich
2015-11-24 11:09                     ` Andrew Cooper
2015-10-19 15:23 ` [PATCH 3/3] vVMX: use latched VMCS machine address Jan Beulich
2015-10-21  5:44   ` Tian, Kevin
2016-02-23  8:34   ` Li, Liang Z
2016-02-23 10:31     ` Jan Beulich
2016-02-23 10:48       ` Li, Liang Z
2016-02-24  7:04       ` Li, Liang Z
2016-02-24 10:32         ` Jan Beulich
2016-02-24 15:08           ` Li, Liang Z
2016-02-25  6:50           ` Li, Liang Z
2016-02-25  9:01             ` Jan Beulich
2016-02-25  9:42               ` Li, Liang Z
