* [PATCH 0/4] nested vmx code clean up and restructure
@ 2012-11-21  9:04 Dongxiao Xu
  2012-11-21  9:04 ` [PATCH 1/4] nested vmx: clean up for vmcs12 read and write Dongxiao Xu
                   ` (3 more replies)
  0 siblings, 4 replies; 13+ messages in thread
From: Dongxiao Xu @ 2012-11-21  9:04 UTC (permalink / raw)
  To: kvm; +Cc: mtosatti, gleb

This patch series cleans up and restructures part of the nested vmx code.
The main purpose is to abstract vmcs12 field access into the vmcs12_read() and
vmcs12_write() functions. With this change, we have a unified API to get and
set field values in vmcs12.
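For illustration, the accessors introduced in patch 1 (and forward-declared
in patch 3) have roughly the following shape; the exact bodies are in the
diffs below:

	static inline u64 vmcs12_read(struct kvm_vcpu *vcpu, unsigned long field);
	static inline int vmcs12_write(struct kvm_vcpu *vcpu,
				unsigned long field,
				u64 value);

so a direct dereference such as get_vmcs12(vcpu)->exception_bitmap becomes
vmcs12_read(vcpu, EXCEPTION_BITMAP).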

Thanks,
Dongxiao

Dongxiao Xu (4):
  nested vmx: clean up for vmcs12 read and write
  nested vmx: clean up for nested_cpu_has_xxx functions
  nested vmx: use vmcs12_read/write() to operate VMCS fields
  nested vmx: use a list to store the launched vmcs12 for L1 VMM

 arch/x86/kvm/vmx.c |  812 ++++++++++++++++++++++++++++++----------------------
 1 files changed, 467 insertions(+), 345 deletions(-)


* [PATCH 1/4] nested vmx: clean up for vmcs12 read and write
  2012-11-21  9:04 [PATCH 0/4] nested vmx code clean up and restructure Dongxiao Xu
@ 2012-11-21  9:04 ` Dongxiao Xu
  2012-11-21 13:04   ` Gleb Natapov
                     ` (2 more replies)
  2012-11-21  9:04 ` [PATCH 2/4] nested vmx: clean up for nested_cpu_has_xxx functions Dongxiao Xu
                   ` (2 subsequent siblings)
  3 siblings, 3 replies; 13+ messages in thread
From: Dongxiao Xu @ 2012-11-21  9:04 UTC (permalink / raw)
  To: kvm; +Cc: mtosatti, gleb

Abstract the vmcs12_read() and vmcs12_write() functions to handle the
vmcs12 read and write operations.
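
With this change, handle_vmread()/handle_vmwrite() (see the hunks below)
reduce to calls such as:

	field_value = vmcs12_read(vcpu, field);
	...
	if (!vmcs12_write(vcpu, field, field_value))
		return 1;

Note that on an unsupported field both helpers report
VMXERR_UNSUPPORTED_VMCS_COMPONENT and skip the emulated instruction
themselves, so the callers no longer need to.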

Signed-off-by: Dongxiao Xu <dongxiao.xu@intel.com>
---
 arch/x86/kvm/vmx.c |   86 +++++++++++++++++++++++++++-------------------------
 1 files changed, 45 insertions(+), 41 deletions(-)

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index f858159..d8670e4 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -5407,32 +5407,67 @@ static inline int vmcs_field_readonly(unsigned long field)
  * some of the bits we return here (e.g., on 32-bit guests, only 32 bits of
  * 64-bit fields are to be returned).
  */
-static inline bool vmcs12_read_any(struct kvm_vcpu *vcpu,
-					unsigned long field, u64 *ret)
+static inline u64 vmcs12_read(struct kvm_vcpu *vcpu, unsigned long field)
 {
 	short offset = vmcs_field_to_offset(field);
 	char *p;
 
-	if (offset < 0)
+	if (offset < 0) {
+		nested_vmx_failValid(vcpu, VMXERR_UNSUPPORTED_VMCS_COMPONENT);
+		skip_emulated_instruction(vcpu);
 		return 0;
+	}
 
 	p = ((char *)(get_vmcs12(vcpu))) + offset;
 
 	switch (vmcs_field_type(field)) {
 	case VMCS_FIELD_TYPE_NATURAL_WIDTH:
-		*ret = *((natural_width *)p);
+		return *((natural_width *)p);
+	case VMCS_FIELD_TYPE_U16:
+		return *((u16 *)p);
+	case VMCS_FIELD_TYPE_U32:
+		return *((u32 *)p);
+	case VMCS_FIELD_TYPE_U64:
+		return *((u64 *)p);
+	default:
+		nested_vmx_failValid(vcpu, VMXERR_UNSUPPORTED_VMCS_COMPONENT);
+		skip_emulated_instruction(vcpu);
+		return 0; /* can never happen. */
+	}
+}
+
+static inline int vmcs12_write(struct kvm_vcpu *vcpu,
+			unsigned long field,
+			u64 value)
+{
+	short offset = vmcs_field_to_offset(field);
+	char *p;
+
+	if (offset < 0) {
+		nested_vmx_failValid(vcpu, VMXERR_UNSUPPORTED_VMCS_COMPONENT);
+		skip_emulated_instruction(vcpu);
+		return 0;
+	}
+
+	p = ((char *)(get_vmcs12(vcpu))) + offset;
+
+	switch (vmcs_field_type(field)) {
+	case VMCS_FIELD_TYPE_NATURAL_WIDTH:
+		*(natural_width *)p = value;
 		return 1;
 	case VMCS_FIELD_TYPE_U16:
-		*ret = *((u16 *)p);
+		*(u16 *)p = value;
 		return 1;
 	case VMCS_FIELD_TYPE_U32:
-		*ret = *((u32 *)p);
+		*(u32 *)p = value;
 		return 1;
 	case VMCS_FIELD_TYPE_U64:
-		*ret = *((u64 *)p);
+		*(u64 *)p = value;
 		return 1;
 	default:
-		return 0; /* can never happen. */
+		nested_vmx_failValid(vcpu, VMXERR_UNSUPPORTED_VMCS_COMPONENT);
+		skip_emulated_instruction(vcpu);
+		return 0;
 	}
 }
 
@@ -5466,11 +5501,7 @@ static int handle_vmread(struct kvm_vcpu *vcpu)
 	/* Decode instruction info and find the field to read */
 	field = kvm_register_read(vcpu, (((vmx_instruction_info) >> 28) & 0xf));
 	/* Read the field, zero-extended to a u64 field_value */
-	if (!vmcs12_read_any(vcpu, field, &field_value)) {
-		nested_vmx_failValid(vcpu, VMXERR_UNSUPPORTED_VMCS_COMPONENT);
-		skip_emulated_instruction(vcpu);
-		return 1;
-	}
+	field_value = vmcs12_read(vcpu, field);
 	/*
 	 * Now copy part of this value to register or memory, as requested.
 	 * Note that the number of bits actually copied is 32 or 64 depending
@@ -5500,8 +5531,6 @@ static int handle_vmwrite(struct kvm_vcpu *vcpu)
 	gva_t gva;
 	unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
 	u32 vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO);
-	char *p;
-	short offset;
 	/* The value to write might be 32 or 64 bits, depending on L1's long
 	 * mode, and eventually we need to write that into a field of several
 	 * possible lengths. The code below first zero-extends the value to 64
@@ -5537,33 +5566,8 @@ static int handle_vmwrite(struct kvm_vcpu *vcpu)
 		skip_emulated_instruction(vcpu);
 		return 1;
 	}
-
-	offset = vmcs_field_to_offset(field);
-	if (offset < 0) {
-		nested_vmx_failValid(vcpu, VMXERR_UNSUPPORTED_VMCS_COMPONENT);
-		skip_emulated_instruction(vcpu);
-		return 1;
-	}
-	p = ((char *) get_vmcs12(vcpu)) + offset;
-
-	switch (vmcs_field_type(field)) {
-	case VMCS_FIELD_TYPE_U16:
-		*(u16 *)p = field_value;
-		break;
-	case VMCS_FIELD_TYPE_U32:
-		*(u32 *)p = field_value;
-		break;
-	case VMCS_FIELD_TYPE_U64:
-		*(u64 *)p = field_value;
-		break;
-	case VMCS_FIELD_TYPE_NATURAL_WIDTH:
-		*(natural_width *)p = field_value;
-		break;
-	default:
-		nested_vmx_failValid(vcpu, VMXERR_UNSUPPORTED_VMCS_COMPONENT);
-		skip_emulated_instruction(vcpu);
+	if (!vmcs12_write(vcpu, field, field_value))
 		return 1;
-	}
 
 	nested_vmx_succeed(vcpu);
 	skip_emulated_instruction(vcpu);
-- 
1.7.1


* [PATCH 2/4] nested vmx: clean up for nested_cpu_has_xxx functions
  2012-11-21  9:04 [PATCH 0/4] nested vmx code clean up and restructure Dongxiao Xu
  2012-11-21  9:04 ` [PATCH 1/4] nested vmx: clean up for vmcs12 read and write Dongxiao Xu
@ 2012-11-21  9:04 ` Dongxiao Xu
  2012-11-21  9:04 ` [PATCH 3/4] nested vmx: use vmcs12_read/write() to operate VMCS fields Dongxiao Xu
  2012-11-21  9:04 ` [PATCH 4/4] nested vmx: use a list to store the launched vmcs12 for L1 VMM Dongxiao Xu
  3 siblings, 0 replies; 13+ messages in thread
From: Dongxiao Xu @ 2012-11-21  9:04 UTC (permalink / raw)
  To: kvm; +Cc: mtosatti, gleb

This is a preparation for the later change, which uses vmcs12_read()
and vmcs12_write() to replace direct access to vmcs12 fields.

Since those functions take 'vcpu' as a parameter, also use 'vcpu' as the
parameter in the nested_cpu_has_xxx functions.
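
For example, nested_cpu_has() changes from

	static inline bool nested_cpu_has(struct vmcs12 *vmcs12, u32 bit);

to

	static inline bool nested_cpu_has(struct kvm_vcpu *vcpu, u32 bit);

and fetches vmcs12 internally via get_vmcs12(vcpu), so call sites only
need the vcpu.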

Signed-off-by: Dongxiao Xu <dongxiao.xu@intel.com>
---
 arch/x86/kvm/vmx.c |   57 +++++++++++++++++++++++++--------------------------
 1 files changed, 28 insertions(+), 29 deletions(-)

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index d8670e4..b036f9c 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -884,22 +884,22 @@ static inline bool report_flexpriority(void)
 	return flexpriority_enabled;
 }
 
-static inline bool nested_cpu_has(struct vmcs12 *vmcs12, u32 bit)
+static inline bool nested_cpu_has(struct kvm_vcpu *vcpu, u32 bit)
 {
-	return vmcs12->cpu_based_vm_exec_control & bit;
+	return get_vmcs12(vcpu)->cpu_based_vm_exec_control & bit;
 }
 
-static inline bool nested_cpu_has2(struct vmcs12 *vmcs12, u32 bit)
+static inline bool nested_cpu_has2(struct kvm_vcpu *vcpu, u32 bit)
 {
-	return (vmcs12->cpu_based_vm_exec_control &
+	return (get_vmcs12(vcpu)->cpu_based_vm_exec_control &
 			CPU_BASED_ACTIVATE_SECONDARY_CONTROLS) &&
-		(vmcs12->secondary_vm_exec_control & bit);
+		(get_vmcs12(vcpu)->secondary_vm_exec_control & bit);
 }
 
-static inline bool nested_cpu_has_virtual_nmis(struct vmcs12 *vmcs12,
-	struct kvm_vcpu *vcpu)
+static inline bool nested_cpu_has_virtual_nmis(struct kvm_vcpu *vcpu)
 {
-	return vmcs12->pin_based_vm_exec_control & PIN_BASED_VIRTUAL_NMIS;
+	return get_vmcs12(vcpu)->pin_based_vm_exec_control &
+		PIN_BASED_VIRTUAL_NMIS;
 }
 
 static inline bool is_exception(u32 intr_info)
@@ -1883,7 +1883,7 @@ static void vmx_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
 		/* recalculate vmcs02.TSC_OFFSET: */
 		vmcs12 = get_vmcs12(vcpu);
 		vmcs_write64(TSC_OFFSET, offset +
-			(nested_cpu_has(vmcs12, CPU_BASED_USE_TSC_OFFSETING) ?
+			(nested_cpu_has(vcpu, CPU_BASED_USE_TSC_OFFSETING) ?
 			 vmcs12->tsc_offset : 0));
 	} else {
 		vmcs_write64(TSC_OFFSET, offset);
@@ -5719,7 +5719,7 @@ static bool nested_vmx_exit_handled_msr(struct kvm_vcpu *vcpu,
 	u32 msr_index = vcpu->arch.regs[VCPU_REGS_RCX];
 	gpa_t bitmap;
 
-	if (!nested_cpu_has(get_vmcs12(vcpu), CPU_BASED_USE_MSR_BITMAPS))
+	if (!nested_cpu_has(vcpu, CPU_BASED_USE_MSR_BITMAPS))
 		return 1;
 
 	/*
@@ -5775,7 +5775,7 @@ static bool nested_vmx_exit_handled_cr(struct kvm_vcpu *vcpu,
 				(vmcs12->cr3_target_count >= 4 &&
 					vmcs12->cr3_target_value3 == val))
 				return 0;
-			if (nested_cpu_has(vmcs12, CPU_BASED_CR3_LOAD_EXITING))
+			if (nested_cpu_has(vcpu, CPU_BASED_CR3_LOAD_EXITING))
 				return 1;
 			break;
 		case 4:
@@ -5784,7 +5784,7 @@ static bool nested_vmx_exit_handled_cr(struct kvm_vcpu *vcpu,
 				return 1;
 			break;
 		case 8:
-			if (nested_cpu_has(vmcs12, CPU_BASED_CR8_LOAD_EXITING))
+			if (nested_cpu_has(vcpu, CPU_BASED_CR8_LOAD_EXITING))
 				return 1;
 			break;
 		}
@@ -5872,15 +5872,15 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu)
 	case EXIT_REASON_CPUID:
 		return 1;
 	case EXIT_REASON_HLT:
-		return nested_cpu_has(vmcs12, CPU_BASED_HLT_EXITING);
+		return nested_cpu_has(vcpu, CPU_BASED_HLT_EXITING);
 	case EXIT_REASON_INVD:
 		return 1;
 	case EXIT_REASON_INVLPG:
-		return nested_cpu_has(vmcs12, CPU_BASED_INVLPG_EXITING);
+		return nested_cpu_has(vcpu, CPU_BASED_INVLPG_EXITING);
 	case EXIT_REASON_RDPMC:
-		return nested_cpu_has(vmcs12, CPU_BASED_RDPMC_EXITING);
+		return nested_cpu_has(vcpu, CPU_BASED_RDPMC_EXITING);
 	case EXIT_REASON_RDTSC:
-		return nested_cpu_has(vmcs12, CPU_BASED_RDTSC_EXITING);
+		return nested_cpu_has(vcpu, CPU_BASED_RDTSC_EXITING);
 	case EXIT_REASON_VMCALL: case EXIT_REASON_VMCLEAR:
 	case EXIT_REASON_VMLAUNCH: case EXIT_REASON_VMPTRLD:
 	case EXIT_REASON_VMPTRST: case EXIT_REASON_VMREAD:
@@ -5894,7 +5894,7 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu)
 	case EXIT_REASON_CR_ACCESS:
 		return nested_vmx_exit_handled_cr(vcpu, vmcs12);
 	case EXIT_REASON_DR_ACCESS:
-		return nested_cpu_has(vmcs12, CPU_BASED_MOV_DR_EXITING);
+		return nested_cpu_has(vcpu, CPU_BASED_MOV_DR_EXITING);
 	case EXIT_REASON_IO_INSTRUCTION:
 		/* TODO: support IO bitmaps */
 		return 1;
@@ -5904,25 +5904,26 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu)
 	case EXIT_REASON_INVALID_STATE:
 		return 1;
 	case EXIT_REASON_MWAIT_INSTRUCTION:
-		return nested_cpu_has(vmcs12, CPU_BASED_MWAIT_EXITING);
+		return nested_cpu_has(vcpu, CPU_BASED_MWAIT_EXITING);
 	case EXIT_REASON_MONITOR_INSTRUCTION:
-		return nested_cpu_has(vmcs12, CPU_BASED_MONITOR_EXITING);
+		return nested_cpu_has(vcpu, CPU_BASED_MONITOR_EXITING);
 	case EXIT_REASON_PAUSE_INSTRUCTION:
-		return nested_cpu_has(vmcs12, CPU_BASED_PAUSE_EXITING) ||
-			nested_cpu_has2(vmcs12,
-				SECONDARY_EXEC_PAUSE_LOOP_EXITING);
+		return nested_cpu_has(vcpu, CPU_BASED_PAUSE_EXITING) ||
+			nested_cpu_has2(vcpu,
+					SECONDARY_EXEC_PAUSE_LOOP_EXITING);
 	case EXIT_REASON_MCE_DURING_VMENTRY:
 		return 0;
 	case EXIT_REASON_TPR_BELOW_THRESHOLD:
 		return 1;
 	case EXIT_REASON_APIC_ACCESS:
-		return nested_cpu_has2(vmcs12,
+		return nested_cpu_has2(vcpu,
 			SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES);
 	case EXIT_REASON_EPT_VIOLATION:
 	case EXIT_REASON_EPT_MISCONFIG:
 		return 0;
 	case EXIT_REASON_WBINVD:
-		return nested_cpu_has2(vmcs12, SECONDARY_EXEC_WBINVD_EXITING);
+		return nested_cpu_has2(vcpu,
+			SECONDARY_EXEC_WBINVD_EXITING);
 	case EXIT_REASON_XSETBV:
 		return 1;
 	default:
@@ -5992,8 +5993,7 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu)
 		       __func__, vectoring_info, exit_reason);
 
 	if (unlikely(!cpu_has_virtual_nmis() && vmx->soft_vnmi_blocked &&
-	    !(is_guest_mode(vcpu) && nested_cpu_has_virtual_nmis(
-	                                get_vmcs12(vcpu), vcpu)))) {
+	    !(is_guest_mode(vcpu) && nested_cpu_has_virtual_nmis(vcpu)))) {
 		if (vmx_interrupt_allowed(vcpu)) {
 			vmx->soft_vnmi_blocked = 0;
 		} else if (vmx->vnmi_blocked_time > 1000000000LL &&
@@ -6686,8 +6686,7 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
 			exec_control &= ~SECONDARY_EXEC_RDTSCP;
 		/* Take the following fields only from vmcs12 */
 		exec_control &= ~SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
-		if (nested_cpu_has(vmcs12,
-				CPU_BASED_ACTIVATE_SECONDARY_CONTROLS))
+		if (nested_cpu_has(vcpu, CPU_BASED_ACTIVATE_SECONDARY_CONTROLS))
 			exec_control |= vmcs12->secondary_vm_exec_control;
 
 		if (exec_control & SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES) {
@@ -6862,7 +6861,7 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
 		return 1;
 	}
 
-	if (nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES) &&
+	if (nested_cpu_has2(vcpu, SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES) &&
 			!IS_ALIGNED(vmcs12->apic_access_addr, PAGE_SIZE)) {
 		/*TODO: Also verify bits beyond physical address width are 0*/
 		nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD);
-- 
1.7.1


* [PATCH 3/4] nested vmx: use vmcs12_read/write() to operate VMCS fields
  2012-11-21  9:04 [PATCH 0/4] nested vmx code clean up and restructure Dongxiao Xu
  2012-11-21  9:04 ` [PATCH 1/4] nested vmx: clean up for vmcs12 read and write Dongxiao Xu
  2012-11-21  9:04 ` [PATCH 2/4] nested vmx: clean up for nested_cpu_has_xxx functions Dongxiao Xu
@ 2012-11-21  9:04 ` Dongxiao Xu
  2012-11-21  9:04 ` [PATCH 4/4] nested vmx: use a list to store the launched vmcs12 for L1 VMM Dongxiao Xu
  3 siblings, 0 replies; 13+ messages in thread
From: Dongxiao Xu @ 2012-11-21  9:04 UTC (permalink / raw)
  To: kvm; +Cc: mtosatti, gleb

When referencing vmcs12 fields, the current approach uses the
"struct.field" style. This commit replaces all of those direct accesses
with calls to the vmcs12_read() and vmcs12_write() functions.
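
For example, a direct store such as

	vmcs12->idt_vectoring_info_field = vmx->idt_vectoring_info;

becomes

	vmcs12_write(vcpu, IDT_VECTORING_INFO_FIELD, vmx->idt_vectoring_info);

and reads follow the same pattern with vmcs12_read(vcpu, FIELD).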

Signed-off-by: Dongxiao Xu <dongxiao.xu@intel.com>
---
 arch/x86/kvm/vmx.c |  591 ++++++++++++++++++++++++++++------------------------
 1 files changed, 317 insertions(+), 274 deletions(-)

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index b036f9c..6687fb6 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -624,6 +624,11 @@ static void vmx_set_segment(struct kvm_vcpu *vcpu,
 static void vmx_get_segment(struct kvm_vcpu *vcpu,
 			    struct kvm_segment *var, int seg);
 
+static inline u64 vmcs12_read(struct kvm_vcpu *vcpu, unsigned long field);
+static inline int vmcs12_write(struct kvm_vcpu *vcpu,
+			unsigned long field,
+			u64 value);
+
 static DEFINE_PER_CPU(struct vmcs *, vmxarea);
 static DEFINE_PER_CPU(struct vmcs *, current_vmcs);
 /*
@@ -886,19 +891,19 @@ static inline bool report_flexpriority(void)
 
 static inline bool nested_cpu_has(struct kvm_vcpu *vcpu, u32 bit)
 {
-	return get_vmcs12(vcpu)->cpu_based_vm_exec_control & bit;
+	return vmcs12_read(vcpu, CPU_BASED_VM_EXEC_CONTROL) & bit;
 }
 
 static inline bool nested_cpu_has2(struct kvm_vcpu *vcpu, u32 bit)
 {
-	return (get_vmcs12(vcpu)->cpu_based_vm_exec_control &
+	return (vmcs12_read(vcpu, CPU_BASED_VM_EXEC_CONTROL) &
 			CPU_BASED_ACTIVATE_SECONDARY_CONTROLS) &&
-		(get_vmcs12(vcpu)->secondary_vm_exec_control & bit);
+		(vmcs12_read(vcpu, SECONDARY_VM_EXEC_CONTROL) & bit);
 }
 
 static inline bool nested_cpu_has_virtual_nmis(struct kvm_vcpu *vcpu)
 {
-	return get_vmcs12(vcpu)->pin_based_vm_exec_control &
+	return vmcs12_read(vcpu, PIN_BASED_VM_EXEC_CONTROL) &
 		PIN_BASED_VIRTUAL_NMIS;
 }
 
@@ -910,7 +915,6 @@ static inline bool is_exception(u32 intr_info)
 
 static void nested_vmx_vmexit(struct kvm_vcpu *vcpu);
 static void nested_vmx_entry_failure(struct kvm_vcpu *vcpu,
-			struct vmcs12 *vmcs12,
 			u32 reason, unsigned long qualification);
 
 static int __find_msr_index(struct vcpu_vmx *vmx, u32 msr)
@@ -1215,7 +1219,7 @@ static void update_exception_bitmap(struct kvm_vcpu *vcpu)
 	 * specified above if L1 did not want them.
 	 */
 	if (is_guest_mode(vcpu))
-		eb |= get_vmcs12(vcpu)->exception_bitmap;
+		eb |= vmcs12_read(vcpu, EXCEPTION_BITMAP);
 
 	vmcs_write32(EXCEPTION_BITMAP, eb);
 }
@@ -1577,7 +1581,7 @@ static void vmx_fpu_activate(struct kvm_vcpu *vcpu)
 	vcpu->arch.cr0_guest_owned_bits = X86_CR0_TS;
 	if (is_guest_mode(vcpu))
 		vcpu->arch.cr0_guest_owned_bits &=
-			~get_vmcs12(vcpu)->cr0_guest_host_mask;
+			~vmcs12_read(vcpu, CR0_GUEST_HOST_MASK);
 	vmcs_writel(CR0_GUEST_HOST_MASK, ~vcpu->arch.cr0_guest_owned_bits);
 }
 
@@ -1588,15 +1592,19 @@ static void vmx_decache_cr0_guest_bits(struct kvm_vcpu *vcpu);
  * of the real cr0 used to run the guest (guest_cr0), and the bits shadowed by
  * its hypervisor (cr0_read_shadow).
  */
-static inline unsigned long nested_read_cr0(struct vmcs12 *fields)
+static inline unsigned long nested_read_cr0(struct kvm_vcpu *vcpu)
 {
-	return (fields->guest_cr0 & ~fields->cr0_guest_host_mask) |
-		(fields->cr0_read_shadow & fields->cr0_guest_host_mask);
+	return (vmcs12_read(vcpu, GUEST_CR0) &
+			~vmcs12_read(vcpu, CR0_GUEST_HOST_MASK)) |
+		(vmcs12_read(vcpu, CR0_READ_SHADOW) &
+			vmcs12_read(vcpu, CR0_GUEST_HOST_MASK));
 }
-static inline unsigned long nested_read_cr4(struct vmcs12 *fields)
+static inline unsigned long nested_read_cr4(struct kvm_vcpu *vcpu)
 {
-	return (fields->guest_cr4 & ~fields->cr4_guest_host_mask) |
-		(fields->cr4_read_shadow & fields->cr4_guest_host_mask);
+	return (vmcs12_read(vcpu, GUEST_CR4) &
+			~vmcs12_read(vcpu, CR4_GUEST_HOST_MASK)) |
+		(vmcs12_read(vcpu, CR4_READ_SHADOW) &
+			vmcs12_read(vcpu, CR4_GUEST_HOST_MASK));
 }
 
 static void vmx_fpu_deactivate(struct kvm_vcpu *vcpu)
@@ -1618,10 +1626,10 @@ static void vmx_fpu_deactivate(struct kvm_vcpu *vcpu)
 		 * up-to-date here because we just decached cr0.TS (and we'll
 		 * only update vmcs12->guest_cr0 on nested exit).
 		 */
-		struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
-		vmcs12->guest_cr0 = (vmcs12->guest_cr0 & ~X86_CR0_TS) |
+		u64 guest_cr0 = (vmcs12_read(vcpu, GUEST_CR0) & ~X86_CR0_TS) |
 			(vcpu->arch.cr0 & X86_CR0_TS);
-		vmcs_writel(CR0_READ_SHADOW, nested_read_cr0(vmcs12));
+		vmcs12_write(vcpu, GUEST_CR0, guest_cr0);
+		vmcs_writel(CR0_READ_SHADOW, nested_read_cr0(vcpu));
 	} else
 		vmcs_writel(CR0_READ_SHADOW, vcpu->arch.cr0);
 }
@@ -1705,10 +1713,8 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu)
  */
 static int nested_pf_handled(struct kvm_vcpu *vcpu)
 {
-	struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
-
 	/* TODO: also check PFEC_MATCH/MASK, not just EB.PF. */
-	if (!(vmcs12->exception_bitmap & (1u << PF_VECTOR)))
+	if (!(vmcs12_read(vcpu, EXCEPTION_BITMAP) & (1u << PF_VECTOR)))
 		return 0;
 
 	nested_vmx_vmexit(vcpu);
@@ -1878,13 +1884,11 @@ static void vmx_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
 		 * set for L2 remains unchanged, and still needs to be added
 		 * to the newly set TSC to get L2's TSC.
 		 */
-		struct vmcs12 *vmcs12;
 		to_vmx(vcpu)->nested.vmcs01_tsc_offset = offset;
 		/* recalculate vmcs02.TSC_OFFSET: */
-		vmcs12 = get_vmcs12(vcpu);
 		vmcs_write64(TSC_OFFSET, offset +
 			(nested_cpu_has(vcpu, CPU_BASED_USE_TSC_OFFSETING) ?
-			 vmcs12->tsc_offset : 0));
+			 vmcs12_read(vcpu, TSC_OFFSET) : 0));
 	} else {
 		vmcs_write64(TSC_OFFSET, offset);
 	}
@@ -3753,7 +3757,7 @@ static void set_cr4_guest_host_mask(struct vcpu_vmx *vmx)
 		vmx->vcpu.arch.cr4_guest_owned_bits |= X86_CR4_PGE;
 	if (is_guest_mode(&vmx->vcpu))
 		vmx->vcpu.arch.cr4_guest_owned_bits &=
-			~get_vmcs12(&vmx->vcpu)->cr4_guest_host_mask;
+			~vmcs12_read(&vmx->vcpu, CR4_GUEST_HOST_MASK);
 	vmcs_writel(CR4_GUEST_HOST_MASK, ~vmx->vcpu.arch.cr4_guest_owned_bits);
 }
 
@@ -4025,7 +4029,7 @@ out:
  */
 static bool nested_exit_on_intr(struct kvm_vcpu *vcpu)
 {
-	return get_vmcs12(vcpu)->pin_based_vm_exec_control &
+	return vmcs12_read(vcpu, PIN_BASED_VM_EXEC_CONTROL) &
 		PIN_BASED_EXT_INTR_MASK;
 }
 
@@ -4165,14 +4169,14 @@ static void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked)
 static int vmx_interrupt_allowed(struct kvm_vcpu *vcpu)
 {
 	if (is_guest_mode(vcpu) && nested_exit_on_intr(vcpu)) {
-		struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
 		if (to_vmx(vcpu)->nested.nested_run_pending ||
-		    (vmcs12->idt_vectoring_info_field &
+		    (vmcs12_read(vcpu, IDT_VECTORING_INFO_FIELD) &
 		     VECTORING_INFO_VALID_MASK))
 			return 0;
 		nested_vmx_vmexit(vcpu);
-		vmcs12->vm_exit_reason = EXIT_REASON_EXTERNAL_INTERRUPT;
-		vmcs12->vm_exit_intr_info = 0;
+		vmcs12_write(vcpu, VM_EXIT_REASON,
+			EXIT_REASON_EXTERNAL_INTERRUPT);
+		vmcs12_write(vcpu, VM_EXIT_INTR_INFO, 0);
 		/* fall through to normal code, but now in L1, not L2 */
 	}
 
@@ -5303,7 +5307,7 @@ static void nested_vmx_failValid(struct kvm_vcpu *vcpu,
 			& ~(X86_EFLAGS_CF | X86_EFLAGS_PF | X86_EFLAGS_AF |
 			    X86_EFLAGS_SF | X86_EFLAGS_OF))
 			| X86_EFLAGS_ZF);
-	get_vmcs12(vcpu)->vm_instruction_error = vm_instruction_error;
+	vmcs12_write(vcpu, VM_INSTRUCTION_ERROR, vm_instruction_error);
 }
 
 /* Emulate the VMCLEAR instruction */
@@ -5714,7 +5718,7 @@ static const int kvm_vmx_max_exit_handlers =
  * MSR bitmap. This may be the case even when L0 doesn't use MSR bitmaps.
  */
 static bool nested_vmx_exit_handled_msr(struct kvm_vcpu *vcpu,
-	struct vmcs12 *vmcs12, u32 exit_reason)
+	u32 exit_reason)
 {
 	u32 msr_index = vcpu->arch.regs[VCPU_REGS_RCX];
 	gpa_t bitmap;
@@ -5727,7 +5731,7 @@ static bool nested_vmx_exit_handled_msr(struct kvm_vcpu *vcpu,
 	 * for the four combinations of read/write and low/high MSR numbers.
 	 * First we need to figure out which of the four to use:
 	 */
-	bitmap = vmcs12->msr_bitmap;
+	bitmap = vmcs12_read(vcpu, MSR_BITMAP);
 	if (exit_reason == EXIT_REASON_MSR_WRITE)
 		bitmap += 2048;
 	if (msr_index >= 0xc0000000) {
@@ -5749,8 +5753,7 @@ static bool nested_vmx_exit_handled_msr(struct kvm_vcpu *vcpu,
  * rather than handle it ourselves in L0. I.e., check if L1 wanted to
  * intercept (via guest_host_mask etc.) the current event.
  */
-static bool nested_vmx_exit_handled_cr(struct kvm_vcpu *vcpu,
-	struct vmcs12 *vmcs12)
+static bool nested_vmx_exit_handled_cr(struct kvm_vcpu *vcpu)
 {
 	unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
 	int cr = exit_qualification & 15;
@@ -5761,26 +5764,26 @@ static bool nested_vmx_exit_handled_cr(struct kvm_vcpu *vcpu,
 	case 0: /* mov to cr */
 		switch (cr) {
 		case 0:
-			if (vmcs12->cr0_guest_host_mask &
-			    (val ^ vmcs12->cr0_read_shadow))
+			if (vmcs12_read(vcpu, CR0_GUEST_HOST_MASK) &
+			    (val ^ vmcs12_read(vcpu, CR0_READ_SHADOW)))
 				return 1;
 			break;
 		case 3:
-			if ((vmcs12->cr3_target_count >= 1 &&
-					vmcs12->cr3_target_value0 == val) ||
-				(vmcs12->cr3_target_count >= 2 &&
-					vmcs12->cr3_target_value1 == val) ||
-				(vmcs12->cr3_target_count >= 3 &&
-					vmcs12->cr3_target_value2 == val) ||
-				(vmcs12->cr3_target_count >= 4 &&
-					vmcs12->cr3_target_value3 == val))
+			if ((vmcs12_read(vcpu, CR3_TARGET_COUNT) >= 1 &&
+				vmcs12_read(vcpu, CR3_TARGET_VALUE0) == val) ||
+				(vmcs12_read(vcpu, CR3_TARGET_COUNT) >= 2 &&
+				vmcs12_read(vcpu, CR3_TARGET_VALUE1) == val) ||
+				(vmcs12_read(vcpu, CR3_TARGET_COUNT) >= 3 &&
+				vmcs12_read(vcpu, CR3_TARGET_VALUE2) == val) ||
+				(vmcs12_read(vcpu, CR3_TARGET_COUNT) >= 4 &&
+				vmcs12_read(vcpu, CR3_TARGET_VALUE3) == val))
 				return 0;
 			if (nested_cpu_has(vcpu, CPU_BASED_CR3_LOAD_EXITING))
 				return 1;
 			break;
 		case 4:
-			if (vmcs12->cr4_guest_host_mask &
-			    (vmcs12->cr4_read_shadow ^ val))
+			if (vmcs12_read(vcpu, CR4_GUEST_HOST_MASK) &
+			    (vmcs12_read(vcpu, CR4_READ_SHADOW) ^ val))
 				return 1;
 			break;
 		case 8:
@@ -5790,19 +5793,19 @@ static bool nested_vmx_exit_handled_cr(struct kvm_vcpu *vcpu,
 		}
 		break;
 	case 2: /* clts */
-		if ((vmcs12->cr0_guest_host_mask & X86_CR0_TS) &&
-		    (vmcs12->cr0_read_shadow & X86_CR0_TS))
+		if ((vmcs12_read(vcpu, CR0_GUEST_HOST_MASK) & X86_CR0_TS) &&
+		    (vmcs12_read(vcpu, CR0_READ_SHADOW) & X86_CR0_TS))
 			return 1;
 		break;
 	case 1: /* mov from cr */
 		switch (cr) {
 		case 3:
-			if (vmcs12->cpu_based_vm_exec_control &
+			if (vmcs12_read(vcpu, CPU_BASED_VM_EXEC_CONTROL) &
 			    CPU_BASED_CR3_STORE_EXITING)
 				return 1;
 			break;
 		case 8:
-			if (vmcs12->cpu_based_vm_exec_control &
+			if (vmcs12_read(vcpu, CPU_BASED_VM_EXEC_CONTROL) &
 			    CPU_BASED_CR8_STORE_EXITING)
 				return 1;
 			break;
@@ -5813,11 +5816,11 @@ static bool nested_vmx_exit_handled_cr(struct kvm_vcpu *vcpu,
 		 * lmsw can change bits 1..3 of cr0, and only set bit 0 of
 		 * cr0. Other attempted changes are ignored, with no exit.
 		 */
-		if (vmcs12->cr0_guest_host_mask & 0xe &
-		    (val ^ vmcs12->cr0_read_shadow))
+		if (vmcs12_read(vcpu, CR0_GUEST_HOST_MASK) & 0xe &
+		    (val ^ vmcs12_read(vcpu, CR0_READ_SHADOW)))
 			return 1;
-		if ((vmcs12->cr0_guest_host_mask & 0x1) &&
-		    !(vmcs12->cr0_read_shadow & 0x1) &&
+		if ((vmcs12_read(vcpu, CR0_GUEST_HOST_MASK) & 0x1) &&
+		    !(vmcs12_read(vcpu, CR0_READ_SHADOW) & 0x1) &&
 		    (val & 0x1))
 			return 1;
 		break;
@@ -5835,7 +5838,6 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu)
 	u32 exit_reason = vmcs_read32(VM_EXIT_REASON);
 	u32 intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
-	struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
 
 	if (vmx->nested.nested_run_pending)
 		return 0;
@@ -5852,7 +5854,7 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu)
 			return 0;
 		else if (is_page_fault(intr_info))
 			return enable_ept;
-		return vmcs12->exception_bitmap &
+		return vmcs12_read(vcpu, EXCEPTION_BITMAP) &
 				(1u << (intr_info & INTR_INFO_VECTOR_MASK));
 	case EXIT_REASON_EXTERNAL_INTERRUPT:
 		return 0;
@@ -5892,7 +5894,7 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu)
 		 */
 		return 1;
 	case EXIT_REASON_CR_ACCESS:
-		return nested_vmx_exit_handled_cr(vcpu, vmcs12);
+		return nested_vmx_exit_handled_cr(vcpu);
 	case EXIT_REASON_DR_ACCESS:
 		return nested_cpu_has(vcpu, CPU_BASED_MOV_DR_EXITING);
 	case EXIT_REASON_IO_INSTRUCTION:
@@ -5900,7 +5902,7 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu)
 		return 1;
 	case EXIT_REASON_MSR_READ:
 	case EXIT_REASON_MSR_WRITE:
-		return nested_vmx_exit_handled_msr(vcpu, vmcs12, exit_reason);
+		return nested_vmx_exit_handled_msr(vcpu, exit_reason);
 	case EXIT_REASON_INVALID_STATE:
 		return 1;
 	case EXIT_REASON_MWAIT_INSTRUCTION:
@@ -6199,17 +6201,21 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
 	unsigned long debugctlmsr;
 
 	if (is_guest_mode(vcpu) && !vmx->nested.nested_run_pending) {
-		struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
-		if (vmcs12->idt_vectoring_info_field &
-				VECTORING_INFO_VALID_MASK) {
+		u64 idt_vectoring_info_field = vmcs12_read(vcpu,
+						IDT_VECTORING_INFO_FIELD);
+		u64 vm_exit_instruction_len = vmcs12_read(vcpu,
+						VM_EXIT_INSTRUCTION_LEN);
+		u64 idt_vectoring_error_code = vmcs12_read(vcpu,
+						IDT_VECTORING_ERROR_CODE);
+		if (idt_vectoring_info_field & VECTORING_INFO_VALID_MASK) {
 			vmcs_write32(VM_ENTRY_INTR_INFO_FIELD,
-				vmcs12->idt_vectoring_info_field);
+				     idt_vectoring_info_field);
 			vmcs_write32(VM_ENTRY_INSTRUCTION_LEN,
-				vmcs12->vm_exit_instruction_len);
-			if (vmcs12->idt_vectoring_info_field &
+				     vm_exit_instruction_len);
+			if (idt_vectoring_info_field &
 					VECTORING_INFO_DELIVER_CODE_MASK)
 				vmcs_write32(VM_ENTRY_EXCEPTION_ERROR_CODE,
-					vmcs12->idt_vectoring_error_code);
+					idt_vectoring_error_code);
 		}
 	}
 
@@ -6372,13 +6378,13 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
 	vmx->idt_vectoring_info = vmcs_read32(IDT_VECTORING_INFO_FIELD);
 
 	if (is_guest_mode(vcpu)) {
-		struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
-		vmcs12->idt_vectoring_info_field = vmx->idt_vectoring_info;
+		vmcs12_write(vcpu, IDT_VECTORING_INFO_FIELD,
+				vmx->idt_vectoring_info);
 		if (vmx->idt_vectoring_info & VECTORING_INFO_VALID_MASK) {
-			vmcs12->idt_vectoring_error_code =
-				vmcs_read32(IDT_VECTORING_ERROR_CODE);
-			vmcs12->vm_exit_instruction_len =
-				vmcs_read32(VM_EXIT_INSTRUCTION_LEN);
+			vmcs12_write(vcpu, IDT_VECTORING_ERROR_CODE,
+				vmcs_read32(IDT_VECTORING_ERROR_CODE));
+			vmcs12_write(vcpu, VM_EXIT_INSTRUCTION_LEN,
+				vmcs_read32(VM_EXIT_INSTRUCTION_LEN));
 		}
 	}
 
@@ -6589,71 +6595,75 @@ static void vmx_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry)
  * function also has additional necessary side-effects, like setting various
  * vcpu->arch fields.
  */
-static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
+static void prepare_vmcs02(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
 	u32 exec_control;
 
-	vmcs_write16(GUEST_ES_SELECTOR, vmcs12->guest_es_selector);
-	vmcs_write16(GUEST_CS_SELECTOR, vmcs12->guest_cs_selector);
-	vmcs_write16(GUEST_SS_SELECTOR, vmcs12->guest_ss_selector);
-	vmcs_write16(GUEST_DS_SELECTOR, vmcs12->guest_ds_selector);
-	vmcs_write16(GUEST_FS_SELECTOR, vmcs12->guest_fs_selector);
-	vmcs_write16(GUEST_GS_SELECTOR, vmcs12->guest_gs_selector);
-	vmcs_write16(GUEST_LDTR_SELECTOR, vmcs12->guest_ldtr_selector);
-	vmcs_write16(GUEST_TR_SELECTOR, vmcs12->guest_tr_selector);
-	vmcs_write32(GUEST_ES_LIMIT, vmcs12->guest_es_limit);
-	vmcs_write32(GUEST_CS_LIMIT, vmcs12->guest_cs_limit);
-	vmcs_write32(GUEST_SS_LIMIT, vmcs12->guest_ss_limit);
-	vmcs_write32(GUEST_DS_LIMIT, vmcs12->guest_ds_limit);
-	vmcs_write32(GUEST_FS_LIMIT, vmcs12->guest_fs_limit);
-	vmcs_write32(GUEST_GS_LIMIT, vmcs12->guest_gs_limit);
-	vmcs_write32(GUEST_LDTR_LIMIT, vmcs12->guest_ldtr_limit);
-	vmcs_write32(GUEST_TR_LIMIT, vmcs12->guest_tr_limit);
-	vmcs_write32(GUEST_GDTR_LIMIT, vmcs12->guest_gdtr_limit);
-	vmcs_write32(GUEST_IDTR_LIMIT, vmcs12->guest_idtr_limit);
-	vmcs_write32(GUEST_ES_AR_BYTES, vmcs12->guest_es_ar_bytes);
-	vmcs_write32(GUEST_CS_AR_BYTES, vmcs12->guest_cs_ar_bytes);
-	vmcs_write32(GUEST_SS_AR_BYTES, vmcs12->guest_ss_ar_bytes);
-	vmcs_write32(GUEST_DS_AR_BYTES, vmcs12->guest_ds_ar_bytes);
-	vmcs_write32(GUEST_FS_AR_BYTES, vmcs12->guest_fs_ar_bytes);
-	vmcs_write32(GUEST_GS_AR_BYTES, vmcs12->guest_gs_ar_bytes);
-	vmcs_write32(GUEST_LDTR_AR_BYTES, vmcs12->guest_ldtr_ar_bytes);
-	vmcs_write32(GUEST_TR_AR_BYTES, vmcs12->guest_tr_ar_bytes);
-	vmcs_writel(GUEST_ES_BASE, vmcs12->guest_es_base);
-	vmcs_writel(GUEST_CS_BASE, vmcs12->guest_cs_base);
-	vmcs_writel(GUEST_SS_BASE, vmcs12->guest_ss_base);
-	vmcs_writel(GUEST_DS_BASE, vmcs12->guest_ds_base);
-	vmcs_writel(GUEST_FS_BASE, vmcs12->guest_fs_base);
-	vmcs_writel(GUEST_GS_BASE, vmcs12->guest_gs_base);
-	vmcs_writel(GUEST_LDTR_BASE, vmcs12->guest_ldtr_base);
-	vmcs_writel(GUEST_TR_BASE, vmcs12->guest_tr_base);
-	vmcs_writel(GUEST_GDTR_BASE, vmcs12->guest_gdtr_base);
-	vmcs_writel(GUEST_IDTR_BASE, vmcs12->guest_idtr_base);
-
-	vmcs_write64(GUEST_IA32_DEBUGCTL, vmcs12->guest_ia32_debugctl);
+	vmcs_write16(GUEST_ES_SELECTOR, vmcs12_read(vcpu, GUEST_ES_SELECTOR));
+	vmcs_write16(GUEST_CS_SELECTOR, vmcs12_read(vcpu, GUEST_CS_SELECTOR));
+	vmcs_write16(GUEST_SS_SELECTOR, vmcs12_read(vcpu, GUEST_SS_SELECTOR));
+	vmcs_write16(GUEST_DS_SELECTOR, vmcs12_read(vcpu, GUEST_DS_SELECTOR));
+	vmcs_write16(GUEST_FS_SELECTOR, vmcs12_read(vcpu, GUEST_FS_SELECTOR));
+	vmcs_write16(GUEST_GS_SELECTOR, vmcs12_read(vcpu, GUEST_GS_SELECTOR));
+	vmcs_write16(GUEST_LDTR_SELECTOR,
+		vmcs12_read(vcpu, GUEST_LDTR_SELECTOR));
+	vmcs_write16(GUEST_TR_SELECTOR, vmcs12_read(vcpu, GUEST_TR_SELECTOR));
+	vmcs_write32(GUEST_ES_LIMIT, vmcs12_read(vcpu, GUEST_ES_LIMIT));
+	vmcs_write32(GUEST_CS_LIMIT, vmcs12_read(vcpu, GUEST_CS_LIMIT));
+	vmcs_write32(GUEST_SS_LIMIT, vmcs12_read(vcpu, GUEST_SS_LIMIT));
+	vmcs_write32(GUEST_DS_LIMIT, vmcs12_read(vcpu, GUEST_DS_LIMIT));
+	vmcs_write32(GUEST_FS_LIMIT, vmcs12_read(vcpu, GUEST_FS_LIMIT));
+	vmcs_write32(GUEST_GS_LIMIT, vmcs12_read(vcpu, GUEST_GS_LIMIT));
+	vmcs_write32(GUEST_LDTR_LIMIT, vmcs12_read(vcpu, GUEST_LDTR_LIMIT));
+	vmcs_write32(GUEST_TR_LIMIT, vmcs12_read(vcpu, GUEST_TR_LIMIT));
+	vmcs_write32(GUEST_GDTR_LIMIT, vmcs12_read(vcpu, GUEST_GDTR_LIMIT));
+	vmcs_write32(GUEST_IDTR_LIMIT, vmcs12_read(vcpu, GUEST_IDTR_LIMIT));
+	vmcs_write32(GUEST_ES_AR_BYTES, vmcs12_read(vcpu, GUEST_ES_AR_BYTES));
+	vmcs_write32(GUEST_CS_AR_BYTES, vmcs12_read(vcpu, GUEST_CS_AR_BYTES));
+	vmcs_write32(GUEST_SS_AR_BYTES, vmcs12_read(vcpu, GUEST_SS_AR_BYTES));
+	vmcs_write32(GUEST_DS_AR_BYTES, vmcs12_read(vcpu, GUEST_DS_AR_BYTES));
+	vmcs_write32(GUEST_FS_AR_BYTES, vmcs12_read(vcpu, GUEST_FS_AR_BYTES));
+	vmcs_write32(GUEST_GS_AR_BYTES, vmcs12_read(vcpu, GUEST_GS_AR_BYTES));
+	vmcs_write32(GUEST_LDTR_AR_BYTES,
+		vmcs12_read(vcpu, GUEST_LDTR_AR_BYTES));
+	vmcs_write32(GUEST_TR_AR_BYTES, vmcs12_read(vcpu, GUEST_TR_AR_BYTES));
+	vmcs_writel(GUEST_ES_BASE, vmcs12_read(vcpu, GUEST_ES_BASE));
+	vmcs_writel(GUEST_CS_BASE, vmcs12_read(vcpu, GUEST_CS_BASE));
+	vmcs_writel(GUEST_SS_BASE, vmcs12_read(vcpu, GUEST_SS_BASE));
+	vmcs_writel(GUEST_DS_BASE, vmcs12_read(vcpu, GUEST_DS_BASE));
+	vmcs_writel(GUEST_FS_BASE, vmcs12_read(vcpu, GUEST_FS_BASE));
+	vmcs_writel(GUEST_GS_BASE, vmcs12_read(vcpu, GUEST_GS_BASE));
+	vmcs_writel(GUEST_LDTR_BASE, vmcs12_read(vcpu, GUEST_LDTR_BASE));
+	vmcs_writel(GUEST_TR_BASE, vmcs12_read(vcpu, GUEST_TR_BASE));
+	vmcs_writel(GUEST_GDTR_BASE, vmcs12_read(vcpu, GUEST_GDTR_BASE));
+	vmcs_writel(GUEST_IDTR_BASE, vmcs12_read(vcpu, GUEST_IDTR_BASE));
+
+	vmcs_write64(GUEST_IA32_DEBUGCTL,
+		vmcs12_read(vcpu, GUEST_IA32_DEBUGCTL));
 	vmcs_write32(VM_ENTRY_INTR_INFO_FIELD,
-		vmcs12->vm_entry_intr_info_field);
+		vmcs12_read(vcpu, VM_ENTRY_INTR_INFO_FIELD));
 	vmcs_write32(VM_ENTRY_EXCEPTION_ERROR_CODE,
-		vmcs12->vm_entry_exception_error_code);
+		vmcs12_read(vcpu, VM_ENTRY_EXCEPTION_ERROR_CODE));
 	vmcs_write32(VM_ENTRY_INSTRUCTION_LEN,
-		vmcs12->vm_entry_instruction_len);
+		vmcs12_read(vcpu, VM_ENTRY_INSTRUCTION_LEN));
 	vmcs_write32(GUEST_INTERRUPTIBILITY_INFO,
-		vmcs12->guest_interruptibility_info);
-	vmcs_write32(GUEST_ACTIVITY_STATE, vmcs12->guest_activity_state);
-	vmcs_write32(GUEST_SYSENTER_CS, vmcs12->guest_sysenter_cs);
-	vmcs_writel(GUEST_DR7, vmcs12->guest_dr7);
-	vmcs_writel(GUEST_RFLAGS, vmcs12->guest_rflags);
+		vmcs12_read(vcpu, GUEST_INTERRUPTIBILITY_INFO));
+	vmcs_write32(GUEST_ACTIVITY_STATE,
+		vmcs12_read(vcpu, GUEST_ACTIVITY_STATE));
+	vmcs_write32(GUEST_SYSENTER_CS, vmcs12_read(vcpu, GUEST_SYSENTER_CS));
+	vmcs_writel(GUEST_DR7, vmcs12_read(vcpu, GUEST_DR7));
+	vmcs_writel(GUEST_RFLAGS, vmcs12_read(vcpu, GUEST_RFLAGS));
 	vmcs_writel(GUEST_PENDING_DBG_EXCEPTIONS,
-		vmcs12->guest_pending_dbg_exceptions);
-	vmcs_writel(GUEST_SYSENTER_ESP, vmcs12->guest_sysenter_esp);
-	vmcs_writel(GUEST_SYSENTER_EIP, vmcs12->guest_sysenter_eip);
+		vmcs12_read(vcpu, GUEST_PENDING_DBG_EXCEPTIONS));
+	vmcs_writel(GUEST_SYSENTER_ESP, vmcs12_read(vcpu, GUEST_SYSENTER_ESP));
+	vmcs_writel(GUEST_SYSENTER_EIP, vmcs12_read(vcpu, GUEST_SYSENTER_EIP));
 
 	vmcs_write64(VMCS_LINK_POINTER, -1ull);
 
 	vmcs_write32(PIN_BASED_VM_EXEC_CONTROL,
 		(vmcs_config.pin_based_exec_ctrl |
-		 vmcs12->pin_based_vm_exec_control));
+		 vmcs12_read(vcpu, PIN_BASED_VM_EXEC_CONTROL)));
 
 	/*
 	 * Whether page-faults are trapped is determined by a combination of
@@ -6676,9 +6686,11 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
 	 * page tables), using walk_addr(), when injecting PFs to L1.
 	 */
 	vmcs_write32(PAGE_FAULT_ERROR_CODE_MASK,
-		enable_ept ? vmcs12->page_fault_error_code_mask : 0);
+		enable_ept ? vmcs12_read(vcpu, PAGE_FAULT_ERROR_CODE_MASK) : 0);
 	vmcs_write32(PAGE_FAULT_ERROR_CODE_MATCH,
-		enable_ept ? vmcs12->page_fault_error_code_match : 0);
+		enable_ept ?
+		vmcs12_read(vcpu, PAGE_FAULT_ERROR_CODE_MATCH) :
+		0);
 
 	if (cpu_has_secondary_exec_ctrls()) {
 		u32 exec_control = vmx_secondary_exec_control(vmx);
@@ -6687,7 +6699,8 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
 		/* Take the following fields only from vmcs12 */
 		exec_control &= ~SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
 		if (nested_cpu_has(vcpu, CPU_BASED_ACTIVATE_SECONDARY_CONTROLS))
-			exec_control |= vmcs12->secondary_vm_exec_control;
+			exec_control |=
+				vmcs12_read(vcpu, SECONDARY_VM_EXEC_CONTROL);
 
 		if (exec_control & SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES) {
 			/*
@@ -6699,7 +6712,8 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
 			if (vmx->nested.apic_access_page) /* shouldn't happen */
 				nested_release_page(vmx->nested.apic_access_page);
 			vmx->nested.apic_access_page =
-				nested_get_page(vcpu, vmcs12->apic_access_addr);
+				nested_get_page(vcpu,
+					vmcs12_read(vcpu, APIC_ACCESS_ADDR));
 			/*
 			 * If translation failed, no matter: This feature asks
 			 * to exit when accessing the given address, and if it
@@ -6739,7 +6753,7 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
 	exec_control &= ~CPU_BASED_VIRTUAL_INTR_PENDING;
 	exec_control &= ~CPU_BASED_VIRTUAL_NMI_PENDING;
 	exec_control &= ~CPU_BASED_TPR_SHADOW;
-	exec_control |= vmcs12->cpu_based_vm_exec_control;
+	exec_control |= vmcs12_read(vcpu, CPU_BASED_VM_EXEC_CONTROL);
 	/*
 	 * Merging of IO and MSR bitmaps not currently supported.
 	 * Rather, exit every time.
@@ -6755,26 +6769,29 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
 	 * trap. Note that CR0.TS also needs updating - we do this later.
 	 */
 	update_exception_bitmap(vcpu);
-	vcpu->arch.cr0_guest_owned_bits &= ~vmcs12->cr0_guest_host_mask;
+	vcpu->arch.cr0_guest_owned_bits &=
+		~vmcs12_read(vcpu, CR0_GUEST_HOST_MASK);
 	vmcs_writel(CR0_GUEST_HOST_MASK, ~vcpu->arch.cr0_guest_owned_bits);
 
 	/* Note: IA32_MODE, LOAD_IA32_EFER are modified by vmx_set_efer below */
 	vmcs_write32(VM_EXIT_CONTROLS,
-		vmcs12->vm_exit_controls | vmcs_config.vmexit_ctrl);
-	vmcs_write32(VM_ENTRY_CONTROLS, vmcs12->vm_entry_controls |
+		vmcs12_read(vcpu, VM_EXIT_CONTROLS) | vmcs_config.vmexit_ctrl);
+	vmcs_write32(VM_ENTRY_CONTROLS, vmcs12_read(vcpu, VM_ENTRY_CONTROLS) |
 		(vmcs_config.vmentry_ctrl & ~VM_ENTRY_IA32E_MODE));
 
-	if (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_PAT)
-		vmcs_write64(GUEST_IA32_PAT, vmcs12->guest_ia32_pat);
+	if (vmcs12_read(vcpu, VM_ENTRY_CONTROLS) & VM_ENTRY_LOAD_IA32_PAT)
+		vmcs_write64(GUEST_IA32_PAT, vmcs12_read(vcpu, GUEST_IA32_PAT));
 	else if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT)
 		vmcs_write64(GUEST_IA32_PAT, vmx->vcpu.arch.pat);
 
 
 	set_cr4_guest_host_mask(vmx);
 
-	if (vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_TSC_OFFSETING)
+	if (vmcs12_read(vcpu, CPU_BASED_VM_EXEC_CONTROL) &
+		CPU_BASED_USE_TSC_OFFSETING)
 		vmcs_write64(TSC_OFFSET,
-			vmx->nested.vmcs01_tsc_offset + vmcs12->tsc_offset);
+			vmx->nested.vmcs01_tsc_offset +
+				vmcs12_read(vcpu, TSC_OFFSET));
 	else
 		vmcs_write64(TSC_OFFSET, vmx->nested.vmcs01_tsc_offset);
 
@@ -6788,9 +6805,9 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
 		vmx_flush_tlb(vcpu);
 	}
 
-	if (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_EFER)
-		vcpu->arch.efer = vmcs12->guest_ia32_efer;
-	if (vmcs12->vm_entry_controls & VM_ENTRY_IA32E_MODE)
+	if (vmcs12_read(vcpu, VM_ENTRY_CONTROLS) & VM_ENTRY_LOAD_IA32_EFER)
+		vcpu->arch.efer = vmcs12_read(vcpu, GUEST_IA32_EFER);
+	if (vmcs12_read(vcpu, VM_ENTRY_CONTROLS) & VM_ENTRY_IA32E_MODE)
 		vcpu->arch.efer |= (EFER_LMA | EFER_LME);
 	else
 		vcpu->arch.efer &= ~(EFER_LMA | EFER_LME);
@@ -6805,18 +6822,18 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
 	 * vmcs12->cr0_read_shadow because on our cr0_guest_host_mask we we
 	 * have more bits than L1 expected.
 	 */
-	vmx_set_cr0(vcpu, vmcs12->guest_cr0);
-	vmcs_writel(CR0_READ_SHADOW, nested_read_cr0(vmcs12));
+	vmx_set_cr0(vcpu, vmcs12_read(vcpu, GUEST_CR0));
+	vmcs_writel(CR0_READ_SHADOW, nested_read_cr0(vcpu));
 
-	vmx_set_cr4(vcpu, vmcs12->guest_cr4);
-	vmcs_writel(CR4_READ_SHADOW, nested_read_cr4(vmcs12));
+	vmx_set_cr4(vcpu, vmcs12_read(vcpu, GUEST_CR4));
+	vmcs_writel(CR4_READ_SHADOW, nested_read_cr4(vcpu));
 
 	/* shadow page tables on either EPT or shadow page tables */
-	kvm_set_cr3(vcpu, vmcs12->guest_cr3);
+	kvm_set_cr3(vcpu, vmcs12_read(vcpu, GUEST_CR3));
 	kvm_mmu_reset_context(vcpu);
 
-	kvm_register_write(vcpu, VCPU_REGS_RSP, vmcs12->guest_rsp);
-	kvm_register_write(vcpu, VCPU_REGS_RIP, vmcs12->guest_rip);
+	kvm_register_write(vcpu, VCPU_REGS_RSP, vmcs12_read(vcpu, GUEST_RSP));
+	kvm_register_write(vcpu, VCPU_REGS_RIP, vmcs12_read(vcpu, GUEST_RIP));
 }
 
 /*
@@ -6854,59 +6871,64 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
 		return 1;
 	}
 
-	if ((vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_MSR_BITMAPS) &&
-			!IS_ALIGNED(vmcs12->msr_bitmap, PAGE_SIZE)) {
+	if ((vmcs12_read(vcpu, CPU_BASED_VM_EXEC_CONTROL) &
+		CPU_BASED_USE_MSR_BITMAPS) &&
+			!IS_ALIGNED(vmcs12_read(vcpu, MSR_BITMAP), PAGE_SIZE)) {
 		/*TODO: Also verify bits beyond physical address width are 0*/
 		nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD);
 		return 1;
 	}
 
 	if (nested_cpu_has2(vcpu, SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES) &&
-			!IS_ALIGNED(vmcs12->apic_access_addr, PAGE_SIZE)) {
+		!IS_ALIGNED(vmcs12_read(vcpu, APIC_ACCESS_ADDR), PAGE_SIZE)) {
 		/*TODO: Also verify bits beyond physical address width are 0*/
 		nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD);
 		return 1;
 	}
 
-	if (vmcs12->vm_entry_msr_load_count > 0 ||
-	    vmcs12->vm_exit_msr_load_count > 0 ||
-	    vmcs12->vm_exit_msr_store_count > 0) {
+	if (vmcs12_read(vcpu, VM_ENTRY_MSR_LOAD_COUNT) > 0 ||
+	    vmcs12_read(vcpu, VM_EXIT_MSR_LOAD_COUNT) > 0 ||
+	    vmcs12_read(vcpu, VM_EXIT_MSR_STORE_COUNT) > 0) {
 		pr_warn_ratelimited("%s: VMCS MSR_{LOAD,STORE} unsupported\n",
 				    __func__);
 		nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD);
 		return 1;
 	}
 
-	if (!vmx_control_verify(vmcs12->cpu_based_vm_exec_control,
+	if (!vmx_control_verify(vmcs12_read(vcpu, CPU_BASED_VM_EXEC_CONTROL),
 	      nested_vmx_procbased_ctls_low, nested_vmx_procbased_ctls_high) ||
-	    !vmx_control_verify(vmcs12->secondary_vm_exec_control,
+	    !vmx_control_verify(vmcs12_read(vcpu, SECONDARY_VM_EXEC_CONTROL),
 	      nested_vmx_secondary_ctls_low, nested_vmx_secondary_ctls_high) ||
-	    !vmx_control_verify(vmcs12->pin_based_vm_exec_control,
+	    !vmx_control_verify(vmcs12_read(vcpu, PIN_BASED_VM_EXEC_CONTROL),
 	      nested_vmx_pinbased_ctls_low, nested_vmx_pinbased_ctls_high) ||
-	    !vmx_control_verify(vmcs12->vm_exit_controls,
+	    !vmx_control_verify(vmcs12_read(vcpu, VM_EXIT_CONTROLS),
 	      nested_vmx_exit_ctls_low, nested_vmx_exit_ctls_high) ||
-	    !vmx_control_verify(vmcs12->vm_entry_controls,
+	    !vmx_control_verify(vmcs12_read(vcpu, VM_ENTRY_CONTROLS),
 	      nested_vmx_entry_ctls_low, nested_vmx_entry_ctls_high))
 	{
 		nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD);
 		return 1;
 	}
 
-	if (((vmcs12->host_cr0 & VMXON_CR0_ALWAYSON) != VMXON_CR0_ALWAYSON) ||
-	    ((vmcs12->host_cr4 & VMXON_CR4_ALWAYSON) != VMXON_CR4_ALWAYSON)) {
+	if (((vmcs12_read(vcpu, HOST_CR0) & VMXON_CR0_ALWAYSON) !=
+		VMXON_CR0_ALWAYSON) ||
+	    ((vmcs12_read(vcpu, HOST_CR4) & VMXON_CR4_ALWAYSON) !=
+		VMXON_CR4_ALWAYSON)) {
 		nested_vmx_failValid(vcpu,
 			VMXERR_ENTRY_INVALID_HOST_STATE_FIELD);
 		return 1;
 	}
 
-	if (((vmcs12->guest_cr0 & VMXON_CR0_ALWAYSON) != VMXON_CR0_ALWAYSON) ||
-	    ((vmcs12->guest_cr4 & VMXON_CR4_ALWAYSON) != VMXON_CR4_ALWAYSON)) {
-		nested_vmx_entry_failure(vcpu, vmcs12,
+	if (((vmcs12_read(vcpu, GUEST_CR0) & VMXON_CR0_ALWAYSON) !=
+		VMXON_CR0_ALWAYSON) ||
+	    ((vmcs12_read(vcpu, GUEST_CR4) & VMXON_CR4_ALWAYSON) !=
+		VMXON_CR4_ALWAYSON)) {
+		nested_vmx_entry_failure(vcpu,
 			EXIT_REASON_INVALID_STATE, ENTRY_FAIL_DEFAULT);
 		return 1;
 	}
-	if (vmcs12->vmcs_link_pointer != -1ull) {
-		nested_vmx_entry_failure(vcpu, vmcs12,
+	if (vmcs12_read(vcpu, VMCS_LINK_POINTER) != -1ull) {
+		nested_vmx_entry_failure(vcpu,
 			EXIT_REASON_INVALID_STATE, ENTRY_FAIL_VMCS_LINK_PTR);
 		return 1;
 	}
@@ -6933,7 +6955,7 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
 
 	vmcs12->launch_state = 1;
 
-	prepare_vmcs02(vcpu, vmcs12);
+	prepare_vmcs02(vcpu);
 
 	/*
 	 * Note no nested_vmx_succeed or nested_vmx_fail here. At this point
@@ -6962,22 +6984,26 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
  *     put them in vmcs02 CR0_READ_SHADOW. So take these bits from there.
  */
 static inline unsigned long
-vmcs12_guest_cr0(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
+vmcs12_guest_cr0(struct kvm_vcpu *vcpu)
 {
 	return
 	/*1*/	(vmcs_readl(GUEST_CR0) & vcpu->arch.cr0_guest_owned_bits) |
-	/*2*/	(vmcs12->guest_cr0 & vmcs12->cr0_guest_host_mask) |
-	/*3*/	(vmcs_readl(CR0_READ_SHADOW) & ~(vmcs12->cr0_guest_host_mask |
+	/*2*/	(vmcs12_read(vcpu, GUEST_CR0) &
+			vmcs12_read(vcpu, CR0_GUEST_HOST_MASK)) |
+	/*3*/	(vmcs_readl(CR0_READ_SHADOW) &
+			~(vmcs12_read(vcpu, CR0_GUEST_HOST_MASK) |
 			vcpu->arch.cr0_guest_owned_bits));
 }
 
 static inline unsigned long
-vmcs12_guest_cr4(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
+vmcs12_guest_cr4(struct kvm_vcpu *vcpu)
 {
 	return
 	/*1*/	(vmcs_readl(GUEST_CR4) & vcpu->arch.cr4_guest_owned_bits) |
-	/*2*/	(vmcs12->guest_cr4 & vmcs12->cr4_guest_host_mask) |
-	/*3*/	(vmcs_readl(CR4_READ_SHADOW) & ~(vmcs12->cr4_guest_host_mask |
+	/*2*/	(vmcs12_read(vcpu, GUEST_CR4) &
+			vmcs12_read(vcpu, CR4_GUEST_HOST_MASK)) |
+	/*3*/	(vmcs_readl(CR4_READ_SHADOW) &
+			~(vmcs12_read(vcpu, CR4_GUEST_HOST_MASK) |
 			vcpu->arch.cr4_guest_owned_bits));
 }
 
@@ -6992,86 +7018,100 @@ vmcs12_guest_cr4(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
  * exit-information fields only. Other fields are modified by L1 with VMWRITE,
  * which already writes to vmcs12 directly.
  */
-void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
+void prepare_vmcs12(struct kvm_vcpu *vcpu)
 {
+	u64 guest_dr7;
 	/* update guest state fields: */
-	vmcs12->guest_cr0 = vmcs12_guest_cr0(vcpu, vmcs12);
-	vmcs12->guest_cr4 = vmcs12_guest_cr4(vcpu, vmcs12);
-
-	kvm_get_dr(vcpu, 7, (unsigned long *)&vmcs12->guest_dr7);
-	vmcs12->guest_rsp = kvm_register_read(vcpu, VCPU_REGS_RSP);
-	vmcs12->guest_rip = kvm_register_read(vcpu, VCPU_REGS_RIP);
-	vmcs12->guest_rflags = vmcs_readl(GUEST_RFLAGS);
-
-	vmcs12->guest_es_selector = vmcs_read16(GUEST_ES_SELECTOR);
-	vmcs12->guest_cs_selector = vmcs_read16(GUEST_CS_SELECTOR);
-	vmcs12->guest_ss_selector = vmcs_read16(GUEST_SS_SELECTOR);
-	vmcs12->guest_ds_selector = vmcs_read16(GUEST_DS_SELECTOR);
-	vmcs12->guest_fs_selector = vmcs_read16(GUEST_FS_SELECTOR);
-	vmcs12->guest_gs_selector = vmcs_read16(GUEST_GS_SELECTOR);
-	vmcs12->guest_ldtr_selector = vmcs_read16(GUEST_LDTR_SELECTOR);
-	vmcs12->guest_tr_selector = vmcs_read16(GUEST_TR_SELECTOR);
-	vmcs12->guest_es_limit = vmcs_read32(GUEST_ES_LIMIT);
-	vmcs12->guest_cs_limit = vmcs_read32(GUEST_CS_LIMIT);
-	vmcs12->guest_ss_limit = vmcs_read32(GUEST_SS_LIMIT);
-	vmcs12->guest_ds_limit = vmcs_read32(GUEST_DS_LIMIT);
-	vmcs12->guest_fs_limit = vmcs_read32(GUEST_FS_LIMIT);
-	vmcs12->guest_gs_limit = vmcs_read32(GUEST_GS_LIMIT);
-	vmcs12->guest_ldtr_limit = vmcs_read32(GUEST_LDTR_LIMIT);
-	vmcs12->guest_tr_limit = vmcs_read32(GUEST_TR_LIMIT);
-	vmcs12->guest_gdtr_limit = vmcs_read32(GUEST_GDTR_LIMIT);
-	vmcs12->guest_idtr_limit = vmcs_read32(GUEST_IDTR_LIMIT);
-	vmcs12->guest_es_ar_bytes = vmcs_read32(GUEST_ES_AR_BYTES);
-	vmcs12->guest_cs_ar_bytes = vmcs_read32(GUEST_CS_AR_BYTES);
-	vmcs12->guest_ss_ar_bytes = vmcs_read32(GUEST_SS_AR_BYTES);
-	vmcs12->guest_ds_ar_bytes = vmcs_read32(GUEST_DS_AR_BYTES);
-	vmcs12->guest_fs_ar_bytes = vmcs_read32(GUEST_FS_AR_BYTES);
-	vmcs12->guest_gs_ar_bytes = vmcs_read32(GUEST_GS_AR_BYTES);
-	vmcs12->guest_ldtr_ar_bytes = vmcs_read32(GUEST_LDTR_AR_BYTES);
-	vmcs12->guest_tr_ar_bytes = vmcs_read32(GUEST_TR_AR_BYTES);
-	vmcs12->guest_es_base = vmcs_readl(GUEST_ES_BASE);
-	vmcs12->guest_cs_base = vmcs_readl(GUEST_CS_BASE);
-	vmcs12->guest_ss_base = vmcs_readl(GUEST_SS_BASE);
-	vmcs12->guest_ds_base = vmcs_readl(GUEST_DS_BASE);
-	vmcs12->guest_fs_base = vmcs_readl(GUEST_FS_BASE);
-	vmcs12->guest_gs_base = vmcs_readl(GUEST_GS_BASE);
-	vmcs12->guest_ldtr_base = vmcs_readl(GUEST_LDTR_BASE);
-	vmcs12->guest_tr_base = vmcs_readl(GUEST_TR_BASE);
-	vmcs12->guest_gdtr_base = vmcs_readl(GUEST_GDTR_BASE);
-	vmcs12->guest_idtr_base = vmcs_readl(GUEST_IDTR_BASE);
-
-	vmcs12->guest_activity_state = vmcs_read32(GUEST_ACTIVITY_STATE);
-	vmcs12->guest_interruptibility_info =
-		vmcs_read32(GUEST_INTERRUPTIBILITY_INFO);
-	vmcs12->guest_pending_dbg_exceptions =
-		vmcs_readl(GUEST_PENDING_DBG_EXCEPTIONS);
+	vmcs12_write(vcpu, GUEST_CR0, vmcs12_guest_cr0(vcpu));
+	vmcs12_write(vcpu, GUEST_CR4, vmcs12_guest_cr4(vcpu));
+
+	kvm_get_dr(vcpu, 7, (unsigned long *)&guest_dr7);
+	vmcs12_write(vcpu, GUEST_DR7, guest_dr7);
+	vmcs12_write(vcpu, GUEST_RSP, kvm_register_read(vcpu, VCPU_REGS_RSP));
+	vmcs12_write(vcpu, GUEST_RIP, kvm_register_read(vcpu, VCPU_REGS_RIP));
+	vmcs12_write(vcpu, GUEST_RFLAGS, vmcs_readl(GUEST_RFLAGS));
+
+	vmcs12_write(vcpu, GUEST_ES_SELECTOR, vmcs_read16(GUEST_ES_SELECTOR));
+	vmcs12_write(vcpu, GUEST_CS_SELECTOR, vmcs_read16(GUEST_CS_SELECTOR));
+	vmcs12_write(vcpu, GUEST_SS_SELECTOR, vmcs_read16(GUEST_SS_SELECTOR));
+	vmcs12_write(vcpu, GUEST_DS_SELECTOR, vmcs_read16(GUEST_DS_SELECTOR));
+	vmcs12_write(vcpu, GUEST_FS_SELECTOR, vmcs_read16(GUEST_FS_SELECTOR));
+	vmcs12_write(vcpu, GUEST_GS_SELECTOR, vmcs_read16(GUEST_GS_SELECTOR));
+	vmcs12_write(vcpu, GUEST_LDTR_SELECTOR,
+		vmcs_read16(GUEST_LDTR_SELECTOR));
+	vmcs12_write(vcpu, GUEST_TR_SELECTOR, vmcs_read16(GUEST_TR_SELECTOR));
+	vmcs12_write(vcpu, GUEST_ES_LIMIT, vmcs_read32(GUEST_ES_LIMIT));
+	vmcs12_write(vcpu, GUEST_CS_LIMIT, vmcs_read32(GUEST_CS_LIMIT));
+	vmcs12_write(vcpu, GUEST_SS_LIMIT, vmcs_read32(GUEST_SS_LIMIT));
+	vmcs12_write(vcpu, GUEST_DS_LIMIT, vmcs_read32(GUEST_DS_LIMIT));
+	vmcs12_write(vcpu, GUEST_FS_LIMIT, vmcs_read32(GUEST_FS_LIMIT));
+	vmcs12_write(vcpu, GUEST_GS_LIMIT, vmcs_read32(GUEST_GS_LIMIT));
+	vmcs12_write(vcpu, GUEST_LDTR_LIMIT, vmcs_read32(GUEST_LDTR_LIMIT));
+	vmcs12_write(vcpu, GUEST_TR_LIMIT, vmcs_read32(GUEST_TR_LIMIT));
+	vmcs12_write(vcpu, GUEST_GDTR_LIMIT, vmcs_read32(GUEST_GDTR_LIMIT));
+	vmcs12_write(vcpu, GUEST_IDTR_LIMIT, vmcs_read32(GUEST_IDTR_LIMIT));
+	vmcs12_write(vcpu, GUEST_ES_AR_BYTES, vmcs_read32(GUEST_ES_AR_BYTES));
+	vmcs12_write(vcpu, GUEST_CS_AR_BYTES, vmcs_read32(GUEST_CS_AR_BYTES));
+	vmcs12_write(vcpu, GUEST_SS_AR_BYTES, vmcs_read32(GUEST_SS_AR_BYTES));
+	vmcs12_write(vcpu, GUEST_DS_AR_BYTES, vmcs_read32(GUEST_DS_AR_BYTES));
+	vmcs12_write(vcpu, GUEST_FS_AR_BYTES, vmcs_read32(GUEST_FS_AR_BYTES));
+	vmcs12_write(vcpu, GUEST_GS_AR_BYTES, vmcs_read32(GUEST_GS_AR_BYTES));
+	vmcs12_write(vcpu, GUEST_LDTR_AR_BYTES,
+		vmcs_read32(GUEST_LDTR_AR_BYTES));
+	vmcs12_write(vcpu, GUEST_TR_AR_BYTES, vmcs_read32(GUEST_TR_AR_BYTES));
+	vmcs12_write(vcpu, GUEST_ES_BASE, vmcs_readl(GUEST_ES_BASE));
+	vmcs12_write(vcpu, GUEST_CS_BASE, vmcs_readl(GUEST_CS_BASE));
+	vmcs12_write(vcpu, GUEST_SS_BASE, vmcs_readl(GUEST_SS_BASE));
+	vmcs12_write(vcpu, GUEST_DS_BASE, vmcs_readl(GUEST_DS_BASE));
+	vmcs12_write(vcpu, GUEST_FS_BASE, vmcs_readl(GUEST_FS_BASE));
+	vmcs12_write(vcpu, GUEST_GS_BASE, vmcs_readl(GUEST_GS_BASE));
+	vmcs12_write(vcpu, GUEST_LDTR_BASE, vmcs_readl(GUEST_LDTR_BASE));
+	vmcs12_write(vcpu, GUEST_TR_BASE, vmcs_readl(GUEST_TR_BASE));
+	vmcs12_write(vcpu, GUEST_GDTR_BASE, vmcs_readl(GUEST_GDTR_BASE));
+	vmcs12_write(vcpu, GUEST_IDTR_BASE, vmcs_readl(GUEST_IDTR_BASE));
+
+	vmcs12_write(vcpu, GUEST_ACTIVITY_STATE,
+		vmcs_read32(GUEST_ACTIVITY_STATE));
+	vmcs12_write(vcpu, GUEST_INTERRUPTIBILITY_INFO,
+		vmcs_read32(GUEST_INTERRUPTIBILITY_INFO));
+	vmcs12_write(vcpu, GUEST_PENDING_DBG_EXCEPTIONS,
+		vmcs_readl(GUEST_PENDING_DBG_EXCEPTIONS));
 
 	/* TODO: These cannot have changed unless we have MSR bitmaps and
 	 * the relevant bit asks not to trap the change */
-	vmcs12->guest_ia32_debugctl = vmcs_read64(GUEST_IA32_DEBUGCTL);
-	if (vmcs12->vm_entry_controls & VM_EXIT_SAVE_IA32_PAT)
-		vmcs12->guest_ia32_pat = vmcs_read64(GUEST_IA32_PAT);
-	vmcs12->guest_sysenter_cs = vmcs_read32(GUEST_SYSENTER_CS);
-	vmcs12->guest_sysenter_esp = vmcs_readl(GUEST_SYSENTER_ESP);
-	vmcs12->guest_sysenter_eip = vmcs_readl(GUEST_SYSENTER_EIP);
+	vmcs12_write(vcpu, GUEST_IA32_DEBUGCTL,
+		vmcs_read64(GUEST_IA32_DEBUGCTL));
+	if (vmcs12_read(vcpu, VM_ENTRY_CONTROLS) & VM_EXIT_SAVE_IA32_PAT)
+		vmcs12_write(vcpu, GUEST_IA32_PAT,
+			vmcs_read64(GUEST_IA32_PAT));
+	vmcs12_write(vcpu, GUEST_SYSENTER_CS, vmcs_read32(GUEST_SYSENTER_CS));
+	vmcs12_write(vcpu, GUEST_SYSENTER_ESP, vmcs_readl(GUEST_SYSENTER_ESP));
+	vmcs12_write(vcpu, GUEST_SYSENTER_EIP, vmcs_readl(GUEST_SYSENTER_EIP));
 
 	/* update exit information fields: */
 
-	vmcs12->vm_exit_reason  = vmcs_read32(VM_EXIT_REASON);
-	vmcs12->exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
-
-	vmcs12->vm_exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
-	vmcs12->vm_exit_intr_error_code = vmcs_read32(VM_EXIT_INTR_ERROR_CODE);
-	vmcs12->idt_vectoring_info_field =
-		vmcs_read32(IDT_VECTORING_INFO_FIELD);
-	vmcs12->idt_vectoring_error_code =
-		vmcs_read32(IDT_VECTORING_ERROR_CODE);
-	vmcs12->vm_exit_instruction_len = vmcs_read32(VM_EXIT_INSTRUCTION_LEN);
-	vmcs12->vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO);
+	vmcs12_write(vcpu, VM_EXIT_REASON, vmcs_read32(VM_EXIT_REASON));
+	vmcs12_write(vcpu, EXIT_QUALIFICATION, vmcs_readl(EXIT_QUALIFICATION));
+
+	vmcs12_write(vcpu, VM_EXIT_INTR_INFO, vmcs_read32(VM_EXIT_INTR_INFO));
+	vmcs12_write(vcpu, VM_EXIT_INTR_ERROR_CODE,
+		vmcs_read32(VM_EXIT_INTR_ERROR_CODE));
+	vmcs12_write(vcpu, IDT_VECTORING_INFO_FIELD,
+		vmcs_read32(IDT_VECTORING_INFO_FIELD));
+	vmcs12_write(vcpu, IDT_VECTORING_ERROR_CODE,
+		vmcs_read32(IDT_VECTORING_ERROR_CODE));
+	vmcs12_write(vcpu, VM_EXIT_INSTRUCTION_LEN,
+		vmcs_read32(VM_EXIT_INSTRUCTION_LEN));
+	vmcs12_write(vcpu, VMX_INSTRUCTION_INFO,
+		vmcs_read32(VMX_INSTRUCTION_INFO));
 
 	/* clear vm-entry fields which are to be cleared on exit */
-	if (!(vmcs12->vm_exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY))
-		vmcs12->vm_entry_intr_info_field &= ~INTR_INFO_VALID_MASK;
+	if (!(vmcs12_read(vcpu, VM_EXIT_REASON) &
+		VMX_EXIT_REASONS_FAILED_VMENTRY)) {
+		u64 intr_info = vmcs12_read(vcpu, VM_ENTRY_INTR_INFO_FIELD);
+		vmcs12_write(vcpu, VM_ENTRY_INTR_INFO_FIELD,
+			intr_info & ~INTR_INFO_VALID_MASK);
+	}
 }
 
 /*
@@ -7083,25 +7123,25 @@ void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
  * Failures During or After Loading Guest State").
  * This function should be called when the active VMCS is L1's (vmcs01).
  */
-void load_vmcs12_host_state(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
+void load_vmcs12_host_state(struct kvm_vcpu *vcpu)
 {
-	if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_EFER)
-		vcpu->arch.efer = vmcs12->host_ia32_efer;
-	if (vmcs12->vm_exit_controls & VM_EXIT_HOST_ADDR_SPACE_SIZE)
+	if (vmcs12_read(vcpu, VM_EXIT_CONTROLS) & VM_EXIT_LOAD_IA32_EFER)
+		vcpu->arch.efer = vmcs12_read(vcpu, HOST_IA32_EFER);
+	if (vmcs12_read(vcpu, VM_EXIT_CONTROLS) & VM_EXIT_HOST_ADDR_SPACE_SIZE)
 		vcpu->arch.efer |= (EFER_LMA | EFER_LME);
 	else
 		vcpu->arch.efer &= ~(EFER_LMA | EFER_LME);
 	vmx_set_efer(vcpu, vcpu->arch.efer);
 
-	kvm_register_write(vcpu, VCPU_REGS_RSP, vmcs12->host_rsp);
-	kvm_register_write(vcpu, VCPU_REGS_RIP, vmcs12->host_rip);
+	kvm_register_write(vcpu, VCPU_REGS_RSP, vmcs12_read(vcpu, HOST_RSP));
+	kvm_register_write(vcpu, VCPU_REGS_RIP, vmcs12_read(vcpu, HOST_RIP));
 	/*
 	 * Note that calling vmx_set_cr0 is important, even if cr0 hasn't
 	 * actually changed, because it depends on the current state of
 	 * fpu_active (which may have changed).
 	 * Note that vmx_set_cr0 refers to efer set above.
 	 */
-	kvm_set_cr0(vcpu, vmcs12->host_cr0);
+	kvm_set_cr0(vcpu, vmcs12_read(vcpu, HOST_CR0));
 	/*
 	 * If we did fpu_activate()/fpu_deactivate() during L2's run, we need
 	 * to apply the same changes to L1's vmcs. We just set cr0 correctly,
@@ -7116,10 +7156,10 @@ void load_vmcs12_host_state(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
 	 * (KVM doesn't change it)- no reason to call set_cr4_guest_host_mask();
 	 */
 	vcpu->arch.cr4_guest_owned_bits = ~vmcs_readl(CR4_GUEST_HOST_MASK);
-	kvm_set_cr4(vcpu, vmcs12->host_cr4);
+	kvm_set_cr4(vcpu, vmcs12_read(vcpu, HOST_CR4));
 
 	/* shadow page tables on either EPT or shadow page tables */
-	kvm_set_cr3(vcpu, vmcs12->host_cr3);
+	kvm_set_cr3(vcpu, vmcs12_read(vcpu, HOST_CR3));
 	kvm_mmu_reset_context(vcpu);
 
 	if (enable_vpid) {
@@ -7132,27 +7172,31 @@ void load_vmcs12_host_state(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
 	}
 
 
-	vmcs_write32(GUEST_SYSENTER_CS, vmcs12->host_ia32_sysenter_cs);
-	vmcs_writel(GUEST_SYSENTER_ESP, vmcs12->host_ia32_sysenter_esp);
-	vmcs_writel(GUEST_SYSENTER_EIP, vmcs12->host_ia32_sysenter_eip);
-	vmcs_writel(GUEST_IDTR_BASE, vmcs12->host_idtr_base);
-	vmcs_writel(GUEST_GDTR_BASE, vmcs12->host_gdtr_base);
-	vmcs_writel(GUEST_TR_BASE, vmcs12->host_tr_base);
-	vmcs_writel(GUEST_GS_BASE, vmcs12->host_gs_base);
-	vmcs_writel(GUEST_FS_BASE, vmcs12->host_fs_base);
-	vmcs_write16(GUEST_ES_SELECTOR, vmcs12->host_es_selector);
-	vmcs_write16(GUEST_CS_SELECTOR, vmcs12->host_cs_selector);
-	vmcs_write16(GUEST_SS_SELECTOR, vmcs12->host_ss_selector);
-	vmcs_write16(GUEST_DS_SELECTOR, vmcs12->host_ds_selector);
-	vmcs_write16(GUEST_FS_SELECTOR, vmcs12->host_fs_selector);
-	vmcs_write16(GUEST_GS_SELECTOR, vmcs12->host_gs_selector);
-	vmcs_write16(GUEST_TR_SELECTOR, vmcs12->host_tr_selector);
-
-	if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_PAT)
-		vmcs_write64(GUEST_IA32_PAT, vmcs12->host_ia32_pat);
-	if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL)
+	vmcs_write32(GUEST_SYSENTER_CS,
+		vmcs12_read(vcpu, HOST_IA32_SYSENTER_CS));
+	vmcs_writel(GUEST_SYSENTER_ESP,
+		vmcs12_read(vcpu, HOST_IA32_SYSENTER_ESP));
+	vmcs_writel(GUEST_SYSENTER_EIP,
+		vmcs12_read(vcpu, HOST_IA32_SYSENTER_EIP));
+	vmcs_writel(GUEST_IDTR_BASE, vmcs12_read(vcpu, HOST_IDTR_BASE));
+	vmcs_writel(GUEST_GDTR_BASE, vmcs12_read(vcpu, HOST_GDTR_BASE));
+	vmcs_writel(GUEST_TR_BASE, vmcs12_read(vcpu, HOST_TR_BASE));
+	vmcs_writel(GUEST_GS_BASE, vmcs12_read(vcpu, HOST_GS_BASE));
+	vmcs_writel(GUEST_FS_BASE, vmcs12_read(vcpu, HOST_FS_BASE));
+	vmcs_write16(GUEST_ES_SELECTOR, vmcs12_read(vcpu, HOST_ES_SELECTOR));
+	vmcs_write16(GUEST_CS_SELECTOR, vmcs12_read(vcpu, HOST_CS_SELECTOR));
+	vmcs_write16(GUEST_SS_SELECTOR, vmcs12_read(vcpu, HOST_SS_SELECTOR));
+	vmcs_write16(GUEST_DS_SELECTOR, vmcs12_read(vcpu, HOST_DS_SELECTOR));
+	vmcs_write16(GUEST_FS_SELECTOR, vmcs12_read(vcpu, HOST_FS_SELECTOR));
+	vmcs_write16(GUEST_GS_SELECTOR, vmcs12_read(vcpu, HOST_GS_SELECTOR));
+	vmcs_write16(GUEST_TR_SELECTOR, vmcs12_read(vcpu, HOST_TR_SELECTOR));
+
+	if (vmcs12_read(vcpu, VM_EXIT_CONTROLS) & VM_EXIT_LOAD_IA32_PAT)
+		vmcs_write64(GUEST_IA32_PAT, vmcs12_read(vcpu, HOST_IA32_PAT));
+	if (vmcs12_read(vcpu, VM_EXIT_CONTROLS) &
+		VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL)
 		vmcs_write64(GUEST_IA32_PERF_GLOBAL_CTRL,
-			vmcs12->host_ia32_perf_global_ctrl);
+			vmcs12_read(vcpu, HOST_IA32_PERF_GLOBAL_CTRL));
 }
 
 /*
@@ -7164,10 +7208,9 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
 	int cpu;
-	struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
 
 	leave_guest_mode(vcpu);
-	prepare_vmcs12(vcpu, vmcs12);
+	prepare_vmcs12(vcpu);
 
 	cpu = get_cpu();
 	vmx->loaded_vmcs = &vmx->vmcs01;
@@ -7180,7 +7223,7 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu)
 	if (VMCS02_POOL_SIZE == 0)
 		nested_free_vmcs02(vmx, vmx->nested.current_vmptr);
 
-	load_vmcs12_host_state(vcpu, vmcs12);
+	load_vmcs12_host_state(vcpu);
 
 	/* Update TSC_OFFSET if TSC was changed while L2 ran */
 	vmcs_write64(TSC_OFFSET, vmx->nested.vmcs01_tsc_offset);
@@ -7214,12 +7257,12 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu)
  * vmcs01 is current (it doesn't leave_guest_mode() or switch vmcss).
  */
 static void nested_vmx_entry_failure(struct kvm_vcpu *vcpu,
-			struct vmcs12 *vmcs12,
 			u32 reason, unsigned long qualification)
 {
-	load_vmcs12_host_state(vcpu, vmcs12);
-	vmcs12->vm_exit_reason = reason | VMX_EXIT_REASONS_FAILED_VMENTRY;
-	vmcs12->exit_qualification = qualification;
+	load_vmcs12_host_state(vcpu);
+	vmcs12_write(vcpu, VM_EXIT_REASON,
+		reason | VMX_EXIT_REASONS_FAILED_VMENTRY);
+	vmcs12_write(vcpu, EXIT_QUALIFICATION, qualification);
 	nested_vmx_succeed(vcpu);
 }
 
-- 
1.7.1


^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [PATCH 4/4] nested vmx: use a list to store the launched vmcs12 for L1 VMM
  2012-11-21  9:04 [PATCH 0/4] nested vmx code clean up and restructure Dongxiao Xu
                   ` (2 preceding siblings ...)
  2012-11-21  9:04 ` [PATCH 3/4] nested vmx: use vmcs12_read/write() to operate VMCS fields Dongxiao Xu
@ 2012-11-21  9:04 ` Dongxiao Xu
  2012-11-21 14:15   ` Gleb Natapov
  3 siblings, 1 reply; 13+ messages in thread
From: Dongxiao Xu @ 2012-11-21  9:04 UTC (permalink / raw)
  To: kvm; +Cc: mtosatti, gleb

The launch state is not a member of the VMCS area; use a separate
variable (a list) to store it instead.

Signed-off-by: Dongxiao Xu <dongxiao.xu@intel.com>
---
 arch/x86/kvm/vmx.c |   86 +++++++++++++++++++++++++++++++++++++++++++++++++---
 1 files changed, 81 insertions(+), 5 deletions(-)

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 6687fb6..d03ab4e 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -177,8 +177,7 @@ struct __packed vmcs12 {
 	u32 revision_id;
 	u32 abort;
 
-	u32 launch_state; /* set to 0 by VMCLEAR, to 1 by VMLAUNCH */
-	u32 padding[7]; /* room for future expansion */
+	u32 padding[8]; /* room for future expansion */
 
 	u64 io_bitmap_a;
 	u64 io_bitmap_b;
@@ -339,6 +338,11 @@ struct vmcs02_list {
 	struct loaded_vmcs vmcs02;
 };
 
+struct vmcs12_list {
+	unsigned long vmcs12_pa;
+	struct list_head node;
+};
+
 /*
  * The nested_vmx structure is part of vcpu_vmx, and holds information we need
  * for correct emulation of VMX (i.e., nested VMX) on this vcpu.
@@ -364,6 +368,8 @@ struct nested_vmx {
 	 * we must keep them pinned while L2 runs.
 	 */
 	struct page *apic_access_page;
+	/* vmcs12_pool contains the launched vmcs12. */
+	struct list_head vmcs12_pool;
 };
 
 struct vcpu_vmx {
@@ -614,6 +620,58 @@ static void nested_release_page_clean(struct page *page)
 	kvm_release_page_clean(page);
 }
 
+static int vmcs12_launched(struct list_head *vmcs12_pool,
+			       unsigned long vmcs12_pa)
+{
+	struct vmcs12_list *iter;
+	struct list_head *pos;
+	int launched = 0;
+
+	list_for_each(pos, vmcs12_pool) {
+		iter = list_entry(pos, struct vmcs12_list, node);
+		if (vmcs12_pa == iter->vmcs12_pa) {
+			launched = 1;
+			break;
+		}
+	}
+
+	return launched;
+}
+
+static int set_vmcs12_launched(struct list_head *vmcs12_pool,
+			   unsigned long vmcs12_pa)
+{
+	struct vmcs12_list *vmcs12;
+
+	if (vmcs12_launched(vmcs12_pool, vmcs12_pa))
+		return 0;
+
+	vmcs12 = kzalloc(sizeof(struct vmcs12_list), GFP_KERNEL);
+	if (!vmcs12)
+		return -ENOMEM;
+
+	vmcs12->vmcs12_pa = vmcs12_pa;
+	list_add(&vmcs12->node, vmcs12_pool);
+
+	return 0;
+}
+
+static void clear_vmcs12_launched(struct list_head *vmcs12_pool,
+			       unsigned long vmcs12_pa)
+{
+	struct vmcs12_list *iter;
+	struct list_head *pos;
+
+	list_for_each(pos, vmcs12_pool) {
+		iter = list_entry(pos, struct vmcs12_list, node);
+		if (vmcs12_pa == iter->vmcs12_pa) {
+			list_del(&iter->node);
+			kfree(iter);
+			break;
+		}
+	}
+}
+
 static u64 construct_eptp(unsigned long root_hpa);
 static void kvm_cpu_vmxon(u64 addr);
 static void kvm_cpu_vmxoff(void);
@@ -5111,6 +5169,18 @@ static void nested_free_all_saved_vmcss(struct vcpu_vmx *vmx)
 }
 
 /*
+ * Free the vmcs12 list.
+ */
+static void nested_free_vmcs12_list(struct vcpu_vmx *vmx)
+{
+	struct vmcs12_list *item, *n;
+	list_for_each_entry_safe(item, n, &vmx->nested.vmcs12_pool, node) {
+		list_del(&item->node);
+		kfree(item);
+	}
+}
+
+/*
  * Emulate the VMXON instruction.
  * Currently, we just remember that VMX is active, and do not save or even
  * inspect the argument to VMXON (the so-called "VMXON pointer") because we
@@ -5207,6 +5277,7 @@ static void free_nested(struct vcpu_vmx *vmx)
 	}
 
 	nested_free_all_saved_vmcss(vmx);
+	nested_free_vmcs12_list(vmx);
 }
 
 /* Emulate the VMXOFF instruction */
@@ -5359,7 +5430,7 @@ static int handle_vmclear(struct kvm_vcpu *vcpu)
 		return 1;
 	}
 	vmcs12 = kmap(page);
-	vmcs12->launch_state = 0;
+	clear_vmcs12_launched(&vmx->nested.vmcs12_pool, __pa(vmcs12));
 	kunmap(page);
 	nested_release_page(page);
 
@@ -6467,6 +6538,7 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
 
 	vmx->nested.current_vmptr = -1ull;
 	vmx->nested.current_vmcs12 = NULL;
+	INIT_LIST_HEAD(&vmx->nested.vmcs12_pool);
 
 	return &vmx->vcpu;
 
@@ -6846,6 +6918,7 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
 	int cpu;
 	struct loaded_vmcs *vmcs02;
+	int is_launched;
 
 	if (!nested_vmx_check_permission(vcpu) ||
 	    !nested_vmx_check_vmcs12(vcpu))
@@ -6864,7 +6937,9 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
 	 * for misconfigurations which will anyway be caught by the processor
 	 * when using the merged vmcs02.
 	 */
-	if (vmcs12->launch_state == launch) {
+	is_launched =
+		vmcs12_launched(&vmx->nested.vmcs12_pool, __pa(vmcs12));
+	if (is_launched == launch) {
 		nested_vmx_failValid(vcpu,
 			launch ? VMXERR_VMLAUNCH_NONCLEAR_VMCS
 			       : VMXERR_VMRESUME_NONLAUNCHED_VMCS);
@@ -6953,7 +7028,8 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
 	vcpu->cpu = cpu;
 	put_cpu();
 
-	vmcs12->launch_state = 1;
+	if (set_vmcs12_launched(&vmx->nested.vmcs12_pool, __pa(vmcs12)) < 0)
+		return -ENOMEM;
 
 	prepare_vmcs02(vcpu);
 
-- 
1.7.1


^ permalink raw reply related	[flat|nested] 13+ messages in thread

* Re: [PATCH 1/4] nested vmx: clean up for vmcs12 read and write
  2012-11-21  9:04 ` [PATCH 1/4] nested vmx: clean up for vmcs12 read and write Dongxiao Xu
@ 2012-11-21 13:04   ` Gleb Natapov
  2012-11-22  1:07     ` Xu, Dongxiao
  2012-11-21 13:27   ` Gleb Natapov
  2012-11-21 13:38   ` Orit Wasserman
  2 siblings, 1 reply; 13+ messages in thread
From: Gleb Natapov @ 2012-11-21 13:04 UTC (permalink / raw)
  To: Dongxiao Xu; +Cc: kvm, mtosatti

On Wed, Nov 21, 2012 at 05:04:34PM +0800, Dongxiao Xu wrote:
> abstract vmcs12_read and vmcs12_write functions to do the vmcs12
> read/write operations.
> 
> Signed-off-by: Dongxiao Xu <dongxiao.xu@intel.com>
> ---
>  arch/x86/kvm/vmx.c |   86 +++++++++++++++++++++++++++-------------------------
>  1 files changed, 45 insertions(+), 41 deletions(-)
> 
> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> index f858159..d8670e4 100644
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -5407,32 +5407,67 @@ static inline int vmcs_field_readonly(unsigned long field)
>   * some of the bits we return here (e.g., on 32-bit guests, only 32 bits of
>   * 64-bit fields are to be returned).
>   */
> -static inline bool vmcs12_read_any(struct kvm_vcpu *vcpu,
> -					unsigned long field, u64 *ret)
> +static inline u64 vmcs12_read(struct kvm_vcpu *vcpu, unsigned long field)
>  {
>  	short offset = vmcs_field_to_offset(field);
>  	char *p;
>  
> -	if (offset < 0)
> +	if (offset < 0) {
> +		nested_vmx_failValid(vcpu, VMXERR_UNSUPPORTED_VMCS_COMPONENT);
> +		skip_emulated_instruction(vcpu);
>  		return 0;
> +	}
>  
>  	p = ((char *)(get_vmcs12(vcpu))) + offset;
>  
>  	switch (vmcs_field_type(field)) {
>  	case VMCS_FIELD_TYPE_NATURAL_WIDTH:
> -		*ret = *((natural_width *)p);
> +		return *((natural_width *)p);
> +	case VMCS_FIELD_TYPE_U16:
> +		return *((u16 *)p);
> +	case VMCS_FIELD_TYPE_U32:
> +		return *((u32 *)p);
> +	case VMCS_FIELD_TYPE_U64:
> +		return *((u64 *)p);
> +	default:
> +		nested_vmx_failValid(vcpu, VMXERR_UNSUPPORTED_VMCS_COMPONENT);
> +		skip_emulated_instruction(vcpu);
> +		return 0; /* can never happen. */
> +	}
> +}
> +
> +static inline int vmcs12_write(struct kvm_vcpu *vcpu,
> +			unsigned long field,
> +			u64 value)
> +{
> +	short offset = vmcs_field_to_offset(field);
> +	char *p;
> +
> +	if (offset < 0) {
> +		nested_vmx_failValid(vcpu, VMXERR_UNSUPPORTED_VMCS_COMPONENT);
> +		skip_emulated_instruction(vcpu);
> +		return 0;
> +	}
> +
Shouldn't vmcs_field_readonly() check be in vmcs12_write() instead of a
caller?
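
[A minimal sketch of the alternative being asked about; it is not part of the posted series and assumes the existing vmcs_field_readonly() helper and the VMXERR_VMWRITE_READ_ONLY_VMCS_COMPONENT error code.]

	static inline int vmcs12_write(struct kvm_vcpu *vcpu,
				unsigned long field, u64 value)
	{
		/* hypothetical: reject read-only fields inside the helper */
		if (vmcs_field_readonly(field)) {
			nested_vmx_failValid(vcpu,
				VMXERR_VMWRITE_READ_ONLY_VMCS_COMPONENT);
			skip_emulated_instruction(vcpu);
			return 0;
		}
		/* ... offset lookup and field-type switch as in the patch ... */
	}

[The reply further down explains why the series keeps this check in the caller: later patches call vmcs12_write() for fields that are read-only to the guest.]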

> +	p = ((char *)(get_vmcs12(vcpu))) + offset;
> +
> +	switch (vmcs_field_type(field)) {
> +	case VMCS_FIELD_TYPE_NATURAL_WIDTH:
> +		*(natural_width *)p = value;
>  		return 1;
>  	case VMCS_FIELD_TYPE_U16:
> -		*ret = *((u16 *)p);
> +		*(u16 *)p = value;
>  		return 1;
>  	case VMCS_FIELD_TYPE_U32:
> -		*ret = *((u32 *)p);
> +		*(u32 *)p = value;
>  		return 1;
>  	case VMCS_FIELD_TYPE_U64:
> -		*ret = *((u64 *)p);
> +		*(u64 *)p = value;
>  		return 1;
>  	default:
> -		return 0; /* can never happen. */
> +		nested_vmx_failValid(vcpu, VMXERR_UNSUPPORTED_VMCS_COMPONENT);
> +		skip_emulated_instruction(vcpu);
> +		return 0;
>  	}
>  }
>  
> @@ -5466,11 +5501,7 @@ static int handle_vmread(struct kvm_vcpu *vcpu)
>  	/* Decode instruction info and find the field to read */
>  	field = kvm_register_read(vcpu, (((vmx_instruction_info) >> 28) & 0xf));
>  	/* Read the field, zero-extended to a u64 field_value */
> -	if (!vmcs12_read_any(vcpu, field, &field_value)) {
> -		nested_vmx_failValid(vcpu, VMXERR_UNSUPPORTED_VMCS_COMPONENT);
> -		skip_emulated_instruction(vcpu);
> -		return 1;
> -	}
> +	field_value = vmcs12_read(vcpu, field);
>  	/*
>  	 * Now copy part of this value to register or memory, as requested.
>  	 * Note that the number of bits actually copied is 32 or 64 depending
> @@ -5500,8 +5531,6 @@ static int handle_vmwrite(struct kvm_vcpu *vcpu)
>  	gva_t gva;
>  	unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
>  	u32 vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO);
> -	char *p;
> -	short offset;
>  	/* The value to write might be 32 or 64 bits, depending on L1's long
>  	 * mode, and eventually we need to write that into a field of several
>  	 * possible lengths. The code below first zero-extends the value to 64
> @@ -5537,33 +5566,8 @@ static int handle_vmwrite(struct kvm_vcpu *vcpu)
>  		skip_emulated_instruction(vcpu);
>  		return 1;
>  	}
> -
> -	offset = vmcs_field_to_offset(field);
> -	if (offset < 0) {
> -		nested_vmx_failValid(vcpu, VMXERR_UNSUPPORTED_VMCS_COMPONENT);
> -		skip_emulated_instruction(vcpu);
> -		return 1;
> -	}
> -	p = ((char *) get_vmcs12(vcpu)) + offset;
> -
> -	switch (vmcs_field_type(field)) {
> -	case VMCS_FIELD_TYPE_U16:
> -		*(u16 *)p = field_value;
> -		break;
> -	case VMCS_FIELD_TYPE_U32:
> -		*(u32 *)p = field_value;
> -		break;
> -	case VMCS_FIELD_TYPE_U64:
> -		*(u64 *)p = field_value;
> -		break;
> -	case VMCS_FIELD_TYPE_NATURAL_WIDTH:
> -		*(natural_width *)p = field_value;
> -		break;
> -	default:
> -		nested_vmx_failValid(vcpu, VMXERR_UNSUPPORTED_VMCS_COMPONENT);
> -		skip_emulated_instruction(vcpu);
> +	if (!vmcs12_write(vcpu, field, field_value))
>  		return 1;
> -	}
>  
>  	nested_vmx_succeed(vcpu);
>  	skip_emulated_instruction(vcpu);
> -- 
> 1.7.1

--
			Gleb.

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH 1/4] nested vmx: clean up for vmcs12 read and write
  2012-11-21  9:04 ` [PATCH 1/4] nested vmx: clean up for vmcs12 read and write Dongxiao Xu
  2012-11-21 13:04   ` Gleb Natapov
@ 2012-11-21 13:27   ` Gleb Natapov
  2012-11-22  3:16     ` Xu, Dongxiao
  2012-11-21 13:38   ` Orit Wasserman
  2 siblings, 1 reply; 13+ messages in thread
From: Gleb Natapov @ 2012-11-21 13:27 UTC (permalink / raw)
  To: Dongxiao Xu; +Cc: kvm, mtosatti

On Wed, Nov 21, 2012 at 05:04:34PM +0800, Dongxiao Xu wrote:
> abstract vmcs12_read and vmcs12_write functions to do the vmcs12
> read/write operations.
> 
> Signed-off-by: Dongxiao Xu <dongxiao.xu@intel.com>
> ---
>  arch/x86/kvm/vmx.c |   86 +++++++++++++++++++++++++++-------------------------
>  1 files changed, 45 insertions(+), 41 deletions(-)
> 
> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> index f858159..d8670e4 100644
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -5407,32 +5407,67 @@ static inline int vmcs_field_readonly(unsigned long field)
>   * some of the bits we return here (e.g., on 32-bit guests, only 32 bits of
>   * 64-bit fields are to be returned).
>   */
> -static inline bool vmcs12_read_any(struct kvm_vcpu *vcpu,
> -					unsigned long field, u64 *ret)
> +static inline u64 vmcs12_read(struct kvm_vcpu *vcpu, unsigned long field)
>  {
>  	short offset = vmcs_field_to_offset(field);
>  	char *p;
>  
> -	if (offset < 0)
> +	if (offset < 0) {
> +		nested_vmx_failValid(vcpu, VMXERR_UNSUPPORTED_VMCS_COMPONENT);
> +		skip_emulated_instruction(vcpu);
>  		return 0;
> +	}
>  
>  	p = ((char *)(get_vmcs12(vcpu))) + offset;
>  
>  	switch (vmcs_field_type(field)) {
>  	case VMCS_FIELD_TYPE_NATURAL_WIDTH:
> -		*ret = *((natural_width *)p);
> +		return *((natural_width *)p);
> +	case VMCS_FIELD_TYPE_U16:
> +		return *((u16 *)p);
> +	case VMCS_FIELD_TYPE_U32:
> +		return *((u32 *)p);
> +	case VMCS_FIELD_TYPE_U64:
> +		return *((u64 *)p);
> +	default:
> +		nested_vmx_failValid(vcpu, VMXERR_UNSUPPORTED_VMCS_COMPONENT);
> +		skip_emulated_instruction(vcpu);
> +		return 0; /* can never happen. */
> +	}
> +}
> +
> +static inline int vmcs12_write(struct kvm_vcpu *vcpu,
> +			unsigned long field,
> +			u64 value)
> +{
> +	short offset = vmcs_field_to_offset(field);
> +	char *p;
> +
> +	if (offset < 0) {
> +		nested_vmx_failValid(vcpu, VMXERR_UNSUPPORTED_VMCS_COMPONENT);
> +		skip_emulated_instruction(vcpu);
> +		return 0;
> +	}
> +
> +	p = ((char *)(get_vmcs12(vcpu))) + offset;
> +
> +	switch (vmcs_field_type(field)) {
> +	case VMCS_FIELD_TYPE_NATURAL_WIDTH:
> +		*(natural_width *)p = value;
>  		return 1;
>  	case VMCS_FIELD_TYPE_U16:
> -		*ret = *((u16 *)p);
> +		*(u16 *)p = value;
>  		return 1;
>  	case VMCS_FIELD_TYPE_U32:
> -		*ret = *((u32 *)p);
> +		*(u32 *)p = value;
>  		return 1;
>  	case VMCS_FIELD_TYPE_U64:
> -		*ret = *((u64 *)p);
> +		*(u64 *)p = value;
>  		return 1;
>  	default:
> -		return 0; /* can never happen. */
> +		nested_vmx_failValid(vcpu, VMXERR_UNSUPPORTED_VMCS_COMPONENT);
> +		skip_emulated_instruction(vcpu);
> +		return 0;
>  	}
>  }
>  
> @@ -5466,11 +5501,7 @@ static int handle_vmread(struct kvm_vcpu *vcpu)
>  	/* Decode instruction info and find the field to read */
>  	field = kvm_register_read(vcpu, (((vmx_instruction_info) >> 28) & 0xf));
>  	/* Read the field, zero-extended to a u64 field_value */
> -	if (!vmcs12_read_any(vcpu, field, &field_value)) {
> -		nested_vmx_failValid(vcpu, VMXERR_UNSUPPORTED_VMCS_COMPONENT);
> -		skip_emulated_instruction(vcpu);
> -		return 1;
> -	}
> +	field_value = vmcs12_read(vcpu, field);
You do not handle failure here and always write back field_value even if
vmcs12_read() failed. Actually now it is impossible to detect a
failure. Call to nested_vmx_failValid() in vmcs12_read() will be
overwritten by call to nested_vmx_succeed() at the end of
handle_vmread() and skip_emulated_instruction() will be called twice.

> +             skip_emulated_instruction(vcpu);

>  	/*
>  	 * Now copy part of this value to register or memory, as requested.
>  	 * Note that the number of bits actually copied is 32 or 64 depending
> @@ -5500,8 +5531,6 @@ static int handle_vmwrite(struct kvm_vcpu *vcpu)
>  	gva_t gva;
>  	unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
>  	u32 vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO);
> -	char *p;
> -	short offset;
>  	/* The value to write might be 32 or 64 bits, depending on L1's long
>  	 * mode, and eventually we need to write that into a field of several
>  	 * possible lengths. The code below first zero-extends the value to 64
> @@ -5537,33 +5566,8 @@ static int handle_vmwrite(struct kvm_vcpu *vcpu)
>  		skip_emulated_instruction(vcpu);
>  		return 1;
>  	}
> -
> -	offset = vmcs_field_to_offset(field);
> -	if (offset < 0) {
> -		nested_vmx_failValid(vcpu, VMXERR_UNSUPPORTED_VMCS_COMPONENT);
> -		skip_emulated_instruction(vcpu);
> -		return 1;
> -	}
> -	p = ((char *) get_vmcs12(vcpu)) + offset;
> -
> -	switch (vmcs_field_type(field)) {
> -	case VMCS_FIELD_TYPE_U16:
> -		*(u16 *)p = field_value;
> -		break;
> -	case VMCS_FIELD_TYPE_U32:
> -		*(u32 *)p = field_value;
> -		break;
> -	case VMCS_FIELD_TYPE_U64:
> -		*(u64 *)p = field_value;
> -		break;
> -	case VMCS_FIELD_TYPE_NATURAL_WIDTH:
> -		*(natural_width *)p = field_value;
> -		break;
> -	default:
> -		nested_vmx_failValid(vcpu, VMXERR_UNSUPPORTED_VMCS_COMPONENT);
> -		skip_emulated_instruction(vcpu);
> +	if (!vmcs12_write(vcpu, field, field_value))
>  		return 1;
> -	}
>  
>  	nested_vmx_succeed(vcpu);
>  	skip_emulated_instruction(vcpu);
> -- 
> 1.7.1

--
			Gleb.

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH 1/4] nested vmx: clean up for vmcs12 read and write
  2012-11-21  9:04 ` [PATCH 1/4] nested vmx: clean up for vmcs12 read and write Dongxiao Xu
  2012-11-21 13:04   ` Gleb Natapov
  2012-11-21 13:27   ` Gleb Natapov
@ 2012-11-21 13:38   ` Orit Wasserman
  2 siblings, 0 replies; 13+ messages in thread
From: Orit Wasserman @ 2012-11-21 13:38 UTC (permalink / raw)
  To: Dongxiao Xu; +Cc: kvm, mtosatti, gleb

On 11/21/2012 11:04 AM, Dongxiao Xu wrote:
> abstract vmcs12_read and vmcs12_write functions to do the vmcs12
> read/write operations.
> 
> Signed-off-by: Dongxiao Xu <dongxiao.xu@intel.com>
> ---
>  arch/x86/kvm/vmx.c |   86 +++++++++++++++++++++++++++-------------------------
>  1 files changed, 45 insertions(+), 41 deletions(-)
> 
> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> index f858159..d8670e4 100644
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -5407,32 +5407,67 @@ static inline int vmcs_field_readonly(unsigned long field)
>   * some of the bits we return here (e.g., on 32-bit guests, only 32 bits of
>   * 64-bit fields are to be returned).
>   */
> -static inline bool vmcs12_read_any(struct kvm_vcpu *vcpu,
> -					unsigned long field, u64 *ret)
> +static inline u64 vmcs12_read(struct kvm_vcpu *vcpu, unsigned long field)
>  {
>  	short offset = vmcs_field_to_offset(field);
>  	char *p;
>  
> -	if (offset < 0)
> +	if (offset < 0) {
> +		nested_vmx_failValid(vcpu, VMXERR_UNSUPPORTED_VMCS_COMPONENT);
> +		skip_emulated_instruction(vcpu);
>  		return 0;
> +	}
>  
>  	p = ((char *)(get_vmcs12(vcpu))) + offset;
>  
>  	switch (vmcs_field_type(field)) {
>  	case VMCS_FIELD_TYPE_NATURAL_WIDTH:
> -		*ret = *((natural_width *)p);
> +		return *((natural_width *)p);
> +	case VMCS_FIELD_TYPE_U16:
> +		return *((u16 *)p);
> +	case VMCS_FIELD_TYPE_U32:
> +		return *((u32 *)p);
> +	case VMCS_FIELD_TYPE_U64:
> +		return *((u64 *)p);
> +	default:
> +		nested_vmx_failValid(vcpu, VMXERR_UNSUPPORTED_VMCS_COMPONENT);
> +		skip_emulated_instruction(vcpu);
> +		return 0; /* can never happen. */
> +	}
> +}
> +
> +static inline int vmcs12_write(struct kvm_vcpu *vcpu,
> +			unsigned long field,
> +			u64 value)
> +{
> +	short offset = vmcs_field_to_offset(field);
> +	char *p;
> +
> +	if (offset < 0) {
> +		nested_vmx_failValid(vcpu, VMXERR_UNSUPPORTED_VMCS_COMPONENT);
> +		skip_emulated_instruction(vcpu);
> +		return 0;
> +	}
> +
> +	p = ((char *)(get_vmcs12(vcpu))) + offset;
> +
> +	switch (vmcs_field_type(field)) {
> +	case VMCS_FIELD_TYPE_NATURAL_WIDTH:
> +		*(natural_width *)p = value;
>  		return 1;
>  	case VMCS_FIELD_TYPE_U16:
> -		*ret = *((u16 *)p);
> +		*(u16 *)p = value;
>  		return 1;
>  	case VMCS_FIELD_TYPE_U32:
> -		*ret = *((u32 *)p);
> +		*(u32 *)p = value;
>  		return 1;
>  	case VMCS_FIELD_TYPE_U64:
> -		*ret = *((u64 *)p);
> +		*(u64 *)p = value;
>  		return 1;
>  	default:
> -		return 0; /* can never happen. */
> +		nested_vmx_failValid(vcpu, VMXERR_UNSUPPORTED_VMCS_COMPONENT);
> +		skip_emulated_instruction(vcpu);
> +		return 0;
>  	}
>  }
>  
> @@ -5466,11 +5501,7 @@ static int handle_vmread(struct kvm_vcpu *vcpu)
>  	/* Decode instruction info and find the field to read */
>  	field = kvm_register_read(vcpu, (((vmx_instruction_info) >> 28) & 0xf));
>  	/* Read the field, zero-extended to a u64 field_value */
> -	if (!vmcs12_read_any(vcpu, field, &field_value)) {
> -		nested_vmx_failValid(vcpu, VMXERR_UNSUPPORTED_VMCS_COMPONENT);
> -		skip_emulated_instruction(vcpu);
> -		return 1;
> -	}
> +	field_value = vmcs12_read(vcpu, field);
If there is an error here, you will still continue running the function:
you will override the failure (by calling nested_vmx_succeed) and skip the emulated instruction twice.
You need to detect that there was an error and return (as in vmwrite).

Orit
>  	/*
>  	 * Now copy part of this value to register or memory, as requested.
>  	 * Note that the number of bits actually copied is 32 or 64 depending
> @@ -5500,8 +5531,6 @@ static int handle_vmwrite(struct kvm_vcpu *vcpu)
>  	gva_t gva;
>  	unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
>  	u32 vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO);
> -	char *p;
> -	short offset;
>  	/* The value to write might be 32 or 64 bits, depending on L1's long
>  	 * mode, and eventually we need to write that into a field of several
>  	 * possible lengths. The code below first zero-extends the value to 64
> @@ -5537,33 +5566,8 @@ static int handle_vmwrite(struct kvm_vcpu *vcpu)
>  		skip_emulated_instruction(vcpu);
>  		return 1;
>  	}
> -
> -	offset = vmcs_field_to_offset(field);
> -	if (offset < 0) {
> -		nested_vmx_failValid(vcpu, VMXERR_UNSUPPORTED_VMCS_COMPONENT);
> -		skip_emulated_instruction(vcpu);
> -		return 1;
> -	}
> -	p = ((char *) get_vmcs12(vcpu)) + offset;
> -
> -	switch (vmcs_field_type(field)) {
> -	case VMCS_FIELD_TYPE_U16:
> -		*(u16 *)p = field_value;
> -		break;
> -	case VMCS_FIELD_TYPE_U32:
> -		*(u32 *)p = field_value;
> -		break;
> -	case VMCS_FIELD_TYPE_U64:
> -		*(u64 *)p = field_value;
> -		break;
> -	case VMCS_FIELD_TYPE_NATURAL_WIDTH:
> -		*(natural_width *)p = field_value;
> -		break;
> -	default:
> -		nested_vmx_failValid(vcpu, VMXERR_UNSUPPORTED_VMCS_COMPONENT);
> -		skip_emulated_instruction(vcpu);
> +	if (!vmcs12_write(vcpu, field, field_value))
>  		return 1;
> -	}
>  
>  	nested_vmx_succeed(vcpu);
>  	skip_emulated_instruction(vcpu);
> 


^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH 4/4] nested vmx: use a list to store the launched vmcs12 for L1 VMM
  2012-11-21  9:04 ` [PATCH 4/4] nested vmx: use a list to store the launched vmcs12 for L1 VMM Dongxiao Xu
@ 2012-11-21 14:15   ` Gleb Natapov
  2012-11-22  1:17     ` Xu, Dongxiao
  0 siblings, 1 reply; 13+ messages in thread
From: Gleb Natapov @ 2012-11-21 14:15 UTC (permalink / raw)
  To: Dongxiao Xu; +Cc: kvm, mtosatti

On Wed, Nov 21, 2012 at 05:04:37PM +0800, Dongxiao Xu wrote:
> The launch state is not a member of the VMCS area; use a separate
> variable (a list) to store it instead.
> 
Why? Guest shouldn't be aware of the format of VMCS area.

> Signed-off-by: Dongxiao Xu <dongxiao.xu@intel.com>
> ---
>  arch/x86/kvm/vmx.c |   86 +++++++++++++++++++++++++++++++++++++++++++++++++---
>  1 files changed, 81 insertions(+), 5 deletions(-)
> 
> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> index 6687fb6..d03ab4e 100644
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -177,8 +177,7 @@ struct __packed vmcs12 {
>  	u32 revision_id;
>  	u32 abort;
>  
> -	u32 launch_state; /* set to 0 by VMCLEAR, to 1 by VMLAUNCH */
> -	u32 padding[7]; /* room for future expansion */
> +	u32 padding[8]; /* room for future expansion */
>  
>  	u64 io_bitmap_a;
>  	u64 io_bitmap_b;
> @@ -339,6 +338,11 @@ struct vmcs02_list {
>  	struct loaded_vmcs vmcs02;
>  };
>  
> +struct vmcs12_list {
> +	unsigned long vmcs12_pa;
> +	struct list_head node;
> +};
> +
>  /*
>   * The nested_vmx structure is part of vcpu_vmx, and holds information we need
>   * for correct emulation of VMX (i.e., nested VMX) on this vcpu.
> @@ -364,6 +368,8 @@ struct nested_vmx {
>  	 * we must keep them pinned while L2 runs.
>  	 */
>  	struct page *apic_access_page;
> +	/* vmcs12_pool contains the launched vmcs12. */
> +	struct list_head vmcs12_pool;
>  };
>  
>  struct vcpu_vmx {
> @@ -614,6 +620,58 @@ static void nested_release_page_clean(struct page *page)
>  	kvm_release_page_clean(page);
>  }
>  
> +static int vmcs12_launched(struct list_head *vmcs12_pool,
> +			       unsigned long vmcs12_pa)
> +{
> +	struct vmcs12_list *iter;
> +	struct list_head *pos;
> +	int launched = 0;
> +
> +	list_for_each(pos, vmcs12_pool) {
> +		iter = list_entry(pos, struct vmcs12_list, node);
> +		if (vmcs12_pa == iter->vmcs12_pa) {
> +			launched = 1;
> +			break;
> +		}
> +	}
> +
> +	return launched;
> +}
> +
> +static int set_vmcs12_launched(struct list_head *vmcs12_pool,
> +			   unsigned long vmcs12_pa)
> +{
> +	struct vmcs12_list *vmcs12;
> +
> +	if (vmcs12_launched(vmcs12_pool, vmcs12_pa))
> +		return 0;
> +
> +	vmcs12 = kzalloc(sizeof(struct vmcs12_list), GFP_KERNEL);
> +	if (!vmcs12)
> +		return -ENOMEM;
> +
> +	vmcs12->vmcs12_pa = vmcs12_pa;
> +	list_add(&vmcs12->node, vmcs12_pool);
> +
> +	return 0;
> +}
> +
> +static void clear_vmcs12_launched(struct list_head *vmcs12_pool,
> +			       unsigned long vmcs12_pa)
> +{
> +	struct vmcs12_list *iter;
> +	struct list_head *pos;
> +
> +	list_for_each(pos, vmcs12_pool) {
> +		iter = list_entry(pos, struct vmcs12_list, node);
> +		if (vmcs12_pa == iter->vmcs12_pa) {
> +			list_del(&iter->node);
> +			kfree(iter);
> +			break;
> +		}
> +	}
> +}
> +
>  static u64 construct_eptp(unsigned long root_hpa);
>  static void kvm_cpu_vmxon(u64 addr);
>  static void kvm_cpu_vmxoff(void);
> @@ -5111,6 +5169,18 @@ static void nested_free_all_saved_vmcss(struct vcpu_vmx *vmx)
>  }
>  
>  /*
> + * Free the vmcs12 list.
> + */
> +static void nested_free_vmcs12_list(struct vcpu_vmx *vmx)
> +{
> +	struct vmcs12_list *item, *n;
> +	list_for_each_entry_safe(item, n, &vmx->nested.vmcs12_pool, node) {
> +		list_del(&item->node);
> +		kfree(item);
> +	}
> +}
> +
> +/*
>   * Emulate the VMXON instruction.
>   * Currently, we just remember that VMX is active, and do not save or even
>   * inspect the argument to VMXON (the so-called "VMXON pointer") because we
> @@ -5207,6 +5277,7 @@ static void free_nested(struct vcpu_vmx *vmx)
>  	}
>  
>  	nested_free_all_saved_vmcss(vmx);
> +	nested_free_vmcs12_list(vmx);
>  }
>  
>  /* Emulate the VMXOFF instruction */
> @@ -5359,7 +5430,7 @@ static int handle_vmclear(struct kvm_vcpu *vcpu)
>  		return 1;
>  	}
>  	vmcs12 = kmap(page);
> -	vmcs12->launch_state = 0;
> +	clear_vmcs12_launched(&vmx->nested.vmcs12_pool, __pa(vmcs12));
>  	kunmap(page);
>  	nested_release_page(page);
>  
> @@ -6467,6 +6538,7 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
>  
>  	vmx->nested.current_vmptr = -1ull;
>  	vmx->nested.current_vmcs12 = NULL;
> +	INIT_LIST_HEAD(&vmx->nested.vmcs12_pool);
>  
>  	return &vmx->vcpu;
>  
> @@ -6846,6 +6918,7 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
>  	struct vcpu_vmx *vmx = to_vmx(vcpu);
>  	int cpu;
>  	struct loaded_vmcs *vmcs02;
> +	int is_launched;
>  
>  	if (!nested_vmx_check_permission(vcpu) ||
>  	    !nested_vmx_check_vmcs12(vcpu))
> @@ -6864,7 +6937,9 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
>  	 * for misconfigurations which will anyway be caught by the processor
>  	 * when using the merged vmcs02.
>  	 */
> -	if (vmcs12->launch_state == launch) {
> +	is_launched =
> +		vmcs12_launched(&vmx->nested.vmcs12_pool, __pa(vmcs12));
> +	if (is_launched == launch) {
>  		nested_vmx_failValid(vcpu,
>  			launch ? VMXERR_VMLAUNCH_NONCLEAR_VMCS
>  			       : VMXERR_VMRESUME_NONLAUNCHED_VMCS);
> @@ -6953,7 +7028,8 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
>  	vcpu->cpu = cpu;
>  	put_cpu();
>  
> -	vmcs12->launch_state = 1;
> +	if (set_vmcs12_launched(&vmx->nested.vmcs12_pool, __pa(vmcs12)) < 0)
> +		return -ENOMEM;
>  
>  	prepare_vmcs02(vcpu);
>  
> -- 
> 1.7.1
> 
> --
> To unsubscribe from this list: send the line "unsubscribe kvm" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

--
			Gleb.

^ permalink raw reply	[flat|nested] 13+ messages in thread

* RE: [PATCH 1/4] nested vmx: clean up for vmcs12 read and write
  2012-11-21 13:04   ` Gleb Natapov
@ 2012-11-22  1:07     ` Xu, Dongxiao
  0 siblings, 0 replies; 13+ messages in thread
From: Xu, Dongxiao @ 2012-11-22  1:07 UTC (permalink / raw)
  To: Gleb Natapov; +Cc: kvm, mtosatti



> -----Original Message-----
> From: Gleb Natapov [mailto:gleb@redhat.com]
> Sent: Wednesday, November 21, 2012 9:04 PM
> To: Xu, Dongxiao
> Cc: kvm@vger.kernel.org; mtosatti@redhat.com
> Subject: Re: [PATCH 1/4] nested vmx: clean up for vmcs12 read and write
> 
> On Wed, Nov 21, 2012 at 05:04:34PM +0800, Dongxiao Xu wrote:
> > abstract vmcs12_read and vmcs12_write functions to do the vmcs12
> > read/write operations.
> >
> > Signed-off-by: Dongxiao Xu <dongxiao.xu@intel.com>
> > ---
> >  arch/x86/kvm/vmx.c |   86
> +++++++++++++++++++++++++++-------------------------
> >  1 files changed, 45 insertions(+), 41 deletions(-)
> >
> > diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index
> > f858159..d8670e4 100644
> > --- a/arch/x86/kvm/vmx.c
> > +++ b/arch/x86/kvm/vmx.c
> > @@ -5407,32 +5407,67 @@ static inline int vmcs_field_readonly(unsigned
> long field)
> >   * some of the bits we return here (e.g., on 32-bit guests, only 32 bits of
> >   * 64-bit fields are to be returned).
> >   */
> > -static inline bool vmcs12_read_any(struct kvm_vcpu *vcpu,
> > -					unsigned long field, u64 *ret)
> > +static inline u64 vmcs12_read(struct kvm_vcpu *vcpu, unsigned long
> > +field)
> >  {
> >  	short offset = vmcs_field_to_offset(field);
> >  	char *p;
> >
> > -	if (offset < 0)
> > +	if (offset < 0) {
> > +		nested_vmx_failValid(vcpu,
> VMXERR_UNSUPPORTED_VMCS_COMPONENT);
> > +		skip_emulated_instruction(vcpu);
> >  		return 0;
> > +	}
> >
> >  	p = ((char *)(get_vmcs12(vcpu))) + offset;
> >
> >  	switch (vmcs_field_type(field)) {
> >  	case VMCS_FIELD_TYPE_NATURAL_WIDTH:
> > -		*ret = *((natural_width *)p);
> > +		return *((natural_width *)p);
> > +	case VMCS_FIELD_TYPE_U16:
> > +		return *((u16 *)p);
> > +	case VMCS_FIELD_TYPE_U32:
> > +		return *((u32 *)p);
> > +	case VMCS_FIELD_TYPE_U64:
> > +		return *((u64 *)p);
> > +	default:
> > +		nested_vmx_failValid(vcpu,
> VMXERR_UNSUPPORTED_VMCS_COMPONENT);
> > +		skip_emulated_instruction(vcpu);
> > +		return 0; /* can never happen. */
> > +	}
> > +}
> > +
> > +static inline int vmcs12_write(struct kvm_vcpu *vcpu,
> > +			unsigned long field,
> > +			u64 value)
> > +{
> > +	short offset = vmcs_field_to_offset(field);
> > +	char *p;
> > +
> > +	if (offset < 0) {
> > +		nested_vmx_failValid(vcpu,
> VMXERR_UNSUPPORTED_VMCS_COMPONENT);
> > +		skip_emulated_instruction(vcpu);
> > +		return 0;
> > +	}
> > +
> Shouldn't vmcs_field_readonly() check be in vmcs12_write() instead of a caller?

Well, as you can see from the later patch, we construct vmcs12 through the vmcs12_write() function. Some fields, such as the exit reason and exit qualification, need to be written into the vmcs12 area even though they are read-only from the guest's point of view. Therefore we cannot put the read-only check into the vmcs12_write() function itself; see the sketch below.
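
[An illustrative sketch of the two call paths described above, using field names from this series; error handling is elided and it is not the exact patch code.]

	/* guest-visible path (handle_vmwrite): read-only fields must fail */
	if (vmcs_field_readonly(field)) {
		nested_vmx_failValid(vcpu,
			VMXERR_VMWRITE_READ_ONLY_VMCS_COMPONENT);
		skip_emulated_instruction(vcpu);
		return 1;
	}
	vmcs12_write(vcpu, field, field_value);

	/* internal path (prepare_vmcs12, PATCH 3/4): L0 itself stores into
	 * fields that are read-only from L1's point of view, so the check
	 * cannot live inside vmcs12_write() */
	vmcs12_write(vcpu, VM_EXIT_REASON, vmcs_read32(VM_EXIT_REASON));
	vmcs12_write(vcpu, EXIT_QUALIFICATION, vmcs_readl(EXIT_QUALIFICATION));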

Thanks,
Dongxiao

> 
> > +	p = ((char *)(get_vmcs12(vcpu))) + offset;
> > +
> > +	switch (vmcs_field_type(field)) {
> > +	case VMCS_FIELD_TYPE_NATURAL_WIDTH:
> > +		*(natural_width *)p = value;
> >  		return 1;
> >  	case VMCS_FIELD_TYPE_U16:
> > -		*ret = *((u16 *)p);
> > +		*(u16 *)p = value;
> >  		return 1;
> >  	case VMCS_FIELD_TYPE_U32:
> > -		*ret = *((u32 *)p);
> > +		*(u32 *)p = value;
> >  		return 1;
> >  	case VMCS_FIELD_TYPE_U64:
> > -		*ret = *((u64 *)p);
> > +		*(u64 *)p = value;
> >  		return 1;
> >  	default:
> > -		return 0; /* can never happen. */
> > +		nested_vmx_failValid(vcpu,
> VMXERR_UNSUPPORTED_VMCS_COMPONENT);
> > +		skip_emulated_instruction(vcpu);
> > +		return 0;
> >  	}
> >  }
> >
> > @@ -5466,11 +5501,7 @@ static int handle_vmread(struct kvm_vcpu *vcpu)
> >  	/* Decode instruction info and find the field to read */
> >  	field = kvm_register_read(vcpu, (((vmx_instruction_info) >> 28) & 0xf));
> >  	/* Read the field, zero-extended to a u64 field_value */
> > -	if (!vmcs12_read_any(vcpu, field, &field_value)) {
> > -		nested_vmx_failValid(vcpu,
> VMXERR_UNSUPPORTED_VMCS_COMPONENT);
> > -		skip_emulated_instruction(vcpu);
> > -		return 1;
> > -	}
> > +	field_value = vmcs12_read(vcpu, field);
> >  	/*
> >  	 * Now copy part of this value to register or memory, as requested.
> >  	 * Note that the number of bits actually copied is 32 or 64
> > depending @@ -5500,8 +5531,6 @@ static int handle_vmwrite(struct
> kvm_vcpu *vcpu)
> >  	gva_t gva;
> >  	unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
> >  	u32 vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO);
> > -	char *p;
> > -	short offset;
> >  	/* The value to write might be 32 or 64 bits, depending on L1's long
> >  	 * mode, and eventually we need to write that into a field of several
> >  	 * possible lengths. The code below first zero-extends the value to
> > 64 @@ -5537,33 +5566,8 @@ static int handle_vmwrite(struct kvm_vcpu
> *vcpu)
> >  		skip_emulated_instruction(vcpu);
> >  		return 1;
> >  	}
> > -
> > -	offset = vmcs_field_to_offset(field);
> > -	if (offset < 0) {
> > -		nested_vmx_failValid(vcpu,
> VMXERR_UNSUPPORTED_VMCS_COMPONENT);
> > -		skip_emulated_instruction(vcpu);
> > -		return 1;
> > -	}
> > -	p = ((char *) get_vmcs12(vcpu)) + offset;
> > -
> > -	switch (vmcs_field_type(field)) {
> > -	case VMCS_FIELD_TYPE_U16:
> > -		*(u16 *)p = field_value;
> > -		break;
> > -	case VMCS_FIELD_TYPE_U32:
> > -		*(u32 *)p = field_value;
> > -		break;
> > -	case VMCS_FIELD_TYPE_U64:
> > -		*(u64 *)p = field_value;
> > -		break;
> > -	case VMCS_FIELD_TYPE_NATURAL_WIDTH:
> > -		*(natural_width *)p = field_value;
> > -		break;
> > -	default:
> > -		nested_vmx_failValid(vcpu,
> VMXERR_UNSUPPORTED_VMCS_COMPONENT);
> > -		skip_emulated_instruction(vcpu);
> > +	if (!vmcs12_write(vcpu, field, field_value))
> >  		return 1;
> > -	}
> >
> >  	nested_vmx_succeed(vcpu);
> >  	skip_emulated_instruction(vcpu);
> > --
> > 1.7.1
> 
> --
> 			Gleb.

^ permalink raw reply	[flat|nested] 13+ messages in thread

* RE: [PATCH 4/4] nested vmx: use a list to store the launched vmcs12 for L1 VMM
  2012-11-21 14:15   ` Gleb Natapov
@ 2012-11-22  1:17     ` Xu, Dongxiao
  0 siblings, 0 replies; 13+ messages in thread
From: Xu, Dongxiao @ 2012-11-22  1:17 UTC (permalink / raw)
  To: Gleb Natapov; +Cc: kvm, mtosatti

> -----Original Message-----
> From: Gleb Natapov [mailto:gleb@redhat.com]
> Sent: Wednesday, November 21, 2012 10:15 PM
> To: Xu, Dongxiao
> Cc: kvm@vger.kernel.org; mtosatti@redhat.com
> Subject: Re: [PATCH 4/4] nested vmx: use a list to store the launched vmcs12
> for L1 VMM
> 
> On Wed, Nov 21, 2012 at 05:04:37PM +0800, Dongxiao Xu wrote:
> > The launch state is not a member of the VMCS area; use a separate
> > variable (a list) to store it instead.
> >
> Why? Guest shouldn't be aware of the format of VMCS area.

Yes, I agree. Guest VMM/L1 VMM shouldn't be aware of the VMCS format.

The root VMM/L0 VMM, however, needs to track the launch state of the vmcs12 in order to correctly emulate the VMLAUNCH and VMRESUME instructions.
Originally we stored the launch state in the VMCS area, but in fact there is no "launch state" field in the VMCS; this patch moves it out and uses a separate list to store it.

Thanks,
Dongxiao


> 
> > Signed-off-by: Dongxiao Xu <dongxiao.xu@intel.com>
> > ---
> >  arch/x86/kvm/vmx.c |   86
> +++++++++++++++++++++++++++++++++++++++++++++++++---
> >  1 files changed, 81 insertions(+), 5 deletions(-)
> >
> > diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index
> > 6687fb6..d03ab4e 100644
> > --- a/arch/x86/kvm/vmx.c
> > +++ b/arch/x86/kvm/vmx.c
> > @@ -177,8 +177,7 @@ struct __packed vmcs12 {
> >  	u32 revision_id;
> >  	u32 abort;
> >
> > -	u32 launch_state; /* set to 0 by VMCLEAR, to 1 by VMLAUNCH */
> > -	u32 padding[7]; /* room for future expansion */
> > +	u32 padding[8]; /* room for future expansion */
> >
> >  	u64 io_bitmap_a;
> >  	u64 io_bitmap_b;
> > @@ -339,6 +338,11 @@ struct vmcs02_list {
> >  	struct loaded_vmcs vmcs02;
> >  };
> >
> > +struct vmcs12_list {
> > +	unsigned long vmcs12_pa;
> > +	struct list_head node;
> > +};
> > +
> >  /*
> >   * The nested_vmx structure is part of vcpu_vmx, and holds information we
> need
> >   * for correct emulation of VMX (i.e., nested VMX) on this vcpu.
> > @@ -364,6 +368,8 @@ struct nested_vmx {
> >  	 * we must keep them pinned while L2 runs.
> >  	 */
> >  	struct page *apic_access_page;
> > +	/* vmcs12_pool contains the launched vmcs12. */
> > +	struct list_head vmcs12_pool;
> >  };
> >
> >  struct vcpu_vmx {
> > @@ -614,6 +620,58 @@ static void nested_release_page_clean(struct page
> *page)
> >  	kvm_release_page_clean(page);
> >  }
> >
> > +static int vmcs12_launched(struct list_head *vmcs12_pool,
> > +			       unsigned long vmcs12_pa)
> > +{
> > +	struct vmcs12_list *iter;
> > +	struct list_head *pos;
> > +	int launched = 0;
> > +
> > +	list_for_each(pos, vmcs12_pool) {
> > +		iter = list_entry(pos, struct vmcs12_list, node);
> > +		if (vmcs12_pa == iter->vmcs12_pa) {
> > +			launched = 1;
> > +			break;
> > +		}
> > +	}
> > +
> > +	return launched;
> > +}
> > +
> > +static int set_vmcs12_launched(struct list_head *vmcs12_pool,
> > +			   unsigned long vmcs12_pa)
> > +{
> > +	struct vmcs12_list *vmcs12;
> > +
> > +	if (vmcs12_launched(vmcs12_pool, vmcs12_pa))
> > +		return 0;
> > +
> > +	vmcs12 = kzalloc(sizeof(struct vmcs12_list), GFP_KERNEL);
> > +	if (!vmcs12)
> > +		return -ENOMEM;
> > +
> > +	vmcs12->vmcs12_pa = vmcs12_pa;
> > +	list_add(&vmcs12->node, vmcs12_pool);
> > +
> > +	return 0;
> > +}
> > +
> > +static void clear_vmcs12_launched(struct list_head *vmcs12_pool,
> > +			       unsigned long vmcs12_pa)
> > +{
> > +	struct vmcs12_list *iter;
> > +	struct list_head *pos;
> > +
> > +	list_for_each(pos, vmcs12_pool) {
> > +		iter = list_entry(pos, struct vmcs12_list, node);
> > +		if (vmcs12_pa == iter->vmcs12_pa) {
> > +			list_del(&iter->node);
> > +			kfree(iter);
> > +			break;
> > +		}
> > +	}
> > +}
> > +
> >  static u64 construct_eptp(unsigned long root_hpa);  static void
> > kvm_cpu_vmxon(u64 addr);  static void kvm_cpu_vmxoff(void); @@ -5111,6
> > +5169,18 @@ static void nested_free_all_saved_vmcss(struct vcpu_vmx
> > *vmx)  }
> >
> >  /*
> > + * Free the vmcs12 list.
> > + */
> > +static void nested_free_vmcs12_list(struct vcpu_vmx *vmx) {
> > +	struct vmcs12_list *item, *n;
> > +	list_for_each_entry_safe(item, n, &vmx->nested.vmcs12_pool, node) {
> > +		list_del(&item->node);
> > +		kfree(item);
> > +	}
> > +}
> > +
> > +/*
> >   * Emulate the VMXON instruction.
> >   * Currently, we just remember that VMX is active, and do not save or even
> >   * inspect the argument to VMXON (the so-called "VMXON pointer")
> > because we @@ -5207,6 +5277,7 @@ static void free_nested(struct
> vcpu_vmx *vmx)
> >  	}
> >
> >  	nested_free_all_saved_vmcss(vmx);
> > +	nested_free_vmcs12_list(vmx);
> >  }
> >
> >  /* Emulate the VMXOFF instruction */
> > @@ -5359,7 +5430,7 @@ static int handle_vmclear(struct kvm_vcpu *vcpu)
> >  		return 1;
> >  	}
> >  	vmcs12 = kmap(page);
> > -	vmcs12->launch_state = 0;
> > +	clear_vmcs12_launched(&vmx->nested.vmcs12_pool, __pa(vmcs12));
> >  	kunmap(page);
> >  	nested_release_page(page);
> >
> > @@ -6467,6 +6538,7 @@ static struct kvm_vcpu *vmx_create_vcpu(struct
> > kvm *kvm, unsigned int id)
> >
> >  	vmx->nested.current_vmptr = -1ull;
> >  	vmx->nested.current_vmcs12 = NULL;
> > +	INIT_LIST_HEAD(&vmx->nested.vmcs12_pool);
> >
> >  	return &vmx->vcpu;
> >
> > @@ -6846,6 +6918,7 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu,
> bool launch)
> >  	struct vcpu_vmx *vmx = to_vmx(vcpu);
> >  	int cpu;
> >  	struct loaded_vmcs *vmcs02;
> > +	int is_launched;
> >
> >  	if (!nested_vmx_check_permission(vcpu) ||
> >  	    !nested_vmx_check_vmcs12(vcpu))
> > @@ -6864,7 +6937,9 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu,
> bool launch)
> >  	 * for misconfigurations which will anyway be caught by the processor
> >  	 * when using the merged vmcs02.
> >  	 */
> > -	if (vmcs12->launch_state == launch) {
> > +	is_launched =
> > +		vmcs12_launched(&vmx->nested.vmcs12_pool, __pa(vmcs12));
> > +	if (is_launched == launch) {
> >  		nested_vmx_failValid(vcpu,
> >  			launch ? VMXERR_VMLAUNCH_NONCLEAR_VMCS
> >  			       : VMXERR_VMRESUME_NONLAUNCHED_VMCS);
> > @@ -6953,7 +7028,8 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu,
> bool launch)
> >  	vcpu->cpu = cpu;
> >  	put_cpu();
> >
> > -	vmcs12->launch_state = 1;
> > +	if (set_vmcs12_launched(&vmx->nested.vmcs12_pool, __pa(vmcs12)) < 0)
> > +		return -ENOMEM;
> >
> >  	prepare_vmcs02(vcpu);
> >
> > --
> > 1.7.1
> >
> > --
> > To unsubscribe from this list: send the line "unsubscribe kvm" in the
> > body of a message to majordomo@vger.kernel.org More majordomo info at
> > http://vger.kernel.org/majordomo-info.html
> 
> --
> 			Gleb.

^ permalink raw reply	[flat|nested] 13+ messages in thread

* RE: [PATCH 1/4] nested vmx: clean up for vmcs12 read and write
  2012-11-21 13:27   ` Gleb Natapov
@ 2012-11-22  3:16     ` Xu, Dongxiao
  2012-11-22  6:45       ` Gleb Natapov
  0 siblings, 1 reply; 13+ messages in thread
From: Xu, Dongxiao @ 2012-11-22  3:16 UTC (permalink / raw)
  To: Gleb Natapov, Orit Wasserman; +Cc: kvm, mtosatti



> -----Original Message-----
> From: Gleb Natapov [mailto:gleb@redhat.com]
> Sent: Wednesday, November 21, 2012 9:27 PM
> To: Xu, Dongxiao
> Cc: kvm@vger.kernel.org; mtosatti@redhat.com
> Subject: Re: [PATCH 1/4] nested vmx: clean up for vmcs12 read and write
> 
> On Wed, Nov 21, 2012 at 05:04:34PM +0800, Dongxiao Xu wrote:
> > abstract vmcs12_read and vmcs12_write functions to do the vmcs12
> > read/write operations.
> >
> > Signed-off-by: Dongxiao Xu <dongxiao.xu@intel.com>
> > ---
> >  arch/x86/kvm/vmx.c |   86
> +++++++++++++++++++++++++++-------------------------
> >  1 files changed, 45 insertions(+), 41 deletions(-)
> >
> > diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index
> > f858159..d8670e4 100644
> > --- a/arch/x86/kvm/vmx.c
> > +++ b/arch/x86/kvm/vmx.c
> > @@ -5407,32 +5407,67 @@ static inline int vmcs_field_readonly(unsigned
> long field)
> >   * some of the bits we return here (e.g., on 32-bit guests, only 32 bits of
> >   * 64-bit fields are to be returned).
> >   */
> > -static inline bool vmcs12_read_any(struct kvm_vcpu *vcpu,
> > -					unsigned long field, u64 *ret)
> > +static inline u64 vmcs12_read(struct kvm_vcpu *vcpu, unsigned long
> > +field)
> >  {
> >  	short offset = vmcs_field_to_offset(field);
> >  	char *p;
> >
> > -	if (offset < 0)
> > +	if (offset < 0) {
> > +		nested_vmx_failValid(vcpu,
> VMXERR_UNSUPPORTED_VMCS_COMPONENT);
> > +		skip_emulated_instruction(vcpu);
> >  		return 0;
> > +	}
> >
> >  	p = ((char *)(get_vmcs12(vcpu))) + offset;
> >
> >  	switch (vmcs_field_type(field)) {
> >  	case VMCS_FIELD_TYPE_NATURAL_WIDTH:
> > -		*ret = *((natural_width *)p);
> > +		return *((natural_width *)p);
> > +	case VMCS_FIELD_TYPE_U16:
> > +		return *((u16 *)p);
> > +	case VMCS_FIELD_TYPE_U32:
> > +		return *((u32 *)p);
> > +	case VMCS_FIELD_TYPE_U64:
> > +		return *((u64 *)p);
> > +	default:
> > +		nested_vmx_failValid(vcpu,
> VMXERR_UNSUPPORTED_VMCS_COMPONENT);
> > +		skip_emulated_instruction(vcpu);
> > +		return 0; /* can never happen. */
> > +	}
> > +}
> > +
> > +static inline int vmcs12_write(struct kvm_vcpu *vcpu,
> > +			unsigned long field,
> > +			u64 value)
> > +{
> > +	short offset = vmcs_field_to_offset(field);
> > +	char *p;
> > +
> > +	if (offset < 0) {
> > +		nested_vmx_failValid(vcpu,
> VMXERR_UNSUPPORTED_VMCS_COMPONENT);
> > +		skip_emulated_instruction(vcpu);
> > +		return 0;
> > +	}
> > +
> > +	p = ((char *)(get_vmcs12(vcpu))) + offset;
> > +
> > +	switch (vmcs_field_type(field)) {
> > +	case VMCS_FIELD_TYPE_NATURAL_WIDTH:
> > +		*(natural_width *)p = value;
> >  		return 1;
> >  	case VMCS_FIELD_TYPE_U16:
> > -		*ret = *((u16 *)p);
> > +		*(u16 *)p = value;
> >  		return 1;
> >  	case VMCS_FIELD_TYPE_U32:
> > -		*ret = *((u32 *)p);
> > +		*(u32 *)p = value;
> >  		return 1;
> >  	case VMCS_FIELD_TYPE_U64:
> > -		*ret = *((u64 *)p);
> > +		*(u64 *)p = value;
> >  		return 1;
> >  	default:
> > -		return 0; /* can never happen. */
> > +		nested_vmx_failValid(vcpu,
> VMXERR_UNSUPPORTED_VMCS_COMPONENT);
> > +		skip_emulated_instruction(vcpu);
> > +		return 0;
> >  	}
> >  }
> >
> > @@ -5466,11 +5501,7 @@ static int handle_vmread(struct kvm_vcpu *vcpu)
> >  	/* Decode instruction info and find the field to read */
> >  	field = kvm_register_read(vcpu, (((vmx_instruction_info) >> 28) & 0xf));
> >  	/* Read the field, zero-extended to a u64 field_value */
> > -	if (!vmcs12_read_any(vcpu, field, &field_value)) {
> > -		nested_vmx_failValid(vcpu,
> VMXERR_UNSUPPORTED_VMCS_COMPONENT);
> > -		skip_emulated_instruction(vcpu);
> > -		return 1;
> > -	}
> > +	field_value = vmcs12_read(vcpu, field);
> You do not handle failure here and always write back field_value even if
> vmcs12_read() failed. Actually now it is impossible to detect a failure. Call to
> nested_vmx_failValid() in vmcs12_read() will be overwritten by call to
> nested_vmx_succeed() at the end of
> handle_vmread() and skip_emulated_instruction() will be called twice.

Thanks Gleb and Orit for raising this issue.

What about moving the offset check outside the vmcs12_read() and vmcs12_write() functions and putting it directly in handle_vmread() and handle_vmwrite()?
I think we only need the offset error check in handle_vmread() and handle_vmwrite(), since they emulate the correct behavior for the guest VMM: if the guest VMM reads a field that is not valid, or writes a field that is read-only, the emulation code in handle_vmread() and handle_vmwrite() needs to raise an error to the guest VMM.
For the other callers of vmcs12_read() and vmcs12_write() inside the KVM hypervisor (see PATCH 3/4), the caller itself must ensure the field is valid; otherwise it is a bug in the KVM hypervisor.
Does that make sense? If so, I will revise this patch accordingly; a sketch of the idea follows.
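
[A minimal sketch of the proposed revision, subject to the actual respin; names follow the existing code.]

	/* handle_vmread()/handle_vmwrite(): validate the field up front */
	field = kvm_register_read(vcpu, (((vmx_instruction_info) >> 28) & 0xf));
	if (vmcs_field_to_offset(field) < 0) {
		nested_vmx_failValid(vcpu, VMXERR_UNSUPPORTED_VMCS_COMPONENT);
		skip_emulated_instruction(vcpu);
		return 1;
	}
	/* vmcs12_read()/vmcs12_write() then become plain accessors; the
	 * internal callers added in PATCH 3/4 are trusted to pass only
	 * valid fields. */
	field_value = vmcs12_read(vcpu, field);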

Thanks,
Dongxiao

> 
> > +             skip_emulated_instruction(vcpu);
> 
> >  	/*
> >  	 * Now copy part of this value to register or memory, as requested.
> >  	 * Note that the number of bits actually copied is 32 or 64
> > depending @@ -5500,8 +5531,6 @@ static int handle_vmwrite(struct
> kvm_vcpu *vcpu)
> >  	gva_t gva;
> >  	unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
> >  	u32 vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO);
> > -	char *p;
> > -	short offset;
> >  	/* The value to write might be 32 or 64 bits, depending on L1's long
> >  	 * mode, and eventually we need to write that into a field of several
> >  	 * possible lengths. The code below first zero-extends the value to
> > 64 @@ -5537,33 +5566,8 @@ static int handle_vmwrite(struct kvm_vcpu
> *vcpu)
> >  		skip_emulated_instruction(vcpu);
> >  		return 1;
> >  	}
> > -
> > -	offset = vmcs_field_to_offset(field);
> > -	if (offset < 0) {
> > -		nested_vmx_failValid(vcpu,
> VMXERR_UNSUPPORTED_VMCS_COMPONENT);
> > -		skip_emulated_instruction(vcpu);
> > -		return 1;
> > -	}
> > -	p = ((char *) get_vmcs12(vcpu)) + offset;
> > -
> > -	switch (vmcs_field_type(field)) {
> > -	case VMCS_FIELD_TYPE_U16:
> > -		*(u16 *)p = field_value;
> > -		break;
> > -	case VMCS_FIELD_TYPE_U32:
> > -		*(u32 *)p = field_value;
> > -		break;
> > -	case VMCS_FIELD_TYPE_U64:
> > -		*(u64 *)p = field_value;
> > -		break;
> > -	case VMCS_FIELD_TYPE_NATURAL_WIDTH:
> > -		*(natural_width *)p = field_value;
> > -		break;
> > -	default:
> > -		nested_vmx_failValid(vcpu,
> VMXERR_UNSUPPORTED_VMCS_COMPONENT);
> > -		skip_emulated_instruction(vcpu);
> > +	if (!vmcs12_write(vcpu, field, field_value))
> >  		return 1;
> > -	}
> >
> >  	nested_vmx_succeed(vcpu);
> >  	skip_emulated_instruction(vcpu);
> > --
> > 1.7.1
> 
> --
> 			Gleb.

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH 1/4] nested vmx: clean up for vmcs12 read and write
  2012-11-22  3:16     ` Xu, Dongxiao
@ 2012-11-22  6:45       ` Gleb Natapov
  0 siblings, 0 replies; 13+ messages in thread
From: Gleb Natapov @ 2012-11-22  6:45 UTC (permalink / raw)
  To: Xu, Dongxiao; +Cc: Orit Wasserman, kvm, mtosatti

On Thu, Nov 22, 2012 at 03:16:47AM +0000, Xu, Dongxiao wrote:
> 
> 
> > -----Original Message-----
> > From: Gleb Natapov [mailto:gleb@redhat.com]
> > Sent: Wednesday, November 21, 2012 9:27 PM
> > To: Xu, Dongxiao
> > Cc: kvm@vger.kernel.org; mtosatti@redhat.com
> > Subject: Re: [PATCH 1/4] nested vmx: clean up for vmcs12 read and write
> > 
> > On Wed, Nov 21, 2012 at 05:04:34PM +0800, Dongxiao Xu wrote:
> > > abstract vmcs12_read and vmcs12_write functions to do the vmcs12
> > > read/write operations.
> > >
> > > Signed-off-by: Dongxiao Xu <dongxiao.xu@intel.com>
> > > ---
> > >  arch/x86/kvm/vmx.c |   86
> > +++++++++++++++++++++++++++-------------------------
> > >  1 files changed, 45 insertions(+), 41 deletions(-)
> > >
> > > diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index
> > > f858159..d8670e4 100644
> > > --- a/arch/x86/kvm/vmx.c
> > > +++ b/arch/x86/kvm/vmx.c
> > > @@ -5407,32 +5407,67 @@ static inline int vmcs_field_readonly(unsigned
> > long field)
> > >   * some of the bits we return here (e.g., on 32-bit guests, only 32 bits of
> > >   * 64-bit fields are to be returned).
> > >   */
> > > -static inline bool vmcs12_read_any(struct kvm_vcpu *vcpu,
> > > -					unsigned long field, u64 *ret)
> > > +static inline u64 vmcs12_read(struct kvm_vcpu *vcpu, unsigned long
> > > +field)
> > >  {
> > >  	short offset = vmcs_field_to_offset(field);
> > >  	char *p;
> > >
> > > -	if (offset < 0)
> > > +	if (offset < 0) {
> > > +		nested_vmx_failValid(vcpu,
> > VMXERR_UNSUPPORTED_VMCS_COMPONENT);
> > > +		skip_emulated_instruction(vcpu);
> > >  		return 0;
> > > +	}
> > >
> > >  	p = ((char *)(get_vmcs12(vcpu))) + offset;
> > >
> > >  	switch (vmcs_field_type(field)) {
> > >  	case VMCS_FIELD_TYPE_NATURAL_WIDTH:
> > > -		*ret = *((natural_width *)p);
> > > +		return *((natural_width *)p);
> > > +	case VMCS_FIELD_TYPE_U16:
> > > +		return *((u16 *)p);
> > > +	case VMCS_FIELD_TYPE_U32:
> > > +		return *((u32 *)p);
> > > +	case VMCS_FIELD_TYPE_U64:
> > > +		return *((u64 *)p);
> > > +	default:
> > > +		nested_vmx_failValid(vcpu,
> > VMXERR_UNSUPPORTED_VMCS_COMPONENT);
> > > +		skip_emulated_instruction(vcpu);
> > > +		return 0; /* can never happen. */
> > > +	}
> > > +}
> > > +
> > > +static inline int vmcs12_write(struct kvm_vcpu *vcpu,
> > > +			unsigned long field,
> > > +			u64 value)
> > > +{
> > > +	short offset = vmcs_field_to_offset(field);
> > > +	char *p;
> > > +
> > > +	if (offset < 0) {
> > > +		nested_vmx_failValid(vcpu,
> > VMXERR_UNSUPPORTED_VMCS_COMPONENT);
> > > +		skip_emulated_instruction(vcpu);
> > > +		return 0;
> > > +	}
> > > +
> > > +	p = ((char *)(get_vmcs12(vcpu))) + offset;
> > > +
> > > +	switch (vmcs_field_type(field)) {
> > > +	case VMCS_FIELD_TYPE_NATURAL_WIDTH:
> > > +		*(natural_width *)p = value;
> > >  		return 1;
> > >  	case VMCS_FIELD_TYPE_U16:
> > > -		*ret = *((u16 *)p);
> > > +		*(u16 *)p = value;
> > >  		return 1;
> > >  	case VMCS_FIELD_TYPE_U32:
> > > -		*ret = *((u32 *)p);
> > > +		*(u32 *)p = value;
> > >  		return 1;
> > >  	case VMCS_FIELD_TYPE_U64:
> > > -		*ret = *((u64 *)p);
> > > +		*(u64 *)p = value;
> > >  		return 1;
> > >  	default:
> > > -		return 0; /* can never happen. */
> > > +		nested_vmx_failValid(vcpu,
> > VMXERR_UNSUPPORTED_VMCS_COMPONENT);
> > > +		skip_emulated_instruction(vcpu);
> > > +		return 0;
> > >  	}
> > >  }
> > >
> > > @@ -5466,11 +5501,7 @@ static int handle_vmread(struct kvm_vcpu *vcpu)
> > >  	/* Decode instruction info and find the field to read */
> > >  	field = kvm_register_read(vcpu, (((vmx_instruction_info) >> 28) & 0xf));
> > >  	/* Read the field, zero-extended to a u64 field_value */
> > > -	if (!vmcs12_read_any(vcpu, field, &field_value)) {
> > > -		nested_vmx_failValid(vcpu,
> > VMXERR_UNSUPPORTED_VMCS_COMPONENT);
> > > -		skip_emulated_instruction(vcpu);
> > > -		return 1;
> > > -	}
> > > +	field_value = vmcs12_read(vcpu, field);
> > You do not handle failure here and always write back field_value even if
> > vmcs12_read() failed. Actually now it is impossible to detect a failure. Call to
> > nested_vmx_failValid() in vmcs12_read() will be overwritten by call to
> > nested_vmx_succeed() at the end of
> > handle_vmread() and skip_emulated_instruction() will be called twice.
> 
> Thanks Gleb and Orit for raising this issue.
> 
> What about moving the offset check outside the vmcs12_read() and vmcs12_write() functions and putting it directly in handle_vmread() and handle_vmwrite()?
> I think we only need the offset error check in handle_vmread() and handle_vmwrite(), since they emulate the correct behavior for the guest VMM. For example, if the guest VMM reads a field that is not valid, or writes a field that is read-only, the emulation code in handle_vmread() and handle_vmwrite() needs to raise the error to the guest VMM.
> For the other callers of vmcs12_read() and vmcs12_write() in the KVM hypervisor (see PATCH 3/4), it is the caller's responsibility to ensure the field is valid; otherwise it is a bug in the KVM hypervisor itself.
> Does that make sense? If so, I will revise this patch accordingly.
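[Sketch only, not from the thread: one possible shape of the revision Dongxiao
proposes, with the field-validity check done once in the exit handler and
vmcs12_read()/vmcs12_write() left as plain accessors with no guest-visible side
effects. Names follow the posted patch; the eventual v2 may look different.]

	/* handle_vmread(), revised: validate the field before touching vmcs12. */
	field = kvm_register_read(vcpu, (((vmx_instruction_info) >> 28) & 0xf));
	if (vmcs_field_to_offset(field) < 0) {
		nested_vmx_failValid(vcpu, VMXERR_UNSUPPORTED_VMCS_COMPONENT);
		skip_emulated_instruction(vcpu);
		return 1;
	}
	field_value = vmcs12_read(vcpu, field);	/* cannot fail for a valid field */
	/* ... copy field_value to register or memory as requested ... */
	nested_vmx_succeed(vcpu);
	skip_emulated_instruction(vcpu);
	return 1;

handle_vmwrite() would perform the same validity check (in addition to its
existing read-only-field check) before calling vmcs12_write().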
> 
Yes, it makes sense.

> Thanks,
> Dongxiao
> 
> > 
> > > +             skip_emulated_instruction(vcpu);
> > 
> > >  	/*
> > >  	 * Now copy part of this value to register or memory, as requested.
> > >  	 * Note that the number of bits actually copied is 32 or 64 depending
> > > @@ -5500,8 +5531,6 @@ static int handle_vmwrite(struct kvm_vcpu *vcpu)
> > >  	gva_t gva;
> > >  	unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
> > >  	u32 vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO);
> > > -	char *p;
> > > -	short offset;
> > >  	/* The value to write might be 32 or 64 bits, depending on L1's long
> > >  	 * mode, and eventually we need to write that into a field of several
> > >  	 * possible lengths. The code below first zero-extends the value to 64
> > > @@ -5537,33 +5566,8 @@ static int handle_vmwrite(struct kvm_vcpu *vcpu)
> > >  		skip_emulated_instruction(vcpu);
> > >  		return 1;
> > >  	}
> > > -
> > > -	offset = vmcs_field_to_offset(field);
> > > -	if (offset < 0) {
> > > -		nested_vmx_failValid(vcpu, VMXERR_UNSUPPORTED_VMCS_COMPONENT);
> > > -		skip_emulated_instruction(vcpu);
> > > -		return 1;
> > > -	}
> > > -	p = ((char *) get_vmcs12(vcpu)) + offset;
> > > -
> > > -	switch (vmcs_field_type(field)) {
> > > -	case VMCS_FIELD_TYPE_U16:
> > > -		*(u16 *)p = field_value;
> > > -		break;
> > > -	case VMCS_FIELD_TYPE_U32:
> > > -		*(u32 *)p = field_value;
> > > -		break;
> > > -	case VMCS_FIELD_TYPE_U64:
> > > -		*(u64 *)p = field_value;
> > > -		break;
> > > -	case VMCS_FIELD_TYPE_NATURAL_WIDTH:
> > > -		*(natural_width *)p = field_value;
> > > -		break;
> > > -	default:
> > > -		nested_vmx_failValid(vcpu, VMXERR_UNSUPPORTED_VMCS_COMPONENT);
> > > -		skip_emulated_instruction(vcpu);
> > > +	if (!vmcs12_write(vcpu, field, field_value))
> > >  		return 1;
> > > -	}
> > >
> > >  	nested_vmx_succeed(vcpu);
> > >  	skip_emulated_instruction(vcpu);
> > > --
> > > 1.7.1
> > 
> > --
> > 			Gleb.

--
			Gleb.

^ permalink raw reply	[flat|nested] 13+ messages in thread

end of thread, other threads:[~2012-11-22 19:31 UTC | newest]

Thread overview: 13+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2012-11-21  9:04 [PATCH 0/4] nested vmx code clean up and restructure Dongxiao Xu
2012-11-21  9:04 ` [PATCH 1/4] nested vmx: clean up for vmcs12 read and write Dongxiao Xu
2012-11-21 13:04   ` Gleb Natapov
2012-11-22  1:07     ` Xu, Dongxiao
2012-11-21 13:27   ` Gleb Natapov
2012-11-22  3:16     ` Xu, Dongxiao
2012-11-22  6:45       ` Gleb Natapov
2012-11-21 13:38   ` Orit Wasserman
2012-11-21  9:04 ` [PATCH 2/4] nested vmx: clean up for nested_cpu_has_xxx functions Dongxiao Xu
2012-11-21  9:04 ` [PATCH 3/4] nested vmx: use vmcs12_read/write() to operate VMCS fields Dongxiao Xu
2012-11-21  9:04 ` [PATCH 4/4] nested vmx: use a list to store the launched vmcs12 for L1 VMM Dongxiao Xu
2012-11-21 14:15   ` Gleb Natapov
2012-11-22  1:17     ` Xu, Dongxiao
