[PATCH 3/5] KVM: VMX: Move skip_emulated_instruction out of nested_vmx_check_vmcs12

From: Kyle Huey <me@kylehuey.com>
To: "Paolo Bonzini" <pbonzini@redhat.com>,
	"Radim Krčmář" <rkrcmar@redhat.com>,
	"Thomas Gleixner" <tglx@linutronix.de>,
	"Ingo Molnar" <mingo@redhat.com>,
	"H. Peter Anvin" <hpa@zytor.com>,
	x86@kernel.org, "Joerg Roedel" <joro@8bytes.org>
Cc: kvm@vger.kernel.org, linux-kernel@vger.kernel.org
Subject: [PATCH 3/5] KVM: VMX: Move skip_emulated_instruction out of nested_vmx_check_vmcs12
Date: Sun, 27 Nov 2016 20:18:54 -0800	[thread overview]
Message-ID: <20161128041856.11420-4-khuey@kylehuey.com> (raw)
In-Reply-To: <20161128041856.11420-1-khuey@kylehuey.com>

We can't return both the pass/fail boolean for the vmcs and the upcoming
continue/exit-to-userspace boolean for skip_emulated_instruction out of
nested_vmx_check_vmcs, so move skip_emulated_instruction out of it instead.

Additionally, VMENTER/VMRESUME only trigger singlestep exceptions when
they advance the IP to the following instruction, not when they a) succeed,
b) fail MSR validation or c) throw an exception. Add a
skip_emulated_instruction_no_trap variant that will not check RFLAGS.TF.

Signed-off-by: Kyle Huey <khuey@kylehuey.com>
---
 arch/x86/kvm/vmx.c | 60 +++++++++++++++++++++++++++++++++++-------------------
 1 file changed, 39 insertions(+), 21 deletions(-)

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index f2f9cf5..9cc4c41 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -2455,28 +2455,33 @@ static void vmx_set_interrupt_shadow(struct kvm_vcpu *vcpu, int mask)
 		interruptibility |= GUEST_INTR_STATE_MOV_SS;
 	else if (mask & KVM_X86_SHADOW_INT_STI)
 		interruptibility |= GUEST_INTR_STATE_STI;
 
 	if ((interruptibility != interruptibility_old))
 		vmcs_write32(GUEST_INTERRUPTIBILITY_INFO, interruptibility);
 }
 
-static void skip_emulated_instruction(struct kvm_vcpu *vcpu)
+static void skip_emulated_instruction_no_trap(struct kvm_vcpu *vcpu)
 {
 	unsigned long rip;
 
 	rip = kvm_rip_read(vcpu);
 	rip += vmcs_read32(VM_EXIT_INSTRUCTION_LEN);
 	kvm_rip_write(vcpu, rip);
 
 	/* skipping an emulated instruction also counts */
 	vmx_set_interrupt_shadow(vcpu, 0);
 }
 
+static void skip_emulated_instruction(struct kvm_vcpu *vcpu)
+{
+	skip_emulated_instruction_no_trap(vcpu);
+}
+
 /*
  * KVM wants to inject page-faults which it got to the guest. This function
  * checks whether in a nested guest, we need to inject them to L1 or L2.
  */
 static int nested_vmx_check_exception(struct kvm_vcpu *vcpu, unsigned nr)
 {
 	struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
 
@@ -7319,33 +7324,36 @@ static void copy_vmcs12_to_shadow(struct vcpu_vmx *vmx)
  * VMX instructions which assume a current vmcs12 (i.e., that VMPTRLD was
  * used before) all generate the same failure when it is missing.
  */
 static int nested_vmx_check_vmcs12(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
 	if (vmx->nested.current_vmptr == -1ull) {
 		nested_vmx_failInvalid(vcpu);
-		skip_emulated_instruction(vcpu);
 		return 0;
 	}
 	return 1;
 }
 
 static int handle_vmread(struct kvm_vcpu *vcpu)
 {
 	unsigned long field;
 	u64 field_value;
 	unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
 	u32 vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO);
 	gva_t gva = 0;
 
-	if (!nested_vmx_check_permission(vcpu) ||
-	    !nested_vmx_check_vmcs12(vcpu))
+	if (!nested_vmx_check_permission(vcpu))
+		return 1;
+
+	if (!nested_vmx_check_vmcs12(vcpu)) {
+		skip_emulated_instruction(vcpu);
 		return 1;
+	}
 
 	/* Decode instruction info and find the field to read */
 	field = kvm_register_readl(vcpu, (((vmx_instruction_info) >> 28) & 0xf));
 	/* Read the field, zero-extended to a u64 field_value */
 	if (vmcs12_read_any(vcpu, field, &field_value) < 0) {
 		nested_vmx_failValid(vcpu, VMXERR_UNSUPPORTED_VMCS_COMPONENT);
 		skip_emulated_instruction(vcpu);
 		return 1;
@@ -7383,19 +7391,23 @@ static int handle_vmwrite(struct kvm_vcpu *vcpu)
 	 * mode, and eventually we need to write that into a field of several
 	 * possible lengths. The code below first zero-extends the value to 64
 	 * bit (field_value), and then copies only the appropriate number of
 	 * bits into the vmcs12 field.
 	 */
 	u64 field_value = 0;
 	struct x86_exception e;
 
-	if (!nested_vmx_check_permission(vcpu) ||
-	    !nested_vmx_check_vmcs12(vcpu))
+	if (!nested_vmx_check_permission(vcpu))
+		return 1;
+
+	if (!nested_vmx_check_vmcs12(vcpu)) {
+		skip_emulated_instruction(vcpu);
 		return 1;
+	}
 
 	if (vmx_instruction_info & (1u << 10))
 		field_value = kvm_register_readl(vcpu,
 			(((vmx_instruction_info) >> 3) & 0xf));
 	else {
 		if (get_vmx_mem_address(vcpu, exit_qualification,
 				vmx_instruction_info, false, &gva))
 			return 1;
@@ -10041,21 +10053,22 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
 {
 	struct vmcs12 *vmcs12;
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
 	int cpu;
 	struct loaded_vmcs *vmcs02;
 	bool ia32e;
 	u32 msr_entry_idx;
 
-	if (!nested_vmx_check_permission(vcpu) ||
-	    !nested_vmx_check_vmcs12(vcpu))
+	if (!nested_vmx_check_permission(vcpu))
 		return 1;
 
-	skip_emulated_instruction(vcpu);
+	if (!nested_vmx_check_vmcs12(vcpu))
+		goto out;
+
 	vmcs12 = get_vmcs12(vcpu);
 
 	if (enable_shadow_vmcs)
 		copy_shadow_to_vmcs12(vmx);
 
 	/*
 	 * The nested entry process starts with enforcing various prerequisites
 	 * on vmcs12 as required by the Intel SDM, and act appropriately when
@@ -10065,43 +10078,43 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
 	 * To speed up the normal (success) code path, we should avoid checking
 	 * for misconfigurations which will anyway be caught by the processor
 	 * when using the merged vmcs02.
 	 */
 	if (vmcs12->launch_state == launch) {
 		nested_vmx_failValid(vcpu,
 			launch ? VMXERR_VMLAUNCH_NONCLEAR_VMCS
 			       : VMXERR_VMRESUME_NONLAUNCHED_VMCS);
-		return 1;
+		goto out;
 	}
 
 	if (vmcs12->guest_activity_state != GUEST_ACTIVITY_ACTIVE &&
 	    vmcs12->guest_activity_state != GUEST_ACTIVITY_HLT) {
 		nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD);
-		return 1;
+		goto out;
 	}
 
 	if (!nested_get_vmcs12_pages(vcpu, vmcs12)) {
 		nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD);
-		return 1;
+		goto out;
 	}
 
 	if (nested_vmx_check_msr_bitmap_controls(vcpu, vmcs12)) {
 		nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD);
-		return 1;
+		goto out;
 	}
 
 	if (nested_vmx_check_apicv_controls(vcpu, vmcs12)) {
 		nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD);
-		return 1;
+		goto out;
 	}
 
 	if (nested_vmx_check_msr_switch_controls(vcpu, vmcs12)) {
 		nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD);
-		return 1;
+		goto out;
 	}
 
 	if (!vmx_control_verify(vmcs12->cpu_based_vm_exec_control,
 				vmx->nested.nested_vmx_true_procbased_ctls_low,
 				vmx->nested.nested_vmx_procbased_ctls_high) ||
 	    !vmx_control_verify(vmcs12->secondary_vm_exec_control,
 				vmx->nested.nested_vmx_secondary_ctls_low,
 				vmx->nested.nested_vmx_secondary_ctls_high) ||
@@ -10111,36 +10124,36 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
 	    !vmx_control_verify(vmcs12->vm_exit_controls,
 				vmx->nested.nested_vmx_true_exit_ctls_low,
 				vmx->nested.nested_vmx_exit_ctls_high) ||
 	    !vmx_control_verify(vmcs12->vm_entry_controls,
 				vmx->nested.nested_vmx_true_entry_ctls_low,
 				vmx->nested.nested_vmx_entry_ctls_high))
 	{
 		nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD);
-		return 1;
+		goto out;
 	}
 
 	if (((vmcs12->host_cr0 & VMXON_CR0_ALWAYSON) != VMXON_CR0_ALWAYSON) ||
 	    ((vmcs12->host_cr4 & VMXON_CR4_ALWAYSON) != VMXON_CR4_ALWAYSON)) {
 		nested_vmx_failValid(vcpu,
 			VMXERR_ENTRY_INVALID_HOST_STATE_FIELD);
-		return 1;
+		goto out;
 	}
 
 	if (!nested_cr0_valid(vcpu, vmcs12->guest_cr0) ||
 	    ((vmcs12->guest_cr4 & VMXON_CR4_ALWAYSON) != VMXON_CR4_ALWAYSON)) {
 		nested_vmx_entry_failure(vcpu, vmcs12,
 			EXIT_REASON_INVALID_STATE, ENTRY_FAIL_DEFAULT);
-		return 1;
+		goto out;
 	}
 	if (vmcs12->vmcs_link_pointer != -1ull) {
 		nested_vmx_entry_failure(vcpu, vmcs12,
 			EXIT_REASON_INVALID_STATE, ENTRY_FAIL_VMCS_LINK_PTR);
-		return 1;
+		goto out;
 	}
 
 	/*
 	 * If the load IA32_EFER VM-entry control is 1, the following checks
 	 * are performed on the field for the IA32_EFER MSR:
 	 * - Bits reserved in the IA32_EFER MSR must be 0.
 	 * - Bit 10 (corresponding to IA32_EFER.LMA) must equal the value of
 	 *   the IA-32e mode guest VM-exit control. It must also be identical
@@ -10150,17 +10163,17 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
 	if (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_EFER) {
 		ia32e = (vmcs12->vm_entry_controls & VM_ENTRY_IA32E_MODE) != 0;
 		if (!kvm_valid_efer(vcpu, vmcs12->guest_ia32_efer) ||
 		    ia32e != !!(vmcs12->guest_ia32_efer & EFER_LMA) ||
 		    ((vmcs12->guest_cr0 & X86_CR0_PG) &&
 		     ia32e != !!(vmcs12->guest_ia32_efer & EFER_LME))) {
 			nested_vmx_entry_failure(vcpu, vmcs12,
 				EXIT_REASON_INVALID_STATE, ENTRY_FAIL_DEFAULT);
-			return 1;
+			goto out;
 		}
 	}
 
 	/*
 	 * If the load IA32_EFER VM-exit control is 1, bits reserved in the
 	 * IA32_EFER MSR must be 0 in the field for that register. In addition,
 	 * the values of the LMA and LME bits in the field must each be that of
 	 * the host address-space size VM-exit control.
@@ -10168,29 +10181,30 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
 	if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_EFER) {
 		ia32e = (vmcs12->vm_exit_controls &
 			 VM_EXIT_HOST_ADDR_SPACE_SIZE) != 0;
 		if (!kvm_valid_efer(vcpu, vmcs12->host_ia32_efer) ||
 		    ia32e != !!(vmcs12->host_ia32_efer & EFER_LMA) ||
 		    ia32e != !!(vmcs12->host_ia32_efer & EFER_LME)) {
 			nested_vmx_entry_failure(vcpu, vmcs12,
 				EXIT_REASON_INVALID_STATE, ENTRY_FAIL_DEFAULT);
-			return 1;
+			goto out;
 		}
 	}
 
 	/*
 	 * We're finally done with prerequisite checking, and can start with
 	 * the nested entry.
 	 */
 
 	vmcs02 = nested_get_current_vmcs02(vmx);
 	if (!vmcs02)
 		return -ENOMEM;
 
+	skip_emulated_instruction_no_trap(vcpu);
 	enter_guest_mode(vcpu);
 
 	if (!(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS))
 		vmx->nested.vmcs01_debugctl = vmcs_read64(GUEST_IA32_DEBUGCTL);
 
 	cpu = get_cpu();
 	vmx->loaded_vmcs = vmcs02;
 	vmx_vcpu_put(vcpu);
@@ -10222,16 +10236,20 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
 
 	/*
 	 * Note no nested_vmx_succeed or nested_vmx_fail here. At this point
 	 * we are no longer running L1, and VMLAUNCH/VMRESUME has not yet
 	 * returned as far as L1 is concerned. It will only return (and set
 	 * the success flag) when L2 exits (see nested_vmx_vmexit()).
 	 */
 	return 1;
+
+out:
+	skip_emulated_instruction(vcpu);
+	return 1;
 }
 
 /*
  * On a nested exit from L2 to L1, vmcs12.guest_cr0 might not be up-to-date
  * because L2 may have changed some cr0 bits directly (CRO_GUEST_HOST_MASK).
  * This function returns the new value we should put in vmcs12.guest_cr0.
  * It's not enough to just return the vmcs02 GUEST_CR0. Rather,
  *  1. Bits that neither L0 nor L1 trapped, were set directly by L2 and are now
-- 
2.10.2