kvm.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH v4 1/4] KVM: VMX: use kvm_event_needs_reinjection
@ 2017-08-24 10:35 Wanpeng Li
  2017-08-24 10:35 ` [PATCH v4 2/4] KVM: X86: Fix loss of exception which has not yet injected Wanpeng Li
                   ` (2 more replies)
  0 siblings, 3 replies; 5+ messages in thread
From: Wanpeng Li @ 2017-08-24 10:35 UTC (permalink / raw)
  To: linux-kernel, kvm; +Cc: Paolo Bonzini, Radim Krčmář, Wanpeng Li

From: Wanpeng Li <wanpeng.li@hotmail.com>

Use kvm_event_needs_reinjection() encapsulation.

Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Radim Krčmář <rkrcmar@redhat.com>
Signed-off-by: Wanpeng Li <wanpeng.li@hotmail.com>
---
 arch/x86/kvm/vmx.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index dd710d3..c5f43ab 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -10988,9 +10988,7 @@ static int vmx_check_nested_events(struct kvm_vcpu *vcpu, bool external_intr)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
 
-	if (vcpu->arch.exception.pending ||
-		vcpu->arch.nmi_injected ||
-		vcpu->arch.interrupt.pending)
+	if (kvm_event_needs_reinjection(vcpu))
 		return -EBUSY;
 
 	if (nested_cpu_has_preemption_timer(get_vmcs12(vcpu)) &&
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 5+ messages in thread

* [PATCH v4 2/4] KVM: X86: Fix loss of exception which has not yet injected
  2017-08-24 10:35 [PATCH v4 1/4] KVM: VMX: use kvm_event_needs_reinjection Wanpeng Li
@ 2017-08-24 10:35 ` Wanpeng Li
  2017-08-24 11:36   ` Paolo Bonzini
  2017-08-24 10:35 ` [PATCH v4 3/4] KVM: VMX: Move the nested_vmx_inject_exception_vmexit call from nested_vmx_check_exception to vmx_queue_exception Wanpeng Li
  2017-08-24 10:35 ` [PATCH v4 4/4] KVM: nVMX: Fix trying to cancel vmlauch/vmresume Wanpeng Li
  2 siblings, 1 reply; 5+ messages in thread
From: Wanpeng Li @ 2017-08-24 10:35 UTC (permalink / raw)
  To: linux-kernel, kvm; +Cc: Paolo Bonzini, Radim Krčmář, Wanpeng Li

From: Wanpeng Li <wanpeng.li@hotmail.com>

vmx_complete_interrupts() assumes that the exception is always injected, 
so it would be dropped by kvm_clear_exception_queue(). This patch separates 
exception.pending from exception.injected: exception.injected indicates that 
the exception has been injected, or that it should be reinjected because a 
vmexit occurred during event delivery in VMX non-root operation. 
exception.pending indicates that the exception is queued; it is cleared when 
the exception is injected into the guest. So exception.pending and 
exception.injected cooperate to guarantee that an exception will not be lost.

Reported-by: Radim Krčmář <rkrcmar@redhat.com>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Radim Krčmář <rkrcmar@redhat.com>
Signed-off-by: Wanpeng Li <wanpeng.li@hotmail.com>
---
v3 -> v4: 
 * avoid the double fault
 * exception must be injected immediately
v2 -> v3:
 * the changes to inject_pending_event and adds the WARN_ON
 * combine injected and pending exception for GET/SET_VCPU_EVENTS

 arch/x86/include/asm/kvm_host.h |  2 +-
 arch/x86/kvm/svm.c              |  2 +-
 arch/x86/kvm/vmx.c              |  4 +--
 arch/x86/kvm/x86.c              | 78 +++++++++++++++++++++++++++--------------
 arch/x86/kvm/x86.h              |  4 +--
 5 files changed, 57 insertions(+), 33 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 9d90787..6e385ac 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -547,8 +547,8 @@ struct kvm_vcpu_arch {
 
 	struct kvm_queued_exception {
 		bool pending;
+		bool injected;
 		bool has_error_code;
-		bool reinject;
 		u8 nr;
 		u32 error_code;
 		u8 nested_apf;
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index a2fddce..6a439a1 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -655,7 +655,7 @@ static void svm_queue_exception(struct kvm_vcpu *vcpu)
 	struct vcpu_svm *svm = to_svm(vcpu);
 	unsigned nr = vcpu->arch.exception.nr;
 	bool has_error_code = vcpu->arch.exception.has_error_code;
-	bool reinject = vcpu->arch.exception.reinject;
+	bool reinject = vcpu->arch.exception.injected;
 	u32 error_code = vcpu->arch.exception.error_code;
 
 	/*
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index c5f43ab..902b780 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -2503,7 +2503,7 @@ static void vmx_queue_exception(struct kvm_vcpu *vcpu)
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
 	unsigned nr = vcpu->arch.exception.nr;
 	bool has_error_code = vcpu->arch.exception.has_error_code;
-	bool reinject = vcpu->arch.exception.reinject;
+	bool reinject = vcpu->arch.exception.injected;
 	u32 error_code = vcpu->arch.exception.error_code;
 	u32 intr_info = nr | INTR_INFO_VALID_MASK;
 
@@ -10948,7 +10948,7 @@ static void vmcs12_save_pending_event(struct kvm_vcpu *vcpu,
 	u32 idt_vectoring;
 	unsigned int nr;
 
-	if (vcpu->arch.exception.pending && vcpu->arch.exception.reinject) {
+	if (vcpu->arch.exception.injected) {
 		nr = vcpu->arch.exception.nr;
 		idt_vectoring = nr | VECTORING_INFO_VALID_MASK;
 
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 8f41b88..fa79cd7 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -389,15 +389,20 @@ static void kvm_multiple_exception(struct kvm_vcpu *vcpu,
 
 	kvm_make_request(KVM_REQ_EVENT, vcpu);
 
-	if (!vcpu->arch.exception.pending) {
+	if (!vcpu->arch.exception.pending ||
+		!vcpu->arch.exception.injected) {
 	queue:
 		if (has_error && !is_protmode(vcpu))
 			has_error = false;
-		vcpu->arch.exception.pending = true;
+		if (reinject)
+			vcpu->arch.exception.injected = true;
+		else {
+			vcpu->arch.exception.pending = true;
+			vcpu->arch.exception.injected = false;
+		}
 		vcpu->arch.exception.has_error_code = has_error;
 		vcpu->arch.exception.nr = nr;
 		vcpu->arch.exception.error_code = error_code;
-		vcpu->arch.exception.reinject = reinject;
 		return;
 	}
 
@@ -3069,8 +3074,14 @@ static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu,
 					       struct kvm_vcpu_events *events)
 {
 	process_nmi(vcpu);
+	/*
+	 * FIXME: pass injected and pending separately.  This is only
+	 * needed for nested virtualization, whose state cannot be
+	 * migrated yet.  For now we combine them just in case.
+	 */
 	events->exception.injected =
-		vcpu->arch.exception.pending &&
+		(vcpu->arch.exception.pending ||
+		 vcpu->arch.exception.injected) &&
 		!kvm_exception_is_soft(vcpu->arch.exception.nr);
 	events->exception.nr = vcpu->arch.exception.nr;
 	events->exception.has_error_code = vcpu->arch.exception.has_error_code;
@@ -3125,6 +3136,7 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu,
 		return -EINVAL;
 
 	process_nmi(vcpu);
+	vcpu->arch.exception.injected = false;
 	vcpu->arch.exception.pending = events->exception.injected;
 	vcpu->arch.exception.nr = events->exception.nr;
 	vcpu->arch.exception.has_error_code = events->exception.has_error_code;
@@ -6350,33 +6362,25 @@ static int inject_pending_event(struct kvm_vcpu *vcpu, bool req_int_win)
 	int r;
 
 	/* try to reinject previous events if any */
-	if (vcpu->arch.exception.pending) {
-		trace_kvm_inj_exception(vcpu->arch.exception.nr,
-					vcpu->arch.exception.has_error_code,
-					vcpu->arch.exception.error_code);
-
-		if (exception_type(vcpu->arch.exception.nr) == EXCPT_FAULT)
-			__kvm_set_rflags(vcpu, kvm_get_rflags(vcpu) |
-					     X86_EFLAGS_RF);
-
-		if (vcpu->arch.exception.nr == DB_VECTOR &&
-		    (vcpu->arch.dr7 & DR7_GD)) {
-			vcpu->arch.dr7 &= ~DR7_GD;
-			kvm_update_dr7(vcpu);
-		}
-
+	if (vcpu->arch.exception.injected) {
 		kvm_x86_ops->queue_exception(vcpu);
 		return 0;
 	}
 
-	if (vcpu->arch.nmi_injected) {
-		kvm_x86_ops->set_nmi(vcpu);
-		return 0;
-	}
+	/*
+	 * Exceptions must be injected immediately, or the exception
+	 * frame will have the address of the NMI or interrupt handler.
+	 */
+	if (!vcpu->arch.exception.pending) {
+		if (vcpu->arch.nmi_injected) {
+			kvm_x86_ops->set_nmi(vcpu);
+			return 0;
+		}
 
-	if (vcpu->arch.interrupt.pending) {
-		kvm_x86_ops->set_irq(vcpu);
-		return 0;
+		if (vcpu->arch.interrupt.pending) {
+			kvm_x86_ops->set_irq(vcpu);
+			return 0;
+		}
 	}
 
 	if (is_guest_mode(vcpu) && kvm_x86_ops->check_nested_events) {
@@ -6386,7 +6390,25 @@ static int inject_pending_event(struct kvm_vcpu *vcpu, bool req_int_win)
 	}
 
 	/* try to inject new event if pending */
-	if (vcpu->arch.smi_pending && !is_smm(vcpu)) {
+	if (vcpu->arch.exception.pending) {
+		trace_kvm_inj_exception(vcpu->arch.exception.nr,
+					vcpu->arch.exception.has_error_code,
+					vcpu->arch.exception.error_code);
+
+		vcpu->arch.exception.pending = false;
+		vcpu->arch.exception.injected = true;
+		kvm_x86_ops->queue_exception(vcpu);
+
+		if (exception_type(vcpu->arch.exception.nr) == EXCPT_FAULT)
+			__kvm_set_rflags(vcpu, kvm_get_rflags(vcpu) |
+					     X86_EFLAGS_RF);
+
+		if (vcpu->arch.exception.nr == DB_VECTOR &&
+		    (vcpu->arch.dr7 & DR7_GD)) {
+			vcpu->arch.dr7 &= ~DR7_GD;
+			kvm_update_dr7(vcpu);
+		}
+	} else if (vcpu->arch.smi_pending && !is_smm(vcpu)) {
 		vcpu->arch.smi_pending = false;
 		enter_smm(vcpu);
 	} else if (vcpu->arch.nmi_pending && kvm_x86_ops->nmi_allowed(vcpu)) {
@@ -6862,6 +6884,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 				kvm_x86_ops->enable_nmi_window(vcpu);
 			if (kvm_cpu_has_injectable_intr(vcpu) || req_int_win)
 				kvm_x86_ops->enable_irq_window(vcpu);
+			WARN_ON(vcpu->arch.exception.pending);
 		}
 
 		if (kvm_lapic_enabled(vcpu)) {
@@ -7738,6 +7761,7 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
 	vcpu->arch.nmi_injected = false;
 	kvm_clear_interrupt_queue(vcpu);
 	kvm_clear_exception_queue(vcpu);
+	vcpu->arch.exception.pending = false;
 
 	memset(vcpu->arch.db, 0, sizeof(vcpu->arch.db));
 	kvm_update_dr0123(vcpu);
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index 1134603..e6ec0de 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -11,7 +11,7 @@
 
 static inline void kvm_clear_exception_queue(struct kvm_vcpu *vcpu)
 {
-	vcpu->arch.exception.pending = false;
+	vcpu->arch.exception.injected = false;
 }
 
 static inline void kvm_queue_interrupt(struct kvm_vcpu *vcpu, u8 vector,
@@ -29,7 +29,7 @@ static inline void kvm_clear_interrupt_queue(struct kvm_vcpu *vcpu)
 
 static inline bool kvm_event_needs_reinjection(struct kvm_vcpu *vcpu)
 {
-	return vcpu->arch.exception.pending || vcpu->arch.interrupt.pending ||
+	return vcpu->arch.exception.injected || vcpu->arch.interrupt.pending ||
 		vcpu->arch.nmi_injected;
 }
 
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 5+ messages in thread

* [PATCH v4 3/4] KVM: VMX: Move the nested_vmx_inject_exception_vmexit call from nested_vmx_check_exception to vmx_queue_exception
  2017-08-24 10:35 [PATCH v4 1/4] KVM: VMX: use kvm_event_needs_reinjection Wanpeng Li
  2017-08-24 10:35 ` [PATCH v4 2/4] KVM: X86: Fix loss of exception which has not yet injected Wanpeng Li
@ 2017-08-24 10:35 ` Wanpeng Li
  2017-08-24 10:35 ` [PATCH v4 4/4] KVM: nVMX: Fix trying to cancel vmlauch/vmresume Wanpeng Li
  2 siblings, 0 replies; 5+ messages in thread
From: Wanpeng Li @ 2017-08-24 10:35 UTC (permalink / raw)
  To: linux-kernel, kvm; +Cc: Paolo Bonzini, Radim Krčmář, Wanpeng Li

From: Wanpeng Li <wanpeng.li@hotmail.com>

Move the nested_vmx_inject_exception_vmexit call from nested_vmx_check_exception
to vmx_queue_exception.

Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Radim Krčmář <rkrcmar@redhat.com>
Signed-off-by: Wanpeng Li <wanpeng.li@hotmail.com>
---
 arch/x86/kvm/vmx.c | 21 +++++++++++----------
 1 file changed, 11 insertions(+), 10 deletions(-)

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 902b780..21760b8 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -2459,15 +2459,14 @@ static void nested_vmx_inject_exception_vmexit(struct kvm_vcpu *vcpu,
  * KVM wants to inject page-faults which it got to the guest. This function
  * checks whether in a nested guest, we need to inject them to L1 or L2.
  */
-static int nested_vmx_check_exception(struct kvm_vcpu *vcpu)
+static int nested_vmx_check_exception(struct kvm_vcpu *vcpu, unsigned long *exit_qual)
 {
 	struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
 	unsigned int nr = vcpu->arch.exception.nr;
 
 	if (nr == PF_VECTOR) {
 		if (vcpu->arch.exception.nested_apf) {
-			nested_vmx_inject_exception_vmexit(vcpu,
-							   vcpu->arch.apf.nested_apf_token);
+			*exit_qual = vcpu->arch.apf.nested_apf_token;
 			return 1;
 		}
 		/*
@@ -2481,16 +2480,15 @@ static int nested_vmx_check_exception(struct kvm_vcpu *vcpu)
 		 */
 		if (nested_vmx_is_page_fault_vmexit(vmcs12,
 						    vcpu->arch.exception.error_code)) {
-			nested_vmx_inject_exception_vmexit(vcpu, vcpu->arch.cr2);
+			*exit_qual = vcpu->arch.cr2;
 			return 1;
 		}
 	} else {
-		unsigned long exit_qual = 0;
-		if (nr == DB_VECTOR)
-			exit_qual = vcpu->arch.dr6;
-
 		if (vmcs12->exception_bitmap & (1u << nr)) {
-			nested_vmx_inject_exception_vmexit(vcpu, exit_qual);
+			if (nr == DB_VECTOR)
+				*exit_qual = vcpu->arch.dr6;
+			else
+				*exit_qual = 0;
 			return 1;
 		}
 	}
@@ -2506,10 +2504,13 @@ static void vmx_queue_exception(struct kvm_vcpu *vcpu)
 	bool reinject = vcpu->arch.exception.injected;
 	u32 error_code = vcpu->arch.exception.error_code;
 	u32 intr_info = nr | INTR_INFO_VALID_MASK;
+	unsigned long exit_qual;
 
 	if (!reinject && is_guest_mode(vcpu) &&
-	    nested_vmx_check_exception(vcpu))
+	    nested_vmx_check_exception(vcpu, &exit_qual)) {
+		nested_vmx_inject_exception_vmexit(vcpu, exit_qual);
 		return;
+	}
 
 	if (has_error_code) {
 		vmcs_write32(VM_ENTRY_EXCEPTION_ERROR_CODE, error_code);
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 5+ messages in thread

* [PATCH v4 4/4] KVM: nVMX: Fix trying to cancel vmlauch/vmresume
  2017-08-24 10:35 [PATCH v4 1/4] KVM: VMX: use kvm_event_needs_reinjection Wanpeng Li
  2017-08-24 10:35 ` [PATCH v4 2/4] KVM: X86: Fix loss of exception which has not yet injected Wanpeng Li
  2017-08-24 10:35 ` [PATCH v4 3/4] KVM: VMX: Move the nested_vmx_inject_exception_vmexit call from nested_vmx_check_exception to vmx_queue_exception Wanpeng Li
@ 2017-08-24 10:35 ` Wanpeng Li
  2 siblings, 0 replies; 5+ messages in thread
From: Wanpeng Li @ 2017-08-24 10:35 UTC (permalink / raw)
  To: linux-kernel, kvm; +Cc: Paolo Bonzini, Radim Krčmář, Wanpeng Li

From: Wanpeng Li <wanpeng.li@hotmail.com>

------------[ cut here ]------------
WARNING: CPU: 7 PID: 3861 at /home/kernel/ssd/kvm/arch/x86/kvm//vmx.c:11299 nested_vmx_vmexit+0x176e/0x1980 [kvm_intel]
CPU: 7 PID: 3861 Comm: qemu-system-x86 Tainted: G        W  OE   4.13.0-rc4+ #11
RIP: 0010:nested_vmx_vmexit+0x176e/0x1980 [kvm_intel]
Call Trace:
 ? kvm_multiple_exception+0x149/0x170 [kvm]
 ? handle_emulation_failure+0x79/0x230 [kvm]
 ? load_vmcs12_host_state+0xa80/0xa80 [kvm_intel]
 ? check_chain_key+0x137/0x1e0
 ? reexecute_instruction.part.168+0x130/0x130 [kvm]
 nested_vmx_inject_exception_vmexit+0xb7/0x100 [kvm_intel]
 ? nested_vmx_inject_exception_vmexit+0xb7/0x100 [kvm_intel]
 vmx_queue_exception+0x197/0x300 [kvm_intel]
 kvm_arch_vcpu_ioctl_run+0x1b0c/0x2c90 [kvm]
 ? kvm_arch_vcpu_runnable+0x220/0x220 [kvm]
 ? preempt_count_sub+0x18/0xc0
 ? restart_apic_timer+0x17d/0x300 [kvm]
 ? kvm_lapic_restart_hv_timer+0x37/0x50 [kvm]
 ? kvm_arch_vcpu_load+0x1d8/0x350 [kvm]
 kvm_vcpu_ioctl+0x4e4/0x910 [kvm]
 ? kvm_vcpu_ioctl+0x4e4/0x910 [kvm]
 ? kvm_dev_ioctl+0xbe0/0xbe0 [kvm]

The flag "nested_run_pending" can override the decision of which should run 
next, L1 or L2. nested_run_pending=1 means that we *must* run L2 next, not L1. This 
is necessary in particular when L1 did a VMLAUNCH of L2 and therefore expects L2 to 
be run (and perhaps be injected with an event it specified, etc.). nested_run_pending 
is especially intended to avoid switching to L1 in the injection decision-point.

I caught this in the queue-exception path; this patch fixes it by requesting 
an immediate VM exit from L2 and keeping the exception for L1 pending for a 
subsequent nested VM exit.

Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Radim Krčmář <rkrcmar@redhat.com>
Signed-off-by: Wanpeng Li <wanpeng.li@hotmail.com>
---
v3 -> v4:
 * vmx_check_nested_events clear vcpu->ex.pending

 arch/x86/kvm/vmx.c | 18 ++++++++++--------
 1 file changed, 10 insertions(+), 8 deletions(-)

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 21760b8..7e106d7 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -2501,16 +2501,8 @@ static void vmx_queue_exception(struct kvm_vcpu *vcpu)
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
 	unsigned nr = vcpu->arch.exception.nr;
 	bool has_error_code = vcpu->arch.exception.has_error_code;
-	bool reinject = vcpu->arch.exception.injected;
 	u32 error_code = vcpu->arch.exception.error_code;
 	u32 intr_info = nr | INTR_INFO_VALID_MASK;
-	unsigned long exit_qual;
-
-	if (!reinject && is_guest_mode(vcpu) &&
-	    nested_vmx_check_exception(vcpu, &exit_qual)) {
-		nested_vmx_inject_exception_vmexit(vcpu, exit_qual);
-		return;
-	}
 
 	if (has_error_code) {
 		vmcs_write32(VM_ENTRY_EXCEPTION_ERROR_CODE, error_code);
@@ -10988,10 +10980,20 @@ static void vmcs12_save_pending_event(struct kvm_vcpu *vcpu,
 static int vmx_check_nested_events(struct kvm_vcpu *vcpu, bool external_intr)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
+	unsigned long exit_qual;
 
 	if (kvm_event_needs_reinjection(vcpu))
 		return -EBUSY;
 
+	if (vcpu->arch.exception.pending &&
+		nested_vmx_check_exception(vcpu, &exit_qual)) {
+		if (vmx->nested.nested_run_pending)
+			return -EBUSY;
+		vcpu->arch.exception.pending = false;
+		nested_vmx_inject_exception_vmexit(vcpu, exit_qual);
+		return 0;
+	}
+
 	if (nested_cpu_has_preemption_timer(get_vmcs12(vcpu)) &&
 	    vmx->nested.preemption_timer_expired) {
 		if (vmx->nested.nested_run_pending)
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 5+ messages in thread

* Re: [PATCH v4 2/4] KVM: X86: Fix loss of exception which has not yet injected
  2017-08-24 10:35 ` [PATCH v4 2/4] KVM: X86: Fix loss of exception which has not yet injected Wanpeng Li
@ 2017-08-24 11:36   ` Paolo Bonzini
  0 siblings, 0 replies; 5+ messages in thread
From: Paolo Bonzini @ 2017-08-24 11:36 UTC (permalink / raw)
  To: Wanpeng Li, linux-kernel, kvm; +Cc: Radim Krčmář, Wanpeng Li

On 24/08/2017 12:35, Wanpeng Li wrote:
> From: Wanpeng Li <wanpeng.li@hotmail.com>
> 
> vmx_complete_interrupts() assumes that the exception is always injected, 
> so it would be dropped by kvm_clear_exception_queue(). This patch separates 
> exception.pending from exception.injected, exception.inject represents the 
> exception is injected or the exception should be reinjected due to vmexit 
> occurs during event delivery in VMX non-root operation. exception.pending 
> represents the exception is queued and will be cleared when injecting the 
> exception to the guest. So exception.pending and exception.injected can 
> cooperate to guarantee exception will not be lost.
> 
> Reported-by: Radim Krčmář <rkrcmar@redhat.com>
> Cc: Paolo Bonzini <pbonzini@redhat.com>
> Cc: Radim Krčmář <rkrcmar@redhat.com>
> Signed-off-by: Wanpeng Li <wanpeng.li@hotmail.com>
> ---
> v3 -> v4: 
>  * avoid the double fault
>  * exception must be injected immediately
> v2 -> v3:
>  * the changes to inject_pending_event and adds the WARN_ON
>  * combine injected and pending exception for GET/SET_VCPU_EVENTS
> 
>  arch/x86/include/asm/kvm_host.h |  2 +-
>  arch/x86/kvm/svm.c              |  2 +-
>  arch/x86/kvm/vmx.c              |  4 +--
>  arch/x86/kvm/x86.c              | 78 +++++++++++++++++++++++++++--------------
>  arch/x86/kvm/x86.h              |  4 +--
>  5 files changed, 57 insertions(+), 33 deletions(-)
> 
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index 9d90787..6e385ac 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -547,8 +547,8 @@ struct kvm_vcpu_arch {
>  
>  	struct kvm_queued_exception {
>  		bool pending;
> +		bool injected;
>  		bool has_error_code;
> -		bool reinject;
>  		u8 nr;
>  		u32 error_code;
>  		u8 nested_apf;
> diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
> index a2fddce..6a439a1 100644
> --- a/arch/x86/kvm/svm.c
> +++ b/arch/x86/kvm/svm.c
> @@ -655,7 +655,7 @@ static void svm_queue_exception(struct kvm_vcpu *vcpu)
>  	struct vcpu_svm *svm = to_svm(vcpu);
>  	unsigned nr = vcpu->arch.exception.nr;
>  	bool has_error_code = vcpu->arch.exception.has_error_code;
> -	bool reinject = vcpu->arch.exception.reinject;
> +	bool reinject = vcpu->arch.exception.injected;
>  	u32 error_code = vcpu->arch.exception.error_code;
>  
>  	/*
> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> index c5f43ab..902b780 100644
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -2503,7 +2503,7 @@ static void vmx_queue_exception(struct kvm_vcpu *vcpu)
>  	struct vcpu_vmx *vmx = to_vmx(vcpu);
>  	unsigned nr = vcpu->arch.exception.nr;
>  	bool has_error_code = vcpu->arch.exception.has_error_code;
> -	bool reinject = vcpu->arch.exception.reinject;
> +	bool reinject = vcpu->arch.exception.injected;
>  	u32 error_code = vcpu->arch.exception.error_code;
>  	u32 intr_info = nr | INTR_INFO_VALID_MASK;
>  
> @@ -10948,7 +10948,7 @@ static void vmcs12_save_pending_event(struct kvm_vcpu *vcpu,
>  	u32 idt_vectoring;
>  	unsigned int nr;
>  
> -	if (vcpu->arch.exception.pending && vcpu->arch.exception.reinject) {
> +	if (vcpu->arch.exception.injected) {
>  		nr = vcpu->arch.exception.nr;
>  		idt_vectoring = nr | VECTORING_INFO_VALID_MASK;
>  
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 8f41b88..fa79cd7 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -389,15 +389,20 @@ static void kvm_multiple_exception(struct kvm_vcpu *vcpu,
>  
>  	kvm_make_request(KVM_REQ_EVENT, vcpu);
>  
> -	if (!vcpu->arch.exception.pending) {
> +	if (!vcpu->arch.exception.pending ||
> +		!vcpu->arch.exception.injected) {
>  	queue:
>  		if (has_error && !is_protmode(vcpu))
>  			has_error = false;
> -		vcpu->arch.exception.pending = true;
> +		if (reinject)

I'd like to add a WARN_ON here:

+			/*
+			 * vcpu->arch.exception.pending is only true on
+			 * vmentry if an event injection was blocked by
+			 * nested_run_pending.  In that case, however,
+			 * vcpu_enter_guest requests an immediate exit,
+			 * so the guest shouldn't proceed far enough to
+			 * need reinjection.
+			 */
+			WARN_ON_ONCE(vcpu->arch.exception.pending);

I'll test it and add it if it is useful.

> +			vcpu->arch.exception.injected = true;
> +		else {
> +			vcpu->arch.exception.pending = true;
> +			vcpu->arch.exception.injected = false;
> +		}
>  		vcpu->arch.exception.has_error_code = has_error;
>  		vcpu->arch.exception.nr = nr;
>  		vcpu->arch.exception.error_code = error_code;
> -		vcpu->arch.exception.reinject = reinject;
>  		return;
>  	}

The double fault case can still result in pending && injected.  We need
to set exception.pending = true to allow the double fault to trigger a
vmexit, but exception.injected must be reset.

Otherwise looks good.  This was exhausting...

Paolo

> @@ -3069,8 +3074,14 @@ static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu,
>  					       struct kvm_vcpu_events *events)
>  {
>  	process_nmi(vcpu);
> +	/*
> +	 * FIXME: pass injected and pending separately.  This is only
> +	 * needed for nested virtualization, whose state cannot be
> +	 * migrated yet.  For now we combine them just in case.
> +	 */
>  	events->exception.injected =
> -		vcpu->arch.exception.pending &&
> +		(vcpu->arch.exception.pending ||
> +		 vcpu->arch.exception.injected) &&
>  		!kvm_exception_is_soft(vcpu->arch.exception.nr);
>  	events->exception.nr = vcpu->arch.exception.nr;
>  	events->exception.has_error_code = vcpu->arch.exception.has_error_code;
> @@ -3125,6 +3136,7 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu,
>  		return -EINVAL;
>  
>  	process_nmi(vcpu);
> +	vcpu->arch.exception.injected = false;
>  	vcpu->arch.exception.pending = events->exception.injected;
>  	vcpu->arch.exception.nr = events->exception.nr;
>  	vcpu->arch.exception.has_error_code = events->exception.has_error_code;
> @@ -6350,33 +6362,25 @@ static int inject_pending_event(struct kvm_vcpu *vcpu, bool req_int_win)
>  	int r;
>  
>  	/* try to reinject previous events if any */
> -	if (vcpu->arch.exception.pending) {
> -		trace_kvm_inj_exception(vcpu->arch.exception.nr,
> -					vcpu->arch.exception.has_error_code,
> -					vcpu->arch.exception.error_code);
> -
> -		if (exception_type(vcpu->arch.exception.nr) == EXCPT_FAULT)
> -			__kvm_set_rflags(vcpu, kvm_get_rflags(vcpu) |
> -					     X86_EFLAGS_RF);
> -
> -		if (vcpu->arch.exception.nr == DB_VECTOR &&
> -		    (vcpu->arch.dr7 & DR7_GD)) {
> -			vcpu->arch.dr7 &= ~DR7_GD;
> -			kvm_update_dr7(vcpu);
> -		}
> -
> +	if (vcpu->arch.exception.injected) {
>  		kvm_x86_ops->queue_exception(vcpu);
>  		return 0;
>  	}
>  
> -	if (vcpu->arch.nmi_injected) {
> -		kvm_x86_ops->set_nmi(vcpu);
> -		return 0;
> -	}
> +	/*
> +	 * Exceptions must be injected immediately, or the exception
> +	 * frame will have the address of the NMI or interrupt handler.
> +	 */
> +	if (!vcpu->arch.exception.pending) {
> +		if (vcpu->arch.nmi_injected) {
> +			kvm_x86_ops->set_nmi(vcpu);
> +			return 0;
> +		}
>  
> -	if (vcpu->arch.interrupt.pending) {
> -		kvm_x86_ops->set_irq(vcpu);
> -		return 0;
> +		if (vcpu->arch.interrupt.pending) {
> +			kvm_x86_ops->set_irq(vcpu);
> +			return 0;
> +		}
>  	}
>  
>  	if (is_guest_mode(vcpu) && kvm_x86_ops->check_nested_events) {
> @@ -6386,7 +6390,25 @@ static int inject_pending_event(struct kvm_vcpu *vcpu, bool req_int_win)
>  	}
>  
>  	/* try to inject new event if pending */
> -	if (vcpu->arch.smi_pending && !is_smm(vcpu)) {
> +	if (vcpu->arch.exception.pending) {
> +		trace_kvm_inj_exception(vcpu->arch.exception.nr,
> +					vcpu->arch.exception.has_error_code,
> +					vcpu->arch.exception.error_code);
> +
> +		vcpu->arch.exception.pending = false;
> +		vcpu->arch.exception.injected = true;
> +		kvm_x86_ops->queue_exception(vcpu);
> +
> +		if (exception_type(vcpu->arch.exception.nr) == EXCPT_FAULT)
> +			__kvm_set_rflags(vcpu, kvm_get_rflags(vcpu) |
> +					     X86_EFLAGS_RF);
> +
> +		if (vcpu->arch.exception.nr == DB_VECTOR &&
> +		    (vcpu->arch.dr7 & DR7_GD)) {
> +			vcpu->arch.dr7 &= ~DR7_GD;
> +			kvm_update_dr7(vcpu);
> +		}
> +	} else if (vcpu->arch.smi_pending && !is_smm(vcpu)) {
>  		vcpu->arch.smi_pending = false;
>  		enter_smm(vcpu);
>  	} else if (vcpu->arch.nmi_pending && kvm_x86_ops->nmi_allowed(vcpu)) {
> @@ -6862,6 +6884,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
>  				kvm_x86_ops->enable_nmi_window(vcpu);
>  			if (kvm_cpu_has_injectable_intr(vcpu) || req_int_win)
>  				kvm_x86_ops->enable_irq_window(vcpu);
> +			WARN_ON(vcpu->arch.exception.pending);
>  		}
>  
>  		if (kvm_lapic_enabled(vcpu)) {
> @@ -7738,6 +7761,7 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
>  	vcpu->arch.nmi_injected = false;
>  	kvm_clear_interrupt_queue(vcpu);
>  	kvm_clear_exception_queue(vcpu);
> +	vcpu->arch.exception.pending = false;
>  
>  	memset(vcpu->arch.db, 0, sizeof(vcpu->arch.db));
>  	kvm_update_dr0123(vcpu);
> diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
> index 1134603..e6ec0de 100644
> --- a/arch/x86/kvm/x86.h
> +++ b/arch/x86/kvm/x86.h
> @@ -11,7 +11,7 @@
>  
>  static inline void kvm_clear_exception_queue(struct kvm_vcpu *vcpu)
>  {
> -	vcpu->arch.exception.pending = false;
> +	vcpu->arch.exception.injected = false;
>  }
>  
>  static inline void kvm_queue_interrupt(struct kvm_vcpu *vcpu, u8 vector,
> @@ -29,7 +29,7 @@ static inline void kvm_clear_interrupt_queue(struct kvm_vcpu *vcpu)
>  
>  static inline bool kvm_event_needs_reinjection(struct kvm_vcpu *vcpu)
>  {
> -	return vcpu->arch.exception.pending || vcpu->arch.interrupt.pending ||
> +	return vcpu->arch.exception.injected || vcpu->arch.interrupt.pending ||
>  		vcpu->arch.nmi_injected;
>  }
>  
> 

^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2017-08-24 11:36 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-08-24 10:35 [PATCH v4 1/4] KVM: VMX: use kvm_event_needs_reinjection Wanpeng Li
2017-08-24 10:35 ` [PATCH v4 2/4] KVM: X86: Fix loss of exception which has not yet injected Wanpeng Li
2017-08-24 11:36   ` Paolo Bonzini
2017-08-24 10:35 ` [PATCH v4 3/4] KVM: VMX: Move the nested_vmx_inject_exception_vmexit call from nested_vmx_check_exception to vmx_queue_exception Wanpeng Li
2017-08-24 10:35 ` [PATCH v4 4/4] KVM: nVMX: Fix trying to cancel vmlauch/vmresume Wanpeng Li

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).