linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH v2] KVM: nVMX: Fix trying to cancel vmlauch/vmresume
@ 2017-08-18 14:11 Wanpeng Li
  2017-08-21 16:20 ` Radim Krčmář
  0 siblings, 1 reply; 7+ messages in thread
From: Wanpeng Li @ 2017-08-18 14:11 UTC (permalink / raw)
  To: linux-kernel, kvm; +Cc: Paolo Bonzini, Radim Krčmář, Wanpeng Li

From: Wanpeng Li <wanpeng.li@hotmail.com>

------------[ cut here ]------------
WARNING: CPU: 7 PID: 3861 at /home/kernel/ssd/kvm/arch/x86/kvm//vmx.c:11299 nested_vmx_vmexit+0x176e/0x1980 [kvm_intel]
CPU: 7 PID: 3861 Comm: qemu-system-x86 Tainted: G        W  OE   4.13.0-rc4+ #11
RIP: 0010:nested_vmx_vmexit+0x176e/0x1980 [kvm_intel]
Call Trace:
 ? kvm_multiple_exception+0x149/0x170 [kvm]
 ? handle_emulation_failure+0x79/0x230 [kvm]
 ? load_vmcs12_host_state+0xa80/0xa80 [kvm_intel]
 ? check_chain_key+0x137/0x1e0
 ? reexecute_instruction.part.168+0x130/0x130 [kvm]
 nested_vmx_inject_exception_vmexit+0xb7/0x100 [kvm_intel]
 ? nested_vmx_inject_exception_vmexit+0xb7/0x100 [kvm_intel]
 vmx_queue_exception+0x197/0x300 [kvm_intel]
 kvm_arch_vcpu_ioctl_run+0x1b0c/0x2c90 [kvm]
 ? kvm_arch_vcpu_runnable+0x220/0x220 [kvm]
 ? preempt_count_sub+0x18/0xc0
 ? restart_apic_timer+0x17d/0x300 [kvm]
 ? kvm_lapic_restart_hv_timer+0x37/0x50 [kvm]
 ? kvm_arch_vcpu_load+0x1d8/0x350 [kvm]
 kvm_vcpu_ioctl+0x4e4/0x910 [kvm]
 ? kvm_vcpu_ioctl+0x4e4/0x910 [kvm]
 ? kvm_dev_ioctl+0xbe0/0xbe0 [kvm]

The flag "nested_run_pending" can override the decision of which should run 
next, L1 or L2. nested_run_pending=1 means that we *must* run L2 next, not L1. This 
is necessary in particular when L1 did a VMLAUNCH of L2 and therefore expects L2 to 
be run (and perhaps be injected with an event it specified, etc.). nested_run_pending 
is especially intended to avoid switching to L1 at the injection decision-point.

I caught this in the queue exception path; this patch fixes it by requesting 
an immediate VM exit from L2 and keeping the exception for L1 pending for a 
subsequent nested VM exit.

Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Radim Krčmář <rkrcmar@redhat.com>
Signed-off-by: Wanpeng Li <wanpeng.li@hotmail.com>
---
 arch/x86/include/asm/kvm_host.h |  2 +-
 arch/x86/kvm/svm.c              |  6 ++++--
 arch/x86/kvm/vmx.c              | 15 ++++++++++-----
 arch/x86/kvm/x86.c              |  4 ++--
 4 files changed, 17 insertions(+), 10 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 6db0ed9..e1e6f00 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -962,7 +962,7 @@ struct kvm_x86_ops {
 				unsigned char *hypercall_addr);
 	void (*set_irq)(struct kvm_vcpu *vcpu);
 	void (*set_nmi)(struct kvm_vcpu *vcpu);
-	void (*queue_exception)(struct kvm_vcpu *vcpu);
+	int (*queue_exception)(struct kvm_vcpu *vcpu);
 	void (*cancel_injection)(struct kvm_vcpu *vcpu);
 	int (*interrupt_allowed)(struct kvm_vcpu *vcpu);
 	int (*nmi_allowed)(struct kvm_vcpu *vcpu);
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index c21b49b..bee1937 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -632,7 +632,7 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu)
 	svm_set_interrupt_shadow(vcpu, 0);
 }
 
-static void svm_queue_exception(struct kvm_vcpu *vcpu)
+static int svm_queue_exception(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_svm *svm = to_svm(vcpu);
 	unsigned nr = vcpu->arch.exception.nr;
@@ -646,7 +646,7 @@ static void svm_queue_exception(struct kvm_vcpu *vcpu)
 	 */
 	if (!reinject &&
 	    nested_svm_check_exception(svm, nr, has_error_code, error_code))
-		return;
+		return 0;
 
 	if (nr == BP_VECTOR && !static_cpu_has(X86_FEATURE_NRIPS)) {
 		unsigned long rip, old_rip = kvm_rip_read(&svm->vcpu);
@@ -669,6 +669,8 @@ static void svm_queue_exception(struct kvm_vcpu *vcpu)
 		| (has_error_code ? SVM_EVTINJ_VALID_ERR : 0)
 		| SVM_EVTINJ_TYPE_EXEPT;
 	svm->vmcb->control.event_inj_err = error_code;
+
+	return 0;
 }
 
 static void svm_init_erratum_383(void)
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index e398946..cd0ab5d 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -2500,7 +2500,7 @@ static int nested_vmx_check_exception(struct kvm_vcpu *vcpu)
 	return 0;
 }
 
-static void vmx_queue_exception(struct kvm_vcpu *vcpu)
+static int vmx_queue_exception(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
 	unsigned nr = vcpu->arch.exception.nr;
@@ -2509,9 +2509,12 @@ static void vmx_queue_exception(struct kvm_vcpu *vcpu)
 	u32 error_code = vcpu->arch.exception.error_code;
 	u32 intr_info = nr | INTR_INFO_VALID_MASK;
 
-	if (!reinject && is_guest_mode(vcpu) &&
-	    nested_vmx_check_exception(vcpu))
-		return;
+	if (!reinject && is_guest_mode(vcpu)) {
+		if (vmx->nested.nested_run_pending)
+			return -EBUSY;
+		if (nested_vmx_check_exception(vcpu))
+			return 0;
+	}
 
 	if (has_error_code) {
 		vmcs_write32(VM_ENTRY_EXCEPTION_ERROR_CODE, error_code);
@@ -2524,7 +2527,7 @@ static void vmx_queue_exception(struct kvm_vcpu *vcpu)
 			inc_eip = vcpu->arch.event_exit_inst_len;
 		if (kvm_inject_realmode_interrupt(vcpu, nr, inc_eip) != EMULATE_DONE)
 			kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
-		return;
+		return 0;
 	}
 
 	if (kvm_exception_is_soft(nr)) {
@@ -2535,6 +2538,8 @@ static void vmx_queue_exception(struct kvm_vcpu *vcpu)
 		intr_info |= INTR_TYPE_HARD_EXCEPTION;
 
 	vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, intr_info);
+
+	return 0;
 }
 
 static bool vmx_rdtscp_supported(void)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 008a0b1..8e53de2 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -6356,8 +6356,8 @@ static int inject_pending_event(struct kvm_vcpu *vcpu, bool req_int_win)
 			kvm_update_dr7(vcpu);
 		}
 
-		kvm_x86_ops->queue_exception(vcpu);
-		return 0;
+		r = kvm_x86_ops->queue_exception(vcpu);
+		return r;
 	}
 
 	if (vcpu->arch.nmi_injected) {
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 7+ messages in thread

* Re: [PATCH v2] KVM: nVMX: Fix trying to cancel vmlauch/vmresume
  2017-08-18 14:11 [PATCH v2] KVM: nVMX: Fix trying to cancel vmlauch/vmresume Wanpeng Li
@ 2017-08-21 16:20 ` Radim Krčmář
  2017-08-21 22:55   ` Wanpeng Li
  2017-08-23  7:13   ` Paolo Bonzini
  0 siblings, 2 replies; 7+ messages in thread
From: Radim Krčmář @ 2017-08-21 16:20 UTC (permalink / raw)
  To: Wanpeng Li; +Cc: linux-kernel, kvm, Paolo Bonzini, Wanpeng Li

2017-08-18 07:11-0700, Wanpeng Li:
> From: Wanpeng Li <wanpeng.li@hotmail.com>
> 
> ------------[ cut here ]------------
> WARNING: CPU: 7 PID: 3861 at /home/kernel/ssd/kvm/arch/x86/kvm//vmx.c:11299 nested_vmx_vmexit+0x176e/0x1980 [kvm_intel]
> CPU: 7 PID: 3861 Comm: qemu-system-x86 Tainted: G        W  OE   4.13.0-rc4+ #11
> RIP: 0010:nested_vmx_vmexit+0x176e/0x1980 [kvm_intel]
> Call Trace:
>  ? kvm_multiple_exception+0x149/0x170 [kvm]
>  ? handle_emulation_failure+0x79/0x230 [kvm]
>  ? load_vmcs12_host_state+0xa80/0xa80 [kvm_intel]
>  ? check_chain_key+0x137/0x1e0
>  ? reexecute_instruction.part.168+0x130/0x130 [kvm]
>  nested_vmx_inject_exception_vmexit+0xb7/0x100 [kvm_intel]
>  ? nested_vmx_inject_exception_vmexit+0xb7/0x100 [kvm_intel]
>  vmx_queue_exception+0x197/0x300 [kvm_intel]
>  kvm_arch_vcpu_ioctl_run+0x1b0c/0x2c90 [kvm]
>  ? kvm_arch_vcpu_runnable+0x220/0x220 [kvm]
>  ? preempt_count_sub+0x18/0xc0
>  ? restart_apic_timer+0x17d/0x300 [kvm]
>  ? kvm_lapic_restart_hv_timer+0x37/0x50 [kvm]
>  ? kvm_arch_vcpu_load+0x1d8/0x350 [kvm]
>  kvm_vcpu_ioctl+0x4e4/0x910 [kvm]
>  ? kvm_vcpu_ioctl+0x4e4/0x910 [kvm]
>  ? kvm_dev_ioctl+0xbe0/0xbe0 [kvm]
> 
> The flag "nested_run_pending", which can override the decision of which should run 
> next, L1 or L2. nested_run_pending=1 means that we *must* run L2 next, not L1. This 
> is necessary in particular when L1 did a VMLAUNCH of L2 and therefore expects L2 to 
> be run (and perhaps be injected with an event it specified, etc.). Nested_run_pending 
> is especially intended to avoid switching  to L1 in the injection decision-point.
> 
> I catch this in the queue exception path, this patch fixes it by requesting 
> an immediate VM exit from L2 and keeping the exception for L1 pending for a 
> subsequent nested VM exit.
> 
> Cc: Paolo Bonzini <pbonzini@redhat.com>
> Cc: Radim Krčmář <rkrcmar@redhat.com>
> Signed-off-by: Wanpeng Li <wanpeng.li@hotmail.com>
> ---
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> @@ -6356,8 +6356,8 @@ static int inject_pending_event(struct kvm_vcpu *vcpu, bool req_int_win)
>  			kvm_update_dr7(vcpu);
>  		}

Hm, we shouldn't execute the code above if exception won't be injected.

>  
> -		kvm_x86_ops->queue_exception(vcpu);
> -		return 0;

vmx_complete_interrupts() assumes that the exception is always injected,
so it would be dropped by kvm_clear_exception_queue().

I'm starting to wonder whether getting rid of nested_run_pending
wouldn't be nicer.

> +		r = kvm_x86_ops->queue_exception(vcpu);
> +		return r;
>  	}
>  
>  	if (vcpu->arch.nmi_injected) {
> -- 
> 2.7.4
> 

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH v2] KVM: nVMX: Fix trying to cancel vmlauch/vmresume
  2017-08-21 16:20 ` Radim Krčmář
@ 2017-08-21 22:55   ` Wanpeng Li
  2017-08-21 23:09     ` Wanpeng Li
  2017-08-23  7:13   ` Paolo Bonzini
  1 sibling, 1 reply; 7+ messages in thread
From: Wanpeng Li @ 2017-08-21 22:55 UTC (permalink / raw)
  To: Radim Krčmář; +Cc: linux-kernel, kvm, Paolo Bonzini, Wanpeng Li

2017-08-22 0:20 GMT+08:00 Radim Krčmář <rkrcmar@redhat.com>:
> 2017-08-18 07:11-0700, Wanpeng Li:
>> From: Wanpeng Li <wanpeng.li@hotmail.com>
>>
>> ------------[ cut here ]------------
>> WARNING: CPU: 7 PID: 3861 at /home/kernel/ssd/kvm/arch/x86/kvm//vmx.c:11299 nested_vmx_vmexit+0x176e/0x1980 [kvm_intel]
>> CPU: 7 PID: 3861 Comm: qemu-system-x86 Tainted: G        W  OE   4.13.0-rc4+ #11
>> RIP: 0010:nested_vmx_vmexit+0x176e/0x1980 [kvm_intel]
>> Call Trace:
>>  ? kvm_multiple_exception+0x149/0x170 [kvm]
>>  ? handle_emulation_failure+0x79/0x230 [kvm]
>>  ? load_vmcs12_host_state+0xa80/0xa80 [kvm_intel]
>>  ? check_chain_key+0x137/0x1e0
>>  ? reexecute_instruction.part.168+0x130/0x130 [kvm]
>>  nested_vmx_inject_exception_vmexit+0xb7/0x100 [kvm_intel]
>>  ? nested_vmx_inject_exception_vmexit+0xb7/0x100 [kvm_intel]
>>  vmx_queue_exception+0x197/0x300 [kvm_intel]
>>  kvm_arch_vcpu_ioctl_run+0x1b0c/0x2c90 [kvm]
>>  ? kvm_arch_vcpu_runnable+0x220/0x220 [kvm]
>>  ? preempt_count_sub+0x18/0xc0
>>  ? restart_apic_timer+0x17d/0x300 [kvm]
>>  ? kvm_lapic_restart_hv_timer+0x37/0x50 [kvm]
>>  ? kvm_arch_vcpu_load+0x1d8/0x350 [kvm]
>>  kvm_vcpu_ioctl+0x4e4/0x910 [kvm]
>>  ? kvm_vcpu_ioctl+0x4e4/0x910 [kvm]
>>  ? kvm_dev_ioctl+0xbe0/0xbe0 [kvm]
>>
>> The flag "nested_run_pending", which can override the decision of which should run
>> next, L1 or L2. nested_run_pending=1 means that we *must* run L2 next, not L1. This
>> is necessary in particular when L1 did a VMLAUNCH of L2 and therefore expects L2 to
>> be run (and perhaps be injected with an event it specified, etc.). Nested_run_pending
>> is especially intended to avoid switching  to L1 in the injection decision-point.
>>
>> I catch this in the queue exception path, this patch fixes it by requesting
>> an immediate VM exit from L2 and keeping the exception for L1 pending for a
>> subsequent nested VM exit.
>>
>> Cc: Paolo Bonzini <pbonzini@redhat.com>
>> Cc: Radim Krčmář <rkrcmar@redhat.com>
>> Signed-off-by: Wanpeng Li <wanpeng.li@hotmail.com>
>> ---
>> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
>> @@ -6356,8 +6356,8 @@ static int inject_pending_event(struct kvm_vcpu *vcpu, bool req_int_win)
>>                       kvm_update_dr7(vcpu);
>>               }
>
> Hm, we shouldn't execute the code above if exception won't be injected.
>
>>
>> -             kvm_x86_ops->queue_exception(vcpu);
>> -             return 0;
>
> vmx_complete_interrupts() assumes that the exception is always injected,
> so it would be dropped by kvm_clear_exception_queue().
>
> I'm starting to wonder whether getting rid of nested_run_pending
> wouldn't be nicer.

Yeah, I rethought your concern about nested_run_pending when the return
value is 0: the path in the calltrace is actually the else branch in
nested_vmx_check_exception(). An exception will be injected into L2 by
L1 if L1 owns this exception; otherwise it is injected by L0 directly.
For the nested_run_pending case with return value 0, we can treat it as
L0 injecting the exception into L2 directly, so no exception is injected
into the wrong guest.

Regards,
Wanpeng Li

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH v2] KVM: nVMX: Fix trying to cancel vmlauch/vmresume
  2017-08-21 22:55   ` Wanpeng Li
@ 2017-08-21 23:09     ` Wanpeng Li
  2017-08-22  2:33       ` Wanpeng Li
  0 siblings, 1 reply; 7+ messages in thread
From: Wanpeng Li @ 2017-08-21 23:09 UTC (permalink / raw)
  To: Radim Krčmář; +Cc: linux-kernel, kvm, Paolo Bonzini, Wanpeng Li

2017-08-22 6:55 GMT+08:00 Wanpeng Li <kernellwp@gmail.com>:
> 2017-08-22 0:20 GMT+08:00 Radim Krčmář <rkrcmar@redhat.com>:
>> 2017-08-18 07:11-0700, Wanpeng Li:
>>> From: Wanpeng Li <wanpeng.li@hotmail.com>
>>>
>>> ------------[ cut here ]------------
>>> WARNING: CPU: 7 PID: 3861 at /home/kernel/ssd/kvm/arch/x86/kvm//vmx.c:11299 nested_vmx_vmexit+0x176e/0x1980 [kvm_intel]
>>> CPU: 7 PID: 3861 Comm: qemu-system-x86 Tainted: G        W  OE   4.13.0-rc4+ #11
>>> RIP: 0010:nested_vmx_vmexit+0x176e/0x1980 [kvm_intel]
>>> Call Trace:
>>>  ? kvm_multiple_exception+0x149/0x170 [kvm]
>>>  ? handle_emulation_failure+0x79/0x230 [kvm]
>>>  ? load_vmcs12_host_state+0xa80/0xa80 [kvm_intel]
>>>  ? check_chain_key+0x137/0x1e0
>>>  ? reexecute_instruction.part.168+0x130/0x130 [kvm]
>>>  nested_vmx_inject_exception_vmexit+0xb7/0x100 [kvm_intel]
>>>  ? nested_vmx_inject_exception_vmexit+0xb7/0x100 [kvm_intel]
>>>  vmx_queue_exception+0x197/0x300 [kvm_intel]
>>>  kvm_arch_vcpu_ioctl_run+0x1b0c/0x2c90 [kvm]
>>>  ? kvm_arch_vcpu_runnable+0x220/0x220 [kvm]
>>>  ? preempt_count_sub+0x18/0xc0
>>>  ? restart_apic_timer+0x17d/0x300 [kvm]
>>>  ? kvm_lapic_restart_hv_timer+0x37/0x50 [kvm]
>>>  ? kvm_arch_vcpu_load+0x1d8/0x350 [kvm]
>>>  kvm_vcpu_ioctl+0x4e4/0x910 [kvm]
>>>  ? kvm_vcpu_ioctl+0x4e4/0x910 [kvm]
>>>  ? kvm_dev_ioctl+0xbe0/0xbe0 [kvm]
>>>
>>> The flag "nested_run_pending", which can override the decision of which should run
>>> next, L1 or L2. nested_run_pending=1 means that we *must* run L2 next, not L1. This
>>> is necessary in particular when L1 did a VMLAUNCH of L2 and therefore expects L2 to
>>> be run (and perhaps be injected with an event it specified, etc.). Nested_run_pending
>>> is especially intended to avoid switching  to L1 in the injection decision-point.
>>>
>>> I catch this in the queue exception path, this patch fixes it by requesting
>>> an immediate VM exit from L2 and keeping the exception for L1 pending for a
>>> subsequent nested VM exit.
>>>
>>> Cc: Paolo Bonzini <pbonzini@redhat.com>
>>> Cc: Radim Krčmář <rkrcmar@redhat.com>
>>> Signed-off-by: Wanpeng Li <wanpeng.li@hotmail.com>
>>> ---
>>> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
>>> @@ -6356,8 +6356,8 @@ static int inject_pending_event(struct kvm_vcpu *vcpu, bool req_int_win)
>>>                       kvm_update_dr7(vcpu);
>>>               }
>>
>> Hm, we shouldn't execute the code above if exception won't be injected.
>>
>>>
>>> -             kvm_x86_ops->queue_exception(vcpu);
>>> -             return 0;
>>
>> vmx_complete_interrupts() assumes that the exception is always injected,
>> so it would be dropped by kvm_clear_exception_queue().
>>
>> I'm starting to wonder whether getting rid of nested_run_pending
>> wouldn't be nicer.
>
> Yeah, I rethink of your concern for nested_run_pending w/ return value
> is 0, actually the path in the calltrace is the else branch in
> nested_vmx_check_exception(), an exception will be injected to L2 by
> L1 if L1 owns this exception, otherwise injected by L0 directly. For
> the nested_run_pending w/ return value is 0 stuff, we can treat it as
> L0 injects the exception to L2 directly. So there is no exception is
> injected to wrong guest.

I just sent out v3 to move the nested_run_pending stuff to the else branch.

Regards,
Wanpeng Li

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH v2] KVM: nVMX: Fix trying to cancel vmlauch/vmresume
  2017-08-21 23:09     ` Wanpeng Li
@ 2017-08-22  2:33       ` Wanpeng Li
  0 siblings, 0 replies; 7+ messages in thread
From: Wanpeng Li @ 2017-08-22  2:33 UTC (permalink / raw)
  To: Radim Krčmář; +Cc: linux-kernel, kvm, Paolo Bonzini, Wanpeng Li

2017-08-22 7:09 GMT+08:00 Wanpeng Li <kernellwp@gmail.com>:
> 2017-08-22 6:55 GMT+08:00 Wanpeng Li <kernellwp@gmail.com>:
>> 2017-08-22 0:20 GMT+08:00 Radim Krčmář <rkrcmar@redhat.com>:
>>> 2017-08-18 07:11-0700, Wanpeng Li:
>>>> From: Wanpeng Li <wanpeng.li@hotmail.com>
>>>>
>>>> ------------[ cut here ]------------
>>>> WARNING: CPU: 7 PID: 3861 at /home/kernel/ssd/kvm/arch/x86/kvm//vmx.c:11299 nested_vmx_vmexit+0x176e/0x1980 [kvm_intel]
>>>> CPU: 7 PID: 3861 Comm: qemu-system-x86 Tainted: G        W  OE   4.13.0-rc4+ #11
>>>> RIP: 0010:nested_vmx_vmexit+0x176e/0x1980 [kvm_intel]
>>>> Call Trace:
>>>>  ? kvm_multiple_exception+0x149/0x170 [kvm]
>>>>  ? handle_emulation_failure+0x79/0x230 [kvm]
>>>>  ? load_vmcs12_host_state+0xa80/0xa80 [kvm_intel]
>>>>  ? check_chain_key+0x137/0x1e0
>>>>  ? reexecute_instruction.part.168+0x130/0x130 [kvm]
>>>>  nested_vmx_inject_exception_vmexit+0xb7/0x100 [kvm_intel]
>>>>  ? nested_vmx_inject_exception_vmexit+0xb7/0x100 [kvm_intel]
>>>>  vmx_queue_exception+0x197/0x300 [kvm_intel]
>>>>  kvm_arch_vcpu_ioctl_run+0x1b0c/0x2c90 [kvm]
>>>>  ? kvm_arch_vcpu_runnable+0x220/0x220 [kvm]
>>>>  ? preempt_count_sub+0x18/0xc0
>>>>  ? restart_apic_timer+0x17d/0x300 [kvm]
>>>>  ? kvm_lapic_restart_hv_timer+0x37/0x50 [kvm]
>>>>  ? kvm_arch_vcpu_load+0x1d8/0x350 [kvm]
>>>>  kvm_vcpu_ioctl+0x4e4/0x910 [kvm]
>>>>  ? kvm_vcpu_ioctl+0x4e4/0x910 [kvm]
>>>>  ? kvm_dev_ioctl+0xbe0/0xbe0 [kvm]
>>>>
>>>> The flag "nested_run_pending", which can override the decision of which should run
>>>> next, L1 or L2. nested_run_pending=1 means that we *must* run L2 next, not L1. This
>>>> is necessary in particular when L1 did a VMLAUNCH of L2 and therefore expects L2 to
>>>> be run (and perhaps be injected with an event it specified, etc.). Nested_run_pending
>>>> is especially intended to avoid switching  to L1 in the injection decision-point.
>>>>
>>>> I catch this in the queue exception path, this patch fixes it by requesting
>>>> an immediate VM exit from L2 and keeping the exception for L1 pending for a
>>>> subsequent nested VM exit.
>>>>
>>>> Cc: Paolo Bonzini <pbonzini@redhat.com>
>>>> Cc: Radim Krčmář <rkrcmar@redhat.com>
>>>> Signed-off-by: Wanpeng Li <wanpeng.li@hotmail.com>
>>>> ---
>>>> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
>>>> @@ -6356,8 +6356,8 @@ static int inject_pending_event(struct kvm_vcpu *vcpu, bool req_int_win)
>>>>                       kvm_update_dr7(vcpu);
>>>>               }
>>>
>>> Hm, we shouldn't execute the code above if exception won't be injected.
>>>
>>>>
>>>> -             kvm_x86_ops->queue_exception(vcpu);
>>>> -             return 0;
>>>
>>> vmx_complete_interrupts() assumes that the exception is always injected,
>>> so it would be dropped by kvm_clear_exception_queue().
>>>
>>> I'm starting to wonder whether getting rid of nested_run_pending
>>> wouldn't be nicer.
>>
>> Yeah, I rethink of your concern for nested_run_pending w/ return value
>> is 0, actually the path in the calltrace is the else branch in
>> nested_vmx_check_exception(), an exception will be injected to L2 by
>> L1 if L1 owns this exception, otherwise injected by L0 directly. For
>> the nested_run_pending w/ return value is 0 stuff, we can treat it as
>> L0 injects the exception to L2 directly. So there is no exception is
>> injected to wrong guest.
>
> I just sent out v3 to move the nested_run_pending stuff to the else branch.

I can still encounter the splat w/ v3 due to #PF, so v1 is still better.

Regards,
Wanpeng Li

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH v2] KVM: nVMX: Fix trying to cancel vmlauch/vmresume
  2017-08-21 16:20 ` Radim Krčmář
  2017-08-21 22:55   ` Wanpeng Li
@ 2017-08-23  7:13   ` Paolo Bonzini
  2017-08-23  7:26     ` Wanpeng Li
  1 sibling, 1 reply; 7+ messages in thread
From: Paolo Bonzini @ 2017-08-23  7:13 UTC (permalink / raw)
  To: Radim Krčmář, Wanpeng Li; +Cc: linux-kernel, kvm, Wanpeng Li

On 21/08/2017 18:20, Radim Krčmář wrote:
> 2017-08-18 07:11-0700, Wanpeng Li:
>> From: Wanpeng Li <wanpeng.li@hotmail.com>
>>
>> ------------[ cut here ]------------
>> WARNING: CPU: 7 PID: 3861 at /home/kernel/ssd/kvm/arch/x86/kvm//vmx.c:11299 nested_vmx_vmexit+0x176e/0x1980 [kvm_intel]
>> CPU: 7 PID: 3861 Comm: qemu-system-x86 Tainted: G        W  OE   4.13.0-rc4+ #11
>> RIP: 0010:nested_vmx_vmexit+0x176e/0x1980 [kvm_intel]
>> Call Trace:
>>  ? kvm_multiple_exception+0x149/0x170 [kvm]
>>  ? handle_emulation_failure+0x79/0x230 [kvm]
>>  ? load_vmcs12_host_state+0xa80/0xa80 [kvm_intel]
>>  ? check_chain_key+0x137/0x1e0
>>  ? reexecute_instruction.part.168+0x130/0x130 [kvm]
>>  nested_vmx_inject_exception_vmexit+0xb7/0x100 [kvm_intel]
>>  ? nested_vmx_inject_exception_vmexit+0xb7/0x100 [kvm_intel]
>>  vmx_queue_exception+0x197/0x300 [kvm_intel]
>>  kvm_arch_vcpu_ioctl_run+0x1b0c/0x2c90 [kvm]
>>  ? kvm_arch_vcpu_runnable+0x220/0x220 [kvm]
>>  ? preempt_count_sub+0x18/0xc0
>>  ? restart_apic_timer+0x17d/0x300 [kvm]
>>  ? kvm_lapic_restart_hv_timer+0x37/0x50 [kvm]
>>  ? kvm_arch_vcpu_load+0x1d8/0x350 [kvm]
>>  kvm_vcpu_ioctl+0x4e4/0x910 [kvm]
>>  ? kvm_vcpu_ioctl+0x4e4/0x910 [kvm]
>>  ? kvm_dev_ioctl+0xbe0/0xbe0 [kvm]
>>
>> The flag "nested_run_pending", which can override the decision of which should run 
>> next, L1 or L2. nested_run_pending=1 means that we *must* run L2 next, not L1. This 
>> is necessary in particular when L1 did a VMLAUNCH of L2 and therefore expects L2 to 
>> be run (and perhaps be injected with an event it specified, etc.). Nested_run_pending 
>> is especially intended to avoid switching  to L1 in the injection decision-point.
>>
>> I catch this in the queue exception path, this patch fixes it by requesting 
>> an immediate VM exit from L2 and keeping the exception for L1 pending for a 
>> subsequent nested VM exit.
>>
>> Cc: Paolo Bonzini <pbonzini@redhat.com>
>> Cc: Radim Krčmář <rkrcmar@redhat.com>
>> Signed-off-by: Wanpeng Li <wanpeng.li@hotmail.com>
>> ---
>> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
>> @@ -6356,8 +6356,8 @@ static int inject_pending_event(struct kvm_vcpu *vcpu, bool req_int_win)
>>  			kvm_update_dr7(vcpu);
>>  		}
> 
> Hm, we shouldn't execute the code above if exception won't be injected.

True, the code should have been when the exception is injected first,
not here.  But it's fine since in both cases (set RF and clear GD) it's
idempotent.

>>  
>> -		kvm_x86_ops->queue_exception(vcpu);
>> -		return 0;
> 
> vmx_complete_interrupts() assumes that the exception is always injected,
> so it would be dropped by kvm_clear_exception_queue().

That's a separate bug.  We need to separate exception.pending from
exception.injected, similar to NMIs (what is now exception.pending would
become exception.injected).

For now, this patch would be better than nothing, but getting the right
fix for 4.14 would be nice too.

Paolo

> I'm starting to wonder whether getting rid of nested_run_pending
> wouldn't be nicer.
> 
>> +		r = kvm_x86_ops->queue_exception(vcpu);
>> +		return r;
>>  	}
>>  
>>  	if (vcpu->arch.nmi_injected) {
>> -- 
>> 2.7.4
>>
> 

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH v2] KVM: nVMX: Fix trying to cancel vmlauch/vmresume
  2017-08-23  7:13   ` Paolo Bonzini
@ 2017-08-23  7:26     ` Wanpeng Li
  0 siblings, 0 replies; 7+ messages in thread
From: Wanpeng Li @ 2017-08-23  7:26 UTC (permalink / raw)
  To: Paolo Bonzini; +Cc: Radim Krčmář, linux-kernel, kvm, Wanpeng Li

2017-08-23 15:13 GMT+08:00 Paolo Bonzini <pbonzini@redhat.com>:
> On 21/08/2017 18:20, Radim Krčmář wrote:
>> 2017-08-18 07:11-0700, Wanpeng Li:
>>> From: Wanpeng Li <wanpeng.li@hotmail.com>
>>>
>>> ------------[ cut here ]------------
>>> WARNING: CPU: 7 PID: 3861 at /home/kernel/ssd/kvm/arch/x86/kvm//vmx.c:11299 nested_vmx_vmexit+0x176e/0x1980 [kvm_intel]
>>> CPU: 7 PID: 3861 Comm: qemu-system-x86 Tainted: G        W  OE   4.13.0-rc4+ #11
>>> RIP: 0010:nested_vmx_vmexit+0x176e/0x1980 [kvm_intel]
>>> Call Trace:
>>>  ? kvm_multiple_exception+0x149/0x170 [kvm]
>>>  ? handle_emulation_failure+0x79/0x230 [kvm]
>>>  ? load_vmcs12_host_state+0xa80/0xa80 [kvm_intel]
>>>  ? check_chain_key+0x137/0x1e0
>>>  ? reexecute_instruction.part.168+0x130/0x130 [kvm]
>>>  nested_vmx_inject_exception_vmexit+0xb7/0x100 [kvm_intel]
>>>  ? nested_vmx_inject_exception_vmexit+0xb7/0x100 [kvm_intel]
>>>  vmx_queue_exception+0x197/0x300 [kvm_intel]
>>>  kvm_arch_vcpu_ioctl_run+0x1b0c/0x2c90 [kvm]
>>>  ? kvm_arch_vcpu_runnable+0x220/0x220 [kvm]
>>>  ? preempt_count_sub+0x18/0xc0
>>>  ? restart_apic_timer+0x17d/0x300 [kvm]
>>>  ? kvm_lapic_restart_hv_timer+0x37/0x50 [kvm]
>>>  ? kvm_arch_vcpu_load+0x1d8/0x350 [kvm]
>>>  kvm_vcpu_ioctl+0x4e4/0x910 [kvm]
>>>  ? kvm_vcpu_ioctl+0x4e4/0x910 [kvm]
>>>  ? kvm_dev_ioctl+0xbe0/0xbe0 [kvm]
>>>
>>> The flag "nested_run_pending", which can override the decision of which should run
>>> next, L1 or L2. nested_run_pending=1 means that we *must* run L2 next, not L1. This
>>> is necessary in particular when L1 did a VMLAUNCH of L2 and therefore expects L2 to
>>> be run (and perhaps be injected with an event it specified, etc.). Nested_run_pending
>>> is especially intended to avoid switching  to L1 in the injection decision-point.
>>>
>>> I catch this in the queue exception path, this patch fixes it by requesting
>>> an immediate VM exit from L2 and keeping the exception for L1 pending for a
>>> subsequent nested VM exit.
>>>
>>> Cc: Paolo Bonzini <pbonzini@redhat.com>
>>> Cc: Radim Krčmář <rkrcmar@redhat.com>
>>> Signed-off-by: Wanpeng Li <wanpeng.li@hotmail.com>
>>> ---
>>> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
>>> @@ -6356,8 +6356,8 @@ static int inject_pending_event(struct kvm_vcpu *vcpu, bool req_int_win)
>>>                      kvm_update_dr7(vcpu);
>>>              }
>>
>> Hm, we shouldn't execute the code above if exception won't be injected.
>
> True, the code should have been when the exception is injected first,
> not here.  But it's fine since in both cases (set RF and clear GD) it's
> idempotent.
>
>>>
>>> -            kvm_x86_ops->queue_exception(vcpu);
>>> -            return 0;
>>
>> vmx_complete_interrupts() assumes that the exception is always injected,
>> so it would be dropped by kvm_clear_exception_queue().
>
> That's a separate bug.  We need to separate exception.pending from
> exception.injected, similar to NMIs (what is now exception.pending would
> become exception.injected).
>
> For now, this patch would be better than nothing, but getting the right
> fix for 4.14 would be nice too.

Ok, I will figure out the exception.pending and exception.injected later. :)

Regards,
Wanpeng Li

^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2017-08-23  7:26 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-08-18 14:11 [PATCH v2] KVM: nVMX: Fix trying to cancel vmlauch/vmresume Wanpeng Li
2017-08-21 16:20 ` Radim Krčmář
2017-08-21 22:55   ` Wanpeng Li
2017-08-21 23:09     ` Wanpeng Li
2017-08-22  2:33       ` Wanpeng Li
2017-08-23  7:13   ` Paolo Bonzini
2017-08-23  7:26     ` Wanpeng Li

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).