All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 0/2] RFC: VMX: fix for disappearing L1->L2 event injection on L1 migration
@ 2021-01-06 10:53 Maxim Levitsky
  2021-01-06 10:53 ` [PATCH 1/2] KVM: VMX: create vmx_process_injected_event Maxim Levitsky
                   ` (2 more replies)
  0 siblings, 3 replies; 7+ messages in thread
From: Maxim Levitsky @ 2021-01-06 10:53 UTC (permalink / raw)
  To: kvm
  Cc: Joerg Roedel, Wanpeng Li,
	open list:X86 ARCHITECTURE (32-BIT AND 64-BIT),
	maintainer:X86 ARCHITECTURE (32-BIT AND 64-BIT),
	Vitaly Kuznetsov, H. Peter Anvin, Sean Christopherson,
	Paolo Bonzini, Ingo Molnar, Borislav Petkov, Jim Mattson,
	Thomas Gleixner, Maxim Levitsky

This is VMX version of the same issue as I reproduced on SVM.

Unlike SVM, this version has 2 pending issues to resolve.

1. This seems to break 'vmx' kvm-unit-test in
'error code <-> (!URG || prot_mode) [+]' case.

The test basically tries to do nested vm entry with unrestricted guest disabled,
real mode, and for some reason that works without patch 2 of this series and it
doesn't cause the #GP to be injected, but with this patch the test complains
about unexpected #GP.
I suspect that this test case is broken, but this has to be investigated.

2. L1 MTF injections are lost since KVM has no notion of them; this is still to
be fixed.

This was lightly tested on my nested migration test, which on VMX sadly still
crashes and burns on a (likely) unrelated issue.

Best regards,
	Maxim Levitsky

Maxim Levitsky (2):
  KVM: VMX: create vmx_process_injected_event
  KVM: nVMX: fix for disappearing L1->L2 event injection on L1 migration

 arch/x86/kvm/vmx/nested.c | 12 ++++----
 arch/x86/kvm/vmx/vmx.c    | 60 ++++++++++++++++++++++++---------------
 arch/x86/kvm/vmx/vmx.h    |  4 +++
 3 files changed, 47 insertions(+), 29 deletions(-)

-- 
2.26.2



^ permalink raw reply	[flat|nested] 7+ messages in thread

* [PATCH 1/2] KVM: VMX: create vmx_process_injected_event
  2021-01-06 10:53 [PATCH 0/2] RFC: VMX: fix for disappearing L1->L2 event injection on L1 migration Maxim Levitsky
@ 2021-01-06 10:53 ` Maxim Levitsky
  2021-01-06 10:53 ` [PATCH 2/2] KVM: nVMX: fix for disappearing L1->L2 event injection on L1 migration Maxim Levitsky
  2021-01-06 18:07 ` [PATCH 0/2] RFC: VMX: " Sean Christopherson
  2 siblings, 0 replies; 7+ messages in thread
From: Maxim Levitsky @ 2021-01-06 10:53 UTC (permalink / raw)
  To: kvm
  Cc: Joerg Roedel, Wanpeng Li,
	open list:X86 ARCHITECTURE (32-BIT AND 64-BIT),
	maintainer:X86 ARCHITECTURE (32-BIT AND 64-BIT),
	Vitaly Kuznetsov, H. Peter Anvin, Sean Christopherson,
	Paolo Bonzini, Ingo Molnar, Borislav Petkov, Jim Mattson,
	Thomas Gleixner, Maxim Levitsky

Refactor the logic that is dealing with parsing of an injected event to a
separate function.

This will be used in the next patch to deal with the events that L1 wants to
inject to L2 in a way that survives migration.

Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
---
 arch/x86/kvm/vmx/vmx.c | 60 ++++++++++++++++++++++++++----------------
 arch/x86/kvm/vmx/vmx.h |  4 +++
 2 files changed, 41 insertions(+), 23 deletions(-)

diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 75c9c6a0a3a45..dec6bc94a56b4 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -6442,29 +6442,16 @@ static void vmx_recover_nmi_blocking(struct vcpu_vmx *vmx)
 					      vmx->loaded_vmcs->entry_time));
 }
 
-static void __vmx_complete_interrupts(struct kvm_vcpu *vcpu,
-				      u32 idt_vectoring_info,
-				      int instr_len_field,
-				      int error_code_field)
+void vmx_process_injected_event(struct kvm_vcpu *vcpu,
+				u32 idt_vectoring_info,
+				u32 instr_len,
+				u32 error_code)
 {
-	u8 vector;
-	int type;
-	bool idtv_info_valid;
-
-	idtv_info_valid = idt_vectoring_info & VECTORING_INFO_VALID_MASK;
-
-	vcpu->arch.nmi_injected = false;
-	kvm_clear_exception_queue(vcpu);
-	kvm_clear_interrupt_queue(vcpu);
-
-	if (!idtv_info_valid)
-		return;
+	u8 vector = idt_vectoring_info & VECTORING_INFO_VECTOR_MASK;
+	u32 type = idt_vectoring_info & VECTORING_INFO_TYPE_MASK;
 
 	kvm_make_request(KVM_REQ_EVENT, vcpu);
 
-	vector = idt_vectoring_info & VECTORING_INFO_VECTOR_MASK;
-	type = idt_vectoring_info & VECTORING_INFO_TYPE_MASK;
-
 	switch (type) {
 	case INTR_TYPE_NMI_INTR:
 		vcpu->arch.nmi_injected = true;
@@ -6476,17 +6463,16 @@ static void __vmx_complete_interrupts(struct kvm_vcpu *vcpu,
 		vmx_set_nmi_mask(vcpu, false);
 		break;
 	case INTR_TYPE_SOFT_EXCEPTION:
-		vcpu->arch.event_exit_inst_len = vmcs_read32(instr_len_field);
+		vcpu->arch.event_exit_inst_len = instr_len;
 		fallthrough;
 	case INTR_TYPE_HARD_EXCEPTION:
 		if (idt_vectoring_info & VECTORING_INFO_DELIVER_CODE_MASK) {
-			u32 err = vmcs_read32(error_code_field);
-			kvm_requeue_exception_e(vcpu, vector, err);
+			kvm_requeue_exception_e(vcpu, vector, error_code);
 		} else
 			kvm_requeue_exception(vcpu, vector);
 		break;
 	case INTR_TYPE_SOFT_INTR:
-		vcpu->arch.event_exit_inst_len = vmcs_read32(instr_len_field);
+		vcpu->arch.event_exit_inst_len = instr_len;
 		fallthrough;
 	case INTR_TYPE_EXT_INTR:
 		kvm_queue_interrupt(vcpu, vector, type == INTR_TYPE_SOFT_INTR);
@@ -6496,6 +6482,34 @@ static void __vmx_complete_interrupts(struct kvm_vcpu *vcpu,
 	}
 }
 
+static void __vmx_complete_interrupts(struct kvm_vcpu *vcpu,
+				      u32 idt_vectoring_info,
+				      int instr_len_field,
+				      int error_code_field)
+{
+	u32 instr_len = 0, err_code = 0;
+	u32 type;
+
+	vcpu->arch.nmi_injected = false;
+	kvm_clear_exception_queue(vcpu);
+	kvm_clear_interrupt_queue(vcpu);
+
+	if (!(idt_vectoring_info & VECTORING_INFO_VALID_MASK))
+		return;
+
+	type = idt_vectoring_info & VECTORING_INFO_TYPE_MASK;
+
+	if (idt_vectoring_info & VECTORING_INFO_DELIVER_CODE_MASK)
+		err_code = vmcs_read32(error_code_field);
+
+	if (type == INTR_TYPE_SOFT_EXCEPTION || type == INTR_TYPE_SOFT_INTR)
+		instr_len = vmcs_read32(instr_len_field);
+
+	vmx_process_injected_event(vcpu, idt_vectoring_info, instr_len,
+				   err_code);
+}
+
+
 static void vmx_complete_interrupts(struct vcpu_vmx *vmx)
 {
 	__vmx_complete_interrupts(&vmx->vcpu, vmx->idt_vectoring_info,
diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h
index 9d3a557949ac2..0c9ecada11025 100644
--- a/arch/x86/kvm/vmx/vmx.h
+++ b/arch/x86/kvm/vmx/vmx.h
@@ -336,6 +336,10 @@ bool vmx_interrupt_blocked(struct kvm_vcpu *vcpu);
 bool vmx_get_nmi_mask(struct kvm_vcpu *vcpu);
 void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked);
 void vmx_set_virtual_apic_mode(struct kvm_vcpu *vcpu);
+void vmx_process_injected_event(struct kvm_vcpu *vcpu,
+				u32 idt_vectoring_info,
+				u32 instr_len,
+				u32 error_code);
 struct vmx_uret_msr *vmx_find_uret_msr(struct vcpu_vmx *vmx, u32 msr);
 void pt_update_intercept_for_msr(struct kvm_vcpu *vcpu);
 void vmx_update_host_rsp(struct vcpu_vmx *vmx, unsigned long host_rsp);
-- 
2.26.2


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH 2/2] KVM: nVMX: fix for disappearing L1->L2 event injection on L1 migration
  2021-01-06 10:53 [PATCH 0/2] RFC: VMX: fix for disappearing L1->L2 event injection on L1 migration Maxim Levitsky
  2021-01-06 10:53 ` [PATCH 1/2] KVM: VMX: create vmx_process_injected_event Maxim Levitsky
@ 2021-01-06 10:53 ` Maxim Levitsky
  2021-01-06 18:17   ` Sean Christopherson
  2021-01-06 18:07 ` [PATCH 0/2] RFC: VMX: " Sean Christopherson
  2 siblings, 1 reply; 7+ messages in thread
From: Maxim Levitsky @ 2021-01-06 10:53 UTC (permalink / raw)
  To: kvm
  Cc: Joerg Roedel, Wanpeng Li,
	open list:X86 ARCHITECTURE (32-BIT AND 64-BIT),
	maintainer:X86 ARCHITECTURE (32-BIT AND 64-BIT),
	Vitaly Kuznetsov, H. Peter Anvin, Sean Christopherson,
	Paolo Bonzini, Ingo Molnar, Borislav Petkov, Jim Mattson,
	Thomas Gleixner, Maxim Levitsky

If migration happens while L2 entry with an injected event to L2 is pending,
we weren't including the event in the migration state and it would be
lost leading to L2 hang.

Fix this by queueing the injected event in a similar manner to how we queue
interrupted injections.

This can be reproduced by running an I/O-intensive task in L2
and repeatedly migrating L1.

Suggested-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
---
 arch/x86/kvm/vmx/nested.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index e2f26564a12de..2ea0bb14f385f 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -2355,12 +2355,12 @@ static void prepare_vmcs02_early(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12)
 	 * Interrupt/Exception Fields
 	 */
 	if (vmx->nested.nested_run_pending) {
-		vmcs_write32(VM_ENTRY_INTR_INFO_FIELD,
-			     vmcs12->vm_entry_intr_info_field);
-		vmcs_write32(VM_ENTRY_EXCEPTION_ERROR_CODE,
-			     vmcs12->vm_entry_exception_error_code);
-		vmcs_write32(VM_ENTRY_INSTRUCTION_LEN,
-			     vmcs12->vm_entry_instruction_len);
+		if ((vmcs12->vm_entry_intr_info_field & VECTORING_INFO_VALID_MASK))
+			vmx_process_injected_event(&vmx->vcpu,
+						   vmcs12->vm_entry_intr_info_field,
+						   vmcs12->vm_entry_instruction_len,
+						   vmcs12->vm_entry_exception_error_code);
+
 		vmcs_write32(GUEST_INTERRUPTIBILITY_INFO,
 			     vmcs12->guest_interruptibility_info);
 		vmx->loaded_vmcs->nmi_known_unmasked =
-- 
2.26.2


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* Re: [PATCH 0/2] RFC: VMX: fix for disappearing L1->L2 event injection on L1 migration
  2021-01-06 10:53 [PATCH 0/2] RFC: VMX: fix for disappearing L1->L2 event injection on L1 migration Maxim Levitsky
  2021-01-06 10:53 ` [PATCH 1/2] KVM: VMX: create vmx_process_injected_event Maxim Levitsky
  2021-01-06 10:53 ` [PATCH 2/2] KVM: nVMX: fix for disappearing L1->L2 event injection on L1 migration Maxim Levitsky
@ 2021-01-06 18:07 ` Sean Christopherson
  2 siblings, 0 replies; 7+ messages in thread
From: Sean Christopherson @ 2021-01-06 18:07 UTC (permalink / raw)
  To: Maxim Levitsky
  Cc: kvm, Joerg Roedel, Wanpeng Li,
	open list:X86 ARCHITECTURE (32-BIT AND 64-BIT),
	maintainer:X86 ARCHITECTURE (32-BIT AND 64-BIT),
	Vitaly Kuznetsov, H. Peter Anvin, Sean Christopherson,
	Paolo Bonzini, Ingo Molnar, Borislav Petkov, Jim Mattson,
	Thomas Gleixner

On Wed, Jan 06, 2021, Maxim Levitsky wrote:
> This is VMX version of the same issue as I reproduced on SVM.
> 
> Unlike SVM, this version has 2 pending issues to resolve.
> 
> 1. This seems to break 'vmx' kvm-unit-test in
> 'error code <-> (!URG || prot_mode) [+]' case.
> 
> The test basically tries to do nested vm entry with unrestricted guest disabled,
> real mode, and for some reason that works without patch 2 of this series and it
> doesn't cause the #GP to be injected, but with this patch the test complains
> about unexpected #GP.

An unexpected #GP for that test is very unlikely.  The various sub-tests under
vmx_controls_test() should never fully enter the guest as GUEST.RFLAGS is set to
an invalid value.  And, that specific test does VM-Enter with URG=0 and
CR0.PG/PE=0, which is also invalid.  The unit test uses test_vmx_valid_controls(),
which is a wee bit misleading, as the "early" consistency checks that cause
VM-Fail are expected to succeed, while the VM-Enter is still expected to "fail"
due to a consistency check VM-Exit.

> I suspect that this test case is broken, but this has to be investigated.
> 
> 2. L1 MTF injections are lost since kvm has no notion of them, this is TBD to
> be fixed.
> 
> This was lightly tested on my nested migration test which no VMX sadly still
> crashes and burns on an (likely) unrelated issue.
> 
> Best regards,
> 	Maxim Levitsky
> 
> Maxim Levitsky (2):
>   KVM: VMX: create vmx_process_injected_event
>   KVM: nVMX: fix for disappearing L1->L2 event injection on L1 migration
> 
>  arch/x86/kvm/vmx/nested.c | 12 ++++----
>  arch/x86/kvm/vmx/vmx.c    | 60 ++++++++++++++++++++++++---------------
>  arch/x86/kvm/vmx/vmx.h    |  4 +++
>  3 files changed, 47 insertions(+), 29 deletions(-)
> 
> -- 
> 2.26.2
> 
> 

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH 2/2] KVM: nVMX: fix for disappearing L1->L2 event injection on L1 migration
  2021-01-06 10:53 ` [PATCH 2/2] KVM: nVMX: fix for disappearing L1->L2 event injection on L1 migration Maxim Levitsky
@ 2021-01-06 18:17   ` Sean Christopherson
  2021-01-07  2:38     ` Maxim Levitsky
  0 siblings, 1 reply; 7+ messages in thread
From: Sean Christopherson @ 2021-01-06 18:17 UTC (permalink / raw)
  To: Maxim Levitsky
  Cc: kvm, Joerg Roedel, Wanpeng Li,
	open list:X86 ARCHITECTURE (32-BIT AND 64-BIT),
	maintainer:X86 ARCHITECTURE (32-BIT AND 64-BIT),
	Vitaly Kuznetsov, H. Peter Anvin, Sean Christopherson,
	Paolo Bonzini, Ingo Molnar, Borislav Petkov, Jim Mattson,
	Thomas Gleixner

On Wed, Jan 06, 2021, Maxim Levitsky wrote:
> If migration happens while L2 entry with an injected event to L2 is pending,
> we weren't including the event in the migration state and it would be
> lost leading to L2 hang.

But the injected event should still be in vmcs12 and KVM_STATE_NESTED_RUN_PENDING
should be set in the migration state, i.e. it should naturally be copied to
vmcs02 and thus (re)injected by vmx_set_nested_state().  Is nested_run_pending
not set?  Is the info in vmcs12 somehow lost?  Or am I off in left field...
 
> Fix this by queueing the injected event in similar manner to how we queue
> interrupted injections.
> 
> This can be reproduced by running an IO intense task in L2,
> and repeatedly migrating the L1.
> 
> Suggested-by: Paolo Bonzini <pbonzini@redhat.com>
> Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
> ---
>  arch/x86/kvm/vmx/nested.c | 12 ++++++------
>  1 file changed, 6 insertions(+), 6 deletions(-)
> 
> diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
> index e2f26564a12de..2ea0bb14f385f 100644
> --- a/arch/x86/kvm/vmx/nested.c
> +++ b/arch/x86/kvm/vmx/nested.c
> @@ -2355,12 +2355,12 @@ static void prepare_vmcs02_early(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12)
>  	 * Interrupt/Exception Fields
>  	 */
>  	if (vmx->nested.nested_run_pending) {
> -		vmcs_write32(VM_ENTRY_INTR_INFO_FIELD,
> -			     vmcs12->vm_entry_intr_info_field);
> -		vmcs_write32(VM_ENTRY_EXCEPTION_ERROR_CODE,
> -			     vmcs12->vm_entry_exception_error_code);
> -		vmcs_write32(VM_ENTRY_INSTRUCTION_LEN,
> -			     vmcs12->vm_entry_instruction_len);
> +		if ((vmcs12->vm_entry_intr_info_field & VECTORING_INFO_VALID_MASK))
> +			vmx_process_injected_event(&vmx->vcpu,
> +						   vmcs12->vm_entry_intr_info_field,
> +						   vmcs12->vm_entry_instruction_len,
> +						   vmcs12->vm_entry_exception_error_code);
> +
>  		vmcs_write32(GUEST_INTERRUPTIBILITY_INFO,
>  			     vmcs12->guest_interruptibility_info);
>  		vmx->loaded_vmcs->nmi_known_unmasked =
> -- 
> 2.26.2
> 

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH 2/2] KVM: nVMX: fix for disappearing L1->L2 event injection on L1 migration
  2021-01-06 18:17   ` Sean Christopherson
@ 2021-01-07  2:38     ` Maxim Levitsky
  2021-01-07  9:41       ` Maxim Levitsky
  0 siblings, 1 reply; 7+ messages in thread
From: Maxim Levitsky @ 2021-01-07  2:38 UTC (permalink / raw)
  To: Sean Christopherson
  Cc: kvm, Joerg Roedel, Wanpeng Li,
	open list:X86 ARCHITECTURE (32-BIT AND 64-BIT),
	maintainer:X86 ARCHITECTURE (32-BIT AND 64-BIT),
	Vitaly Kuznetsov, H. Peter Anvin, Sean Christopherson,
	Paolo Bonzini, Ingo Molnar, Borislav Petkov, Jim Mattson,
	Thomas Gleixner

On Wed, 2021-01-06 at 10:17 -0800, Sean Christopherson wrote:
> On Wed, Jan 06, 2021, Maxim Levitsky wrote:
> > If migration happens while L2 entry with an injected event to L2 is pending,
> > we weren't including the event in the migration state and it would be
> > lost leading to L2 hang.
> 
> But the injected event should still be in vmcs12 and KVM_STATE_NESTED_RUN_PENDING
> should be set in the migration state, i.e. it should naturally be copied to
> vmcs02 and thus (re)injected by vmx_set_nested_state().  Is nested_run_pending
> not set?  Is the info in vmcs12 somehow lost?  Or am I off in left field...


You are completely right. 
The injected event can be copied like that since the vmc(b|s)12 is migrated.

We can safely disregard both of these patches and the two parallel patches for SVM.
I am almost sure that the real root cause of this bug was that we 
weren't restoring the nested run pending flag, and I even 
happened to fix this in this patch series.

This is the trace of the bug (I removed the timestamps to make it easier to read)


kvm_exit:             vcpu 0 reason vmrun rip 0xffffffffa0688ffa info1 0x0000000000000000 info2 0x0000000000000000 intr_info 0x00000000 error_code 0x00000000
kvm_nested_vmrun:     rip: 0xffffffffa0688ffa vmcb: 0x0000000103594000 nrip: 0xffffffff814b3b01 int_ctl: 0x01000001 event_inj: 0x80000036 npt: on
																^^^ this is the injection
kvm_nested_intercepts: cr_read: 0010 cr_write: 0010 excp: 00060042 intercepts: bc4c8027 00006e7f 00000000
kvm_fpu:              unload
kvm_userspace_exit:   reason KVM_EXIT_INTR (10)

============================================================================
migration happens here
============================================================================

...
kvm_async_pf_ready:   token 0xffffffff gva 0
kvm_apic_accept_irq:  apicid 0 vec 243 (Fixed|edge)

kvm_nested_intr_vmexit: rip: 0x000000000000fff0

^^^^^ this is the nested vmexit that shouldn't have happened, since nested run is pending,
and which erased the eventinj field which was migrated correctly just like you say.

kvm_nested_vmexit_inject: reason: interrupt ext_inf1: 0x0000000000000000 ext_inf2: 0x0000000000000000 ext_int: 0x00000000 ext_int_err: 0x00000000
...


We did notice that this vmexit had a weird RIP and I
even explained this later to myself,
that this is the default RIP which we put to vmcb, 
and it wasn't yet updated, since it updates just prior to vm entry.

My test already survived about 170 iterations (usually it crashes after 20-40 iterations)
I am leaving the stress test running all night, let's see if it survives.

V2 of the patches is on the way.

Thanks again for the help!

Best regards,
	Maxim Levitsky

>  
> > Fix this by queueing the injected event in similar manner to how we queue
> > interrupted injections.
> > 
> > This can be reproduced by running an IO intense task in L2,
> > and repeatedly migrating the L1.
> > 
> > Suggested-by: Paolo Bonzini <pbonzini@redhat.com>
> > Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
> > ---
> >  arch/x86/kvm/vmx/nested.c | 12 ++++++------
> >  1 file changed, 6 insertions(+), 6 deletions(-)
> > 
> > diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
> > index e2f26564a12de..2ea0bb14f385f 100644
> > --- a/arch/x86/kvm/vmx/nested.c
> > +++ b/arch/x86/kvm/vmx/nested.c
> > @@ -2355,12 +2355,12 @@ static void prepare_vmcs02_early(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12)
> >  	 * Interrupt/Exception Fields
> >  	 */
> >  	if (vmx->nested.nested_run_pending) {
> > -		vmcs_write32(VM_ENTRY_INTR_INFO_FIELD,
> > -			     vmcs12->vm_entry_intr_info_field);
> > -		vmcs_write32(VM_ENTRY_EXCEPTION_ERROR_CODE,
> > -			     vmcs12->vm_entry_exception_error_code);
> > -		vmcs_write32(VM_ENTRY_INSTRUCTION_LEN,
> > -			     vmcs12->vm_entry_instruction_len);
> > +		if ((vmcs12->vm_entry_intr_info_field & VECTORING_INFO_VALID_MASK))
> > +			vmx_process_injected_event(&vmx->vcpu,
> > +						   vmcs12->vm_entry_intr_info_field,
> > +						   vmcs12->vm_entry_instruction_len,
> > +						   vmcs12->vm_entry_exception_error_code);
> > +
> >  		vmcs_write32(GUEST_INTERRUPTIBILITY_INFO,
> >  			     vmcs12->guest_interruptibility_info);
> >  		vmx->loaded_vmcs->nmi_known_unmasked =
> > -- 
> > 2.26.2
> > 



^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH 2/2] KVM: nVMX: fix for disappearing L1->L2 event injection on L1 migration
  2021-01-07  2:38     ` Maxim Levitsky
@ 2021-01-07  9:41       ` Maxim Levitsky
  0 siblings, 0 replies; 7+ messages in thread
From: Maxim Levitsky @ 2021-01-07  9:41 UTC (permalink / raw)
  To: Sean Christopherson
  Cc: kvm, Joerg Roedel, Wanpeng Li,
	open list:X86 ARCHITECTURE (32-BIT AND 64-BIT),
	maintainer:X86 ARCHITECTURE (32-BIT AND 64-BIT),
	Vitaly Kuznetsov, H. Peter Anvin, Sean Christopherson,
	Paolo Bonzini, Ingo Molnar, Borislav Petkov, Jim Mattson,
	Thomas Gleixner

On Thu, 2021-01-07 at 04:38 +0200, Maxim Levitsky wrote:
> On Wed, 2021-01-06 at 10:17 -0800, Sean Christopherson wrote:
> > On Wed, Jan 06, 2021, Maxim Levitsky wrote:
> > > If migration happens while L2 entry with an injected event to L2 is pending,
> > > we weren't including the event in the migration state and it would be
> > > lost leading to L2 hang.
> > 
> > But the injected event should still be in vmcs12 and KVM_STATE_NESTED_RUN_PENDING
> > should be set in the migration state, i.e. it should naturally be copied to
> > vmcs02 and thus (re)injected by vmx_set_nested_state().  Is nested_run_pending
> > not set?  Is the info in vmcs12 somehow lost?  Or am I off in left field...
> 
> You are completely right. 
> The injected event can be copied like that since the vmc(b|s)12 is migrated.
> 
> We can safely disregard both these two patches and the parallel two patches for SVM.
> I am almost sure that the real root cause of this bug was that we 
> weren't restoring the nested run pending flag, and I even 
> happened to fix this in this patch series.
> 
> This is the trace of the bug (I removed the timestamps to make it easier to read)
> 
> 
> kvm_exit:             vcpu 0 reason vmrun rip 0xffffffffa0688ffa info1 0x0000000000000000 info2 0x0000000000000000 intr_info 0x00000000 error_code 0x00000000
> kvm_nested_vmrun:     rip: 0xffffffffa0688ffa vmcb: 0x0000000103594000 nrip: 0xffffffff814b3b01 int_ctl: 0x01000001 event_inj: 0x80000036 npt: on
> 																^^^ this is the injection
> kvm_nested_intercepts: cr_read: 0010 cr_write: 0010 excp: 00060042 intercepts: bc4c8027 00006e7f 00000000
> kvm_fpu:              unload
> kvm_userspace_exit:   reason KVM_EXIT_INTR (10)
> 
> ============================================================================
> migration happens here
> ============================================================================
> 
> ...
> kvm_async_pf_ready:   token 0xffffffff gva 0
> kvm_apic_accept_irq:  apicid 0 vec 243 (Fixed|edge)
> 
> kvm_nested_intr_vmexit: rip: 0x000000000000fff0
> 
> ^^^^^ this is the nested vmexit that shouldn't have happened, since nested run is pending,
> and which erased the eventinj field which was migrated correctly just like you say.
> 
> kvm_nested_vmexit_inject: reason: interrupt ext_inf1: 0x0000000000000000 ext_inf2: 0x0000000000000000 ext_int: 0x00000000 ext_int_err: 0x00000000
> ...
> 
> 
> We did notice that this vmexit had a wierd RIP and I 
> even explained this later to myself,
> that this is the default RIP which we put to vmcb, 
> and it wasn't yet updated, since it updates just prior to vm entry.
> 
> My test already survived about 170 iterations (usually it crashes after 20-40 iterations)
> I am leaving the stress test running all night, let see if it survives.

And after leaving it overnight, the test survived about 1000 iterations.

Thanks again!

Best regards,
	Maxim Levitsky


> 
> V2 of the patches is on the way.
> 
> Thanks again for the help!
> 
> Best regards,
> 	Maxim Levitsky
> 
> >  
> > > Fix this by queueing the injected event in similar manner to how we queue
> > > interrupted injections.
> > > 
> > > This can be reproduced by running an IO intense task in L2,
> > > and repeatedly migrating the L1.
> > > 
> > > Suggested-by: Paolo Bonzini <pbonzini@redhat.com>
> > > Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
> > > ---
> > >  arch/x86/kvm/vmx/nested.c | 12 ++++++------
> > >  1 file changed, 6 insertions(+), 6 deletions(-)
> > > 
> > > diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
> > > index e2f26564a12de..2ea0bb14f385f 100644
> > > --- a/arch/x86/kvm/vmx/nested.c
> > > +++ b/arch/x86/kvm/vmx/nested.c
> > > @@ -2355,12 +2355,12 @@ static void prepare_vmcs02_early(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12)
> > >  	 * Interrupt/Exception Fields
> > >  	 */
> > >  	if (vmx->nested.nested_run_pending) {
> > > -		vmcs_write32(VM_ENTRY_INTR_INFO_FIELD,
> > > -			     vmcs12->vm_entry_intr_info_field);
> > > -		vmcs_write32(VM_ENTRY_EXCEPTION_ERROR_CODE,
> > > -			     vmcs12->vm_entry_exception_error_code);
> > > -		vmcs_write32(VM_ENTRY_INSTRUCTION_LEN,
> > > -			     vmcs12->vm_entry_instruction_len);
> > > +		if ((vmcs12->vm_entry_intr_info_field & VECTORING_INFO_VALID_MASK))
> > > +			vmx_process_injected_event(&vmx->vcpu,
> > > +						   vmcs12->vm_entry_intr_info_field,
> > > +						   vmcs12->vm_entry_instruction_len,
> > > +						   vmcs12->vm_entry_exception_error_code);
> > > +
> > >  		vmcs_write32(GUEST_INTERRUPTIBILITY_INFO,
> > >  			     vmcs12->guest_interruptibility_info);
> > >  		vmx->loaded_vmcs->nmi_known_unmasked =
> > > -- 
> > > 2.26.2
> > > 



^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2021-01-07  9:42 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-01-06 10:53 [PATCH 0/2] RFC: VMX: fix for disappearing L1->L2 event injection on L1 migration Maxim Levitsky
2021-01-06 10:53 ` [PATCH 1/2] KVM: VMX: create vmx_process_injected_event Maxim Levitsky
2021-01-06 10:53 ` [PATCH 2/2] KVM: nVMX: fix for disappearing L1->L2 event injection on L1 migration Maxim Levitsky
2021-01-06 18:17   ` Sean Christopherson
2021-01-07  2:38     ` Maxim Levitsky
2021-01-07  9:41       ` Maxim Levitsky
2021-01-06 18:07 ` [PATCH 0/2] RFC: VMX: " Sean Christopherson

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.