linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH v2] KVM: X86: Ultra fast single target IPI fastpath
@ 2020-04-10  1:03 Wanpeng Li
  2020-04-10 15:17 ` Paolo Bonzini
  2020-04-10 15:35 ` Sean Christopherson
  0 siblings, 2 replies; 10+ messages in thread
From: Wanpeng Li @ 2020-04-10  1:03 UTC (permalink / raw)
  To: linux-kernel, kvm
  Cc: Paolo Bonzini, Sean Christopherson, Vitaly Kuznetsov, Wanpeng Li,
	Jim Mattson, Joerg Roedel, Haiwei Li

From: Wanpeng Li <wanpengli@tencent.com>

IPIs and timers cause most of the MSR-write VM-exits observed in cloud
environments, so let's optimize virtual IPI latency more aggressively and
inject the target IPI as soon as possible.

Running the kvm-unit-tests/vmexit.flat IPI test on an SKX server, with the
adaptive advance LAPIC timer and adaptive halt-polling disabled to avoid
interference, this patch gives another 7% improvement.

w/o fastpath -> fastpath            4238 -> 3543  16.4%
fastpath     -> ultra fastpath      3543 -> 3293     7%
w/o fastpath -> ultra fastpath      4238 -> 3293  22.3% 

This also revises the performance data from commit 1e9e2622a1 ("KVM: VMX:
FIXED+PHYSICAL mode single target IPI fastpath"); that testing added
--overcommit cpu-pm=on to the kvm-unit-tests guest, which is unnecessary.

Tested-by: Haiwei Li <lihaiwei@tencent.com>
Cc: Haiwei Li <lihaiwei@tencent.com>
Signed-off-by: Wanpeng Li <wanpengli@tencent.com>
---
v1 -> v2:
 * rebase on latest kvm/queue
 * update patch description

 arch/x86/include/asm/kvm_host.h |  6 +++---
 arch/x86/kvm/svm/svm.c          | 21 ++++++++++++++-------
 arch/x86/kvm/vmx/vmx.c          | 19 +++++++++++++------
 arch/x86/kvm/x86.c              |  4 ++--
 4 files changed, 32 insertions(+), 18 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index c7da23a..e667cf3 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1124,7 +1124,8 @@ struct kvm_x86_ops {
 	 */
 	void (*tlb_flush_guest)(struct kvm_vcpu *vcpu);
 
-	void (*run)(struct kvm_vcpu *vcpu);
+	void (*run)(struct kvm_vcpu *vcpu,
+		enum exit_fastpath_completion *exit_fastpath);
 	int (*handle_exit)(struct kvm_vcpu *vcpu,
 		enum exit_fastpath_completion exit_fastpath);
 	int (*skip_emulated_instruction)(struct kvm_vcpu *vcpu);
@@ -1174,8 +1175,7 @@ struct kvm_x86_ops {
 			       struct x86_instruction_info *info,
 			       enum x86_intercept_stage stage,
 			       struct x86_exception *exception);
-	void (*handle_exit_irqoff)(struct kvm_vcpu *vcpu,
-		enum exit_fastpath_completion *exit_fastpath);
+	void (*handle_exit_irqoff)(struct kvm_vcpu *vcpu);
 
 	int (*check_nested_events)(struct kvm_vcpu *vcpu);
 	void (*request_immediate_exit)(struct kvm_vcpu *vcpu);
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 27f4684..c019332 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -3283,9 +3283,20 @@ static void svm_cancel_injection(struct kvm_vcpu *vcpu)
 	svm_complete_interrupts(svm);
 }
 
+static enum exit_fastpath_completion svm_exit_handlers_fastpath(struct kvm_vcpu *vcpu)
+{
+	if (!is_guest_mode(vcpu) &&
+	    to_svm(vcpu)->vmcb->control.exit_code == SVM_EXIT_MSR &&
+	    to_svm(vcpu)->vmcb->control.exit_info_1)
+		return handle_fastpath_set_msr_irqoff(vcpu);
+
+	return EXIT_FASTPATH_NONE;
+}
+
 bool __svm_vcpu_run(unsigned long vmcb_pa, unsigned long *regs);
 
-static void svm_vcpu_run(struct kvm_vcpu *vcpu)
+static void svm_vcpu_run(struct kvm_vcpu *vcpu,
+	enum exit_fastpath_completion *exit_fastpath)
 {
 	struct vcpu_svm *svm = to_svm(vcpu);
 
@@ -3388,6 +3399,7 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
 	kvm_load_host_xsave_state(vcpu);
 	stgi();
 
+	*exit_fastpath = svm_exit_handlers_fastpath(vcpu);
 	/* Any pending NMI will happen here */
 
 	if (unlikely(svm->vmcb->control.exit_code == SVM_EXIT_NMI))
@@ -3719,13 +3731,8 @@ static int svm_check_intercept(struct kvm_vcpu *vcpu,
 	return ret;
 }
 
-static void svm_handle_exit_irqoff(struct kvm_vcpu *vcpu,
-	enum exit_fastpath_completion *exit_fastpath)
+static void svm_handle_exit_irqoff(struct kvm_vcpu *vcpu)
 {
-	if (!is_guest_mode(vcpu) &&
-	    to_svm(vcpu)->vmcb->control.exit_code == SVM_EXIT_MSR &&
-	    to_svm(vcpu)->vmcb->control.exit_info_1)
-		*exit_fastpath = handle_fastpath_set_msr_irqoff(vcpu);
 }
 
 static void svm_sched_in(struct kvm_vcpu *vcpu, int cpu)
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 1d2bb57..61a1725 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -6354,8 +6354,7 @@ static void handle_external_interrupt_irqoff(struct kvm_vcpu *vcpu)
 }
 STACK_FRAME_NON_STANDARD(handle_external_interrupt_irqoff);
 
-static void vmx_handle_exit_irqoff(struct kvm_vcpu *vcpu,
-	enum exit_fastpath_completion *exit_fastpath)
+static void vmx_handle_exit_irqoff(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
 
@@ -6363,9 +6362,6 @@ static void vmx_handle_exit_irqoff(struct kvm_vcpu *vcpu,
 		handle_external_interrupt_irqoff(vcpu);
 	else if (vmx->exit_reason == EXIT_REASON_EXCEPTION_NMI)
 		handle_exception_nmi_irqoff(vmx);
-	else if (!is_guest_mode(vcpu) &&
-		vmx->exit_reason == EXIT_REASON_MSR_WRITE)
-		*exit_fastpath = handle_fastpath_set_msr_irqoff(vcpu);
 }
 
 static bool vmx_has_emulated_msr(int index)
@@ -6570,9 +6566,19 @@ void vmx_update_host_rsp(struct vcpu_vmx *vmx, unsigned long host_rsp)
 	}
 }
 
+static enum exit_fastpath_completion vmx_exit_handlers_fastpath(struct kvm_vcpu *vcpu)
+{
+	if (!is_guest_mode(vcpu) &&
+		to_vmx(vcpu)->exit_reason == EXIT_REASON_MSR_WRITE)
+		return handle_fastpath_set_msr_irqoff(vcpu);
+
+	return EXIT_FASTPATH_NONE;
+}
+
 bool __vmx_vcpu_run(struct vcpu_vmx *vmx, unsigned long *regs, bool launched);
 
-static void vmx_vcpu_run(struct kvm_vcpu *vcpu)
+static void vmx_vcpu_run(struct kvm_vcpu *vcpu,
+	enum exit_fastpath_completion *exit_fastpath)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
 	unsigned long cr3, cr4;
@@ -6737,6 +6743,7 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu)
 	vmx->idt_vectoring_info = 0;
 
 	vmx->exit_reason = vmx->fail ? 0xdead : vmcs_read32(VM_EXIT_REASON);
+	*exit_fastpath = vmx_exit_handlers_fastpath(vcpu);
 	if ((u16)vmx->exit_reason == EXIT_REASON_MCE_DURING_VMENTRY)
 		kvm_machine_check();
 
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 3089aa4..eed31e2 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -8409,7 +8409,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 		vcpu->arch.switch_db_regs &= ~KVM_DEBUGREG_RELOAD;
 	}
 
-	kvm_x86_ops.run(vcpu);
+	kvm_x86_ops.run(vcpu, &exit_fastpath);
 
 	/*
 	 * Do this here before restoring debug registers on the host.  And
@@ -8441,7 +8441,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 	vcpu->mode = OUTSIDE_GUEST_MODE;
 	smp_wmb();
 
-	kvm_x86_ops.handle_exit_irqoff(vcpu, &exit_fastpath);
+	kvm_x86_ops.handle_exit_irqoff(vcpu);
 
 	/*
 	 * Consume any pending interrupts, including the possible source of
-- 
2.7.4


^ permalink raw reply related	[flat|nested] 10+ messages in thread

* Re: [PATCH v2] KVM: X86: Ultra fast single target IPI fastpath
  2020-04-10  1:03 [PATCH v2] KVM: X86: Ultra fast single target IPI fastpath Wanpeng Li
@ 2020-04-10 15:17 ` Paolo Bonzini
  2020-04-13  1:43   ` Wanpeng Li
  2020-04-10 15:35 ` Sean Christopherson
  1 sibling, 1 reply; 10+ messages in thread
From: Paolo Bonzini @ 2020-04-10 15:17 UTC (permalink / raw)
  To: Wanpeng Li, linux-kernel, kvm
  Cc: Sean Christopherson, Vitaly Kuznetsov, Wanpeng Li, Jim Mattson,
	Joerg Roedel, Haiwei Li

On 10/04/20 03:03, Wanpeng Li wrote:
> From: Wanpeng Li <wanpengli@tencent.com>
> 
> IPI and Timer cause the main MSRs write vmexits in cloud environment 
> observation, let's optimize virtual IPI latency more aggressively to 
> inject target IPI as soon as possible.
> 
> Running kvm-unit-tests/vmexit.flat IPI testing on SKX server, disable 
> adaptive advance lapic timer and adaptive halt-polling to avoid the 
> interference, this patch can give another 7% improvement.
> 
> w/o fastpath -> fastpath            4238 -> 3543  16.4%
> fastpath     -> ultra fastpath      3543 -> 3293     7%
> w/o fastpath -> ultra fastpath      4238 -> 3293  22.3% 
> 
> This also revises the performance data in commit 1e9e2622a1 (KVM: VMX: 
> FIXED+PHYSICAL mode single target IPI fastpath), that testing adds
> --overcommit cpu-pm=on to kvm-unit-tests guest which is unnecessary.
> 
> Tested-by: Haiwei Li <lihaiwei@tencent.com>
> Cc: Haiwei Li <lihaiwei@tencent.com>
> Signed-off-by: Wanpeng Li <wanpengli@tencent.com>
> ---
> v1 -> v2:
>  * rebase on latest kvm/queue
>  * update patch description
> 
>  arch/x86/include/asm/kvm_host.h |  6 +++---
>  arch/x86/kvm/svm/svm.c          | 21 ++++++++++++++-------
>  arch/x86/kvm/vmx/vmx.c          | 19 +++++++++++++------
>  arch/x86/kvm/x86.c              |  4 ++--
>  4 files changed, 32 insertions(+), 18 deletions(-)

That's less ugly than I expected. :D  I'll queue it in the next week or
so.  But even though the commit subject is cool, I'll change it to "KVM:
x86: move IPI fastpath inside kvm_x86_ops.run".

Thanks,

Paolo


^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v2] KVM: X86: Ultra fast single target IPI fastpath
  2020-04-10  1:03 [PATCH v2] KVM: X86: Ultra fast single target IPI fastpath Wanpeng Li
  2020-04-10 15:17 ` Paolo Bonzini
@ 2020-04-10 15:35 ` Sean Christopherson
  2020-04-10 15:50   ` Paolo Bonzini
  1 sibling, 1 reply; 10+ messages in thread
From: Sean Christopherson @ 2020-04-10 15:35 UTC (permalink / raw)
  To: Wanpeng Li
  Cc: linux-kernel, kvm, Paolo Bonzini, Vitaly Kuznetsov, Wanpeng Li,
	Jim Mattson, Joerg Roedel, Haiwei Li

On Fri, Apr 10, 2020 at 09:03:27AM +0800, Wanpeng Li wrote:
> From: Wanpeng Li <wanpengli@tencent.com>
> 
> IPI and Timer cause the main MSRs write vmexits in cloud environment 
> observation, let's optimize virtual IPI latency more aggressively to 
> inject target IPI as soon as possible.
> 
> Running kvm-unit-tests/vmexit.flat IPI testing on SKX server, disable 
> adaptive advance lapic timer and adaptive halt-polling to avoid the 
> interference, this patch can give another 7% improvement.
> 
> w/o fastpath -> fastpath            4238 -> 3543  16.4%
> fastpath     -> ultra fastpath      3543 -> 3293     7%
> w/o fastpath -> ultra fastpath      4238 -> 3293  22.3% 
> 
> This also revises the performance data in commit 1e9e2622a1 (KVM: VMX: 
> FIXED+PHYSICAL mode single target IPI fastpath), that testing adds
> --overcommit cpu-pm=on to kvm-unit-tests guest which is unnecessary.
> 
> Tested-by: Haiwei Li <lihaiwei@tencent.com>
> Cc: Haiwei Li <lihaiwei@tencent.com>
> Signed-off-by: Wanpeng Li <wanpengli@tencent.com>
> ---
> v1 -> v2:
>  * rebase on latest kvm/queue
>  * update patch description
> 
>  arch/x86/include/asm/kvm_host.h |  6 +++---
>  arch/x86/kvm/svm/svm.c          | 21 ++++++++++++++-------
>  arch/x86/kvm/vmx/vmx.c          | 19 +++++++++++++------
>  arch/x86/kvm/x86.c              |  4 ++--
>  4 files changed, 32 insertions(+), 18 deletions(-)
> 
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index c7da23a..e667cf3 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -1124,7 +1124,8 @@ struct kvm_x86_ops {
>  	 */
>  	void (*tlb_flush_guest)(struct kvm_vcpu *vcpu);
>  
> -	void (*run)(struct kvm_vcpu *vcpu);
> +	void (*run)(struct kvm_vcpu *vcpu,
> +		enum exit_fastpath_completion *exit_fastpath);
>  	int (*handle_exit)(struct kvm_vcpu *vcpu,
>  		enum exit_fastpath_completion exit_fastpath);
>  	int (*skip_emulated_instruction)(struct kvm_vcpu *vcpu);
> @@ -1174,8 +1175,7 @@ struct kvm_x86_ops {
>  			       struct x86_instruction_info *info,
>  			       enum x86_intercept_stage stage,
>  			       struct x86_exception *exception);
> -	void (*handle_exit_irqoff)(struct kvm_vcpu *vcpu,
> -		enum exit_fastpath_completion *exit_fastpath);
> +	void (*handle_exit_irqoff)(struct kvm_vcpu *vcpu);
>  
>  	int (*check_nested_events)(struct kvm_vcpu *vcpu);
>  	void (*request_immediate_exit)(struct kvm_vcpu *vcpu);
> diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
> index 27f4684..c019332 100644
> --- a/arch/x86/kvm/svm/svm.c
> +++ b/arch/x86/kvm/svm/svm.c
> @@ -3283,9 +3283,20 @@ static void svm_cancel_injection(struct kvm_vcpu *vcpu)
>  	svm_complete_interrupts(svm);
>  }
>  
> +static enum exit_fastpath_completion svm_exit_handlers_fastpath(struct kvm_vcpu *vcpu)
> +{
> +	if (!is_guest_mode(vcpu) &&
> +	    to_svm(vcpu)->vmcb->control.exit_code == SVM_EXIT_MSR &&
> +	    to_svm(vcpu)->vmcb->control.exit_info_1)
> +		return handle_fastpath_set_msr_irqoff(vcpu);
> +
> +	return EXIT_FASTPATH_NONE;
> +}
> +
>  bool __svm_vcpu_run(unsigned long vmcb_pa, unsigned long *regs);
>  
> -static void svm_vcpu_run(struct kvm_vcpu *vcpu)
> +static void svm_vcpu_run(struct kvm_vcpu *vcpu,
> +	enum exit_fastpath_completion *exit_fastpath)
>  {
>  	struct vcpu_svm *svm = to_svm(vcpu);
>  
> @@ -3388,6 +3399,7 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
>  	kvm_load_host_xsave_state(vcpu);
>  	stgi();
>  
> +	*exit_fastpath = svm_exit_handlers_fastpath(vcpu);
>  	/* Any pending NMI will happen here */
>  
>  	if (unlikely(svm->vmcb->control.exit_code == SVM_EXIT_NMI))
> @@ -3719,13 +3731,8 @@ static int svm_check_intercept(struct kvm_vcpu *vcpu,
>  	return ret;
>  }
>  
> -static void svm_handle_exit_irqoff(struct kvm_vcpu *vcpu,
> -	enum exit_fastpath_completion *exit_fastpath)
> +static void svm_handle_exit_irqoff(struct kvm_vcpu *vcpu)
>  {
> -	if (!is_guest_mode(vcpu) &&
> -	    to_svm(vcpu)->vmcb->control.exit_code == SVM_EXIT_MSR &&
> -	    to_svm(vcpu)->vmcb->control.exit_info_1)
> -		*exit_fastpath = handle_fastpath_set_msr_irqoff(vcpu);
>  }
>  
>  static void svm_sched_in(struct kvm_vcpu *vcpu, int cpu)
> diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
> index 1d2bb57..61a1725 100644
> --- a/arch/x86/kvm/vmx/vmx.c
> +++ b/arch/x86/kvm/vmx/vmx.c
> @@ -6354,8 +6354,7 @@ static void handle_external_interrupt_irqoff(struct kvm_vcpu *vcpu)
>  }
>  STACK_FRAME_NON_STANDARD(handle_external_interrupt_irqoff);
>  
> -static void vmx_handle_exit_irqoff(struct kvm_vcpu *vcpu,
> -	enum exit_fastpath_completion *exit_fastpath)
> +static void vmx_handle_exit_irqoff(struct kvm_vcpu *vcpu)
>  {
>  	struct vcpu_vmx *vmx = to_vmx(vcpu);
>  
> @@ -6363,9 +6362,6 @@ static void vmx_handle_exit_irqoff(struct kvm_vcpu *vcpu,
>  		handle_external_interrupt_irqoff(vcpu);
>  	else if (vmx->exit_reason == EXIT_REASON_EXCEPTION_NMI)
>  		handle_exception_nmi_irqoff(vmx);
> -	else if (!is_guest_mode(vcpu) &&
> -		vmx->exit_reason == EXIT_REASON_MSR_WRITE)
> -		*exit_fastpath = handle_fastpath_set_msr_irqoff(vcpu);
>  }
>  
>  static bool vmx_has_emulated_msr(int index)
> @@ -6570,9 +6566,19 @@ void vmx_update_host_rsp(struct vcpu_vmx *vmx, unsigned long host_rsp)
>  	}
>  }
>  
> +static enum exit_fastpath_completion vmx_exit_handlers_fastpath(struct kvm_vcpu *vcpu)
> +{
> +	if (!is_guest_mode(vcpu) &&
> +		to_vmx(vcpu)->exit_reason == EXIT_REASON_MSR_WRITE)

Bad indentation.

> +		return handle_fastpath_set_msr_irqoff(vcpu);
> +
> +	return EXIT_FASTPATH_NONE;
> +}
> +
>  bool __vmx_vcpu_run(struct vcpu_vmx *vmx, unsigned long *regs, bool launched);
>  
> -static void vmx_vcpu_run(struct kvm_vcpu *vcpu)
> +static void vmx_vcpu_run(struct kvm_vcpu *vcpu,
> +	enum exit_fastpath_completion *exit_fastpath)

Why pass a pointer instead of returning the enum?

>  {
>  	struct vcpu_vmx *vmx = to_vmx(vcpu);
>  	unsigned long cr3, cr4;
> @@ -6737,6 +6743,7 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu)
>  	vmx->idt_vectoring_info = 0;
>  
>  	vmx->exit_reason = vmx->fail ? 0xdead : vmcs_read32(VM_EXIT_REASON);
> +	*exit_fastpath = vmx_exit_handlers_fastpath(vcpu);

IMO, this should come at the very end of vmx_vcpu_run().  At a minimum, it
needs to be moved below the #MC handling and below

	if (vmx->fail || (vmx->exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY))
		return;

KVM more or less assumes vmx->idt_vectoring_info is always valid, and it's
not obvious that a generic fastpath call can safely run before
vmx_complete_interrupts(), e.g. the kvm_clear_interrupt_queue() call.

In a normal scenario, the added latency is <50 cycles.  ~30 for the VMREAD
of IDT_VECTORING_INFO_FIELD, a handful of zeroing instructions, and a few
CMP+Jcc style uops to skip NMI blocking and interrupt completion.

And if the result is returned, it means VMX won't need a local variable, e.g.:

	vmx->exit_reason = vmx->fail ? 0xdead : vmcs_read32(VM_EXIT_REASON);
	if ((u16)vmx->exit_reason == EXIT_REASON_MCE_DURING_VMENTRY)
		kvm_machine_check();

	if (vmx->fail || (vmx->exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY))
		return EXIT_FASTPATH_NONE;

	vmx->loaded_vmcs->launched = 1;
	vmx->idt_vectoring_info = vmcs_read32(IDT_VECTORING_INFO_FIELD);

	vmx_recover_nmi_blocking(vmx);
	vmx_complete_interrupts(vmx);

	return vmx_exit_handlers_fastpath(vcpu);
}

>  	if ((u16)vmx->exit_reason == EXIT_REASON_MCE_DURING_VMENTRY)
>  		kvm_machine_check();
>  
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 3089aa4..eed31e2 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -8409,7 +8409,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
>  		vcpu->arch.switch_db_regs &= ~KVM_DEBUGREG_RELOAD;
>  	}
>  
> -	kvm_x86_ops.run(vcpu);
> +	kvm_x86_ops.run(vcpu, &exit_fastpath);

Pretty sure
>  
>  	/*
>  	 * Do this here before restoring debug registers on the host.  And
> @@ -8441,7 +8441,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
>  	vcpu->mode = OUTSIDE_GUEST_MODE;
>  	smp_wmb();
>  
> -	kvm_x86_ops.handle_exit_irqoff(vcpu, &exit_fastpath);
> +	kvm_x86_ops.handle_exit_irqoff(vcpu);
>  
>  	/*
>  	 * Consume any pending interrupts, including the possible source of
> -- 
> 2.7.4
> 

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v2] KVM: X86: Ultra fast single target IPI fastpath
  2020-04-10 15:35 ` Sean Christopherson
@ 2020-04-10 15:50   ` Paolo Bonzini
  2020-04-10 17:47     ` Sean Christopherson
  0 siblings, 1 reply; 10+ messages in thread
From: Paolo Bonzini @ 2020-04-10 15:50 UTC (permalink / raw)
  To: Sean Christopherson, Wanpeng Li
  Cc: linux-kernel, kvm, Vitaly Kuznetsov, Wanpeng Li, Jim Mattson,
	Joerg Roedel, Haiwei Li

On 10/04/20 17:35, Sean Christopherson wrote:
> IMO, this should come at the very end of vmx_vcpu_run().  At a minimum, it
> needs to be moved below the #MC handling and below
> 
> 	if (vmx->fail || (vmx->exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY))
> 		return;

Why?  It cannot run in any of those cases, since the vmx->exit_reason
won't match.

> KVM more or less assumes vmx->idt_vectoring_info is always valid, and it's
> not obvious that a generic fastpath call can safely run before
> vmx_complete_interrupts(), e.g. the kvm_clear_interrupt_queue() call.

Not KVM, rather vmx.c.  You're right about a generic fastpath, but in
this case kvm_irq_delivery_to_apic_fast is not touching VMX state; even
if you have a self-IPI, the modification of vCPU state is only scheduled
here and will happen later via either kvm_x86_ops.sync_pir_to_irr or
KVM_REQ_EVENT.

Paolo


^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v2] KVM: X86: Ultra fast single target IPI fastpath
  2020-04-10 15:50   ` Paolo Bonzini
@ 2020-04-10 17:47     ` Sean Christopherson
  2020-04-10 17:47       ` [PATCH v3 1/2] KVM: VMX: Optimize handling of VM-Entry failures in vmx_vcpu_run() Sean Christopherson
                         ` (3 more replies)
  0 siblings, 4 replies; 10+ messages in thread
From: Sean Christopherson @ 2020-04-10 17:47 UTC (permalink / raw)
  To: Paolo Bonzini
  Cc: Sean Christopherson, Vitaly Kuznetsov, Wanpeng Li, Jim Mattson,
	Joerg Roedel, kvm, linux-kernel, Haiwei Li

On Fri, Apr 10, 2020 at 05:50:35PM +0200, Paolo Bonzini wrote:
> On 10/04/20 17:35, Sean Christopherson wrote:
> > IMO, this should come at the very end of vmx_vcpu_run().  At a minimum, it
> > needs to be moved below the #MC handling and below
> >
> >     if (vmx->fail || (vmx->exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY))
> >             return;
>
> Why?  It cannot run in any of those cases, since the vmx->exit_reason
> won't match.

#MC and consistency checks should have "priority" over everything else.
That there isn't actually a conflict is irrelevant IMO.  And it's something
that will likely confuse newbies (to VMX and/or KVM) as it won't be obvious
that the motivation was to shave a few cycles, e.g. versus some corner case
where the fastpath handling does something meaningful even on failure.

> > KVM more or less assumes vmx->idt_vectoring_info is always valid, and it's
> > not obvious that a generic fastpath call can safely run before
> > vmx_complete_interrupts(), e.g. the kvm_clear_interrupt_queue() call.
>
> Not KVM, rather vmx.c.  You're right about a generic fastpath, but in
> this case kvm_irq_delivery_to_apic_fast is not touching VMX state; even
> if you have a self-IPI, the modification of vCPU state is only scheduled
> here and will happen later via either kvm_x86_ops.sync_pir_to_irr or
> KVM_REQ_EVENT.

I think what I don't like is that the fast-IPI code is buried in a helper
that masquerades as a generic fastpath handler.  If that's open-coded in
vmx_vcpu_run(), I'm ok with doing the fast-IPI handler immediately after
the failure checks.

And fast-IPI aside, the code could use a bit of optimization to prioritize
successful VM-Enter, which would slot in nicely as a prep patch.  Patches
(should be) following.

IMO, this is more logically correct:

	vmx->exit_reason = vmcs_read32(VM_EXIT_REASON);
	if (unlikely((u16)vmx->exit_reason == EXIT_REASON_MCE_DURING_VMENTRY))
		kvm_machine_check();

	if (unlikely(vmx->exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY))
		return EXIT_FASTPATH_NONE;

	if (!is_guest_mode(vcpu) && vmx->exit_reason == EXIT_REASON_MSR_WRITE)
		exit_fastpath = handle_fastpath_set_msr_irqoff(vcpu);
	else
		exit_fastpath = EXIT_FASTPATH_NONE;

And on my system, the compiler hoists fast-IPI above the #MC, i.e. moving
the fast-IPI down only adds a single macrofused uop, testb+jne for
FAILED_VMENTRY, to the code path.

   0xffffffff81067d1d <+701>:   vmread %rax,%rax
   0xffffffff81067d20 <+704>:   ja,pt  0xffffffff81067d2d <vmx_vcpu_run+717>
   0xffffffff81067d23 <+707>:   pushq  $0x0
   0xffffffff81067d25 <+709>:   push   %rax
   0xffffffff81067d26 <+710>:   callq  0xffffffff81071790 <vmread_error_trampoline>
   0xffffffff81067d2b <+715>:   pop    %rax
   0xffffffff81067d2c <+716>:   pop    %rax
   0xffffffff81067d2d <+717>:   test   %eax,%eax
   0xffffffff81067d2f <+719>:   mov    %eax,0x32b0(%rbp)
   0xffffffff81067d35 <+725>:   js     0xffffffff81067d5a <vmx_vcpu_run+762>
   0xffffffff81067d37 <+727>:   testb  $0x20,0x2dc(%rbp)
   0xffffffff81067d3e <+734>:   jne    0xffffffff81067d49 <vmx_vcpu_run+745>
   0xffffffff81067d40 <+736>:   cmp    $0x20,%eax
   0xffffffff81067d43 <+739>:   je     0xffffffff810686d4 <vmx_vcpu_run+3188> <-- fastpath handler
   0xffffffff81067d49 <+745>:   xor    %ebx,%ebx
   0xffffffff81067d4b <+747>:   jmpq   0xffffffff81067e65 <vmx_vcpu_run+1029>

^ permalink raw reply	[flat|nested] 10+ messages in thread

* [PATCH v3 1/2] KVM: VMX: Optimize handling of VM-Entry failures in vmx_vcpu_run()
  2020-04-10 17:47     ` Sean Christopherson
@ 2020-04-10 17:47       ` Sean Christopherson
  2020-04-10 17:47       ` [PATCH v3 2/2] KVM: X86: Ultra fast single target IPI fastpath Sean Christopherson
                         ` (2 subsequent siblings)
  3 siblings, 0 replies; 10+ messages in thread
From: Sean Christopherson @ 2020-04-10 17:47 UTC (permalink / raw)
  To: Paolo Bonzini
  Cc: Sean Christopherson, Vitaly Kuznetsov, Wanpeng Li, Jim Mattson,
	Joerg Roedel, kvm, linux-kernel, Haiwei Li

Mark the VM-Fail, VM-Exit on VM-Enter, and #MC on VM-Enter paths as
'unlikely' so as to improve code generation so that it favors successful
VM-Enter.  The performance of successful VM-Enter is far more important,
irrespective of whether or not success is actually likely.

Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com>
---
 arch/x86/kvm/vmx/vmx.c | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 1d2bb57f4ac4..a8402bed29e3 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -6736,11 +6736,16 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu)
 	vmx->nested.nested_run_pending = 0;
 	vmx->idt_vectoring_info = 0;
 
-	vmx->exit_reason = vmx->fail ? 0xdead : vmcs_read32(VM_EXIT_REASON);
-	if ((u16)vmx->exit_reason == EXIT_REASON_MCE_DURING_VMENTRY)
+	if (unlikely(vmx->fail)) {
+		vmx->exit_reason = 0xdead;
+		return;
+	}
+
+	vmx->exit_reason = vmcs_read32(VM_EXIT_REASON);
+	if (unlikely((u16)vmx->exit_reason == EXIT_REASON_MCE_DURING_VMENTRY))
 		kvm_machine_check();
 
-	if (vmx->fail || (vmx->exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY))
+	if (unlikely(vmx->exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY))
 		return;
 
 	vmx->loaded_vmcs->launched = 1;
-- 
2.26.0


^ permalink raw reply related	[flat|nested] 10+ messages in thread

* [PATCH v3 2/2] KVM: X86: Ultra fast single target IPI fastpath
  2020-04-10 17:47     ` Sean Christopherson
  2020-04-10 17:47       ` [PATCH v3 1/2] KVM: VMX: Optimize handling of VM-Entry failures in vmx_vcpu_run() Sean Christopherson
@ 2020-04-10 17:47       ` Sean Christopherson
  2020-04-10 18:43       ` [PATCH v2] " Paolo Bonzini
  2020-04-11  0:34       ` Wanpeng Li
  3 siblings, 0 replies; 10+ messages in thread
From: Sean Christopherson @ 2020-04-10 17:47 UTC (permalink / raw)
  To: Paolo Bonzini
  Cc: Sean Christopherson, Vitaly Kuznetsov, Wanpeng Li, Jim Mattson,
	Joerg Roedel, kvm, linux-kernel, Haiwei Li

From: Wanpeng Li <wanpengli@tencent.com>

IPIs and timers cause most of the MSR-write VM-exits observed in cloud
environments, so let's optimize virtual IPI latency more aggressively and
inject the target IPI as soon as possible.

Running the kvm-unit-tests/vmexit.flat IPI test on an SKX server, with the
adaptive advance LAPIC timer and adaptive halt-polling disabled to avoid
interference, this patch gives another 7% improvement.

w/o fastpath -> fastpath            4238 -> 3543  16.4%
fastpath     -> ultra fastpath      3543 -> 3293     7%
w/o fastpath -> ultra fastpath      4238 -> 3293  22.3%

This also revises the performance data from commit 1e9e2622a1 ("KVM: VMX:
FIXED+PHYSICAL mode single target IPI fastpath"); that testing added
--overcommit cpu-pm=on to the kvm-unit-tests guest, which is unnecessary.

Cc: Haiwei Li <lihaiwei@tencent.com>
Signed-off-by: Wanpeng Li <wanpengli@tencent.com>
Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com>
---

Note, I dropped Haiwei Li's tested tag.

 arch/x86/include/asm/kvm_host.h |  5 ++---
 arch/x86/kvm/svm/svm.c          | 24 ++++++++++++++++--------
 arch/x86/kvm/vmx/vmx.c          | 22 +++++++++++++---------
 arch/x86/kvm/x86.c              |  6 +++---
 4 files changed, 34 insertions(+), 23 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index c7da23aed79a..40cb197b4903 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1124,7 +1124,7 @@ struct kvm_x86_ops {
 	 */
 	void (*tlb_flush_guest)(struct kvm_vcpu *vcpu);
 
-	void (*run)(struct kvm_vcpu *vcpu);
+	enum exit_fastpath_completion (*run)(struct kvm_vcpu *vcpu);
 	int (*handle_exit)(struct kvm_vcpu *vcpu,
 		enum exit_fastpath_completion exit_fastpath);
 	int (*skip_emulated_instruction)(struct kvm_vcpu *vcpu);
@@ -1174,8 +1174,7 @@ struct kvm_x86_ops {
 			       struct x86_instruction_info *info,
 			       enum x86_intercept_stage stage,
 			       struct x86_exception *exception);
-	void (*handle_exit_irqoff)(struct kvm_vcpu *vcpu,
-		enum exit_fastpath_completion *exit_fastpath);
+	void (*handle_exit_irqoff)(struct kvm_vcpu *vcpu);
 
 	int (*check_nested_events)(struct kvm_vcpu *vcpu);
 	void (*request_immediate_exit)(struct kvm_vcpu *vcpu);
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 27f4684a4c20..f40aa5a4253e 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -3283,10 +3283,21 @@ static void svm_cancel_injection(struct kvm_vcpu *vcpu)
 	svm_complete_interrupts(svm);
 }
 
+static enum exit_fastpath_completion svm_exit_handlers_fastpath(struct kvm_vcpu *vcpu)
+{
+	if (!is_guest_mode(vcpu) &&
+	    to_svm(vcpu)->vmcb->control.exit_code == SVM_EXIT_MSR &&
+	    to_svm(vcpu)->vmcb->control.exit_info_1)
+		return handle_fastpath_set_msr_irqoff(vcpu);
+
+	return EXIT_FASTPATH_NONE;
+}
+
 bool __svm_vcpu_run(unsigned long vmcb_pa, unsigned long *regs);
 
-static void svm_vcpu_run(struct kvm_vcpu *vcpu)
+static enum exit_fastpath_completion svm_vcpu_run(struct kvm_vcpu *vcpu)
 {
+	enum exit_fastpath_completion exit_fastpath;
 	struct vcpu_svm *svm = to_svm(vcpu);
 
 	svm->vmcb->save.rax = vcpu->arch.regs[VCPU_REGS_RAX];
@@ -3298,7 +3309,7 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
 	 * again.
 	 */
 	if (unlikely(svm->nested.exit_required))
-		return;
+		return EXIT_FASTPATH_NONE;
 
 	/*
 	 * Disable singlestep if we're injecting an interrupt/exception.
@@ -3389,6 +3400,7 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
 	stgi();
 
 	/* Any pending NMI will happen here */
+	exit_fastpath = svm_exit_handlers_fastpath(vcpu);
 
 	if (unlikely(svm->vmcb->control.exit_code == SVM_EXIT_NMI))
 		kvm_after_interrupt(&svm->vcpu);
@@ -3417,6 +3429,7 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
 		svm_handle_mce(svm);
 
 	mark_all_clean(svm->vmcb);
+	return exit_fastpath;
 }
 STACK_FRAME_NON_STANDARD(svm_vcpu_run);
 
@@ -3719,13 +3732,8 @@ static int svm_check_intercept(struct kvm_vcpu *vcpu,
 	return ret;
 }
 
-static void svm_handle_exit_irqoff(struct kvm_vcpu *vcpu,
-	enum exit_fastpath_completion *exit_fastpath)
+static void svm_handle_exit_irqoff(struct kvm_vcpu *vcpu)
 {
-	if (!is_guest_mode(vcpu) &&
-	    to_svm(vcpu)->vmcb->control.exit_code == SVM_EXIT_MSR &&
-	    to_svm(vcpu)->vmcb->control.exit_info_1)
-		*exit_fastpath = handle_fastpath_set_msr_irqoff(vcpu);
 }
 
 static void svm_sched_in(struct kvm_vcpu *vcpu, int cpu)
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index a8402bed29e3..a2e6c93a563b 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -6354,8 +6354,7 @@ static void handle_external_interrupt_irqoff(struct kvm_vcpu *vcpu)
 }
 STACK_FRAME_NON_STANDARD(handle_external_interrupt_irqoff);
 
-static void vmx_handle_exit_irqoff(struct kvm_vcpu *vcpu,
-	enum exit_fastpath_completion *exit_fastpath)
+static void vmx_handle_exit_irqoff(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
 
@@ -6363,9 +6362,6 @@ static void vmx_handle_exit_irqoff(struct kvm_vcpu *vcpu,
 		handle_external_interrupt_irqoff(vcpu);
 	else if (vmx->exit_reason == EXIT_REASON_EXCEPTION_NMI)
 		handle_exception_nmi_irqoff(vmx);
-	else if (!is_guest_mode(vcpu) &&
-		vmx->exit_reason == EXIT_REASON_MSR_WRITE)
-		*exit_fastpath = handle_fastpath_set_msr_irqoff(vcpu);
 }
 
 static bool vmx_has_emulated_msr(int index)
@@ -6572,8 +6568,9 @@ void vmx_update_host_rsp(struct vcpu_vmx *vmx, unsigned long host_rsp)
 
 bool __vmx_vcpu_run(struct vcpu_vmx *vmx, unsigned long *regs, bool launched);
 
-static void vmx_vcpu_run(struct kvm_vcpu *vcpu)
+static enum exit_fastpath_completion vmx_vcpu_run(struct kvm_vcpu *vcpu)
 {
+	enum exit_fastpath_completion exit_fastpath;
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
 	unsigned long cr3, cr4;
 
@@ -6585,7 +6582,7 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu)
 	/* Don't enter VMX if guest state is invalid, let the exit handler
 	   start emulation until we arrive back to a valid state */
 	if (vmx->emulation_required)
-		return;
+		return EXIT_FASTPATH_NONE;
 
 	if (vmx->ple_window_dirty) {
 		vmx->ple_window_dirty = false;
@@ -6738,7 +6735,7 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu)
 
 	if (unlikely(vmx->fail)) {
 		vmx->exit_reason = 0xdead;
-		return;
+		return EXIT_FASTPATH_NONE;
 	}
 
 	vmx->exit_reason = vmcs_read32(VM_EXIT_REASON);
@@ -6746,13 +6743,20 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu)
 		kvm_machine_check();
 
 	if (unlikely(vmx->exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY))
-		return;
+		return EXIT_FASTPATH_NONE;
+
+	if (!is_guest_mode(vcpu) && vmx->exit_reason == EXIT_REASON_MSR_WRITE)
+		exit_fastpath = handle_fastpath_set_msr_irqoff(vcpu);
+	else
+		exit_fastpath = EXIT_FASTPATH_NONE;
 
 	vmx->loaded_vmcs->launched = 1;
 	vmx->idt_vectoring_info = vmcs_read32(IDT_VECTORING_INFO_FIELD);
 
 	vmx_recover_nmi_blocking(vmx);
 	vmx_complete_interrupts(vmx);
+
+	return exit_fastpath;
 }
 
 static void vmx_free_vcpu(struct kvm_vcpu *vcpu)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 3089aa4ffedf..1c1af0d31267 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -8182,7 +8182,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 	bool req_int_win =
 		dm_request_for_irq_injection(vcpu) &&
 		kvm_cpu_accept_dm_intr(vcpu);
-	enum exit_fastpath_completion exit_fastpath = EXIT_FASTPATH_NONE;
+	enum exit_fastpath_completion exit_fastpath;
 
 	bool req_immediate_exit = false;
 
@@ -8409,7 +8409,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 		vcpu->arch.switch_db_regs &= ~KVM_DEBUGREG_RELOAD;
 	}
 
-	kvm_x86_ops.run(vcpu);
+	exit_fastpath = kvm_x86_ops.run(vcpu);
 
 	/*
 	 * Do this here before restoring debug registers on the host.  And
@@ -8441,7 +8441,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 	vcpu->mode = OUTSIDE_GUEST_MODE;
 	smp_wmb();
 
-	kvm_x86_ops.handle_exit_irqoff(vcpu, &exit_fastpath);
+	kvm_x86_ops.handle_exit_irqoff(vcpu);
 
 	/*
 	 * Consume any pending interrupts, including the possible source of
-- 
2.26.0


^ permalink raw reply related	[flat|nested] 10+ messages in thread

* Re: [PATCH v2] KVM: X86: Ultra fast single target IPI fastpath
  2020-04-10 17:47     ` Sean Christopherson
  2020-04-10 17:47       ` [PATCH v3 1/2] KVM: VMX: Optimize handling of VM-Entry failures in vmx_vcpu_run() Sean Christopherson
  2020-04-10 17:47       ` [PATCH v3 2/2] KVM: X86: Ultra fast single target IPI fastpath Sean Christopherson
@ 2020-04-10 18:43       ` Paolo Bonzini
  2020-04-11  0:34       ` Wanpeng Li
  3 siblings, 0 replies; 10+ messages in thread
From: Paolo Bonzini @ 2020-04-10 18:43 UTC (permalink / raw)
  To: Sean Christopherson
  Cc: Vitaly Kuznetsov, Wanpeng Li, Jim Mattson, Joerg Roedel, kvm,
	linux-kernel, Haiwei Li

On 10/04/20 19:47, Sean Christopherson wrote:
> If that's open-coded in vmx_vcpu_run(), I'm ok with doing the
> fast-IPI handler immediately after the failure checks.
> 
> And fast-IPI aside, the code could use a bit of optimization to
> prioritize successful VM-Enter, which would slot in nicely as a prep
> patch.

Yes, I agree with these.

Paolo


^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v2] KVM: X86: Ultra fast single target IPI fastpath
  2020-04-10 17:47     ` Sean Christopherson
                         ` (2 preceding siblings ...)
  2020-04-10 18:43       ` [PATCH v2] " Paolo Bonzini
@ 2020-04-11  0:34       ` Wanpeng Li
  3 siblings, 0 replies; 10+ messages in thread
From: Wanpeng Li @ 2020-04-11  0:34 UTC (permalink / raw)
  To: Sean Christopherson
  Cc: Paolo Bonzini, Vitaly Kuznetsov, Wanpeng Li, Jim Mattson,
	Joerg Roedel, kvm, LKML, Haiwei Li

On Sat, 11 Apr 2020 at 01:47, Sean Christopherson
<sean.j.christopherson@intel.com> wrote:
>
> On Fri, Apr 10, 2020 at 05:50:35PM +0200, Paolo Bonzini wrote:
> > On 10/04/20 17:35, Sean Christopherson wrote:
> > > IMO, this should come at the very end of vmx_vcpu_run().  At a minimum, it
> > > needs to be moved below the #MC handling and below
> > >
> > >     if (vmx->fail || (vmx->exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY))
> > >             return;
> >
> > Why?  It cannot run in any of those cases, since the vmx->exit_reason
> > won't match.
>
> #MC and consistency checks should have "priority" over everything else.
> That there isn't actually a conflict is irrelevant IMO.  And it's something
> that will likely confuse newbies (to VMX and/or KVM) as it won't be obvious
> that the motivation was to shave a few cycles, e.g. versus some corner case
> where the fastpath handling does something meaningful even on failure.
>
> > > KVM more or less assumes vmx->idt_vectoring_info is always valid, and it's
> > > not obvious that a generic fastpath call can safely run before
> > > vmx_complete_interrupts(), e.g. the kvm_clear_interrupt_queue() call.
> >
> > Not KVM, rather vmx.c.  You're right about a generic fastpath, but in
> > this case kvm_irq_delivery_to_apic_fast is not touching VMX state; even
> > if you have a self-IPI, the modification of vCPU state is only scheduled
> > here and will happen later via either kvm_x86_ops.sync_pir_to_irr or
> > KVM_REQ_EVENT.
>
> I think what I don't like is that the fast-IPI code is buried in a helper
> that masquerades as a generic fastpath handler.  If that's open-coded in
> vmx_vcpu_run(), I'm ok with doing the fast-IPI handler immediately after
> the failure checks.
>
> And fast-IPI aside, the code could use a bit of optimization to prioritize
> successful VM-Enter, which would slot in nicely as a prep patch.  Patches
> (should be) following.

Thanks for v3. :)

>
> IMO, this is more logically correct:
>
>         vmx->exit_reason = vmcs_read32(VM_EXIT_REASON);
>         if (unlikely((u16)vmx->exit_reason == EXIT_REASON_MCE_DURING_VMENTRY))
>                 kvm_machine_check();
>
>         if (unlikely(vmx->exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY))
>                 return EXIT_FASTPATH_NONE;
>
>         if (!is_guest_mode(vcpu) && vmx->exit_reason == EXIT_REASON_MSR_WRITE)
>                 exit_fastpath = handle_fastpath_set_msr_irqoff(vcpu);
>         else
>                 exit_fastpath = EXIT_FASTPATH_NONE;
>
> And on my system, the compiler hoists fast-IPI above the #MC, e.g. moving
> the fast-IPI down only adds a single macrofused uop, testb+jne for
> FAILED_VMENTRY, to the code path.
>
>    0xffffffff81067d1d <+701>:   vmread %rax,%rax
>    0xffffffff81067d20 <+704>:   ja,pt  0xffffffff81067d2d <vmx_vcpu_run+717>
>    0xffffffff81067d23 <+707>:   pushq  $0x0
>    0xffffffff81067d25 <+709>:   push   %rax
>    0xffffffff81067d26 <+710>:   callq  0xffffffff81071790 <vmread_error_trampoline>
>    0xffffffff81067d2b <+715>:   pop    %rax
>    0xffffffff81067d2c <+716>:   pop    %rax
>    0xffffffff81067d2d <+717>:   test   %eax,%eax
>    0xffffffff81067d2f <+719>:   mov    %eax,0x32b0(%rbp)
>    0xffffffff81067d35 <+725>:   js     0xffffffff81067d5a <vmx_vcpu_run+762>
>    0xffffffff81067d37 <+727>:   testb  $0x20,0x2dc(%rbp)
>    0xffffffff81067d3e <+734>:   jne    0xffffffff81067d49 <vmx_vcpu_run+745>
>    0xffffffff81067d40 <+736>:   cmp    $0x20,%eax
>    0xffffffff81067d43 <+739>:   je     0xffffffff810686d4 <vmx_vcpu_run+3188> <-- fastpath handler
>    0xffffffff81067d49 <+745>:   xor    %ebx,%ebx
>    0xffffffff81067d4b <+747>:   jmpq   0xffffffff81067e65 <vmx_vcpu_run+1029>

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v2] KVM: X86: Ultra fast single target IPI fastpath
  2020-04-10 15:17 ` Paolo Bonzini
@ 2020-04-13  1:43   ` Wanpeng Li
  0 siblings, 0 replies; 10+ messages in thread
From: Wanpeng Li @ 2020-04-13  1:43 UTC (permalink / raw)
  To: Paolo Bonzini
  Cc: LKML, kvm, Sean Christopherson, Vitaly Kuznetsov, Wanpeng Li,
	Jim Mattson, Joerg Roedel, Haiwei Li

On Fri, 10 Apr 2020 at 23:17, Paolo Bonzini <pbonzini@redhat.com> wrote:
>
> On 10/04/20 03:03, Wanpeng Li wrote:
> > From: Wanpeng Li <wanpengli@tencent.com>
> >
> > IPI and Timer cause the main MSRs write vmexits in cloud environment
> > observation, let's optimize virtual IPI latency more aggressively to
> > inject target IPI as soon as possible.
> >
> > Running kvm-unit-tests/vmexit.flat IPI testing on SKX server, disable
> > adaptive advance lapic timer and adaptive halt-polling to avoid the
> > interference, this patch can give another 7% improvement.
> >
> > w/o fastpath -> fastpath            4238 -> 3543  16.4%
> > fastpath     -> ultra fastpath      3543 -> 3293     7%
> > w/o fastpath -> ultra fastpath      4238 -> 3293  22.3%
> >
> > This also revises the performance data in commit 1e9e2622a1 (KVM: VMX:
> > FIXED+PHYSICAL mode single target IPI fastpath), that testing adds
> > --overcommit cpu-pm=on to kvm-unit-tests guest which is unnecessary.
> >
> > Tested-by: Haiwei Li <lihaiwei@tencent.com>
> > Cc: Haiwei Li <lihaiwei@tencent.com>
> > Signed-off-by: Wanpeng Li <wanpengli@tencent.com>
> > ---
> > v1 -> v2:
> >  * rebase on latest kvm/queue
> >  * update patch description
> >
> >  arch/x86/include/asm/kvm_host.h |  6 +++---
> >  arch/x86/kvm/svm/svm.c          | 21 ++++++++++++++-------
> >  arch/x86/kvm/vmx/vmx.c          | 19 +++++++++++++------
> >  arch/x86/kvm/x86.c              |  4 ++--
> >  4 files changed, 32 insertions(+), 18 deletions(-)
>
> That's less ugly than I expected. :D  I'll queue it in the next week or
> so.  But even though the commit subject is cool, I'll change it to "KVM:
> x86: move IPI fastpath inside kvm_x86_ops.run".

Thanks.

    Wanpeng

^ permalink raw reply	[flat|nested] 10+ messages in thread

end of thread, other threads:[~2020-04-13  1:44 UTC | newest]

Thread overview: 10+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-04-10  1:03 [PATCH v2] KVM: X86: Ultra fast single target IPI fastpath Wanpeng Li
2020-04-10 15:17 ` Paolo Bonzini
2020-04-13  1:43   ` Wanpeng Li
2020-04-10 15:35 ` Sean Christopherson
2020-04-10 15:50   ` Paolo Bonzini
2020-04-10 17:47     ` Sean Christopherson
2020-04-10 17:47       ` [PATCH v3 1/2] KVM: VMX: Optimize handling of VM-Entry failures in vmx_vcpu_run() Sean Christopherson
2020-04-10 17:47       ` [PATCH v3 2/2] KVM: X86: Ultra fast single target IPI fastpath Sean Christopherson
2020-04-10 18:43       ` [PATCH v2] " Paolo Bonzini
2020-04-11  0:34       ` Wanpeng Li

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).