linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH v3 1/2] KVM: VMX: FIXED+PHYSICAL mode single target IPI fastpath
@ 2019-11-20  3:42 Wanpeng Li
  2019-11-20  3:42 ` [PATCH v3 2/2] KVM: LAPIC: micro-optimize fixed mode ipi delivery Wanpeng Li
                   ` (2 more replies)
  0 siblings, 3 replies; 7+ messages in thread
From: Wanpeng Li @ 2019-11-20  3:42 UTC (permalink / raw)
  To: linux-kernel, kvm
  Cc: Paolo Bonzini, Radim Krčmář,
	Sean Christopherson, Vitaly Kuznetsov, Wanpeng Li, Jim Mattson,
	Joerg Roedel, Liran Alon

From: Wanpeng Li <wanpengli@tencent.com>

ICR and TSCDEADLINE MSRs write cause the main MSRs write vmexits in 
our product observation, multicast IPIs are not as common as unicast 
IPI like RESCHEDULE_VECTOR and CALL_FUNCTION_SINGLE_VECTOR etc.

This patch tries to optimize x2apic physical destination mode, fixed 
delivery mode single target IPI. The fast path is invoked at 
->handle_exit_irqoff() to emulate only the effect of the ICR write
itself, i.e. the sending of IPIs.  Sending IPIs early in the VM-Exit
flow reduces the latency of virtual IPIs by avoiding the expensive bits
of transitioning from guest to host, e.g. reacquiring KVM's SRCU lock.
Especially when running guest w/ KVM_CAP_X86_DISABLE_EXITS capability 
enabled or guest can keep running, IPI can be injected to target vCPU 
by posted-interrupt immediately.

Testing on Xeon Skylake server:

The virtual IPI latency from sender send to receiver receive reduces 
more than 200+ cpu cycles.

Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Radim Krčmář <rkrcmar@redhat.com>
Cc: Sean Christopherson <sean.j.christopherson@intel.com>
Cc: Vitaly Kuznetsov <vkuznets@redhat.com>
Cc: Liran Alon <liran.alon@oracle.com>
Signed-off-by: Wanpeng Li <wanpengli@tencent.com>
---
v2 -> v3:
 * for both VMX and SVM
 * vmx_handle_exit() get second parameter by value and not by pointer
 * rename parameter to “accel_exit_completion”
 * preserve tracepoint ordering
 * rename handler to handle_accel_set_msr_irqoff and more generic
 * add comments above handle_accel_set_msr_irqoff
 * msr index APIC_BASE_MSR + (APIC_ICR >> 4)
v1 -> v2:
 * add tracepoint
 * Instead of a separate vcpu->fast_vmexit, set exit_reason
  to vmx->exit_reason to -1 if the fast path succeeds.
 * move the "kvm_skip_emulated_instruction(vcpu)" to vmx_handle_exit
 * moving the handling into vmx_handle_exit_irqoff()

 arch/x86/include/asm/kvm_host.h | 11 ++++++++--
 arch/x86/kvm/svm.c              | 14 ++++++++----
 arch/x86/kvm/vmx/vmx.c          | 13 ++++++++---
 arch/x86/kvm/x86.c              | 48 +++++++++++++++++++++++++++++++++++++++--
 arch/x86/kvm/x86.h              |  1 +
 5 files changed, 76 insertions(+), 11 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 898ab9e..67c7889 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -175,6 +175,11 @@ enum {
 	VCPU_SREG_LDTR,
 };
 
+enum accel_exit_completion {
+	ACCEL_EXIT_NONE,
+	ACCEL_EXIT_SKIP_EMUL_INS = -1,
+};
+
 #include <asm/kvm_emulate.h>
 
 #define KVM_NR_MEM_OBJS 40
@@ -1084,7 +1089,8 @@ struct kvm_x86_ops {
 	void (*tlb_flush_gva)(struct kvm_vcpu *vcpu, gva_t addr);
 
 	void (*run)(struct kvm_vcpu *vcpu);
-	int (*handle_exit)(struct kvm_vcpu *vcpu);
+	int (*handle_exit)(struct kvm_vcpu *vcpu,
+		enum accel_exit_completion accel_exit);
 	int (*skip_emulated_instruction)(struct kvm_vcpu *vcpu);
 	void (*set_interrupt_shadow)(struct kvm_vcpu *vcpu, int mask);
 	u32 (*get_interrupt_shadow)(struct kvm_vcpu *vcpu);
@@ -1134,7 +1140,8 @@ struct kvm_x86_ops {
 	int (*check_intercept)(struct kvm_vcpu *vcpu,
 			       struct x86_instruction_info *info,
 			       enum x86_intercept_stage stage);
-	void (*handle_exit_irqoff)(struct kvm_vcpu *vcpu);
+	void (*handle_exit_irqoff)(struct kvm_vcpu *vcpu,
+		enum accel_exit_completion *accel_exit);
 	bool (*mpx_supported)(void);
 	bool (*xsaves_supported)(void);
 	bool (*umip_emulated)(void);
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index d02a73a..060b11d 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -4929,7 +4929,8 @@ static void svm_get_exit_info(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2)
 	*info2 = control->exit_info_2;
 }
 
-static int handle_exit(struct kvm_vcpu *vcpu)
+static int handle_exit(struct kvm_vcpu *vcpu,
+	enum accel_exit_completion accel_exit)
 {
 	struct vcpu_svm *svm = to_svm(vcpu);
 	struct kvm_run *kvm_run = vcpu->run;
@@ -4987,7 +4988,10 @@ static int handle_exit(struct kvm_vcpu *vcpu)
 		       __func__, svm->vmcb->control.exit_int_info,
 		       exit_code);
 
-	if (exit_code >= ARRAY_SIZE(svm_exit_handlers)
+	if (accel_exit == ACCEL_EXIT_SKIP_EMUL_INS) {
+		kvm_skip_emulated_instruction(vcpu);
+		return 1;
+	} else if (exit_code >= ARRAY_SIZE(svm_exit_handlers)
 	    || !svm_exit_handlers[exit_code]) {
 		vcpu_unimpl(vcpu, "svm: unexpected exit reason 0x%x\n", exit_code);
 		dump_vmcb(vcpu);
@@ -6187,9 +6191,11 @@ static int svm_check_intercept(struct kvm_vcpu *vcpu,
 	return ret;
 }
 
-static void svm_handle_exit_irqoff(struct kvm_vcpu *vcpu)
+static void svm_handle_exit_irqoff(struct kvm_vcpu *vcpu,
+	enum accel_exit_completion *accel_exit)
 {
-
+	if (to_svm(vcpu)->vmcb->control.exit_code == EXIT_REASON_MSR_WRITE)
+		*accel_exit = handle_accel_set_msr_irqoff(vcpu);
 }
 
 static void svm_sched_in(struct kvm_vcpu *vcpu, int cpu)
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 621142e5..86c0a23 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -5792,7 +5792,8 @@ void dump_vmcs(void)
  * The guest has exited.  See if we can fix it or if we need userspace
  * assistance.
  */
-static int vmx_handle_exit(struct kvm_vcpu *vcpu)
+static int vmx_handle_exit(struct kvm_vcpu *vcpu,
+	enum accel_exit_completion accel_exit)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
 	u32 exit_reason = vmx->exit_reason;
@@ -5878,7 +5879,10 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu)
 		}
 	}
 
-	if (exit_reason < kvm_vmx_max_exit_handlers
+	if (accel_exit == ACCEL_EXIT_SKIP_EMUL_INS) {
+		kvm_skip_emulated_instruction(vcpu);
+		return 1;
+	} else if (exit_reason < kvm_vmx_max_exit_handlers
 	    && kvm_vmx_exit_handlers[exit_reason]) {
 #ifdef CONFIG_RETPOLINE
 		if (exit_reason == EXIT_REASON_MSR_WRITE)
@@ -6223,7 +6227,8 @@ static void handle_external_interrupt_irqoff(struct kvm_vcpu *vcpu)
 }
 STACK_FRAME_NON_STANDARD(handle_external_interrupt_irqoff);
 
-static void vmx_handle_exit_irqoff(struct kvm_vcpu *vcpu)
+static void vmx_handle_exit_irqoff(struct kvm_vcpu *vcpu,
+	enum accel_exit_completion *accel_exit)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
 
@@ -6231,6 +6236,8 @@ static void vmx_handle_exit_irqoff(struct kvm_vcpu *vcpu)
 		handle_external_interrupt_irqoff(vcpu);
 	else if (vmx->exit_reason == EXIT_REASON_EXCEPTION_NMI)
 		handle_exception_nmi_irqoff(vmx);
+	else if (vmx->exit_reason == EXIT_REASON_MSR_WRITE)
+		*accel_exit = handle_accel_set_msr_irqoff(vcpu);
 }
 
 static bool vmx_has_emulated_msr(int index)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 991dd01..966c659 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1509,6 +1509,49 @@ int kvm_emulate_wrmsr(struct kvm_vcpu *vcpu)
 }
 EXPORT_SYMBOL_GPL(kvm_emulate_wrmsr);
 
+static int handle_accel_set_x2apic_icr_irqoff(struct kvm_vcpu *vcpu, u64 data)
+{
+	if (lapic_in_kernel(vcpu) && apic_x2apic_mode(vcpu->arch.apic) &&
+		((data & KVM_APIC_DEST_MASK) == APIC_DEST_PHYSICAL) &&
+		((data & APIC_MODE_MASK) == APIC_DM_FIXED)) {
+
+		kvm_lapic_set_reg(vcpu->arch.apic, APIC_ICR2, (u32)(data >> 32));
+		return kvm_lapic_reg_write(vcpu->arch.apic, APIC_ICR, (u32)data);
+	}
+
+	return 1;
+}
+
+/*
+ * The fast path for frequent and performance sensitive wrmsr emulation,
+ * i.e. the sending of IPI, sending IPI early in the VM-Exit flow reduces
+ * the latency of virtual IPI by avoiding the expensive bits of transitioning
+ * from guest to host, e.g. reacquiring KVM's SRCU lock. In contrast to the
+ * other cases which must be called after interrupts are enabled on the host.
+ */
+enum accel_exit_completion handle_accel_set_msr_irqoff(struct kvm_vcpu *vcpu)
+{
+	u32 msr = kvm_rcx_read(vcpu);
+	u64 data = kvm_read_edx_eax(vcpu);
+	int ret = 0;
+
+	switch (msr) {
+	case APIC_BASE_MSR + (APIC_ICR >> 4):
+		ret = handle_accel_set_x2apic_icr_irqoff(vcpu, data);
+		break;
+	default:
+		return ACCEL_EXIT_NONE;
+	}
+
+	if (!ret) {
+		trace_kvm_msr_write(msr, data);
+		return ACCEL_EXIT_SKIP_EMUL_INS;
+	}
+
+	return ACCEL_EXIT_NONE;
+}
+EXPORT_SYMBOL_GPL(handle_accel_set_msr_irqoff);
+
 /*
  * Adapt set_msr() to msr_io()'s calling convention
  */
@@ -7984,6 +8027,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 	bool req_int_win =
 		dm_request_for_irq_injection(vcpu) &&
 		kvm_cpu_accept_dm_intr(vcpu);
+	enum accel_exit_completion accel_exit = ACCEL_EXIT_NONE;
 
 	bool req_immediate_exit = false;
 
@@ -8226,7 +8270,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 	vcpu->mode = OUTSIDE_GUEST_MODE;
 	smp_wmb();
 
-	kvm_x86_ops->handle_exit_irqoff(vcpu);
+	kvm_x86_ops->handle_exit_irqoff(vcpu, &accel_exit);
 
 	/*
 	 * Consume any pending interrupts, including the possible source of
@@ -8270,7 +8314,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 		kvm_lapic_sync_from_vapic(vcpu);
 
 	vcpu->arch.gpa_available = false;
-	r = kvm_x86_ops->handle_exit(vcpu);
+	r = kvm_x86_ops->handle_exit(vcpu, accel_exit);
 	return r;
 
 cancel_injection:
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index 29391af..f14ec14 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -291,6 +291,7 @@ bool kvm_mtrr_check_gfn_range_consistency(struct kvm_vcpu *vcpu, gfn_t gfn,
 bool kvm_vector_hashing_enabled(void);
 int x86_emulate_instruction(struct kvm_vcpu *vcpu, unsigned long cr2,
 			    int emulation_type, void *insn, int insn_len);
+enum accel_exit_completion handle_accel_set_msr_irqoff(struct kvm_vcpu *vcpu);
 
 #define KVM_SUPPORTED_XCR0     (XFEATURE_MASK_FP | XFEATURE_MASK_SSE \
 				| XFEATURE_MASK_YMM | XFEATURE_MASK_BNDREGS \
-- 
2.7.4


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH v3 2/2] KVM: LAPIC: micro-optimize fixed mode ipi delivery
  2019-11-20  3:42 [PATCH v3 1/2] KVM: VMX: FIXED+PHYSICAL mode single target IPI fastpath Wanpeng Li
@ 2019-11-20  3:42 ` Wanpeng Li
  2019-11-20  5:31 ` [PATCH v3 1/2] KVM: VMX: FIXED+PHYSICAL mode single target IPI fastpath Wanpeng Li
  2019-11-20 23:36 ` Liran Alon
  2 siblings, 0 replies; 7+ messages in thread
From: Wanpeng Li @ 2019-11-20  3:42 UTC (permalink / raw)
  To: linux-kernel, kvm
  Cc: Paolo Bonzini, Radim Krčmář,
	Sean Christopherson, Vitaly Kuznetsov, Wanpeng Li, Jim Mattson,
	Joerg Roedel

From: Wanpeng Li <wanpengli@tencent.com>

This patch optimizes redundancy logic before fixed mode ipi is delivered
in the fast path, broadcast handling needs to go slow path, so the delivery
mode repair can be delayed to before slow path.

Signed-off-by: Wanpeng Li <wanpengli@tencent.com>
---
 arch/x86/kvm/irq_comm.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/x86/kvm/irq_comm.c b/arch/x86/kvm/irq_comm.c
index 8ecd48d..aa88156 100644
--- a/arch/x86/kvm/irq_comm.c
+++ b/arch/x86/kvm/irq_comm.c
@@ -52,15 +52,15 @@ int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src,
 	unsigned long dest_vcpu_bitmap[BITS_TO_LONGS(KVM_MAX_VCPUS)];
 	unsigned int dest_vcpus = 0;
 
+	if (kvm_irq_delivery_to_apic_fast(kvm, src, irq, &r, dest_map))
+		return r;
+
 	if (irq->dest_mode == 0 && irq->dest_id == 0xff &&
 			kvm_lowest_prio_delivery(irq)) {
 		printk(KERN_INFO "kvm: apic: phys broadcast and lowest prio\n");
 		irq->delivery_mode = APIC_DM_FIXED;
 	}
 
-	if (kvm_irq_delivery_to_apic_fast(kvm, src, irq, &r, dest_map))
-		return r;
-
 	memset(dest_vcpu_bitmap, 0, sizeof(dest_vcpu_bitmap));
 
 	kvm_for_each_vcpu(i, vcpu, kvm) {
-- 
2.7.4


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* Re: [PATCH v3 1/2] KVM: VMX: FIXED+PHYSICAL mode single target IPI fastpath
  2019-11-20  3:42 [PATCH v3 1/2] KVM: VMX: FIXED+PHYSICAL mode single target IPI fastpath Wanpeng Li
  2019-11-20  3:42 ` [PATCH v3 2/2] KVM: LAPIC: micro-optimize fixed mode ipi delivery Wanpeng Li
@ 2019-11-20  5:31 ` Wanpeng Li
  2019-11-20 23:36 ` Liran Alon
  2 siblings, 0 replies; 7+ messages in thread
From: Wanpeng Li @ 2019-11-20  5:31 UTC (permalink / raw)
  To: LKML, kvm
  Cc: Paolo Bonzini, Radim Krčmář,
	Sean Christopherson, Vitaly Kuznetsov, Wanpeng Li, Jim Mattson,
	Joerg Roedel, Liran Alon

On Wed, 20 Nov 2019 at 11:42, Wanpeng Li <kernellwp@gmail.com> wrote:
>
> From: Wanpeng Li <wanpengli@tencent.com>
>
> ICR and TSCDEADLINE MSRs write cause the main MSRs write vmexits in
> our product observation, multicast IPIs are not as common as unicast
> IPI like RESCHEDULE_VECTOR and CALL_FUNCTION_SINGLE_VECTOR etc.
>
> This patch tries to optimize x2apic physical destination mode, fixed
> delivery mode single target IPI. The fast path is invoked at
> ->handle_exit_irqoff() to emulate only the effect of the ICR write
> itself, i.e. the sending of IPIs.  Sending IPIs early in the VM-Exit
> flow reduces the latency of virtual IPIs by avoiding the expensive bits
> of transitioning from guest to host, e.g. reacquiring KVM's SRCU lock.
> Especially when running guest w/ KVM_CAP_X86_DISABLE_EXITS capability
> enabled or guest can keep running, IPI can be injected to target vCPU
> by posted-interrupt immediately.
>
> Testing on Xeon Skylake server:
>
> The virtual IPI latency from sender send to receiver receive reduces
> more than 200+ cpu cycles.
>
> Cc: Paolo Bonzini <pbonzini@redhat.com>
> Cc: Radim Krčmář <rkrcmar@redhat.com>
> Cc: Sean Christopherson <sean.j.christopherson@intel.com>
> Cc: Vitaly Kuznetsov <vkuznets@redhat.com>
> Cc: Liran Alon <liran.alon@oracle.com>
> Signed-off-by: Wanpeng Li <wanpengli@tencent.com>
> ---
> v2 -> v3:
>  * for both VMX and SVM
>  * vmx_handle_exit() get second parameter by value and not by pointer
>  * rename parameter to “accel_exit_completion”
>  * preserve tracepoint ordering
>  * rename handler to handle_accel_set_msr_irqoff and more generic
>  * add comments above handle_accel_set_msr_irqoff
>  * msr index APIC_BASE_MSR + (APIC_ICR >> 4)
> v1 -> v2:
>  * add tracepoint
>  * Instead of a separate vcpu->fast_vmexit, set exit_reason
>   to vmx->exit_reason to -1 if the fast path succeeds.
>  * move the "kvm_skip_emulated_instruction(vcpu)" to vmx_handle_exit
>  * moving the handling into vmx_handle_exit_irqoff()
>
>  arch/x86/include/asm/kvm_host.h | 11 ++++++++--
>  arch/x86/kvm/svm.c              | 14 ++++++++----
>  arch/x86/kvm/vmx/vmx.c          | 13 ++++++++---
>  arch/x86/kvm/x86.c              | 48 +++++++++++++++++++++++++++++++++++++++--
>  arch/x86/kvm/x86.h              |  1 +
>  5 files changed, 76 insertions(+), 11 deletions(-)
>
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index 898ab9e..67c7889 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -175,6 +175,11 @@ enum {
>         VCPU_SREG_LDTR,
>  };
>
> +enum accel_exit_completion {
> +       ACCEL_EXIT_NONE,
> +       ACCEL_EXIT_SKIP_EMUL_INS = -1,

Don't need to be -1 any more, I guess maintainer can clean it up if
this is the last version.

> +};
> +
>  #include <asm/kvm_emulate.h>
>
>  #define KVM_NR_MEM_OBJS 40
> @@ -1084,7 +1089,8 @@ struct kvm_x86_ops {
>         void (*tlb_flush_gva)(struct kvm_vcpu *vcpu, gva_t addr);
>
>         void (*run)(struct kvm_vcpu *vcpu);
> -       int (*handle_exit)(struct kvm_vcpu *vcpu);
> +       int (*handle_exit)(struct kvm_vcpu *vcpu,
> +               enum accel_exit_completion accel_exit);
>         int (*skip_emulated_instruction)(struct kvm_vcpu *vcpu);
>         void (*set_interrupt_shadow)(struct kvm_vcpu *vcpu, int mask);
>         u32 (*get_interrupt_shadow)(struct kvm_vcpu *vcpu);
> @@ -1134,7 +1140,8 @@ struct kvm_x86_ops {
>         int (*check_intercept)(struct kvm_vcpu *vcpu,
>                                struct x86_instruction_info *info,
>                                enum x86_intercept_stage stage);
> -       void (*handle_exit_irqoff)(struct kvm_vcpu *vcpu);
> +       void (*handle_exit_irqoff)(struct kvm_vcpu *vcpu,
> +               enum accel_exit_completion *accel_exit);
>         bool (*mpx_supported)(void);
>         bool (*xsaves_supported)(void);
>         bool (*umip_emulated)(void);
> diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
> index d02a73a..060b11d 100644
> --- a/arch/x86/kvm/svm.c
> +++ b/arch/x86/kvm/svm.c
> @@ -4929,7 +4929,8 @@ static void svm_get_exit_info(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2)
>         *info2 = control->exit_info_2;
>  }
>
> -static int handle_exit(struct kvm_vcpu *vcpu)
> +static int handle_exit(struct kvm_vcpu *vcpu,
> +       enum accel_exit_completion accel_exit)
>  {
>         struct vcpu_svm *svm = to_svm(vcpu);
>         struct kvm_run *kvm_run = vcpu->run;
> @@ -4987,7 +4988,10 @@ static int handle_exit(struct kvm_vcpu *vcpu)
>                        __func__, svm->vmcb->control.exit_int_info,
>                        exit_code);
>
> -       if (exit_code >= ARRAY_SIZE(svm_exit_handlers)
> +       if (accel_exit == ACCEL_EXIT_SKIP_EMUL_INS) {
> +               kvm_skip_emulated_instruction(vcpu);
> +               return 1;
> +       } else if (exit_code >= ARRAY_SIZE(svm_exit_handlers)
>             || !svm_exit_handlers[exit_code]) {
>                 vcpu_unimpl(vcpu, "svm: unexpected exit reason 0x%x\n", exit_code);
>                 dump_vmcb(vcpu);
> @@ -6187,9 +6191,11 @@ static int svm_check_intercept(struct kvm_vcpu *vcpu,
>         return ret;
>  }
>
> -static void svm_handle_exit_irqoff(struct kvm_vcpu *vcpu)
> +static void svm_handle_exit_irqoff(struct kvm_vcpu *vcpu,
> +       enum accel_exit_completion *accel_exit)
>  {
> -
> +       if (to_svm(vcpu)->vmcb->control.exit_code == EXIT_REASON_MSR_WRITE)
> +               *accel_exit = handle_accel_set_msr_irqoff(vcpu);
>  }
>
>  static void svm_sched_in(struct kvm_vcpu *vcpu, int cpu)
> diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
> index 621142e5..86c0a23 100644
> --- a/arch/x86/kvm/vmx/vmx.c
> +++ b/arch/x86/kvm/vmx/vmx.c
> @@ -5792,7 +5792,8 @@ void dump_vmcs(void)
>   * The guest has exited.  See if we can fix it or if we need userspace
>   * assistance.
>   */
> -static int vmx_handle_exit(struct kvm_vcpu *vcpu)
> +static int vmx_handle_exit(struct kvm_vcpu *vcpu,
> +       enum accel_exit_completion accel_exit)
>  {
>         struct vcpu_vmx *vmx = to_vmx(vcpu);
>         u32 exit_reason = vmx->exit_reason;
> @@ -5878,7 +5879,10 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu)
>                 }
>         }
>
> -       if (exit_reason < kvm_vmx_max_exit_handlers
> +       if (accel_exit == ACCEL_EXIT_SKIP_EMUL_INS) {
> +               kvm_skip_emulated_instruction(vcpu);
> +               return 1;
> +       } else if (exit_reason < kvm_vmx_max_exit_handlers
>             && kvm_vmx_exit_handlers[exit_reason]) {
>  #ifdef CONFIG_RETPOLINE
>                 if (exit_reason == EXIT_REASON_MSR_WRITE)
> @@ -6223,7 +6227,8 @@ static void handle_external_interrupt_irqoff(struct kvm_vcpu *vcpu)
>  }
>  STACK_FRAME_NON_STANDARD(handle_external_interrupt_irqoff);
>
> -static void vmx_handle_exit_irqoff(struct kvm_vcpu *vcpu)
> +static void vmx_handle_exit_irqoff(struct kvm_vcpu *vcpu,
> +       enum accel_exit_completion *accel_exit)
>  {
>         struct vcpu_vmx *vmx = to_vmx(vcpu);
>
> @@ -6231,6 +6236,8 @@ static void vmx_handle_exit_irqoff(struct kvm_vcpu *vcpu)
>                 handle_external_interrupt_irqoff(vcpu);
>         else if (vmx->exit_reason == EXIT_REASON_EXCEPTION_NMI)
>                 handle_exception_nmi_irqoff(vmx);
> +       else if (vmx->exit_reason == EXIT_REASON_MSR_WRITE)
> +               *accel_exit = handle_accel_set_msr_irqoff(vcpu);
>  }
>
>  static bool vmx_has_emulated_msr(int index)
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 991dd01..966c659 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -1509,6 +1509,49 @@ int kvm_emulate_wrmsr(struct kvm_vcpu *vcpu)
>  }
>  EXPORT_SYMBOL_GPL(kvm_emulate_wrmsr);
>
> +static int handle_accel_set_x2apic_icr_irqoff(struct kvm_vcpu *vcpu, u64 data)
> +{
> +       if (lapic_in_kernel(vcpu) && apic_x2apic_mode(vcpu->arch.apic) &&
> +               ((data & KVM_APIC_DEST_MASK) == APIC_DEST_PHYSICAL) &&
> +               ((data & APIC_MODE_MASK) == APIC_DM_FIXED)) {
> +
> +               kvm_lapic_set_reg(vcpu->arch.apic, APIC_ICR2, (u32)(data >> 32));
> +               return kvm_lapic_reg_write(vcpu->arch.apic, APIC_ICR, (u32)data);
> +       }
> +
> +       return 1;
> +}
> +
> +/*
> + * The fast path for frequent and performance sensitive wrmsr emulation,
> + * i.e. the sending of IPI, sending IPI early in the VM-Exit flow reduces
> + * the latency of virtual IPI by avoiding the expensive bits of transitioning
> + * from guest to host, e.g. reacquiring KVM's SRCU lock. In contrast to the
> + * other cases which must be called after interrupts are enabled on the host.
> + */
> +enum accel_exit_completion handle_accel_set_msr_irqoff(struct kvm_vcpu *vcpu)
> +{
> +       u32 msr = kvm_rcx_read(vcpu);
> +       u64 data = kvm_read_edx_eax(vcpu);
> +       int ret = 0;
> +
> +       switch (msr) {
> +       case APIC_BASE_MSR + (APIC_ICR >> 4):
> +               ret = handle_accel_set_x2apic_icr_irqoff(vcpu, data);
> +               break;
> +       default:
> +               return ACCEL_EXIT_NONE;
> +       }
> +
> +       if (!ret) {
> +               trace_kvm_msr_write(msr, data);
> +               return ACCEL_EXIT_SKIP_EMUL_INS;
> +       }
> +
> +       return ACCEL_EXIT_NONE;
> +}
> +EXPORT_SYMBOL_GPL(handle_accel_set_msr_irqoff);
> +
>  /*
>   * Adapt set_msr() to msr_io()'s calling convention
>   */
> @@ -7984,6 +8027,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
>         bool req_int_win =
>                 dm_request_for_irq_injection(vcpu) &&
>                 kvm_cpu_accept_dm_intr(vcpu);
> +       enum accel_exit_completion accel_exit = ACCEL_EXIT_NONE;
>
>         bool req_immediate_exit = false;
>
> @@ -8226,7 +8270,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
>         vcpu->mode = OUTSIDE_GUEST_MODE;
>         smp_wmb();
>
> -       kvm_x86_ops->handle_exit_irqoff(vcpu);
> +       kvm_x86_ops->handle_exit_irqoff(vcpu, &accel_exit);
>
>         /*
>          * Consume any pending interrupts, including the possible source of
> @@ -8270,7 +8314,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
>                 kvm_lapic_sync_from_vapic(vcpu);
>
>         vcpu->arch.gpa_available = false;
> -       r = kvm_x86_ops->handle_exit(vcpu);
> +       r = kvm_x86_ops->handle_exit(vcpu, accel_exit);
>         return r;
>
>  cancel_injection:
> diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
> index 29391af..f14ec14 100644
> --- a/arch/x86/kvm/x86.h
> +++ b/arch/x86/kvm/x86.h
> @@ -291,6 +291,7 @@ bool kvm_mtrr_check_gfn_range_consistency(struct kvm_vcpu *vcpu, gfn_t gfn,
>  bool kvm_vector_hashing_enabled(void);
>  int x86_emulate_instruction(struct kvm_vcpu *vcpu, unsigned long cr2,
>                             int emulation_type, void *insn, int insn_len);
> +enum accel_exit_completion handle_accel_set_msr_irqoff(struct kvm_vcpu *vcpu);
>
>  #define KVM_SUPPORTED_XCR0     (XFEATURE_MASK_FP | XFEATURE_MASK_SSE \
>                                 | XFEATURE_MASK_YMM | XFEATURE_MASK_BNDREGS \
> --
> 2.7.4
>

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH v3 1/2] KVM: VMX: FIXED+PHYSICAL mode single target IPI fastpath
  2019-11-20  3:42 [PATCH v3 1/2] KVM: VMX: FIXED+PHYSICAL mode single target IPI fastpath Wanpeng Li
  2019-11-20  3:42 ` [PATCH v3 2/2] KVM: LAPIC: micro-optimize fixed mode ipi delivery Wanpeng Li
  2019-11-20  5:31 ` [PATCH v3 1/2] KVM: VMX: FIXED+PHYSICAL mode single target IPI fastpath Wanpeng Li
@ 2019-11-20 23:36 ` Liran Alon
  2019-11-20 23:55   ` Liran Alon
  2019-11-21  3:16   ` Wanpeng Li
  2 siblings, 2 replies; 7+ messages in thread
From: Liran Alon @ 2019-11-20 23:36 UTC (permalink / raw)
  To: Wanpeng Li
  Cc: linux-kernel, kvm, Paolo Bonzini, Radim Krčmář,
	Sean Christopherson, Vitaly Kuznetsov, Wanpeng Li, Jim Mattson,
	Joerg Roedel



> On 20 Nov 2019, at 5:42, Wanpeng Li <kernellwp@gmail.com> wrote:
> 
> From: Wanpeng Li <wanpengli@tencent.com>
> 
> ICR and TSCDEADLINE MSRs write cause the main MSRs write vmexits in 
> our product observation, multicast IPIs are not as common as unicast 
> IPI like RESCHEDULE_VECTOR and CALL_FUNCTION_SINGLE_VECTOR etc.

Have you also had the chance to measure non-Linux workloads. Such as Windows?

> 
> This patch tries to optimize x2apic physical destination mode, fixed 
> delivery mode single target IPI. The fast path is invoked at 
> ->handle_exit_irqoff() to emulate only the effect of the ICR write
> itself, i.e. the sending of IPIs.  Sending IPIs early in the VM-Exit
> flow reduces the latency of virtual IPIs by avoiding the expensive bits
> of transitioning from guest to host, e.g. reacquiring KVM's SRCU lock.
> Especially when running guest w/ KVM_CAP_X86_DISABLE_EXITS capability 
> enabled or guest can keep running, IPI can be injected to target vCPU 
> by posted-interrupt immediately.

May I suggest an alternative phrasing? Something such as:

“””
This patch introduce a mechanism to handle certain performance-critical WRMSRs
in a very early stage of KVM VMExit handler.

This mechanism is specifically used for accelerating writes to x2APIC ICR that
attempt to send a virtual IPI with physical destination-mode, fixed delivery-mode
and single target. Which was found as one of the main causes of VMExits for
Linux workloads.

The reason this mechanism significantly reduce the latency of such virtual IPIs
is by sending the physical IPI to the target vCPU in a very early stage of KVM
VMExit handler, before host interrupts are enabled and before expensive
operations such as reacquiring KVM’s SRCU lock.
Latency is reduced even more when KVM is able to use APICv posted-interrupt
mechanism (which allows to deliver the virtual IPI directly to target vCPU without
the need to kick it to host).
“””

> 
> Testing on Xeon Skylake server:
> 
> The virtual IPI latency from sender send to receiver receive reduces 
> more than 200+ cpu cycles.
> 
> Cc: Paolo Bonzini <pbonzini@redhat.com>
> Cc: Radim Krčmář <rkrcmar@redhat.com>
> Cc: Sean Christopherson <sean.j.christopherson@intel.com>
> Cc: Vitaly Kuznetsov <vkuznets@redhat.com>
> Cc: Liran Alon <liran.alon@oracle.com>
> Signed-off-by: Wanpeng Li <wanpengli@tencent.com>

I see you used the code I provided my reply to v2. :)
I had only some very minor comments inline below. Therefore:
Reviewed-by: Liran Alon <liran.alon@oracle.com>

Thanks for doing this optimisation.
-Liran

> ---
> v2 -> v3:
> * for both VMX and SVM
> * vmx_handle_exit() get second parameter by value and not by pointer
> * rename parameter to “accel_exit_completion”
> * preserve tracepoint ordering
> * rename handler to handle_accel_set_msr_irqoff and more generic
> * add comments above handle_accel_set_msr_irqoff
> * msr index APIC_BASE_MSR + (APIC_ICR >> 4)
> v1 -> v2:
> * add tracepoint
> * Instead of a separate vcpu->fast_vmexit, set exit_reason
>  to vmx->exit_reason to -1 if the fast path succeeds.
> * move the "kvm_skip_emulated_instruction(vcpu)" to vmx_handle_exit
> * moving the handling into vmx_handle_exit_irqoff()
> 
> arch/x86/include/asm/kvm_host.h | 11 ++++++++--
> arch/x86/kvm/svm.c              | 14 ++++++++----
> arch/x86/kvm/vmx/vmx.c          | 13 ++++++++---
> arch/x86/kvm/x86.c              | 48 +++++++++++++++++++++++++++++++++++++++--
> arch/x86/kvm/x86.h              |  1 +
> 5 files changed, 76 insertions(+), 11 deletions(-)
> 
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index 898ab9e..67c7889 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -175,6 +175,11 @@ enum {
> 	VCPU_SREG_LDTR,
> };
> 
> +enum accel_exit_completion {
> +	ACCEL_EXIT_NONE,
> +	ACCEL_EXIT_SKIP_EMUL_INS = -1,

You should remove the “= -1”.

> +};
> +
> #include <asm/kvm_emulate.h>
> 
> #define KVM_NR_MEM_OBJS 40
> @@ -1084,7 +1089,8 @@ struct kvm_x86_ops {
> 	void (*tlb_flush_gva)(struct kvm_vcpu *vcpu, gva_t addr);
> 
> 	void (*run)(struct kvm_vcpu *vcpu);
> -	int (*handle_exit)(struct kvm_vcpu *vcpu);
> +	int (*handle_exit)(struct kvm_vcpu *vcpu,
> +		enum accel_exit_completion accel_exit);
> 	int (*skip_emulated_instruction)(struct kvm_vcpu *vcpu);
> 	void (*set_interrupt_shadow)(struct kvm_vcpu *vcpu, int mask);
> 	u32 (*get_interrupt_shadow)(struct kvm_vcpu *vcpu);
> @@ -1134,7 +1140,8 @@ struct kvm_x86_ops {
> 	int (*check_intercept)(struct kvm_vcpu *vcpu,
> 			       struct x86_instruction_info *info,
> 			       enum x86_intercept_stage stage);
> -	void (*handle_exit_irqoff)(struct kvm_vcpu *vcpu);
> +	void (*handle_exit_irqoff)(struct kvm_vcpu *vcpu,
> +		enum accel_exit_completion *accel_exit);
> 	bool (*mpx_supported)(void);
> 	bool (*xsaves_supported)(void);
> 	bool (*umip_emulated)(void);
> diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
> index d02a73a..060b11d 100644
> --- a/arch/x86/kvm/svm.c
> +++ b/arch/x86/kvm/svm.c
> @@ -4929,7 +4929,8 @@ static void svm_get_exit_info(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2)
> 	*info2 = control->exit_info_2;
> }
> 
> -static int handle_exit(struct kvm_vcpu *vcpu)
> +static int handle_exit(struct kvm_vcpu *vcpu,
> +	enum accel_exit_completion accel_exit)
> {
> 	struct vcpu_svm *svm = to_svm(vcpu);
> 	struct kvm_run *kvm_run = vcpu->run;
> @@ -4987,7 +4988,10 @@ static int handle_exit(struct kvm_vcpu *vcpu)
> 		       __func__, svm->vmcb->control.exit_int_info,
> 		       exit_code);
> 
> -	if (exit_code >= ARRAY_SIZE(svm_exit_handlers)
> +	if (accel_exit == ACCEL_EXIT_SKIP_EMUL_INS) {
> +		kvm_skip_emulated_instruction(vcpu);
> +		return 1;
> +	} else if (exit_code >= ARRAY_SIZE(svm_exit_handlers)
> 	    || !svm_exit_handlers[exit_code]) {
> 		vcpu_unimpl(vcpu, "svm: unexpected exit reason 0x%x\n", exit_code);
> 		dump_vmcb(vcpu);
> @@ -6187,9 +6191,11 @@ static int svm_check_intercept(struct kvm_vcpu *vcpu,
> 	return ret;
> }
> 
> -static void svm_handle_exit_irqoff(struct kvm_vcpu *vcpu)
> +static void svm_handle_exit_irqoff(struct kvm_vcpu *vcpu,
> +	enum accel_exit_completion *accel_exit)
> {
> -
> +	if (to_svm(vcpu)->vmcb->control.exit_code == EXIT_REASON_MSR_WRITE)
> +		*accel_exit = handle_accel_set_msr_irqoff(vcpu);
> }
> 
> static void svm_sched_in(struct kvm_vcpu *vcpu, int cpu)
> diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
> index 621142e5..86c0a23 100644
> --- a/arch/x86/kvm/vmx/vmx.c
> +++ b/arch/x86/kvm/vmx/vmx.c
> @@ -5792,7 +5792,8 @@ void dump_vmcs(void)
>  * The guest has exited.  See if we can fix it or if we need userspace
>  * assistance.
>  */
> -static int vmx_handle_exit(struct kvm_vcpu *vcpu)
> +static int vmx_handle_exit(struct kvm_vcpu *vcpu,
> +	enum accel_exit_completion accel_exit)
> {
> 	struct vcpu_vmx *vmx = to_vmx(vcpu);
> 	u32 exit_reason = vmx->exit_reason;
> @@ -5878,7 +5879,10 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu)
> 		}
> 	}
> 
> -	if (exit_reason < kvm_vmx_max_exit_handlers
> +	if (accel_exit == ACCEL_EXIT_SKIP_EMUL_INS) {
> +		kvm_skip_emulated_instruction(vcpu);
> +		return 1;
> +	} else if (exit_reason < kvm_vmx_max_exit_handlers
> 	    && kvm_vmx_exit_handlers[exit_reason]) {
> #ifdef CONFIG_RETPOLINE
> 		if (exit_reason == EXIT_REASON_MSR_WRITE)
> @@ -6223,7 +6227,8 @@ static void handle_external_interrupt_irqoff(struct kvm_vcpu *vcpu)
> }
> STACK_FRAME_NON_STANDARD(handle_external_interrupt_irqoff);
> 
> -static void vmx_handle_exit_irqoff(struct kvm_vcpu *vcpu)
> +static void vmx_handle_exit_irqoff(struct kvm_vcpu *vcpu,
> +	enum accel_exit_completion *accel_exit)
> {
> 	struct vcpu_vmx *vmx = to_vmx(vcpu);
> 
> @@ -6231,6 +6236,8 @@ static void vmx_handle_exit_irqoff(struct kvm_vcpu *vcpu)
> 		handle_external_interrupt_irqoff(vcpu);
> 	else if (vmx->exit_reason == EXIT_REASON_EXCEPTION_NMI)
> 		handle_exception_nmi_irqoff(vmx);
> +	else if (vmx->exit_reason == EXIT_REASON_MSR_WRITE)
> +		*accel_exit = handle_accel_set_msr_irqoff(vcpu);
> }
> 
> static bool vmx_has_emulated_msr(int index)
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 991dd01..966c659 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -1509,6 +1509,49 @@ int kvm_emulate_wrmsr(struct kvm_vcpu *vcpu)
> }
> EXPORT_SYMBOL_GPL(kvm_emulate_wrmsr);
> 
> +static int handle_accel_set_x2apic_icr_irqoff(struct kvm_vcpu *vcpu, u64 data)
> +{
> +	if (lapic_in_kernel(vcpu) && apic_x2apic_mode(vcpu->arch.apic) &&
> +		((data & KVM_APIC_DEST_MASK) == APIC_DEST_PHYSICAL) &&
> +		((data & APIC_MODE_MASK) == APIC_DM_FIXED)) {
> +
> +		kvm_lapic_set_reg(vcpu->arch.apic, APIC_ICR2, (u32)(data >> 32));
> +		return kvm_lapic_reg_write(vcpu->arch.apic, APIC_ICR, (u32)data);
> +	}
> +
> +	return 1;
> +}
> +
> +/*
> + * The fast path for frequent and performance sensitive wrmsr emulation,
> + * i.e. the sending of IPI, sending IPI early in the VM-Exit flow reduces
> + * the latency of virtual IPI by avoiding the expensive bits of transitioning
> + * from guest to host, e.g. reacquiring KVM's SRCU lock. In contrast to the
> + * other cases which must be called after interrupts are enabled on the host.
> + */

This comment belongs better on top of handle_accel_set_x2apic_icr_irqoff().
As handle_accel_set_msr_irqoff() is in theory written to maybe use it for other MSRs as-well.

> +enum accel_exit_completion handle_accel_set_msr_irqoff(struct kvm_vcpu *vcpu)
> +{
> +	u32 msr = kvm_rcx_read(vcpu);
> +	u64 data = kvm_read_edx_eax(vcpu);
> +	int ret = 0;
> +
> +	switch (msr) {
> +	case APIC_BASE_MSR + (APIC_ICR >> 4):
> +		ret = handle_accel_set_x2apic_icr_irqoff(vcpu, data);
> +		break;
> +	default:
> +		return ACCEL_EXIT_NONE;
> +	}
> +
> +	if (!ret) {
> +		trace_kvm_msr_write(msr, data);
> +		return ACCEL_EXIT_SKIP_EMUL_INS;
> +	}
> +
> +	return ACCEL_EXIT_NONE;
> +}
> +EXPORT_SYMBOL_GPL(handle_accel_set_msr_irqoff);
> +
> /*
>  * Adapt set_msr() to msr_io()'s calling convention
>  */
> @@ -7984,6 +8027,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
> 	bool req_int_win =
> 		dm_request_for_irq_injection(vcpu) &&
> 		kvm_cpu_accept_dm_intr(vcpu);
> +	enum accel_exit_completion accel_exit = ACCEL_EXIT_NONE;
> 
> 	bool req_immediate_exit = false;
> 
> @@ -8226,7 +8270,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
> 	vcpu->mode = OUTSIDE_GUEST_MODE;
> 	smp_wmb();
> 
> -	kvm_x86_ops->handle_exit_irqoff(vcpu);
> +	kvm_x86_ops->handle_exit_irqoff(vcpu, &accel_exit);
> 
> 	/*
> 	 * Consume any pending interrupts, including the possible source of
> @@ -8270,7 +8314,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
> 		kvm_lapic_sync_from_vapic(vcpu);
> 
> 	vcpu->arch.gpa_available = false;
> -	r = kvm_x86_ops->handle_exit(vcpu);
> +	r = kvm_x86_ops->handle_exit(vcpu, accel_exit);
> 	return r;
> 
> cancel_injection:
> diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
> index 29391af..f14ec14 100644
> --- a/arch/x86/kvm/x86.h
> +++ b/arch/x86/kvm/x86.h
> @@ -291,6 +291,7 @@ bool kvm_mtrr_check_gfn_range_consistency(struct kvm_vcpu *vcpu, gfn_t gfn,
> bool kvm_vector_hashing_enabled(void);
> int x86_emulate_instruction(struct kvm_vcpu *vcpu, unsigned long cr2,
> 			    int emulation_type, void *insn, int insn_len);
> +enum accel_exit_completion handle_accel_set_msr_irqoff(struct kvm_vcpu *vcpu);
> 
> #define KVM_SUPPORTED_XCR0     (XFEATURE_MASK_FP | XFEATURE_MASK_SSE \
> 				| XFEATURE_MASK_YMM | XFEATURE_MASK_BNDREGS \
> -- 
> 2.7.4
> 


^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH v3 1/2] KVM: VMX: FIXED+PHYSICAL mode single target IPI fastpath
  2019-11-20 23:36 ` Liran Alon
@ 2019-11-20 23:55   ` Liran Alon
  2019-11-21  0:43     ` Wanpeng Li
  2019-11-21  3:16   ` Wanpeng Li
  1 sibling, 1 reply; 7+ messages in thread
From: Liran Alon @ 2019-11-20 23:55 UTC (permalink / raw)
  To: Wanpeng Li
  Cc: linux-kernel, kvm, Paolo Bonzini, Radim Krčmář,
	Sean Christopherson, Vitaly Kuznetsov, Wanpeng Li, Jim Mattson,
	Joerg Roedel



> On 21 Nov 2019, at 1:36, Liran Alon <liran.alon@oracle.com> wrote:
> 
> 
> 
>> On 20 Nov 2019, at 5:42, Wanpeng Li <kernellwp@gmail.com> wrote:
>> 
>> From: Wanpeng Li <wanpengli@tencent.com>
>> 
>> ICR and TSCDEADLINE MSRs write cause the main MSRs write vmexits in 
>> our product observation, multicast IPIs are not as common as unicast 
>> IPI like RESCHEDULE_VECTOR and CALL_FUNCTION_SINGLE_VECTOR etc.
> 
> Have you also had the chance to measure non-Linux workloads. Such as Windows?
> 
>> 
>> This patch tries to optimize x2apic physical destination mode, fixed 
>> delivery mode single target IPI. The fast path is invoked at 
>> ->handle_exit_irqoff() to emulate only the effect of the ICR write
>> itself, i.e. the sending of IPIs.  Sending IPIs early in the VM-Exit
>> flow reduces the latency of virtual IPIs by avoiding the expensive bits
>> of transitioning from guest to host, e.g. reacquiring KVM's SRCU lock.
>> Especially when running guest w/ KVM_CAP_X86_DISABLE_EXITS capability 
>> enabled or guest can keep running, IPI can be injected to target vCPU 
>> by posted-interrupt immediately.
> 
> May I suggest an alternative phrasing? Something such as:
> 
> “””
> This patch introduce a mechanism to handle certain performance-critical WRMSRs
> in a very early stage of KVM VMExit handler.
> 
> This mechanism is specifically used for accelerating writes to x2APIC ICR that
> attempt to send a virtual IPI with physical destination-mode, fixed delivery-mode
> and single target. Which was found as one of the main causes of VMExits for
> Linux workloads.
> 
> The reason this mechanism significantly reduce the latency of such virtual IPIs
> is by sending the physical IPI to the target vCPU in a very early stage of KVM
> VMExit handler, before host interrupts are enabled and before expensive
> operations such as reacquiring KVM’s SRCU lock.
> Latency is reduced even more when KVM is able to use APICv posted-interrupt
> mechanism (which allows to deliver the virtual IPI directly to target vCPU without
> the need to kick it to host).
> “””
> 
>> 
>> Testing on Xeon Skylake server:
>> 
>> The virtual IPI latency from sender send to receiver receive reduces 
>> more than 200+ cpu cycles.
>> 
>> Cc: Paolo Bonzini <pbonzini@redhat.com>
>> Cc: Radim Krčmář <rkrcmar@redhat.com>
>> Cc: Sean Christopherson <sean.j.christopherson@intel.com>
>> Cc: Vitaly Kuznetsov <vkuznets@redhat.com>
>> Cc: Liran Alon <liran.alon@oracle.com>
>> Signed-off-by: Wanpeng Li <wanpengli@tencent.com>
> 
> I see you used the code I provided my reply to v2. :)
> I had only some very minor comments inline below. Therefore:
> Reviewed-by: Liran Alon <liran.alon@oracle.com>

Oh there is a small thing pointed by Sean I agree with (and missed in my review) before this Reviewed-by can be given.
You should avoid performing accelerated WRMSR handling in case vCPU is in guest-mode.
(From obvious reasons as L1 for example could intercept this WRMSR…)

-Liran

> 
> Thanks for doing this optimisation.
> -Liran
> 
>> ---
>> v2 -> v3:
>> * for both VMX and SVM
>> * vmx_handle_exit() get second parameter by value and not by pointer
>> * rename parameter to “accel_exit_completion”
>> * preserve tracepoint ordering
>> * rename handler to handle_accel_set_msr_irqoff and more generic
>> * add comments above handle_accel_set_msr_irqoff
>> * msr index APIC_BASE_MSR + (APIC_ICR >> 4)
>> v1 -> v2:
>> * add tracepoint
>> * Instead of a separate vcpu->fast_vmexit, set exit_reason
>> to vmx->exit_reason to -1 if the fast path succeeds.
>> * move the "kvm_skip_emulated_instruction(vcpu)" to vmx_handle_exit
>> * moving the handling into vmx_handle_exit_irqoff()
>> 
>> arch/x86/include/asm/kvm_host.h | 11 ++++++++--
>> arch/x86/kvm/svm.c              | 14 ++++++++----
>> arch/x86/kvm/vmx/vmx.c          | 13 ++++++++---
>> arch/x86/kvm/x86.c              | 48 +++++++++++++++++++++++++++++++++++++++--
>> arch/x86/kvm/x86.h              |  1 +
>> 5 files changed, 76 insertions(+), 11 deletions(-)
>> 
>> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
>> index 898ab9e..67c7889 100644
>> --- a/arch/x86/include/asm/kvm_host.h
>> +++ b/arch/x86/include/asm/kvm_host.h
>> @@ -175,6 +175,11 @@ enum {
>> 	VCPU_SREG_LDTR,
>> };
>> 
>> +enum accel_exit_completion {
>> +	ACCEL_EXIT_NONE,
>> +	ACCEL_EXIT_SKIP_EMUL_INS = -1,
> 
> You should remove the “= -1”.
> 
>> +};
>> +
>> #include <asm/kvm_emulate.h>
>> 
>> #define KVM_NR_MEM_OBJS 40
>> @@ -1084,7 +1089,8 @@ struct kvm_x86_ops {
>> 	void (*tlb_flush_gva)(struct kvm_vcpu *vcpu, gva_t addr);
>> 
>> 	void (*run)(struct kvm_vcpu *vcpu);
>> -	int (*handle_exit)(struct kvm_vcpu *vcpu);
>> +	int (*handle_exit)(struct kvm_vcpu *vcpu,
>> +		enum accel_exit_completion accel_exit);
>> 	int (*skip_emulated_instruction)(struct kvm_vcpu *vcpu);
>> 	void (*set_interrupt_shadow)(struct kvm_vcpu *vcpu, int mask);
>> 	u32 (*get_interrupt_shadow)(struct kvm_vcpu *vcpu);
>> @@ -1134,7 +1140,8 @@ struct kvm_x86_ops {
>> 	int (*check_intercept)(struct kvm_vcpu *vcpu,
>> 			       struct x86_instruction_info *info,
>> 			       enum x86_intercept_stage stage);
>> -	void (*handle_exit_irqoff)(struct kvm_vcpu *vcpu);
>> +	void (*handle_exit_irqoff)(struct kvm_vcpu *vcpu,
>> +		enum accel_exit_completion *accel_exit);
>> 	bool (*mpx_supported)(void);
>> 	bool (*xsaves_supported)(void);
>> 	bool (*umip_emulated)(void);
>> diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
>> index d02a73a..060b11d 100644
>> --- a/arch/x86/kvm/svm.c
>> +++ b/arch/x86/kvm/svm.c
>> @@ -4929,7 +4929,8 @@ static void svm_get_exit_info(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2)
>> 	*info2 = control->exit_info_2;
>> }
>> 
>> -static int handle_exit(struct kvm_vcpu *vcpu)
>> +static int handle_exit(struct kvm_vcpu *vcpu,
>> +	enum accel_exit_completion accel_exit)
>> {
>> 	struct vcpu_svm *svm = to_svm(vcpu);
>> 	struct kvm_run *kvm_run = vcpu->run;
>> @@ -4987,7 +4988,10 @@ static int handle_exit(struct kvm_vcpu *vcpu)
>> 		       __func__, svm->vmcb->control.exit_int_info,
>> 		       exit_code);
>> 
>> -	if (exit_code >= ARRAY_SIZE(svm_exit_handlers)
>> +	if (accel_exit == ACCEL_EXIT_SKIP_EMUL_INS) {
>> +		kvm_skip_emulated_instruction(vcpu);
>> +		return 1;
>> +	} else if (exit_code >= ARRAY_SIZE(svm_exit_handlers)
>> 	    || !svm_exit_handlers[exit_code]) {
>> 		vcpu_unimpl(vcpu, "svm: unexpected exit reason 0x%x\n", exit_code);
>> 		dump_vmcb(vcpu);
>> @@ -6187,9 +6191,11 @@ static int svm_check_intercept(struct kvm_vcpu *vcpu,
>> 	return ret;
>> }
>> 
>> -static void svm_handle_exit_irqoff(struct kvm_vcpu *vcpu)
>> +static void svm_handle_exit_irqoff(struct kvm_vcpu *vcpu,
>> +	enum accel_exit_completion *accel_exit)
>> {
>> -
>> +	if (to_svm(vcpu)->vmcb->control.exit_code == EXIT_REASON_MSR_WRITE)
>> +		*accel_exit = handle_accel_set_msr_irqoff(vcpu);
>> }
>> 
>> static void svm_sched_in(struct kvm_vcpu *vcpu, int cpu)
>> diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
>> index 621142e5..86c0a23 100644
>> --- a/arch/x86/kvm/vmx/vmx.c
>> +++ b/arch/x86/kvm/vmx/vmx.c
>> @@ -5792,7 +5792,8 @@ void dump_vmcs(void)
>> * The guest has exited.  See if we can fix it or if we need userspace
>> * assistance.
>> */
>> -static int vmx_handle_exit(struct kvm_vcpu *vcpu)
>> +static int vmx_handle_exit(struct kvm_vcpu *vcpu,
>> +	enum accel_exit_completion accel_exit)
>> {
>> 	struct vcpu_vmx *vmx = to_vmx(vcpu);
>> 	u32 exit_reason = vmx->exit_reason;
>> @@ -5878,7 +5879,10 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu)
>> 		}
>> 	}
>> 
>> -	if (exit_reason < kvm_vmx_max_exit_handlers
>> +	if (accel_exit == ACCEL_EXIT_SKIP_EMUL_INS) {
>> +		kvm_skip_emulated_instruction(vcpu);
>> +		return 1;
>> +	} else if (exit_reason < kvm_vmx_max_exit_handlers
>> 	    && kvm_vmx_exit_handlers[exit_reason]) {
>> #ifdef CONFIG_RETPOLINE
>> 		if (exit_reason == EXIT_REASON_MSR_WRITE)
>> @@ -6223,7 +6227,8 @@ static void handle_external_interrupt_irqoff(struct kvm_vcpu *vcpu)
>> }
>> STACK_FRAME_NON_STANDARD(handle_external_interrupt_irqoff);
>> 
>> -static void vmx_handle_exit_irqoff(struct kvm_vcpu *vcpu)
>> +static void vmx_handle_exit_irqoff(struct kvm_vcpu *vcpu,
>> +	enum accel_exit_completion *accel_exit)
>> {
>> 	struct vcpu_vmx *vmx = to_vmx(vcpu);
>> 
>> @@ -6231,6 +6236,8 @@ static void vmx_handle_exit_irqoff(struct kvm_vcpu *vcpu)
>> 		handle_external_interrupt_irqoff(vcpu);
>> 	else if (vmx->exit_reason == EXIT_REASON_EXCEPTION_NMI)
>> 		handle_exception_nmi_irqoff(vmx);
>> +	else if (vmx->exit_reason == EXIT_REASON_MSR_WRITE)
>> +		*accel_exit = handle_accel_set_msr_irqoff(vcpu);
>> }
>> 
>> static bool vmx_has_emulated_msr(int index)
>> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
>> index 991dd01..966c659 100644
>> --- a/arch/x86/kvm/x86.c
>> +++ b/arch/x86/kvm/x86.c
>> @@ -1509,6 +1509,49 @@ int kvm_emulate_wrmsr(struct kvm_vcpu *vcpu)
>> }
>> EXPORT_SYMBOL_GPL(kvm_emulate_wrmsr);
>> 
>> +static int handle_accel_set_x2apic_icr_irqoff(struct kvm_vcpu *vcpu, u64 data)
>> +{
>> +	if (lapic_in_kernel(vcpu) && apic_x2apic_mode(vcpu->arch.apic) &&
>> +		((data & KVM_APIC_DEST_MASK) == APIC_DEST_PHYSICAL) &&
>> +		((data & APIC_MODE_MASK) == APIC_DM_FIXED)) {
>> +
>> +		kvm_lapic_set_reg(vcpu->arch.apic, APIC_ICR2, (u32)(data >> 32));
>> +		return kvm_lapic_reg_write(vcpu->arch.apic, APIC_ICR, (u32)data);
>> +	}
>> +
>> +	return 1;
>> +}
>> +
>> +/*
>> + * The fast path for frequent and performance sensitive wrmsr emulation,
>> + * i.e. the sending of IPI, sending IPI early in the VM-Exit flow reduces
>> + * the latency of virtual IPI by avoiding the expensive bits of transitioning
>> + * from guest to host, e.g. reacquiring KVM's SRCU lock. In contrast to the
>> + * other cases which must be called after interrupts are enabled on the host.
>> + */
> 
> This comment belongs better on top of handle_accel_set_x2apic_icr_irqoff().
> As handle_accel_set_msr_irqoff() is in theory written to maybe use it for other MSRs as-well.
> 
>> +enum accel_exit_completion handle_accel_set_msr_irqoff(struct kvm_vcpu *vcpu)
>> +{
>> +	u32 msr = kvm_rcx_read(vcpu);
>> +	u64 data = kvm_read_edx_eax(vcpu);
>> +	int ret = 0;
>> +
>> +	switch (msr) {
>> +	case APIC_BASE_MSR + (APIC_ICR >> 4):
>> +		ret = handle_accel_set_x2apic_icr_irqoff(vcpu, data);
>> +		break;
>> +	default:
>> +		return ACCEL_EXIT_NONE;
>> +	}
>> +
>> +	if (!ret) {
>> +		trace_kvm_msr_write(msr, data);
>> +		return ACCEL_EXIT_SKIP_EMUL_INS;
>> +	}
>> +
>> +	return ACCEL_EXIT_NONE;
>> +}
>> +EXPORT_SYMBOL_GPL(handle_accel_set_msr_irqoff);
>> +
>> /*
>> * Adapt set_msr() to msr_io()'s calling convention
>> */
>> @@ -7984,6 +8027,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
>> 	bool req_int_win =
>> 		dm_request_for_irq_injection(vcpu) &&
>> 		kvm_cpu_accept_dm_intr(vcpu);
>> +	enum accel_exit_completion accel_exit = ACCEL_EXIT_NONE;
>> 
>> 	bool req_immediate_exit = false;
>> 
>> @@ -8226,7 +8270,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
>> 	vcpu->mode = OUTSIDE_GUEST_MODE;
>> 	smp_wmb();
>> 
>> -	kvm_x86_ops->handle_exit_irqoff(vcpu);
>> +	kvm_x86_ops->handle_exit_irqoff(vcpu, &accel_exit);
>> 
>> 	/*
>> 	 * Consume any pending interrupts, including the possible source of
>> @@ -8270,7 +8314,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
>> 		kvm_lapic_sync_from_vapic(vcpu);
>> 
>> 	vcpu->arch.gpa_available = false;
>> -	r = kvm_x86_ops->handle_exit(vcpu);
>> +	r = kvm_x86_ops->handle_exit(vcpu, accel_exit);
>> 	return r;
>> 
>> cancel_injection:
>> diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
>> index 29391af..f14ec14 100644
>> --- a/arch/x86/kvm/x86.h
>> +++ b/arch/x86/kvm/x86.h
>> @@ -291,6 +291,7 @@ bool kvm_mtrr_check_gfn_range_consistency(struct kvm_vcpu *vcpu, gfn_t gfn,
>> bool kvm_vector_hashing_enabled(void);
>> int x86_emulate_instruction(struct kvm_vcpu *vcpu, unsigned long cr2,
>> 			    int emulation_type, void *insn, int insn_len);
>> +enum accel_exit_completion handle_accel_set_msr_irqoff(struct kvm_vcpu *vcpu);
>> 
>> #define KVM_SUPPORTED_XCR0     (XFEATURE_MASK_FP | XFEATURE_MASK_SSE \
>> 				| XFEATURE_MASK_YMM | XFEATURE_MASK_BNDREGS \
>> -- 
>> 2.7.4


^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH v3 1/2] KVM: VMX: FIXED+PHYSICAL mode single target IPI fastpath
  2019-11-20 23:55   ` Liran Alon
@ 2019-11-21  0:43     ` Wanpeng Li
  0 siblings, 0 replies; 7+ messages in thread
From: Wanpeng Li @ 2019-11-21  0:43 UTC (permalink / raw)
  To: Liran Alon
  Cc: LKML, kvm, Paolo Bonzini, Radim Krčmář,
	Sean Christopherson, Vitaly Kuznetsov, Wanpeng Li, Jim Mattson,
	Joerg Roedel

On Thu, 21 Nov 2019 at 07:55, Liran Alon <liran.alon@oracle.com> wrote:
>
>
>
> > On 21 Nov 2019, at 1:36, Liran Alon <liran.alon@oracle.com> wrote:
> >
> >
> >
> >> On 20 Nov 2019, at 5:42, Wanpeng Li <kernellwp@gmail.com> wrote:
> >>
> >> From: Wanpeng Li <wanpengli@tencent.com>
> >>
> >> ICR and TSCDEADLINE MSRs write cause the main MSRs write vmexits in
> >> our product observation, multicast IPIs are not as common as unicast
> >> IPI like RESCHEDULE_VECTOR and CALL_FUNCTION_SINGLE_VECTOR etc.
> >
> > Have you also had the chance to measure non-Linux workloads. Such as Windows?
> >
> >>
> >> This patch tries to optimize x2apic physical destination mode, fixed
> >> delivery mode single target IPI. The fast path is invoked at
> >> ->handle_exit_irqoff() to emulate only the effect of the ICR write
> >> itself, i.e. the sending of IPIs.  Sending IPIs early in the VM-Exit
> >> flow reduces the latency of virtual IPIs by avoiding the expensive bits
> >> of transitioning from guest to host, e.g. reacquiring KVM's SRCU lock.
> >> Especially when running guest w/ KVM_CAP_X86_DISABLE_EXITS capability
> >> enabled or guest can keep running, IPI can be injected to target vCPU
> >> by posted-interrupt immediately.
> >
> > May I suggest an alternative phrasing? Something such as:
> >
> > “””
> > This patch introduce a mechanism to handle certain performance-critical WRMSRs
> > in a very early stage of KVM VMExit handler.
> >
> > This mechanism is specifically used for accelerating writes to x2APIC ICR that
> > attempt to send a virtual IPI with physical destination-mode, fixed delivery-mode
> > and single target. Which was found as one of the main causes of VMExits for
> > Linux workloads.
> >
> > The reason this mechanism significantly reduce the latency of such virtual IPIs
> > is by sending the physical IPI to the target vCPU in a very early stage of KVM
> > VMExit handler, before host interrupts are enabled and before expensive
> > operations such as reacquiring KVM’s SRCU lock.
> > Latency is reduced even more when KVM is able to use APICv posted-interrupt
> > mechanism (which allows to deliver the virtual IPI directly to target vCPU without
> > the need to kick it to host).
> > “””
> >
> >>
> >> Testing on Xeon Skylake server:
> >>
> >> The virtual IPI latency from sender send to receiver receive reduces
> >> more than 200+ cpu cycles.
> >>
> >> Cc: Paolo Bonzini <pbonzini@redhat.com>
> >> Cc: Radim Krčmář <rkrcmar@redhat.com>
> >> Cc: Sean Christopherson <sean.j.christopherson@intel.com>
> >> Cc: Vitaly Kuznetsov <vkuznets@redhat.com>
> >> Cc: Liran Alon <liran.alon@oracle.com>
> >> Signed-off-by: Wanpeng Li <wanpengli@tencent.com>
> >
> > I see you used the code I provided my reply to v2. :)
> > I had only some very minor comments inline below. Therefore:
> > Reviewed-by: Liran Alon <liran.alon@oracle.com>
>
> Oh there is a small thing pointed by Sean I agree with (and missed in my review) before this Reviewed-by can be given.
> You should avoid performing accelerated WRMSR handling in case vCPU is in guest-mode.
> (From obvious reasons as L1 for example could intercept this WRMSR…)

Yes, I add !is_guest_mode(vcpu) checking in v1, but careless lose it
in further versions.

    Wanpeng

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH v3 1/2] KVM: VMX: FIXED+PHYSICAL mode single target IPI fastpath
  2019-11-20 23:36 ` Liran Alon
  2019-11-20 23:55   ` Liran Alon
@ 2019-11-21  3:16   ` Wanpeng Li
  1 sibling, 0 replies; 7+ messages in thread
From: Wanpeng Li @ 2019-11-21  3:16 UTC (permalink / raw)
  To: Liran Alon
  Cc: LKML, kvm, Paolo Bonzini, Radim Krčmář,
	Sean Christopherson, Vitaly Kuznetsov, Wanpeng Li, Jim Mattson,
	Joerg Roedel

On Thu, 21 Nov 2019 at 07:37, Liran Alon <liran.alon@oracle.com> wrote:
>
>
>
> > On 20 Nov 2019, at 5:42, Wanpeng Li <kernellwp@gmail.com> wrote:
> >
> > From: Wanpeng Li <wanpengli@tencent.com>
> >
> > ICR and TSCDEADLINE MSRs write cause the main MSRs write vmexits in
> > our product observation, multicast IPIs are not as common as unicast
> > IPI like RESCHEDULE_VECTOR and CALL_FUNCTION_SINGLE_VECTOR etc.
>
> Have you also had the chance to measure non-Linux workloads. Such as Windows?

I ask around, guys not pay attention to IPIs under windows guests before.

>
> >
> > This patch tries to optimize x2apic physical destination mode, fixed
> > delivery mode single target IPI. The fast path is invoked at
> > ->handle_exit_irqoff() to emulate only the effect of the ICR write
> > itself, i.e. the sending of IPIs.  Sending IPIs early in the VM-Exit
> > flow reduces the latency of virtual IPIs by avoiding the expensive bits
> > of transitioning from guest to host, e.g. reacquiring KVM's SRCU lock.
> > Especially when running guest w/ KVM_CAP_X86_DISABLE_EXITS capability
> > enabled or guest can keep running, IPI can be injected to target vCPU
> > by posted-interrupt immediately.
>
> May I suggest an alternative phrasing? Something such as:

Great, thanks for better English.

>
> “””
> This patch introduce a mechanism to handle certain performance-critical WRMSRs
> in a very early stage of KVM VMExit handler.
>
> This mechanism is specifically used for accelerating writes to x2APIC ICR that
> attempt to send a virtual IPI with physical destination-mode, fixed delivery-mode
> and single target. Which was found as one of the main causes of VMExits for
> Linux workloads.
>
> The reason this mechanism significantly reduce the latency of such virtual IPIs
> is by sending the physical IPI to the target vCPU in a very early stage of KVM
> VMExit handler, before host interrupts are enabled and before expensive
> operations such as reacquiring KVM’s SRCU lock.
> Latency is reduced even more when KVM is able to use APICv posted-interrupt
> mechanism (which allows to deliver the virtual IPI directly to target vCPU without
> the need to kick it to host).
> “””
>
> >
> > Testing on Xeon Skylake server:
> >
> > The virtual IPI latency from sender send to receiver receive reduces
> > more than 200+ cpu cycles.
> >
> > Cc: Paolo Bonzini <pbonzini@redhat.com>
> > Cc: Radim Krčmář <rkrcmar@redhat.com>
> > Cc: Sean Christopherson <sean.j.christopherson@intel.com>
> > Cc: Vitaly Kuznetsov <vkuznets@redhat.com>
> > Cc: Liran Alon <liran.alon@oracle.com>
> > Signed-off-by: Wanpeng Li <wanpengli@tencent.com>
>
> I see you used the code I provided my reply to v2. :)
> I had only some very minor comments inline below. Therefore:
> Reviewed-by: Liran Alon <liran.alon@oracle.com>

Thanks, handle them in v4.

>
> Thanks for doing this optimisation.

Thanks everybody who help make this work nice. :)

    Wanpeng

^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2019-11-21  3:16 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-11-20  3:42 [PATCH v3 1/2] KVM: VMX: FIXED+PHYSICAL mode single target IPI fastpath Wanpeng Li
2019-11-20  3:42 ` [PATCH v3 2/2] KVM: LAPIC: micro-optimize fixed mode ipi delivery Wanpeng Li
2019-11-20  5:31 ` [PATCH v3 1/2] KVM: VMX: FIXED+PHYSICAL mode single target IPI fastpath Wanpeng Li
2019-11-20 23:36 ` Liran Alon
2019-11-20 23:55   ` Liran Alon
2019-11-21  0:43     ` Wanpeng Li
2019-11-21  3:16   ` Wanpeng Li

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).