linux-kernel.vger.kernel.org archive mirror
* [PATCH v4 1/2] KVM: VMX: FIXED+PHYSICAL mode single target IPI fastpath
@ 2019-11-21  3:17 Wanpeng Li
  2019-11-21  3:17 ` [PATCH v4 2/2] KVM: LAPIC: micro-optimize fixed mode ipi delivery Wanpeng Li
                   ` (3 more replies)
  0 siblings, 4 replies; 9+ messages in thread
From: Wanpeng Li @ 2019-11-21  3:17 UTC (permalink / raw)
  To: linux-kernel, kvm
  Cc: Paolo Bonzini, Radim Krčmář,
	Sean Christopherson, Vitaly Kuznetsov, Wanpeng Li, Jim Mattson,
	Joerg Roedel, Liran Alon

From: Wanpeng Li <wanpengli@tencent.com>

In our product observations, writes to the ICR and TSCDEADLINE MSRs cause
the bulk of MSR-write vmexits, and multicast IPIs are not as common as
unicast IPIs such as RESCHEDULE_VECTOR and CALL_FUNCTION_SINGLE_VECTOR.

This patch introduces a mechanism to handle certain performance-critical
WRMSRs at a very early stage of the KVM VMExit handler.

The mechanism is specifically used to accelerate writes to the x2APIC ICR
that attempt to send a virtual IPI with physical destination mode, fixed
delivery mode and a single target, which was found to be one of the main
causes of VMExits for Linux workloads.

The mechanism significantly reduces the latency of such virtual IPIs
because the physical IPI is sent to the target vCPU at a very early stage
of the KVM VMExit handler, before host interrupts are enabled and before
expensive operations such as reacquiring KVM's SRCU lock.
Latency is reduced even further when KVM can use the APICv posted-interrupt
mechanism, which delivers the virtual IPI directly to the target vCPU
without the need to kick it out to the host.
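
For context, the guest-side operation that this fastpath accelerates is a
single WRMSR to the x2APIC ICR with physical destination mode, fixed
delivery mode and one target vCPU. A minimal sketch of such a sender
(illustration only, not part of this patch; it assumes the standard
<asm/apicdef.h> and <asm/msr.h> definitions):

	#include <asm/apicdef.h>	/* APIC_BASE_MSR, APIC_ICR, APIC_DM_FIXED, ... */
	#include <asm/msr.h>		/* wrmsrl() */

	/*
	 * Send a fixed, physical-destination-mode IPI to a single CPU via
	 * the x2APIC ICR MSR (0x830): the destination APIC ID goes in the
	 * upper 32 bits, the vector/delivery mode/destination mode in the
	 * lower 32 bits.
	 */
	static void x2apic_send_single_ipi(u32 dest_apicid, u8 vector)
	{
		u64 icr = ((u64)dest_apicid << 32) |
			  APIC_DEST_PHYSICAL |	/* physical destination mode */
			  APIC_DM_FIXED |	/* fixed delivery mode */
			  vector;

		wrmsrl(APIC_BASE_MSR + (APIC_ICR >> 4), icr);
	}

This is exactly the write that handle_accel_set_x2apic_icr_irqoff() below
recognizes and completes before host interrupts are re-enabled.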

Testing on a Xeon Skylake server:

The virtual IPI latency, from the sender's send to the receiver's receive,
is reduced by more than 200 CPU cycles.

Reviewed-by: Liran Alon <liran.alon@oracle.com>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Radim Krčmář <rkrcmar@redhat.com>
Cc: Sean Christopherson <sean.j.christopherson@intel.com>
Cc: Vitaly Kuznetsov <vkuznets@redhat.com>
Cc: Liran Alon <liran.alon@oracle.com>
Signed-off-by: Wanpeng Li <wanpengli@tencent.com>
---
v3 -> v4:
 * check !is_guest_mode(vcpu)
 * ACCEL_EXIT_SKIP_EMUL_INS doesn't need to be -1
 * move comments on top of handle_accel_set_x2apic_icr_irqoff
 * update patch description
v2 -> v3:
 * for both VMX and SVM
 * vmx_handle_exit() gets its second parameter by value, not by pointer
 * rename parameter to "accel_exit_completion"
 * preserve tracepoint ordering
 * rename handler to handle_accel_set_msr_irqoff and make it more generic
 * add comments above handle_accel_set_msr_irqoff
 * use msr index APIC_BASE_MSR + (APIC_ICR >> 4)
v1 -> v2:
 * add tracepoint
 * Instead of a separate vcpu->fast_vmexit, set vmx->exit_reason
   to -1 if the fast path succeeds.
 * move the "kvm_skip_emulated_instruction(vcpu)" to vmx_handle_exit
 * move the handling into vmx_handle_exit_irqoff()

 arch/x86/include/asm/kvm_host.h | 11 ++++++++--
 arch/x86/kvm/svm.c              | 15 +++++++++----
 arch/x86/kvm/vmx/vmx.c          | 14 +++++++++---
 arch/x86/kvm/x86.c              | 48 +++++++++++++++++++++++++++++++++++++++--
 arch/x86/kvm/x86.h              |  1 +
 5 files changed, 78 insertions(+), 11 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 898ab9e..62af1c5 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -175,6 +175,11 @@ enum {
 	VCPU_SREG_LDTR,
 };
 
+enum accel_exit_completion {
+	ACCEL_EXIT_NONE,
+	ACCEL_EXIT_SKIP_EMUL_INS,
+};
+
 #include <asm/kvm_emulate.h>
 
 #define KVM_NR_MEM_OBJS 40
@@ -1084,7 +1089,8 @@ struct kvm_x86_ops {
 	void (*tlb_flush_gva)(struct kvm_vcpu *vcpu, gva_t addr);
 
 	void (*run)(struct kvm_vcpu *vcpu);
-	int (*handle_exit)(struct kvm_vcpu *vcpu);
+	int (*handle_exit)(struct kvm_vcpu *vcpu,
+		enum accel_exit_completion accel_exit);
 	int (*skip_emulated_instruction)(struct kvm_vcpu *vcpu);
 	void (*set_interrupt_shadow)(struct kvm_vcpu *vcpu, int mask);
 	u32 (*get_interrupt_shadow)(struct kvm_vcpu *vcpu);
@@ -1134,7 +1140,8 @@ struct kvm_x86_ops {
 	int (*check_intercept)(struct kvm_vcpu *vcpu,
 			       struct x86_instruction_info *info,
 			       enum x86_intercept_stage stage);
-	void (*handle_exit_irqoff)(struct kvm_vcpu *vcpu);
+	void (*handle_exit_irqoff)(struct kvm_vcpu *vcpu,
+		enum accel_exit_completion *accel_exit);
 	bool (*mpx_supported)(void);
 	bool (*xsaves_supported)(void);
 	bool (*umip_emulated)(void);
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index d02a73a..d0367c4 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -4929,7 +4929,8 @@ static void svm_get_exit_info(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2)
 	*info2 = control->exit_info_2;
 }
 
-static int handle_exit(struct kvm_vcpu *vcpu)
+static int handle_exit(struct kvm_vcpu *vcpu,
+	enum accel_exit_completion accel_exit)
 {
 	struct vcpu_svm *svm = to_svm(vcpu);
 	struct kvm_run *kvm_run = vcpu->run;
@@ -4987,7 +4988,10 @@ static int handle_exit(struct kvm_vcpu *vcpu)
 		       __func__, svm->vmcb->control.exit_int_info,
 		       exit_code);
 
-	if (exit_code >= ARRAY_SIZE(svm_exit_handlers)
+	if (accel_exit == ACCEL_EXIT_SKIP_EMUL_INS) {
+		kvm_skip_emulated_instruction(vcpu);
+		return 1;
+	} else if (exit_code >= ARRAY_SIZE(svm_exit_handlers)
 	    || !svm_exit_handlers[exit_code]) {
 		vcpu_unimpl(vcpu, "svm: unexpected exit reason 0x%x\n", exit_code);
 		dump_vmcb(vcpu);
@@ -6187,9 +6191,12 @@ static int svm_check_intercept(struct kvm_vcpu *vcpu,
 	return ret;
 }
 
-static void svm_handle_exit_irqoff(struct kvm_vcpu *vcpu)
+static void svm_handle_exit_irqoff(struct kvm_vcpu *vcpu,
+	enum accel_exit_completion *accel_exit)
 {
-
+	if (!is_guest_mode(vcpu) &&
+		to_svm(vcpu)->vmcb->control.exit_code == EXIT_REASON_MSR_WRITE)
+		*accel_exit = handle_accel_set_msr_irqoff(vcpu);
 }
 
 static void svm_sched_in(struct kvm_vcpu *vcpu, int cpu)
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 621142e5..5d77188 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -5792,7 +5792,8 @@ void dump_vmcs(void)
  * The guest has exited.  See if we can fix it or if we need userspace
  * assistance.
  */
-static int vmx_handle_exit(struct kvm_vcpu *vcpu)
+static int vmx_handle_exit(struct kvm_vcpu *vcpu,
+	enum accel_exit_completion accel_exit)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
 	u32 exit_reason = vmx->exit_reason;
@@ -5878,7 +5879,10 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu)
 		}
 	}
 
-	if (exit_reason < kvm_vmx_max_exit_handlers
+	if (accel_exit == ACCEL_EXIT_SKIP_EMUL_INS) {
+		kvm_skip_emulated_instruction(vcpu);
+		return 1;
+	} else if (exit_reason < kvm_vmx_max_exit_handlers
 	    && kvm_vmx_exit_handlers[exit_reason]) {
 #ifdef CONFIG_RETPOLINE
 		if (exit_reason == EXIT_REASON_MSR_WRITE)
@@ -6223,7 +6227,8 @@ static void handle_external_interrupt_irqoff(struct kvm_vcpu *vcpu)
 }
 STACK_FRAME_NON_STANDARD(handle_external_interrupt_irqoff);
 
-static void vmx_handle_exit_irqoff(struct kvm_vcpu *vcpu)
+static void vmx_handle_exit_irqoff(struct kvm_vcpu *vcpu,
+	enum accel_exit_completion *accel_exit)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
 
@@ -6231,6 +6236,9 @@ static void vmx_handle_exit_irqoff(struct kvm_vcpu *vcpu)
 		handle_external_interrupt_irqoff(vcpu);
 	else if (vmx->exit_reason == EXIT_REASON_EXCEPTION_NMI)
 		handle_exception_nmi_irqoff(vmx);
+	else if (!is_guest_mode(vcpu) &&
+		vmx->exit_reason == EXIT_REASON_MSR_WRITE)
+		*accel_exit = handle_accel_set_msr_irqoff(vcpu);
 }
 
 static bool vmx_has_emulated_msr(int index)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 991dd01..c55348c 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1510,6 +1510,49 @@ int kvm_emulate_wrmsr(struct kvm_vcpu *vcpu)
 EXPORT_SYMBOL_GPL(kvm_emulate_wrmsr);
 
 /*
+ * The fast path for frequent and performance sensitive wrmsr emulation,
+ * i.e. the sending of IPI, sending IPI early in the VM-Exit flow reduces
+ * the latency of virtual IPI by avoiding the expensive bits of transitioning
+ * from guest to host, e.g. reacquiring KVM's SRCU lock. In contrast to the
+ * other cases which must be called after interrupts are enabled on the host.
+ */
+static int handle_accel_set_x2apic_icr_irqoff(struct kvm_vcpu *vcpu, u64 data)
+{
+	if (lapic_in_kernel(vcpu) && apic_x2apic_mode(vcpu->arch.apic) &&
+		((data & KVM_APIC_DEST_MASK) == APIC_DEST_PHYSICAL) &&
+		((data & APIC_MODE_MASK) == APIC_DM_FIXED)) {
+
+		kvm_lapic_set_reg(vcpu->arch.apic, APIC_ICR2, (u32)(data >> 32));
+		return kvm_lapic_reg_write(vcpu->arch.apic, APIC_ICR, (u32)data);
+	}
+
+	return 1;
+}
+
+enum accel_exit_completion handle_accel_set_msr_irqoff(struct kvm_vcpu *vcpu)
+{
+	u32 msr = kvm_rcx_read(vcpu);
+	u64 data = kvm_read_edx_eax(vcpu);
+	int ret = 0;
+
+	switch (msr) {
+	case APIC_BASE_MSR + (APIC_ICR >> 4):
+		ret = handle_accel_set_x2apic_icr_irqoff(vcpu, data);
+		break;
+	default:
+		return ACCEL_EXIT_NONE;
+	}
+
+	if (!ret) {
+		trace_kvm_msr_write(msr, data);
+		return ACCEL_EXIT_SKIP_EMUL_INS;
+	}
+
+	return ACCEL_EXIT_NONE;
+}
+EXPORT_SYMBOL_GPL(handle_accel_set_msr_irqoff);
+
+/*
  * Adapt set_msr() to msr_io()'s calling convention
  */
 static int do_get_msr(struct kvm_vcpu *vcpu, unsigned index, u64 *data)
@@ -7984,6 +8027,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 	bool req_int_win =
 		dm_request_for_irq_injection(vcpu) &&
 		kvm_cpu_accept_dm_intr(vcpu);
+	enum accel_exit_completion accel_exit = ACCEL_EXIT_NONE;
 
 	bool req_immediate_exit = false;
 
@@ -8226,7 +8270,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 	vcpu->mode = OUTSIDE_GUEST_MODE;
 	smp_wmb();
 
-	kvm_x86_ops->handle_exit_irqoff(vcpu);
+	kvm_x86_ops->handle_exit_irqoff(vcpu, &accel_exit);
 
 	/*
 	 * Consume any pending interrupts, including the possible source of
@@ -8270,7 +8314,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 		kvm_lapic_sync_from_vapic(vcpu);
 
 	vcpu->arch.gpa_available = false;
-	r = kvm_x86_ops->handle_exit(vcpu);
+	r = kvm_x86_ops->handle_exit(vcpu, accel_exit);
 	return r;
 
 cancel_injection:
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index 29391af..f14ec14 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -291,6 +291,7 @@ bool kvm_mtrr_check_gfn_range_consistency(struct kvm_vcpu *vcpu, gfn_t gfn,
 bool kvm_vector_hashing_enabled(void);
 int x86_emulate_instruction(struct kvm_vcpu *vcpu, unsigned long cr2,
 			    int emulation_type, void *insn, int insn_len);
+enum accel_exit_completion handle_accel_set_msr_irqoff(struct kvm_vcpu *vcpu);
 
 #define KVM_SUPPORTED_XCR0     (XFEATURE_MASK_FP | XFEATURE_MASK_SSE \
 				| XFEATURE_MASK_YMM | XFEATURE_MASK_BNDREGS \
-- 
2.7.4


^ permalink raw reply related	[flat|nested] 9+ messages in thread

* [PATCH v4 2/2] KVM: LAPIC: micro-optimize fixed mode ipi delivery
  2019-11-21  3:17 [PATCH v4 1/2] KVM: VMX: FIXED+PHYSICAL mode single target IPI fastpath Wanpeng Li
@ 2019-11-21  3:17 ` Wanpeng Li
  2020-01-15 17:51   ` Paolo Bonzini
  2019-11-28  0:27 ` [PATCH v4 1/2] KVM: VMX: FIXED+PHYSICAL mode single target IPI fastpath Wanpeng Li
                   ` (2 subsequent siblings)
  3 siblings, 1 reply; 9+ messages in thread
From: Wanpeng Li @ 2019-11-21  3:17 UTC (permalink / raw)
  To: linux-kernel, kvm
  Cc: Paolo Bonzini, Radim Krčmář,
	Sean Christopherson, Vitaly Kuznetsov, Wanpeng Li, Jim Mattson,
	Joerg Roedel

From: Wanpeng Li <wanpengli@tencent.com>

This patch optimizes away the redundant logic that runs before a fixed-mode
IPI is delivered in the fast path: broadcast handling has to take the slow
path anyway, so the delivery-mode fixup can be deferred until just before
the slow path.

Signed-off-by: Wanpeng Li <wanpengli@tencent.com>
---
 arch/x86/kvm/irq_comm.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/x86/kvm/irq_comm.c b/arch/x86/kvm/irq_comm.c
index 8ecd48d..aa88156 100644
--- a/arch/x86/kvm/irq_comm.c
+++ b/arch/x86/kvm/irq_comm.c
@@ -52,15 +52,15 @@ int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src,
 	unsigned long dest_vcpu_bitmap[BITS_TO_LONGS(KVM_MAX_VCPUS)];
 	unsigned int dest_vcpus = 0;
 
+	if (kvm_irq_delivery_to_apic_fast(kvm, src, irq, &r, dest_map))
+		return r;
+
 	if (irq->dest_mode == 0 && irq->dest_id == 0xff &&
 			kvm_lowest_prio_delivery(irq)) {
 		printk(KERN_INFO "kvm: apic: phys broadcast and lowest prio\n");
 		irq->delivery_mode = APIC_DM_FIXED;
 	}
 
-	if (kvm_irq_delivery_to_apic_fast(kvm, src, irq, &r, dest_map))
-		return r;
-
 	memset(dest_vcpu_bitmap, 0, sizeof(dest_vcpu_bitmap));
 
 	kvm_for_each_vcpu(i, vcpu, kvm) {
-- 
2.7.4


^ permalink raw reply related	[flat|nested] 9+ messages in thread

* Re: [PATCH v4 1/2] KVM: VMX: FIXED+PHYSICAL mode single target IPI fastpath
  2019-11-21  3:17 [PATCH v4 1/2] KVM: VMX: FIXED+PHYSICAL mode single target IPI fastpath Wanpeng Li
  2019-11-21  3:17 ` [PATCH v4 2/2] KVM: LAPIC: micro-optimize fixed mode ipi delivery Wanpeng Li
@ 2019-11-28  0:27 ` Wanpeng Li
  2019-12-09  8:15   ` Wanpeng Li
  2020-01-15 17:48 ` Paolo Bonzini
  2020-02-26  3:32 ` Wanpeng Li
  3 siblings, 1 reply; 9+ messages in thread
From: Wanpeng Li @ 2019-11-28  0:27 UTC (permalink / raw)
  To: Paolo Bonzini
  Cc: LKML, kvm, Radim Krčmář,
	Sean Christopherson, Vitaly Kuznetsov, Wanpeng Li, Jim Mattson,
	Joerg Roedel, Liran Alon

ping to catch the second week of the merge window. :)
On Thu, 21 Nov 2019 at 11:17, Wanpeng Li <kernellwp@gmail.com> wrote:
>
> From: Wanpeng Li <wanpengli@tencent.com>
>
> ICR and TSCDEADLINE MSRs write cause the main MSRs write vmexits in our
> product observation, multicast IPIs are not as common as unicast IPI like
> RESCHEDULE_VECTOR and CALL_FUNCTION_SINGLE_VECTOR etc.
>
> This patch introduce a mechanism to handle certain performance-critical
> WRMSRs in a very early stage of KVM VMExit handler.
>
> This mechanism is specifically used for accelerating writes to x2APIC ICR
> that attempt to send a virtual IPI with physical destination-mode, fixed
> delivery-mode and single target. Which was found as one of the main causes
> of VMExits for Linux workloads.
>
> The reason this mechanism significantly reduce the latency of such virtual
> IPIs is by sending the physical IPI to the target vCPU in a very early stage
> of KVM VMExit handler, before host interrupts are enabled and before expensive
> operations such as reacquiring KVM’s SRCU lock.
> Latency is reduced even more when KVM is able to use APICv posted-interrupt
> mechanism (which allows to deliver the virtual IPI directly to target vCPU
> without the need to kick it to host).
>
> Testing on Xeon Skylake server:
>
> The virtual IPI latency from sender send to receiver receive reduces
> more than 200+ cpu cycles.
>
> Reviewed-by: Liran Alon <liran.alon@oracle.com>
> Cc: Paolo Bonzini <pbonzini@redhat.com>
> Cc: Radim Krčmář <rkrcmar@redhat.com>
> Cc: Sean Christopherson <sean.j.christopherson@intel.com>
> Cc: Vitaly Kuznetsov <vkuznets@redhat.com>
> Cc: Liran Alon <liran.alon@oracle.com>
> Signed-off-by: Wanpeng Li <wanpengli@tencent.com>
> ---
> v3 -> v4:
>  * check !is_guest_mode(vcpu)
>  * ACCEL_EXIT_SKIP_EMUL_INS don't need be -1
>  * move comments on top of handle_accel_set_x2apic_icr_irqoff
>  * update patch description
> v2 -> v3:
>  * for both VMX and SVM
>  * vmx_handle_exit() get second parameter by value and not by pointer
>  * rename parameter to “accel_exit_completion”
>  * preserve tracepoint ordering
>  * rename handler to handle_accel_set_msr_irqoff and more generic
>  * add comments above handle_accel_set_msr_irqoff
>  * msr index APIC_BASE_MSR + (APIC_ICR >> 4)
> v1 -> v2:
>  * add tracepoint
>  * Instead of a separate vcpu->fast_vmexit, set exit_reason
>   to vmx->exit_reason to -1 if the fast path succeeds.
>  * move the "kvm_skip_emulated_instruction(vcpu)" to vmx_handle_exit
>  * moving the handling into vmx_handle_exit_irqoff()
>
>  arch/x86/include/asm/kvm_host.h | 11 ++++++++--
>  arch/x86/kvm/svm.c              | 15 +++++++++----
>  arch/x86/kvm/vmx/vmx.c          | 14 +++++++++---
>  arch/x86/kvm/x86.c              | 48 +++++++++++++++++++++++++++++++++++++++--
>  arch/x86/kvm/x86.h              |  1 +
>  5 files changed, 78 insertions(+), 11 deletions(-)
>
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index 898ab9e..62af1c5 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -175,6 +175,11 @@ enum {
>         VCPU_SREG_LDTR,
>  };
>
> +enum accel_exit_completion {
> +       ACCEL_EXIT_NONE,
> +       ACCEL_EXIT_SKIP_EMUL_INS,
> +};
> +
>  #include <asm/kvm_emulate.h>
>
>  #define KVM_NR_MEM_OBJS 40
> @@ -1084,7 +1089,8 @@ struct kvm_x86_ops {
>         void (*tlb_flush_gva)(struct kvm_vcpu *vcpu, gva_t addr);
>
>         void (*run)(struct kvm_vcpu *vcpu);
> -       int (*handle_exit)(struct kvm_vcpu *vcpu);
> +       int (*handle_exit)(struct kvm_vcpu *vcpu,
> +               enum accel_exit_completion accel_exit);
>         int (*skip_emulated_instruction)(struct kvm_vcpu *vcpu);
>         void (*set_interrupt_shadow)(struct kvm_vcpu *vcpu, int mask);
>         u32 (*get_interrupt_shadow)(struct kvm_vcpu *vcpu);
> @@ -1134,7 +1140,8 @@ struct kvm_x86_ops {
>         int (*check_intercept)(struct kvm_vcpu *vcpu,
>                                struct x86_instruction_info *info,
>                                enum x86_intercept_stage stage);
> -       void (*handle_exit_irqoff)(struct kvm_vcpu *vcpu);
> +       void (*handle_exit_irqoff)(struct kvm_vcpu *vcpu,
> +               enum accel_exit_completion *accel_exit);
>         bool (*mpx_supported)(void);
>         bool (*xsaves_supported)(void);
>         bool (*umip_emulated)(void);
> diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
> index d02a73a..d0367c4 100644
> --- a/arch/x86/kvm/svm.c
> +++ b/arch/x86/kvm/svm.c
> @@ -4929,7 +4929,8 @@ static void svm_get_exit_info(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2)
>         *info2 = control->exit_info_2;
>  }
>
> -static int handle_exit(struct kvm_vcpu *vcpu)
> +static int handle_exit(struct kvm_vcpu *vcpu,
> +       enum accel_exit_completion accel_exit)
>  {
>         struct vcpu_svm *svm = to_svm(vcpu);
>         struct kvm_run *kvm_run = vcpu->run;
> @@ -4987,7 +4988,10 @@ static int handle_exit(struct kvm_vcpu *vcpu)
>                        __func__, svm->vmcb->control.exit_int_info,
>                        exit_code);
>
> -       if (exit_code >= ARRAY_SIZE(svm_exit_handlers)
> +       if (accel_exit == ACCEL_EXIT_SKIP_EMUL_INS) {
> +               kvm_skip_emulated_instruction(vcpu);
> +               return 1;
> +       } else if (exit_code >= ARRAY_SIZE(svm_exit_handlers)
>             || !svm_exit_handlers[exit_code]) {
>                 vcpu_unimpl(vcpu, "svm: unexpected exit reason 0x%x\n", exit_code);
>                 dump_vmcb(vcpu);
> @@ -6187,9 +6191,12 @@ static int svm_check_intercept(struct kvm_vcpu *vcpu,
>         return ret;
>  }
>
> -static void svm_handle_exit_irqoff(struct kvm_vcpu *vcpu)
> +static void svm_handle_exit_irqoff(struct kvm_vcpu *vcpu,
> +       enum accel_exit_completion *accel_exit)
>  {
> -
> +       if (!is_guest_mode(vcpu) &&
> +               to_svm(vcpu)->vmcb->control.exit_code == EXIT_REASON_MSR_WRITE)
> +               *accel_exit = handle_accel_set_msr_irqoff(vcpu);
>  }
>
>  static void svm_sched_in(struct kvm_vcpu *vcpu, int cpu)
> diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
> index 621142e5..5d77188 100644
> --- a/arch/x86/kvm/vmx/vmx.c
> +++ b/arch/x86/kvm/vmx/vmx.c
> @@ -5792,7 +5792,8 @@ void dump_vmcs(void)
>   * The guest has exited.  See if we can fix it or if we need userspace
>   * assistance.
>   */
> -static int vmx_handle_exit(struct kvm_vcpu *vcpu)
> +static int vmx_handle_exit(struct kvm_vcpu *vcpu,
> +       enum accel_exit_completion accel_exit)
>  {
>         struct vcpu_vmx *vmx = to_vmx(vcpu);
>         u32 exit_reason = vmx->exit_reason;
> @@ -5878,7 +5879,10 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu)
>                 }
>         }
>
> -       if (exit_reason < kvm_vmx_max_exit_handlers
> +       if (accel_exit == ACCEL_EXIT_SKIP_EMUL_INS) {
> +               kvm_skip_emulated_instruction(vcpu);
> +               return 1;
> +       } else if (exit_reason < kvm_vmx_max_exit_handlers
>             && kvm_vmx_exit_handlers[exit_reason]) {
>  #ifdef CONFIG_RETPOLINE
>                 if (exit_reason == EXIT_REASON_MSR_WRITE)
> @@ -6223,7 +6227,8 @@ static void handle_external_interrupt_irqoff(struct kvm_vcpu *vcpu)
>  }
>  STACK_FRAME_NON_STANDARD(handle_external_interrupt_irqoff);
>
> -static void vmx_handle_exit_irqoff(struct kvm_vcpu *vcpu)
> +static void vmx_handle_exit_irqoff(struct kvm_vcpu *vcpu,
> +       enum accel_exit_completion *accel_exit)
>  {
>         struct vcpu_vmx *vmx = to_vmx(vcpu);
>
> @@ -6231,6 +6236,9 @@ static void vmx_handle_exit_irqoff(struct kvm_vcpu *vcpu)
>                 handle_external_interrupt_irqoff(vcpu);
>         else if (vmx->exit_reason == EXIT_REASON_EXCEPTION_NMI)
>                 handle_exception_nmi_irqoff(vmx);
> +       else if (!is_guest_mode(vcpu) &&
> +               vmx->exit_reason == EXIT_REASON_MSR_WRITE)
> +               *accel_exit = handle_accel_set_msr_irqoff(vcpu);
>  }
>
>  static bool vmx_has_emulated_msr(int index)
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 991dd01..c55348c 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -1510,6 +1510,49 @@ int kvm_emulate_wrmsr(struct kvm_vcpu *vcpu)
>  EXPORT_SYMBOL_GPL(kvm_emulate_wrmsr);
>
>  /*
> + * The fast path for frequent and performance sensitive wrmsr emulation,
> + * i.e. the sending of IPI, sending IPI early in the VM-Exit flow reduces
> + * the latency of virtual IPI by avoiding the expensive bits of transitioning
> + * from guest to host, e.g. reacquiring KVM's SRCU lock. In contrast to the
> + * other cases which must be called after interrupts are enabled on the host.
> + */
> +static int handle_accel_set_x2apic_icr_irqoff(struct kvm_vcpu *vcpu, u64 data)
> +{
> +       if (lapic_in_kernel(vcpu) && apic_x2apic_mode(vcpu->arch.apic) &&
> +               ((data & KVM_APIC_DEST_MASK) == APIC_DEST_PHYSICAL) &&
> +               ((data & APIC_MODE_MASK) == APIC_DM_FIXED)) {
> +
> +               kvm_lapic_set_reg(vcpu->arch.apic, APIC_ICR2, (u32)(data >> 32));
> +               return kvm_lapic_reg_write(vcpu->arch.apic, APIC_ICR, (u32)data);
> +       }
> +
> +       return 1;
> +}
> +
> +enum accel_exit_completion handle_accel_set_msr_irqoff(struct kvm_vcpu *vcpu)
> +{
> +       u32 msr = kvm_rcx_read(vcpu);
> +       u64 data = kvm_read_edx_eax(vcpu);
> +       int ret = 0;
> +
> +       switch (msr) {
> +       case APIC_BASE_MSR + (APIC_ICR >> 4):
> +               ret = handle_accel_set_x2apic_icr_irqoff(vcpu, data);
> +               break;
> +       default:
> +               return ACCEL_EXIT_NONE;
> +       }
> +
> +       if (!ret) {
> +               trace_kvm_msr_write(msr, data);
> +               return ACCEL_EXIT_SKIP_EMUL_INS;
> +       }
> +
> +       return ACCEL_EXIT_NONE;
> +}
> +EXPORT_SYMBOL_GPL(handle_accel_set_msr_irqoff);
> +
> +/*
>   * Adapt set_msr() to msr_io()'s calling convention
>   */
>  static int do_get_msr(struct kvm_vcpu *vcpu, unsigned index, u64 *data)
> @@ -7984,6 +8027,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
>         bool req_int_win =
>                 dm_request_for_irq_injection(vcpu) &&
>                 kvm_cpu_accept_dm_intr(vcpu);
> +       enum accel_exit_completion accel_exit = ACCEL_EXIT_NONE;
>
>         bool req_immediate_exit = false;
>
> @@ -8226,7 +8270,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
>         vcpu->mode = OUTSIDE_GUEST_MODE;
>         smp_wmb();
>
> -       kvm_x86_ops->handle_exit_irqoff(vcpu);
> +       kvm_x86_ops->handle_exit_irqoff(vcpu, &accel_exit);
>
>         /*
>          * Consume any pending interrupts, including the possible source of
> @@ -8270,7 +8314,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
>                 kvm_lapic_sync_from_vapic(vcpu);
>
>         vcpu->arch.gpa_available = false;
> -       r = kvm_x86_ops->handle_exit(vcpu);
> +       r = kvm_x86_ops->handle_exit(vcpu, accel_exit);
>         return r;
>
>  cancel_injection:
> diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
> index 29391af..f14ec14 100644
> --- a/arch/x86/kvm/x86.h
> +++ b/arch/x86/kvm/x86.h
> @@ -291,6 +291,7 @@ bool kvm_mtrr_check_gfn_range_consistency(struct kvm_vcpu *vcpu, gfn_t gfn,
>  bool kvm_vector_hashing_enabled(void);
>  int x86_emulate_instruction(struct kvm_vcpu *vcpu, unsigned long cr2,
>                             int emulation_type, void *insn, int insn_len);
> +enum accel_exit_completion handle_accel_set_msr_irqoff(struct kvm_vcpu *vcpu);
>
>  #define KVM_SUPPORTED_XCR0     (XFEATURE_MASK_FP | XFEATURE_MASK_SSE \
>                                 | XFEATURE_MASK_YMM | XFEATURE_MASK_BNDREGS \
> --
> 2.7.4
>

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH v4 1/2] KVM: VMX: FIXED+PHYSICAL mode single target IPI fastpath
  2019-11-28  0:27 ` [PATCH v4 1/2] KVM: VMX: FIXED+PHYSICAL mode single target IPI fastpath Wanpeng Li
@ 2019-12-09  8:15   ` Wanpeng Li
  2019-12-09 17:03     ` Paolo Bonzini
  0 siblings, 1 reply; 9+ messages in thread
From: Wanpeng Li @ 2019-12-09  8:15 UTC (permalink / raw)
  To: Paolo Bonzini
  Cc: LKML, kvm, Radim Krčmář,
	Sean Christopherson, Vitaly Kuznetsov, Wanpeng Li, Jim Mattson,
	Joerg Roedel, Liran Alon

kindly ping after the merge window. :)
On Thu, 28 Nov 2019 at 08:27, Wanpeng Li <kernellwp@gmail.com> wrote:
>
> ping to catch the second week of the merge window. :)
> On Thu, 21 Nov 2019 at 11:17, Wanpeng Li <kernellwp@gmail.com> wrote:
> >
> > From: Wanpeng Li <wanpengli@tencent.com>
> >
> > ICR and TSCDEADLINE MSRs write cause the main MSRs write vmexits in our
> > product observation, multicast IPIs are not as common as unicast IPI like
> > RESCHEDULE_VECTOR and CALL_FUNCTION_SINGLE_VECTOR etc.
> >
> > This patch introduce a mechanism to handle certain performance-critical
> > WRMSRs in a very early stage of KVM VMExit handler.
> >
> > This mechanism is specifically used for accelerating writes to x2APIC ICR
> > that attempt to send a virtual IPI with physical destination-mode, fixed
> > delivery-mode and single target. Which was found as one of the main causes
> > of VMExits for Linux workloads.
> >
> > The reason this mechanism significantly reduce the latency of such virtual
> > IPIs is by sending the physical IPI to the target vCPU in a very early stage
> > of KVM VMExit handler, before host interrupts are enabled and before expensive
> > operations such as reacquiring KVM’s SRCU lock.
> > Latency is reduced even more when KVM is able to use APICv posted-interrupt
> > mechanism (which allows to deliver the virtual IPI directly to target vCPU
> > without the need to kick it to host).
> >
> > Testing on Xeon Skylake server:
> >
> > The virtual IPI latency from sender send to receiver receive reduces
> > more than 200+ cpu cycles.
> >
> > Reviewed-by: Liran Alon <liran.alon@oracle.com>
> > Cc: Paolo Bonzini <pbonzini@redhat.com>
> > Cc: Radim Krčmář <rkrcmar@redhat.com>
> > Cc: Sean Christopherson <sean.j.christopherson@intel.com>
> > Cc: Vitaly Kuznetsov <vkuznets@redhat.com>
> > Cc: Liran Alon <liran.alon@oracle.com>
> > Signed-off-by: Wanpeng Li <wanpengli@tencent.com>
> > ---
> > v3 -> v4:
> >  * check !is_guest_mode(vcpu)
> >  * ACCEL_EXIT_SKIP_EMUL_INS don't need be -1
> >  * move comments on top of handle_accel_set_x2apic_icr_irqoff
> >  * update patch description
> > v2 -> v3:
> >  * for both VMX and SVM
> >  * vmx_handle_exit() get second parameter by value and not by pointer
> >  * rename parameter to “accel_exit_completion”
> >  * preserve tracepoint ordering
> >  * rename handler to handle_accel_set_msr_irqoff and more generic
> >  * add comments above handle_accel_set_msr_irqoff
> >  * msr index APIC_BASE_MSR + (APIC_ICR >> 4)
> > v1 -> v2:
> >  * add tracepoint
> >  * Instead of a separate vcpu->fast_vmexit, set exit_reason
> >   to vmx->exit_reason to -1 if the fast path succeeds.
> >  * move the "kvm_skip_emulated_instruction(vcpu)" to vmx_handle_exit
> >  * moving the handling into vmx_handle_exit_irqoff()
> >
> >  arch/x86/include/asm/kvm_host.h | 11 ++++++++--
> >  arch/x86/kvm/svm.c              | 15 +++++++++----
> >  arch/x86/kvm/vmx/vmx.c          | 14 +++++++++---
> >  arch/x86/kvm/x86.c              | 48 +++++++++++++++++++++++++++++++++++++++--
> >  arch/x86/kvm/x86.h              |  1 +
> >  5 files changed, 78 insertions(+), 11 deletions(-)
> >
> > diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> > index 898ab9e..62af1c5 100644
> > --- a/arch/x86/include/asm/kvm_host.h
> > +++ b/arch/x86/include/asm/kvm_host.h
> > @@ -175,6 +175,11 @@ enum {
> >         VCPU_SREG_LDTR,
> >  };
> >
> > +enum accel_exit_completion {
> > +       ACCEL_EXIT_NONE,
> > +       ACCEL_EXIT_SKIP_EMUL_INS,
> > +};
> > +
> >  #include <asm/kvm_emulate.h>
> >
> >  #define KVM_NR_MEM_OBJS 40
> > @@ -1084,7 +1089,8 @@ struct kvm_x86_ops {
> >         void (*tlb_flush_gva)(struct kvm_vcpu *vcpu, gva_t addr);
> >
> >         void (*run)(struct kvm_vcpu *vcpu);
> > -       int (*handle_exit)(struct kvm_vcpu *vcpu);
> > +       int (*handle_exit)(struct kvm_vcpu *vcpu,
> > +               enum accel_exit_completion accel_exit);
> >         int (*skip_emulated_instruction)(struct kvm_vcpu *vcpu);
> >         void (*set_interrupt_shadow)(struct kvm_vcpu *vcpu, int mask);
> >         u32 (*get_interrupt_shadow)(struct kvm_vcpu *vcpu);
> > @@ -1134,7 +1140,8 @@ struct kvm_x86_ops {
> >         int (*check_intercept)(struct kvm_vcpu *vcpu,
> >                                struct x86_instruction_info *info,
> >                                enum x86_intercept_stage stage);
> > -       void (*handle_exit_irqoff)(struct kvm_vcpu *vcpu);
> > +       void (*handle_exit_irqoff)(struct kvm_vcpu *vcpu,
> > +               enum accel_exit_completion *accel_exit);
> >         bool (*mpx_supported)(void);
> >         bool (*xsaves_supported)(void);
> >         bool (*umip_emulated)(void);
> > diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
> > index d02a73a..d0367c4 100644
> > --- a/arch/x86/kvm/svm.c
> > +++ b/arch/x86/kvm/svm.c
> > @@ -4929,7 +4929,8 @@ static void svm_get_exit_info(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2)
> >         *info2 = control->exit_info_2;
> >  }
> >
> > -static int handle_exit(struct kvm_vcpu *vcpu)
> > +static int handle_exit(struct kvm_vcpu *vcpu,
> > +       enum accel_exit_completion accel_exit)
> >  {
> >         struct vcpu_svm *svm = to_svm(vcpu);
> >         struct kvm_run *kvm_run = vcpu->run;
> > @@ -4987,7 +4988,10 @@ static int handle_exit(struct kvm_vcpu *vcpu)
> >                        __func__, svm->vmcb->control.exit_int_info,
> >                        exit_code);
> >
> > -       if (exit_code >= ARRAY_SIZE(svm_exit_handlers)
> > +       if (accel_exit == ACCEL_EXIT_SKIP_EMUL_INS) {
> > +               kvm_skip_emulated_instruction(vcpu);
> > +               return 1;
> > +       } else if (exit_code >= ARRAY_SIZE(svm_exit_handlers)
> >             || !svm_exit_handlers[exit_code]) {
> >                 vcpu_unimpl(vcpu, "svm: unexpected exit reason 0x%x\n", exit_code);
> >                 dump_vmcb(vcpu);
> > @@ -6187,9 +6191,12 @@ static int svm_check_intercept(struct kvm_vcpu *vcpu,
> >         return ret;
> >  }
> >
> > -static void svm_handle_exit_irqoff(struct kvm_vcpu *vcpu)
> > +static void svm_handle_exit_irqoff(struct kvm_vcpu *vcpu,
> > +       enum accel_exit_completion *accel_exit)
> >  {
> > -
> > +       if (!is_guest_mode(vcpu) &&
> > +               to_svm(vcpu)->vmcb->control.exit_code == EXIT_REASON_MSR_WRITE)
> > +               *accel_exit = handle_accel_set_msr_irqoff(vcpu);
> >  }
> >
> >  static void svm_sched_in(struct kvm_vcpu *vcpu, int cpu)
> > diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
> > index 621142e5..5d77188 100644
> > --- a/arch/x86/kvm/vmx/vmx.c
> > +++ b/arch/x86/kvm/vmx/vmx.c
> > @@ -5792,7 +5792,8 @@ void dump_vmcs(void)
> >   * The guest has exited.  See if we can fix it or if we need userspace
> >   * assistance.
> >   */
> > -static int vmx_handle_exit(struct kvm_vcpu *vcpu)
> > +static int vmx_handle_exit(struct kvm_vcpu *vcpu,
> > +       enum accel_exit_completion accel_exit)
> >  {
> >         struct vcpu_vmx *vmx = to_vmx(vcpu);
> >         u32 exit_reason = vmx->exit_reason;
> > @@ -5878,7 +5879,10 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu)
> >                 }
> >         }
> >
> > -       if (exit_reason < kvm_vmx_max_exit_handlers
> > +       if (accel_exit == ACCEL_EXIT_SKIP_EMUL_INS) {
> > +               kvm_skip_emulated_instruction(vcpu);
> > +               return 1;
> > +       } else if (exit_reason < kvm_vmx_max_exit_handlers
> >             && kvm_vmx_exit_handlers[exit_reason]) {
> >  #ifdef CONFIG_RETPOLINE
> >                 if (exit_reason == EXIT_REASON_MSR_WRITE)
> > @@ -6223,7 +6227,8 @@ static void handle_external_interrupt_irqoff(struct kvm_vcpu *vcpu)
> >  }
> >  STACK_FRAME_NON_STANDARD(handle_external_interrupt_irqoff);
> >
> > -static void vmx_handle_exit_irqoff(struct kvm_vcpu *vcpu)
> > +static void vmx_handle_exit_irqoff(struct kvm_vcpu *vcpu,
> > +       enum accel_exit_completion *accel_exit)
> >  {
> >         struct vcpu_vmx *vmx = to_vmx(vcpu);
> >
> > @@ -6231,6 +6236,9 @@ static void vmx_handle_exit_irqoff(struct kvm_vcpu *vcpu)
> >                 handle_external_interrupt_irqoff(vcpu);
> >         else if (vmx->exit_reason == EXIT_REASON_EXCEPTION_NMI)
> >                 handle_exception_nmi_irqoff(vmx);
> > +       else if (!is_guest_mode(vcpu) &&
> > +               vmx->exit_reason == EXIT_REASON_MSR_WRITE)
> > +               *accel_exit = handle_accel_set_msr_irqoff(vcpu);
> >  }
> >
> >  static bool vmx_has_emulated_msr(int index)
> > diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> > index 991dd01..c55348c 100644
> > --- a/arch/x86/kvm/x86.c
> > +++ b/arch/x86/kvm/x86.c
> > @@ -1510,6 +1510,49 @@ int kvm_emulate_wrmsr(struct kvm_vcpu *vcpu)
> >  EXPORT_SYMBOL_GPL(kvm_emulate_wrmsr);
> >
> >  /*
> > + * The fast path for frequent and performance sensitive wrmsr emulation,
> > + * i.e. the sending of IPI, sending IPI early in the VM-Exit flow reduces
> > + * the latency of virtual IPI by avoiding the expensive bits of transitioning
> > + * from guest to host, e.g. reacquiring KVM's SRCU lock. In contrast to the
> > + * other cases which must be called after interrupts are enabled on the host.
> > + */
> > +static int handle_accel_set_x2apic_icr_irqoff(struct kvm_vcpu *vcpu, u64 data)
> > +{
> > +       if (lapic_in_kernel(vcpu) && apic_x2apic_mode(vcpu->arch.apic) &&
> > +               ((data & KVM_APIC_DEST_MASK) == APIC_DEST_PHYSICAL) &&
> > +               ((data & APIC_MODE_MASK) == APIC_DM_FIXED)) {
> > +
> > +               kvm_lapic_set_reg(vcpu->arch.apic, APIC_ICR2, (u32)(data >> 32));
> > +               return kvm_lapic_reg_write(vcpu->arch.apic, APIC_ICR, (u32)data);
> > +       }
> > +
> > +       return 1;
> > +}
> > +
> > +enum accel_exit_completion handle_accel_set_msr_irqoff(struct kvm_vcpu *vcpu)
> > +{
> > +       u32 msr = kvm_rcx_read(vcpu);
> > +       u64 data = kvm_read_edx_eax(vcpu);
> > +       int ret = 0;
> > +
> > +       switch (msr) {
> > +       case APIC_BASE_MSR + (APIC_ICR >> 4):
> > +               ret = handle_accel_set_x2apic_icr_irqoff(vcpu, data);
> > +               break;
> > +       default:
> > +               return ACCEL_EXIT_NONE;
> > +       }
> > +
> > +       if (!ret) {
> > +               trace_kvm_msr_write(msr, data);
> > +               return ACCEL_EXIT_SKIP_EMUL_INS;
> > +       }
> > +
> > +       return ACCEL_EXIT_NONE;
> > +}
> > +EXPORT_SYMBOL_GPL(handle_accel_set_msr_irqoff);
> > +
> > +/*
> >   * Adapt set_msr() to msr_io()'s calling convention
> >   */
> >  static int do_get_msr(struct kvm_vcpu *vcpu, unsigned index, u64 *data)
> > @@ -7984,6 +8027,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
> >         bool req_int_win =
> >                 dm_request_for_irq_injection(vcpu) &&
> >                 kvm_cpu_accept_dm_intr(vcpu);
> > +       enum accel_exit_completion accel_exit = ACCEL_EXIT_NONE;
> >
> >         bool req_immediate_exit = false;
> >
> > @@ -8226,7 +8270,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
> >         vcpu->mode = OUTSIDE_GUEST_MODE;
> >         smp_wmb();
> >
> > -       kvm_x86_ops->handle_exit_irqoff(vcpu);
> > +       kvm_x86_ops->handle_exit_irqoff(vcpu, &accel_exit);
> >
> >         /*
> >          * Consume any pending interrupts, including the possible source of
> > @@ -8270,7 +8314,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
> >                 kvm_lapic_sync_from_vapic(vcpu);
> >
> >         vcpu->arch.gpa_available = false;
> > -       r = kvm_x86_ops->handle_exit(vcpu);
> > +       r = kvm_x86_ops->handle_exit(vcpu, accel_exit);
> >         return r;
> >
> >  cancel_injection:
> > diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
> > index 29391af..f14ec14 100644
> > --- a/arch/x86/kvm/x86.h
> > +++ b/arch/x86/kvm/x86.h
> > @@ -291,6 +291,7 @@ bool kvm_mtrr_check_gfn_range_consistency(struct kvm_vcpu *vcpu, gfn_t gfn,
> >  bool kvm_vector_hashing_enabled(void);
> >  int x86_emulate_instruction(struct kvm_vcpu *vcpu, unsigned long cr2,
> >                             int emulation_type, void *insn, int insn_len);
> > +enum accel_exit_completion handle_accel_set_msr_irqoff(struct kvm_vcpu *vcpu);
> >
> >  #define KVM_SUPPORTED_XCR0     (XFEATURE_MASK_FP | XFEATURE_MASK_SSE \
> >                                 | XFEATURE_MASK_YMM | XFEATURE_MASK_BNDREGS \
> > --
> > 2.7.4
> >

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH v4 1/2] KVM: VMX: FIXED+PHYSICAL mode single target IPI fastpath
  2019-12-09  8:15   ` Wanpeng Li
@ 2019-12-09 17:03     ` Paolo Bonzini
  2019-12-10  0:19       ` Wanpeng Li
  0 siblings, 1 reply; 9+ messages in thread
From: Paolo Bonzini @ 2019-12-09 17:03 UTC (permalink / raw)
  To: Wanpeng Li
  Cc: LKML, kvm, Radim Krčmář,
	Sean Christopherson, Vitaly Kuznetsov, Wanpeng Li, Jim Mattson,
	Joerg Roedel, Liran Alon

On 09/12/19 09:15, Wanpeng Li wrote:
> kindly ping after the merge window. :)

Looks good.  Naming is hard, and I don't much like the "accel" part.
As soon as I come up with some names I prefer, I will propose them and
apply the patch.

Paolo


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH v4 1/2] KVM: VMX: FIXED+PHYSICAL mode single target IPI fastpath
  2019-12-09 17:03     ` Paolo Bonzini
@ 2019-12-10  0:19       ` Wanpeng Li
  0 siblings, 0 replies; 9+ messages in thread
From: Wanpeng Li @ 2019-12-10  0:19 UTC (permalink / raw)
  To: Paolo Bonzini
  Cc: LKML, kvm, Radim Krčmář,
	Sean Christopherson, Vitaly Kuznetsov, Wanpeng Li, Jim Mattson,
	Joerg Roedel, Liran Alon

On Tue, 10 Dec 2019 at 01:03, Paolo Bonzini <pbonzini@redhat.com> wrote:
>
> On 09/12/19 09:15, Wanpeng Li wrote:
> > kindly ping after the merge window. :)
>
> Looks good.  Naming is hard, and I don't like very much the "accel"
> part.  As soon as I come up with some names I prefer I will propose them
> and apply the patch.

Great! Thanks!

    Wanpeng

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH v4 1/2] KVM: VMX: FIXED+PHYSICAL mode single target IPI fastpath
  2019-11-21  3:17 [PATCH v4 1/2] KVM: VMX: FIXED+PHYSICAL mode single target IPI fastpath Wanpeng Li
  2019-11-21  3:17 ` [PATCH v4 2/2] KVM: LAPIC: micro-optimize fixed mode ipi delivery Wanpeng Li
  2019-11-28  0:27 ` [PATCH v4 1/2] KVM: VMX: FIXED+PHYSICAL mode single target IPI fastpath Wanpeng Li
@ 2020-01-15 17:48 ` Paolo Bonzini
  2020-02-26  3:32 ` Wanpeng Li
  3 siblings, 0 replies; 9+ messages in thread
From: Paolo Bonzini @ 2020-01-15 17:48 UTC (permalink / raw)
  To: Wanpeng Li, linux-kernel, kvm
  Cc: Radim Krčmář,
	Sean Christopherson, Vitaly Kuznetsov, Wanpeng Li, Jim Mattson,
	Joerg Roedel, Liran Alon

On 21/11/19 04:17, Wanpeng Li wrote:
> From: Wanpeng Li <wanpengli@tencent.com>
> 
> ICR and TSCDEADLINE MSRs write cause the main MSRs write vmexits in our 
> product observation, multicast IPIs are not as common as unicast IPI like 
> RESCHEDULE_VECTOR and CALL_FUNCTION_SINGLE_VECTOR etc.
> 
> This patch introduce a mechanism to handle certain performance-critical 
> WRMSRs in a very early stage of KVM VMExit handler.
> 
> This mechanism is specifically used for accelerating writes to x2APIC ICR 
> that attempt to send a virtual IPI with physical destination-mode, fixed 
> delivery-mode and single target. Which was found as one of the main causes 
> of VMExits for Linux workloads.
> 
> The reason this mechanism significantly reduce the latency of such virtual 
> IPIs is by sending the physical IPI to the target vCPU in a very early stage 
> of KVM VMExit handler, before host interrupts are enabled and before expensive
> operations such as reacquiring KVM’s SRCU lock.
> Latency is reduced even more when KVM is able to use APICv posted-interrupt
> mechanism (which allows to deliver the virtual IPI directly to target vCPU 
> without the need to kick it to host).
> 
> Testing on Xeon Skylake server:
> 
> The virtual IPI latency from sender send to receiver receive reduces 
> more than 200+ cpu cycles.

Applied with s/accel_exit/exit_fastpath/ and s/accel/fastpath/.
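
For reference, after that substitution the completion enum introduced by
this patch would read roughly as follows (names inferred from the rename
above, so the identifiers in the merged tree may differ slightly):

	enum exit_fastpath_completion {
		EXIT_FASTPATH_NONE,
		EXIT_FASTPATH_SKIP_EMUL_INS,
	};

and handle_accel_set_msr_irqoff() would correspondingly become
handle_fastpath_set_msr_irqoff().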

Paolo

> Reviewed-by: Liran Alon <liran.alon@oracle.com>
> Cc: Paolo Bonzini <pbonzini@redhat.com>
> Cc: Radim Krčmář <rkrcmar@redhat.com>
> Cc: Sean Christopherson <sean.j.christopherson@intel.com>
> Cc: Vitaly Kuznetsov <vkuznets@redhat.com>
> Cc: Liran Alon <liran.alon@oracle.com>
> Signed-off-by: Wanpeng Li <wanpengli@tencent.com>
> ---
> v3 -> v4:
>  * check !is_guest_mode(vcpu)
>  * ACCEL_EXIT_SKIP_EMUL_INS don't need be -1
>  * move comments on top of handle_accel_set_x2apic_icr_irqoff
>  * update patch description
> v2 -> v3:
>  * for both VMX and SVM
>  * vmx_handle_exit() get second parameter by value and not by pointer
>  * rename parameter to “accel_exit_completion”
>  * preserve tracepoint ordering
>  * rename handler to handle_accel_set_msr_irqoff and more generic
>  * add comments above handle_accel_set_msr_irqoff
>  * msr index APIC_BASE_MSR + (APIC_ICR >> 4)
> v1 -> v2:
>  * add tracepoint
>  * Instead of a separate vcpu->fast_vmexit, set exit_reason
>   to vmx->exit_reason to -1 if the fast path succeeds.
>  * move the "kvm_skip_emulated_instruction(vcpu)" to vmx_handle_exit
>  * moving the handling into vmx_handle_exit_irqoff()
> 
>  arch/x86/include/asm/kvm_host.h | 11 ++++++++--
>  arch/x86/kvm/svm.c              | 15 +++++++++----
>  arch/x86/kvm/vmx/vmx.c          | 14 +++++++++---
>  arch/x86/kvm/x86.c              | 48 +++++++++++++++++++++++++++++++++++++++--
>  arch/x86/kvm/x86.h              |  1 +
>  5 files changed, 78 insertions(+), 11 deletions(-)
> 
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index 898ab9e..62af1c5 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -175,6 +175,11 @@ enum {
>  	VCPU_SREG_LDTR,
>  };
>  
> +enum accel_exit_completion {
> +	ACCEL_EXIT_NONE,
> +	ACCEL_EXIT_SKIP_EMUL_INS,
> +};
> +
>  #include <asm/kvm_emulate.h>
>  
>  #define KVM_NR_MEM_OBJS 40
> @@ -1084,7 +1089,8 @@ struct kvm_x86_ops {
>  	void (*tlb_flush_gva)(struct kvm_vcpu *vcpu, gva_t addr);
>  
>  	void (*run)(struct kvm_vcpu *vcpu);
> -	int (*handle_exit)(struct kvm_vcpu *vcpu);
> +	int (*handle_exit)(struct kvm_vcpu *vcpu,
> +		enum accel_exit_completion accel_exit);
>  	int (*skip_emulated_instruction)(struct kvm_vcpu *vcpu);
>  	void (*set_interrupt_shadow)(struct kvm_vcpu *vcpu, int mask);
>  	u32 (*get_interrupt_shadow)(struct kvm_vcpu *vcpu);
> @@ -1134,7 +1140,8 @@ struct kvm_x86_ops {
>  	int (*check_intercept)(struct kvm_vcpu *vcpu,
>  			       struct x86_instruction_info *info,
>  			       enum x86_intercept_stage stage);
> -	void (*handle_exit_irqoff)(struct kvm_vcpu *vcpu);
> +	void (*handle_exit_irqoff)(struct kvm_vcpu *vcpu,
> +		enum accel_exit_completion *accel_exit);
>  	bool (*mpx_supported)(void);
>  	bool (*xsaves_supported)(void);
>  	bool (*umip_emulated)(void);
> diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
> index d02a73a..d0367c4 100644
> --- a/arch/x86/kvm/svm.c
> +++ b/arch/x86/kvm/svm.c
> @@ -4929,7 +4929,8 @@ static void svm_get_exit_info(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2)
>  	*info2 = control->exit_info_2;
>  }
>  
> -static int handle_exit(struct kvm_vcpu *vcpu)
> +static int handle_exit(struct kvm_vcpu *vcpu,
> +	enum accel_exit_completion accel_exit)
>  {
>  	struct vcpu_svm *svm = to_svm(vcpu);
>  	struct kvm_run *kvm_run = vcpu->run;
> @@ -4987,7 +4988,10 @@ static int handle_exit(struct kvm_vcpu *vcpu)
>  		       __func__, svm->vmcb->control.exit_int_info,
>  		       exit_code);
>  
> -	if (exit_code >= ARRAY_SIZE(svm_exit_handlers)
> +	if (accel_exit == ACCEL_EXIT_SKIP_EMUL_INS) {
> +		kvm_skip_emulated_instruction(vcpu);
> +		return 1;
> +	} else if (exit_code >= ARRAY_SIZE(svm_exit_handlers)
>  	    || !svm_exit_handlers[exit_code]) {
>  		vcpu_unimpl(vcpu, "svm: unexpected exit reason 0x%x\n", exit_code);
>  		dump_vmcb(vcpu);
> @@ -6187,9 +6191,12 @@ static int svm_check_intercept(struct kvm_vcpu *vcpu,
>  	return ret;
>  }
>  
> -static void svm_handle_exit_irqoff(struct kvm_vcpu *vcpu)
> +static void svm_handle_exit_irqoff(struct kvm_vcpu *vcpu,
> +	enum accel_exit_completion *accel_exit)
>  {
> -
> +	if (!is_guest_mode(vcpu) &&
> +		to_svm(vcpu)->vmcb->control.exit_code == EXIT_REASON_MSR_WRITE)
> +		*accel_exit = handle_accel_set_msr_irqoff(vcpu);
>  }
>  
>  static void svm_sched_in(struct kvm_vcpu *vcpu, int cpu)
> diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
> index 621142e5..5d77188 100644
> --- a/arch/x86/kvm/vmx/vmx.c
> +++ b/arch/x86/kvm/vmx/vmx.c
> @@ -5792,7 +5792,8 @@ void dump_vmcs(void)
>   * The guest has exited.  See if we can fix it or if we need userspace
>   * assistance.
>   */
> -static int vmx_handle_exit(struct kvm_vcpu *vcpu)
> +static int vmx_handle_exit(struct kvm_vcpu *vcpu,
> +	enum accel_exit_completion accel_exit)
>  {
>  	struct vcpu_vmx *vmx = to_vmx(vcpu);
>  	u32 exit_reason = vmx->exit_reason;
> @@ -5878,7 +5879,10 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu)
>  		}
>  	}
>  
> -	if (exit_reason < kvm_vmx_max_exit_handlers
> +	if (accel_exit == ACCEL_EXIT_SKIP_EMUL_INS) {
> +		kvm_skip_emulated_instruction(vcpu);
> +		return 1;
> +	} else if (exit_reason < kvm_vmx_max_exit_handlers
>  	    && kvm_vmx_exit_handlers[exit_reason]) {
>  #ifdef CONFIG_RETPOLINE
>  		if (exit_reason == EXIT_REASON_MSR_WRITE)
> @@ -6223,7 +6227,8 @@ static void handle_external_interrupt_irqoff(struct kvm_vcpu *vcpu)
>  }
>  STACK_FRAME_NON_STANDARD(handle_external_interrupt_irqoff);
>  
> -static void vmx_handle_exit_irqoff(struct kvm_vcpu *vcpu)
> +static void vmx_handle_exit_irqoff(struct kvm_vcpu *vcpu,
> +	enum accel_exit_completion *accel_exit)
>  {
>  	struct vcpu_vmx *vmx = to_vmx(vcpu);
>  
> @@ -6231,6 +6236,9 @@ static void vmx_handle_exit_irqoff(struct kvm_vcpu *vcpu)
>  		handle_external_interrupt_irqoff(vcpu);
>  	else if (vmx->exit_reason == EXIT_REASON_EXCEPTION_NMI)
>  		handle_exception_nmi_irqoff(vmx);
> +	else if (!is_guest_mode(vcpu) &&
> +		vmx->exit_reason == EXIT_REASON_MSR_WRITE)
> +		*accel_exit = handle_accel_set_msr_irqoff(vcpu);
>  }
>  
>  static bool vmx_has_emulated_msr(int index)
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 991dd01..c55348c 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -1510,6 +1510,49 @@ int kvm_emulate_wrmsr(struct kvm_vcpu *vcpu)
>  EXPORT_SYMBOL_GPL(kvm_emulate_wrmsr);
>  
>  /*
> + * The fast path for frequent and performance sensitive wrmsr emulation,
> + * i.e. the sending of IPI, sending IPI early in the VM-Exit flow reduces
> + * the latency of virtual IPI by avoiding the expensive bits of transitioning
> + * from guest to host, e.g. reacquiring KVM's SRCU lock. In contrast to the
> + * other cases which must be called after interrupts are enabled on the host.
> + */
> +static int handle_accel_set_x2apic_icr_irqoff(struct kvm_vcpu *vcpu, u64 data)
> +{
> +	if (lapic_in_kernel(vcpu) && apic_x2apic_mode(vcpu->arch.apic) &&
> +		((data & KVM_APIC_DEST_MASK) == APIC_DEST_PHYSICAL) &&
> +		((data & APIC_MODE_MASK) == APIC_DM_FIXED)) {
> +
> +		kvm_lapic_set_reg(vcpu->arch.apic, APIC_ICR2, (u32)(data >> 32));
> +		return kvm_lapic_reg_write(vcpu->arch.apic, APIC_ICR, (u32)data);
> +	}
> +
> +	return 1;
> +}
> +
> +enum accel_exit_completion handle_accel_set_msr_irqoff(struct kvm_vcpu *vcpu)
> +{
> +	u32 msr = kvm_rcx_read(vcpu);
> +	u64 data = kvm_read_edx_eax(vcpu);
> +	int ret = 0;
> +
> +	switch (msr) {
> +	case APIC_BASE_MSR + (APIC_ICR >> 4):
> +		ret = handle_accel_set_x2apic_icr_irqoff(vcpu, data);
> +		break;
> +	default:
> +		return ACCEL_EXIT_NONE;
> +	}
> +
> +	if (!ret) {
> +		trace_kvm_msr_write(msr, data);
> +		return ACCEL_EXIT_SKIP_EMUL_INS;
> +	}
> +
> +	return ACCEL_EXIT_NONE;
> +}
> +EXPORT_SYMBOL_GPL(handle_accel_set_msr_irqoff);
> +
> +/*
>   * Adapt set_msr() to msr_io()'s calling convention
>   */
>  static int do_get_msr(struct kvm_vcpu *vcpu, unsigned index, u64 *data)
> @@ -7984,6 +8027,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
>  	bool req_int_win =
>  		dm_request_for_irq_injection(vcpu) &&
>  		kvm_cpu_accept_dm_intr(vcpu);
> +	enum accel_exit_completion accel_exit = ACCEL_EXIT_NONE;
>  
>  	bool req_immediate_exit = false;
>  
> @@ -8226,7 +8270,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
>  	vcpu->mode = OUTSIDE_GUEST_MODE;
>  	smp_wmb();
>  
> -	kvm_x86_ops->handle_exit_irqoff(vcpu);
> +	kvm_x86_ops->handle_exit_irqoff(vcpu, &accel_exit);
>  
>  	/*
>  	 * Consume any pending interrupts, including the possible source of
> @@ -8270,7 +8314,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
>  		kvm_lapic_sync_from_vapic(vcpu);
>  
>  	vcpu->arch.gpa_available = false;
> -	r = kvm_x86_ops->handle_exit(vcpu);
> +	r = kvm_x86_ops->handle_exit(vcpu, accel_exit);
>  	return r;
>  
>  cancel_injection:
> diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
> index 29391af..f14ec14 100644
> --- a/arch/x86/kvm/x86.h
> +++ b/arch/x86/kvm/x86.h
> @@ -291,6 +291,7 @@ bool kvm_mtrr_check_gfn_range_consistency(struct kvm_vcpu *vcpu, gfn_t gfn,
>  bool kvm_vector_hashing_enabled(void);
>  int x86_emulate_instruction(struct kvm_vcpu *vcpu, unsigned long cr2,
>  			    int emulation_type, void *insn, int insn_len);
> +enum accel_exit_completion handle_accel_set_msr_irqoff(struct kvm_vcpu *vcpu);
>  
>  #define KVM_SUPPORTED_XCR0     (XFEATURE_MASK_FP | XFEATURE_MASK_SSE \
>  				| XFEATURE_MASK_YMM | XFEATURE_MASK_BNDREGS \
> 


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH v4 2/2] KVM: LAPIC: micro-optimize fixed mode ipi delivery
  2019-11-21  3:17 ` [PATCH v4 2/2] KVM: LAPIC: micro-optimize fixed mode ipi delivery Wanpeng Li
@ 2020-01-15 17:51   ` Paolo Bonzini
  0 siblings, 0 replies; 9+ messages in thread
From: Paolo Bonzini @ 2020-01-15 17:51 UTC (permalink / raw)
  To: Wanpeng Li, linux-kernel, kvm
  Cc: Radim Krčmář,
	Sean Christopherson, Vitaly Kuznetsov, Wanpeng Li, Jim Mattson,
	Joerg Roedel

On 21/11/19 04:17, Wanpeng Li wrote:
> From: Wanpeng Li <wanpengli@tencent.com>
> 
> This patch optimizes redundancy logic before fixed mode ipi is delivered
> in the fast path, broadcast handling needs to go slow path, so the delivery
> mode repair can be delayed to before slow path.
> 
> Signed-off-by: Wanpeng Li <wanpengli@tencent.com>
> ---
>  arch/x86/kvm/irq_comm.c | 6 +++---
>  1 file changed, 3 insertions(+), 3 deletions(-)
> 
> diff --git a/arch/x86/kvm/irq_comm.c b/arch/x86/kvm/irq_comm.c
> index 8ecd48d..aa88156 100644
> --- a/arch/x86/kvm/irq_comm.c
> +++ b/arch/x86/kvm/irq_comm.c
> @@ -52,15 +52,15 @@ int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src,
>  	unsigned long dest_vcpu_bitmap[BITS_TO_LONGS(KVM_MAX_VCPUS)];
>  	unsigned int dest_vcpus = 0;
>  
> +	if (kvm_irq_delivery_to_apic_fast(kvm, src, irq, &r, dest_map))
> +		return r;
> +
>  	if (irq->dest_mode == 0 && irq->dest_id == 0xff &&
>  			kvm_lowest_prio_delivery(irq)) {
>  		printk(KERN_INFO "kvm: apic: phys broadcast and lowest prio\n");
>  		irq->delivery_mode = APIC_DM_FIXED;
>  	}
>  
> -	if (kvm_irq_delivery_to_apic_fast(kvm, src, irq, &r, dest_map))
> -		return r;
> -
>  	memset(dest_vcpu_bitmap, 0, sizeof(dest_vcpu_bitmap));
>  
>  	kvm_for_each_vcpu(i, vcpu, kvm) {
> 

Applied.

Paolo


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH v4 1/2] KVM: VMX: FIXED+PHYSICAL mode single target IPI fastpath
  2019-11-21  3:17 [PATCH v4 1/2] KVM: VMX: FIXED+PHYSICAL mode single target IPI fastpath Wanpeng Li
                   ` (2 preceding siblings ...)
  2020-01-15 17:48 ` Paolo Bonzini
@ 2020-02-26  3:32 ` Wanpeng Li
  3 siblings, 0 replies; 9+ messages in thread
From: Wanpeng Li @ 2020-02-26  3:32 UTC (permalink / raw)
  To: LKML, kvm
  Cc: Paolo Bonzini, Radim Krčmář,
	Sean Christopherson, Vitaly Kuznetsov, Wanpeng Li, Jim Mattson,
	Joerg Roedel, Liran Alon

On Thu, 21 Nov 2019 at 11:17, Wanpeng Li <kernellwp@gmail.com> wrote:
>
> From: Wanpeng Li <wanpengli@tencent.com>
>
> ICR and TSCDEADLINE MSRs write cause the main MSRs write vmexits in our
> product observation, multicast IPIs are not as common as unicast IPI like
> RESCHEDULE_VECTOR and CALL_FUNCTION_SINGLE_VECTOR etc.
>
> This patch introduce a mechanism to handle certain performance-critical
> WRMSRs in a very early stage of KVM VMExit handler.
>
> This mechanism is specifically used for accelerating writes to x2APIC ICR
> that attempt to send a virtual IPI with physical destination-mode, fixed
> delivery-mode and single target. Which was found as one of the main causes
> of VMExits for Linux workloads.
>
> The reason this mechanism significantly reduce the latency of such virtual
> IPIs is by sending the physical IPI to the target vCPU in a very early stage
> of KVM VMExit handler, before host interrupts are enabled and before expensive
> operations such as reacquiring KVM’s SRCU lock.
> Latency is reduced even more when KVM is able to use APICv posted-interrupt
> mechanism (which allows to deliver the virtual IPI directly to target vCPU
> without the need to kick it to host).
>
> Testing on Xeon Skylake server:
>
> The virtual IPI latency from sender send to receiver receive reduces
> more than 200+ cpu cycles.

Testing with the IPI microbenchmark (https://lkml.org/lkml/2017/12/19/141):

Normal IPI:           Improved 3%
Broadcast IPI:      Improved 5%

w/ --overcommit cpu-pm=on

Normal IPI:           Improved 14%
Broadcast IPI:      Improved 3.6%

    Wanpeng

^ permalink raw reply	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2020-02-26  3:33 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
2019-11-21  3:17 [PATCH v4 1/2] KVM: VMX: FIXED+PHYSICAL mode single target IPI fastpath Wanpeng Li
2019-11-21  3:17 ` [PATCH v4 2/2] KVM: LAPIC: micro-optimize fixed mode ipi delivery Wanpeng Li
2020-01-15 17:51   ` Paolo Bonzini
2019-11-28  0:27 ` [PATCH v4 1/2] KVM: VMX: FIXED+PHYSICAL mode single target IPI fastpath Wanpeng Li
2019-12-09  8:15   ` Wanpeng Li
2019-12-09 17:03     ` Paolo Bonzini
2019-12-10  0:19       ` Wanpeng Li
2020-01-15 17:48 ` Paolo Bonzini
2020-02-26  3:32 ` Wanpeng Li
