All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v3 0/5] KVM: LAPIC: Optimize timer latency further
@ 2019-05-16  3:06 Wanpeng Li
  2019-05-16  3:06 ` [PATCH v3 1/5] KVM: LAPIC: Extract adaptive tune timer advancement logic Wanpeng Li
                   ` (4 more replies)
  0 siblings, 5 replies; 15+ messages in thread
From: Wanpeng Li @ 2019-05-16  3:06 UTC (permalink / raw)
  To: linux-kernel, kvm; +Cc: Paolo Bonzini, Radim Krčmář

The advance lapic timer tries to hide the hypervisor overhead between the 
time the host emulated timer fires and the time the guest becomes aware that 
the timer has fired. However, it only hides the time between 
apic_timer_fn/handle_preemption_timer -> wait_lapic_expire, instead of the 
real position of vmentry which is mentioned in the original commit 
d0659d946be0 ("KVM: x86: add option to advance tscdeadline hrtimer 
expiration"). There are 700+ cpu cycles between the end of wait_lapic_expire 
and the world switch on my haswell desktop.

This patchset tries to narrow the last gap (wait_lapic_expire -> world switch): 
it takes the real overhead time between apic_timer_fn/handle_preemption_timer
and the world switch into consideration when adaptively tuning timer 
advancement. The patchset can reduce latency by 40% (~1600+ cycles to ~1000+ 
cycles on a haswell desktop) for kvm-unit-tests/tscdeadline_latency when 
testing busy waits.

v2 -> v3:
 * expose 'kvm_timer.timer_advance_ns' to userspace
 * move the tracepoint below guest_exit_irqoff()
 * move wait_lapic_expire() before flushing the L1

v1 -> v2:
 * fix indent in patch 1/4
 * remove the wait_lapic_expire() tracepoint and expose by debugfs
 * move the call to wait_lapic_expire() into vmx.c and svm.c

Wanpeng Li (5):
  KVM: LAPIC: Extract adaptive tune timer advancement logic
  KVM: LAPIC: Fix lapic_timer_advance_ns parameter overflow
  KVM: LAPIC: Expose per-vCPU timer_advance_ns to userspace
  KVM: LAPIC: Delay trace advance expire delta
  KVM: LAPIC: Optimize timer latency further

 arch/x86/kvm/debugfs.c | 16 +++++++++++++
 arch/x86/kvm/lapic.c   | 62 +++++++++++++++++++++++++++++---------------------
 arch/x86/kvm/lapic.h   |  3 ++-
 arch/x86/kvm/svm.c     |  4 ++++
 arch/x86/kvm/vmx/vmx.c |  4 ++++
 arch/x86/kvm/x86.c     |  7 +++---
 6 files changed, 65 insertions(+), 31 deletions(-)

-- 
2.7.4


^ permalink raw reply	[flat|nested] 15+ messages in thread

* [PATCH v3 1/5] KVM: LAPIC: Extract adaptive tune timer advancement logic
  2019-05-16  3:06 [PATCH v3 0/5] KVM: LAPIC: Optimize timer latency further Wanpeng Li
@ 2019-05-16  3:06 ` Wanpeng Li
  2019-05-16  3:06 ` [PATCH v3 2/5] KVM: LAPIC: Fix lapic_timer_advance_ns parameter overflow Wanpeng Li
                   ` (3 subsequent siblings)
  4 siblings, 0 replies; 15+ messages in thread
From: Wanpeng Li @ 2019-05-16  3:06 UTC (permalink / raw)
  To: linux-kernel, kvm
  Cc: Paolo Bonzini, Radim Krčmář,
	Sean Christopherson, Liran Alon

From: Wanpeng Li <wanpengli@tencent.com>

Extract the adaptive timer advancement tuning logic into a separate function.

Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Radim Krčmář <rkrcmar@redhat.com>
Cc: Sean Christopherson <sean.j.christopherson@intel.com>
Cc: Liran Alon <liran.alon@oracle.com>
Signed-off-by: Wanpeng Li <wanpengli@tencent.com>
---
 arch/x86/kvm/lapic.c | 57 ++++++++++++++++++++++++++++++----------------------
 1 file changed, 33 insertions(+), 24 deletions(-)

diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index bd13fdd..2f364fe 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -1501,11 +1501,40 @@ static inline void __wait_lapic_expire(struct kvm_vcpu *vcpu, u64 guest_cycles)
 	}
 }
 
-void wait_lapic_expire(struct kvm_vcpu *vcpu)
+static inline void adaptive_tune_timer_advancement(struct kvm_vcpu *vcpu,
+				u64 guest_tsc, u64 tsc_deadline)
 {
 	struct kvm_lapic *apic = vcpu->arch.apic;
 	u32 timer_advance_ns = apic->lapic_timer.timer_advance_ns;
-	u64 guest_tsc, tsc_deadline, ns;
+	u64 ns;
+
+	/* too early */
+	if (guest_tsc < tsc_deadline) {
+		ns = (tsc_deadline - guest_tsc) * 1000000ULL;
+		do_div(ns, vcpu->arch.virtual_tsc_khz);
+		timer_advance_ns -= min((u32)ns,
+			timer_advance_ns / LAPIC_TIMER_ADVANCE_ADJUST_STEP);
+	} else {
+	/* too late */
+		ns = (guest_tsc - tsc_deadline) * 1000000ULL;
+		do_div(ns, vcpu->arch.virtual_tsc_khz);
+		timer_advance_ns += min((u32)ns,
+			timer_advance_ns / LAPIC_TIMER_ADVANCE_ADJUST_STEP);
+	}
+
+	if (abs(guest_tsc - tsc_deadline) < LAPIC_TIMER_ADVANCE_ADJUST_DONE)
+		apic->lapic_timer.timer_advance_adjust_done = true;
+	if (unlikely(timer_advance_ns > 5000)) {
+		timer_advance_ns = 0;
+		apic->lapic_timer.timer_advance_adjust_done = true;
+	}
+	apic->lapic_timer.timer_advance_ns = timer_advance_ns;
+}
+
+void wait_lapic_expire(struct kvm_vcpu *vcpu)
+{
+	struct kvm_lapic *apic = vcpu->arch.apic;
+	u64 guest_tsc, tsc_deadline;
 
 	if (apic->lapic_timer.expired_tscdeadline == 0)
 		return;
@@ -1521,28 +1550,8 @@ void wait_lapic_expire(struct kvm_vcpu *vcpu)
 	if (guest_tsc < tsc_deadline)
 		__wait_lapic_expire(vcpu, tsc_deadline - guest_tsc);
 
-	if (!apic->lapic_timer.timer_advance_adjust_done) {
-		/* too early */
-		if (guest_tsc < tsc_deadline) {
-			ns = (tsc_deadline - guest_tsc) * 1000000ULL;
-			do_div(ns, vcpu->arch.virtual_tsc_khz);
-			timer_advance_ns -= min((u32)ns,
-				timer_advance_ns / LAPIC_TIMER_ADVANCE_ADJUST_STEP);
-		} else {
-		/* too late */
-			ns = (guest_tsc - tsc_deadline) * 1000000ULL;
-			do_div(ns, vcpu->arch.virtual_tsc_khz);
-			timer_advance_ns += min((u32)ns,
-				timer_advance_ns / LAPIC_TIMER_ADVANCE_ADJUST_STEP);
-		}
-		if (abs(guest_tsc - tsc_deadline) < LAPIC_TIMER_ADVANCE_ADJUST_DONE)
-			apic->lapic_timer.timer_advance_adjust_done = true;
-		if (unlikely(timer_advance_ns > 5000)) {
-			timer_advance_ns = 0;
-			apic->lapic_timer.timer_advance_adjust_done = true;
-		}
-		apic->lapic_timer.timer_advance_ns = timer_advance_ns;
-	}
+	if (unlikely(!apic->lapic_timer.timer_advance_adjust_done))
+		adaptive_tune_timer_advancement(vcpu, guest_tsc, tsc_deadline);
 }
 
 static void start_sw_tscdeadline(struct kvm_lapic *apic)
-- 
2.7.4


^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [PATCH v3 2/5] KVM: LAPIC: Fix lapic_timer_advance_ns parameter overflow
  2019-05-16  3:06 [PATCH v3 0/5] KVM: LAPIC: Optimize timer latency further Wanpeng Li
  2019-05-16  3:06 ` [PATCH v3 1/5] KVM: LAPIC: Extract adaptive tune timer advancement logic Wanpeng Li
@ 2019-05-16  3:06 ` Wanpeng Li
  2019-05-16  3:06 ` [PATCH v3 3/5] KVM: LAPIC: Expose per-vCPU timer_advance_ns to userspace Wanpeng Li
                   ` (2 subsequent siblings)
  4 siblings, 0 replies; 15+ messages in thread
From: Wanpeng Li @ 2019-05-16  3:06 UTC (permalink / raw)
  To: linux-kernel, kvm
  Cc: Paolo Bonzini, Radim Krčmář,
	Sean Christopherson, Liran Alon

From: Wanpeng Li <wanpengli@tencent.com>

After commit c3941d9e0 (KVM: lapic: Allow user to disable adaptive tuning of
timer advancement), '-1' enables adaptive tuning starting from the default
advancement of 1000ns. However, we should expose an int instead of an
overflowed uint module parameter.

Before patch:

/sys/module/kvm/parameters/lapic_timer_advance_ns:4294967295

After patch:

/sys/module/kvm/parameters/lapic_timer_advance_ns:-1

Fixes: c3941d9e0 (KVM: lapic: Allow user to disable adaptive tuning of timer advancement)
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Radim Krčmář <rkrcmar@redhat.com>
Cc: Sean Christopherson <sean.j.christopherson@intel.com>
Cc: Liran Alon <liran.alon@oracle.com>
Reviewed-by: Sean Christopherson <sean.j.christopherson@intel.com>
Signed-off-by: Wanpeng Li <wanpengli@tencent.com>
---
 arch/x86/kvm/x86.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index b63e7b0..f2e3847 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -143,7 +143,7 @@ module_param(tsc_tolerance_ppm, uint, S_IRUGO | S_IWUSR);
  * tuning, i.e. allows priveleged userspace to set an exact advancement time.
  */
 static int __read_mostly lapic_timer_advance_ns = -1;
-module_param(lapic_timer_advance_ns, uint, S_IRUGO | S_IWUSR);
+module_param(lapic_timer_advance_ns, int, S_IRUGO | S_IWUSR);
 
 static bool __read_mostly vector_hashing = true;
 module_param(vector_hashing, bool, S_IRUGO);
-- 
2.7.4


^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [PATCH v3 3/5] KVM: LAPIC: Expose per-vCPU timer_advance_ns to userspace
  2019-05-16  3:06 [PATCH v3 0/5] KVM: LAPIC: Optimize timer latency further Wanpeng Li
  2019-05-16  3:06 ` [PATCH v3 1/5] KVM: LAPIC: Extract adaptive tune timer advancement logic Wanpeng Li
  2019-05-16  3:06 ` [PATCH v3 2/5] KVM: LAPIC: Fix lapic_timer_advance_ns parameter overflow Wanpeng Li
@ 2019-05-16  3:06 ` Wanpeng Li
  2019-05-17 20:05   ` Sean Christopherson
  2019-05-16  3:06 ` [PATCH v3 4/5] KVM: LAPIC: Delay trace advance expire delta Wanpeng Li
  2019-05-16  3:06 ` [PATCH v3 5/5] KVM: LAPIC: Optimize timer latency further Wanpeng Li
  4 siblings, 1 reply; 15+ messages in thread
From: Wanpeng Li @ 2019-05-16  3:06 UTC (permalink / raw)
  To: linux-kernel, kvm
  Cc: Paolo Bonzini, Radim Krčmář,
	Sean Christopherson, Liran Alon

From: Wanpeng Li <wanpengli@tencent.com>

Expose per-vCPU timer_advance_ns to userspace, so it is able to 
query the auto-adjusted value.

Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Radim Krčmář <rkrcmar@redhat.com>
Cc: Sean Christopherson <sean.j.christopherson@intel.com>
Cc: Liran Alon <liran.alon@oracle.com>
Signed-off-by: Wanpeng Li <wanpengli@tencent.com>
---
 arch/x86/kvm/debugfs.c | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/arch/x86/kvm/debugfs.c b/arch/x86/kvm/debugfs.c
index c19c7ed..a6f1f93 100644
--- a/arch/x86/kvm/debugfs.c
+++ b/arch/x86/kvm/debugfs.c
@@ -9,12 +9,22 @@
  */
 #include <linux/kvm_host.h>
 #include <linux/debugfs.h>
+#include "lapic.h"
 
 bool kvm_arch_has_vcpu_debugfs(void)
 {
 	return true;
 }
 
+static int vcpu_get_timer_advance_ns(void *data, u64 *val)
+{
+	struct kvm_vcpu *vcpu = (struct kvm_vcpu *) data;
+	*val = vcpu->arch.apic->lapic_timer.timer_advance_ns;
+	return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(vcpu_timer_advance_ns_fops, vcpu_get_timer_advance_ns, NULL, "%llu\n");
+
 static int vcpu_get_tsc_offset(void *data, u64 *val)
 {
 	struct kvm_vcpu *vcpu = (struct kvm_vcpu *) data;
@@ -51,6 +61,12 @@ int kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu)
 	if (!ret)
 		return -ENOMEM;
 
+	ret = debugfs_create_file("lapic_timer_advance_ns", 0444,
+							vcpu->debugfs_dentry,
+							vcpu, &vcpu_timer_advance_ns_fops);
+	if (!ret)
+		return -ENOMEM;
+
 	if (kvm_has_tsc_control) {
 		ret = debugfs_create_file("tsc-scaling-ratio", 0444,
 							vcpu->debugfs_dentry,
-- 
2.7.4


^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [PATCH v3 4/5] KVM: LAPIC: Delay trace advance expire delta
  2019-05-16  3:06 [PATCH v3 0/5] KVM: LAPIC: Optimize timer latency further Wanpeng Li
                   ` (2 preceding siblings ...)
  2019-05-16  3:06 ` [PATCH v3 3/5] KVM: LAPIC: Expose per-vCPU timer_advance_ns to userspace Wanpeng Li
@ 2019-05-16  3:06 ` Wanpeng Li
  2019-05-17 19:44   ` Sean Christopherson
  2019-05-16  3:06 ` [PATCH v3 5/5] KVM: LAPIC: Optimize timer latency further Wanpeng Li
  4 siblings, 1 reply; 15+ messages in thread
From: Wanpeng Li @ 2019-05-16  3:06 UTC (permalink / raw)
  To: linux-kernel, kvm
  Cc: Paolo Bonzini, Radim Krčmář,
	Sean Christopherson, Liran Alon

From: Wanpeng Li <wanpengli@tencent.com>

The wait_lapic_expire() call was moved above guest_enter_irqoff() because 
its tracepoint violated the RCU extended quiescent state entered by 
guest_enter_irqoff()[1][2]. This patch simply moves the tracepoint 
below guest_exit_irqoff() in vcpu_enter_guest(): snapshot the delta before 
VM-Enter, but trace it after VM-Exit. This allows us to move 
wait_lapic_expire() to just before vmentry in a later patch.

[1] Commit 8b89fe1f6c43 ("kvm: x86: move tracepoints outside extended quiescent state")
[2] https://patchwork.kernel.org/patch/7821111/

Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Radim Krčmář <rkrcmar@redhat.com>
Cc: Sean Christopherson <sean.j.christopherson@intel.com>
Cc: Liran Alon <liran.alon@oracle.com>
Signed-off-by: Wanpeng Li <wanpengli@tencent.com>
---
 arch/x86/kvm/lapic.c | 16 ++++++++--------
 arch/x86/kvm/lapic.h |  1 +
 arch/x86/kvm/x86.c   |  2 ++
 3 files changed, 11 insertions(+), 8 deletions(-)

diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 2f364fe..af38ece 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -1502,27 +1502,27 @@ static inline void __wait_lapic_expire(struct kvm_vcpu *vcpu, u64 guest_cycles)
 }
 
 static inline void adaptive_tune_timer_advancement(struct kvm_vcpu *vcpu,
-				u64 guest_tsc, u64 tsc_deadline)
+				s64 advance_expire_delta)
 {
 	struct kvm_lapic *apic = vcpu->arch.apic;
 	u32 timer_advance_ns = apic->lapic_timer.timer_advance_ns;
 	u64 ns;
 
 	/* too early */
-	if (guest_tsc < tsc_deadline) {
-		ns = (tsc_deadline - guest_tsc) * 1000000ULL;
+	if (advance_expire_delta < 0) {
+		ns = -advance_expire_delta * 1000000ULL;
 		do_div(ns, vcpu->arch.virtual_tsc_khz);
 		timer_advance_ns -= min((u32)ns,
 			timer_advance_ns / LAPIC_TIMER_ADVANCE_ADJUST_STEP);
 	} else {
 	/* too late */
-		ns = (guest_tsc - tsc_deadline) * 1000000ULL;
+		ns = advance_expire_delta * 1000000ULL;
 		do_div(ns, vcpu->arch.virtual_tsc_khz);
 		timer_advance_ns += min((u32)ns,
 			timer_advance_ns / LAPIC_TIMER_ADVANCE_ADJUST_STEP);
 	}
 
-	if (abs(guest_tsc - tsc_deadline) < LAPIC_TIMER_ADVANCE_ADJUST_DONE)
+	if (abs(advance_expire_delta) < LAPIC_TIMER_ADVANCE_ADJUST_DONE)
 		apic->lapic_timer.timer_advance_adjust_done = true;
 	if (unlikely(timer_advance_ns > 5000)) {
 		timer_advance_ns = 0;
@@ -1545,13 +1545,13 @@ void wait_lapic_expire(struct kvm_vcpu *vcpu)
 	tsc_deadline = apic->lapic_timer.expired_tscdeadline;
 	apic->lapic_timer.expired_tscdeadline = 0;
 	guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc());
-	trace_kvm_wait_lapic_expire(vcpu->vcpu_id, guest_tsc - tsc_deadline);
+	apic->lapic_timer.advance_expire_delta = guest_tsc - tsc_deadline;
 
-	if (guest_tsc < tsc_deadline)
+	if (apic->lapic_timer.advance_expire_delta < 0)
 		__wait_lapic_expire(vcpu, tsc_deadline - guest_tsc);
 
 	if (unlikely(!apic->lapic_timer.timer_advance_adjust_done))
-		adaptive_tune_timer_advancement(vcpu, guest_tsc, tsc_deadline);
+		adaptive_tune_timer_advancement(vcpu, apic->lapic_timer.advance_expire_delta);
 }
 
 static void start_sw_tscdeadline(struct kvm_lapic *apic)
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
index d6d049b..3e72a25 100644
--- a/arch/x86/kvm/lapic.h
+++ b/arch/x86/kvm/lapic.h
@@ -32,6 +32,7 @@ struct kvm_timer {
 	u64 tscdeadline;
 	u64 expired_tscdeadline;
 	u32 timer_advance_ns;
+	s64 advance_expire_delta;
 	atomic_t pending;			/* accumulated triggered timers */
 	bool hv_timer_in_use;
 	bool timer_advance_adjust_done;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index f2e3847..4a7b00c 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -7961,6 +7961,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 	++vcpu->stat.exits;
 
 	guest_exit_irqoff();
+	trace_kvm_wait_lapic_expire(vcpu->vcpu_id,
+		vcpu->arch.apic->lapic_timer.advance_expire_delta);
 
 	local_irq_enable();
 	preempt_enable();
-- 
2.7.4


^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [PATCH v3 5/5] KVM: LAPIC: Optimize timer latency further
  2019-05-16  3:06 [PATCH v3 0/5] KVM: LAPIC: Optimize timer latency further Wanpeng Li
                   ` (3 preceding siblings ...)
  2019-05-16  3:06 ` [PATCH v3 4/5] KVM: LAPIC: Delay trace advance expire delta Wanpeng Li
@ 2019-05-16  3:06 ` Wanpeng Li
  2019-05-17 19:50   ` Sean Christopherson
  4 siblings, 1 reply; 15+ messages in thread
From: Wanpeng Li @ 2019-05-16  3:06 UTC (permalink / raw)
  To: linux-kernel, kvm
  Cc: Paolo Bonzini, Radim Krčmář,
	Sean Christopherson, Liran Alon

From: Wanpeng Li <wanpengli@tencent.com>

The advance lapic timer tries to hide the hypervisor overhead between the 
time the host emulated timer fires and the time the guest becomes aware that 
the timer has fired. However, it only hides the time between 
apic_timer_fn/handle_preemption_timer -> wait_lapic_expire, instead of the 
real position of vmentry which is mentioned in the original commit 
d0659d946be0 ("KVM: x86: add option to advance tscdeadline hrtimer 
expiration"). There are 700+ cpu cycles between the end of wait_lapic_expire 
and the world switch on my haswell desktop.

This patch tries to narrow the last gap (wait_lapic_expire -> world switch): 
it takes the real overhead time between apic_timer_fn/handle_preemption_timer
and the world switch into consideration when adaptively tuning timer 
advancement. The patch can reduce latency by 40% (~1600+ cycles to ~1000+ cycles 
on a haswell desktop) for kvm-unit-tests/tscdeadline_latency when testing 
busy waits.

Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Radim Krčmář <rkrcmar@redhat.com>
Cc: Sean Christopherson <sean.j.christopherson@intel.com>
Cc: Liran Alon <liran.alon@oracle.com>
Signed-off-by: Wanpeng Li <wanpengli@tencent.com>
---
 arch/x86/kvm/lapic.c   | 3 ++-
 arch/x86/kvm/lapic.h   | 2 +-
 arch/x86/kvm/svm.c     | 4 ++++
 arch/x86/kvm/vmx/vmx.c | 4 ++++
 arch/x86/kvm/x86.c     | 3 ---
 5 files changed, 11 insertions(+), 5 deletions(-)

diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index af38ece..63513de 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -1531,7 +1531,7 @@ static inline void adaptive_tune_timer_advancement(struct kvm_vcpu *vcpu,
 	apic->lapic_timer.timer_advance_ns = timer_advance_ns;
 }
 
-void wait_lapic_expire(struct kvm_vcpu *vcpu)
+void kvm_wait_lapic_expire(struct kvm_vcpu *vcpu)
 {
 	struct kvm_lapic *apic = vcpu->arch.apic;
 	u64 guest_tsc, tsc_deadline;
@@ -1553,6 +1553,7 @@ void wait_lapic_expire(struct kvm_vcpu *vcpu)
 	if (unlikely(!apic->lapic_timer.timer_advance_adjust_done))
 		adaptive_tune_timer_advancement(vcpu, apic->lapic_timer.advance_expire_delta);
 }
+EXPORT_SYMBOL_GPL(kvm_wait_lapic_expire);
 
 static void start_sw_tscdeadline(struct kvm_lapic *apic)
 {
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
index 3e72a25..f974a3d 100644
--- a/arch/x86/kvm/lapic.h
+++ b/arch/x86/kvm/lapic.h
@@ -220,7 +220,7 @@ static inline int kvm_lapic_latched_init(struct kvm_vcpu *vcpu)
 
 bool kvm_apic_pending_eoi(struct kvm_vcpu *vcpu, int vector);
 
-void wait_lapic_expire(struct kvm_vcpu *vcpu);
+void kvm_wait_lapic_expire(struct kvm_vcpu *vcpu);
 
 bool kvm_intr_is_single_vcpu_fast(struct kvm *kvm, struct kvm_lapic_irq *irq,
 			struct kvm_vcpu **dest_vcpu);
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 6b92eaf..955cfcb 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -5638,6 +5638,10 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
 	clgi();
 	kvm_load_guest_xcr0(vcpu);
 
+	if (lapic_in_kernel(vcpu) &&
+		vcpu->arch.apic->lapic_timer.timer_advance_ns)
+		kvm_wait_lapic_expire(vcpu);
+
 	/*
 	 * If this vCPU has touched SPEC_CTRL, restore the guest's value if
 	 * it's non-zero. Since vmentry is serialising on affected CPUs, there
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index e1fa935..771d3bf 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -6423,6 +6423,10 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu)
 
 	vmx_update_hv_timer(vcpu);
 
+	if (lapic_in_kernel(vcpu) &&
+		vcpu->arch.apic->lapic_timer.timer_advance_ns)
+		kvm_wait_lapic_expire(vcpu);
+
 	/*
 	 * If this vCPU has touched SPEC_CTRL, restore the guest's value if
 	 * it's non-zero. Since vmentry is serialising on affected CPUs, there
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 4a7b00c..e154f52 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -7903,9 +7903,6 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 	}
 
 	trace_kvm_entry(vcpu->vcpu_id);
-	if (lapic_in_kernel(vcpu) &&
-	    vcpu->arch.apic->lapic_timer.timer_advance_ns)
-		wait_lapic_expire(vcpu);
 	guest_enter_irqoff();
 
 	fpregs_assert_state_consistent();
-- 
2.7.4


^ permalink raw reply related	[flat|nested] 15+ messages in thread

* Re: [PATCH v3 4/5] KVM: LAPIC: Delay trace advance expire delta
  2019-05-16  3:06 ` [PATCH v3 4/5] KVM: LAPIC: Delay trace advance expire delta Wanpeng Li
@ 2019-05-17 19:44   ` Sean Christopherson
  2019-05-20  6:38     ` Wanpeng Li
  0 siblings, 1 reply; 15+ messages in thread
From: Sean Christopherson @ 2019-05-17 19:44 UTC (permalink / raw)
  To: Wanpeng Li
  Cc: linux-kernel, kvm, Paolo Bonzini, Radim Krčmář,
	Liran Alon

On Thu, May 16, 2019 at 11:06:19AM +0800, Wanpeng Li wrote:
> From: Wanpeng Li <wanpengli@tencent.com>
> 
> wait_lapic_expire() call was moved above guest_enter_irqoff() because of 
> its tracepoint, which violated the RCU extended quiescent state invoked 
> by guest_enter_irqoff()[1][2]. This patch simply moves the tracepoint 
> below guest_exit_irqoff() in vcpu_enter_guest(). Snapshot the delta before 
> VM-Enter, but trace it after VM-Exit. This can help us to move 
> wait_lapic_expire() just before vmentry in the later patch.
> 
> [1] Commit 8b89fe1f6c43 ("kvm: x86: move tracepoints outside extended quiescent state")
> [2] https://patchwork.kernel.org/patch/7821111/
> 
> Cc: Paolo Bonzini <pbonzini@redhat.com>
> Cc: Radim Krčmář <rkrcmar@redhat.com>
> Cc: Sean Christopherson <sean.j.christopherson@intel.com>
> Cc: Liran Alon <liran.alon@oracle.com>
> Signed-off-by: Wanpeng Li <wanpengli@tencent.com>
> ---
>  arch/x86/kvm/lapic.c | 16 ++++++++--------
>  arch/x86/kvm/lapic.h |  1 +
>  arch/x86/kvm/x86.c   |  2 ++
>  3 files changed, 11 insertions(+), 8 deletions(-)
> 
> diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
> index 2f364fe..af38ece 100644
> --- a/arch/x86/kvm/lapic.c
> +++ b/arch/x86/kvm/lapic.c
> @@ -1502,27 +1502,27 @@ static inline void __wait_lapic_expire(struct kvm_vcpu *vcpu, u64 guest_cycles)
>  }
>  
>  static inline void adaptive_tune_timer_advancement(struct kvm_vcpu *vcpu,
> -				u64 guest_tsc, u64 tsc_deadline)
> +				s64 advance_expire_delta)
>  {
>  	struct kvm_lapic *apic = vcpu->arch.apic;
>  	u32 timer_advance_ns = apic->lapic_timer.timer_advance_ns;
>  	u64 ns;
>  
>  	/* too early */
> -	if (guest_tsc < tsc_deadline) {
> -		ns = (tsc_deadline - guest_tsc) * 1000000ULL;
> +	if (advance_expire_delta < 0) {
> +		ns = -advance_expire_delta * 1000000ULL;
>  		do_div(ns, vcpu->arch.virtual_tsc_khz);
>  		timer_advance_ns -= min((u32)ns,
>  			timer_advance_ns / LAPIC_TIMER_ADVANCE_ADJUST_STEP);
>  	} else {
>  	/* too late */
> -		ns = (guest_tsc - tsc_deadline) * 1000000ULL;
> +		ns = advance_expire_delta * 1000000ULL;
>  		do_div(ns, vcpu->arch.virtual_tsc_khz);
>  		timer_advance_ns += min((u32)ns,
>  			timer_advance_ns / LAPIC_TIMER_ADVANCE_ADJUST_STEP);
>  	}
>  
> -	if (abs(guest_tsc - tsc_deadline) < LAPIC_TIMER_ADVANCE_ADJUST_DONE)
> +	if (abs(advance_expire_delta) < LAPIC_TIMER_ADVANCE_ADJUST_DONE)
>  		apic->lapic_timer.timer_advance_adjust_done = true;
>  	if (unlikely(timer_advance_ns > 5000)) {
>  		timer_advance_ns = 0;
> @@ -1545,13 +1545,13 @@ void wait_lapic_expire(struct kvm_vcpu *vcpu)
>  	tsc_deadline = apic->lapic_timer.expired_tscdeadline;
>  	apic->lapic_timer.expired_tscdeadline = 0;
>  	guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc());
> -	trace_kvm_wait_lapic_expire(vcpu->vcpu_id, guest_tsc - tsc_deadline);
> +	apic->lapic_timer.advance_expire_delta = guest_tsc - tsc_deadline;
>  
> -	if (guest_tsc < tsc_deadline)
> +	if (apic->lapic_timer.advance_expire_delta < 0)

I'd prefer to keep "guest_tsc < tsc_deadline" here, just so that it's
obvious that the call to __wait_lapic_expire() is safe.  My eyes did a
few double takes reading this code :-)

>  		__wait_lapic_expire(vcpu, tsc_deadline - guest_tsc);
>  
>  	if (unlikely(!apic->lapic_timer.timer_advance_adjust_done))
> -		adaptive_tune_timer_advancement(vcpu, guest_tsc, tsc_deadline);
> +		adaptive_tune_timer_advancement(vcpu, apic->lapic_timer.advance_expire_delta);
>  }
>  
>  static void start_sw_tscdeadline(struct kvm_lapic *apic)
> diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
> index d6d049b..3e72a25 100644
> --- a/arch/x86/kvm/lapic.h
> +++ b/arch/x86/kvm/lapic.h
> @@ -32,6 +32,7 @@ struct kvm_timer {
>  	u64 tscdeadline;
>  	u64 expired_tscdeadline;
>  	u32 timer_advance_ns;
> +	s64 advance_expire_delta;
>  	atomic_t pending;			/* accumulated triggered timers */
>  	bool hv_timer_in_use;
>  	bool timer_advance_adjust_done;
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index f2e3847..4a7b00c 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -7961,6 +7961,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
>  	++vcpu->stat.exits;
>  
>  	guest_exit_irqoff();
> +	trace_kvm_wait_lapic_expire(vcpu->vcpu_id,
> +		vcpu->arch.apic->lapic_timer.advance_expire_delta);

This needs to be guarded with lapic_in_kernel(vcpu).  But, since this is
all in the same flow, a better approach would be to return the delta from
wait_lapic_expire().  That saves 8 bytes in struct kvm_timer and avoids
additional checks for tracing the delta.

E.g.:

	s64 lapic_expire_delta;

	...

        if (lapic_in_kernel(vcpu) &&
            vcpu->arch.apic->lapic_timer.timer_advance_ns)
                lapic_expire_delta = wait_lapic_expire(vcpu);
	else
		lapic_expire_delta = 0;

	...
	
	trace_kvm_wait_lapic_expire(vcpu->vcpu_id, lapic_expire_delta);
>  
>  	local_irq_enable();
>  	preempt_enable();
> -- 
> 2.7.4
> 

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH v3 5/5] KVM: LAPIC: Optimize timer latency further
  2019-05-16  3:06 ` [PATCH v3 5/5] KVM: LAPIC: Optimize timer latency further Wanpeng Li
@ 2019-05-17 19:50   ` Sean Christopherson
  2019-05-20  8:19     ` Wanpeng Li
  0 siblings, 1 reply; 15+ messages in thread
From: Sean Christopherson @ 2019-05-17 19:50 UTC (permalink / raw)
  To: Wanpeng Li
  Cc: linux-kernel, kvm, Paolo Bonzini, Radim Krčmář,
	Liran Alon

On Thu, May 16, 2019 at 11:06:20AM +0800, Wanpeng Li wrote:
> From: Wanpeng Li <wanpengli@tencent.com>
> 
> Advance lapic timer tries to hidden the hypervisor overhead between the 
> host emulated timer fires and the guest awares the timer is fired. However, 
> it just hidden the time between apic_timer_fn/handle_preemption_timer -> 
> wait_lapic_expire, instead of the real position of vmentry which is 
> mentioned in the orignial commit d0659d946be0 ("KVM: x86: add option to 
> advance tscdeadline hrtimer expiration"). There is 700+ cpu cycles between 
> the end of wait_lapic_expire and before world switch on my haswell desktop.
> 
> This patch tries to narrow the last gap(wait_lapic_expire -> world switch), 
> it takes the real overhead time between apic_timer_fn/handle_preemption_timer
> and before world switch into consideration when adaptively tuning timer 
> advancement. The patch can reduce 40% latency (~1600+ cycles to ~1000+ cycles 
> on a haswell desktop) for kvm-unit-tests/tscdeadline_latency when testing 
> busy waits.
> 
> Cc: Paolo Bonzini <pbonzini@redhat.com>
> Cc: Radim Krčmář <rkrcmar@redhat.com>
> Cc: Sean Christopherson <sean.j.christopherson@intel.com>
> Cc: Liran Alon <liran.alon@oracle.com>
> Signed-off-by: Wanpeng Li <wanpengli@tencent.com>
> ---
>  arch/x86/kvm/lapic.c   | 3 ++-
>  arch/x86/kvm/lapic.h   | 2 +-
>  arch/x86/kvm/svm.c     | 4 ++++
>  arch/x86/kvm/vmx/vmx.c | 4 ++++
>  arch/x86/kvm/x86.c     | 3 ---
>  5 files changed, 11 insertions(+), 5 deletions(-)
> 
> diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
> index af38ece..63513de 100644
> --- a/arch/x86/kvm/lapic.c
> +++ b/arch/x86/kvm/lapic.c
> @@ -1531,7 +1531,7 @@ static inline void adaptive_tune_timer_advancement(struct kvm_vcpu *vcpu,
>  	apic->lapic_timer.timer_advance_ns = timer_advance_ns;
>  }
>  
> -void wait_lapic_expire(struct kvm_vcpu *vcpu)
> +void kvm_wait_lapic_expire(struct kvm_vcpu *vcpu)
>  {
>  	struct kvm_lapic *apic = vcpu->arch.apic;
>  	u64 guest_tsc, tsc_deadline;
> @@ -1553,6 +1553,7 @@ void wait_lapic_expire(struct kvm_vcpu *vcpu)
>  	if (unlikely(!apic->lapic_timer.timer_advance_adjust_done))
>  		adaptive_tune_timer_advancement(vcpu, apic->lapic_timer.advance_expire_delta);
>  }
> +EXPORT_SYMBOL_GPL(kvm_wait_lapic_expire);
>  
>  static void start_sw_tscdeadline(struct kvm_lapic *apic)
>  {
> diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
> index 3e72a25..f974a3d 100644
> --- a/arch/x86/kvm/lapic.h
> +++ b/arch/x86/kvm/lapic.h
> @@ -220,7 +220,7 @@ static inline int kvm_lapic_latched_init(struct kvm_vcpu *vcpu)
>  
>  bool kvm_apic_pending_eoi(struct kvm_vcpu *vcpu, int vector);
>  
> -void wait_lapic_expire(struct kvm_vcpu *vcpu);
> +void kvm_wait_lapic_expire(struct kvm_vcpu *vcpu);
>  
>  bool kvm_intr_is_single_vcpu_fast(struct kvm *kvm, struct kvm_lapic_irq *irq,
>  			struct kvm_vcpu **dest_vcpu);
> diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
> index 6b92eaf..955cfcb 100644
> --- a/arch/x86/kvm/svm.c
> +++ b/arch/x86/kvm/svm.c
> @@ -5638,6 +5638,10 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
>  	clgi();
>  	kvm_load_guest_xcr0(vcpu);
>  
> +	if (lapic_in_kernel(vcpu) &&
> +		vcpu->arch.apic->lapic_timer.timer_advance_ns)

Nit: align the two lines of the if statement, doing so makes it easier to
     differentiate between the condition and execution, e.g.:

        if (lapic_in_kernel(vcpu) &&
            vcpu->arch.apic->lapic_timer.timer_advance_ns)
                kvm_wait_lapic_expire(vcpu);

> +		kvm_wait_lapic_expire(vcpu);
> +
>  	/*
>  	 * If this vCPU has touched SPEC_CTRL, restore the guest's value if
>  	 * it's non-zero. Since vmentry is serialising on affected CPUs, there
> diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
> index e1fa935..771d3bf 100644
> --- a/arch/x86/kvm/vmx/vmx.c
> +++ b/arch/x86/kvm/vmx/vmx.c
> @@ -6423,6 +6423,10 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu)
>  
>  	vmx_update_hv_timer(vcpu);
>  
> +	if (lapic_in_kernel(vcpu) &&
> +		vcpu->arch.apic->lapic_timer.timer_advance_ns)
> +		kvm_wait_lapic_expire(vcpu);

Same comment as above.  With those fixed:

Reviewed-by: Sean Christopherson <sean.j.christopherson@intel.com>

> +
>  	/*
>  	 * If this vCPU has touched SPEC_CTRL, restore the guest's value if
>  	 * it's non-zero. Since vmentry is serialising on affected CPUs, there
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 4a7b00c..e154f52 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -7903,9 +7903,6 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
>  	}
>  
>  	trace_kvm_entry(vcpu->vcpu_id);
> -	if (lapic_in_kernel(vcpu) &&
> -	    vcpu->arch.apic->lapic_timer.timer_advance_ns)
> -		wait_lapic_expire(vcpu);
>  	guest_enter_irqoff();
>  
>  	fpregs_assert_state_consistent();
> -- 
> 2.7.4
> 

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH v3 3/5] KVM: LAPIC: Expose per-vCPU timer_advance_ns to userspace
  2019-05-16  3:06 ` [PATCH v3 3/5] KVM: LAPIC: Expose per-vCPU timer_advance_ns to userspace Wanpeng Li
@ 2019-05-17 20:05   ` Sean Christopherson
  2019-05-20  8:43     ` Wanpeng Li
  0 siblings, 1 reply; 15+ messages in thread
From: Sean Christopherson @ 2019-05-17 20:05 UTC (permalink / raw)
  To: Wanpeng Li
  Cc: linux-kernel, kvm, Paolo Bonzini, Radim Krčmář,
	Liran Alon

On Thu, May 16, 2019 at 11:06:18AM +0800, Wanpeng Li wrote:
> From: Wanpeng Li <wanpengli@tencent.com>
> 
> Expose per-vCPU timer_advance_ns to userspace, so it is able to 
> query the auto-adjusted value.
> 
> Cc: Paolo Bonzini <pbonzini@redhat.com>
> Cc: Radim Krčmář <rkrcmar@redhat.com>
> Cc: Sean Christopherson <sean.j.christopherson@intel.com>
> Cc: Liran Alon <liran.alon@oracle.com>
> Signed-off-by: Wanpeng Li <wanpengli@tencent.com>
> ---
>  arch/x86/kvm/debugfs.c | 16 ++++++++++++++++
>  1 file changed, 16 insertions(+)
> 
> diff --git a/arch/x86/kvm/debugfs.c b/arch/x86/kvm/debugfs.c
> index c19c7ed..a6f1f93 100644
> --- a/arch/x86/kvm/debugfs.c
> +++ b/arch/x86/kvm/debugfs.c
> @@ -9,12 +9,22 @@
>   */
>  #include <linux/kvm_host.h>
>  #include <linux/debugfs.h>
> +#include "lapic.h"
>  
>  bool kvm_arch_has_vcpu_debugfs(void)
>  {
>  	return true;
>  }
>  
> +static int vcpu_get_timer_advance_ns(void *data, u64 *val)
> +{
> +	struct kvm_vcpu *vcpu = (struct kvm_vcpu *) data;
> +	*val = vcpu->arch.apic->lapic_timer.timer_advance_ns;

This needs to check lapic_in_kernel() to ensure apic isn't NULL.
Actually, I think we can skip creation of the parameter entirely if
lapic_in_kernel() is false.  VMX and SVM both instantiate the lapic
during kvm_arch_vcpu_create(), which is (obviously) called before
kvm_arch_create_vcpu_debugfs().

> +	return 0;
> +}
> +
> +DEFINE_SIMPLE_ATTRIBUTE(vcpu_timer_advance_ns_fops, vcpu_get_timer_advance_ns, NULL, "%llu\n");
> +
>  static int vcpu_get_tsc_offset(void *data, u64 *val)
>  {
>  	struct kvm_vcpu *vcpu = (struct kvm_vcpu *) data;
> @@ -51,6 +61,12 @@ int kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu)
>  	if (!ret)
>  		return -ENOMEM;
>  
> +	ret = debugfs_create_file("lapic_timer_advance_ns", 0444,
> +							vcpu->debugfs_dentry,
> +							vcpu, &vcpu_timer_advance_ns_fops);
> +	if (!ret)
> +		return -ENOMEM;
> +
>  	if (kvm_has_tsc_control) {
>  		ret = debugfs_create_file("tsc-scaling-ratio", 0444,
>  							vcpu->debugfs_dentry,
> -- 
> 2.7.4
> 

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH v3 4/5] KVM: LAPIC: Delay trace advance expire delta
  2019-05-17 19:44   ` Sean Christopherson
@ 2019-05-20  6:38     ` Wanpeng Li
  2019-05-20 14:56       ` Sean Christopherson
  0 siblings, 1 reply; 15+ messages in thread
From: Wanpeng Li @ 2019-05-20  6:38 UTC (permalink / raw)
  To: Sean Christopherson
  Cc: LKML, kvm, Paolo Bonzini, Radim Krčmář, Liran Alon

On Sat, 18 May 2019 at 03:44, Sean Christopherson
<sean.j.christopherson@intel.com> wrote:
>
> On Thu, May 16, 2019 at 11:06:19AM +0800, Wanpeng Li wrote:
> > From: Wanpeng Li <wanpengli@tencent.com>
> >
> > wait_lapic_expire() call was moved above guest_enter_irqoff() because of
> > its tracepoint, which violated the RCU extended quiescent state invoked
> > by guest_enter_irqoff()[1][2]. This patch simply moves the tracepoint
> > below guest_exit_irqoff() in vcpu_enter_guest(). Snapshot the delta before
> > VM-Enter, but trace it after VM-Exit. This can help us to move
> > wait_lapic_expire() just before vmentry in the later patch.
> >
> > [1] Commit 8b89fe1f6c43 ("kvm: x86: move tracepoints outside extended quiescent state")
> > [2] https://patchwork.kernel.org/patch/7821111/
> >
> > Cc: Paolo Bonzini <pbonzini@redhat.com>
> > Cc: Radim Krčmář <rkrcmar@redhat.com>
> > Cc: Sean Christopherson <sean.j.christopherson@intel.com>
> > Cc: Liran Alon <liran.alon@oracle.com>
> > Signed-off-by: Wanpeng Li <wanpengli@tencent.com>
> > ---
> >  arch/x86/kvm/lapic.c | 16 ++++++++--------
> >  arch/x86/kvm/lapic.h |  1 +
> >  arch/x86/kvm/x86.c   |  2 ++
> >  3 files changed, 11 insertions(+), 8 deletions(-)
> >
> > diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
> > index 2f364fe..af38ece 100644
> > --- a/arch/x86/kvm/lapic.c
> > +++ b/arch/x86/kvm/lapic.c
> > @@ -1502,27 +1502,27 @@ static inline void __wait_lapic_expire(struct kvm_vcpu *vcpu, u64 guest_cycles)
> >  }
> >
> >  static inline void adaptive_tune_timer_advancement(struct kvm_vcpu *vcpu,
> > -                             u64 guest_tsc, u64 tsc_deadline)
> > +                             s64 advance_expire_delta)
> >  {
> >       struct kvm_lapic *apic = vcpu->arch.apic;
> >       u32 timer_advance_ns = apic->lapic_timer.timer_advance_ns;
> >       u64 ns;
> >
> >       /* too early */
> > -     if (guest_tsc < tsc_deadline) {
> > -             ns = (tsc_deadline - guest_tsc) * 1000000ULL;
> > +     if (advance_expire_delta < 0) {
> > +             ns = -advance_expire_delta * 1000000ULL;
> >               do_div(ns, vcpu->arch.virtual_tsc_khz);
> >               timer_advance_ns -= min((u32)ns,
> >                       timer_advance_ns / LAPIC_TIMER_ADVANCE_ADJUST_STEP);
> >       } else {
> >       /* too late */
> > -             ns = (guest_tsc - tsc_deadline) * 1000000ULL;
> > +             ns = advance_expire_delta * 1000000ULL;
> >               do_div(ns, vcpu->arch.virtual_tsc_khz);
> >               timer_advance_ns += min((u32)ns,
> >                       timer_advance_ns / LAPIC_TIMER_ADVANCE_ADJUST_STEP);
> >       }
> >
> > -     if (abs(guest_tsc - tsc_deadline) < LAPIC_TIMER_ADVANCE_ADJUST_DONE)
> > +     if (abs(advance_expire_delta) < LAPIC_TIMER_ADVANCE_ADJUST_DONE)
> >               apic->lapic_timer.timer_advance_adjust_done = true;
> >       if (unlikely(timer_advance_ns > 5000)) {
> >               timer_advance_ns = 0;
> > @@ -1545,13 +1545,13 @@ void wait_lapic_expire(struct kvm_vcpu *vcpu)
> >       tsc_deadline = apic->lapic_timer.expired_tscdeadline;
> >       apic->lapic_timer.expired_tscdeadline = 0;
> >       guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc());
> > -     trace_kvm_wait_lapic_expire(vcpu->vcpu_id, guest_tsc - tsc_deadline);
> > +     apic->lapic_timer.advance_expire_delta = guest_tsc - tsc_deadline;
> >
> > -     if (guest_tsc < tsc_deadline)
> > +     if (apic->lapic_timer.advance_expire_delta < 0)
>
> I'd prefer to keep "guest_tsc < tsc_deadline" here, just so that it's
> obvious that the call to __wait_lapic_expire() is safe.  My eyes did a
> few double takes reading this code :-)

Ok.

>
> >               __wait_lapic_expire(vcpu, tsc_deadline - guest_tsc);
> >
> >       if (unlikely(!apic->lapic_timer.timer_advance_adjust_done))
> > -             adaptive_tune_timer_advancement(vcpu, guest_tsc, tsc_deadline);
> > +             adaptive_tune_timer_advancement(vcpu, apic->lapic_timer.advance_expire_delta);
> >  }
> >
> >  static void start_sw_tscdeadline(struct kvm_lapic *apic)
> > diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
> > index d6d049b..3e72a25 100644
> > --- a/arch/x86/kvm/lapic.h
> > +++ b/arch/x86/kvm/lapic.h
> > @@ -32,6 +32,7 @@ struct kvm_timer {
> >       u64 tscdeadline;
> >       u64 expired_tscdeadline;
> >       u32 timer_advance_ns;
> > +     s64 advance_expire_delta;
> >       atomic_t pending;                       /* accumulated triggered timers */
> >       bool hv_timer_in_use;
> >       bool timer_advance_adjust_done;
> > diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> > index f2e3847..4a7b00c 100644
> > --- a/arch/x86/kvm/x86.c
> > +++ b/arch/x86/kvm/x86.c
> > @@ -7961,6 +7961,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
> >       ++vcpu->stat.exits;
> >
> >       guest_exit_irqoff();
> > +     trace_kvm_wait_lapic_expire(vcpu->vcpu_id,
> > +             vcpu->arch.apic->lapic_timer.advance_expire_delta);
>
> This needs to be guarded with lapic_in_kernel(vcpu).  But, since this is
> all in the same flow, a better approach would be to return the delta from
> wait_lapic_expire().  That saves 8 bytes in struct kvm_timer and avoids
> additional checks for tracing the delta.

As you know, the function wait_lapic_expire() will be moved to vmx.c
and svm.c, so this is not suitable any more.

Regards,
Wanpeng Li

>
> E.g.:
>
>         s64 lapic_expire_delta;
>
>         ...
>
>         if (lapic_in_kernel(vcpu) &&
>             vcpu->arch.apic->lapic_timer.timer_advance_ns)
>                 lapic_expire_delta = wait_lapic_expire(vcpu);
>         else
>                 lapic_expire_delta = 0;
>
>         ...
>
>         trace_kvm_wait_lapic_expire(vcpu->vcpu_id, lapic_expire_delta);
> >
> >       local_irq_enable();
> >       preempt_enable();
> > --
> > 2.7.4
> >

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH v3 5/5] KVM: LAPIC: Optimize timer latency further
  2019-05-17 19:50   ` Sean Christopherson
@ 2019-05-20  8:19     ` Wanpeng Li
  2019-05-20 15:08       ` Sean Christopherson
  0 siblings, 1 reply; 15+ messages in thread
From: Wanpeng Li @ 2019-05-20  8:19 UTC (permalink / raw)
  To: Sean Christopherson
  Cc: LKML, kvm, Paolo Bonzini, Radim Krčmář, Liran Alon

On Sat, 18 May 2019 at 03:50, Sean Christopherson
<sean.j.christopherson@intel.com> wrote:
>
> On Thu, May 16, 2019 at 11:06:20AM +0800, Wanpeng Li wrote:
> > From: Wanpeng Li <wanpengli@tencent.com>
> >
> > Advance lapic timer tries to hidden the hypervisor overhead between the
> > host emulated timer fires and the guest awares the timer is fired. However,
> > it just hidden the time between apic_timer_fn/handle_preemption_timer ->
> > wait_lapic_expire, instead of the real position of vmentry which is
> > mentioned in the orignial commit d0659d946be0 ("KVM: x86: add option to
> > advance tscdeadline hrtimer expiration"). There is 700+ cpu cycles between
> > the end of wait_lapic_expire and before world switch on my haswell desktop.
> >
> > This patch tries to narrow the last gap(wait_lapic_expire -> world switch),
> > it takes the real overhead time between apic_timer_fn/handle_preemption_timer
> > and before world switch into consideration when adaptively tuning timer
> > advancement. The patch can reduce 40% latency (~1600+ cycles to ~1000+ cycles
> > on a haswell desktop) for kvm-unit-tests/tscdeadline_latency when testing
> > busy waits.
> >
> > Cc: Paolo Bonzini <pbonzini@redhat.com>
> > Cc: Radim Krčmář <rkrcmar@redhat.com>
> > Cc: Sean Christopherson <sean.j.christopherson@intel.com>
> > Cc: Liran Alon <liran.alon@oracle.com>
> > Signed-off-by: Wanpeng Li <wanpengli@tencent.com>
> > ---
> >  arch/x86/kvm/lapic.c   | 3 ++-
> >  arch/x86/kvm/lapic.h   | 2 +-
> >  arch/x86/kvm/svm.c     | 4 ++++
> >  arch/x86/kvm/vmx/vmx.c | 4 ++++
> >  arch/x86/kvm/x86.c     | 3 ---
> >  5 files changed, 11 insertions(+), 5 deletions(-)
> >
> > diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
> > index af38ece..63513de 100644
> > --- a/arch/x86/kvm/lapic.c
> > +++ b/arch/x86/kvm/lapic.c
> > @@ -1531,7 +1531,7 @@ static inline void adaptive_tune_timer_advancement(struct kvm_vcpu *vcpu,
> >       apic->lapic_timer.timer_advance_ns = timer_advance_ns;
> >  }
> >
> > -void wait_lapic_expire(struct kvm_vcpu *vcpu)
> > +void kvm_wait_lapic_expire(struct kvm_vcpu *vcpu)
> >  {
> >       struct kvm_lapic *apic = vcpu->arch.apic;
> >       u64 guest_tsc, tsc_deadline;
> > @@ -1553,6 +1553,7 @@ void wait_lapic_expire(struct kvm_vcpu *vcpu)
> >       if (unlikely(!apic->lapic_timer.timer_advance_adjust_done))
> >               adaptive_tune_timer_advancement(vcpu, apic->lapic_timer.advance_expire_delta);
> >  }
> > +EXPORT_SYMBOL_GPL(kvm_wait_lapic_expire);
> >
> >  static void start_sw_tscdeadline(struct kvm_lapic *apic)
> >  {
> > diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
> > index 3e72a25..f974a3d 100644
> > --- a/arch/x86/kvm/lapic.h
> > +++ b/arch/x86/kvm/lapic.h
> > @@ -220,7 +220,7 @@ static inline int kvm_lapic_latched_init(struct kvm_vcpu *vcpu)
> >
> >  bool kvm_apic_pending_eoi(struct kvm_vcpu *vcpu, int vector);
> >
> > -void wait_lapic_expire(struct kvm_vcpu *vcpu);
> > +void kvm_wait_lapic_expire(struct kvm_vcpu *vcpu);
> >
> >  bool kvm_intr_is_single_vcpu_fast(struct kvm *kvm, struct kvm_lapic_irq *irq,
> >                       struct kvm_vcpu **dest_vcpu);
> > diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
> > index 6b92eaf..955cfcb 100644
> > --- a/arch/x86/kvm/svm.c
> > +++ b/arch/x86/kvm/svm.c
> > @@ -5638,6 +5638,10 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
> >       clgi();
> >       kvm_load_guest_xcr0(vcpu);
> >
> > +     if (lapic_in_kernel(vcpu) &&
> > +             vcpu->arch.apic->lapic_timer.timer_advance_ns)
>
> Nit: align the two lines of the if statement, doing so makes it easier to
>      differentiate between the condition and execution, e.g.:
>
>         if (lapic_in_kernel(vcpu) &&
>             vcpu->arch.apic->lapic_timer.timer_advance_ns)
>                 kvm_wait_lapic_expire(vcpu);

This can result in a checkpatch.pl complaint:

WARNING: suspect code indent for conditional statements (8, 24)
#94: FILE: arch/x86/kvm/vmx/vmx.c:6436:
+    if (lapic_in_kernel(vcpu) &&
[...]
+            kvm_wait_lapic_expire(vcpu);

Regards,
Wanpeng Li

>
> > +             kvm_wait_lapic_expire(vcpu);
> > +
> >       /*
> >        * If this vCPU has touched SPEC_CTRL, restore the guest's value if
> >        * it's non-zero. Since vmentry is serialising on affected CPUs, there
> > diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
> > index e1fa935..771d3bf 100644
> > --- a/arch/x86/kvm/vmx/vmx.c
> > +++ b/arch/x86/kvm/vmx/vmx.c
> > @@ -6423,6 +6423,10 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu)
> >
> >       vmx_update_hv_timer(vcpu);
> >
> > +     if (lapic_in_kernel(vcpu) &&
> > +             vcpu->arch.apic->lapic_timer.timer_advance_ns)
> > +             kvm_wait_lapic_expire(vcpu);
>
> Same comment as above.  With those fixed:
>
> Reviewed-by: Sean Christopherson <sean.j.christopherson@intel.com>
>
> > +
> >       /*
> >        * If this vCPU has touched SPEC_CTRL, restore the guest's value if
> >        * it's non-zero. Since vmentry is serialising on affected CPUs, there
> > diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> > index 4a7b00c..e154f52 100644
> > --- a/arch/x86/kvm/x86.c
> > +++ b/arch/x86/kvm/x86.c
> > @@ -7903,9 +7903,6 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
> >       }
> >
> >       trace_kvm_entry(vcpu->vcpu_id);
> > -     if (lapic_in_kernel(vcpu) &&
> > -         vcpu->arch.apic->lapic_timer.timer_advance_ns)
> > -             wait_lapic_expire(vcpu);
> >       guest_enter_irqoff();
> >
> >       fpregs_assert_state_consistent();
> > --
> > 2.7.4
> >

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH v3 3/5] KVM: LAPIC: Expose per-vCPU timer_advance_ns to userspace
  2019-05-17 20:05   ` Sean Christopherson
@ 2019-05-20  8:43     ` Wanpeng Li
  0 siblings, 0 replies; 15+ messages in thread
From: Wanpeng Li @ 2019-05-20  8:43 UTC (permalink / raw)
  To: Sean Christopherson
  Cc: LKML, kvm, Paolo Bonzini, Radim Krčmář, Liran Alon

On Sat, 18 May 2019 at 04:05, Sean Christopherson
<sean.j.christopherson@intel.com> wrote:
>
> On Thu, May 16, 2019 at 11:06:18AM +0800, Wanpeng Li wrote:
> > From: Wanpeng Li <wanpengli@tencent.com>
> >
> > Expose per-vCPU timer_advance_ns to userspace, so it is able to
> > query the auto-adjusted value.
> >
> > Cc: Paolo Bonzini <pbonzini@redhat.com>
> > Cc: Radim Krčmář <rkrcmar@redhat.com>
> > Cc: Sean Christopherson <sean.j.christopherson@intel.com>
> > Cc: Liran Alon <liran.alon@oracle.com>
> > Signed-off-by: Wanpeng Li <wanpengli@tencent.com>
> > ---
> >  arch/x86/kvm/debugfs.c | 16 ++++++++++++++++
> >  1 file changed, 16 insertions(+)
> >
> > diff --git a/arch/x86/kvm/debugfs.c b/arch/x86/kvm/debugfs.c
> > index c19c7ed..a6f1f93 100644
> > --- a/arch/x86/kvm/debugfs.c
> > +++ b/arch/x86/kvm/debugfs.c
> > @@ -9,12 +9,22 @@
> >   */
> >  #include <linux/kvm_host.h>
> >  #include <linux/debugfs.h>
> > +#include "lapic.h"
> >
> >  bool kvm_arch_has_vcpu_debugfs(void)
> >  {
> >       return true;
> >  }
> >
> > +static int vcpu_get_timer_advance_ns(void *data, u64 *val)
> > +{
> > +     struct kvm_vcpu *vcpu = (struct kvm_vcpu *) data;
> > +     *val = vcpu->arch.apic->lapic_timer.timer_advance_ns;
>
> This needs to ensure to check lapic_in_kernel() to ensure apic isn't NULL.
> Actually, I think we can skip creation of the parameter entirely if
> lapic_in_kernel() is false.  VMX and SVM both instantiate the lapic
> during kvm_arch_vcpu_create(), which is (obviously) called before
> kvm_arch_create_vcpu_debugfs().

Handle this in v4.

>
> > +     return 0;
> > +}
> > +
> > +DEFINE_SIMPLE_ATTRIBUTE(vcpu_timer_advance_ns_fops, vcpu_get_timer_advance_ns, NULL, "%llu\n");
> > +
> >  static int vcpu_get_tsc_offset(void *data, u64 *val)
> >  {
> >       struct kvm_vcpu *vcpu = (struct kvm_vcpu *) data;
> > @@ -51,6 +61,12 @@ int kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu)
> >       if (!ret)
> >               return -ENOMEM;
> >
> > +     ret = debugfs_create_file("lapic_timer_advance_ns", 0444,
> > +                                                     vcpu->debugfs_dentry,
> > +                                                     vcpu, &vcpu_timer_advance_ns_fops);
> > +     if (!ret)
> > +             return -ENOMEM;
> > +
> >       if (kvm_has_tsc_control) {
> >               ret = debugfs_create_file("tsc-scaling-ratio", 0444,
> >                                                       vcpu->debugfs_dentry,
> > --
> > 2.7.4
> >

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH v3 4/5] KVM: LAPIC: Delay trace advance expire delta
  2019-05-20  6:38     ` Wanpeng Li
@ 2019-05-20 14:56       ` Sean Christopherson
  0 siblings, 0 replies; 15+ messages in thread
From: Sean Christopherson @ 2019-05-20 14:56 UTC (permalink / raw)
  To: Wanpeng Li
  Cc: LKML, kvm, Paolo Bonzini, Radim Krčmář, Liran Alon

On Mon, May 20, 2019 at 02:38:44PM +0800, Wanpeng Li wrote:
> On Sat, 18 May 2019 at 03:44, Sean Christopherson
> <sean.j.christopherson@intel.com> wrote:
> > This needs to be guarded with lapic_in_kernel(vcpu).  But, since this is
> > all in the same flow, a better approach would be to return the delta from
> > wait_lapic_expire().  That saves 8 bytes in struct kvm_timer and avoids
> > additional checks for tracing the delta.
> 
> As you know, the function wait_lapic_expire() will be moved to vmx.c
> and svm.c, so this is not suitable any more.

Doh, I was too excited about my cleverness and completely forgot why you
were moving the tracepoint in the first place.

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH v3 5/5] KVM: LAPIC: Optimize timer latency further
  2019-05-20  8:19     ` Wanpeng Li
@ 2019-05-20 15:08       ` Sean Christopherson
  0 siblings, 0 replies; 15+ messages in thread
From: Sean Christopherson @ 2019-05-20 15:08 UTC (permalink / raw)
  To: Wanpeng Li
  Cc: LKML, kvm, Paolo Bonzini, Radim Krčmář, Liran Alon

On Mon, May 20, 2019 at 04:19:47PM +0800, Wanpeng Li wrote:
> On Sat, 18 May 2019 at 03:50, Sean Christopherson
> <sean.j.christopherson@intel.com> wrote:
> >
> > On Thu, May 16, 2019 at 11:06:20AM +0800, Wanpeng Li wrote:
> > > From: Wanpeng Li <wanpengli@tencent.com>
> > > diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
> > > index 6b92eaf..955cfcb 100644
> > > --- a/arch/x86/kvm/svm.c
> > > +++ b/arch/x86/kvm/svm.c
> > > @@ -5638,6 +5638,10 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
> > >       clgi();
> > >       kvm_load_guest_xcr0(vcpu);
> > >
> > > +     if (lapic_in_kernel(vcpu) &&
> > > +             vcpu->arch.apic->lapic_timer.timer_advance_ns)
> >
> > Nit: align the two lines of the if statement, doing so makes it easier to
> >      differentiate between the condition and execution, e.g.:
> >
> >         if (lapic_in_kernel(vcpu) &&
> >             vcpu->arch.apic->lapic_timer.timer_advance_ns)
> >                 kvm_wait_lapic_expire(vcpu);
> 
> This can result in checkpatch.pl complain:
> 
> WARNING: suspect code indent for conditional statements (8, 24)
> #94: FILE: arch/x86/kvm/vmx/vmx.c:6436:
> +    if (lapic_in_kernel(vcpu) &&
> [...]
> +            kvm_wait_lapic_expire(vcpu);

That warning fires when the last line of the check and the code block of
the if statement are aligned (and the indent isn't a full tab stop, which
is why your original code isn't flagged).  Examples with explicit leading
whitespace:

Good:

\tif (lapic_in_kernel(vcpu) &&
\t\s\s\s\svcpu->arch.apic->lapic_timer.timer_advance_ns)
\t\tkvm_wait_lapic_expire(vcpu);

Bad:

\tif (lapic_in_kernel(vcpu) &&
\t\s\s\s\svcpu->arch.apic->lapic_timer.timer_advance_ns)
\t\s\s\s\skvm_wait_lapic_expire(vcpu);

^ permalink raw reply	[flat|nested] 15+ messages in thread

* [PATCH v3 5/5] KVM: LAPIC: Optimize timer latency further
  2019-06-12  9:35 [PATCH v3 0/5] " Wanpeng Li
@ 2019-06-12  9:36 ` Wanpeng Li
  0 siblings, 0 replies; 15+ messages in thread
From: Wanpeng Li @ 2019-06-12  9:36 UTC (permalink / raw)
  To: linux-kernel, kvm
  Cc: Paolo Bonzini, Radim Krčmář,
	Sean Christopherson, Liran Alon

From: Wanpeng Li <wanpengli@tencent.com>

The advance lapic timer tries to hide the hypervisor overhead between the
host emulated timer firing and the guest becoming aware that the timer has
fired. However, it only hides the time between
apic_timer_fn/handle_preemption_timer -> wait_lapic_expire, instead of the
real position of vmentry which is mentioned in the original commit
d0659d946be0 ("KVM: x86: add option to advance tscdeadline hrtimer
expiration"). There are 700+ cpu cycles between the end of
wait_lapic_expire and the world switch on my haswell desktop.

This patch tries to narrow the last gap (wait_lapic_expire -> world switch);
it takes the real overhead time between apic_timer_fn/handle_preemption_timer
and just before the world switch into consideration when adaptively tuning
timer advancement. The patch can reduce latency by 40% (~1600+ cycles to
~1000+ cycles on a haswell desktop) for kvm-unit-tests/tscdeadline_latency
when testing busy waits.

Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Radim Krčmář <rkrcmar@redhat.com>
Cc: Sean Christopherson <sean.j.christopherson@intel.com>
Cc: Liran Alon <liran.alon@oracle.com>
Signed-off-by: Wanpeng Li <wanpengli@tencent.com>
---
 arch/x86/kvm/lapic.c   | 3 ++-
 arch/x86/kvm/lapic.h   | 2 +-
 arch/x86/kvm/svm.c     | 4 ++++
 arch/x86/kvm/vmx/vmx.c | 4 ++++
 arch/x86/kvm/x86.c     | 3 ---
 5 files changed, 11 insertions(+), 5 deletions(-)

diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index af38ece..63513de 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -1531,7 +1531,7 @@ static inline void adaptive_tune_timer_advancement(struct kvm_vcpu *vcpu,
 	apic->lapic_timer.timer_advance_ns = timer_advance_ns;
 }
 
-void wait_lapic_expire(struct kvm_vcpu *vcpu)
+void kvm_wait_lapic_expire(struct kvm_vcpu *vcpu)
 {
 	struct kvm_lapic *apic = vcpu->arch.apic;
 	u64 guest_tsc, tsc_deadline;
@@ -1553,6 +1553,7 @@ void wait_lapic_expire(struct kvm_vcpu *vcpu)
 	if (unlikely(!apic->lapic_timer.timer_advance_adjust_done))
 		adaptive_tune_timer_advancement(vcpu, apic->lapic_timer.advance_expire_delta);
 }
+EXPORT_SYMBOL_GPL(kvm_wait_lapic_expire);
 
 static void start_sw_tscdeadline(struct kvm_lapic *apic)
 {
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
index 3e72a25..f974a3d 100644
--- a/arch/x86/kvm/lapic.h
+++ b/arch/x86/kvm/lapic.h
@@ -220,7 +220,7 @@ static inline int kvm_lapic_latched_init(struct kvm_vcpu *vcpu)
 
 bool kvm_apic_pending_eoi(struct kvm_vcpu *vcpu, int vector);
 
-void wait_lapic_expire(struct kvm_vcpu *vcpu);
+void kvm_wait_lapic_expire(struct kvm_vcpu *vcpu);
 
 bool kvm_intr_is_single_vcpu_fast(struct kvm *kvm, struct kvm_lapic_irq *irq,
 			struct kvm_vcpu **dest_vcpu);
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 6b92eaf..955cfcb 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -5638,6 +5638,10 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
 	clgi();
 	kvm_load_guest_xcr0(vcpu);
 
+	if (lapic_in_kernel(vcpu) &&
+		vcpu->arch.apic->lapic_timer.timer_advance_ns)
+		kvm_wait_lapic_expire(vcpu);
+
 	/*
 	 * If this vCPU has touched SPEC_CTRL, restore the guest's value if
 	 * it's non-zero. Since vmentry is serialising on affected CPUs, there
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index e1fa935..771d3bf 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -6423,6 +6423,10 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu)
 
 	vmx_update_hv_timer(vcpu);
 
+	if (lapic_in_kernel(vcpu) &&
+		vcpu->arch.apic->lapic_timer.timer_advance_ns)
+		kvm_wait_lapic_expire(vcpu);
+
 	/*
 	 * If this vCPU has touched SPEC_CTRL, restore the guest's value if
 	 * it's non-zero. Since vmentry is serialising on affected CPUs, there
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 4a7b00c..e154f52 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -7903,9 +7903,6 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 	}
 
 	trace_kvm_entry(vcpu->vcpu_id);
-	if (lapic_in_kernel(vcpu) &&
-	    vcpu->arch.apic->lapic_timer.timer_advance_ns)
-		wait_lapic_expire(vcpu);
 	guest_enter_irqoff();
 
 	fpregs_assert_state_consistent();
-- 
2.7.4


^ permalink raw reply related	[flat|nested] 15+ messages in thread

end of thread, other threads:[~2019-06-12  9:36 UTC | newest]

Thread overview: 15+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-05-16  3:06 [PATCH v3 0/5] KVM: LAPIC: Optimize timer latency further Wanpeng Li
2019-05-16  3:06 ` [PATCH v3 1/5] KVM: LAPIC: Extract adaptive tune timer advancement logic Wanpeng Li
2019-05-16  3:06 ` [PATCH v3 2/5] KVM: LAPIC: Fix lapic_timer_advance_ns parameter overflow Wanpeng Li
2019-05-16  3:06 ` [PATCH v3 3/5] KVM: LAPIC: Expose per-vCPU timer_advance_ns to userspace Wanpeng Li
2019-05-17 20:05   ` Sean Christopherson
2019-05-20  8:43     ` Wanpeng Li
2019-05-16  3:06 ` [PATCH v3 4/5] KVM: LAPIC: Delay trace advance expire delta Wanpeng Li
2019-05-17 19:44   ` Sean Christopherson
2019-05-20  6:38     ` Wanpeng Li
2019-05-20 14:56       ` Sean Christopherson
2019-05-16  3:06 ` [PATCH v3 5/5] KVM: LAPIC: Optimize timer latency further Wanpeng Li
2019-05-17 19:50   ` Sean Christopherson
2019-05-20  8:19     ` Wanpeng Li
2019-05-20 15:08       ` Sean Christopherson
2019-06-12  9:35 [PATCH v3 0/5] " Wanpeng Li
2019-06-12  9:36 ` [PATCH v3 5/5] " Wanpeng Li

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.