kvm.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Wanpeng Li <kernellwp@gmail.com>
To: linux-kernel@vger.kernel.org, kvm@vger.kernel.org
Cc: "Paolo Bonzini" <pbonzini@redhat.com>,
	"Radim Krčmář" <rkrcmar@redhat.com>,
	"Sean Christopherson" <sean.j.christopherson@intel.com>
Subject: [PATCH v4 1/2] KVM: LAPIC: Optimize timer latency consider world switch time
Date: Fri, 14 Jun 2019 09:15:49 +0800	[thread overview]
Message-ID: <1560474949-20497-2-git-send-email-wanpengli@tencent.com> (raw)
In-Reply-To: <1560474949-20497-1-git-send-email-wanpengli@tencent.com>

From: Wanpeng Li <wanpengli@tencent.com>

Advancing the lapic timer tries to hide the hypervisor overhead between the
moment the host-emulated timer fires and the moment the guest becomes aware
that it has fired. However, even after sustained optimization,
kvm-unit-tests/tscdeadline_latency still observes ~1000 cycles of latency,
because we lose the time between the end of wait_lapic_expire() and the
moment the guest becomes aware that the timer has fired. There is code
between the end of wait_lapic_expire() and the world switch; furthermore,
the world switch itself has overhead. Waiting in wait_lapic_expire() until
guest_tsc equals the target deadline is therefore too late: the guest will
observe the latency between the end of wait_lapic_expire() and the
completion of vmentry. This patch takes this time into account.

The vmentry_advance_ns module parameter defaults to a conservative 25ns
(thanks to Radim's kvm-unit-tests/vmentry_latency.flat); it can be
tuned/reworked in the future.

Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Radim Krčmář <rkrcmar@redhat.com>
Cc: Sean Christopherson <sean.j.christopherson@intel.com>
Signed-off-by: Wanpeng Li <wanpengli@tencent.com>
---
v3 -> v4:
 * default value is 25ns
 * compute vmentry_advance_cycles in kvm_set_tsc_khz() path
v2 -> v3:
 * read-only module parameter
 * get_vmentry_advance_cycles() not inline
v1 -> v2:
 * rename get_vmentry_advance_delta to get_vmentry_advance_cycles
 * cache vmentry_advance_cycles by setting param bit 0 
 * add param max limit 

 arch/x86/kvm/lapic.c   | 21 ++++++++++++++++++---
 arch/x86/kvm/lapic.h   |  2 ++
 arch/x86/kvm/vmx/vmx.c |  3 ++-
 arch/x86/kvm/x86.c     | 12 ++++++++++--
 arch/x86/kvm/x86.h     |  2 ++
 5 files changed, 34 insertions(+), 6 deletions(-)

diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index e82a18c..e92e4e5 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -1528,6 +1528,29 @@ static inline void adjust_lapic_timer_advance(struct kvm_vcpu *vcpu,
 	apic->lapic_timer.timer_advance_ns = timer_advance_ns;
 }
 
+/*
+ * Convert the vmentry_advance_ns module parameter into guest TSC cycles and
+ * cache the result in the vCPU's lapic timer when one exists.
+ *
+ * Reads vcpu->arch.virtual_tsc_khz, so callers must update that field to the
+ * new frequency before calling.  Returns the computed cycle count.
+ */
+u64 compute_vmentry_advance_cycles(struct kvm_vcpu *vcpu)
+{
+	struct kvm_lapic *apic = vcpu->arch.apic;
+	u64 cycles;
+
+	/* Widen first so the product is not truncated to 32 bits. */
+	cycles = (u64)vmentry_advance_ns * vcpu->arch.virtual_tsc_khz;
+	do_div(cycles, 1000000);
+
+	/* Guard against a vCPU that has no lapic allocated. */
+	if (apic)
+		apic->lapic_timer.vmentry_advance_cycles = cycles;
+
+	return cycles;
+}
+
 void kvm_wait_lapic_expire(struct kvm_vcpu *vcpu)
 {
 	struct kvm_lapic *apic = vcpu->arch.apic;
@@ -1541,7 +1554,8 @@ void kvm_wait_lapic_expire(struct kvm_vcpu *vcpu)
 
 	tsc_deadline = apic->lapic_timer.expired_tscdeadline;
 	apic->lapic_timer.expired_tscdeadline = 0;
-	guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc());
+	guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc()) +
+		apic->lapic_timer.vmentry_advance_cycles;
 	apic->lapic_timer.advance_expire_delta = guest_tsc - tsc_deadline;
 
 	if (guest_tsc < tsc_deadline)
@@ -1569,7 +1583,8 @@ static void start_sw_tscdeadline(struct kvm_lapic *apic)
 	local_irq_save(flags);
 
 	now = ktime_get();
-	guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc());
+	guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc()) +
+		apic->lapic_timer.vmentry_advance_cycles;
 
 	ns = (tscdeadline - guest_tsc) * 1000000ULL;
 	do_div(ns, this_tsc_khz);
@@ -2326,7 +2341,7 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu, int timer_advance_ns)
 		apic->lapic_timer.timer_advance_ns = timer_advance_ns;
 		apic->lapic_timer.timer_advance_adjust_done = true;
 	}
-
+	apic->lapic_timer.vmentry_advance_cycles = 0;
 
 	/*
 	 * APIC is created enabled. This will prevent kvm_lapic_set_base from
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
index 3674717..7c38950 100644
--- a/arch/x86/kvm/lapic.h
+++ b/arch/x86/kvm/lapic.h
@@ -33,6 +33,7 @@ struct kvm_timer {
 	u64 expired_tscdeadline;
 	u32 timer_advance_ns;
 	s64 advance_expire_delta;
+	u64 vmentry_advance_cycles;
 	atomic_t pending;			/* accumulated triggered timers */
 	bool hv_timer_in_use;
 	bool timer_advance_adjust_done;
@@ -226,6 +227,7 @@ static inline int kvm_lapic_latched_init(struct kvm_vcpu *vcpu)
 bool kvm_apic_pending_eoi(struct kvm_vcpu *vcpu, int vector);
 
 void kvm_wait_lapic_expire(struct kvm_vcpu *vcpu);
+u64 compute_vmentry_advance_cycles(struct kvm_vcpu *vcpu);
 
 bool kvm_intr_is_single_vcpu_fast(struct kvm *kvm, struct kvm_lapic_irq *irq,
 			struct kvm_vcpu **dest_vcpu);
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 8fbea03..dc81c78 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -7064,7 +7064,8 @@ static int vmx_set_hv_timer(struct kvm_vcpu *vcpu, u64 guest_deadline_tsc,
 
 	vmx = to_vmx(vcpu);
 	tscl = rdtsc();
-	guest_tscl = kvm_read_l1_tsc(vcpu, tscl);
+	guest_tscl = kvm_read_l1_tsc(vcpu, tscl) +
+		vcpu->arch.apic->lapic_timer.vmentry_advance_cycles;
 	delta_tsc = max(guest_deadline_tsc, guest_tscl) - guest_tscl;
 	lapic_timer_advance_cycles = nsec_to_cycles(vcpu,
 						    ktimer->timer_advance_ns);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 0a05a4e..5e79b6c 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -145,6 +145,12 @@ module_param(tsc_tolerance_ppm, uint, S_IRUGO | S_IWUSR);
 static int __read_mostly lapic_timer_advance_ns = -1;
 module_param(lapic_timer_advance_ns, int, S_IRUGO | S_IWUSR);
 
+/*
+ * lapic timer vmentry advance (tscdeadline mode only) in nanoseconds.
+ */
+u32 __read_mostly vmentry_advance_ns = 25;
+module_param(vmentry_advance_ns, uint, S_IRUGO);
+
 static bool __read_mostly vector_hashing = true;
 module_param(vector_hashing, bool, S_IRUGO);
 
@@ -1592,6 +1598,13 @@ static int kvm_set_tsc_khz(struct kvm_vcpu *vcpu, u32 user_tsc_khz)
 	kvm_get_time_scale(user_tsc_khz * 1000LL, NSEC_PER_SEC,
 			   &vcpu->arch.virtual_tsc_shift,
 			   &vcpu->arch.virtual_tsc_mult);
-	vcpu->arch.virtual_tsc_khz = user_tsc_khz;
+	/*
+	 * Store the new frequency first: compute_vmentry_advance_cycles()
+	 * reads vcpu->arch.virtual_tsc_khz.
+	 */
+	if (user_tsc_khz != vcpu->arch.virtual_tsc_khz) {
+		vcpu->arch.virtual_tsc_khz = user_tsc_khz;
+		compute_vmentry_advance_cycles(vcpu);
+	}
 
 	/*
@@ -9134,8 +9142,6 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
 	}
 	vcpu->arch.pio_data = page_address(page);
 
-	kvm_set_tsc_khz(vcpu, max_tsc_khz);
-
 	r = kvm_mmu_create(vcpu);
 	if (r < 0)
 		goto fail_free_pio_data;
@@ -9148,6 +9154,8 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
 	} else
 		static_key_slow_inc(&kvm_no_apic_vcpu);
 
+	kvm_set_tsc_khz(vcpu, max_tsc_khz);
+
 	vcpu->arch.mce_banks = kzalloc(KVM_MAX_MCE_BANKS * sizeof(u64) * 4,
 				       GFP_KERNEL_ACCOUNT);
 	if (!vcpu->arch.mce_banks) {
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index e08a128..9998989 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -299,6 +299,8 @@ extern u64 kvm_supported_xcr0(void);
 
 extern unsigned int min_timer_period_us;
 
+extern unsigned int vmentry_advance_ns;
+
 extern bool enable_vmware_backdoor;
 
 extern struct static_key kvm_no_apic_vcpu;
-- 
2.7.4


  reply	other threads:[~2019-06-14  1:16 UTC|newest]

Thread overview: 8+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-06-14  1:15 [PATCH v4 2/2] KVM: LAPIC: remove the trailing newline used in the fmt parameter of TP_printk Wanpeng Li
2019-06-14  1:15 ` Wanpeng Li [this message]
2019-06-21  9:44   ` [PATCH v4 1/2] KVM: LAPIC: Optimize timer latency consider world switch time Wanpeng Li
2019-06-28  8:29     ` Wanpeng Li
2019-07-03  0:48       ` Wanpeng Li
2019-07-03 14:13         ` Paolo Bonzini
2019-07-04  0:37           ` Wanpeng Li
2019-07-03 14:14 ` [PATCH v4 2/2] KVM: LAPIC: remove the trailing newline used in the fmt parameter of TP_printk Paolo Bonzini

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1560474949-20497-2-git-send-email-wanpengli@tencent.com \
    --to=kernellwp@gmail.com \
    --cc=kvm@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=pbonzini@redhat.com \
    --cc=rkrcmar@redhat.com \
    --cc=sean.j.christopherson@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).