Re: [PATCH v4 1/2] KVM: LAPIC: Optimize timer latency consider world switch time

From: Wanpeng Li <kernellwp@gmail.com>
To: LKML <linux-kernel@vger.kernel.org>, kvm <kvm@vger.kernel.org>
Cc: "Paolo Bonzini" <pbonzini@redhat.com>,
	"Radim Krčmář" <rkrcmar@redhat.com>,
	"Sean Christopherson" <sean.j.christopherson@intel.com>
Subject: Re: [PATCH v4 1/2] KVM: LAPIC: Optimize timer latency consider world switch time
Date: Fri, 21 Jun 2019 17:44:00 +0800	[thread overview]
Message-ID: <CANRm+CzUvTTOuYhsGErSDxdNSmxVr7o8d66DF0KOk4v3Meajmg@mail.gmail.com> (raw)
In-Reply-To: <1560474949-20497-2-git-send-email-wanpengli@tencent.com>

ping,
On Fri, 14 Jun 2019 at 09:15, Wanpeng Li <kernellwp@gmail.com> wrote:
>
> From: Wanpeng Li <wanpengli@tencent.com>
>
> The advance lapic timer tries to hide the hypervisor overhead between the
> time the host-emulated timer fires and the time the guest becomes aware
> that the timer has fired. However, even after sustained optimizations,
> kvm-unit-tests/tscdeadline_latency still observes ~1000 cycles of latency,
> because we lose the time between the end of wait_lapic_expire and the point
> where the guest becomes aware that the timer has fired. There is code
> between the end of wait_lapic_expire and the world switch; furthermore,
> the world switch itself also has overhead. Waiting in wait_lapic_expire
> until guest_tsc equals the target deadline is actually too late: the guest
> will observe the latency between the end of wait_lapic_expire() and the
> completion of vmentry to the guest. This patch takes this time into
> consideration.
>
> The vmentry_advance_ns module parameter defaults to a conservative 25ns (thanks
> to Radim's kvm-unit-tests/vmentry_latency.flat); it can be tuned/reworked in
> the future.
>
> Cc: Paolo Bonzini <pbonzini@redhat.com>
> Cc: Radim Krčmář <rkrcmar@redhat.com>
> Cc: Sean Christopherson <sean.j.christopherson@intel.com>
> Signed-off-by: Wanpeng Li <wanpengli@tencent.com>
> ---
> v3 -> v4:
>  * default value is 25ns
>  * compute vmentry_advance_cycles in kvm_set_tsc_khz() path
> v2 -> v3:
>  * read-only module parameter
>  * get_vmentry_advance_cycles() not inline
> v1 -> v2:
>  * rename get_vmentry_advance_delta to get_vmentry_advance_cycles
>  * cache vmentry_advance_cycles by setting param bit 0
>  * add param max limit
>
>  arch/x86/kvm/lapic.c   | 21 ++++++++++++++++++---
>  arch/x86/kvm/lapic.h   |  2 ++
>  arch/x86/kvm/vmx/vmx.c |  3 ++-
>  arch/x86/kvm/x86.c     | 12 ++++++++++--
>  arch/x86/kvm/x86.h     |  2 ++
>  5 files changed, 34 insertions(+), 6 deletions(-)
>
> diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
> index e82a18c..e92e4e5 100644
> --- a/arch/x86/kvm/lapic.c
> +++ b/arch/x86/kvm/lapic.c
> @@ -1528,6 +1528,19 @@ static inline void adjust_lapic_timer_advance(struct kvm_vcpu *vcpu,
>         apic->lapic_timer.timer_advance_ns = timer_advance_ns;
>  }
>
> +u64 compute_vmentry_advance_cycles(struct kvm_vcpu *vcpu)
> +{
> +       u64 cycles;
> +       struct kvm_lapic *apic = vcpu->arch.apic;
> +
> +       cycles = vmentry_advance_ns * vcpu->arch.virtual_tsc_khz;
> +       do_div(cycles, 1000000);
> +
> +       apic->lapic_timer.vmentry_advance_cycles = cycles;
> +
> +       return cycles;
> +}
> +
>  void kvm_wait_lapic_expire(struct kvm_vcpu *vcpu)
>  {
>         struct kvm_lapic *apic = vcpu->arch.apic;
> @@ -1541,7 +1554,8 @@ void kvm_wait_lapic_expire(struct kvm_vcpu *vcpu)
>
>         tsc_deadline = apic->lapic_timer.expired_tscdeadline;
>         apic->lapic_timer.expired_tscdeadline = 0;
> -       guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc());
> +       guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc()) +
> +               apic->lapic_timer.vmentry_advance_cycles;
>         apic->lapic_timer.advance_expire_delta = guest_tsc - tsc_deadline;
>
>         if (guest_tsc < tsc_deadline)
> @@ -1569,7 +1583,8 @@ static void start_sw_tscdeadline(struct kvm_lapic *apic)
>         local_irq_save(flags);
>
>         now = ktime_get();
> -       guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc());
> +       guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc()) +
> +               apic->lapic_timer.vmentry_advance_cycles;
>
>         ns = (tscdeadline - guest_tsc) * 1000000ULL;
>         do_div(ns, this_tsc_khz);
> @@ -2326,7 +2341,7 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu, int timer_advance_ns)
>                 apic->lapic_timer.timer_advance_ns = timer_advance_ns;
>                 apic->lapic_timer.timer_advance_adjust_done = true;
>         }
> -
> +       apic->lapic_timer.vmentry_advance_cycles = 0;
>
>         /*
>          * APIC is created enabled. This will prevent kvm_lapic_set_base from
> diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
> index 3674717..7c38950 100644
> --- a/arch/x86/kvm/lapic.h
> +++ b/arch/x86/kvm/lapic.h
> @@ -33,6 +33,7 @@ struct kvm_timer {
>         u64 expired_tscdeadline;
>         u32 timer_advance_ns;
>         s64 advance_expire_delta;
> +       u64 vmentry_advance_cycles;
>         atomic_t pending;                       /* accumulated triggered timers */
>         bool hv_timer_in_use;
>         bool timer_advance_adjust_done;
> @@ -226,6 +227,7 @@ static inline int kvm_lapic_latched_init(struct kvm_vcpu *vcpu)
>  bool kvm_apic_pending_eoi(struct kvm_vcpu *vcpu, int vector);
>
>  void kvm_wait_lapic_expire(struct kvm_vcpu *vcpu);
> +u64 compute_vmentry_advance_cycles(struct kvm_vcpu *vcpu);
>
>  bool kvm_intr_is_single_vcpu_fast(struct kvm *kvm, struct kvm_lapic_irq *irq,
>                         struct kvm_vcpu **dest_vcpu);
> diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
> index 8fbea03..dc81c78 100644
> --- a/arch/x86/kvm/vmx/vmx.c
> +++ b/arch/x86/kvm/vmx/vmx.c
> @@ -7064,7 +7064,8 @@ static int vmx_set_hv_timer(struct kvm_vcpu *vcpu, u64 guest_deadline_tsc,
>
>         vmx = to_vmx(vcpu);
>         tscl = rdtsc();
> -       guest_tscl = kvm_read_l1_tsc(vcpu, tscl);
> +       guest_tscl = kvm_read_l1_tsc(vcpu, tscl) +
> +               vcpu->arch.apic->lapic_timer.vmentry_advance_cycles;
>         delta_tsc = max(guest_deadline_tsc, guest_tscl) - guest_tscl;
>         lapic_timer_advance_cycles = nsec_to_cycles(vcpu,
>                                                     ktimer->timer_advance_ns);
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 0a05a4e..5e79b6c 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -145,6 +145,12 @@ module_param(tsc_tolerance_ppm, uint, S_IRUGO | S_IWUSR);
>  static int __read_mostly lapic_timer_advance_ns = -1;
>  module_param(lapic_timer_advance_ns, int, S_IRUGO | S_IWUSR);
>
> +/*
> + * lapic timer vmentry advance (tscdeadline mode only) in nanoseconds.
> + */
> +u32 __read_mostly vmentry_advance_ns = 25;
> +module_param(vmentry_advance_ns, uint, S_IRUGO);
> +
>  static bool __read_mostly vector_hashing = true;
>  module_param(vector_hashing, bool, S_IRUGO);
>
> @@ -1592,6 +1598,8 @@ static int kvm_set_tsc_khz(struct kvm_vcpu *vcpu, u32 user_tsc_khz)
>         kvm_get_time_scale(user_tsc_khz * 1000LL, NSEC_PER_SEC,
>                            &vcpu->arch.virtual_tsc_shift,
>                            &vcpu->arch.virtual_tsc_mult);
> +       if (user_tsc_khz != vcpu->arch.virtual_tsc_khz)
> +               compute_vmentry_advance_cycles(vcpu);
>         vcpu->arch.virtual_tsc_khz = user_tsc_khz;
>
>         /*
> @@ -9134,8 +9142,6 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
>         }
>         vcpu->arch.pio_data = page_address(page);
>
> -       kvm_set_tsc_khz(vcpu, max_tsc_khz);
> -
>         r = kvm_mmu_create(vcpu);
>         if (r < 0)
>                 goto fail_free_pio_data;
> @@ -9148,6 +9154,8 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
>         } else
>                 static_key_slow_inc(&kvm_no_apic_vcpu);
>
> +       kvm_set_tsc_khz(vcpu, max_tsc_khz);
> +
>         vcpu->arch.mce_banks = kzalloc(KVM_MAX_MCE_BANKS * sizeof(u64) * 4,
>                                        GFP_KERNEL_ACCOUNT);
>         if (!vcpu->arch.mce_banks) {
> diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
> index e08a128..9998989 100644
> --- a/arch/x86/kvm/x86.h
> +++ b/arch/x86/kvm/x86.h
> @@ -299,6 +299,8 @@ extern u64 kvm_supported_xcr0(void);
>
>  extern unsigned int min_timer_period_us;
>
> +extern unsigned int vmentry_advance_ns;
> +
>  extern bool enable_vmware_backdoor;
>
>  extern struct static_key kvm_no_apic_vcpu;
> --
> 2.7.4
>