All of lore.kernel.org
 help / color / mirror / Atom feed
From: Quan Xu <quan.xu0@gmail.com>
To: pbonzini@redhat.com, rkrcmar@redhat.com
Cc: yang.zhang.wz@gmail.com, kvm@vger.kernel.org,
	linux-kernel@vger.kernel.org, Ben Luo <bn0418@gmail.com>,
	Quan Xu <quan.xu0@gmail.com>
Subject: [PATCH RFC 3/7] KVM: timer: synchronize tsc-deadline timestamp for guest
Date: Fri,  8 Dec 2017 16:39:46 +0800	[thread overview]
Message-ID: <1512722390-3654-4-git-send-email-quan.xu0@gmail.com> (raw)
In-Reply-To: <1512722390-3654-1-git-send-email-quan.xu0@gmail.com>

From: Ben Luo <bn0418@gmail.com>

In general, a KVM guest programs the tsc-deadline timestamp into the
MSR_IA32_TSC_DEADLINE MSR. This causes a VM-exit, and KVM then
handles the timer on behalf of the guest.

Instead, the tsc-deadline timestamp is now mostly recorded in a shared
page, which results in fewer VM-exits. Introduce a periodically running
kthread that scans the shared page and synchronizes the timer settings
for the guest on a dedicated CPU.

Signed-off-by: Yang Zhang <yang.zhang.wz@gmail.com>
Signed-off-by: Quan Xu <quan.xu0@gmail.com>
Signed-off-by: Ben Luo <bn0418@gmail.com>
---
 arch/x86/kvm/lapic.c |  138 ++++++++++++++++++++++++++++++++++++++++++++++++++
 arch/x86/kvm/lapic.h |    5 ++
 2 files changed, 143 insertions(+), 0 deletions(-)

diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 55c9ba3..20a23bb 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -36,6 +36,10 @@
 #include <asm/delay.h>
 #include <linux/atomic.h>
 #include <linux/jump_label.h>
+#include <linux/ktime.h>
+#include <linux/kthread.h>
+#include <linux/module.h>
+#include <linux/mmu_context.h>
 #include "kvm_cache_regs.h"
 #include "irq.h"
 #include "trace.h"
@@ -70,6 +74,12 @@
 #define APIC_BROADCAST			0xFF
 #define X2APIC_BROADCAST		0xFFFFFFFFul
 
+static struct hrtimer pv_sync_timer;	/* periodically wakes the worker */
+static long pv_timer_period_ns = PVTIMER_PERIOD_NS;
+static struct task_struct *pv_timer_polling_worker; /* NULL: not running */
+/* Sync period in ns; mode 0644 exposes it as a writable module parameter. */
+module_param(pv_timer_period_ns, long, 0644);
+
 static inline int apic_test_vector(int vec, void *bitmap)
 {
 	return test_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
@@ -2542,8 +2552,130 @@ void kvm_apic_accept_events(struct kvm_vcpu *vcpu)
 	}
 }
 
+static enum hrtimer_restart pv_sync_timer_callback(struct hrtimer *timer)
+{
+	/* push the expiry forward by the sync period and wake the worker */
+	hrtimer_forward_now(timer, ns_to_ktime(pv_timer_period_ns));
+	wake_up_process(pv_timer_polling_worker);
+	return HRTIMER_RESTART;
+}
+
+/*
+ * Sync the pv timer of one vCPU (@data): publish the guest TSC of the next
+ * periodic sync, then fetch and program any deadline posted by the guest.
+ */
+void kvm_apic_sync_pv_timer(void *data)
+{
+	struct kvm_vcpu *vcpu = data;
+	struct kvm_lapic *apic = vcpu->arch.apic;
+	unsigned long flags, this_tsc_khz = vcpu->arch.virtual_tsc_khz;
+	u64 guest_tsc, expire_tsc, rem_tsc;
+	s64 rem_ns;
+
+	if (!lapic_in_kernel(vcpu) || !pv_timer_enabled(vcpu))
+		return;
+
+	local_irq_save(flags);
+	guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc());
+	/* TSC cycles until the next sync: ns * kHz / 10^6, never negative. */
+	rem_ns = ktime_to_ns(hrtimer_get_remaining(&pv_sync_timer));
+	if (rem_ns <= 0)
+		rem_ns += pv_timer_period_ns;
+	if (rem_ns < 0)
+		rem_ns = 0;
+	rem_tsc = (u64)rem_ns * this_tsc_khz;
+	do_div(rem_tsc, 1000000);
+
+	/* make sure guest_tsc and rem_tsc are computed before the update */
+	smp_wmb();
+	kvm_xchg_guest_cached(vcpu->kvm, &vcpu->arch.pv_timer.data,
+		offsetof(struct pvtimer_vcpu_event_info, next_sync_tsc),
+		guest_tsc + rem_tsc, 8);
+
+	/* make sure next_sync_tsc is visible */
+	smp_wmb();
+	expire_tsc = kvm_xchg_guest_cached(vcpu->kvm, &vcpu->arch.pv_timer.data,
+			offsetof(struct pvtimer_vcpu_event_info, expire_tsc),
+			0UL, 8);
+
+	/* make sure expire_tsc is visible */
+	smp_wmb();
+	if (expire_tsc > guest_tsc) {
+		/*
+		 * The caller runs on the dedicated CPU, so the timer is
+		 * registered on that dedicated CPU here.
+		 */
+		kvm_set_lapic_tscdeadline_msr(apic->vcpu, expire_tsc);
+	} else if (expire_tsc) {
+		/* deliver immediately if expired */
+		kvm_apic_local_deliver(apic, APIC_LVTT);
+	}
+	local_irq_restore(flags);
+}
+
+/* Kthread body: on every wakeup, sync the pv timer of each vCPU of each VM. */
+static int pv_timer_polling(void *arg)
+{
+	struct kvm *kvm;
+	struct kvm_vcpu *vcpu;
+	int i;
+	mm_segment_t oldfs = get_fs();
+
+	while (!kthread_should_stop()) {
+		spin_lock(&kvm_lock);
+		list_for_each_entry(kvm, &vm_list, vm_list) {
+			/* run the sync with the VM's mm and USER_DS in effect */
+			set_fs(USER_DS);
+			use_mm(kvm->mm);
+			kvm_for_each_vcpu(i, vcpu, kvm)
+				kvm_apic_sync_pv_timer(vcpu);
+			unuse_mm(kvm->mm);
+			set_fs(oldfs);
+		}
+		spin_unlock(&kvm_lock);
+
+		/*
+		 * Sleep until the sync hrtimer (or kthread_stop()) wakes us.
+		 * The state is set only after the scan, because schedule()
+		 * does not block while the task is still TASK_RUNNING.
+		 */
+		set_current_state(TASK_INTERRUPTIBLE);
+		if (!kthread_should_stop())
+			schedule();
+		__set_current_state(TASK_RUNNING);
+	}
+
+	return 0;
+}
+
+/* Create the pv timer polling kthread and start the periodic sync hrtimer. */
+static void kvm_pv_timer_init(void)
+{
+	ktime_t ktime = ktime_set(0, pv_timer_period_ns);
+
+	hrtimer_init(&pv_sync_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_PINNED);
+	pv_sync_timer.function = &pv_sync_timer_callback;
+
+	/* NOTE(review): PVTIMER_SYNC_CPU is never checked to be online. */
+	pv_timer_polling_worker = kthread_create(pv_timer_polling, NULL,
+						"pv_timer_polling_worker/%d",
+						PVTIMER_SYNC_CPU);
+	if (IS_ERR(pv_timer_polling_worker)) {
+		pr_warn_once("kvm: failed to create thread for pv_timer\n");
+		pv_timer_polling_worker = NULL;
+		return;
+	}
+
+	kthread_bind(pv_timer_polling_worker, PVTIMER_SYNC_CPU);
+	wake_up_process(pv_timer_polling_worker);
+	/* start with the same (pinned) mode the timer was initialised with */
+	hrtimer_start(&pv_sync_timer, ktime, HRTIMER_MODE_REL_PINNED);
+}
+
 void kvm_lapic_init(void)
 {
+	kvm_pv_timer_init();
+
 	/* do not patch jump label more than once per second */
 	jump_label_rate_limit(&apic_hw_disabled, HZ);
 	jump_label_rate_limit(&apic_sw_disabled, HZ);
@@ -2551,6 +2683,12 @@ void kvm_lapic_init(void)
 
 void kvm_lapic_exit(void)
 {
+	if (pv_timer_polling_worker) {
+		hrtimer_cancel(&pv_sync_timer);	/* stop the wakeups first */
+		kthread_stop(pv_timer_polling_worker);
+		pv_timer_polling_worker = NULL;	/* mark the worker as gone */
+	}
+
 	static_key_deferred_flush(&apic_hw_disabled);
 	static_key_deferred_flush(&apic_sw_disabled);
 }
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
index 539a738..4588d59 100644
--- a/arch/x86/kvm/lapic.h
+++ b/arch/x86/kvm/lapic.h
@@ -16,6 +16,9 @@
 #define APIC_BUS_CYCLE_NS       1
 #define APIC_BUS_FREQUENCY      (1000000000ULL / APIC_BUS_CYCLE_NS)
 
+#define PVTIMER_SYNC_CPU	(NR_CPUS - 1) /* CPU the sync kthread is bound to */
+#define PVTIMER_PERIOD_NS	250000L /* default pvtimer sync period (250 us) */
+
 struct kvm_timer {
 	struct hrtimer timer;
 	s64 period; 				/* unit: ns */
@@ -213,6 +216,8 @@ static inline bool pv_timer_enabled(struct kvm_vcpu *vcpu)
 	return vcpu->arch.pv_timer.msr_val & KVM_MSR_ENABLED;
 }
 
+void kvm_apic_sync_pv_timer(void *data);
+
 bool kvm_apic_pending_eoi(struct kvm_vcpu *vcpu, int vector);
 
 void wait_lapic_expire(struct kvm_vcpu *vcpu);
-- 
1.7.1

  parent reply	other threads:[~2017-12-08  8:40 UTC|newest]

Thread overview: 16+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-12-08  8:39 [PATCH RFC 0/7] kvm pvtimer Quan Xu
2017-12-08  8:39 ` [PATCH RFC 1/7] kvm: x86: emulate MSR_KVM_PV_TIMER_EN MSR Quan Xu
2017-12-08  8:39 ` [PATCH RFC 2/7] kvm: x86: add a function to exchange value Quan Xu
2017-12-08  8:39 ` Quan Xu [this message]
2017-12-08 15:06   ` [PATCH RFC 3/7] KVM: timer: synchronize tsc-deadline timestamp for guest Konrad Rzeszutek Wilk
2017-12-14  1:54     ` Quan Xu
2017-12-08  8:39 ` [PATCH RFC 4/7] KVM: timer: program timer to a dedicated CPU Quan Xu
2017-12-08  8:39 ` [PATCH RFC 5/7] KVM: timer: ignore timer migration if pvtimer is enabled Quan Xu
2017-12-08  8:39 ` [PATCH RFC 6/7] Doc/KVM: introduce a new cpuid bit for kvm pvtimer Quan Xu
2017-12-08  8:39 ` [PATCH RFC 7/7] kvm: guest: reprogram guest timer Quan Xu
2017-12-08 15:10 ` [PATCH RFC 0/7] kvm pvtimer Konrad Rzeszutek Wilk
     [not found]   ` <CAFv8KnF1Re7Zn2LVvqh=Sr8MmaWJJwZBB1i5ws03GvPVKXRjzA@mail.gmail.com>
2017-12-13 16:28     ` Konrad Rzeszutek Wilk
2017-12-14  2:32       ` Quan Xu
2017-12-14 11:56       ` Paolo Bonzini
2017-12-14 12:06         ` Quan Xu
2017-12-14 13:00           ` Paolo Bonzini

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1512722390-3654-4-git-send-email-quan.xu0@gmail.com \
    --to=quan.xu0@gmail.com \
    --cc=bn0418@gmail.com \
    --cc=kvm@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=pbonzini@redhat.com \
    --cc=rkrcmar@redhat.com \
    --cc=yang.zhang.wz@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.