All of lore.kernel.org
 help / color / mirror / Atom feed
From: Suleiman Souhlal <suleiman@google.com>
To: pbonzini@redhat.com, rkrcmar@redhat.com, tglx@linutronix.de
Cc: john.stultz@linaro.org, sboyd@kernel.org,
	linux-kernel@vger.kernel.org, kvm@vger.kernel.org,
	ssouhlal@freebsd.org, tfiga@chromium.org, vkuznets@redhat.com,
	Suleiman Souhlal <suleiman@google.com>
Subject: [RFC v2 1/2] kvm: Mechanism to copy host timekeeping parameters into guest.
Date: Thu, 10 Oct 2019 16:30:54 +0900	[thread overview]
Message-ID: <20191010073055.183635-2-suleiman@google.com> (raw)
In-Reply-To: <20191010073055.183635-1-suleiman@google.com>

This is used to synchronize time between host and guest.
The guest can request the (guest) physical address it wants the
data in through the MSR_KVM_TIMEKEEPER_EN MSR.

It currently assumes the host timekeeper is "tsc".

Signed-off-by: Suleiman Souhlal <suleiman@google.com>
---
 arch/x86/include/asm/kvm_host.h      |   3 +
 arch/x86/include/asm/pvclock-abi.h   |  27 ++++++
 arch/x86/include/uapi/asm/kvm_para.h |   1 +
 arch/x86/kvm/x86.c                   | 121 +++++++++++++++++++++++++++
 4 files changed, 152 insertions(+)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 50eb430b0ad8..4d622450cb4a 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -659,7 +659,10 @@ struct kvm_vcpu_arch {
 	struct pvclock_vcpu_time_info hv_clock;
 	unsigned int hw_tsc_khz;
 	struct gfn_to_hva_cache pv_time;
+	struct gfn_to_hva_cache pv_timekeeper_g2h;
+	struct pvclock_timekeeper pv_timekeeper;
 	bool pv_time_enabled;
+	bool pv_timekeeper_enabled;
 	/* set guest stopped flag in pvclock flags field */
 	bool pvclock_set_guest_stopped_request;
 
diff --git a/arch/x86/include/asm/pvclock-abi.h b/arch/x86/include/asm/pvclock-abi.h
index 1436226efe3e..2809008b9b26 100644
--- a/arch/x86/include/asm/pvclock-abi.h
+++ b/arch/x86/include/asm/pvclock-abi.h
@@ -40,6 +40,33 @@ struct pvclock_wall_clock {
 	u32   nsec;
 } __attribute__((__packed__));
 
+struct pvclock_read_base {
+	u64 mask;
+	u64 cycle_last;
+	u32 mult;
+	u32 shift;
+	u64 xtime_nsec;
+	u64 base;
+} __attribute__((__packed__));
+
+struct pvclock_timekeeper {
+	u64 gen;
+	u64 flags;
+	struct pvclock_read_base tkr_mono;
+	struct pvclock_read_base tkr_raw;
+	u64 xtime_sec;
+	u64 ktime_sec;
+	u64 wall_to_monotonic_sec;
+	u64 wall_to_monotonic_nsec;
+	u64 offs_real;
+	u64 offs_boot;
+	u64 offs_tai;
+	u64 raw_sec;
+	u64 tsc_offset;
+} __attribute__((__packed__));
+
+#define	PVCLOCK_TIMEKEEPER_ENABLED (1 << 0)
+
 #define PVCLOCK_TSC_STABLE_BIT	(1 << 0)
 #define PVCLOCK_GUEST_STOPPED	(1 << 1)
 /* PVCLOCK_COUNTS_FROM_ZERO broke ABI and can't be used anymore. */
diff --git a/arch/x86/include/uapi/asm/kvm_para.h b/arch/x86/include/uapi/asm/kvm_para.h
index 2a8e0b6b9805..3ebb1d87db3a 100644
--- a/arch/x86/include/uapi/asm/kvm_para.h
+++ b/arch/x86/include/uapi/asm/kvm_para.h
@@ -50,6 +50,7 @@
 #define MSR_KVM_STEAL_TIME  0x4b564d03
 #define MSR_KVM_PV_EOI_EN      0x4b564d04
 #define MSR_KVM_POLL_CONTROL	0x4b564d05
+#define MSR_KVM_TIMEKEEPER_EN	0x4b564d06
 
 struct kvm_steal_time {
 	__u64 steal;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 661e2bf38526..937f83cdda4b 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -157,6 +157,8 @@ module_param(force_emulation_prefix, bool, S_IRUGO);
 int __read_mostly pi_inject_timer = -1;
 module_param(pi_inject_timer, bint, S_IRUGO | S_IWUSR);
 
+static atomic_t pv_timekeepers_nr;
+
 #define KVM_NR_SHARED_MSRS 16
 
 struct kvm_shared_msrs_global {
@@ -2729,6 +2731,16 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 
 		break;
 	}
+	case MSR_KVM_TIMEKEEPER_EN:
+		if (kvm_gfn_to_hva_cache_init(vcpu->kvm,
+		    &vcpu->arch.pv_timekeeper_g2h, data,
+		    sizeof(struct pvclock_timekeeper)))
+			vcpu->arch.pv_timekeeper_enabled = false;
+		else {
+			vcpu->arch.pv_timekeeper_enabled = true;
+			atomic_inc(&pv_timekeepers_nr);
+		}
+		break;
 	case MSR_KVM_ASYNC_PF_EN:
 		if (kvm_pv_enable_async_pf(vcpu, data))
 			return 1;
@@ -7097,6 +7109,109 @@ static struct perf_guest_info_callbacks kvm_guest_cbs = {
 };
 
 #ifdef CONFIG_X86_64
+static DEFINE_SPINLOCK(shadow_pvtk_lock);
+static struct pvclock_timekeeper shadow_pvtk;
+
+static void
+pvclock_copy_read_base(struct pvclock_read_base *pvtkr,
+    struct tk_read_base *tkr)
+{
+	pvtkr->cycle_last = tkr->cycle_last;
+	pvtkr->mult = tkr->mult;
+	pvtkr->shift = tkr->shift;
+	pvtkr->mask = tkr->mask;
+	pvtkr->xtime_nsec = tkr->xtime_nsec;
+	pvtkr->base = tkr->base;
+}
+
+static void
+kvm_copy_into_pvtk(struct kvm_vcpu *vcpu)
+{
+	struct pvclock_timekeeper *pvtk;
+	unsigned long flags;
+
+	if (!vcpu->arch.pv_timekeeper_enabled)
+		return;
+
+	pvtk = &vcpu->arch.pv_timekeeper;
+	if (pvclock_gtod_data.clock.vclock_mode == VCLOCK_TSC) {
+		pvtk->flags |= PVCLOCK_TIMEKEEPER_ENABLED;
+		spin_lock_irqsave(&shadow_pvtk_lock, flags);
+		pvtk->tkr_mono = shadow_pvtk.tkr_mono;
+		pvtk->tkr_raw = shadow_pvtk.tkr_raw;
+
+		pvtk->xtime_sec = shadow_pvtk.xtime_sec;
+		pvtk->ktime_sec = shadow_pvtk.ktime_sec;
+		pvtk->wall_to_monotonic_sec =
+		    shadow_pvtk.wall_to_monotonic_sec;
+		pvtk->wall_to_monotonic_nsec =
+		    shadow_pvtk.wall_to_monotonic_nsec;
+		pvtk->offs_real = shadow_pvtk.offs_real;
+		pvtk->offs_boot = shadow_pvtk.offs_boot;
+		pvtk->offs_tai = shadow_pvtk.offs_tai;
+		pvtk->raw_sec = shadow_pvtk.raw_sec;
+		spin_unlock_irqrestore(&shadow_pvtk_lock, flags);
+
+		pvtk->tsc_offset = kvm_x86_ops->read_l1_tsc_offset(vcpu);
+	} else
+		pvtk->flags &= ~PVCLOCK_TIMEKEEPER_ENABLED;
+
+	BUILD_BUG_ON(offsetof(struct pvclock_timekeeper, gen) != 0);
+
+	/*
+	 * Make the gen count odd to indicate we are in the process of
+	 * updating.
+	 */
+	vcpu->arch.pv_timekeeper.gen++;
+	vcpu->arch.pv_timekeeper.gen |= 1;
+
+	/*
+	 * See comment in kvm_guest_time_update() for why we have to do
+	 * multiple writes.
+	 */
+	kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.pv_timekeeper_g2h,
+	    &vcpu->arch.pv_timekeeper, sizeof(vcpu->arch.pv_timekeeper.gen));
+
+	smp_wmb();
+
+	kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.pv_timekeeper_g2h,
+	    &vcpu->arch.pv_timekeeper, sizeof(vcpu->arch.pv_timekeeper));
+
+	smp_wmb();
+
+	vcpu->arch.pv_timekeeper.gen++;
+
+	kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.pv_timekeeper_g2h,
+	    &vcpu->arch.pv_timekeeper, sizeof(vcpu->arch.pv_timekeeper.gen));
+}
+
+static void
+update_shadow_pvtk(struct timekeeper *tk)
+{
+	struct pvclock_timekeeper *pvtk;
+	unsigned long flags;
+
+	pvtk = &shadow_pvtk;
+
+	if (atomic_read(&pv_timekeepers_nr) == 0 ||
+	    pvclock_gtod_data.clock.vclock_mode != VCLOCK_TSC)
+		return;
+
+	spin_lock_irqsave(&shadow_pvtk_lock, flags);
+	pvclock_copy_read_base(&pvtk->tkr_mono, &tk->tkr_mono);
+	pvclock_copy_read_base(&pvtk->tkr_raw, &tk->tkr_raw);
+
+	pvtk->xtime_sec = tk->xtime_sec;
+	pvtk->ktime_sec = tk->ktime_sec;
+	pvtk->wall_to_monotonic_sec = tk->wall_to_monotonic.tv_sec;
+	pvtk->wall_to_monotonic_nsec = tk->wall_to_monotonic.tv_nsec;
+	pvtk->offs_real = tk->offs_real;
+	pvtk->offs_boot = tk->offs_boot;
+	pvtk->offs_tai = tk->offs_tai;
+	pvtk->raw_sec = tk->raw_sec;
+	spin_unlock_irqrestore(&shadow_pvtk_lock, flags);
+}
+
 static void pvclock_gtod_update_fn(struct work_struct *work)
 {
 	struct kvm *kvm;
@@ -7124,6 +7239,7 @@ static int pvclock_gtod_notify(struct notifier_block *nb, unsigned long unused,
 	struct timekeeper *tk = priv;
 
 	update_pvclock_gtod(tk);
+	update_shadow_pvtk(tk);
 
 	/* disable master clock if host does not trust, or does not
 	 * use, TSC based clocksource.
@@ -7940,6 +8056,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 
 	bool req_immediate_exit = false;
 
+	kvm_copy_into_pvtk(vcpu);
+
 	if (kvm_request_pending(vcpu)) {
 		if (kvm_check_request(KVM_REQ_GET_VMCS12_PAGES, vcpu))
 			kvm_x86_ops->get_vmcs12_pages(vcpu);
@@ -9020,6 +9138,9 @@ void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu)
 
 	kvmclock_reset(vcpu);
 
+	if (vcpu->arch.pv_timekeeper_enabled)
+		atomic_dec(&pv_timekeepers_nr);
+
 	kvm_x86_ops->vcpu_free(vcpu);
 	free_cpumask_var(wbinvd_dirty_mask);
 }
-- 
2.23.0.581.g78d2f28ef7-goog


  reply	other threads:[~2019-10-10  7:38 UTC|newest]

Thread overview: 13+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-10-10  7:30 [RFC v2 0/2] kvm: Use host timekeeping in guest Suleiman Souhlal
2019-10-10  7:30 ` Suleiman Souhlal [this message]
2019-10-10  8:31   ` [RFC v2 1/2] kvm: Mechanism to copy host timekeeping parameters into guest Vitaly Kuznetsov
2019-10-10 10:50   ` Paolo Bonzini
2019-10-10  7:30 ` [RFC v2 2/2] x86/kvmclock: Introduce kvm-hostclock clocksource Suleiman Souhlal
2019-10-10  8:37   ` Vitaly Kuznetsov
2019-10-10 10:55   ` Paolo Bonzini
2019-10-15  8:39     ` Suleiman Souhlal
2019-10-15  8:59       ` Paolo Bonzini
2019-10-10 10:39 ` [RFC v2 0/2] kvm: Use host timekeeping in guest Roman Kagan
2019-10-11 18:50   ` Konrad Rzeszutek Wilk
2019-10-15  5:08   ` Suleiman Souhlal
2019-10-10 10:43 ` Paolo Bonzini

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20191010073055.183635-2-suleiman@google.com \
    --to=suleiman@google.com \
    --cc=john.stultz@linaro.org \
    --cc=kvm@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=pbonzini@redhat.com \
    --cc=rkrcmar@redhat.com \
    --cc=sboyd@kernel.org \
    --cc=ssouhlal@freebsd.org \
    --cc=tfiga@chromium.org \
    --cc=tglx@linutronix.de \
    --cc=vkuznets@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.