From: Suleiman Souhlal <suleiman@google.com>
To: pbonzini@redhat.com, rkrcmar@redhat.com, tglx@linutronix.de
Cc: john.stultz@linaro.org, sboyd@kernel.org,
linux-kernel@vger.kernel.org, kvm@vger.kernel.org,
Suleiman Souhlal <suleiman@google.com>
Subject: [RFC 1/2] kvm: Mechanism to copy host timekeeping parameters into guest.
Date: Fri, 20 Sep 2019 15:27:12 +0900 [thread overview]
Message-ID: <20190920062713.78503-2-suleiman@google.com> (raw)
In-Reply-To: <20190920062713.78503-1-suleiman@google.com>
This is used to synchronize time between host and guest.
The guest specifies the guest-physical address at which it wants the
data placed by writing that address to the MSR_KVM_TIMEKEEPER_EN MSR.
We maintain a shadow copy of the host timekeeper that is refreshed
whenever the host timekeeper is updated, and is then copied into the
guest each time a vCPU enters the guest.
It currently assumes the host clocksource is "tsc".
Signed-off-by: Suleiman Souhlal <suleiman@google.com>
---
arch/x86/include/asm/kvm_host.h | 3 +
arch/x86/include/asm/pvclock-abi.h | 27 ++++++
arch/x86/include/uapi/asm/kvm_para.h | 1 +
arch/x86/kvm/x86.c | 121 +++++++++++++++++++++++++++
4 files changed, 152 insertions(+)
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index bdc16b0aa7c6..b1b4c3a80b8d 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -666,7 +666,10 @@ struct kvm_vcpu_arch {
struct pvclock_vcpu_time_info hv_clock;
unsigned int hw_tsc_khz;
struct gfn_to_hva_cache pv_time;
+ struct gfn_to_hva_cache pv_timekeeper_g2h;
+ struct pvclock_timekeeper pv_timekeeper;
bool pv_time_enabled;
+ bool pv_timekeeper_enabled;
/* set guest stopped flag in pvclock flags field */
bool pvclock_set_guest_stopped_request;
diff --git a/arch/x86/include/asm/pvclock-abi.h b/arch/x86/include/asm/pvclock-abi.h
index 1436226efe3e..2809008b9b26 100644
--- a/arch/x86/include/asm/pvclock-abi.h
+++ b/arch/x86/include/asm/pvclock-abi.h
@@ -40,6 +40,33 @@ struct pvclock_wall_clock {
u32 nsec;
} __attribute__((__packed__));
+/*
+ * Guest-visible counterpart of the host's struct tk_read_base. Each field
+ * is filled from the tk_read_base field of the same name by
+ * pvclock_copy_read_base(). The layout is packed because it is shared
+ * with the guest.
+ */
+struct pvclock_read_base {
+ u64 mask;
+ u64 cycle_last;
+ u32 mult;
+ u32 shift;
+ u64 xtime_nsec;
+ u64 base;
+} __attribute__((__packed__));
+
+/*
+ * Host timekeeping parameters as exposed to a guest. KVM writes this
+ * structure into guest memory at the address the guest supplied via
+ * MSR_KVM_TIMEKEEPER_EN.
+ *
+ * Apart from gen, flags and tsc_offset, the members are copies of the
+ * same-named struct timekeeper fields (wall_to_monotonic is split into
+ * its tv_sec and tv_nsec parts).
+ */
+struct pvclock_timekeeper {
+ u64 gen; /* odd while KVM is rewriting the structure; must stay first */
+ u64 flags; /* PVCLOCK_TIMEKEEPER_ENABLED when the host vclock mode is TSC */
+ struct pvclock_read_base tkr_mono;
+ struct pvclock_read_base tkr_raw;
+ u64 xtime_sec;
+ u64 ktime_sec;
+ u64 wall_to_monotonic_sec;
+ u64 wall_to_monotonic_nsec;
+ u64 offs_real;
+ u64 offs_boot;
+ u64 offs_tai;
+ u64 raw_sec;
+ u64 tsc_offset; /* value returned by kvm_x86_ops->read_l1_tsc_offset() */
+} __attribute__((__packed__));
+
+#define PVCLOCK_TIMEKEEPER_ENABLED (1 << 0)
+
#define PVCLOCK_TSC_STABLE_BIT (1 << 0)
#define PVCLOCK_GUEST_STOPPED (1 << 1)
/* PVCLOCK_COUNTS_FROM_ZERO broke ABI and can't be used anymore. */
diff --git a/arch/x86/include/uapi/asm/kvm_para.h b/arch/x86/include/uapi/asm/kvm_para.h
index 2a8e0b6b9805..3ebb1d87db3a 100644
--- a/arch/x86/include/uapi/asm/kvm_para.h
+++ b/arch/x86/include/uapi/asm/kvm_para.h
@@ -50,6 +50,7 @@
#define MSR_KVM_STEAL_TIME 0x4b564d03
#define MSR_KVM_PV_EOI_EN 0x4b564d04
#define MSR_KVM_POLL_CONTROL 0x4b564d05
+#define MSR_KVM_TIMEKEEPER_EN 0x4b564d06
struct kvm_steal_time {
__u64 steal;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 91602d310a3f..06a940a74005 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -157,6 +157,8 @@ module_param(force_emulation_prefix, bool, S_IRUGO);
int __read_mostly pi_inject_timer = -1;
module_param(pi_inject_timer, bint, S_IRUGO | S_IWUSR);
+/*
+ * Bumped when a vCPU enables the PV timekeeper through
+ * MSR_KVM_TIMEKEEPER_EN and dropped when such a vCPU is freed.
+ * update_shadow_pvtk() returns early while this reads zero.
+ */
+static atomic_t pv_timekeepers_nr;
+
#define KVM_NR_SHARED_MSRS 16
struct kvm_shared_msrs_global {
@@ -2621,6 +2623,16 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
break;
}
+	case MSR_KVM_TIMEKEEPER_EN: {
+		bool was_enabled = vcpu->arch.pv_timekeeper_enabled;
+		bool now_enabled;
+
+		/*
+		 * data is the guest-physical address at which the guest
+		 * wants the pvclock_timekeeper structure; the feature is
+		 * enabled only if the gfn->hva cache can be set up for it.
+		 */
+		now_enabled = !kvm_gfn_to_hva_cache_init(vcpu->kvm,
+				&vcpu->arch.pv_timekeeper_g2h, data,
+				sizeof(struct pvclock_timekeeper));
+		vcpu->arch.pv_timekeeper_enabled = now_enabled;
+
+		/*
+		 * Adjust pv_timekeepers_nr only on a real state change.
+		 * The guest may write this MSR repeatedly, while
+		 * kvm_arch_vcpu_free() drops at most one reference per
+		 * vCPU, so counting every successful write would leave the
+		 * counter permanently elevated.
+		 */
+		if (now_enabled && !was_enabled)
+			atomic_inc(&pv_timekeepers_nr);
+		else if (!now_enabled && was_enabled)
+			atomic_dec(&pv_timekeepers_nr);
+		break;
+	}
case MSR_KVM_ASYNC_PF_EN:
if (kvm_pv_enable_async_pf(vcpu, data))
return 1;
@@ -6965,6 +6977,109 @@ static struct perf_guest_info_callbacks kvm_guest_cbs = {
.handle_intel_pt_intr = kvm_handle_intel_pt_intr,
};
+/*
+ * shadow_pvtk is written by update_shadow_pvtk() and read by
+ * kvm_copy_into_pvtk(); both do so while holding shadow_pvtk_lock.
+ */
+static DEFINE_SPINLOCK(shadow_pvtk_lock);
+static struct pvclock_timekeeper shadow_pvtk;
+
+/*
+ * Copy the fields of a host tk_read_base into its guest-visible
+ * pvclock_read_base counterpart.
+ *
+ * @pvtkr: destination, fully overwritten.
+ * @tkr:   source; only read, hence const.
+ */
+static void
+pvclock_copy_read_base(struct pvclock_read_base *pvtkr,
+		       const struct tk_read_base *tkr)
+{
+	pvtkr->mask = tkr->mask;
+	pvtkr->cycle_last = tkr->cycle_last;
+	pvtkr->mult = tkr->mult;
+	pvtkr->shift = tkr->shift;
+	pvtkr->xtime_nsec = tkr->xtime_nsec;
+	pvtkr->base = tkr->base;
+}
+
+/*
+ * Refresh this vCPU's pv_timekeeper from shadow_pvtk and write it to the
+ * guest location cached in pv_timekeeper_g2h. Does nothing unless the
+ * vCPU has pv_timekeeper_enabled set.
+ *
+ * The guest-visible gen field is made odd before the structure body is
+ * rewritten and advanced to the next value once the body is in place.
+ */
+static void
+kvm_copy_into_pvtk(struct kvm_vcpu *vcpu)
+{
+ struct pvclock_timekeeper *pvtk;
+ unsigned long flags;
+
+ if (!vcpu->arch.pv_timekeeper_enabled)
+ return;
+
+ pvtk = &vcpu->arch.pv_timekeeper;
+ /*
+ * Only a TSC-based host clock is exported. Otherwise the ENABLED flag
+ * is cleared and the previously copied values are left untouched.
+ */
+ if (pvclock_gtod_data.clock.vclock_mode == VCLOCK_TSC) {
+ pvtk->flags |= PVCLOCK_TIMEKEEPER_ENABLED;
+ /* Same lock update_shadow_pvtk() takes while writing the shadow. */
+ spin_lock_irqsave(&shadow_pvtk_lock, flags);
+ pvtk->tkr_mono = shadow_pvtk.tkr_mono;
+ pvtk->tkr_raw = shadow_pvtk.tkr_raw;
+
+ pvtk->xtime_sec = shadow_pvtk.xtime_sec;
+ pvtk->ktime_sec = shadow_pvtk.ktime_sec;
+ pvtk->wall_to_monotonic_sec =
+ shadow_pvtk.wall_to_monotonic_sec;
+ pvtk->wall_to_monotonic_nsec =
+ shadow_pvtk.wall_to_monotonic_nsec;
+ pvtk->offs_real = shadow_pvtk.offs_real;
+ pvtk->offs_boot = shadow_pvtk.offs_boot;
+ pvtk->offs_tai = shadow_pvtk.offs_tai;
+ pvtk->raw_sec = shadow_pvtk.raw_sec;
+ spin_unlock_irqrestore(&shadow_pvtk_lock, flags);
+
+ pvtk->tsc_offset = kvm_x86_ops->read_l1_tsc_offset(vcpu);
+ } else
+ pvtk->flags &= ~PVCLOCK_TIMEKEEPER_ENABLED;
+
+ /*
+ * The gen-only writes below copy the first sizeof(gen) bytes of the
+ * structure, so gen has to sit at offset 0.
+ */
+ BUILD_BUG_ON(offsetof(struct pvclock_timekeeper, gen) != 0);
+
+ /*
+ * Make the gen count odd to indicate we are in the process of
+ * updating.
+ */
+ vcpu->arch.pv_timekeeper.gen++;
+ vcpu->arch.pv_timekeeper.gen |= 1;
+
+ /*
+ * See comment in kvm_guest_time_update() for why we have to do
+ * multiple writes.
+ *
+ * NOTE(review): the return values of kvm_write_guest_cached() are
+ * not checked here.
+ */
+ kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.pv_timekeeper_g2h,
+ &vcpu->arch.pv_timekeeper, sizeof(vcpu->arch.pv_timekeeper.gen));
+
+ smp_wmb();
+
+ /* Full structure, still carrying the odd gen value. */
+ kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.pv_timekeeper_g2h,
+ &vcpu->arch.pv_timekeeper, sizeof(vcpu->arch.pv_timekeeper));
+
+ smp_wmb();
+
+ /* Advance gen past the odd value now that the body is written. */
+ vcpu->arch.pv_timekeeper.gen++;
+
+ kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.pv_timekeeper_g2h,
+ &vcpu->arch.pv_timekeeper, sizeof(vcpu->arch.pv_timekeeper.gen));
+}
+
+/*
+ * Snapshot the host timekeeper fields that guests consume into
+ * shadow_pvtk. Skipped when pv_timekeepers_nr reads zero or when the
+ * host vclock mode is not VCLOCK_TSC. The copy is made while holding
+ * shadow_pvtk_lock.
+ */
+static void
+update_shadow_pvtk(struct timekeeper *tk)
+{
+	struct pvclock_timekeeper *shadow = &shadow_pvtk;
+	unsigned long irqflags;
+
+	if (!atomic_read(&pv_timekeepers_nr))
+		return;
+	if (pvclock_gtod_data.clock.vclock_mode != VCLOCK_TSC)
+		return;
+
+	spin_lock_irqsave(&shadow_pvtk_lock, irqflags);
+
+	/* Clock read bases. */
+	pvclock_copy_read_base(&shadow->tkr_mono, &tk->tkr_mono);
+	pvclock_copy_read_base(&shadow->tkr_raw, &tk->tkr_raw);
+
+	/* Scalar timekeeper state. */
+	shadow->xtime_sec = tk->xtime_sec;
+	shadow->ktime_sec = tk->ktime_sec;
+	shadow->wall_to_monotonic_sec = tk->wall_to_monotonic.tv_sec;
+	shadow->wall_to_monotonic_nsec = tk->wall_to_monotonic.tv_nsec;
+	shadow->offs_real = tk->offs_real;
+	shadow->offs_boot = tk->offs_boot;
+	shadow->offs_tai = tk->offs_tai;
+	shadow->raw_sec = tk->raw_sec;
+
+	spin_unlock_irqrestore(&shadow_pvtk_lock, irqflags);
+}
+
#ifdef CONFIG_X86_64
static void pvclock_gtod_update_fn(struct work_struct *work)
{
@@ -6993,6 +7108,7 @@ static int pvclock_gtod_notify(struct notifier_block *nb, unsigned long unused,
struct timekeeper *tk = priv;
update_pvclock_gtod(tk);
+ update_shadow_pvtk(tk);
/* disable master clock if host does not trust, or does not
* use, TSC based clocksource.
@@ -7809,6 +7925,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
bool req_immediate_exit = false;
+ kvm_copy_into_pvtk(vcpu);
+
if (kvm_request_pending(vcpu)) {
if (kvm_check_request(KVM_REQ_GET_VMCS12_PAGES, vcpu))
kvm_x86_ops->get_vmcs12_pages(vcpu);
@@ -8891,6 +9009,9 @@ void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu)
kvmclock_reset(vcpu);
+ if (vcpu->arch.pv_timekeeper_enabled)
+ atomic_dec(&pv_timekeepers_nr);
+
kvm_x86_ops->vcpu_free(vcpu);
free_cpumask_var(wbinvd_dirty_mask);
}
--
2.23.0.237.gc6a4ce50a0-goog
next prev parent reply other threads:[~2019-09-20 6:27 UTC|newest]
Thread overview: 9+ messages / expand[flat|nested] mbox.gz Atom feed top
2019-09-20 6:27 [RFC 0/2] kvm: Use host timekeeping in guest Suleiman Souhlal
2019-09-20 6:27 ` Suleiman Souhlal [this message]
2019-09-20 6:27 ` [RFC 2/2] x86/kvmclock: Use host timekeeping Suleiman Souhlal
2019-09-20 13:33 ` Vitaly Kuznetsov
2019-09-24 8:10 ` Suleiman Souhlal
2019-09-24 11:14 ` Vitaly Kuznetsov
2019-09-20 7:48 ` [RFC 0/2] kvm: Use host timekeeping in guest Paolo Bonzini
2019-09-20 10:23 ` Thomas Gleixner
2019-09-24 8:08 ` Suleiman Souhlal
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20190920062713.78503-2-suleiman@google.com \
--to=suleiman@google.com \
--cc=john.stultz@linaro.org \
--cc=kvm@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=pbonzini@redhat.com \
--cc=rkrcmar@redhat.com \
--cc=sboyd@kernel.org \
--cc=tglx@linutronix.de \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).