* [RFC PATCH 1/2] timekeeper: change interface of clocksource reading functions
2017-06-28 10:54 [RFC PATCH 0/2] make L2 kvm-clock stable Denis Plotnikov
@ 2017-06-28 10:55 ` Denis Plotnikov
2017-07-10 13:00 ` Radim Krčmář
2017-06-28 10:55 ` [RFC PATCH 2/2] KVM: x86: add support of kvm-clock stability in L2 Denis Plotnikov
2017-07-03 16:12 ` [RFC PATCH 0/2] make L2 kvm-clock stable Denis Plotnikov
2 siblings, 1 reply; 11+ messages in thread
From: Denis Plotnikov @ 2017-06-28 10:55 UTC (permalink / raw)
To: kvm, rkrcmar, pbonzini; +Cc: dplotnikov, den, rkagan
When using the timekeeping API, in some cases it is useful to return
the timestamp value used along with the calculated time, so that the
timestamp value can be used for other purposes (e.g. as the KVM master
clock timestamp).
Signed-off-by: Denis Plotnikov <dplotnikov@virtuozzo.com>
---
arch/x86/hyperv/hv_init.c | 4 ++--
arch/x86/include/asm/pvclock.h | 2 +-
arch/x86/kernel/hpet.c | 4 ++--
arch/x86/kernel/kvmclock.c | 19 +++++++++------
arch/x86/kernel/pvclock.c | 11 ++++++---
arch/x86/kernel/tsc.c | 7 ++++--
arch/x86/lguest/boot.c | 2 +-
arch/x86/platform/uv/uv_time.c | 10 ++++----
arch/x86/xen/time.c | 21 ++++++++++-------
arch/x86/xen/xen-ops.h | 2 +-
drivers/clocksource/acpi_pm.c | 12 +++++-----
drivers/hv/hv_util.c | 6 ++---
include/linux/clocksource.h | 9 ++++++--
include/linux/timekeeper_internal.h | 2 +-
include/linux/timekeeping.h | 17 ++++++++++----
kernel/time/clocksource.c | 4 ++--
kernel/time/jiffies.c | 2 +-
kernel/time/timekeeping.c | 46 ++++++++++++++++++-------------------
18 files changed, 106 insertions(+), 74 deletions(-)
diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c
index 5b882cc..5cd0db2 100644
--- a/arch/x86/hyperv/hv_init.c
+++ b/arch/x86/hyperv/hv_init.c
@@ -36,7 +36,7 @@ struct ms_hyperv_tsc_page *hv_get_tsc_page(void)
return tsc_pg;
}
-static u64 read_hv_clock_tsc(struct clocksource *arg)
+static u64 read_hv_clock_tsc(struct clocksource *arg, u64 *tsc_stamp)
{
u64 current_tick = hv_read_tsc_page(tsc_pg);
@@ -55,7 +55,7 @@ static struct clocksource hyperv_cs_tsc = {
};
#endif
-static u64 read_hv_clock_msr(struct clocksource *arg)
+static u64 read_hv_clock_msr(struct clocksource *arg, u64 *tsc_stamp)
{
u64 current_tick;
/*
diff --git a/arch/x86/include/asm/pvclock.h b/arch/x86/include/asm/pvclock.h
index 448cfe1..c3d049b 100644
--- a/arch/x86/include/asm/pvclock.h
+++ b/arch/x86/include/asm/pvclock.h
@@ -14,7 +14,7 @@ static inline struct pvclock_vsyscall_time_info *pvclock_pvti_cpu0_va(void)
#endif
/* some helper functions for xen and kvm pv clock sources */
-u64 pvclock_clocksource_read(struct pvclock_vcpu_time_info *src);
+u64 pvclock_clocksource_read(struct pvclock_vcpu_time_info *src, u64 *tsc_stamp);
u8 pvclock_read_flags(struct pvclock_vcpu_time_info *src);
void pvclock_set_flags(u8 flags);
unsigned long pvclock_tsc_khz(struct pvclock_vcpu_time_info *src);
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index 89ff7af..b77d3fd 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -792,7 +792,7 @@ static union hpet_lock hpet __cacheline_aligned = {
{ .lock = __ARCH_SPIN_LOCK_UNLOCKED, },
};
-static u64 read_hpet(struct clocksource *cs)
+static u64 read_hpet(struct clocksource *cs, u64 *tsc_stamp)
{
unsigned long flags;
union hpet_lock old, new;
@@ -850,7 +850,7 @@ static u64 read_hpet(struct clocksource *cs)
/*
* For UP or 32-bit.
*/
-static u64 read_hpet(struct clocksource *cs)
+static u64 read_hpet(struct clocksource *cs, u64 *tsc_stamp)
{
return (u64)hpet_readl(HPET_COUNTER);
}
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
index d889676..736c7fd 100644
--- a/arch/x86/kernel/kvmclock.c
+++ b/arch/x86/kernel/kvmclock.c
@@ -82,7 +82,7 @@ static int kvm_set_wallclock(const struct timespec *now)
return -1;
}
-static u64 kvm_clock_read(void)
+static u64 kvm_clock_read(u64 *tsc_stamp)
{
struct pvclock_vcpu_time_info *src;
u64 ret;
@@ -91,30 +91,35 @@ static u64 kvm_clock_read(void)
preempt_disable_notrace();
cpu = smp_processor_id();
src = &hv_clock[cpu].pvti;
- ret = pvclock_clocksource_read(src);
+ ret = pvclock_clocksource_read(src, tsc_stamp);
preempt_enable_notrace();
return ret;
}
-static u64 kvm_clock_get_cycles(struct clocksource *cs)
+static u64 kvm_clock_get_cycles(struct clocksource *cs, u64 *tsc_stamp)
{
- return kvm_clock_read();
+ return kvm_clock_read(tsc_stamp);
+}
+
+static u64 kvm_clock_read_single(void)
+{
+ return kvm_clock_read(NULL);
}
static u64 kvm_sched_clock_read(void)
{
- return kvm_clock_read() - kvm_sched_clock_offset;
+ return kvm_clock_read_single() - kvm_sched_clock_offset;
}
static inline void kvm_sched_clock_init(bool stable)
{
if (!stable) {
- pv_time_ops.sched_clock = kvm_clock_read;
+ pv_time_ops.sched_clock = kvm_clock_read_single;
clear_sched_clock_stable();
return;
}
- kvm_sched_clock_offset = kvm_clock_read();
+ kvm_sched_clock_offset = kvm_clock_read_single();
pv_time_ops.sched_clock = kvm_sched_clock_read;
printk(KERN_INFO "kvm-clock: using sched offset of %llu cycles\n",
diff --git a/arch/x86/kernel/pvclock.c b/arch/x86/kernel/pvclock.c
index 5c3f6d6..d5fe8c3 100644
--- a/arch/x86/kernel/pvclock.c
+++ b/arch/x86/kernel/pvclock.c
@@ -72,17 +72,22 @@ u8 pvclock_read_flags(struct pvclock_vcpu_time_info *src)
return flags & valid_flags;
}
+EXPORT_SYMBOL(pvclock_read_flags);
-u64 pvclock_clocksource_read(struct pvclock_vcpu_time_info *src)
+u64 pvclock_clocksource_read(struct pvclock_vcpu_time_info *src, u64 *tsc_stamp)
{
unsigned version;
u64 ret;
u64 last;
u8 flags;
+ u64 tsc;
do {
version = pvclock_read_begin(src);
- ret = __pvclock_read_cycles(src, rdtsc_ordered());
+ tsc = rdtsc_ordered();
+ if (tsc_stamp)
+ *tsc_stamp = tsc;
+ ret = __pvclock_read_cycles(src, tsc);
flags = src->flags;
} while (pvclock_read_retry(src, version));
@@ -136,7 +141,7 @@ void pvclock_read_wallclock(struct pvclock_wall_clock *wall_clock,
rmb(); /* fetch time before checking version */
} while ((wall_clock->version & 1) || (version != wall_clock->version));
- delta = pvclock_clocksource_read(vcpu_time); /* time since system boot */
+ delta = pvclock_clocksource_read(vcpu_time, NULL); /* time since system boot */
delta += now.tv_sec * (u64)NSEC_PER_SEC + now.tv_nsec;
now.tv_nsec = do_div(delta, NSEC_PER_SEC);
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index 714dfba..fee4d7b 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -1110,9 +1110,12 @@ static void tsc_resume(struct clocksource *cs)
* checking the result of read_tsc() - cycle_last for being negative.
* That works because CLOCKSOURCE_MASK(64) does not mask out any bit.
*/
-static u64 read_tsc(struct clocksource *cs)
+static u64 read_tsc(struct clocksource *cs, u64 *tsc_stamp)
{
- return (u64)rdtsc_ordered();
+ u64 tsc = (u64)rdtsc_ordered();
+ if (tsc_stamp)
+ *tsc_stamp = tsc;
+ return tsc;
}
static void tsc_cs_mark_unstable(struct clocksource *cs)
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c
index 9947269..731423f 100644
--- a/arch/x86/lguest/boot.c
+++ b/arch/x86/lguest/boot.c
@@ -916,7 +916,7 @@ static unsigned long lguest_tsc_khz(void)
* If we can't use the TSC, the kernel falls back to our lower-priority
* "lguest_clock", where we read the time value given to us by the Host.
*/
-static u64 lguest_clock_read(struct clocksource *cs)
+static u64 lguest_clock_read(struct clocksource *cs, u64 *tsc_stamp)
{
unsigned long sec, nsec;
diff --git a/arch/x86/platform/uv/uv_time.c b/arch/x86/platform/uv/uv_time.c
index b082d71..24a4b61 100644
--- a/arch/x86/platform/uv/uv_time.c
+++ b/arch/x86/platform/uv/uv_time.c
@@ -30,7 +30,7 @@
#define RTC_NAME "sgi_rtc"
-static u64 uv_read_rtc(struct clocksource *cs);
+static u64 uv_read_rtc(struct clocksource *cs, u64 *tsc_stamp);
static int uv_rtc_next_event(unsigned long, struct clock_event_device *);
static int uv_rtc_shutdown(struct clock_event_device *evt);
@@ -133,7 +133,7 @@ static int uv_setup_intr(int cpu, u64 expires)
/* Initialize comparator value */
uv_write_global_mmr64(pnode, UVH_INT_CMPB, expires);
- if (uv_read_rtc(NULL) <= expires)
+ if (uv_read_rtc(NULL, NULL) <= expires)
return 0;
return !uv_intr_pending(pnode);
@@ -269,7 +269,7 @@ static int uv_rtc_unset_timer(int cpu, int force)
spin_lock_irqsave(&head->lock, flags);
- if ((head->next_cpu == bcpu && uv_read_rtc(NULL) >= *t) || force)
+ if ((head->next_cpu == bcpu && uv_read_rtc(NULL, NULL) >= *t) || force)
rc = 1;
if (rc) {
@@ -296,7 +296,7 @@ static int uv_rtc_unset_timer(int cpu, int force)
* cachelines of it's own page. This allows faster simultaneous reads
* from a given socket.
*/
-static u64 uv_read_rtc(struct clocksource *cs)
+static u64 uv_read_rtc(struct clocksource *cs, u64 *tsc_stamp)
{
unsigned long offset;
@@ -316,7 +316,7 @@ static int uv_rtc_next_event(unsigned long delta,
{
int ced_cpu = cpumask_first(ced->cpumask);
- return uv_rtc_set_timer(ced_cpu, delta + uv_read_rtc(NULL));
+ return uv_rtc_set_timer(ced_cpu, delta + uv_read_rtc(NULL, NULL));
}
/*
diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c
index a1895a8..6dfab65 100644
--- a/arch/x86/xen/time.c
+++ b/arch/x86/xen/time.c
@@ -39,21 +39,21 @@ static unsigned long xen_tsc_khz(void)
return pvclock_tsc_khz(info);
}
-u64 xen_clocksource_read(void)
+u64 xen_clocksource_read(u64 *tsc_stamp)
{
struct pvclock_vcpu_time_info *src;
u64 ret;
preempt_disable_notrace();
src = &__this_cpu_read(xen_vcpu)->time;
- ret = pvclock_clocksource_read(src);
+ ret = pvclock_clocksource_read(src, tsc_stamp);
preempt_enable_notrace();
return ret;
}
-static u64 xen_clocksource_get_cycles(struct clocksource *cs)
+static u64 xen_clocksource_get_cycles(struct clocksource *cs, u64 *tsc_stamp)
{
- return xen_clocksource_read();
+ return xen_clocksource_read(tsc_stamp);
}
static void xen_read_wallclock(struct timespec *ts)
@@ -105,12 +105,12 @@ static int xen_pvclock_gtod_notify(struct notifier_block *nb,
op.u.settime64.mbz = 0;
op.u.settime64.secs = now.tv_sec;
op.u.settime64.nsecs = now.tv_nsec;
- op.u.settime64.system_time = xen_clocksource_read();
+ op.u.settime64.system_time = xen_clocksource_read(NULL);
} else {
op.cmd = XENPF_settime32;
op.u.settime32.secs = now.tv_sec;
op.u.settime32.nsecs = now.tv_nsec;
- op.u.settime32.system_time = xen_clocksource_read();
+ op.u.settime32.system_time = xen_clocksource_read(NULL);
}
ret = HYPERVISOR_platform_op(&op);
@@ -178,7 +178,7 @@ static struct clocksource xen_clocksource __read_mostly = {
*/
static s64 get_abs_timeout(unsigned long delta)
{
- return xen_clocksource_read() + delta;
+ return xen_clocksource_read(NULL) + delta;
}
static int xen_timerop_shutdown(struct clock_event_device *evt)
@@ -366,8 +366,13 @@ void xen_timer_resume(void)
}
}
+static u64 xen_clocksource_read_data(void)
+{
+ return xen_clocksource_read(NULL);
+}
+
static const struct pv_time_ops xen_time_ops __initconst = {
- .sched_clock = xen_clocksource_read,
+ .sched_clock = xen_clocksource_read_data,
.steal_clock = xen_steal_clock,
};
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h
index 9a440a4..035f2ea 100644
--- a/arch/x86/xen/xen-ops.h
+++ b/arch/x86/xen/xen-ops.h
@@ -67,7 +67,7 @@ void xen_init_irq_ops(void);
void xen_setup_timer(int cpu);
void xen_setup_runstate_info(int cpu);
void xen_teardown_timer(int cpu);
-u64 xen_clocksource_read(void);
+u64 xen_clocksource_read(u64 *tsc_stamp);
void xen_setup_cpu_clockevents(void);
void __init xen_init_time_ops(void);
void __init xen_hvm_init_time_ops(void);
diff --git a/drivers/clocksource/acpi_pm.c b/drivers/clocksource/acpi_pm.c
index 1961e35..a75de2c 100644
--- a/drivers/clocksource/acpi_pm.c
+++ b/drivers/clocksource/acpi_pm.c
@@ -58,7 +58,7 @@ u32 acpi_pm_read_verified(void)
return v2;
}
-static u64 acpi_pm_read(struct clocksource *cs)
+static u64 acpi_pm_read(struct clocksource *cs, u64 *tsc_stamp)
{
return (u64)read_pmtmr();
}
@@ -81,7 +81,7 @@ static int __init acpi_pm_good_setup(char *__str)
}
__setup("acpi_pm_good", acpi_pm_good_setup);
-static u64 acpi_pm_read_slow(struct clocksource *cs)
+static u64 acpi_pm_read_slow(struct clocksource *cs, u64 *tsc_stamp)
{
return (u64)acpi_pm_read_verified();
}
@@ -149,9 +149,9 @@ static int verify_pmtmr_rate(void)
unsigned long count, delta;
mach_prepare_counter();
- value1 = clocksource_acpi_pm.read(&clocksource_acpi_pm);
+ value1 = clocksource_acpi_pm.read(&clocksource_acpi_pm, NULL);
mach_countup(&count);
- value2 = clocksource_acpi_pm.read(&clocksource_acpi_pm);
+ value2 = clocksource_acpi_pm.read(&clocksource_acpi_pm, NULL);
delta = (value2 - value1) & ACPI_PM_MASK;
/* Check that the PMTMR delta is within 5% of what we expect */
@@ -184,9 +184,9 @@ static int __init init_acpi_pm_clocksource(void)
/* "verify" this timing source: */
for (j = 0; j < ACPI_PM_MONOTONICITY_CHECKS; j++) {
udelay(100 * j);
- value1 = clocksource_acpi_pm.read(&clocksource_acpi_pm);
+ value1 = clocksource_acpi_pm.read(&clocksource_acpi_pm, NULL);
for (i = 0; i < ACPI_PM_READ_CHECKS; i++) {
- value2 = clocksource_acpi_pm.read(&clocksource_acpi_pm);
+ value2 = clocksource_acpi_pm.read(&clocksource_acpi_pm, NULL);
if (value2 == value1)
continue;
if (value2 > value1)
diff --git a/drivers/hv/hv_util.c b/drivers/hv/hv_util.c
index 186b100..74def09 100644
--- a/drivers/hv/hv_util.c
+++ b/drivers/hv/hv_util.c
@@ -218,7 +218,7 @@ static void hv_set_host_time(struct work_struct *work)
wrk = container_of(work, struct adj_time_work, work);
- reftime = hyperv_cs->read(hyperv_cs);
+ reftime = hyperv_cs->read(hyperv_cs, NULL);
newtime = wrk->host_time + (reftime - wrk->ref_time);
host_ts = ns_to_timespec64((newtime - WLTIMEDELTA) * 100);
@@ -278,7 +278,7 @@ static inline void adj_guesttime(u64 hosttime, u64 reftime, u8 adj_flags)
*/
spin_lock_irqsave(&host_ts.lock, flags);
- cur_reftime = hyperv_cs->read(hyperv_cs);
+ cur_reftime = hyperv_cs->read(hyperv_cs, NULL);
host_ts.host_time = hosttime;
host_ts.ref_time = cur_reftime;
ktime_get_snapshot(&host_ts.snap);
@@ -530,7 +530,7 @@ static int hv_ptp_gettime(struct ptp_clock_info *info, struct timespec64 *ts)
u64 newtime, reftime;
spin_lock_irqsave(&host_ts.lock, flags);
- reftime = hyperv_cs->read(hyperv_cs);
+ reftime = hyperv_cs->read(hyperv_cs, NULL);
newtime = host_ts.host_time + (reftime - host_ts.ref_time);
*ts = ns_to_timespec64((newtime - WLTIMEDELTA) * 100);
spin_unlock_irqrestore(&host_ts.lock, flags);
diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h
index f2b10d9..897aeb5 100644
--- a/include/linux/clocksource.h
+++ b/include/linux/clocksource.h
@@ -48,7 +48,12 @@ struct module;
* 400-499: Perfect
* The ideal clocksource. A must-use where
* available.
- * @read: returns a cycle value, passes clocksource as argument
- * @read: returns a cycle value, passes clocksource as argument
+ * @read: returns a cycle value; passes the clocksource and
+ * a pointer where the tsc value used in the
+ * calculation of the cycle value is stored, if any;
+ * otherwise the pointed-to value is left untouched.
+ * Implementations must check that the pointer is non-NULL.
+ * This value is used in KVM code for storing tsc_timestamp.
* @enable: optional function to enable the clocksource
* @disable: optional function to disable the clocksource
* @mask: bitmask for two's complement
@@ -77,7 +82,7 @@ struct module;
* structure.
*/
struct clocksource {
- u64 (*read)(struct clocksource *cs);
+ u64 (*read)(struct clocksource *cs, u64 *tsc_stamp);
u64 mask;
u32 mult;
u32 shift;
diff --git a/include/linux/timekeeper_internal.h b/include/linux/timekeeper_internal.h
index 110f453..58a2281 100644
--- a/include/linux/timekeeper_internal.h
+++ b/include/linux/timekeeper_internal.h
@@ -29,7 +29,7 @@
*/
struct tk_read_base {
struct clocksource *clock;
- u64 (*read)(struct clocksource *cs);
+ u64 (*read)(struct clocksource *cs, u64 *tsc_stamp);
u64 mask;
u64 cycle_last;
u32 mult;
diff --git a/include/linux/timekeeping.h b/include/linux/timekeeping.h
index ddc229f..31df92c 100644
--- a/include/linux/timekeeping.h
+++ b/include/linux/timekeeping.h
@@ -171,7 +171,7 @@ enum tk_offsets {
};
extern ktime_t ktime_get(void);
-extern ktime_t ktime_get_with_offset(enum tk_offsets offs);
+extern ktime_t ktime_get_with_offset(enum tk_offsets offs, u64 *cycles);
extern ktime_t ktime_mono_to_any(ktime_t tmono, enum tk_offsets offs);
extern ktime_t ktime_get_raw(void);
extern u32 ktime_get_resolution_ns(void);
@@ -181,7 +181,7 @@ extern u32 ktime_get_resolution_ns(void);
*/
static inline ktime_t ktime_get_real(void)
{
- return ktime_get_with_offset(TK_OFFS_REAL);
+ return ktime_get_with_offset(TK_OFFS_REAL, NULL);
}
/**
@@ -192,15 +192,19 @@ static inline ktime_t ktime_get_real(void)
*/
static inline ktime_t ktime_get_boottime(void)
{
- return ktime_get_with_offset(TK_OFFS_BOOT);
+ return ktime_get_with_offset(TK_OFFS_BOOT, NULL);
}
+static inline ktime_t ktime_get_boottime_and_cycles(u64 *cycles)
+{
+ return ktime_get_with_offset(TK_OFFS_BOOT, cycles);
+}
/**
* ktime_get_clocktai - Returns the TAI time of day in ktime_t format
*/
static inline ktime_t ktime_get_clocktai(void)
{
- return ktime_get_with_offset(TK_OFFS_TAI);
+ return ktime_get_with_offset(TK_OFFS_TAI, NULL);
}
/**
@@ -226,6 +230,11 @@ static inline u64 ktime_get_boot_ns(void)
return ktime_to_ns(ktime_get_boottime());
}
+static inline u64 ktime_get_boot_ns_and_cycles(u64 *cycles)
+{
+ return ktime_to_ns(ktime_get_boottime_and_cycles(cycles));
+}
+
static inline u64 ktime_get_tai_ns(void)
{
return ktime_to_ns(ktime_get_clocktai());
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index 93621ae..e48a6eb 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -194,8 +194,8 @@ static void clocksource_watchdog(unsigned long data)
}
local_irq_disable();
- csnow = cs->read(cs);
- wdnow = watchdog->read(watchdog);
+ csnow = cs->read(cs, NULL);
+ wdnow = watchdog->read(watchdog, NULL);
local_irq_enable();
/* Clocksource initialized ? */
diff --git a/kernel/time/jiffies.c b/kernel/time/jiffies.c
index 4977191..079a78ec 100644
--- a/kernel/time/jiffies.c
+++ b/kernel/time/jiffies.c
@@ -48,7 +48,7 @@
#define JIFFIES_SHIFT 8
#endif
-static u64 jiffies_read(struct clocksource *cs)
+static u64 jiffies_read(struct clocksource *cs, u64 *tsc_stamp)
{
return (u64) jiffies;
}
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 9652bc5..994f83b 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -160,7 +160,7 @@ static void timekeeping_check_update(struct timekeeper *tk, u64 offset)
}
}
-static inline u64 timekeeping_get_delta(struct tk_read_base *tkr)
+static inline u64 timekeeping_get_delta(struct tk_read_base *tkr, u64 *tsc_stamp)
{
struct timekeeper *tk = &tk_core.timekeeper;
u64 now, last, mask, max, delta;
@@ -175,7 +175,7 @@ static inline u64 timekeeping_get_delta(struct tk_read_base *tkr)
*/
do {
seq = read_seqcount_begin(&tk_core.seq);
- now = tkr->read(tkr->clock);
+ now = tkr->read(tkr->clock, tsc_stamp);
last = tkr->cycle_last;
mask = tkr->mask;
max = tkr->clock->max_cycles;
@@ -204,12 +204,12 @@ static inline u64 timekeeping_get_delta(struct tk_read_base *tkr)
static inline void timekeeping_check_update(struct timekeeper *tk, u64 offset)
{
}
-static inline u64 timekeeping_get_delta(struct tk_read_base *tkr)
+static inline u64 timekeeping_get_delta(struct tk_read_base *tkr, u64 *tsc_stamp)
{
u64 cycle_now, delta;
/* read clocksource */
- cycle_now = tkr->read(tkr->clock);
+ cycle_now = tkr->read(tkr->clock, tsc_stamp);
/* calculate the delta since the last update_wall_time */
delta = clocksource_delta(cycle_now, tkr->cycle_last, tkr->mask);
@@ -240,7 +240,7 @@ static void tk_setup_internals(struct timekeeper *tk, struct clocksource *clock)
tk->tkr_mono.clock = clock;
tk->tkr_mono.read = clock->read;
tk->tkr_mono.mask = clock->mask;
- tk->tkr_mono.cycle_last = tk->tkr_mono.read(clock);
+ tk->tkr_mono.cycle_last = tk->tkr_mono.read(clock, NULL);
tk->tkr_raw.clock = clock;
tk->tkr_raw.read = clock->read;
@@ -311,11 +311,11 @@ static inline u64 timekeeping_delta_to_ns(struct tk_read_base *tkr, u64 delta)
return nsec + arch_gettimeoffset();
}
-static inline u64 timekeeping_get_ns(struct tk_read_base *tkr)
+static inline u64 timekeeping_get_ns(struct tk_read_base *tkr, u64 *tsc_stamp)
{
u64 delta;
- delta = timekeeping_get_delta(tkr);
+ delta = timekeeping_get_delta(tkr, tsc_stamp);
return timekeeping_delta_to_ns(tkr, delta);
}
@@ -404,7 +404,7 @@ static __always_inline u64 __ktime_get_fast_ns(struct tk_fast *tkf)
now += timekeeping_delta_to_ns(tkr,
clocksource_delta(
- tkr->read(tkr->clock),
+ tkr->read(tkr->clock, NULL),
tkr->cycle_last,
tkr->mask));
} while (read_seqcount_retry(&tkf->seq, seq));
@@ -456,7 +456,7 @@ EXPORT_SYMBOL_GPL(ktime_get_boot_fast_ns);
/* Suspend-time cycles value for halted fast timekeeper. */
static u64 cycles_at_suspend;
-static u64 dummy_clock_read(struct clocksource *cs)
+static u64 dummy_clock_read(struct clocksource *cs, u64 *tsc_stamp)
{
return cycles_at_suspend;
}
@@ -477,7 +477,7 @@ static void halt_fast_timekeeper(struct timekeeper *tk)
struct tk_read_base *tkr = &tk->tkr_mono;
memcpy(&tkr_dummy, tkr, sizeof(tkr_dummy));
- cycles_at_suspend = tkr->read(tkr->clock);
+ cycles_at_suspend = tkr->read(tkr->clock, NULL);
tkr_dummy.read = dummy_clock_read;
update_fast_timekeeper(&tkr_dummy, &tk_fast_mono);
@@ -653,7 +653,7 @@ static void timekeeping_forward_now(struct timekeeper *tk)
u64 cycle_now, delta;
u64 nsec;
- cycle_now = tk->tkr_mono.read(clock);
+ cycle_now = tk->tkr_mono.read(clock, NULL);
delta = clocksource_delta(cycle_now, tk->tkr_mono.cycle_last, tk->tkr_mono.mask);
tk->tkr_mono.cycle_last = cycle_now;
tk->tkr_raw.cycle_last = cycle_now;
@@ -686,7 +686,7 @@ int __getnstimeofday64(struct timespec64 *ts)
seq = read_seqcount_begin(&tk_core.seq);
ts->tv_sec = tk->xtime_sec;
- nsecs = timekeeping_get_ns(&tk->tkr_mono);
+ nsecs = timekeeping_get_ns(&tk->tkr_mono, NULL);
} while (read_seqcount_retry(&tk_core.seq, seq));
@@ -727,7 +727,7 @@ ktime_t ktime_get(void)
do {
seq = read_seqcount_begin(&tk_core.seq);
base = tk->tkr_mono.base;
- nsecs = timekeeping_get_ns(&tk->tkr_mono);
+ nsecs = timekeeping_get_ns(&tk->tkr_mono, NULL);
} while (read_seqcount_retry(&tk_core.seq, seq));
@@ -758,7 +758,7 @@ static ktime_t *offsets[TK_OFFS_MAX] = {
[TK_OFFS_TAI] = &tk_core.timekeeper.offs_tai,
};
-ktime_t ktime_get_with_offset(enum tk_offsets offs)
+ktime_t ktime_get_with_offset(enum tk_offsets offs, u64 *tsc_stamp)
{
struct timekeeper *tk = &tk_core.timekeeper;
unsigned int seq;
@@ -770,7 +770,7 @@ ktime_t ktime_get_with_offset(enum tk_offsets offs)
do {
seq = read_seqcount_begin(&tk_core.seq);
base = ktime_add(tk->tkr_mono.base, *offset);
- nsecs = timekeeping_get_ns(&tk->tkr_mono);
+ nsecs = timekeeping_get_ns(&tk->tkr_mono, tsc_stamp);
} while (read_seqcount_retry(&tk_core.seq, seq));
@@ -812,7 +812,7 @@ ktime_t ktime_get_raw(void)
do {
seq = read_seqcount_begin(&tk_core.seq);
base = tk->tkr_raw.base;
- nsecs = timekeeping_get_ns(&tk->tkr_raw);
+ nsecs = timekeeping_get_ns(&tk->tkr_raw, NULL);
} while (read_seqcount_retry(&tk_core.seq, seq));
@@ -840,7 +840,7 @@ void ktime_get_ts64(struct timespec64 *ts)
do {
seq = read_seqcount_begin(&tk_core.seq);
ts->tv_sec = tk->xtime_sec;
- nsec = timekeeping_get_ns(&tk->tkr_mono);
+ nsec = timekeeping_get_ns(&tk->tkr_mono, NULL);
tomono = tk->wall_to_monotonic;
} while (read_seqcount_retry(&tk_core.seq, seq));
@@ -930,7 +930,7 @@ void ktime_get_snapshot(struct system_time_snapshot *systime_snapshot)
do {
seq = read_seqcount_begin(&tk_core.seq);
- now = tk->tkr_mono.read(tk->tkr_mono.clock);
+ now = tk->tkr_mono.read(tk->tkr_mono.clock, NULL);
systime_snapshot->cs_was_changed_seq = tk->cs_was_changed_seq;
systime_snapshot->clock_was_set_seq = tk->clock_was_set_seq;
base_real = ktime_add(tk->tkr_mono.base,
@@ -1108,7 +1108,7 @@ int get_device_system_crosststamp(int (*get_time_fn)
* Check whether the system counter value provided by the
* device driver is on the current timekeeping interval.
*/
- now = tk->tkr_mono.read(tk->tkr_mono.clock);
+ now = tk->tkr_mono.read(tk->tkr_mono.clock, NULL);
interval_start = tk->tkr_mono.cycle_last;
if (!cycle_between(interval_start, cycles, now)) {
clock_was_set_seq = tk->clock_was_set_seq;
@@ -1359,7 +1359,7 @@ void getrawmonotonic64(struct timespec64 *ts)
do {
seq = read_seqcount_begin(&tk_core.seq);
- nsecs = timekeeping_get_ns(&tk->tkr_raw);
+ nsecs = timekeeping_get_ns(&tk->tkr_raw, NULL);
ts64 = tk->raw_time;
} while (read_seqcount_retry(&tk_core.seq, seq));
@@ -1629,7 +1629,7 @@ void timekeeping_resume(void)
* The less preferred source will only be tried if there is no better
* usable source. The rtc part is handled separately in rtc core code.
*/
- cycle_now = tk->tkr_mono.read(clock);
+ cycle_now = tk->tkr_mono.read(clock, NULL);
if ((clock->flags & CLOCK_SOURCE_SUSPEND_NONSTOP) &&
cycle_now > tk->tkr_mono.cycle_last) {
u64 nsec, cyc_delta;
@@ -2030,7 +2030,7 @@ void update_wall_time(void)
#ifdef CONFIG_ARCH_USES_GETTIMEOFFSET
offset = real_tk->cycle_interval;
#else
- offset = clocksource_delta(tk->tkr_mono.read(tk->tkr_mono.clock),
+ offset = clocksource_delta(tk->tkr_mono.read(tk->tkr_mono.clock, NULL),
tk->tkr_mono.cycle_last, tk->tkr_mono.mask);
#endif
@@ -2203,7 +2203,7 @@ ktime_t ktime_get_update_offsets_now(unsigned int *cwsseq, ktime_t *offs_real,
seq = read_seqcount_begin(&tk_core.seq);
base = tk->tkr_mono.base;
- nsecs = timekeeping_get_ns(&tk->tkr_mono);
+ nsecs = timekeeping_get_ns(&tk->tkr_mono, NULL);
base = ktime_add_ns(base, nsecs);
if (*cwsseq != tk->clock_was_set_seq) {
--
2.7.4
^ permalink raw reply related [flat|nested] 11+ messages in thread
* [RFC PATCH 2/2] KVM: x86: add support of kvm-clock stability in L2
2017-06-28 10:54 [RFC PATCH 0/2] make L2 kvm-clock stable Denis Plotnikov
2017-06-28 10:55 ` [RFC PATCH 1/2] timekeeper: change interface of clocksource reading functions Denis Plotnikov
@ 2017-06-28 10:55 ` Denis Plotnikov
2017-07-10 13:25 ` Radim Krčmář
2017-07-03 16:12 ` [RFC PATCH 0/2] make L2 kvm-clock stable Denis Plotnikov
2 siblings, 1 reply; 11+ messages in thread
From: Denis Plotnikov @ 2017-06-28 10:55 UTC (permalink / raw)
To: kvm, rkrcmar, pbonzini; +Cc: dplotnikov, den, rkagan
Get rid of complex shadow monotonic timekeeper support in KVM.
Extend and use timekeeper infrastructure instead.
Make kvm-clock stable in L2 using the changed timekeeper
Signed-off-by: Denis Plotnikov <dplotnikov@virtuozzo.com>
---
arch/x86/include/asm/kvm_host.h | 2 +-
arch/x86/kvm/x86.c | 294 +++++++++++-----------------------------
include/linux/timekeeping.h | 19 ++-
kernel/time/timekeeping.c | 22 ++-
4 files changed, 117 insertions(+), 220 deletions(-)
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 695605e..66d678c 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -781,7 +781,7 @@ struct kvm_arch {
u64 cur_tsc_generation;
int nr_vcpus_matched_tsc;
- spinlock_t pvclock_gtod_sync_lock;
+ spinlock_t master_clock_lock;
bool use_master_clock;
u64 master_kernel_ns;
u64 master_cycle_now;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 87d3cb9..49ae57fc 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -50,6 +50,7 @@
#include <linux/hash.h>
#include <linux/pci.h>
#include <linux/timekeeper_internal.h>
+#include <linux/timekeeping.h>
#include <linux/pvclock_gtod.h>
#include <linux/kvm_irqfd.h>
#include <linux/irqbypass.h>
@@ -1131,50 +1132,6 @@ static int do_set_msr(struct kvm_vcpu *vcpu, unsigned index, u64 *data)
return kvm_set_msr(vcpu, &msr);
}
-#ifdef CONFIG_X86_64
-struct pvclock_gtod_data {
- seqcount_t seq;
-
- struct { /* extract of a clocksource struct */
- int vclock_mode;
- u64 cycle_last;
- u64 mask;
- u32 mult;
- u32 shift;
- } clock;
-
- u64 boot_ns;
- u64 nsec_base;
- u64 wall_time_sec;
-};
-
-static struct pvclock_gtod_data pvclock_gtod_data;
-
-static void update_pvclock_gtod(struct timekeeper *tk)
-{
- struct pvclock_gtod_data *vdata = &pvclock_gtod_data;
- u64 boot_ns;
-
- boot_ns = ktime_to_ns(ktime_add(tk->tkr_mono.base, tk->offs_boot));
-
- write_seqcount_begin(&vdata->seq);
-
- /* copy pvclock gtod data */
- vdata->clock.vclock_mode = tk->tkr_mono.clock->archdata.vclock_mode;
- vdata->clock.cycle_last = tk->tkr_mono.cycle_last;
- vdata->clock.mask = tk->tkr_mono.mask;
- vdata->clock.mult = tk->tkr_mono.mult;
- vdata->clock.shift = tk->tkr_mono.shift;
-
- vdata->boot_ns = boot_ns;
- vdata->nsec_base = tk->tkr_mono.xtime_nsec;
-
- vdata->wall_time_sec = tk->xtime_sec;
-
- write_seqcount_end(&vdata->seq);
-}
-#endif
-
void kvm_set_pending_timer(struct kvm_vcpu *vcpu)
{
/*
@@ -1266,10 +1223,6 @@ static void kvm_get_time_scale(uint64_t scaled_hz, uint64_t base_hz,
__func__, base_hz, scaled_hz, shift, *pmultiplier);
}
-#ifdef CONFIG_X86_64
-static atomic_t kvm_guest_has_master_clock = ATOMIC_INIT(0);
-#endif
-
static DEFINE_PER_CPU(unsigned long, cpu_tsc_khz);
static unsigned long max_tsc_khz;
@@ -1358,31 +1311,52 @@ static u64 compute_guest_tsc(struct kvm_vcpu *vcpu, s64 kernel_ns)
return tsc;
}
+static bool pvclock_stable(void)
+{
+ struct pvclock_vcpu_time_info *pvti = &pvclock_pvti_cpu0_va()->pvti;
+ return pvclock_read_flags(pvti) & PVCLOCK_TSC_STABLE_BIT;
+}
+
static void kvm_track_tsc_matching(struct kvm_vcpu *vcpu)
{
#ifdef CONFIG_X86_64
bool vcpus_matched;
struct kvm_arch *ka = &vcpu->kvm->arch;
- struct pvclock_gtod_data *gtod = &pvclock_gtod_data;
+
+ unsigned seq;
+ const seqcount_t *s = get_tk_seq();
+ int vclock_mode;
+ bool stable;
vcpus_matched = (ka->nr_vcpus_matched_tsc + 1 ==
atomic_read(&vcpu->kvm->online_vcpus));
- /*
- * Once the masterclock is enabled, always perform request in
- * order to update it.
- *
- * In order to enable masterclock, the host clocksource must be TSC
- * and the vcpus need to have matched TSCs. When that happens,
- * perform request to enable masterclock.
- */
- if (ka->use_master_clock ||
- (gtod->clock.vclock_mode == VCLOCK_TSC && vcpus_matched))
+ {
+ seq = read_seqcount_begin(s);
+ vclock_mode = get_tk_mono_clock_mode();
+ stable = false;
+ /*
+ * Once the masterclock is enabled, always perform request in
+ * order to update it.
+ *
+ * In order to enable masterclock, the host clocksource must be TSC
+ * or stable paravirtualized clocksource and the vcpus need to
+ * have matched TSCs.
+ * When that happens, perform request to enable masterclock.
+ */
+ if (ka->use_master_clock ||
+ ((vclock_mode == VCLOCK_TSC ||
+ (vclock_mode == VCLOCK_PVCLOCK && pvclock_stable())) &&
+ vcpus_matched))
+ stable = true;
+ } while (unlikely(read_seqcount_retry(s, seq)));
+
+ if (stable)
kvm_make_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu);
trace_kvm_track_tsc(vcpu->vcpu_id, ka->nr_vcpus_matched_tsc,
atomic_read(&vcpu->kvm->online_vcpus),
- ka->use_master_clock, gtod->clock.vclock_mode);
+ ka->use_master_clock, vclock_mode);
#endif
}
@@ -1535,7 +1509,7 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr)
kvm_vcpu_write_tsc_offset(vcpu, offset);
raw_spin_unlock_irqrestore(&kvm->arch.tsc_write_lock, flags);
- spin_lock(&kvm->arch.pvclock_gtod_sync_lock);
+ spin_lock(&kvm->arch.master_clock_lock);
if (!matched) {
kvm->arch.nr_vcpus_matched_tsc = 0;
} else if (!already_matched) {
@@ -1543,7 +1517,7 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr)
}
kvm_track_tsc_matching(vcpu);
- spin_unlock(&kvm->arch.pvclock_gtod_sync_lock);
+ spin_unlock(&kvm->arch.master_clock_lock);
}
EXPORT_SYMBOL_GPL(kvm_write_tsc);
@@ -1563,99 +1537,45 @@ static inline void adjust_tsc_offset_host(struct kvm_vcpu *vcpu, s64 adjustment)
}
#ifdef CONFIG_X86_64
-
-static u64 read_tsc(void)
-{
- u64 ret = (u64)rdtsc_ordered();
- u64 last = pvclock_gtod_data.clock.cycle_last;
-
- if (likely(ret >= last))
- return ret;
-
- /*
- * GCC likes to generate cmov here, but this branch is extremely
- * predictable (it's just a function of time and the likely is
- * very likely) and there's a data dependence, so force GCC
- * to generate a branch instead. I don't barrier() because
- * we don't actually need a barrier, and if this function
- * ever gets inlined it will generate worse code.
- */
- asm volatile ("");
- return last;
-}
-
-static inline u64 vgettsc(u64 *cycle_now)
-{
- long v;
- struct pvclock_gtod_data *gtod = &pvclock_gtod_data;
-
- *cycle_now = read_tsc();
-
- v = (*cycle_now - gtod->clock.cycle_last) & gtod->clock.mask;
- return v * gtod->clock.mult;
-}
-
-static int do_monotonic_boot(s64 *t, u64 *cycle_now)
-{
- struct pvclock_gtod_data *gtod = &pvclock_gtod_data;
- unsigned long seq;
- int mode;
- u64 ns;
-
- do {
- seq = read_seqcount_begin(&gtod->seq);
- mode = gtod->clock.vclock_mode;
- ns = gtod->nsec_base;
- ns += vgettsc(cycle_now);
- ns >>= gtod->clock.shift;
- ns += gtod->boot_ns;
- } while (unlikely(read_seqcount_retry(&gtod->seq, seq)));
- *t = ns;
-
- return mode;
-}
-
-static int do_realtime(struct timespec *ts, u64 *cycle_now)
+/* returns true if host is using tsc clocksource */
+static bool kvm_get_host_time_and_cycles(s64 *kernel_ns, u64 *cycle_now,
+ u64 (*get_time)(u64 *cycle_now))
{
- struct pvclock_gtod_data *gtod = &pvclock_gtod_data;
- unsigned long seq;
- int mode;
- u64 ns;
-
- do {
- seq = read_seqcount_begin(&gtod->seq);
- mode = gtod->clock.vclock_mode;
- ts->tv_sec = gtod->wall_time_sec;
- ns = gtod->nsec_base;
- ns += vgettsc(cycle_now);
- ns >>= gtod->clock.shift;
- } while (unlikely(read_seqcount_retry(&gtod->seq, seq)));
-
- ts->tv_sec += __iter_div_u64_rem(ns, NSEC_PER_SEC, &ns);
- ts->tv_nsec = ns;
+ unsigned seq;
+ const seqcount_t *s = get_tk_seq();
+ int vclock_mode;
+ bool res;
+
+ do {
+ res = false;
+ seq = read_seqcount_begin(s);
+ vclock_mode = get_tk_mono_clock_mode();
+ if (vclock_mode == VCLOCK_TSC ||
+ (vclock_mode == VCLOCK_PVCLOCK && pvclock_stable())) {
+ *kernel_ns = get_time(cycle_now);
+ res = true;
+ }
+ } while (unlikely(read_seqcount_retry(s, seq)));
- return mode;
+ return res;
}
-/* returns true if host is using tsc clocksource */
static bool kvm_get_time_and_clockread(s64 *kernel_ns, u64 *cycle_now)
{
- /* checked again under seqlock below */
- if (pvclock_gtod_data.clock.vclock_mode != VCLOCK_TSC)
- return false;
-
- return do_monotonic_boot(kernel_ns, cycle_now) == VCLOCK_TSC;
+ return kvm_get_host_time_and_cycles(
+ kernel_ns, cycle_now, ktime_get_boot_ns_with_cycles);
}
-/* returns true if host is using tsc clocksource */
-static bool kvm_get_walltime_and_clockread(struct timespec *ts,
- u64 *cycle_now)
+static bool kvm_get_walltime_and_clockread(struct timespec *ts, u64 *cycle_now)
{
- /* checked again under seqlock below */
- if (pvclock_gtod_data.clock.vclock_mode != VCLOCK_TSC)
- return false;
+ bool res;
+ s64 kernel_ns;
- return do_realtime(ts, cycle_now) == VCLOCK_TSC;
+ res = kvm_get_host_time_and_cycles(
+ &kernel_ns, cycle_now, ktime_get_real_ns_with_cycles);
+ *ts = ktime_to_timespec(kernel_ns);
+
+ return res;
}
#endif
@@ -1700,19 +1620,18 @@ static bool kvm_get_walltime_and_clockread(struct timespec *ts,
*
*/
-static void pvclock_update_vm_gtod_copy(struct kvm *kvm)
+static void update_masterclock_data(struct kvm *kvm)
{
#ifdef CONFIG_X86_64
struct kvm_arch *ka = &kvm->arch;
- int vclock_mode;
bool host_tsc_clocksource, vcpus_matched;
vcpus_matched = (ka->nr_vcpus_matched_tsc + 1 ==
atomic_read(&kvm->online_vcpus));
/*
- * If the host uses TSC clock, then passthrough TSC as stable
- * to the guest.
+ * If the host uses TSC clock or a stable paravirtualized clock,
+ * then passthrough the clock as stable to the guest.
*/
host_tsc_clocksource = kvm_get_time_and_clockread(
&ka->master_kernel_ns,
@@ -1721,13 +1640,6 @@ static void pvclock_update_vm_gtod_copy(struct kvm *kvm)
ka->use_master_clock = host_tsc_clocksource && vcpus_matched
&& !backwards_tsc_observed
&& !ka->boot_vcpu_runs_old_kvmclock;
-
- if (ka->use_master_clock)
- atomic_set(&kvm_guest_has_master_clock, 1);
-
- vclock_mode = pvclock_gtod_data.clock.vclock_mode;
- trace_kvm_update_master_clock(ka->use_master_clock, vclock_mode,
- vcpus_matched);
#endif
}
@@ -1743,10 +1655,10 @@ static void kvm_gen_update_masterclock(struct kvm *kvm)
struct kvm_vcpu *vcpu;
struct kvm_arch *ka = &kvm->arch;
- spin_lock(&ka->pvclock_gtod_sync_lock);
+ spin_lock(&ka->master_clock_lock);
kvm_make_mclock_inprogress_request(kvm);
/* no guest entries from this point */
- pvclock_update_vm_gtod_copy(kvm);
+ update_masterclock_data(kvm);
kvm_for_each_vcpu(i, vcpu, kvm)
kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
@@ -1755,7 +1667,7 @@ static void kvm_gen_update_masterclock(struct kvm *kvm)
kvm_for_each_vcpu(i, vcpu, kvm)
kvm_clear_request(KVM_REQ_MCLOCK_INPROGRESS, vcpu);
- spin_unlock(&ka->pvclock_gtod_sync_lock);
+ spin_unlock(&ka->master_clock_lock);
#endif
}
@@ -1765,15 +1677,15 @@ u64 get_kvmclock_ns(struct kvm *kvm)
struct pvclock_vcpu_time_info hv_clock;
u64 ret;
- spin_lock(&ka->pvclock_gtod_sync_lock);
+ spin_lock(&ka->master_clock_lock);
if (!ka->use_master_clock) {
- spin_unlock(&ka->pvclock_gtod_sync_lock);
+ spin_unlock(&ka->master_clock_lock);
return ktime_get_boot_ns() + ka->kvmclock_offset;
}
hv_clock.tsc_timestamp = ka->master_cycle_now;
hv_clock.system_time = ka->master_kernel_ns + ka->kvmclock_offset;
- spin_unlock(&ka->pvclock_gtod_sync_lock);
+ spin_unlock(&ka->master_clock_lock);
/* both __this_cpu_read() and rdtsc() should be on the same cpu */
get_cpu();
@@ -1859,13 +1771,13 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
* If the host uses TSC clock, then passthrough TSC as stable
* to the guest.
*/
- spin_lock(&ka->pvclock_gtod_sync_lock);
+ spin_lock(&ka->master_clock_lock);
use_master_clock = ka->use_master_clock;
if (use_master_clock) {
host_tsc = ka->master_cycle_now;
kernel_ns = ka->master_kernel_ns;
}
- spin_unlock(&ka->pvclock_gtod_sync_lock);
+ spin_unlock(&ka->master_clock_lock);
/* Keep irq disabled to prevent changes to the clock */
local_irq_save(flags);
@@ -6012,50 +5924,6 @@ static void kvm_set_mmio_spte_mask(void)
kvm_mmu_set_mmio_spte_mask(mask);
}
-#ifdef CONFIG_X86_64
-static void pvclock_gtod_update_fn(struct work_struct *work)
-{
- struct kvm *kvm;
-
- struct kvm_vcpu *vcpu;
- int i;
-
- spin_lock(&kvm_lock);
- list_for_each_entry(kvm, &vm_list, vm_list)
- kvm_for_each_vcpu(i, vcpu, kvm)
- kvm_make_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu);
- atomic_set(&kvm_guest_has_master_clock, 0);
- spin_unlock(&kvm_lock);
-}
-
-static DECLARE_WORK(pvclock_gtod_work, pvclock_gtod_update_fn);
-
-/*
- * Notification about pvclock gtod data update.
- */
-static int pvclock_gtod_notify(struct notifier_block *nb, unsigned long unused,
- void *priv)
-{
- struct pvclock_gtod_data *gtod = &pvclock_gtod_data;
- struct timekeeper *tk = priv;
-
- update_pvclock_gtod(tk);
-
- /* disable master clock if host does not trust, or does not
- * use, TSC clocksource
- */
- if (gtod->clock.vclock_mode != VCLOCK_TSC &&
- atomic_read(&kvm_guest_has_master_clock) != 0)
- queue_work(system_long_wq, &pvclock_gtod_work);
-
- return 0;
-}
-
-static struct notifier_block pvclock_gtod_notifier = {
- .notifier_call = pvclock_gtod_notify,
-};
-#endif
-
int kvm_arch_init(void *opaque)
{
int r;
@@ -6104,9 +5972,6 @@ int kvm_arch_init(void *opaque)
host_xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);
kvm_lapic_init();
-#ifdef CONFIG_X86_64
- pvclock_gtod_register_notifier(&pvclock_gtod_notifier);
-#endif
return 0;
@@ -6125,9 +5990,6 @@ void kvm_arch_exit(void)
cpufreq_unregister_notifier(&kvmclock_cpufreq_notifier_block,
CPUFREQ_TRANSITION_NOTIFIER);
cpuhp_remove_state_nocalls(CPUHP_AP_X86_KVM_CLK_ONLINE);
-#ifdef CONFIG_X86_64
- pvclock_gtod_unregister_notifier(&pvclock_gtod_notifier);
-#endif
kvm_x86_ops = NULL;
kvm_mmu_module_exit();
free_percpu(shared_msrs);
@@ -8029,10 +7891,10 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
raw_spin_lock_init(&kvm->arch.tsc_write_lock);
mutex_init(&kvm->arch.apic_map_lock);
mutex_init(&kvm->arch.hyperv.hv_lock);
- spin_lock_init(&kvm->arch.pvclock_gtod_sync_lock);
+ spin_lock_init(&kvm->arch.master_clock_lock);
kvm->arch.kvmclock_offset = -ktime_get_boot_ns();
- pvclock_update_vm_gtod_copy(kvm);
+ update_masterclock_data(kvm);
INIT_DELAYED_WORK(&kvm->arch.kvmclock_update_work, kvmclock_update_fn);
INIT_DELAYED_WORK(&kvm->arch.kvmclock_sync_work, kvmclock_sync_fn);
diff --git a/include/linux/timekeeping.h b/include/linux/timekeeping.h
index 31df92c..b0a06b0 100644
--- a/include/linux/timekeeping.h
+++ b/include/linux/timekeeping.h
@@ -171,7 +171,10 @@ enum tk_offsets {
};
extern ktime_t ktime_get(void);
+extern ktime_t ktime_get_with_cycles(u64 *cycles);
extern ktime_t ktime_get_with_offset(enum tk_offsets offs, u64 *cycles);
+extern const seqcount_t* get_tk_seq(void);
+extern int get_tk_mono_clock_mode(void);
extern ktime_t ktime_mono_to_any(ktime_t tmono, enum tk_offsets offs);
extern ktime_t ktime_get_raw(void);
extern u32 ktime_get_resolution_ns(void);
@@ -184,6 +187,10 @@ static inline ktime_t ktime_get_real(void)
return ktime_get_with_offset(TK_OFFS_REAL, NULL);
}
+static inline ktime_t ktime_get_real_with_cycles(u64 *cycles)
+{
+ return ktime_get_with_offset(TK_OFFS_REAL, cycles);
+}
/**
* ktime_get_boottime - Returns monotonic time since boot in ktime_t format
*
@@ -220,17 +227,27 @@ static inline u64 ktime_get_ns(void)
return ktime_to_ns(ktime_get());
}
+static inline u64 ktime_get_ns_with_cycles(u64 *cycles)
+{
+ return ktime_to_ns(ktime_get_with_cycles(cycles));
+}
+
static inline u64 ktime_get_real_ns(void)
{
return ktime_to_ns(ktime_get_real());
}
+static inline u64 ktime_get_real_ns_with_cycles(u64 *cycles)
+{
+ return ktime_to_ns(ktime_get_real_with_cycles(cycles));
+}
+
static inline u64 ktime_get_boot_ns(void)
{
return ktime_to_ns(ktime_get_boottime());
}
-static inline u64 ktime_get_boot_ns_and_cycles(u64 *cycles)
+static inline u64 ktime_get_boot_ns_with_cycles(u64 *cycles)
{
return ktime_to_ns(ktime_get_boottime_and_cycles(cycles));
}
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 994f83b..7dbcac4 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -717,6 +717,12 @@ EXPORT_SYMBOL(getnstimeofday64);
ktime_t ktime_get(void)
{
+ return ktime_get_with_cycles(NULL);
+}
+EXPORT_SYMBOL_GPL(ktime_get);
+
+ktime_t ktime_get_with_cycles(u64 *cycles)
+{
struct timekeeper *tk = &tk_core.timekeeper;
unsigned int seq;
ktime_t base;
@@ -727,13 +733,13 @@ ktime_t ktime_get(void)
do {
seq = read_seqcount_begin(&tk_core.seq);
base = tk->tkr_mono.base;
- nsecs = timekeeping_get_ns(&tk->tkr_mono, NULL);
+ nsecs = timekeeping_get_ns(&tk->tkr_mono, cycles);
} while (read_seqcount_retry(&tk_core.seq, seq));
return ktime_add_ns(base, nsecs);
}
-EXPORT_SYMBOL_GPL(ktime_get);
+EXPORT_SYMBOL_GPL(ktime_get_with_cycles);
u32 ktime_get_resolution_ns(void)
{
@@ -779,6 +785,18 @@ ktime_t ktime_get_with_offset(enum tk_offsets offs, u64 *tsc_stamp)
}
EXPORT_SYMBOL_GPL(ktime_get_with_offset);
+const seqcount_t* get_tk_seq()
+{
+ return &tk_core.seq;
+}
+EXPORT_SYMBOL(get_tk_seq);
+
+int get_tk_mono_clock_mode()
+{
+ return tk_core.timekeeper.tkr_mono.clock->archdata.vclock_mode;
+}
+EXPORT_SYMBOL(get_tk_mono_clock_mode);
+
/**
* ktime_mono_to_any() - convert mononotic time to any other time
* @tmono: time to convert.
--
2.7.4
^ permalink raw reply related [flat|nested] 11+ messages in thread