All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 0/4] kvmclock: improve accuracy
@ 2016-02-08 15:18 Paolo Bonzini
  2016-02-08 15:18 ` [PATCH 1/4] KVM: x86: rename argument to kvm_set_tsc_khz Paolo Bonzini
                   ` (4 more replies)
  0 siblings, 5 replies; 36+ messages in thread
From: Paolo Bonzini @ 2016-02-08 15:18 UTC (permalink / raw)
  To: linux-kernel, kvm; +Cc: mtosatti

Currently kvmclock derives its multiplier and shift values from the
TSC frequency in kHz.  These, however, are less accurate than the host
kernel's clock, which includes corrections made through NTP.

These patches change kvmclock to tick at the same frequency as the
host kernel's clocksource (as obtained through the pvclock_gtod
notifier).  This is precise enough that the Hyper-V clock can be
implemented on top of it.

Paolo Bonzini (4):
  KVM: x86: rename argument to kvm_set_tsc_khz
  KVM: x86: rewrite handling of scaled TSC for kvmclock
  KVM: x86: pass kvm_get_time_scale arguments in hertz
  KVM: x86: track actual TSC frequency from the timekeeper struct

 arch/x86/include/asm/kvm_host.h |  3 +-
 arch/x86/kvm/x86.c              | 73 +++++++++++++++++++++++++----------------
 2 files changed, 47 insertions(+), 29 deletions(-)

-- 
1.8.3.1

^ permalink raw reply	[flat|nested] 36+ messages in thread
* [PATCH kvm-unit-tests] KVM: x86: add hyperv clock test case
@ 2016-01-28 14:04 Paolo Bonzini
  2016-01-28 14:04 ` Paolo Bonzini
                   ` (2 more replies)
  0 siblings, 3 replies; 36+ messages in thread
From: Paolo Bonzini @ 2016-01-28 14:04 UTC (permalink / raw)
  To: kvm; +Cc: Andrey Smetanin, Roman Kagan, Denis V. Lunev

The test checks the relative precision of the reference TSC page
and the time reference counter.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
	Andrey, the test has a relative error of approximately 3 parts
	per million on my machine.  In other words it drifts by 3
	microseconds every second, which I don't think is acceptable.
	The problem is that virtual_tsc_khz is 2593993 while the actual
	frequency is more like 2594001 kHz.

	But I have a problem in general with using tsc_khz to compute
	the hyperv reference clock scale.  The maximum possible accuracy
	on a 2.5 GHz machine is around 200 ppb, corresponding to a drift
	of about 16 milliseconds per day.  Perhaps we could use Linux
	(and kvmclock's) timekeeping parameters to derive the tsc_scale
	and tsc_offset?

 config/config-x86-common.mak |   2 +
 config/config-x86_64.mak     |   1 +
 x86/hyperv.h                 |   9 ++
 x86/hyperv_clock.c           | 193 +++++++++++++++++++++++++++++++++++++++++++
 x86/unittests.cfg            |   5 ++
 5 files changed, 210 insertions(+)
 create mode 100644 x86/hyperv_clock.c

diff --git a/config/config-x86-common.mak b/config/config-x86-common.mak
index 72b95e3..621413b 100644
--- a/config/config-x86-common.mak
+++ b/config/config-x86-common.mak
@@ -119,6 +119,8 @@ $(TEST_DIR)/hyperv_synic.elf: $(cstart.o) $(TEST_DIR)/hyperv.o \
 $(TEST_DIR)/hyperv_stimer.elf: $(cstart.o) $(TEST_DIR)/hyperv.o \
                                $(TEST_DIR)/hyperv_stimer.o
 
+$(TEST_DIR)/hyperv_clock.elf: $(cstart.o) $(TEST_DIR)/hyperv.o
+
 arch_clean:
 	$(RM) $(TEST_DIR)/*.o $(TEST_DIR)/*.flat $(TEST_DIR)/*.elf \
 	$(TEST_DIR)/.*.d lib/x86/.*.d
diff --git a/config/config-x86_64.mak b/config/config-x86_64.mak
index 1764701..634d09d 100644
--- a/config/config-x86_64.mak
+++ b/config/config-x86_64.mak
@@ -12,5 +12,6 @@ tests = $(TEST_DIR)/access.flat $(TEST_DIR)/apic.flat \
 tests += $(TEST_DIR)/svm.flat
 tests += $(TEST_DIR)/vmx.flat
 tests += $(TEST_DIR)/tscdeadline_latency.flat
+tests += $(TEST_DIR)/hyperv_clock.flat
 
 include config/config-x86-common.mak
diff --git a/x86/hyperv.h b/x86/hyperv.h
index faf931b..974df56 100644
--- a/x86/hyperv.h
+++ b/x86/hyperv.h
@@ -12,6 +12,7 @@
 #define HV_X64_MSR_SYNTIMER_AVAILABLE           (1 << 3)
 
 #define HV_X64_MSR_TIME_REF_COUNT               0x40000020
+#define HV_X64_MSR_REFERENCE_TSC                0x40000021
 
 /* Define synthetic interrupt controller model specific registers. */
 #define HV_X64_MSR_SCONTROL                     0x40000080
@@ -180,4 +181,12 @@ void synic_sint_create(int vcpu, int sint, int vec, bool auto_eoi);
 void synic_sint_set(int vcpu, int sint);
 void synic_sint_destroy(int vcpu, int sint);
 
+struct hv_reference_tsc_page {
+        uint32_t tsc_sequence;
+        uint32_t res1;
+        uint64_t tsc_scale;
+        int64_t tsc_offset;
+};
+
+
 #endif
diff --git a/x86/hyperv_clock.c b/x86/hyperv_clock.c
new file mode 100644
index 0000000..680ba7c
--- /dev/null
+++ b/x86/hyperv_clock.c
@@ -0,0 +1,193 @@
+#include "libcflat.h"
+#include "smp.h"
+#include "atomic.h"
+#include "processor.h"
+#include "hyperv.h"
+#include "vm.h"
+
+#define MAX_CPU 4
+#define TICKS_PER_SEC (1000000000 / 100)
+
+struct hv_reference_tsc_page *hv_clock;
+
+/*
+ * Scale a 64-bit delta by multiplying it by a 64-bit fixed-point fraction
+ * and returning the high 64 bits of the 128-bit product.
+ */
+static inline u64 scale_delta(u64 delta, u64 mul_frac)
+{
+	u64 product, unused;
+
+	__asm__ (
+		"mul %3"
+		: "=d" (product), "=a" (unused) : "1" (delta), "rm" ((u64)mul_frac) );
+
+	return product;
+}
+
+static u64 hvclock_tsc_to_ticks(struct hv_reference_tsc_page *shadow, uint64_t tsc)
+{
+	u64 delta = tsc;
+	return scale_delta(delta, shadow->tsc_scale) + shadow->tsc_offset;
+}
+
+/*
+ * Read a consistent snapshot of the reference TSC page into a shadow copy,
+ * retrying whenever tsc_sequence changes while the page is being copied.
+ */
+static void hvclock_get_time_values(struct hv_reference_tsc_page *shadow,
+				    struct hv_reference_tsc_page *page)
+{
+	int seq;
+	do {
+		seq = page->tsc_sequence;
+		rmb();		/* fetch version before data */
+		*shadow = *page;
+		rmb();		/* test version after fetching data */
+	} while (shadow->tsc_sequence != seq);
+}
+
+uint64_t hv_clock_read(void)
+{
+	struct hv_reference_tsc_page shadow;
+
+	hvclock_get_time_values(&shadow, hv_clock);
+	return hvclock_tsc_to_ticks(&shadow, rdtsc());
+}
+
+atomic_t cpus_left;
+bool ok[MAX_CPU];
+uint64_t loops[MAX_CPU];
+
+static void hv_clock_test(void *data)
+{
+	int i = smp_id();
+	uint64_t t = rdmsr(HV_X64_MSR_TIME_REF_COUNT);
+	uint64_t end = t + 3 * TICKS_PER_SEC;
+
+	ok[i] = true;
+	do {
+		uint64_t now = hv_clock_read();
+		if (now < t) {
+			printf("warp on CPU %d!\n", smp_id());
+			ok[i] = false;
+			break;
+		}
+		t = now;
+	} while(t < end);
+
+	barrier();
+	if (t >= end) {
+		int64_t ref = rdmsr(HV_X64_MSR_TIME_REF_COUNT);
+		if (i == 0)
+			printf("Time reference MSR drift: %lld\n\n", ref - end);
+		ok[i] &= (ref - end) > -5 && (ref - end) < 5;
+	}
+
+	atomic_dec(&cpus_left);
+}
+
+static void check_test(int ncpus)
+{
+	int i;
+	bool pass;
+
+	atomic_set(&cpus_left, ncpus);
+	for (i = ncpus - 1; i >= 0; i--)
+		on_cpu_async(i, hv_clock_test, NULL);
+
+	/* Wait until every vcpu has finished its test */
+	while(atomic_read(&cpus_left))
+		;
+
+	pass = true;
+	for (i = ncpus - 1; i >= 0; i--)
+		pass &= ok[i];
+
+	report("TSC reference precision test", pass);
+}
+
+static void hv_perf_test(void *data)
+{
+	uint64_t t = hv_clock_read();
+	uint64_t end = t + 1000000000 / 100;
+	uint64_t local_loops = 0;
+
+	do {
+		t = hv_clock_read();
+		local_loops++;
+	} while(t < end);
+
+	loops[smp_id()] = local_loops;
+	atomic_dec(&cpus_left);
+}
+
+static void perf_test(int ncpus)
+{
+	int i;
+	uint64_t total_loops;
+
+	atomic_set(&cpus_left, ncpus);
+	for (i = ncpus - 1; i >= 0; i--)
+		on_cpu_async(i, hv_perf_test, NULL);
+
+	/* Wait until every vcpu has finished its test */
+	while(atomic_read(&cpus_left))
+		;
+
+	total_loops = 0;
+	for (i = ncpus - 1; i >= 0; i--)
+		total_loops += loops[i];
+	printf("iterations/sec:  %lld\n", total_loops / ncpus);
+}
+
+int main(int ac, char **av)
+{
+	int nerr = 0;
+	int ncpus;
+	struct hv_reference_tsc_page shadow;
+	uint64_t tsc1, t1, tsc2, t2;
+	uint64_t ref1, ref2;
+
+	setup_vm();
+	smp_init();
+
+	hv_clock = alloc_page();
+	wrmsr(HV_X64_MSR_REFERENCE_TSC, (u64)(uintptr_t)hv_clock | 1);
+	report("MSR value after enabling",
+	       rdmsr(HV_X64_MSR_REFERENCE_TSC) == ((u64)(uintptr_t)hv_clock | 1));
+
+	hvclock_get_time_values(&shadow, hv_clock);
+	if (shadow.tsc_sequence == 0 || shadow.tsc_sequence == 0xFFFFFFFF) {
+		printf("Reference TSC page not available\n");
+		exit(1);
+	}
+
+	printf("scale: %llx offset: %lld\n", shadow.tsc_scale, shadow.tsc_offset);
+	ref1 = rdmsr(HV_X64_MSR_TIME_REF_COUNT);
+	tsc1 = rdtsc();
+	t1 = hvclock_tsc_to_ticks(&shadow, tsc1);
+	printf("refcnt %lld, TSC %llx, TSC reference %lld\n",
+	       ref1, tsc1, t1);
+
+	do
+		ref2 = rdmsr(HV_X64_MSR_TIME_REF_COUNT);
+	while (ref2 < ref1 + 2 * TICKS_PER_SEC);
+
+	tsc2 = rdtsc();
+	t2 = hvclock_tsc_to_ticks(&shadow, tsc2);
+	printf("refcnt %lld (delta %lld), TSC %llx, TSC reference %lld (delta %lld)\n",
+	       ref2, ref2 - ref1, tsc2, t2, t2 - t1);
+
+	ncpus = cpu_count();
+	if (ncpus > MAX_CPU)
+		ncpus = MAX_CPU;
+
+	check_test(ncpus);
+	perf_test(ncpus);
+
+	wrmsr(HV_X64_MSR_REFERENCE_TSC, 0LL);
+	report("MSR value after disabling", rdmsr(HV_X64_MSR_REFERENCE_TSC) == 0);
+
+	return nerr > 0 ? 1 : 0;
+}
diff --git a/x86/unittests.cfg b/x86/unittests.cfg
index 99eff26..e981c00 100644
--- a/x86/unittests.cfg
+++ b/x86/unittests.cfg
@@ -188,3 +188,8 @@ extra_params = -cpu kvm64,hv_synic -device hyperv-testdev
 file = hyperv_stimer.flat
 smp = 2
 extra_params = -cpu kvm64,hv_time,hv_synic,hv_stimer -device hyperv-testdev
+
+[hyperv_clock]
+file = hyperv_clock.flat
+smp = 2
+extra_params = -cpu kvm64,hv_time
-- 
1.8.3.1


^ permalink raw reply related	[flat|nested] 36+ messages in thread

end of thread, other threads:[~2016-05-30 10:41 UTC | newest]

Thread overview: 36+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-02-08 15:18 [PATCH 0/4] kvmclock: improve accuracy Paolo Bonzini
2016-02-08 15:18 ` [PATCH 1/4] KVM: x86: rename argument to kvm_set_tsc_khz Paolo Bonzini
2016-02-11 15:01   ` Marcelo Tosatti
2016-02-08 15:18 ` [PATCH 2/4] KVM: x86: rewrite handling of scaled TSC for kvmclock Paolo Bonzini
2016-02-11 15:23   ` Marcelo Tosatti
2016-02-08 15:18 ` [PATCH 3/4] KVM: x86: pass kvm_get_time_scale arguments in hertz Paolo Bonzini
2016-02-08 15:18 ` [PATCH 4/4] KVM: x86: track actual TSC frequency from the timekeeper struct Paolo Bonzini
2016-02-09 18:41   ` Owen Hofmann
2016-02-10 13:57     ` Paolo Bonzini
2016-02-16 13:48   ` Marcelo Tosatti
2016-02-16 14:25     ` Marcelo Tosatti
2016-02-16 16:59       ` Paolo Bonzini
2016-02-19 14:12         ` Marcelo Tosatti
2016-02-19 15:53           ` Paolo Bonzini
2016-02-16 14:00 ` [PATCH 0/4] kvmclock: improve accuracy Marcelo Tosatti
  -- strict thread matches above, loose matches on Subject: below --
2016-01-28 14:04 [PATCH kvm-unit-tests] KVM: x86: add hyperv clock test case Paolo Bonzini
2016-01-28 14:04 ` Paolo Bonzini
2016-01-28 14:25 ` Andrey Smetanin
2016-01-28 14:50   ` Paolo Bonzini
2016-01-28 15:53     ` Paolo Bonzini
2016-01-28 18:45       ` Roman Kagan
2016-01-28 18:53     ` Roman Kagan
2016-01-28 21:28       ` Paolo Bonzini
2016-01-28 16:22 ` Roman Kagan
2016-02-03 16:37   ` Paolo Bonzini
2016-02-04  9:33     ` Roman Kagan
2016-02-04 10:13       ` Paolo Bonzini
2016-02-04 11:12         ` Roman Kagan
2016-04-21 17:01     ` Roman Kagan
2016-04-22 13:32       ` Roman Kagan
2016-04-22 18:08         ` Paolo Bonzini
2016-04-25  8:47           ` Roman Kagan
2016-04-26 10:34             ` Roman Kagan
2016-05-25 18:33               ` Roman Kagan
2016-05-26 14:47                 ` Roman Kagan
2016-05-29 22:34                 ` Marcelo Tosatti

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.