linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [patch 0/5] KVM virtual PTP driver (v3)
@ 2017-01-20 12:20 Marcelo Tosatti
  2017-01-20 12:20 ` [patch 1/5] KVM: x86: provide realtime host clock via vsyscall notifiers Marcelo Tosatti
                   ` (5 more replies)
  0 siblings, 6 replies; 36+ messages in thread
From: Marcelo Tosatti @ 2017-01-20 12:20 UTC (permalink / raw)
  To: kvm, linux-kernel
  Cc: Paolo Bonzini, Radim Krcmar, Richard Cochran, Miroslav Lichvar

This patchset implements a virtual PTP driver which allows guest to sync
its clock to the host clock with high precision
(error is < 1us on an idle guest).

Changelog from v2

Cross timestamping, emulation of PTP_SYS_OFFSET with cross timestamping
(Paolo, Miroslav, Radim).

Changelog from v1

Patch1:
v2: unify nsec_base (Radim)

Patch2:
v2: improve documentation (Radim)
    change hypercall name to KVM_HC_CLOCK_PAIRING (Radim)
    increase padding size

Patch3:
v2: check for kvmclock (Radim)
    initialize global variables before device registration (Radim)



PHC0                          0   3   377     7   -141ns[ +234ns] +/-    7ns
#* PHC0                          0   3   377     5   -197ns[ -403ns] +/-    3ns
#* PHC0                          0   3   377    11   +157ns[ +264ns] +/-    3ns
#* PHC0                          0   3   377     9    +92ns[ +304ns] +/-    2ns
#* PHC0                          0   3   377     7   -102ns[ -164ns] +/-    3ns
#* PHC0                          0   3   377     5   -157ns[ -269ns] +/-    2ns
#* PHC0                          0   3   377    10    -63ns[  -98ns] +/-    2ns
#* PHC0                          0   3   377     8   +351ns[ +399ns] +/-    2ns
#* PHC0                          0   3   377     6   +385ns[ +399ns] +/-    4ns
#* PHC0                          0   3   377     4   -133ns[ -110ns] +/-    6ns
#* PHC0                          0   3   377    10    -59ns[  -90ns] +/-    3ns
#* PHC0                          0   3   377     8    -90ns[ +381ns] +/-    9ns
#* PHC0                          0   3   377     6     +6ns[ +216ns] +/-    6ns
#* PHC0                          0   3   377     4   +166ns[ -666ns] +/-   11ns
#* PHC0                          0   3   377    10    -18ns[ +323ns] +/-   10ns
#* PHC0                          0   3   377     8    -12ns[ +121ns] +/-    5ns
#* PHC0                          0   3   377     5     +4ns[ +218ns] +/-    7ns
#* PHC0                          0   3   377     4   +162ns[ -683ns] +/-   11ns
#* PHC0                          0   3   377    10    -82ns[ +310ns] +/-   12ns
#* PHC0                          0   3   377     7     +5ns[ -320ns] +/-    9ns
#* PHC0                          0   3   377     5    -13ns[ +165ns] +/-    7ns
#* PHC0                          0   3   377     3     +6ns[ +105ns] +/-    2ns
#* PHC0                          0   3   377     9    -19ns[  -67ns] +/-    3ns
#* PHC0                          0   3   377     8    +89ns[ +181ns] +/-    3ns
#* PHC0                          0   3   377     6    +93ns[ +168ns] +/-    2ns
#* PHC0                          0   3   377     4   +100ns[ +154ns] +/-    2ns
#* PHC0                          0   3   377    10   -249ns[ +180ns] +/-    7ns
#* PHC0                          0   3   377     8   +158ns[ -582ns] +/-   22ns
#* PHC0                          0   3   377     6    +10ns[ +213ns] +/-    9ns
#* PHC0                          0   3   377     3    -35ns[ +258ns] +/-    5ns
#* PHC0                          0   3   377    10    +25ns[ -620ns] +/-    8ns
#* PHC0                          0   3   377     7    +13ns[ +153ns] +/-    2ns

^ permalink raw reply	[flat|nested] 36+ messages in thread

* [patch 1/5] KVM: x86: provide realtime host clock via vsyscall notifiers
  2017-01-20 12:20 [patch 0/5] KVM virtual PTP driver (v3) Marcelo Tosatti
@ 2017-01-20 12:20 ` Marcelo Tosatti
  2017-01-20 12:20 ` [patch 2/5] KVM: x86: add KVM_HC_CLOCK_OFFSET hypercall Marcelo Tosatti
                   ` (4 subsequent siblings)
  5 siblings, 0 replies; 36+ messages in thread
From: Marcelo Tosatti @ 2017-01-20 12:20 UTC (permalink / raw)
  To: kvm, linux-kernel
  Cc: Paolo Bonzini, Radim Krcmar, Richard Cochran, Miroslav Lichvar,
	Marcelo Tosatti

[-- Attachment #1: kvm-do-realtime --]
[-- Type: text/plain, Size: 2010 bytes --]

Expose the realtime host clock and save the TSC value
used for the clock calculation.

Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>

---
 arch/x86/kvm/x86.c |   36 ++++++++++++++++++++++++++++++++++++
 1 file changed, 36 insertions(+)

v2: unify nsec_base (Radim)

Index: kvm-ptpdriver/arch/x86/kvm/x86.c
===================================================================
--- kvm-ptpdriver.orig/arch/x86/kvm/x86.c	2017-01-13 16:43:15.013244682 -0200
+++ kvm-ptpdriver/arch/x86/kvm/x86.c	2017-01-20 09:56:41.404534904 -0200
@@ -1139,6 +1139,7 @@
 
 	u64		boot_ns;
 	u64		nsec_base;
+	u64		wall_time_sec;
 };
 
 static struct pvclock_gtod_data pvclock_gtod_data;
@@ -1162,6 +1163,8 @@
 	vdata->boot_ns			= boot_ns;
 	vdata->nsec_base		= tk->tkr_mono.xtime_nsec;
 
+	vdata->wall_time_sec            = tk->xtime_sec;
+
 	write_seqcount_end(&vdata->seq);
 }
 #endif
@@ -1623,6 +1626,28 @@
 	return mode;
 }
 
+static int do_realtime(struct timespec *ts, cycle_t *cycle_now)
+{
+	struct pvclock_gtod_data *gtod = &pvclock_gtod_data;
+	unsigned long seq;
+	int mode;
+	u64 ns;
+
+	do {
+		seq = read_seqcount_begin(&gtod->seq);
+		mode = gtod->clock.vclock_mode;
+		ts->tv_sec = gtod->wall_time_sec;
+		ns = gtod->nsec_base;
+		ns += vgettsc(cycle_now);
+		ns >>= gtod->clock.shift;
+	} while (unlikely(read_seqcount_retry(&gtod->seq, seq)));
+
+	ts->tv_sec += __iter_div_u64_rem(ns, NSEC_PER_SEC, &ns);
+	ts->tv_nsec = ns;
+
+	return mode;
+}
+
 /* returns true if host is using tsc clocksource */
 static bool kvm_get_time_and_clockread(s64 *kernel_ns, cycle_t *cycle_now)
 {
@@ -1632,6 +1657,17 @@
 
 	return do_monotonic_boot(kernel_ns, cycle_now) == VCLOCK_TSC;
 }
+
+/* returns true if host is using tsc clocksource */
+static bool kvm_get_walltime_and_clockread(struct timespec *ts,
+					   cycle_t *cycle_now)
+{
+	/* checked again under seqlock below */
+	if (pvclock_gtod_data.clock.vclock_mode != VCLOCK_TSC)
+		return false;
+
+	return do_realtime(ts, cycle_now) == VCLOCK_TSC;
+}
 #endif
 
 /*

^ permalink raw reply	[flat|nested] 36+ messages in thread

* [patch 2/5] KVM: x86: add KVM_HC_CLOCK_OFFSET hypercall
  2017-01-20 12:20 [patch 0/5] KVM virtual PTP driver (v3) Marcelo Tosatti
  2017-01-20 12:20 ` [patch 1/5] KVM: x86: provide realtime host clock via vsyscall notifiers Marcelo Tosatti
@ 2017-01-20 12:20 ` Marcelo Tosatti
  2017-01-20 12:20 ` [patch 3/5] kvmclock: export kvmclock clocksource pointer Marcelo Tosatti
                   ` (3 subsequent siblings)
  5 siblings, 0 replies; 36+ messages in thread
From: Marcelo Tosatti @ 2017-01-20 12:20 UTC (permalink / raw)
  To: kvm, linux-kernel
  Cc: Paolo Bonzini, Radim Krcmar, Richard Cochran, Miroslav Lichvar,
	Marcelo Tosatti

[-- Attachment #1: kvm-hv-clock-offset --]
[-- Type: text/plain, Size: 5057 bytes --]

Add a hypercall to retrieve the host realtime clock
and the TSC value used to calculate that clock read.

Used to implement clock synchronization between
host and guest.

Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>

---
 Documentation/virtual/kvm/hypercalls.txt |   33 ++++++++++++++++++++++++
 arch/x86/include/uapi/asm/kvm_para.h     |    9 ++++++
 arch/x86/kvm/x86.c                       |   41 +++++++++++++++++++++++++++++++
 include/uapi/linux/kvm_para.h            |    3 ++
 4 files changed, 86 insertions(+)

v2: improve documentation (Radim)
    change hypercall name to KVM_HC_CLOCK_PAIRING (Radim)
    increase padding size

Index: kvm-ptpdriver/arch/x86/include/uapi/asm/kvm_para.h
===================================================================
--- kvm-ptpdriver.orig/arch/x86/include/uapi/asm/kvm_para.h	2017-01-13 16:43:11.947240575 -0200
+++ kvm-ptpdriver/arch/x86/include/uapi/asm/kvm_para.h	2017-01-13 16:43:25.038258187 -0200
@@ -50,6 +50,15 @@
 	__u32 pad[11];
 };
 
+#define KVM_CLOCK_PAIRING_WALLCLOCK 0
+struct kvm_clock_offset {
+	__s64 sec;
+	__s64 nsec;
+	__u64 tsc;
+	__u32 flags;
+	__u32 pad[9];
+};
+
 #define KVM_STEAL_ALIGNMENT_BITS 5
 #define KVM_STEAL_VALID_BITS ((-1ULL << (KVM_STEAL_ALIGNMENT_BITS + 1)))
 #define KVM_STEAL_RESERVED_MASK (((1 << KVM_STEAL_ALIGNMENT_BITS) - 1 ) << 1)
Index: kvm-ptpdriver/Documentation/virtual/kvm/hypercalls.txt
===================================================================
--- kvm-ptpdriver.orig/Documentation/virtual/kvm/hypercalls.txt	2017-01-13 16:43:11.947240575 -0200
+++ kvm-ptpdriver/Documentation/virtual/kvm/hypercalls.txt	2017-01-13 16:43:25.038258187 -0200
@@ -81,3 +81,36 @@
 same guest can wakeup the sleeping vcpu by issuing KVM_HC_KICK_CPU hypercall,
 specifying APIC ID (a1) of the vcpu to be woken up. An additional argument (a0)
 is used in the hypercall for future use.
+
+
+6. KVM_HC_CLOCK_PAIRING
+------------------------
+Architecture: x86
+Status: active
+Purpose: Hypercall used to synchronize host and guest clocks.
+Usage:
+
+a0: guest physical address where host copies
+"struct kvm_clock_offset" structure.
+
+a1: clock_type, ATM only KVM_CLOCK_PAIRING_WALLCLOCK (0)
+is supported (hosts CLOCK_REALTIME clock).
+
+		struct kvm_clock_offset {
+			__s64 sec;
+			__s64 nsec;
+			__u64 tsc;
+			__u32 flags;
+			__u32 pad[9];
+		};
+
+       Where:
+               * sec: seconds from clock_type clock.
+               * nsec: nanoseconds from clock_type clock.
+               * tsc: TSC value used to calculate sec/nsec pair
+                 (this hypercall only works when host uses TSC clocksource).
+               * flags: flags, unused (0) at the moment.
+
+Returns KVM_EOPNOTSUPP if the host does not use TSC clocksource,
+or if clock type is different than KVM_CLOCK_PAIRING_WALLCLOCK.
+
Index: kvm-ptpdriver/arch/x86/kvm/x86.c
===================================================================
--- kvm-ptpdriver.orig/arch/x86/kvm/x86.c	2017-01-13 16:43:20.898252596 -0200
+++ kvm-ptpdriver/arch/x86/kvm/x86.c	2017-01-13 16:43:25.041258191 -0200
@@ -6119,6 +6119,44 @@
 }
 EXPORT_SYMBOL_GPL(kvm_emulate_halt);
 
+static int kvm_pv_clock_pairing(struct kvm_vcpu *vcpu, gpa_t paddr,
+			       unsigned long clock_type)
+{
+	struct kvm_clock_offset *clock_offset;
+	struct timespec ts;
+	cycle_t cycle;
+	int ret;
+
+	if (clock_type != KVM_CLOCK_PAIRING_WALLCLOCK)
+		return -KVM_EOPNOTSUPP;
+
+	if (pvclock_gtod_data.clock.vclock_mode != VCLOCK_TSC)
+		return -KVM_EOPNOTSUPP;
+
+	clock_offset = kzalloc(sizeof(struct kvm_clock_offset), GFP_KERNEL);
+	if (clock_offset == NULL)
+		return -KVM_ENOMEM;
+
+	if (kvm_get_walltime_and_clockread(&ts, &cycle) == false) {
+		kfree(clock_offset);
+		return -KVM_EOPNOTSUPP;
+	}
+
+	clock_offset->sec = ts.tv_sec;
+	clock_offset->nsec = ts.tv_nsec;
+	clock_offset->tsc = kvm_read_l1_tsc(vcpu, cycle);
+	clock_offset->flags = 0;
+
+	ret = 0;
+	if (kvm_write_guest(vcpu->kvm, paddr, clock_offset,
+			      sizeof(struct kvm_clock_offset)))
+		ret = -KVM_EFAULT;
+
+	kfree(clock_offset);
+
+	return ret;
+}
+
 /*
  * kvm_pv_kick_cpu_op:  Kick a vcpu.
  *
@@ -6183,6 +6221,9 @@
 		kvm_pv_kick_cpu_op(vcpu->kvm, a0, a1);
 		ret = 0;
 		break;
+	case KVM_HC_CLOCK_PAIRING:
+		ret = kvm_pv_clock_pairing(vcpu, a0, a1);
+		break;
 	default:
 		ret = -KVM_ENOSYS;
 		break;
Index: kvm-ptpdriver/include/uapi/linux/kvm_para.h
===================================================================
--- kvm-ptpdriver.orig/include/uapi/linux/kvm_para.h	2017-01-13 16:43:11.947240575 -0200
+++ kvm-ptpdriver/include/uapi/linux/kvm_para.h	2017-01-13 16:43:25.042258192 -0200
@@ -14,6 +14,8 @@
 #define KVM_EFAULT		EFAULT
 #define KVM_E2BIG		E2BIG
 #define KVM_EPERM		EPERM
+#define KVM_EOPNOTSUPP		EOPNOTSUPP
+#define KVM_ENOMEM		ENOMEM
 
 #define KVM_HC_VAPIC_POLL_IRQ		1
 #define KVM_HC_MMU_OP			2
@@ -23,6 +25,7 @@
 #define KVM_HC_MIPS_GET_CLOCK_FREQ	6
 #define KVM_HC_MIPS_EXIT_VM		7
 #define KVM_HC_MIPS_CONSOLE_OUTPUT	8
+#define KVM_HC_CLOCK_PAIRING		9
 
 /*
  * hypercalls use architecture specific

^ permalink raw reply	[flat|nested] 36+ messages in thread

* [patch 3/5] kvmclock: export kvmclock clocksource pointer
  2017-01-20 12:20 [patch 0/5] KVM virtual PTP driver (v3) Marcelo Tosatti
  2017-01-20 12:20 ` [patch 1/5] KVM: x86: provide realtime host clock via vsyscall notifiers Marcelo Tosatti
  2017-01-20 12:20 ` [patch 2/5] KVM: x86: add KVM_HC_CLOCK_OFFSET hypercall Marcelo Tosatti
@ 2017-01-20 12:20 ` Marcelo Tosatti
  2017-01-20 12:55   ` Paolo Bonzini
  2017-01-20 12:20 ` [patch 4/5] PTP: add PTP_SYS_OFFSET emulation via cross timestamps infrastructure Marcelo Tosatti
                   ` (2 subsequent siblings)
  5 siblings, 1 reply; 36+ messages in thread
From: Marcelo Tosatti @ 2017-01-20 12:20 UTC (permalink / raw)
  To: kvm, linux-kernel
  Cc: Paolo Bonzini, Radim Krcmar, Richard Cochran, Miroslav Lichvar,
	Marcelo Tosatti

[-- Attachment #1: kvmclock-export-clocksource --]
[-- Type: text/plain, Size: 1324 bytes --]

To be used by KVM PTP driver.

Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>

---
 arch/x86/include/asm/kvmclock.h |    6 ++++++
 arch/x86/kernel/kvmclock.c      |    6 ++++++
 2 files changed, 12 insertions(+)

Index: kvm-ptpdriver/arch/x86/include/asm/kvmclock.h
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ kvm-ptpdriver/arch/x86/include/asm/kvmclock.h	2017-01-18 18:20:35.463472189 -0200
@@ -0,0 +1,6 @@
+#ifndef _ASM_X86_KVM_CLOCK_H
+#define _ASM_X86_KVM_CLOCK_H
+
+struct clocksource *get_kvmclock_cs(void);
+
+#endif /* _ASM_X86_KVM_CLOCK_H */
Index: kvm-ptpdriver/arch/x86/kernel/kvmclock.c
===================================================================
--- kvm-ptpdriver.orig/arch/x86/kernel/kvmclock.c	2017-01-11 21:22:13.306315984 -0200
+++ kvm-ptpdriver/arch/x86/kernel/kvmclock.c	2017-01-18 18:20:02.543397125 -0200
@@ -28,6 +28,7 @@
 
 #include <asm/x86_init.h>
 #include <asm/reboot.h>
+#include <asm/kvmclock.h>
 
 static int kvmclock __ro_after_init = 1;
 static int msr_kvm_system_time = MSR_KVM_SYSTEM_TIME;
@@ -182,6 +183,11 @@
 	.flags = CLOCK_SOURCE_IS_CONTINUOUS,
 };
 
+struct clocksource *get_kvmclock_cs(void)
+{
+	return &kvm_clock;
+}
+
 int kvm_register_clock(char *txt)
 {
 	int cpu = smp_processor_id();

^ permalink raw reply	[flat|nested] 36+ messages in thread

* [patch 4/5] PTP: add PTP_SYS_OFFSET emulation via cross timestamps infrastructure
  2017-01-20 12:20 [patch 0/5] KVM virtual PTP driver (v3) Marcelo Tosatti
                   ` (2 preceding siblings ...)
  2017-01-20 12:20 ` [patch 3/5] kvmclock: export kvmclock clocksource pointer Marcelo Tosatti
@ 2017-01-20 12:20 ` Marcelo Tosatti
  2017-01-20 12:55   ` Paolo Bonzini
  2017-01-20 20:25   ` Richard Cochran
  2017-01-20 12:20 ` [patch 5/5] PTP: add kvm PTP driver Marcelo Tosatti
  2017-01-20 13:10 ` [patch 0/5] KVM virtual PTP driver (v3) Paolo Bonzini
  5 siblings, 2 replies; 36+ messages in thread
From: Marcelo Tosatti @ 2017-01-20 12:20 UTC (permalink / raw)
  To: kvm, linux-kernel
  Cc: Paolo Bonzini, Radim Krcmar, Richard Cochran, Miroslav Lichvar,
	Marcelo Tosatti

[-- Attachment #1: ptp-add-sysoffset-callback --]
[-- Type: text/plain, Size: 5455 bytes --]

Emulate PTP_SYS_OFFSET by using an arithmetic mean of the
realtime samples from ->getcrosststamp callback.

Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>

---
 drivers/ptp/ptp_chardev.c        |    6 ++
 include/linux/ptp_clock_kernel.h |    5 ++
 include/linux/timekeeping_ptp.h  |   10 ++++
 kernel/time/timekeeping.c        |   79 +++++++++++++++++++++++++++++++++++++++
 4 files changed, 100 insertions(+)

Index: kvm-ptpdriver/drivers/ptp/ptp_chardev.c
===================================================================
--- kvm-ptpdriver.orig/drivers/ptp/ptp_chardev.c	2017-01-19 15:34:52.181338789 -0200
+++ kvm-ptpdriver/drivers/ptp/ptp_chardev.c	2017-01-19 15:35:31.705421911 -0200
@@ -23,6 +23,7 @@
 #include <linux/sched.h>
 #include <linux/slab.h>
 #include <linux/timekeeping.h>
+#include <linux/timekeeping_ptp.h>
 
 #include "ptp_private.h"
 
@@ -219,6 +220,11 @@
 			err = -EINVAL;
 			break;
 		}
+
+		if (ptp->info->emulate_ptp_sys_offset_mean) {
+			err = emulate_ptp_sys_offset(ptp->info, sysoff, arg);
+			break;
+		}
 		pct = &sysoff->ts[0];
 		for (i = 0; i < sysoff->n_samples; i++) {
 			getnstimeofday64(&ts);
Index: kvm-ptpdriver/include/linux/ptp_clock_kernel.h
===================================================================
--- kvm-ptpdriver.orig/include/linux/ptp_clock_kernel.h	2017-01-19 15:34:52.181338789 -0200
+++ kvm-ptpdriver/include/linux/ptp_clock_kernel.h	2017-01-19 15:35:31.706421913 -0200
@@ -99,6 +99,10 @@
  *            parameter func: the desired function to use.
  *            parameter chan: the function channel index to use.
  *
+ * @emulate_ptp_sys_offset_mean: Emulate PTP_SYS_OFFSET ioctl
+ *            using arithmetic mean of series of PTP_SYS_OFFSET_PRECISE
+ *            ioctl.
+ *
  * Drivers should embed their ptp_clock_info within a private
  * structure, obtaining a reference to it using container_of().
  *
@@ -115,6 +119,7 @@
 	int n_pins;
 	int pps;
 	struct ptp_pin_desc *pin_config;
+	bool emulate_ptp_sys_offset_mean;
 	int (*adjfine)(struct ptp_clock_info *ptp, long scaled_ppm);
 	int (*adjfreq)(struct ptp_clock_info *ptp, s32 delta);
 	int (*adjtime)(struct ptp_clock_info *ptp, s64 delta);
Index: kvm-ptpdriver/include/linux/timekeeping_ptp.h
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ kvm-ptpdriver/include/linux/timekeeping_ptp.h	2017-01-19 15:35:31.706421913 -0200
@@ -0,0 +1,10 @@
+#ifndef _LINUX_TIMEKEEPING_PTP_H
+#define _LINUX_TIMEKEEPING_PTP_H
+
+#include <uapi/linux/ptp_clock.h>
+#include <linux/ptp_clock_kernel.h>
+
+extern int emulate_ptp_sys_offset(struct ptp_clock_info *info,
+			struct ptp_sys_offset *sysoff,
+			unsigned long arg);
+#endif
Index: kvm-ptpdriver/kernel/time/timekeeping.c
===================================================================
--- kvm-ptpdriver.orig/kernel/time/timekeeping.c	2017-01-19 15:34:52.181338789 -0200
+++ kvm-ptpdriver/kernel/time/timekeeping.c	2017-01-20 09:29:13.978434149 -0200
@@ -23,6 +23,10 @@
 #include <linux/stop_machine.h>
 #include <linux/pvclock_gtod.h>
 #include <linux/compiler.h>
+#include <linux/timekeeping_ptp.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/uaccess.h>
 
 #include "tick-internal.h"
 #include "ntp_internal.h"
@@ -1168,6 +1172,81 @@
 }
 EXPORT_SYMBOL_GPL(get_device_system_crosststamp);
 
+static void mean_sys_realtime(struct system_device_crosststamp *xtstamp,
+			      struct system_device_crosststamp *n_xtstamp,
+			      struct ptp_clock_time *pct)
+{
+	ktime_t sys_realtime, n_sys_realtime;
+	struct timespec ts;
+	u64 ns;
+
+	/* estimate realtime with arithmetic mean of two crosststamp samples */
+	sys_realtime = xtstamp->sys_realtime;
+	n_sys_realtime = n_xtstamp->sys_realtime;
+	ns = ktime_divns(ktime_add(sys_realtime, n_sys_realtime), 2);
+	ts = ktime_to_timespec(ns_to_ktime(ns));
+	pct->sec = ts.tv_sec;
+	pct->nsec = ts.tv_nsec;
+}
+
+int emulate_ptp_sys_offset(struct ptp_clock_info *info,
+				  struct ptp_sys_offset *sysoff,
+				  unsigned long arg)
+{
+	unsigned long flags;
+	int i, err, len;
+	int n_samples;
+	struct system_device_crosststamp *xtstamps, *xtstamp, *n_xtstamp;
+	struct ptp_clock_time *pct;
+
+	n_samples = sysoff->n_samples + 2;
+
+	len = sizeof(struct system_device_crosststamp);
+	xtstamps = kzalloc(len * n_samples, GFP_KERNEL);
+	if (xtstamps == NULL)
+		return -ENOMEM;
+
+	/* stop update_wall_time, settimeofday */
+	raw_spin_lock_irqsave(&timekeeper_lock, flags);
+
+	xtstamp = xtstamps;
+	for (i = 0; i < n_samples; i++) {
+		err = info->getcrosststamp(info, xtstamp);
+		if (err) {
+			kfree(xtstamps);
+			raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
+			return err;
+		}
+		xtstamp++;
+	}
+
+	pct = &sysoff->ts[0];
+	xtstamp = n_xtstamp = xtstamps;
+	n_xtstamp++;
+	for (i = 0; i < sysoff->n_samples; i++) {
+		struct timespec ts;
+
+		mean_sys_realtime(xtstamp, n_xtstamp, pct);
+		pct++;
+
+		ts = ktime_to_timespec(n_xtstamp->device);
+		pct->sec = ts.tv_sec;
+		pct->nsec = ts.tv_nsec;
+		pct++;
+		xtstamp++;
+		n_xtstamp++;
+	}
+
+	mean_sys_realtime(xtstamp, n_xtstamp, pct);
+	raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
+	kfree(xtstamps);
+	if (copy_to_user((void __user *)arg, sysoff, sizeof(*sysoff)))
+		return -EFAULT;
+
+	return 0;
+}
+EXPORT_SYMBOL(emulate_ptp_sys_offset);
+
 /**
  * do_gettimeofday - Returns the time of day in a timeval
  * @tv:		pointer to the timeval to be set

^ permalink raw reply	[flat|nested] 36+ messages in thread

* [patch 5/5] PTP: add kvm PTP driver
  2017-01-20 12:20 [patch 0/5] KVM virtual PTP driver (v3) Marcelo Tosatti
                   ` (3 preceding siblings ...)
  2017-01-20 12:20 ` [patch 4/5] PTP: add PTP_SYS_OFFSET emulation via cross timestamps infrastructure Marcelo Tosatti
@ 2017-01-20 12:20 ` Marcelo Tosatti
  2017-01-20 12:58   ` Paolo Bonzini
  2017-01-20 14:12   ` Radim Krcmar
  2017-01-20 13:10 ` [patch 0/5] KVM virtual PTP driver (v3) Paolo Bonzini
  5 siblings, 2 replies; 36+ messages in thread
From: Marcelo Tosatti @ 2017-01-20 12:20 UTC (permalink / raw)
  To: kvm, linux-kernel
  Cc: Paolo Bonzini, Radim Krcmar, Richard Cochran, Miroslav Lichvar,
	Marcelo Tosatti

[-- Attachment #1: kvm-ptpdriver --]
[-- Type: text/plain, Size: 7646 bytes --]

Add a driver with gettime method returning hosts realtime clock.
This allows Chrony to synchronize host and guest clocks with 
high precision (see results below).

chronyc> sources
MS Name/IP address         Stratum Poll Reach LastRx Last sample
===============================================================================
#* PHC0                          0   3   377     4   +162ns[ -683ns] +/-   11ns

To configure Chronyd to use PHC refclock, add the 
following line to its configuration file:

refclock PHC /dev/ptpX poll 3 dpoll -2 offset 0

Where /dev/ptpX is the kvmclock PTP clock.

Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>

---
 drivers/ptp/Kconfig   |   12 ++
 drivers/ptp/Makefile  |    1 
 drivers/ptp/ptp_kvm.c |  213 ++++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 226 insertions(+)

v2: check for kvmclock (Radim)
    initialize global variables before device registration (Radim)
v3: use cross timestamps callback (Paolo, Miroslav, Radim)

Index: kvm-ptpdriver/drivers/ptp/Kconfig
===================================================================
--- kvm-ptpdriver.orig/drivers/ptp/Kconfig	2017-01-20 10:03:44.458489214 -0200
+++ kvm-ptpdriver/drivers/ptp/Kconfig	2017-01-20 10:04:26.912597433 -0200
@@ -90,4 +90,16 @@
 	  To compile this driver as a module, choose M here: the module
 	  will be called ptp_pch.
 
+config PTP_1588_CLOCK_KVM
+	tristate "KVM virtual PTP clock"
+	depends on PTP_1588_CLOCK
+	depends on KVM_GUEST
+	default y
+	help
+	  This driver adds support for using kvm infrastructure as a PTP
+	  clock. This clock is only useful if you are using KVM guests.
+
+	  To compile this driver as a module, choose M here: the module
+	  will be called ptp_kvm.
+
 endmenu
Index: kvm-ptpdriver/drivers/ptp/Makefile
===================================================================
--- kvm-ptpdriver.orig/drivers/ptp/Makefile	2017-01-20 10:03:44.458489214 -0200
+++ kvm-ptpdriver/drivers/ptp/Makefile	2017-01-20 10:04:26.913597436 -0200
@@ -6,3 +6,4 @@
 obj-$(CONFIG_PTP_1588_CLOCK)		+= ptp.o
 obj-$(CONFIG_PTP_1588_CLOCK_IXP46X)	+= ptp_ixp46x.o
 obj-$(CONFIG_PTP_1588_CLOCK_PCH)	+= ptp_pch.o
+obj-$(CONFIG_PTP_1588_CLOCK_KVM)	+= ptp_kvm.o
Index: kvm-ptpdriver/drivers/ptp/ptp_kvm.c
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ kvm-ptpdriver/drivers/ptp/ptp_kvm.c	2017-01-20 10:19:20.555311672 -0200
@@ -0,0 +1,213 @@
+/*
+ * Virtual PTP 1588 clock for use with KVM guests
+ *
+ * Copyright (C) 2017 Red Hat Inc.
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ */
+#include <linux/device.h>
+#include <linux/err.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <uapi/linux/kvm_para.h>
+#include <asm/kvm_para.h>
+#include <asm/pvclock.h>
+#include <asm/kvmclock.h>
+#include <uapi/asm/kvm_para.h>
+
+#include <linux/ptp_clock_kernel.h>
+
+struct kvm_ptp_clock {
+	struct ptp_clock *ptp_clock;
+	struct ptp_clock_info caps;
+};
+
+DEFINE_SPINLOCK(kvm_ptp_lock);
+
+static struct pvclock_vsyscall_time_info *hv_clock;
+
+static struct kvm_clock_offset clock_off;
+static phys_addr_t clock_off_gpa;
+
+/*
+ * system_counterval.cycles: kvmclock value com TSC do host.
+ * system_counterval.cs: kvmclock clocksource.
+ * device_time: host realtime clock.
+ *
+ */
+static int ptp_kvm_get_time_fn(ktime_t *device_time,
+			       struct system_counterval_t *system_counter,
+			       void *ctx)
+{
+	unsigned long ret;
+	struct timespec64 tspec;
+	unsigned version;
+	u8 flags;
+	int cpu;
+	struct pvclock_vcpu_time_info *src;
+
+	preempt_disable_notrace();
+	cpu = smp_processor_id();
+	src = &hv_clock[cpu].pvti;
+
+	spin_lock(&kvm_ptp_lock);
+
+	do {
+		/*
+		 * We are measuring the delay between
+		 * kvm_hypercall and rdtsc using TSC,
+		 * and converting that delta to
+		 * tsc_to_system_mul and tsc_shift
+		 * So any changes to tsc_to_system_mul
+		 * and tsc_shift in this region
+		 * invalidate the measurement.
+		 */
+		version = pvclock_read_begin(src);
+
+		ret = kvm_hypercall2(KVM_HC_CLOCK_PAIRING,
+				     clock_off_gpa,
+				     KVM_CLOCK_PAIRING_WALLCLOCK);
+		if (ret != 0) {
+			pr_err("clock offset hypercall ret %lu\n", ret);
+			spin_unlock(&kvm_ptp_lock);
+			preempt_enable_notrace();
+			return -EOPNOTSUPP;
+		}
+
+		tspec.tv_sec = clock_off.sec;
+		tspec.tv_nsec = clock_off.nsec;
+		ret = __pvclock_read_cycles(src, clock_off.tsc);
+		flags = src->flags;
+	} while (pvclock_read_retry(src, version));
+
+	preempt_enable_notrace();
+
+	system_counter->cycles = ret;
+	system_counter->cs = get_kvmclock_cs();
+
+	tspec.tv_nsec = tspec.tv_nsec;
+
+	*device_time = timespec_to_ktime(tspec);
+
+	spin_unlock(&kvm_ptp_lock);
+
+	return 0;
+}
+
+static int ptp_kvm_getcrosststamp(struct ptp_clock_info *ptp,
+				  struct system_device_crosststamp *xtstamp)
+{
+	return get_device_system_crosststamp(ptp_kvm_get_time_fn, NULL,
+					     NULL, xtstamp);
+}
+
+/*
+ * PTP clock operations
+ */
+
+static int ptp_kvm_adjfreq(struct ptp_clock_info *ptp, s32 ppb)
+{
+	return -EOPNOTSUPP;
+}
+
+static int ptp_kvm_adjtime(struct ptp_clock_info *ptp, s64 delta)
+{
+	return -EOPNOTSUPP;
+}
+
+static int ptp_kvm_gettime(struct ptp_clock_info *ptp, struct timespec64 *ts)
+{
+	unsigned long ret;
+	struct timespec64 tspec;
+
+	spin_lock(&kvm_ptp_lock);
+
+	ret = kvm_hypercall2(KVM_HC_CLOCK_PAIRING,
+			     clock_off_gpa,
+			     KVM_CLOCK_PAIRING_WALLCLOCK);
+	if (ret != 0) {
+		pr_err("clock offset hypercall ret %lu\n", ret);
+		spin_unlock(&kvm_ptp_lock);
+		return -EOPNOTSUPP;
+	}
+
+	tspec.tv_sec = clock_off.sec;
+	tspec.tv_nsec = clock_off.nsec;
+	spin_unlock(&kvm_ptp_lock);
+
+	memcpy(ts, &tspec, sizeof(struct timespec64));
+
+	return 0;
+}
+
+static int ptp_kvm_settime(struct ptp_clock_info *ptp,
+			   const struct timespec64 *ts)
+{
+	return -EOPNOTSUPP;
+}
+
+static int ptp_kvm_enable(struct ptp_clock_info *ptp,
+			  struct ptp_clock_request *rq, int on)
+{
+	return -EOPNOTSUPP;
+}
+
+static struct ptp_clock_info ptp_kvm_caps = {
+	.owner		= THIS_MODULE,
+	.name		= "KVM virtual PTP",
+	.max_adj	= 0,
+	.n_ext_ts	= 0,
+	.n_pins		= 0,
+	.pps		= 0,
+	.adjfreq	= ptp_kvm_adjfreq,
+	.adjtime	= ptp_kvm_adjtime,
+	.gettime64	= ptp_kvm_gettime,
+	.settime64	= ptp_kvm_settime,
+	.enable		= ptp_kvm_enable,
+	.getcrosststamp = ptp_kvm_getcrosststamp,
+	.emulate_ptp_sys_offset_mean = true,
+};
+
+/* module operations */
+
+static struct kvm_ptp_clock kvm_ptp_clock;
+
+static void __exit ptp_kvm_exit(void)
+{
+	ptp_clock_unregister(kvm_ptp_clock.ptp_clock);
+}
+
+static int __init ptp_kvm_init(void)
+{
+	clock_off_gpa = slow_virt_to_phys(&clock_off);
+	hv_clock = pvclock_pvti_cpu0_va();
+
+	if (!hv_clock)
+		return -ENODEV;
+
+	kvm_ptp_clock.caps = ptp_kvm_caps;
+
+	kvm_ptp_clock.ptp_clock = ptp_clock_register(&kvm_ptp_clock.caps, NULL);
+
+	if (IS_ERR(kvm_ptp_clock.ptp_clock))
+		return PTR_ERR(kvm_ptp_clock.ptp_clock);
+
+	return 0;
+}
+
+module_init(ptp_kvm_init);
+module_exit(ptp_kvm_exit);
+
+MODULE_AUTHOR("Marcelo Tosatti <mtosatti@redhat.com>");
+MODULE_DESCRIPTION("PTP clock using KVMCLOCK");
+MODULE_LICENSE("GPL");

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [patch 4/5] PTP: add PTP_SYS_OFFSET emulation via cross timestamps infrastructure
  2017-01-20 12:20 ` [patch 4/5] PTP: add PTP_SYS_OFFSET emulation via cross timestamps infrastructure Marcelo Tosatti
@ 2017-01-20 12:55   ` Paolo Bonzini
  2017-01-20 13:07     ` Marcelo Tosatti
  2017-01-20 20:25   ` Richard Cochran
  1 sibling, 1 reply; 36+ messages in thread
From: Paolo Bonzini @ 2017-01-20 12:55 UTC (permalink / raw)
  To: Marcelo Tosatti, kvm, linux-kernel
  Cc: Radim Krcmar, Richard Cochran, Miroslav Lichvar



On 20/01/2017 13:20, Marcelo Tosatti wrote:
>  kernel/time/timekeeping.c        |   79 +++++++++++++++++++++++++++++++++++++++

Why not leave this in drivers/ptp/ptp_chardev.c?

> +		if (ptp->info->emulate_ptp_sys_offset_mean) {
> +			err = emulate_ptp_sys_offset(ptp->info, sysoff, arg);
> +			break;
> +		}

I think this should be simply "if (!ptp->info->gettime64)" and,
likewise, there should be an emulation based getcrosststamp in
ptp_clock_gettime.

Paolo

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [patch 3/5] kvmclock: export kvmclock clocksource pointer
  2017-01-20 12:20 ` [patch 3/5] kvmclock: export kvmclock clocksource pointer Marcelo Tosatti
@ 2017-01-20 12:55   ` Paolo Bonzini
  0 siblings, 0 replies; 36+ messages in thread
From: Paolo Bonzini @ 2017-01-20 12:55 UTC (permalink / raw)
  To: Marcelo Tosatti, kvm, linux-kernel
  Cc: Radim Krcmar, Richard Cochran, Miroslav Lichvar



On 20/01/2017 13:20, Marcelo Tosatti wrote:
> To be used by KVM PTP driver.
> 
> Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
> 
> ---
>  arch/x86/include/asm/kvmclock.h |    6 ++++++
>  arch/x86/kernel/kvmclock.c      |    6 ++++++
>  2 files changed, 12 insertions(+)
> 
> Index: kvm-ptpdriver/arch/x86/include/asm/kvmclock.h
> ===================================================================
> --- /dev/null	1970-01-01 00:00:00.000000000 +0000
> +++ kvm-ptpdriver/arch/x86/include/asm/kvmclock.h	2017-01-18 18:20:35.463472189 -0200
> @@ -0,0 +1,6 @@
> +#ifndef _ASM_X86_KVM_CLOCK_H
> +#define _ASM_X86_KVM_CLOCK_H
> +
> +struct clocksource *get_kvmclock_cs(void);
> +
> +#endif /* _ASM_X86_KVM_CLOCK_H */
> Index: kvm-ptpdriver/arch/x86/kernel/kvmclock.c
> ===================================================================
> --- kvm-ptpdriver.orig/arch/x86/kernel/kvmclock.c	2017-01-11 21:22:13.306315984 -0200
> +++ kvm-ptpdriver/arch/x86/kernel/kvmclock.c	2017-01-18 18:20:02.543397125 -0200
> @@ -28,6 +28,7 @@
>  
>  #include <asm/x86_init.h>
>  #include <asm/reboot.h>
> +#include <asm/kvmclock.h>
>  
>  static int kvmclock __ro_after_init = 1;
>  static int msr_kvm_system_time = MSR_KVM_SYSTEM_TIME;
> @@ -182,6 +183,11 @@
>  	.flags = CLOCK_SOURCE_IS_CONTINUOUS,
>  };
>  
> +struct clocksource *get_kvmclock_cs(void)
> +{
> +	return &kvm_clock;

You can export kvm_clock directly (rename it to clocksource_kvmclock if
you prefer).

Paolo

> +}
> +
>  int kvm_register_clock(char *txt)
>  {
>  	int cpu = smp_processor_id();
> 
> 

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [patch 5/5] PTP: add kvm PTP driver
  2017-01-20 12:20 ` [patch 5/5] PTP: add kvm PTP driver Marcelo Tosatti
@ 2017-01-20 12:58   ` Paolo Bonzini
  2017-01-20 13:11     ` Marcelo Tosatti
  2017-01-20 14:12   ` Radim Krcmar
  1 sibling, 1 reply; 36+ messages in thread
From: Paolo Bonzini @ 2017-01-20 12:58 UTC (permalink / raw)
  To: Marcelo Tosatti, kvm, linux-kernel
  Cc: Radim Krcmar, Richard Cochran, Miroslav Lichvar



On 20/01/2017 13:20, Marcelo Tosatti wrote:
> +		ret = kvm_hypercall2(KVM_HC_CLOCK_PAIRING,
> +				     clock_off_gpa,
> +				     KVM_CLOCK_PAIRING_WALLCLOCK);
> +		if (ret != 0) {
> +			pr_err("clock offset hypercall ret %lu\n", ret);
> +			spin_unlock(&kvm_ptp_lock);
> +			preempt_enable_notrace();
> +			return -EOPNOTSUPP;
> +		}
> +

Is it worth making this hypercall, or even all of ptp_kvm_get_time_fn, a
pv_ops entry?

But this looks good already, apart from my different preference on
emulate_ptp_sys_offset_mean.

Paolo

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [patch 4/5] PTP: add PTP_SYS_OFFSET emulation via cross timestamps infrastructure
  2017-01-20 12:55   ` Paolo Bonzini
@ 2017-01-20 13:07     ` Marcelo Tosatti
  2017-01-20 13:36       ` Paolo Bonzini
  0 siblings, 1 reply; 36+ messages in thread
From: Marcelo Tosatti @ 2017-01-20 13:07 UTC (permalink / raw)
  To: Paolo Bonzini
  Cc: kvm, linux-kernel, Radim Krcmar, Richard Cochran, Miroslav Lichvar

On Fri, Jan 20, 2017 at 01:55:27PM +0100, Paolo Bonzini wrote:
> 
> 
> On 20/01/2017 13:20, Marcelo Tosatti wrote:
> >  kernel/time/timekeeping.c        |   79 +++++++++++++++++++++++++++++++++++++++
> 
> Why not leave this in drivers/ptp/ptp_chardev.c?

timekeeper_lock

> > +		if (ptp->info->emulate_ptp_sys_offset_mean) {
> > +			err = emulate_ptp_sys_offset(ptp->info, sysoff, arg);
> > +			break;
> > +		}
> 
> I think this should be simply "if (!ptp->info->gettime64)" and,
> likewise, there should be an emulation based getcrosststamp in
> ptp_clock_gettime.
> 
> Paolo

gettime64 is called directly via ptp_clock_gettime.

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [patch 0/5] KVM virtual PTP driver (v3)
  2017-01-20 12:20 [patch 0/5] KVM virtual PTP driver (v3) Marcelo Tosatti
                   ` (4 preceding siblings ...)
  2017-01-20 12:20 ` [patch 5/5] PTP: add kvm PTP driver Marcelo Tosatti
@ 2017-01-20 13:10 ` Paolo Bonzini
  5 siblings, 0 replies; 36+ messages in thread
From: Paolo Bonzini @ 2017-01-20 13:10 UTC (permalink / raw)
  To: Marcelo Tosatti, kvm, linux-kernel
  Cc: Radim Krcmar, Richard Cochran, Miroslav Lichvar



On 20/01/2017 13:20, Marcelo Tosatti wrote:
> PHC0                          0   3   377     7   -141ns[ +234ns] ±    7ns
> #* PHC0                          0   3   377     5   -197ns[ -403ns] ±    3ns
> #* PHC0                          0   3   377    11   +157ns[ +264ns] ±    3ns
> #* PHC0                          0   3   377     9    +92ns[ +304ns] ±    2ns
> #* PHC0                          0   3   377     7   -102ns[ -164ns] ±    3ns
> #* PHC0                          0   3   377     5   -157ns[ -269ns] ±    2ns
> #* PHC0                          0   3   377    10    -63ns[  -98ns] ±    2ns
> #* PHC0                          0   3   377     8   +351ns[ +399ns] ±    2ns
> #* PHC0                          0   3   377     6   +385ns[ +399ns] ±    4ns
> #* PHC0                          0   3   377     4   -133ns[ -110ns] ±    6ns
> #* PHC0                          0   3   377    10    -59ns[  -90ns] ±    3ns
> #* PHC0                          0   3   377     8    -90ns[ +381ns] ±    9ns
> #* PHC0                          0   3   377     6     +6ns[ +216ns] ±    6ns
> #* PHC0                          0   3   377     4   +166ns[ -666ns] ±   11ns
> #* PHC0                          0   3   377    10    -18ns[ +323ns] ±   10ns
> #* PHC0                          0   3   377     8    -12ns[ +121ns] ±    5ns
> #* PHC0                          0   3   377     5     +4ns[ +218ns] ±    7ns
> #* PHC0                          0   3   377     4   +162ns[ -683ns] ±   11ns
> #* PHC0                          0   3   377    10    -82ns[ +310ns] ±   12ns
> #* PHC0                          0   3   377     7     +5ns[ -320ns] ±    9ns
> #* PHC0                          0   3   377     5    -13ns[ +165ns] ±    7ns
> #* PHC0                          0   3   377     3     +6ns[ +105ns] ±    2ns
> #* PHC0                          0   3   377     9    -19ns[  -67ns] ±    3ns
> #* PHC0                          0   3   377     8    +89ns[ +181ns] ±    3ns
> #* PHC0                          0   3   377     6    +93ns[ +168ns] ±    2ns
> #* PHC0                          0   3   377     4   +100ns[ +154ns] ±    2ns
> #* PHC0                          0   3   377    10   -249ns[ +180ns] ±    7ns
> #* PHC0                          0   3   377     8   +158ns[ -582ns] ±   22ns
> #* PHC0                          0   3   377     6    +10ns[ +213ns] ±    9ns
> #* PHC0                          0   3   377     3    -35ns[ +258ns] ±    5ns
> #* PHC0                          0   3   377    10    +25ns[ -620ns] ±    8ns
> #* PHC0                          0   3   377     7    +13ns[ +153ns] ±    2ns
> 

Wow.  It feels good when theory matches practice. :)

Paolo

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [patch 5/5] PTP: add kvm PTP driver
  2017-01-20 12:58   ` Paolo Bonzini
@ 2017-01-20 13:11     ` Marcelo Tosatti
  0 siblings, 0 replies; 36+ messages in thread
From: Marcelo Tosatti @ 2017-01-20 13:11 UTC (permalink / raw)
  To: Paolo Bonzini
  Cc: kvm, linux-kernel, Radim Krcmar, Richard Cochran, Miroslav Lichvar

On Fri, Jan 20, 2017 at 01:58:33PM +0100, Paolo Bonzini wrote:
> 
> 
> On 20/01/2017 13:20, Marcelo Tosatti wrote:
> > +		ret = kvm_hypercall2(KVM_HC_CLOCK_PAIRING,
> > +				     clock_off_gpa,
> > +				     KVM_CLOCK_PAIRING_WALLCLOCK);
> > +		if (ret != 0) {
> > +			pr_err("clock offset hypercall ret %lu\n", ret);
> > +			spin_unlock(&kvm_ptp_lock);
> > +			preempt_enable_notrace();
> > +			return -EOPNOTSUPP;
> > +		}
> > +
> 
> Is it worth making this hypercall, or even all of ptp_kvm_get_time_fn, a
> pv_ops entry?

Well, i don't know how Xen is going to implement this. Maybe you can, 
when you have more low level implementations to be able to generalize.

> But this looks good already, apart from my different preference on
> emulate_ptp_sys_offset_mean.

Sending v4 with the direct export of kvm clocksource.

Thanks!

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [patch 4/5] PTP: add PTP_SYS_OFFSET emulation via cross timestamps infrastructure
  2017-01-20 13:07     ` Marcelo Tosatti
@ 2017-01-20 13:36       ` Paolo Bonzini
  2017-01-20 13:52         ` Marcelo Tosatti
  2017-01-20 14:02         ` Radim Krcmar
  0 siblings, 2 replies; 36+ messages in thread
From: Paolo Bonzini @ 2017-01-20 13:36 UTC (permalink / raw)
  To: Marcelo Tosatti
  Cc: kvm, linux-kernel, Radim Krcmar, Richard Cochran, Miroslav Lichvar



On 20/01/2017 14:07, Marcelo Tosatti wrote:
> On Fri, Jan 20, 2017 at 01:55:27PM +0100, Paolo Bonzini wrote:
>>
>>
>> On 20/01/2017 13:20, Marcelo Tosatti wrote:
>>>  kernel/time/timekeeping.c        |   79 +++++++++++++++++++++++++++++++++++++++
>>
>> Why not leave this in drivers/ptp/ptp_chardev.c?
> 
> timekeeper_lock

Why does emulate_ptp_sys_offset need it, if the current PTP_SYS_OFFSET
code doesn't?  Is the latency acceptable (considering this is a raw spin
lock) or is there a seqlock that we can use instead (such as tk_core.seq
like in get_device_system_crosststamp)?

>>> +		if (ptp->info->emulate_ptp_sys_offset_mean) {
>>> +			err = emulate_ptp_sys_offset(ptp->info, sysoff, arg);
>>> +			break;
>>> +		}
>>
>> I think this should be simply "if (!ptp->info->gettime64)" and,
>> likewise, there should be an emulation based getcrosststamp in
>> ptp_clock_gettime.
>>
>> Paolo
> 
> gettime64 is called directly via ptp_clock_gettime.

Yes, but ptp_clock_gettime can be taught to use getcrosststamp instead.

Paolo

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [patch 4/5] PTP: add PTP_SYS_OFFSET emulation via cross timestamps infrastructure
  2017-01-20 13:36       ` Paolo Bonzini
@ 2017-01-20 13:52         ` Marcelo Tosatti
  2017-01-20 14:02         ` Radim Krcmar
  1 sibling, 0 replies; 36+ messages in thread
From: Marcelo Tosatti @ 2017-01-20 13:52 UTC (permalink / raw)
  To: Paolo Bonzini
  Cc: kvm, linux-kernel, Radim Krcmar, Richard Cochran, Miroslav Lichvar

On Fri, Jan 20, 2017 at 02:36:40PM +0100, Paolo Bonzini wrote:
> 
> 
> On 20/01/2017 14:07, Marcelo Tosatti wrote:
> > On Fri, Jan 20, 2017 at 01:55:27PM +0100, Paolo Bonzini wrote:
> >>
> >>
> >> On 20/01/2017 13:20, Marcelo Tosatti wrote:
> >>>  kernel/time/timekeeping.c        |   79 +++++++++++++++++++++++++++++++++++++++
> >>
> >> Why not leave this in drivers/ptp/ptp_chardev.c?
> > 
> > timekeeper_lock
> 
> Why does emulate_ptp_sys_offset need it, if the current PTP_SYS_OFFSET
> code doesn't? 

Because if time is adjusted while you are taking the samples, 
the mean can return non existant values:

1) take sample1  (realtime = 2000)
2) userspace changes realtime (realtime = 100)
3) 2100/2 = 1050

However that 1050 value never existed, before or after
userspace changed realtime.
Such behaviour does not exist with PTP_SYS_OFFSET, because 
taking getnstimeofday64 is serialized against time changes.

I am not sure whether returning such bizzare values is fine, to 
drop the lock.

Hum... i think it must be because userspace will consider
the new values after realtime is changed as correct.

>  Is the latency acceptable (considering this is a raw spin
> lock) or is there a seqlock that we can use instead (such as tk_core.seq
> like in get_device_system_crosststamp)?

Well can move it after the ->getcrosststamp loop. 
I'll just drop the spinlock and document the behaviour.

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [patch 4/5] PTP: add PTP_SYS_OFFSET emulation via cross timestamps infrastructure
  2017-01-20 13:36       ` Paolo Bonzini
  2017-01-20 13:52         ` Marcelo Tosatti
@ 2017-01-20 14:02         ` Radim Krcmar
  2017-01-20 14:23           ` Paolo Bonzini
  1 sibling, 1 reply; 36+ messages in thread
From: Radim Krcmar @ 2017-01-20 14:02 UTC (permalink / raw)
  To: Paolo Bonzini
  Cc: Marcelo Tosatti, kvm, linux-kernel, Richard Cochran, Miroslav Lichvar

2017-01-20 14:36+0100, Paolo Bonzini:
> On 20/01/2017 14:07, Marcelo Tosatti wrote:
>> On Fri, Jan 20, 2017 at 01:55:27PM +0100, Paolo Bonzini wrote:
>>>
>>>
>>> On 20/01/2017 13:20, Marcelo Tosatti wrote:
>>>>  kernel/time/timekeeping.c        |   79 +++++++++++++++++++++++++++++++++++++++
>>>
>>> Why not leave this in drivers/ptp/ptp_chardev.c?
>> 
>> timekeeper_lock
> 
> Why does emulate_ptp_sys_offset need it, if the current PTP_SYS_OFFSET
> code doesn't?  Is the latency acceptable (considering this is a raw spin
> lock) or is there a seqlock that we can use instead (such as tk_core.seq
> like in get_device_system_crosststamp)?

The spinlock prevents writers to take the tk_core.seq, which means that
time cannot be changed during that.

The simplest alternative would be to use tk_core.seq for all our reads,
but that would increse the chance of re-reading, even infinitely.

But we don't need to read everything with the same time base -- if the
time is changed (by NTP/user/...) between our reads, then the value will
be off, but if writer took tk_core.seq just to accumulate current time,
then the time after accumulation stays the same and it would work as if
we had the tk_core.seq for the whole time.

Another solution would be to do just one one read and set it to all
saples -- the difference between t[i] and t[i+2] would be 0.  We are
quite sure the just one read is enough, this hack could be even better.

>>>> +		if (ptp->info->emulate_ptp_sys_offset_mean) {
>>>> +			err = emulate_ptp_sys_offset(ptp->info, sysoff, arg);
>>>> +			break;
>>>> +		}
>>>
>>> I think this should be simply "if (!ptp->info->gettime64)" and,
>>> likewise, there should be an emulation based getcrosststamp in
>>> ptp_clock_gettime.
>>>
>>> Paolo
>> 
>> gettime64 is called directly via ptp_clock_gettime.
> 
> Yes, but ptp_clock_gettime can be taught to use getcrosststamp instead.

I agree,

  if (!gettime64)
      use getcrosststamp

and KVM PTP device will not implement gettime64().

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [patch 5/5] PTP: add kvm PTP driver
  2017-01-20 12:20 ` [patch 5/5] PTP: add kvm PTP driver Marcelo Tosatti
  2017-01-20 12:58   ` Paolo Bonzini
@ 2017-01-20 14:12   ` Radim Krcmar
  2017-01-20 14:20     ` Radim Krcmar
  2017-01-20 15:00     ` Marcelo Tosatti
  1 sibling, 2 replies; 36+ messages in thread
From: Radim Krcmar @ 2017-01-20 14:12 UTC (permalink / raw)
  To: Marcelo Tosatti
  Cc: kvm, linux-kernel, Paolo Bonzini, Richard Cochran, Miroslav Lichvar

2017-01-20 10:20-0200, Marcelo Tosatti:
> Add a driver with gettime method returning hosts realtime clock.
> This allows Chrony to synchronize host and guest clocks with 
> high precision (see results below).
> 
> chronyc> sources
> MS Name/IP address         Stratum Poll Reach LastRx Last sample
> ===============================================================================
> #* PHC0                          0   3   377     4   +162ns[ -683ns] +/-   11ns
> 
> To configure Chronyd to use PHC refclock, add the 
> following line to its configuration file:
> 
> refclock PHC /dev/ptpX poll 3 dpoll -2 offset 0
> 
> Where /dev/ptpX is the kvmclock PTP clock.
> 
> Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
> 
> ---
>  drivers/ptp/Kconfig   |   12 ++
>  drivers/ptp/Makefile  |    1 
>  drivers/ptp/ptp_kvm.c |  213 ++++++++++++++++++++++++++++++++++++++++++++++++++
>  3 files changed, 226 insertions(+)
> 
> v2: check for kvmclock (Radim)
>     initialize global variables before device registration (Radim)
> v3: use cross timestamps callback (Paolo, Miroslav, Radim)
> 
> Index: kvm-ptpdriver/drivers/ptp/ptp_kvm.c
> ===================================================================
> --- /dev/null	1970-01-01 00:00:00.000000000 +0000
> +++ kvm-ptpdriver/drivers/ptp/ptp_kvm.c	2017-01-20 10:19:20.555311672 -0200
> @@ -0,0 +1,213 @@
> +/*
> + * Virtual PTP 1588 clock for use with KVM guests
> + *
> + * Copyright (C) 2017 Red Hat Inc.
> + *
> + *  This program is free software; you can redistribute it and/or modify
> + *  it under the terms of the GNU General Public License as published by
> + *  the Free Software Foundation; either version 2 of the License, or
> + *  (at your option) any later version.
> + *
> + *  This program is distributed in the hope that it will be useful,
> + *  but WITHOUT ANY WARRANTY; without even the implied warranty of
> + *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + *  GNU General Public License for more details.
> + *
> + */
> +#include <linux/device.h>
> +#include <linux/err.h>
> +#include <linux/init.h>
> +#include <linux/kernel.h>
> +#include <linux/module.h>
> +#include <uapi/linux/kvm_para.h>
> +#include <asm/kvm_para.h>
> +#include <asm/pvclock.h>
> +#include <asm/kvmclock.h>
> +#include <uapi/asm/kvm_para.h>
> +
> +#include <linux/ptp_clock_kernel.h>
> +
> +struct kvm_ptp_clock {
> +	struct ptp_clock *ptp_clock;
> +	struct ptp_clock_info caps;
> +};
> +
> +DEFINE_SPINLOCK(kvm_ptp_lock);
> +
> +static struct pvclock_vsyscall_time_info *hv_clock;
> +
> +static struct kvm_clock_offset clock_off;
> +static phys_addr_t clock_off_gpa;
> +
> +/*
> + * system_counterval.cycles: kvmclock value com TSC do host.
> + * system_counterval.cs: kvmclock clocksource.
> + * device_time: host realtime clock.
> + *
> + */
> +static int ptp_kvm_get_time_fn(ktime_t *device_time,
> +			       struct system_counterval_t *system_counter,
> +			       void *ctx)
> +{
> +	unsigned long ret;
> +	struct timespec64 tspec;
> +	unsigned version;
> +	u8 flags;
> +	int cpu;
> +	struct pvclock_vcpu_time_info *src;
> +
> +	preempt_disable_notrace();
> +	cpu = smp_processor_id();
> +	src = &hv_clock[cpu].pvti;
> +
> +	spin_lock(&kvm_ptp_lock);

What does the lock prevent?

> +
> +	do {
> +		/*
> +		 * We are measuring the delay between
> +		 * kvm_hypercall and rdtsc using TSC,
> +		 * and converting that delta to
> +		 * tsc_to_system_mul and tsc_shift
> +		 * So any changes to tsc_to_system_mul
> +		 * and tsc_shift in this region
> +		 * invalidate the measurement.
> +		 */
> +		version = pvclock_read_begin(src);
> +
> +		ret = kvm_hypercall2(KVM_HC_CLOCK_PAIRING,
> +				     clock_off_gpa,
> +				     KVM_CLOCK_PAIRING_WALLCLOCK);
> +		if (ret != 0) {
> +			pr_err("clock offset hypercall ret %lu\n", ret);
> +			spin_unlock(&kvm_ptp_lock);
> +			preempt_enable_notrace();
> +			return -EOPNOTSUPP;
> +		}
> +
> +		tspec.tv_sec = clock_off.sec;
> +		tspec.tv_nsec = clock_off.nsec;
> +		ret = __pvclock_read_cycles(src, clock_off.tsc);
> +		flags = src->flags;
> +	} while (pvclock_read_retry(src, version));
> +
> +	preempt_enable_notrace();
> +
> +	system_counter->cycles = ret;
> +	system_counter->cs = get_kvmclock_cs();

Can't we use clocksource_tsc and just pass the tsc without kvmclock in
the middle?

> +	tspec.tv_nsec = tspec.tv_nsec;

(This looks extraneous.)

Thanks.

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [patch 5/5] PTP: add kvm PTP driver
  2017-01-20 14:12   ` Radim Krcmar
@ 2017-01-20 14:20     ` Radim Krcmar
  2017-01-20 15:00     ` Marcelo Tosatti
  1 sibling, 0 replies; 36+ messages in thread
From: Radim Krcmar @ 2017-01-20 14:20 UTC (permalink / raw)
  To: Marcelo Tosatti
  Cc: kvm, linux-kernel, Paolo Bonzini, Richard Cochran, Miroslav Lichvar

2017-01-20 15:12+0100, Radim Krcmar:
> 2017-01-20 10:20-0200, Marcelo Tosatti:
>> +	spin_lock(&kvm_ptp_lock);
> 
> What does the lock prevent?

clock_off/clock_off_gpa. :)

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [patch 4/5] PTP: add PTP_SYS_OFFSET emulation via cross timestamps infrastructure
  2017-01-20 14:02         ` Radim Krcmar
@ 2017-01-20 14:23           ` Paolo Bonzini
  2017-01-20 14:31             ` Miroslav Lichvar
  2017-01-20 18:30             ` Radim Krcmar
  0 siblings, 2 replies; 36+ messages in thread
From: Paolo Bonzini @ 2017-01-20 14:23 UTC (permalink / raw)
  To: Radim Krcmar
  Cc: Marcelo Tosatti, kvm, linux-kernel, Richard Cochran, Miroslav Lichvar



On 20/01/2017 15:02, Radim Krcmar wrote:
> 2017-01-20 14:36+0100, Paolo Bonzini:
>> On 20/01/2017 14:07, Marcelo Tosatti wrote:
>>> On Fri, Jan 20, 2017 at 01:55:27PM +0100, Paolo Bonzini wrote:
>>>>
>>>>
>>>> On 20/01/2017 13:20, Marcelo Tosatti wrote:
>>>>>  kernel/time/timekeeping.c        |   79 +++++++++++++++++++++++++++++++++++++++
>>>>
>>>> Why not leave this in drivers/ptp/ptp_chardev.c?
>>>
>>> timekeeper_lock
>>
>> Why does emulate_ptp_sys_offset need it, if the current PTP_SYS_OFFSET
>> code doesn't?  Is the latency acceptable (considering this is a raw spin
>> lock) or is there a seqlock that we can use instead (such as tk_core.seq
>> like in get_device_system_crosststamp)?
> 
> The spinlock prevents writers to take the tk_core.seq, which means that
> time cannot be changed during that.
> 
> The simplest alternative would be to use tk_core.seq for all our reads,
> but that would increse the chance of re-reading, even infinitely.

How much?  If a hypercall takes 1 microsecond, and PTP_MAX_SAMPLES is
25, we should be done in less than 50 microseconds.  If update_wall-time
is called with 250 Hz frequency (sounds like a lot), that's still 4000
microseconds so the probability of even 3-4 consecutive retries should
be very low.

> But we don't need to read everything with the same time base -- if the
> time is changed (by NTP/user/...) between our reads, then the value will
> be off, but if writer took tk_core.seq just to accumulate current time,
> then the time after accumulation stays the same and it would work as if
> we had the tk_core.seq for the whole time.

You mean only check seqlock separately for each sample, but restart the
entire loop upon changes to cs_was_changed_seq or clock_was_set_seq?
That would work too.

> Another solution would be to do just one one read and set it to all
> saples -- the difference between t[i] and t[i+2] would be 0.  We are
> quite sure the just one read is enough, this hack could be even better.

I'd be afraid of messing up chrony's stats...

Paolo

>>>>> +		if (ptp->info->emulate_ptp_sys_offset_mean) {
>>>>> +			err = emulate_ptp_sys_offset(ptp->info, sysoff, arg);
>>>>> +			break;
>>>>> +		}
>>>>
>>>> I think this should be simply "if (!ptp->info->gettime64)" and,
>>>> likewise, there should be an emulation based getcrosststamp in
>>>> ptp_clock_gettime.
>>>>
>>>> Paolo
>>>
>>> gettime64 is called directly via ptp_clock_gettime.
>>
>> Yes, but ptp_clock_gettime can be taught to use getcrosststamp instead.
> 
> I agree,
> 
>   if (!gettime64)
>       use getcrosststamp
> 
> and KVM PTP device will not implement gettime64().
> 

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [patch 4/5] PTP: add PTP_SYS_OFFSET emulation via cross timestamps infrastructure
  2017-01-20 14:23           ` Paolo Bonzini
@ 2017-01-20 14:31             ` Miroslav Lichvar
  2017-01-20 18:30             ` Radim Krcmar
  1 sibling, 0 replies; 36+ messages in thread
From: Miroslav Lichvar @ 2017-01-20 14:31 UTC (permalink / raw)
  To: Paolo Bonzini
  Cc: Radim Krcmar, Marcelo Tosatti, kvm, linux-kernel, Richard Cochran

On Fri, Jan 20, 2017 at 03:23:08PM +0100, Paolo Bonzini wrote:
> On 20/01/2017 15:02, Radim Krcmar wrote:
> > Another solution would be to do just one one read and set it to all
> > saples -- the difference between t[i] and t[i+2] would be 0.  We are
> > quite sure the just one read is enough, this hack could be even better.
> 
> I'd be afraid of messing up chrony's stats...

This should be ok. For the application it just looks as if the CPU and
everything else in the path of the measurements was infinitely fast. :)

-- 
Miroslav Lichvar

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [patch 5/5] PTP: add kvm PTP driver
  2017-01-20 14:12   ` Radim Krcmar
  2017-01-20 14:20     ` Radim Krcmar
@ 2017-01-20 15:00     ` Marcelo Tosatti
  2017-01-20 17:11       ` Paolo Bonzini
  2017-01-20 18:08       ` Radim Krcmar
  1 sibling, 2 replies; 36+ messages in thread
From: Marcelo Tosatti @ 2017-01-20 15:00 UTC (permalink / raw)
  To: Radim Krcmar
  Cc: kvm, linux-kernel, Paolo Bonzini, Richard Cochran, Miroslav Lichvar

On Fri, Jan 20, 2017 at 03:12:56PM +0100, Radim Krcmar wrote:
> 2017-01-20 10:20-0200, Marcelo Tosatti:
> > Add a driver with gettime method returning hosts realtime clock.
> > This allows Chrony to synchronize host and guest clocks with 
> > high precision (see results below).
> > 
> > chronyc> sources
> > MS Name/IP address         Stratum Poll Reach LastRx Last sample
> > ===============================================================================
> > #* PHC0                          0   3   377     4   +162ns[ -683ns] +/-   11ns
> > 
> > To configure Chronyd to use PHC refclock, add the 
> > following line to its configuration file:
> > 
> > refclock PHC /dev/ptpX poll 3 dpoll -2 offset 0
> > 
> > Where /dev/ptpX is the kvmclock PTP clock.
> > 
> > Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
> > 
> > ---
> >  drivers/ptp/Kconfig   |   12 ++
> >  drivers/ptp/Makefile  |    1 
> >  drivers/ptp/ptp_kvm.c |  213 ++++++++++++++++++++++++++++++++++++++++++++++++++
> >  3 files changed, 226 insertions(+)
> > 
> > v2: check for kvmclock (Radim)
> >     initialize global variables before device registration (Radim)
> > v3: use cross timestamps callback (Paolo, Miroslav, Radim)
> > 
> > Index: kvm-ptpdriver/drivers/ptp/ptp_kvm.c
> > ===================================================================
> > --- /dev/null	1970-01-01 00:00:00.000000000 +0000
> > +++ kvm-ptpdriver/drivers/ptp/ptp_kvm.c	2017-01-20 10:19:20.555311672 -0200
> > @@ -0,0 +1,213 @@
> > +/*
> > + * Virtual PTP 1588 clock for use with KVM guests
> > + *
> > + * Copyright (C) 2017 Red Hat Inc.
> > + *
> > + *  This program is free software; you can redistribute it and/or modify
> > + *  it under the terms of the GNU General Public License as published by
> > + *  the Free Software Foundation; either version 2 of the License, or
> > + *  (at your option) any later version.
> > + *
> > + *  This program is distributed in the hope that it will be useful,
> > + *  but WITHOUT ANY WARRANTY; without even the implied warranty of
> > + *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> > + *  GNU General Public License for more details.
> > + *
> > + */
> > +#include <linux/device.h>
> > +#include <linux/err.h>
> > +#include <linux/init.h>
> > +#include <linux/kernel.h>
> > +#include <linux/module.h>
> > +#include <uapi/linux/kvm_para.h>
> > +#include <asm/kvm_para.h>
> > +#include <asm/pvclock.h>
> > +#include <asm/kvmclock.h>
> > +#include <uapi/asm/kvm_para.h>
> > +
> > +#include <linux/ptp_clock_kernel.h>
> > +
> > +struct kvm_ptp_clock {
> > +	struct ptp_clock *ptp_clock;
> > +	struct ptp_clock_info caps;
> > +};
> > +
> > +DEFINE_SPINLOCK(kvm_ptp_lock);
> > +
> > +static struct pvclock_vsyscall_time_info *hv_clock;
> > +
> > +static struct kvm_clock_offset clock_off;
> > +static phys_addr_t clock_off_gpa;
> > +
> > +/*
> > + * system_counterval.cycles: kvmclock value com TSC do host.
> > + * system_counterval.cs: kvmclock clocksource.
> > + * device_time: host realtime clock.
> > + *
> > + */
> > +static int ptp_kvm_get_time_fn(ktime_t *device_time,
> > +			       struct system_counterval_t *system_counter,
> > +			       void *ctx)
> > +{
> > +	unsigned long ret;
> > +	struct timespec64 tspec;
> > +	unsigned version;
> > +	u8 flags;
> > +	int cpu;
> > +	struct pvclock_vcpu_time_info *src;
> > +
> > +	preempt_disable_notrace();
> > +	cpu = smp_processor_id();
> > +	src = &hv_clock[cpu].pvti;
> > +
> > +	spin_lock(&kvm_ptp_lock);
> 
> What does the lock prevent?

Protects access to "struct kvm_clock_offset clock_off".

> > +
> > +	do {
> > +		/*
> > +		 * We are measuring the delay between
> > +		 * kvm_hypercall and rdtsc using TSC,
> > +		 * and converting that delta to
> > +		 * tsc_to_system_mul and tsc_shift
> > +		 * So any changes to tsc_to_system_mul
> > +		 * and tsc_shift in this region
> > +		 * invalidate the measurement.
> > +		 */
> > +		version = pvclock_read_begin(src);
> > +
> > +		ret = kvm_hypercall2(KVM_HC_CLOCK_PAIRING,
> > +				     clock_off_gpa,
> > +				     KVM_CLOCK_PAIRING_WALLCLOCK);
> > +		if (ret != 0) {
> > +			pr_err("clock offset hypercall ret %lu\n", ret);
> > +			spin_unlock(&kvm_ptp_lock);
> > +			preempt_enable_notrace();
> > +			return -EOPNOTSUPP;
> > +		}
> > +
> > +		tspec.tv_sec = clock_off.sec;
> > +		tspec.tv_nsec = clock_off.nsec;
> > +		ret = __pvclock_read_cycles(src, clock_off.tsc);
> > +		flags = src->flags;
> > +	} while (pvclock_read_retry(src, version));
> > +
> > +	preempt_enable_notrace();
> > +
> > +	system_counter->cycles = ret;
> > +	system_counter->cs = get_kvmclock_cs();
> 
> Can't we use clocksource_tsc and just pass the tsc without kvmclock in
> the middle?

No, it has to be the kvmclock value.

> > +	tspec.tv_nsec = tspec.tv_nsec;

Oops, yes, will resend only this patch if there are no further comments.

> 
> (This looks extraneous.)
> 
> Thanks.

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [patch 5/5] PTP: add kvm PTP driver
  2017-01-20 15:00     ` Marcelo Tosatti
@ 2017-01-20 17:11       ` Paolo Bonzini
  2017-01-20 18:08       ` Radim Krcmar
  1 sibling, 0 replies; 36+ messages in thread
From: Paolo Bonzini @ 2017-01-20 17:11 UTC (permalink / raw)
  To: Marcelo Tosatti, Radim Krcmar
  Cc: kvm, linux-kernel, Richard Cochran, Miroslav Lichvar



On 20/01/2017 16:00, Marcelo Tosatti wrote:
>>> +	system_counter->cs = get_kvmclock_cs();
>>
>> Can't we use clocksource_tsc and just pass the tsc without kvmclock in
>> the middle?
> 
> No, it has to be the kvmclock value.

And that would be too tricky anyway. :)

Paolo

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [patch 5/5] PTP: add kvm PTP driver
  2017-01-20 15:00     ` Marcelo Tosatti
  2017-01-20 17:11       ` Paolo Bonzini
@ 2017-01-20 18:08       ` Radim Krcmar
  2017-01-20 19:10         ` Marcelo Tosatti
  2017-01-21  8:02         ` Paolo Bonzini
  1 sibling, 2 replies; 36+ messages in thread
From: Radim Krcmar @ 2017-01-20 18:08 UTC (permalink / raw)
  To: Marcelo Tosatti
  Cc: kvm, linux-kernel, Paolo Bonzini, Richard Cochran, Miroslav Lichvar

2017-01-20 13:00-0200, Marcelo Tosatti:
> On Fri, Jan 20, 2017 at 03:12:56PM +0100, Radim Krcmar wrote:
>> 2017-01-20 10:20-0200, Marcelo Tosatti:
>> > +	do {
>> > +		/*
>> > +		 * We are measuring the delay between
>> > +		 * kvm_hypercall and rdtsc using TSC,
>> > +		 * and converting that delta to
>> > +		 * tsc_to_system_mul and tsc_shift
>> > +		 * So any changes to tsc_to_system_mul
>> > +		 * and tsc_shift in this region
>> > +		 * invalidate the measurement.
>> > +		 */
>> > +		version = pvclock_read_begin(src);
>> > +
>> > +		ret = kvm_hypercall2(KVM_HC_CLOCK_PAIRING,
>> > +				     clock_off_gpa,
>> > +				     KVM_CLOCK_PAIRING_WALLCLOCK);
>> > +		if (ret != 0) {
>> > +			pr_err("clock offset hypercall ret %lu\n", ret);
>> > +			spin_unlock(&kvm_ptp_lock);
>> > +			preempt_enable_notrace();
>> > +			return -EOPNOTSUPP;
>> > +		}
>> > +
>> > +		tspec.tv_sec = clock_off.sec;
>> > +		tspec.tv_nsec = clock_off.nsec;
>> > +		ret = __pvclock_read_cycles(src, clock_off.tsc);
>> > +		flags = src->flags;
>> > +	} while (pvclock_read_retry(src, version));
>> > +
>> > +	preempt_enable_notrace();
>> > +
>> > +	system_counter->cycles = ret;
>> > +	system_counter->cs = get_kvmclock_cs();
>> 
>> Can't we use clocksource_tsc and just pass the tsc without kvmclock in
>> the middle?
> 
> No, it has to be the kvmclock value.

What happens if the guest switches from kvmclock to tsc?

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [patch 4/5] PTP: add PTP_SYS_OFFSET emulation via cross timestamps infrastructure
  2017-01-20 14:23           ` Paolo Bonzini
  2017-01-20 14:31             ` Miroslav Lichvar
@ 2017-01-20 18:30             ` Radim Krcmar
  1 sibling, 0 replies; 36+ messages in thread
From: Radim Krcmar @ 2017-01-20 18:30 UTC (permalink / raw)
  To: Paolo Bonzini
  Cc: Marcelo Tosatti, kvm, linux-kernel, Richard Cochran, Miroslav Lichvar

2017-01-20 15:23+0100, Paolo Bonzini:
> On 20/01/2017 15:02, Radim Krcmar wrote:
>> 2017-01-20 14:36+0100, Paolo Bonzini:
>>> On 20/01/2017 14:07, Marcelo Tosatti wrote:
>>>> On Fri, Jan 20, 2017 at 01:55:27PM +0100, Paolo Bonzini wrote:
>>>>>
>>>>>
>>>>> On 20/01/2017 13:20, Marcelo Tosatti wrote:
>>>>>>  kernel/time/timekeeping.c        |   79 +++++++++++++++++++++++++++++++++++++++
>>>>>
>>>>> Why not leave this in drivers/ptp/ptp_chardev.c?
>>>>
>>>> timekeeper_lock
>>>
>>> Why does emulate_ptp_sys_offset need it, if the current PTP_SYS_OFFSET
>>> code doesn't?  Is the latency acceptable (considering this is a raw spin
>>> lock) or is there a seqlock that we can use instead (such as tk_core.seq
>>> like in get_device_system_crosststamp)?
>> 
>> The spinlock prevents writers to take the tk_core.seq, which means that
>> time cannot be changed during that.
>> 
>> The simplest alternative would be to use tk_core.seq for all our reads,
>> but that would increse the chance of re-reading, even infinitely.
> 
> How much?  If a hypercall takes 1 microsecond, and PTP_MAX_SAMPLES is
> 25, we should be done in less than 50 microseconds.  If update_wall-time
> is called with 250 Hz frequency (sounds like a lot), that's still 4000
> microseconds so the probability of even 3-4 consecutive retries should
> be very low.

You are right, I was overestimating the worst case.
Host/guest preemption (1000 Hz) will also force a re-read, but both of
these diminishing probabilities and a tendency to align.

>> But we don't need to read everything with the same time base -- if the
>> time is changed (by NTP/user/...) between our reads, then the value will
>> be off, but if writer took tk_core.seq just to accumulate current time,
>> then the time after accumulation stays the same and it would work as if
>> we had the tk_core.seq for the whole time.
> 
> You mean only check seqlock separately for each sample, but restart the
> entire loop upon changes to cs_was_changed_seq or clock_was_set_seq?
> That would work too.

I wanted to accept that our measuerements can be imprecise and just have
the seqlock for each sample.  It should not make a difference without
misconfiguration and we can't do anything about a malicious root anyway.

Thanks.

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [patch 5/5] PTP: add kvm PTP driver
  2017-01-20 18:08       ` Radim Krcmar
@ 2017-01-20 19:10         ` Marcelo Tosatti
  2017-01-21  8:02         ` Paolo Bonzini
  1 sibling, 0 replies; 36+ messages in thread
From: Marcelo Tosatti @ 2017-01-20 19:10 UTC (permalink / raw)
  To: Radim Krcmar
  Cc: kvm, linux-kernel, Paolo Bonzini, Richard Cochran, Miroslav Lichvar

On Fri, Jan 20, 2017 at 07:08:49PM +0100, Radim Krcmar wrote:
> 2017-01-20 13:00-0200, Marcelo Tosatti:
> > On Fri, Jan 20, 2017 at 03:12:56PM +0100, Radim Krcmar wrote:
> >> 2017-01-20 10:20-0200, Marcelo Tosatti:
> >> > +	do {
> >> > +		/*
> >> > +		 * We are measuring the delay between
> >> > +		 * kvm_hypercall and rdtsc using TSC,
> >> > +		 * and converting that delta to
> >> > +		 * tsc_to_system_mul and tsc_shift
> >> > +		 * So any changes to tsc_to_system_mul
> >> > +		 * and tsc_shift in this region
> >> > +		 * invalidate the measurement.
> >> > +		 */
> >> > +		version = pvclock_read_begin(src);
> >> > +
> >> > +		ret = kvm_hypercall2(KVM_HC_CLOCK_PAIRING,
> >> > +				     clock_off_gpa,
> >> > +				     KVM_CLOCK_PAIRING_WALLCLOCK);
> >> > +		if (ret != 0) {
> >> > +			pr_err("clock offset hypercall ret %lu\n", ret);
> >> > +			spin_unlock(&kvm_ptp_lock);
> >> > +			preempt_enable_notrace();
> >> > +			return -EOPNOTSUPP;
> >> > +		}
> >> > +
> >> > +		tspec.tv_sec = clock_off.sec;
> >> > +		tspec.tv_nsec = clock_off.nsec;
> >> > +		ret = __pvclock_read_cycles(src, clock_off.tsc);
> >> > +		flags = src->flags;
> >> > +	} while (pvclock_read_retry(src, version));
> >> > +
> >> > +	preempt_enable_notrace();
> >> > +
> >> > +	system_counter->cycles = ret;
> >> > +	system_counter->cs = get_kvmclock_cs();
> >> 
> >> Can't we use clocksource_tsc and just pass the tsc without kvmclock in
> >> the middle?
> > 
> > No, it has to be the kvmclock value.
> 
> What happens if the guest switches from kvmclock to tsc?

The ioctl will return -ENODEV.

>From get_device_system_crosststamp function:

                /*
                 * Verify that the clocksource associated with the
                 * captured
                 * system counter value is the same as the currently
                 * installed
                 * timekeeper clocksource
                 */
                if (tk->tkr_mono.clock != system_counterval.cs)
                        return -ENODEV;

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [patch 4/5] PTP: add PTP_SYS_OFFSET emulation via cross timestamps infrastructure
  2017-01-20 12:20 ` [patch 4/5] PTP: add PTP_SYS_OFFSET emulation via cross timestamps infrastructure Marcelo Tosatti
  2017-01-20 12:55   ` Paolo Bonzini
@ 2017-01-20 20:25   ` Richard Cochran
  2017-01-23 13:19     ` Marcelo Tosatti
  1 sibling, 1 reply; 36+ messages in thread
From: Richard Cochran @ 2017-01-20 20:25 UTC (permalink / raw)
  To: Marcelo Tosatti
  Cc: kvm, linux-kernel, Paolo Bonzini, Radim Krcmar, Miroslav Lichvar

On Fri, Jan 20, 2017 at 10:20:29AM -0200, Marcelo Tosatti wrote:
> Emulate PTP_SYS_OFFSET by using an arithmetic mean of the
> realtime samples from ->getcrosststamp callback.

This change log is not very informative.

Yes, I can see that the new code calculates a mean.

But WHY is this needed?

And why is this kvm case so special, that it requires its own new flag
within the generic PHC subsystem?

Thanks,
Richard

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [patch 5/5] PTP: add kvm PTP driver
  2017-01-20 18:08       ` Radim Krcmar
  2017-01-20 19:10         ` Marcelo Tosatti
@ 2017-01-21  8:02         ` Paolo Bonzini
  1 sibling, 0 replies; 36+ messages in thread
From: Paolo Bonzini @ 2017-01-21  8:02 UTC (permalink / raw)
  To: Radim Krcmar, Marcelo Tosatti
  Cc: kvm, linux-kernel, Richard Cochran, Miroslav Lichvar



On 20/01/2017 19:08, Radim Krcmar wrote:
> 2017-01-20 13:00-0200, Marcelo Tosatti:
>> On Fri, Jan 20, 2017 at 03:12:56PM +0100, Radim Krcmar wrote:
>>> 2017-01-20 10:20-0200, Marcelo Tosatti:
>>>> +	do {
>>>> +		/*
>>>> +		 * We are measuring the delay between
>>>> +		 * kvm_hypercall and rdtsc using TSC,
>>>> +		 * and converting that delta to
>>>> +		 * tsc_to_system_mul and tsc_shift
>>>> +		 * So any changes to tsc_to_system_mul
>>>> +		 * and tsc_shift in this region
>>>> +		 * invalidate the measurement.
>>>> +		 */
>>>> +		version = pvclock_read_begin(src);
>>>> +
>>>> +		ret = kvm_hypercall2(KVM_HC_CLOCK_PAIRING,
>>>> +				     clock_off_gpa,
>>>> +				     KVM_CLOCK_PAIRING_WALLCLOCK);
>>>> +		if (ret != 0) {
>>>> +			pr_err("clock offset hypercall ret %lu\n", ret);
>>>> +			spin_unlock(&kvm_ptp_lock);
>>>> +			preempt_enable_notrace();
>>>> +			return -EOPNOTSUPP;
>>>> +		}
>>>> +
>>>> +		tspec.tv_sec = clock_off.sec;
>>>> +		tspec.tv_nsec = clock_off.nsec;
>>>> +		ret = __pvclock_read_cycles(src, clock_off.tsc);
>>>> +		flags = src->flags;
>>>> +	} while (pvclock_read_retry(src, version));
>>>> +
>>>> +	preempt_enable_notrace();
>>>> +
>>>> +	system_counter->cycles = ret;
>>>> +	system_counter->cs = get_kvmclock_cs();
>>>
>>> Can't we use clocksource_tsc and just pass the tsc without kvmclock in
>>> the middle?
>>
>> No, it has to be the kvmclock value.
> 
> What happens if the guest switches from kvmclock to tsc?

get_device_system_crosststamp handles it, that's why there is a
clocksource field in system_counter.

Paolo

> --
> To unsubscribe from this list: send the line "unsubscribe kvm" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> 

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [patch 4/5] PTP: add PTP_SYS_OFFSET emulation via cross timestamps infrastructure
  2017-01-20 20:25   ` Richard Cochran
@ 2017-01-23 13:19     ` Marcelo Tosatti
  2017-01-23 18:44       ` Richard Cochran
  0 siblings, 1 reply; 36+ messages in thread
From: Marcelo Tosatti @ 2017-01-23 13:19 UTC (permalink / raw)
  To: Richard Cochran
  Cc: kvm, linux-kernel, Paolo Bonzini, Radim Krcmar, Miroslav Lichvar

On Fri, Jan 20, 2017 at 09:25:02PM +0100, Richard Cochran wrote:
> On Fri, Jan 20, 2017 at 10:20:29AM -0200, Marcelo Tosatti wrote:
> > Emulate PTP_SYS_OFFSET by using an arithmetic mean of the
> > realtime samples from ->getcrosststamp callback.
> 
> This change log is not very informative.
> 
> Yes, I can see that the new code calculates a mean.
> 
> But WHY is this needed?

This is needed to generate the PTP_SYS_OFFSET data: a table with read
from realtime clock, read from device clock, read from realtime clock,
... :

time ->
device clock	|	|sample2|	|sample4|	|sample6| ...
-------------------------------------------------------------
realtime clock  |sample1|       |sample3|	|sample5|


Where sampleN is the read of the respective clock.

>From the following PTP_SYS_OFFSET_PRECISE data:

device clock	|sample1P,deviceclock|       |sample2P,deviceclock|
-------------------------------------------------------------
realtime clock  |sample1P,realtimeclock|     |sample2P,realtimeclock|

The mean of {sample2P,realtimeclock} and {sample1P,realtimeclock} is
used to generate sample1 for the realtime clock.

> And why is this kvm case so special, that it requires its own new flag
> within the generic PHC subsystem?

We get PTP_SYS_OFFSET_PRECISE support for users and use that to 
implement PTP_SYS_OFFSET as well. Other drivers could do the same, 
as long as their ->getcrosststamp callbacks return PTP synchronized
time.

Since the clock is moving forward, approximating the clock read at
"sample3" with the mean of "sample1" and "sample5" (in the first table
above) is a very good approximation.

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [patch 4/5] PTP: add PTP_SYS_OFFSET emulation via cross timestamps infrastructure
  2017-01-23 13:19     ` Marcelo Tosatti
@ 2017-01-23 18:44       ` Richard Cochran
  2017-01-23 19:44         ` Paolo Bonzini
  2017-01-23 23:06         ` Marcelo Tosatti
  0 siblings, 2 replies; 36+ messages in thread
From: Richard Cochran @ 2017-01-23 18:44 UTC (permalink / raw)
  To: Marcelo Tosatti
  Cc: kvm, linux-kernel, Paolo Bonzini, Radim Krcmar, Miroslav Lichvar

On Mon, Jan 23, 2017 at 11:19:17AM -0200, Marcelo Tosatti wrote:
> This is needed to generate the PTP_SYS_OFFSET data: a table with read
> from realtime clock, read from device clock, read from realtime clock,
> ... :
> 
> time ->
> device clock	|	|sample2|	|sample4|	|sample6| ...
> -------------------------------------------------------------
> realtime clock  |sample1|       |sample3|	|sample5|

Here "realtime clock" is CLOCK_REALTIME on the guest, and "device
clock" is CLOCK_REALTIME on the host?
 
> Where sampleN is the read of the respective clock.
> 
> From the following PTP_SYS_OFFSET_PRECISE data:
> 
> device clock	|sample1P,deviceclock|       |sample2P,deviceclock|
> -------------------------------------------------------------
> realtime clock  |sample1P,realtimeclock|     |sample2P,realtimeclock|

Are |sample1P,deviceclock| and |sample1P,realtimeclock| taken at the
same instant in time?

If not, then calling that PTP_SYS_OFFSET_PRECISE is misleading.

Still don't get what you are doing here...

Thanks,
Richard

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [patch 4/5] PTP: add PTP_SYS_OFFSET emulation via cross timestamps infrastructure
  2017-01-23 18:44       ` Richard Cochran
@ 2017-01-23 19:44         ` Paolo Bonzini
  2017-01-24  5:43           ` Richard Cochran
  2017-01-24 11:23           ` Marcelo Tosatti
  2017-01-23 23:06         ` Marcelo Tosatti
  1 sibling, 2 replies; 36+ messages in thread
From: Paolo Bonzini @ 2017-01-23 19:44 UTC (permalink / raw)
  To: Richard Cochran, Marcelo Tosatti
  Cc: kvm, linux-kernel, Radim Krcmar, Miroslav Lichvar



On 23/01/2017 19:44, Richard Cochran wrote:
>> device clock	   |sample1P,deviceclock|       |sample2P,deviceclock|
>> -------------------------------------------------------------
>> realtime clock  |sample1P,realtimeclock|     |sample2P,realtimeclock|
>
> Are |sample1P,deviceclock| and |sample1P,realtimeclock| taken at the
> same instant in time?
> 
> If not, then calling that PTP_SYS_OFFSET_PRECISE is misleading.

Yes, of course.  This was added for the e1000e drivers first, but chrony
isn't using it yet.  In the case of KVM, the same host TSC value is used
to produce the host clock and (converted to guest TSC) the guest clock.

If you just implement getclock64 the PTP_SYS_OFFSET output:

device clock	|	|sample2|	|sample4|	|sample6| ...
-------------------------------------------------------------
realtime clock  |sample1|       |sample3|	|sample5|

has a very large distance between samples on the same line (about 1 us),
and I think it is too noisy for userspace to make sense of the output.

So, on one hand chrony only uses the mean of realtime clock samples, in
an attempt to produce precise cross timestamps.  On the other hand, even
though KVM could produce those natively, chrony does not support
PTP_SYS_OFFSET_PRECISE.

Marcelo's patch then produces fake realtime clock samples that, however,
let chrony derive the cross timestamps that KVM produced in the first
place.  The outcome is really great accuracy compared to previous
versions of the patch, often just +/- 2 or 3 nanoseconds.

Paolo

> Still don't get what you are doing here...

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [patch 4/5] PTP: add PTP_SYS_OFFSET emulation via cross timestamps infrastructure
  2017-01-23 18:44       ` Richard Cochran
  2017-01-23 19:44         ` Paolo Bonzini
@ 2017-01-23 23:06         ` Marcelo Tosatti
  2017-01-24  5:32           ` Richard Cochran
  1 sibling, 1 reply; 36+ messages in thread
From: Marcelo Tosatti @ 2017-01-23 23:06 UTC (permalink / raw)
  To: Richard Cochran
  Cc: kvm, linux-kernel, Paolo Bonzini, Radim Krcmar, Miroslav Lichvar

On Mon, Jan 23, 2017 at 07:44:15PM +0100, Richard Cochran wrote:
> On Mon, Jan 23, 2017 at 11:19:17AM -0200, Marcelo Tosatti wrote:
> > This is needed to generate the PTP_SYS_OFFSET data: a table with read
> > from realtime clock, read from device clock, read from realtime clock,
> > ... :
> > 
> > time ->
> > device clock	|	|sample2|	|sample4|	|sample6| ...
> > -------------------------------------------------------------
> > realtime clock  |sample1|       |sample3|	|sample5|
> 
> Here "realtime clock" is CLOCK_REALTIME on the guest, and "device
> clock" is CLOCK_REALTIME on the host?

Yes.

>  
> > Where sampleN is the read of the respective clock.
> > 
> > From the following PTP_SYS_OFFSET_PRECISE data:
> > 
> > device clock	|sample1P,deviceclock|       |sample2P,deviceclock|
> > -------------------------------------------------------------
> > realtime clock  |sample1P,realtimeclock|     |sample2P,realtimeclock|
> 
> Are |sample1P,deviceclock| and |sample1P,realtimeclock| taken at the
> same instant in time?

Yes they are.

> If not, then calling that PTP_SYS_OFFSET_PRECISE is misleading.

It is.

> Still don't get what you are doing here...

What KVM provides is reading two clocks (host clock and guest clock)
at the exact same time (because both are based on TSC, so you do RDTSC 
once and use that result to calculate two clocks: host and guest realtime
clocks).

So what this interface provides is the following: If a PTP driver
provides ->getcrosstimestamps callback (meaning the driver can read
the value of two clocks at the same instant), and does not provide 
->gettime64 callback, then you approximate the first "diagram" 
using the second "diagram" (ie: the arithmetic mean of the samples).

Can you please describe what problem exists with this scheme?

Would you prefer to get rid of the "emulation" of ->gettime64 
by implementing ->gettime64 directly? Can do that as well.

Thanks.

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [patch 4/5] PTP: add PTP_SYS_OFFSET emulation via cross timestamps infrastructure
  2017-01-23 23:06         ` Marcelo Tosatti
@ 2017-01-24  5:32           ` Richard Cochran
  2017-01-24  8:15             ` Miroslav Lichvar
  0 siblings, 1 reply; 36+ messages in thread
From: Richard Cochran @ 2017-01-24  5:32 UTC (permalink / raw)
  To: Marcelo Tosatti
  Cc: kvm, linux-kernel, Paolo Bonzini, Radim Krcmar, Miroslav Lichvar

On Mon, Jan 23, 2017 at 09:06:20PM -0200, Marcelo Tosatti wrote:
> Can you please describe what problem exists with this scheme?

This new kernel code exists just because chrony doesn't implement the
PRECISE ioctl.  Instead of adding new "fake" modes, just teach chrony
about the better method.

I prefer to have kernel interfaces whose behavior is clear and
consistent.

Thanks,
Richard

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [patch 4/5] PTP: add PTP_SYS_OFFSET emulation via cross timestamps infrastructure
  2017-01-23 19:44         ` Paolo Bonzini
@ 2017-01-24  5:43           ` Richard Cochran
  2017-01-24 11:23           ` Marcelo Tosatti
  1 sibling, 0 replies; 36+ messages in thread
From: Richard Cochran @ 2017-01-24  5:43 UTC (permalink / raw)
  To: Paolo Bonzini
  Cc: Marcelo Tosatti, kvm, linux-kernel, Radim Krcmar, Miroslav Lichvar

On Mon, Jan 23, 2017 at 08:44:53PM +0100, Paolo Bonzini wrote:
> If you just implement getclock64 the PTP_SYS_OFFSET output:
> 
> device clock	|	|sample2|	|sample4|	|sample6| ...
> -------------------------------------------------------------
> realtime clock  |sample1|       |sample3|	|sample5|
> 
> has a very large distance between samples on the same line (about 1 us),
> and I think it is too noisy for userspace to make sense of the output.

One microsecond is not too bad at all.  The PCIe devices have
intervals of 5-6 usec, and the result is quite usable, certainly
better than generic NTP.

> Marcelo's patch then produces fake realtime clock samples that, however,
> let chrony derive the cross timestamps that KVM produced in the first
> place.  The outcome is really great accuracy compared to previous
> versions of the patch, often just +/- 2 or 3 nanoseconds.

Why can't chrony learn about PTP_SYS_OFFSET_PRECISE?

Thanks,
Richard

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [patch 4/5] PTP: add PTP_SYS_OFFSET emulation via cross timestamps infrastructure
  2017-01-24  5:32           ` Richard Cochran
@ 2017-01-24  8:15             ` Miroslav Lichvar
  0 siblings, 0 replies; 36+ messages in thread
From: Miroslav Lichvar @ 2017-01-24  8:15 UTC (permalink / raw)
  To: Richard Cochran
  Cc: Marcelo Tosatti, kvm, linux-kernel, Paolo Bonzini, Radim Krcmar

On Tue, Jan 24, 2017 at 06:32:43AM +0100, Richard Cochran wrote:
> On Mon, Jan 23, 2017 at 09:06:20PM -0200, Marcelo Tosatti wrote:
> > Can you please describe what problem exists with this scheme?
> 
> This new kernel code exists just because chrony doesn't implement the
> PRECISE ioctl.  Instead of adding new "fake" modes, just teach chrony
> about the better method.

The latest development code of chrony now supports the PRECISE ioctl.

I did some tests with an e1000e NIC (i219) and it seemed the stability
was slightly worse than with the non-PRECISE ioctl, but there was a
400-500ns offset between the two, so it should be much more accurate
(we finally have something that avoids the asymmetry on the PCI-e
bus?). Configuring the refclock with a shorter dpoll should compensate
for the decrease in stability. In any case, there is a "nocrossts"
option to not use the PRECISE ioctl even if it's supported.

-- 
Miroslav Lichvar

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [patch 4/5] PTP: add PTP_SYS_OFFSET emulation via cross timestamps infrastructure
  2017-01-23 19:44         ` Paolo Bonzini
  2017-01-24  5:43           ` Richard Cochran
@ 2017-01-24 11:23           ` Marcelo Tosatti
  2017-01-24 11:35             ` Richard Cochran
  1 sibling, 1 reply; 36+ messages in thread
From: Marcelo Tosatti @ 2017-01-24 11:23 UTC (permalink / raw)
  To: Paolo Bonzini
  Cc: Richard Cochran, kvm, linux-kernel, Radim Krcmar, Miroslav Lichvar

On Mon, Jan 23, 2017 at 08:44:53PM +0100, Paolo Bonzini wrote:
> 
> 
> On 23/01/2017 19:44, Richard Cochran wrote:
> >> device clock	   |sample1P,deviceclock|       |sample2P,deviceclock|
> >> -------------------------------------------------------------
> >> realtime clock  |sample1P,realtimeclock|     |sample2P,realtimeclock|
> >
> > Are |sample1P,deviceclock| and |sample1P,realtimeclock| taken at the
> > same instant in time?
> > 
> > If not, then calling that PTP_SYS_OFFSET_PRECISE is misleading.
> 
> Yes, of course.  This was added for the e1000e drivers first, but chrony
> isn't using it yet.  In the case of KVM, the same host TSC value is used
> to produce the host clock and (converted to guest TSC) the guest clock.
> 
> If you just implement getclock64 the PTP_SYS_OFFSET output:
> 
> device clock	|	|sample2|	|sample4|	|sample6| ...
> -------------------------------------------------------------
> realtime clock  |sample1|       |sample3|	|sample5|
> 
> has a very large distance between samples on the same line (about 1 us),
> and I think it is too noisy for userspace to make sense of the output.
> 
> So, on one hand chrony only uses the mean of realtime clock samples, in
> an attempt to produce precise cross timestamps.  On the other hand, even
> though KVM could produce those natively, chrony does not support
> PTP_SYS_OFFSET_PRECISE.
> 
> Marcelo's patch then produces fake realtime clock samples that, however,
> let chrony derive the cross timestamps that KVM produced in the first
> place.  The outcome is really great accuracy compared to previous
> versions of the patch, often just +/- 2 or 3 nanoseconds.
> 
> Paolo

Nope, the clock offset is the value inside square brackets. The +/-
is the error. So the clock offset is actually up to 680ns.

Yes for greater accuracy userspace should implement _PRECISE
support.

I'm resending v5 with native support for ->gettime and 
->getcrosststamps.

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [patch 4/5] PTP: add PTP_SYS_OFFSET emulation via cross timestamps infrastructure
  2017-01-24 11:23           ` Marcelo Tosatti
@ 2017-01-24 11:35             ` Richard Cochran
  0 siblings, 0 replies; 36+ messages in thread
From: Richard Cochran @ 2017-01-24 11:35 UTC (permalink / raw)
  To: Marcelo Tosatti
  Cc: Paolo Bonzini, kvm, linux-kernel, Radim Krcmar, Miroslav Lichvar

On Tue, Jan 24, 2017 at 09:23:29AM -0200, Marcelo Tosatti wrote:
> I'm resending v5 with native support for ->gettime and 
> ->getcrosststamps.

And can you please drop the "fake" PTP_SYS_OFFSET stuff?

Here is another reason why this is illogical.  The application can
choose how many samples to take during PTP_SYS_OFFSET.  It will choose
more samples if it wants a better picture of the offsets, but in that
case the "fake" samples would cause more computation in the kernel for
no added benefit.

Thanks,
Richard

^ permalink raw reply	[flat|nested] 36+ messages in thread

* [patch 3/5] kvmclock: export kvmclock clocksource pointer
  2017-01-20 14:51 [patch 0/5] KVM virtual PTP driver (v4) Marcelo Tosatti
@ 2017-01-20 14:51 ` Marcelo Tosatti
  0 siblings, 0 replies; 36+ messages in thread
From: Marcelo Tosatti @ 2017-01-20 14:51 UTC (permalink / raw)
  To: kvm, linux-kernel
  Cc: Paolo Bonzini, Radim Krcmar, Richard Cochran, Miroslav Lichvar,
	Marcelo Tosatti

[-- Attachment #1: kvmclock-export-clocksource --]
[-- Type: text/plain, Size: 1367 bytes --]

To be used by KVM PTP driver.

Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>

---
 arch/x86/include/asm/kvmclock.h |    6 ++++++
 arch/x86/kernel/kvmclock.c      |    3 ++-
 2 files changed, 8 insertions(+), 1 deletion(-)

v2: export kvmclock clocksource structure directly (Paolo)

Index: kvm-ptpdriver/arch/x86/include/asm/kvmclock.h
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ kvm-ptpdriver/arch/x86/include/asm/kvmclock.h	2017-01-20 11:05:28.720038512 -0200
@@ -0,0 +1,6 @@
+#ifndef _ASM_X86_KVM_CLOCK_H
+#define _ASM_X86_KVM_CLOCK_H
+
+extern struct clocksource kvm_clock;
+
+#endif /* _ASM_X86_KVM_CLOCK_H */
Index: kvm-ptpdriver/arch/x86/kernel/kvmclock.c
===================================================================
--- kvm-ptpdriver.orig/arch/x86/kernel/kvmclock.c	2017-01-20 10:03:44.750489955 -0200
+++ kvm-ptpdriver/arch/x86/kernel/kvmclock.c	2017-01-20 11:05:03.137971637 -0200
@@ -28,6 +28,7 @@
 
 #include <asm/x86_init.h>
 #include <asm/reboot.h>
+#include <asm/kvmclock.h>
 
 static int kvmclock __ro_after_init = 1;
 static int msr_kvm_system_time = MSR_KVM_SYSTEM_TIME;
@@ -174,7 +175,7 @@
 	return ret;
 }
 
-static struct clocksource kvm_clock = {
+struct clocksource kvm_clock = {
 	.name = "kvm-clock",
 	.read = kvm_clock_get_cycles,
 	.rating = 400,

^ permalink raw reply	[flat|nested] 36+ messages in thread

end of thread, other threads:[~2017-01-24 11:35 UTC | newest]

Thread overview: 36+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-01-20 12:20 [patch 0/5] KVM virtual PTP driver (v3) Marcelo Tosatti
2017-01-20 12:20 ` [patch 1/5] KVM: x86: provide realtime host clock via vsyscall notifiers Marcelo Tosatti
2017-01-20 12:20 ` [patch 2/5] KVM: x86: add KVM_HC_CLOCK_OFFSET hypercall Marcelo Tosatti
2017-01-20 12:20 ` [patch 3/5] kvmclock: export kvmclock clocksource pointer Marcelo Tosatti
2017-01-20 12:55   ` Paolo Bonzini
2017-01-20 12:20 ` [patch 4/5] PTP: add PTP_SYS_OFFSET emulation via cross timestamps infrastructure Marcelo Tosatti
2017-01-20 12:55   ` Paolo Bonzini
2017-01-20 13:07     ` Marcelo Tosatti
2017-01-20 13:36       ` Paolo Bonzini
2017-01-20 13:52         ` Marcelo Tosatti
2017-01-20 14:02         ` Radim Krcmar
2017-01-20 14:23           ` Paolo Bonzini
2017-01-20 14:31             ` Miroslav Lichvar
2017-01-20 18:30             ` Radim Krcmar
2017-01-20 20:25   ` Richard Cochran
2017-01-23 13:19     ` Marcelo Tosatti
2017-01-23 18:44       ` Richard Cochran
2017-01-23 19:44         ` Paolo Bonzini
2017-01-24  5:43           ` Richard Cochran
2017-01-24 11:23           ` Marcelo Tosatti
2017-01-24 11:35             ` Richard Cochran
2017-01-23 23:06         ` Marcelo Tosatti
2017-01-24  5:32           ` Richard Cochran
2017-01-24  8:15             ` Miroslav Lichvar
2017-01-20 12:20 ` [patch 5/5] PTP: add kvm PTP driver Marcelo Tosatti
2017-01-20 12:58   ` Paolo Bonzini
2017-01-20 13:11     ` Marcelo Tosatti
2017-01-20 14:12   ` Radim Krcmar
2017-01-20 14:20     ` Radim Krcmar
2017-01-20 15:00     ` Marcelo Tosatti
2017-01-20 17:11       ` Paolo Bonzini
2017-01-20 18:08       ` Radim Krcmar
2017-01-20 19:10         ` Marcelo Tosatti
2017-01-21  8:02         ` Paolo Bonzini
2017-01-20 13:10 ` [patch 0/5] KVM virtual PTP driver (v3) Paolo Bonzini
2017-01-20 14:51 [patch 0/5] KVM virtual PTP driver (v4) Marcelo Tosatti
2017-01-20 14:51 ` [patch 3/5] kvmclock: export kvmclock clocksource pointer Marcelo Tosatti

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).