linux-kernel.vger.kernel.org archive mirror
* [PATCH v4.16-rc5 (3)] x86/vdso: on Intel, VDSO should handle CLOCK_MONOTONIC_RAW
@ 2018-03-14  4:20 jason.vas.dias
  2018-03-14  4:20 ` [PATCH v4.16-rc5 1/3] " jason.vas.dias
                   ` (2 more replies)
  0 siblings, 3 replies; 8+ messages in thread
From: jason.vas.dias @ 2018-03-14  4:20 UTC (permalink / raw)
  To: linux-kernel; +Cc: x86, tglx, mingo, peterz, andi



  Currently the VDSO does not handle
     clock_gettime( CLOCK_MONOTONIC_RAW, &ts )
  on Intel / AMD - it calls
     vdso_fallback_gettime()
  for this clock, which issues a syscall with an unacceptably high
  latency (minimum measurable time, or time between measurements)
  of 300-700ns on two 2.8-3.9GHz Haswell x86_64 (Family_Model: 06_3C)
  machines under various versions of Linux.

  Sometimes, particularly when correlating elapsed time to performance
  counter values, user-space code needs to know elapsed time from the
  perspective of the CPU no matter how "hot" / fast or "cold" / slow it
  might be running with respect to NTP / PTP "real" time; when code needs
  this, the latencies associated with a syscall are often unacceptably high.
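
  For illustration, here is a minimal user-space sketch (not part of this
  series; the loop count and output format are arbitrary) of this kind of
  latency measurement - it times back-to-back
  clock_gettime(CLOCK_MONOTONIC_RAW) calls and reports the smallest delta,
  i.e. the minimum measurable interval:

    #include <stdio.h>
    #include <time.h>

    int main(void)
    {
            struct timespec t1, t2;
            long d, best = -1;
            int i;

            for (i = 0; i < 1000; i++) {
                    /* two adjacent reads; their difference is the
                     * smallest interval this clock can resolve here.
                     */
                    clock_gettime(CLOCK_MONOTONIC_RAW, &t1);
                    clock_gettime(CLOCK_MONOTONIC_RAW, &t2);
                    d = (t2.tv_sec - t1.tv_sec) * 1000000000L
                        + (t2.tv_nsec - t1.tv_nsec);
                    if (best < 0 || d < best)
                            best = d;
            }
            printf("minimum measurable interval: %ld ns\n", best);
            return 0;
    }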

  I reported this as Bug #198961 :
    'https://bugzilla.kernel.org/show_bug.cgi?id=198961'
  and in previous posts with subjects matching 'CLOCK_MONOTONIC_RAW'.
     
  This patch handles CLOCK_MONOTONIC_RAW clock_gettime() in the VDSO,
  by exporting the raw clock calibration, last cycles, last xtime_nsec,
  and last raw_sec value in the vsyscall_gtod_data during update_vsyscall().

  Now the new do_monotonic_raw() function in the vDSO has a latency of
  about 24ns on average, and the test program:
   tools/testing/selftests/timers/inconsistency-check.c
  succeeds with arguments '-c 4 -t 120' or any arbitrary -t value.

  The patch is against Linus' latest 4.16-rc5 tree,
  current HEAD of:
    git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git

  This patch series affects only the following files:
  
   arch/x86/include/asm/vgtod.h
   arch/x86/entry/vdso/vclock_gettime.c
   arch/x86/entry/vdso/vdso.lds.S
   arch/x86/entry/vdso/vdsox32.lds.S
   arch/x86/entry/vdso/vdso32/vdso32.lds.S      
   arch/x86/entry/vsyscall/vsyscall_gtod.c

   There are 3 patches in the series:

   Patch #1 makes the VDSO handle clock_gettime(CLOCK_MONOTONIC_RAW)
   with rdtsc_ordered().

   Patch #2 makes the VDSO handle clock_gettime(CLOCK_MONOTONIC_RAW)
   with a new rdtscp() function in msr.h.

   Patch #3 makes the VDSO export TSC calibration data via a new
   function in the vDSO:
      unsigned int
      __vdso_linux_tsc_calibration(struct linux_tsc_calibration_s *tsc_cal)
   that user code can optionally call (a usage sketch follows below).

   Patches #2 & #3 should be considered "optional".

   Patch #2 makes clock_gettime(CLOCK_MONOTONIC_RAW) calls have about
   half the latency of clock_gettime(CLOCK_MONOTONIC) calls.

   I think something like Patch #3 is necessary to export TSC
   calibration data to user-space TSC readers.
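
   For example, user-space code could resolve and call the new function
   roughly as in this sketch (assuming the vdso_init_from_sysinfo_ehdr()
   and vdso_sym() helpers from tools/testing/selftests/vDSO/parse_vdso.c
   are compiled in; the struct layout is the one patch #3 defines):

     #include <stdio.h>
     #include <stdint.h>
     #include <sys/auxv.h>

     /* from parse_vdso.c (assumed linked into this program): */
     extern void vdso_init_from_sysinfo_ehdr(uintptr_t base);
     extern void *vdso_sym(const char *version, const char *name);

     struct linux_tsc_calibration_s { unsigned int mult, shift, tsc_khz; };

     int main(void)
     {
             unsigned int (*tsc_cal)(struct linux_tsc_calibration_s *);
             struct linux_tsc_calibration_s cal;

             /* locate the vDSO from the auxiliary vector, then look up
              * the (optional) calibration entry point by version + name.
              */
             vdso_init_from_sysinfo_ehdr(getauxval(AT_SYSINFO_EHDR));
             tsc_cal = (unsigned int (*)(struct linux_tsc_calibration_s *))
                       vdso_sym("LINUX_2.6", "__vdso_linux_tsc_calibration");
             if (!tsc_cal || !tsc_cal(&cal)) {
                     fprintf(stderr, "TSC calibration not available\n");
                     return 1;
             }
             printf("tsc_khz=%u mult=%u shift=%u\n",
                    cal.tsc_khz, cal.mult, cal.shift);
             return 0;
     }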


Best Regards,
Jason Vas Dias

^ permalink raw reply	[flat|nested] 8+ messages in thread

* [PATCH v4.16-rc5 1/3] x86/vdso: on Intel, VDSO should handle CLOCK_MONOTONIC_RAW
  2018-03-14  4:20 [PATCH v4.16-rc5 (3)] x86/vdso: on Intel, VDSO should handle CLOCK_MONOTONIC_RAW jason.vas.dias
@ 2018-03-14  4:20 ` jason.vas.dias
  2018-03-14 14:27   ` Thomas Gleixner
  2018-03-16  6:11   ` kbuild test robot
  2018-03-14  4:20 ` [PATCH v4.16-rc5 2/3] " jason.vas.dias
  2018-03-14  4:20 ` [PATCH v4.16-rc5 3/3] " jason.vas.dias
  2 siblings, 2 replies; 8+ messages in thread
From: jason.vas.dias @ 2018-03-14  4:20 UTC (permalink / raw)
  To: linux-kernel; +Cc: x86, tglx, mingo, peterz, andi

diff --git a/arch/x86/entry/vdso/vclock_gettime.c b/arch/x86/entry/vdso/vclock_gettime.c
index f19856d..fbc7371 100644
--- a/arch/x86/entry/vdso/vclock_gettime.c
+++ b/arch/x86/entry/vdso/vclock_gettime.c
@@ -182,6 +182,18 @@ notrace static u64 vread_tsc(void)
 	return last;
 }
 
+notrace static u64 vread_tsc_raw(void)
+{
+	u64 tsc
+	  , last = gtod->raw_cycle_last;
+
+	tsc	      = rdtsc_ordered();
+	if (likely(tsc >= last))
+		return tsc;
+	asm volatile ("");
+	return last;
+}
+
 notrace static inline u64 vgetsns(int *mode)
 {
 	u64 v;
@@ -203,6 +215,27 @@ notrace static inline u64 vgetsns(int *mode)
 	return v * gtod->mult;
 }
 
+notrace static inline u64 vgetsns_raw(int *mode)
+{
+	u64 v;
+	cycles_t cycles;
+
+	if (gtod->vclock_mode == VCLOCK_TSC)
+		cycles = vread_tsc_raw();
+#ifdef CONFIG_PARAVIRT_CLOCK
+	else if (gtod->vclock_mode == VCLOCK_PVCLOCK)
+		cycles = vread_pvclock(mode);
+#endif
+#ifdef CONFIG_HYPERV_TSCPAGE
+	else if (gtod->vclock_mode == VCLOCK_HVCLOCK)
+		cycles = vread_hvclock(mode);
+#endif
+	else
+		return 0;
+	v = (cycles - gtod->raw_cycle_last) & gtod->raw_mask;
+	return v * gtod->raw_mult;
+}
+
 /* Code size doesn't matter (vdso is 4k anyway) and this is faster. */
 notrace static int __always_inline do_realtime(struct timespec *ts)
 {
@@ -246,6 +279,27 @@ notrace static int __always_inline do_monotonic(struct timespec *ts)
 	return mode;
 }
 
+notrace static __always_inline int do_monotonic_raw(struct timespec *ts)
+{
+	unsigned long seq;
+	u64 ns;
+	int mode;
+
+	do {
+		seq = gtod_read_begin(gtod);
+		mode = gtod->vclock_mode;
+		ts->tv_sec = gtod->monotonic_time_raw_sec;
+		ns = gtod->monotonic_time_raw_nsec;
+		ns += vgetsns_raw(&mode);
+		ns >>= gtod->raw_shift;
+	} while (unlikely(gtod_read_retry(gtod, seq)));
+
+	ts->tv_sec += __iter_div_u64_rem(ns, NSEC_PER_SEC, &ns);
+	ts->tv_nsec = ns;
+
+	return mode;
+}
+
 notrace static void do_realtime_coarse(struct timespec *ts)
 {
 	unsigned long seq;
@@ -277,6 +331,10 @@ notrace int __vdso_clock_gettime(clockid_t clock, struct timespec *ts)
 		if (do_monotonic(ts) == VCLOCK_NONE)
 			goto fallback;
 		break;
+	case CLOCK_MONOTONIC_RAW:
+		if (do_monotonic_raw(ts) == VCLOCK_NONE)
+			goto fallback;
+		break;
 	case CLOCK_REALTIME_COARSE:
 		do_realtime_coarse(ts);
 		break;
diff --git a/arch/x86/entry/vsyscall/vsyscall_gtod.c b/arch/x86/entry/vsyscall/vsyscall_gtod.c
index e1216dd..5af7093 100644
--- a/arch/x86/entry/vsyscall/vsyscall_gtod.c
+++ b/arch/x86/entry/vsyscall/vsyscall_gtod.c
@@ -45,6 +45,11 @@ void update_vsyscall(struct timekeeper *tk)
 	vdata->mult		= tk->tkr_mono.mult;
 	vdata->shift		= tk->tkr_mono.shift;
 
+	vdata->raw_cycle_last	= tk->tkr_raw.cycle_last;
+	vdata->raw_mask		= tk->tkr_raw.mask;
+	vdata->raw_mult		= tk->tkr_raw.mult;
+	vdata->raw_shift	= tk->tkr_raw.shift;
+
 	vdata->wall_time_sec		= tk->xtime_sec;
 	vdata->wall_time_snsec		= tk->tkr_mono.xtime_nsec;
 
@@ -74,5 +79,8 @@ void update_vsyscall(struct timekeeper *tk)
 		vdata->monotonic_time_coarse_sec++;
 	}
 
+	vdata->monotonic_time_raw_sec  = tk->raw_sec;
+	vdata->monotonic_time_raw_nsec = tk->tkr_raw.xtime_nsec;
+
 	gtod_write_end(vdata);
 }
diff --git a/arch/x86/include/asm/vgtod.h b/arch/x86/include/asm/vgtod.h
index fb856c9..24e4d45 100644
--- a/arch/x86/include/asm/vgtod.h
+++ b/arch/x86/include/asm/vgtod.h
@@ -22,6 +22,10 @@ struct vsyscall_gtod_data {
 	u64	mask;
 	u32	mult;
 	u32	shift;
+	u64	raw_cycle_last;
+	u64	raw_mask;
+	u32	raw_mult;
+	u32	raw_shift;
 
 	/* open coded 'struct timespec' */
 	u64		wall_time_snsec;
@@ -32,6 +36,8 @@ struct vsyscall_gtod_data {
 	gtod_long_t	wall_time_coarse_nsec;
 	gtod_long_t	monotonic_time_coarse_sec;
 	gtod_long_t	monotonic_time_coarse_nsec;
+	gtod_long_t	monotonic_time_raw_sec;
+	gtod_long_t	monotonic_time_raw_nsec;
 
 	int		tz_minuteswest;
 	int		tz_dsttime;

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH v4.16-rc5 2/3] x86/vdso: on Intel, VDSO should handle CLOCK_MONOTONIC_RAW
  2018-03-14  4:20 [PATCH v4.16-rc5 (3)] x86/vdso: on Intel, VDSO should handle CLOCK_MONOTONIC_RAW jason.vas.dias
  2018-03-14  4:20 ` [PATCH v4.16-rc5 1/3] " jason.vas.dias
@ 2018-03-14  4:20 ` jason.vas.dias
  2018-03-14 14:48   ` Thomas Gleixner
  2018-03-14  4:20 ` [PATCH v4.16-rc5 3/3] " jason.vas.dias
  2 siblings, 1 reply; 8+ messages in thread
From: jason.vas.dias @ 2018-03-14  4:20 UTC (permalink / raw)
  To: linux-kernel; +Cc: x86, tglx, mingo, peterz, andi

diff --git a/arch/x86/entry/vdso/vclock_gettime.c b/arch/x86/entry/vdso/vclock_gettime.c
index fbc7371..2c46675 100644
--- a/arch/x86/entry/vdso/vclock_gettime.c
+++ b/arch/x86/entry/vdso/vclock_gettime.c
@@ -184,10 +184,9 @@ notrace static u64 vread_tsc(void)
 
 notrace static u64 vread_tsc_raw(void)
 {
-	u64 tsc
+	u64 tsc  = (gtod->has_rdtscp ? rdtscp((void*)0) : rdtsc_ordered())
 	  , last = gtod->raw_cycle_last;
 
-	tsc	      = rdtsc_ordered();
 	if (likely(tsc >= last))
 		return tsc;
 	asm volatile ("");
diff --git a/arch/x86/entry/vsyscall/vsyscall_gtod.c b/arch/x86/entry/vsyscall/vsyscall_gtod.c
index 5af7093..0327a95 100644
--- a/arch/x86/entry/vsyscall/vsyscall_gtod.c
+++ b/arch/x86/entry/vsyscall/vsyscall_gtod.c
@@ -16,6 +16,9 @@
 #include <linux/timekeeper_internal.h>
 #include <asm/vgtod.h>
 #include <asm/vvar.h>
+#include <asm/cpufeature.h>
+
+extern unsigned tsc_khz;
 
 int vclocks_used __read_mostly;
 
@@ -49,6 +52,7 @@ void update_vsyscall(struct timekeeper *tk)
 	vdata->raw_mask		= tk->tkr_raw.mask;
 	vdata->raw_mult		= tk->tkr_raw.mult;
 	vdata->raw_shift	= tk->tkr_raw.shift;
+	vdata->has_rdtscp	= static_cpu_has(X86_FEATURE_RDTSCP);
 
 	vdata->wall_time_sec		= tk->xtime_sec;
 	vdata->wall_time_snsec		= tk->tkr_mono.xtime_nsec;
diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h
index 30df295..a5ff704 100644
--- a/arch/x86/include/asm/msr.h
+++ b/arch/x86/include/asm/msr.h
@@ -218,6 +218,36 @@ static __always_inline unsigned long long rdtsc_ordered(void)
 	return rdtsc();
 }
 
+/**
+ * rdtscp() - read the current TSC and (optionally) CPU number, with the
+ *            instruction's built-in ordering replacing the barrier - only
+ *            available if static_cpu_has(X86_FEATURE_RDTSCP).
+ * returns:   The 64-bit Time Stamp Counter (TSC) value.
+ * Optionally, 'cpu_out' can be non-null, and on return it will contain
+ * the number (Intel CPU ID) of the CPU that the task is currently running on.
+ * As does EAX_EDX_RET, this uses the "open-coded asm" style to
+ * force the compiler + assembler to always use (eax, edx, ecx) registers,
+ * NOT whole (rax, rdx, rcx) on x86_64 , because only 32-bit 
+ * variables are used - exactly the same code should be generated
+ * for this instruction on 32-bit as on 64-bit when this asm stanza is used.
+ * See: SDM , Vol #2, RDTSCP instruction.
+ */
+static __always_inline u64 rdtscp(u32 *cpu_out)
+{
+	u32	tsc_lo, tsc_hi, tsc_cpu;
+	asm volatile
+	( "rdtscp"
+		:   "=a" (tsc_lo)
+		  , "=d" (tsc_hi)
+		  , "=c" (tsc_cpu)
+	); // : eax, edx, ecx used - NOT rax, rdx, rcx
+	if (unlikely(cpu_out != ((void*)0)))
+		*cpu_out = tsc_cpu;
+	return ((((u64)tsc_hi) << 32) |
+		(((u64)tsc_lo) & 0x0ffffffffULL )
+	       );
+}
+
 /* Deprecated, keep it for a cycle for easier merging: */
 #define rdtscll(now)	do { (now) = rdtsc_ordered(); } while (0)
 
diff --git a/arch/x86/include/asm/vgtod.h b/arch/x86/include/asm/vgtod.h
index 24e4d45..e7e4804 100644
--- a/arch/x86/include/asm/vgtod.h
+++ b/arch/x86/include/asm/vgtod.h
@@ -26,6 +26,7 @@ struct vsyscall_gtod_data {
 	u64	raw_mask;
 	u32	raw_mult;
 	u32	raw_shift;
+	u32     has_rdtscp;
 
 	/* open coded 'struct timespec' */
 	u64		wall_time_snsec;

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH v4.16-rc5 3/3] x86/vdso: on Intel, VDSO should handle CLOCK_MONOTONIC_RAW
  2018-03-14  4:20 [PATCH v4.16-rc5 (3)] x86/vdso: on Intel, VDSO should handle CLOCK_MONOTONIC_RAW jason.vas.dias
  2018-03-14  4:20 ` [PATCH v4.16-rc5 1/3] " jason.vas.dias
  2018-03-14  4:20 ` [PATCH v4.16-rc5 2/3] " jason.vas.dias
@ 2018-03-14  4:20 ` jason.vas.dias
  2 siblings, 0 replies; 8+ messages in thread
From: jason.vas.dias @ 2018-03-14  4:20 UTC (permalink / raw)
  To: linux-kernel; +Cc: x86, tglx, mingo, peterz, andi

diff --git a/arch/x86/entry/vdso/vclock_gettime.c b/arch/x86/entry/vdso/vclock_gettime.c
index 2c46675..772988c 100644
--- a/arch/x86/entry/vdso/vclock_gettime.c
+++ b/arch/x86/entry/vdso/vclock_gettime.c
@@ -21,6 +21,7 @@
 #include <linux/math64.h>
 #include <linux/time.h>
 #include <linux/kernel.h>
+#include <uapi/asm/vdso_tsc_calibration.h>
 
 #define gtod (&VVAR(vsyscall_gtod_data))
 
@@ -184,7 +185,7 @@ notrace static u64 vread_tsc(void)
 
 notrace static u64 vread_tsc_raw(void)
 {
-	u64 tsc  = (gtod->has_rdtscp ? rdtscp((void*)0) : rdtsc_ordered())
+	u64 tsc  = (gtod->has_rdtscp ? rdtscp((void *)0) : rdtsc_ordered())
 	  , last = gtod->raw_cycle_last;
 
 	if (likely(tsc >= last))
@@ -383,3 +384,21 @@ notrace time_t __vdso_time(time_t *t)
 }
 time_t time(time_t *t)
 	__attribute__((weak, alias("__vdso_time")));
+
+unsigned int __vdso_linux_tsc_calibration(
+	struct linux_tsc_calibration_s *tsc_cal);
+
+notrace	unsigned int
+__vdso_linux_tsc_calibration(struct linux_tsc_calibration_s *tsc_cal)
+{
+	if ((gtod->vclock_mode == VCLOCK_TSC) && (tsc_cal != ((void *)0UL))) {
+		tsc_cal->tsc_khz = gtod->tsc_khz;
+		tsc_cal->mult    = gtod->raw_mult;
+		tsc_cal->shift   = gtod->raw_shift;
+		return 1;
+	}
+	return 0;
+}
+
+unsigned int linux_tsc_calibration(struct linux_tsc_calibration_s *tsc_cal)
+	__attribute__((weak, alias("__vdso_linux_tsc_calibration")));
diff --git a/arch/x86/entry/vdso/vdso.lds.S b/arch/x86/entry/vdso/vdso.lds.S
index d3a2dce..e0b5cce 100644
--- a/arch/x86/entry/vdso/vdso.lds.S
+++ b/arch/x86/entry/vdso/vdso.lds.S
@@ -25,6 +25,8 @@ VERSION {
 		__vdso_getcpu;
 		time;
 		__vdso_time;
+		linux_tsc_calibration;
+		__vdso_linux_tsc_calibration;
 	local: *;
 	};
 }
diff --git a/arch/x86/entry/vdso/vdso32/vdso32.lds.S b/arch/x86/entry/vdso/vdso32/vdso32.lds.S
index 422764a..17fd07f 100644
--- a/arch/x86/entry/vdso/vdso32/vdso32.lds.S
+++ b/arch/x86/entry/vdso/vdso32/vdso32.lds.S
@@ -26,6 +26,7 @@ VERSION
 		__vdso_clock_gettime;
 		__vdso_gettimeofday;
 		__vdso_time;
+		__vdso_linux_tsc_calibration;
 	};
 
 	LINUX_2.5 {
diff --git a/arch/x86/entry/vdso/vdsox32.lds.S b/arch/x86/entry/vdso/vdsox32.lds.S
index 05cd1c5..7acac71 100644
--- a/arch/x86/entry/vdso/vdsox32.lds.S
+++ b/arch/x86/entry/vdso/vdsox32.lds.S
@@ -21,6 +21,7 @@ VERSION {
 		__vdso_gettimeofday;
 		__vdso_getcpu;
 		__vdso_time;
+		__vdso_linux_tsc_calibration;
 	local: *;
 	};
 }
diff --git a/arch/x86/entry/vsyscall/vsyscall_gtod.c b/arch/x86/entry/vsyscall/vsyscall_gtod.c
index 0327a95..692562a 100644
--- a/arch/x86/entry/vsyscall/vsyscall_gtod.c
+++ b/arch/x86/entry/vsyscall/vsyscall_gtod.c
@@ -53,6 +53,7 @@ void update_vsyscall(struct timekeeper *tk)
 	vdata->raw_mult		= tk->tkr_raw.mult;
 	vdata->raw_shift	= tk->tkr_raw.shift;
 	vdata->has_rdtscp	= static_cpu_has(X86_FEATURE_RDTSCP);
+	vdata->tsc_khz          = tsc_khz;
 
 	vdata->wall_time_sec		= tk->xtime_sec;
 	vdata->wall_time_snsec		= tk->tkr_mono.xtime_nsec;
diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h
index a5ff704..c7b2ed2 100644
--- a/arch/x86/include/asm/msr.h
+++ b/arch/x86/include/asm/msr.h
@@ -227,7 +227,7 @@ static __always_inline unsigned long long rdtsc_ordered(void)
  * the number (Intel CPU ID) of the CPU that the task is currently running on.
  * As does EAX_EDX_RET, this uses the "open-coded asm" style to
  * force the compiler + assembler to always use (eax, edx, ecx) registers,
- * NOT whole (rax, rdx, rcx) on x86_64 , because only 32-bit 
+ * NOT whole (rax, rdx, rcx) on x86_64 , because only 32-bit
  * variables are used - exactly the same code should be generated
  * for this instruction on 32-bit as on 64-bit when this asm stanza is used.
  * See: SDM , Vol #2, RDTSCP instruction.
@@ -236,15 +236,15 @@ static __always_inline u64 rdtscp(u32 *cpu_out)
 {
 	u32	tsc_lo, tsc_hi, tsc_cpu;
 	asm volatile
-	( "rdtscp"
+	("rdtscp"
 		:   "=a" (tsc_lo)
 		  , "=d" (tsc_hi)
 		  , "=c" (tsc_cpu)
 	); // : eax, edx, ecx used - NOT rax, rdx, rcx
-	if (unlikely(cpu_out != ((void*)0)))
+	if (unlikely(cpu_out != ((void *)0)))
 		*cpu_out = tsc_cpu;
 	return ((((u64)tsc_hi) << 32) |
-		(((u64)tsc_lo) & 0x0ffffffffULL )
+		(((u64)tsc_lo) & 0x0ffffffffULL)
 	       );
 }
 
diff --git a/arch/x86/include/asm/vgtod.h b/arch/x86/include/asm/vgtod.h
index e7e4804..75078fc 100644
--- a/arch/x86/include/asm/vgtod.h
+++ b/arch/x86/include/asm/vgtod.h
@@ -27,6 +27,7 @@ struct vsyscall_gtod_data {
 	u32	raw_mult;
 	u32	raw_shift;
 	u32     has_rdtscp;
+	u32     tsc_khz;
 
 	/* open coded 'struct timespec' */
 	u64		wall_time_snsec;
diff --git a/arch/x86/include/uapi/asm/vdso_tsc_calibration.h b/arch/x86/include/uapi/asm/vdso_tsc_calibration.h
new file mode 100644
index 0000000..8ca3090
--- /dev/null
+++ b/arch/x86/include/uapi/asm/vdso_tsc_calibration.h
@@ -0,0 +1,82 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _ASM_X86_VDSO_TSC_CALIBRATION_H
+#define _ASM_X86_VDSO_TSC_CALIBRATION_H
+/*
+ * Programs that want to use rdtsc / rdtscp instructions
+ * from user-space can make use of the Linux kernel TSC calibration
+ * by calling :
+ *    __vdso_linux_tsc_calibration(struct linux_tsc_calibration_s *);
+ * ( one has to resolve this symbol as in
+ *   tools/testing/selftests/vDSO/parse_vdso.c
+ * )
+ * which fills in a structure
+ * with the following layout :
+ */
+
+/** struct linux_tsc_calibration_s -
+ * mult:    amount to multiply 64-bit TSC value by
+ * shift:   the right shift to apply to (mult*TSC) yielding nanoseconds
+ * tsc_khz: the calibrated TSC frequency in KHz from which the previous
+ *          members are calculated
+ */
+struct linux_tsc_calibration_s {
+
+	unsigned int mult;
+	unsigned int shift;
+	unsigned int tsc_khz;
+
+};
+
+/* To use:
+ *
+ *  unsigned int
+ *  (*linux_tsc_cal)(struct linux_tsc_calibration_s *) =
+ *    vdso_sym("LINUX_2.6", "__vdso_linux_tsc_calibration");
+ *  if (linux_tsc_cal == NULL) {
+ *    fprintf(stderr,
+ *      "the patch providing __vdso_linux_tsc_calibration "
+ *      "is not applied to the kernel.\n");
+ *    return ERROR;
+ *  }
+ *  static struct linux_tsc_calibration_s clock_source = {0};
+ *  if ((clock_source.mult == 0) && !(*linux_tsc_cal)(&clock_source))
+ *    fprintf(stderr, "TSC is not the system clocksource.\n");
+ *  unsigned int tsc_lo, tsc_hi, tsc_cpu;
+ *  asm volatile
+ *  ("rdtscp" : "=a" (tsc_lo), "=d" (tsc_hi), "=c" (tsc_cpu));
+ *  unsigned long tsc = (((unsigned long)tsc_hi) << 32) | tsc_lo;
+ *  unsigned long nanoseconds =
+ *    ((unsigned long)clock_source.mult * tsc) >> clock_source.shift;
+ *
+ *  nanoseconds is now TSC value converted to nanoseconds,
+ *  according to Linux' clocksource calibration values.
+ *  Incidentally, 'tsc_cpu' is the number of the CPU the task is running on.
+ *
+ * But better results are obtained by applying this to the difference (delta)
+ * and adding this to some previous timespec value:
+ *   static u64 previous_tsc=0, previous_nsec=0, previous_sec=0;
+ *   u64  tsc      = rdtscp();
+ *   u64  delta    = tsc - previous_tsc;
+ *   u64  nsec     = ((delta * clock_source.mult) + previous_nsec )
+ *	           >> clock_source.shift;
+ *   ts->tv_sec    = previous_sec + (nsec / NSEC_PER_SEC);
+ *   ts->tv_nsec   = nsec % NSEC_PER_SEC;
+ *   previous_tsc  = tsc;
+ *   previous_sec  = ts->tv_sec;
+ *   previous_nsec = ts->tv_nsec << clock_source.shift;
+ *   return ts;
+ * This is broadly like the approach taken by the Linux kernel and its VDSO.
+ *
+ * Or, in user-space, with floating point, one could convert the rdtscp
+ * value directly to nanoseconds :
+ *     u64 ns = lround( ((double)rdtscp())
+ *            / (((double)clock_source.tsc_khz) / 1e6) );
+ * (ie. if tsc_khz is 3000000 (3GHz), there are 3 tsc ticks per nanosecond,
+ *  so divide tsc ticks by 3).
+ *
+ * There should actually be very little difference (about 0.02%) between
+ * the values obtained by either method.
+ */
+
+#endif

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* Re: [PATCH v4.16-rc5 1/3] x86/vdso: on Intel, VDSO should handle CLOCK_MONOTONIC_RAW
  2018-03-14  4:20 ` [PATCH v4.16-rc5 1/3] " jason.vas.dias
@ 2018-03-14 14:27   ` Thomas Gleixner
  2018-03-16  6:11   ` kbuild test robot
  1 sibling, 0 replies; 8+ messages in thread
From: Thomas Gleixner @ 2018-03-14 14:27 UTC (permalink / raw)
  To: jason.vas.dias; +Cc: linux-kernel, x86, mingo, peterz, andi

Jason,

On Wed, 14 Mar 2018, jason.vas.dias@gmail.com wrote:

this subject line is not really what it should be.

[PATCH v4.16-rc5 1/3] x86/vdso: on Intel, VDSO should handle CLOCK_MONOTONIC_RAW

Documentation clearly says:

  The canonical patch subject line is::

      Subject: [PATCH 001/123] subsystem: summary phrase

  The ``summary phrase`` in the email's Subject should concisely describe
  the patch which that email contains.  The ``summary phrase`` should not
  be a filename.  Do not use the same ``summary phrase`` for every patch in
  a whole patch series (where a ``patch series`` is an ordered sequence of
  multiple, related patches).

Aside of that the text body of the patch lacks:

   1) A description of the patch

   2) Your Signed-off-by. Again: checkpatch.pl complains for a reason.

Is it really that hard to comply with the established and documented
procedures?

> diff --git a/arch/x86/entry/vdso/vclock_gettime.c b/arch/x86/entry/vdso/vclock_gettime.c
> index f19856d..fbc7371 100644
> --- a/arch/x86/entry/vdso/vclock_gettime.c
> +++ b/arch/x86/entry/vdso/vclock_gettime.c
> @@ -182,6 +182,18 @@ notrace static u64 vread_tsc(void)
>  	return last;
>  }
>  
> +notrace static u64 vread_tsc_raw(void)
> +{
> +	u64 tsc
> +	  , last = gtod->raw_cycle_last;

This is hardly kernel coding style.

> +
> +	tsc	      = rdtsc_ordered();

and these spaces are pointless.

> +	if (likely(tsc >= last))
> +		return tsc;
> +	asm volatile ("");
> +	return last;
> +}

As I explained to you before: This function is not required because
gtod->cycle_last and gtod->raw_cycle_last are the same value. 

>  notrace static inline u64 vgetsns(int *mode)
>  {
>  	u64 v;
> @@ -203,6 +215,27 @@ notrace static inline u64 vgetsns(int *mode)
>  	return v * gtod->mult;
>  }
>  
> +notrace static inline u64 vgetsns_raw(int *mode)
> +{
> +	u64 v;
> +	cycles_t cycles;
> +
> +	if (gtod->vclock_mode == VCLOCK_TSC)
> +		cycles = vread_tsc_raw();
> +#ifdef CONFIG_PARAVIRT_CLOCK
> +	else if (gtod->vclock_mode == VCLOCK_PVCLOCK)
> +		cycles = vread_pvclock(mode);
> +#endif
> +#ifdef CONFIG_HYPERV_TSCPAGE
> +	else if (gtod->vclock_mode == VCLOCK_HVCLOCK)
> +		cycles = vread_hvclock(mode);
> +#endif
> +	else
> +		return 0;
> +	v = (cycles - gtod->raw_cycle_last) & gtod->raw_mask;

gtod->raw_mask is the same as gtod->mask for obvious reasons. So the whole
thing can be simplified by extending vgetsns() with a mult argument, which
is handed in from the call sites.
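
A minimal sketch of that simplification (VCLOCK_TSC path only; the
pvclock / hvclock branches, which set *mode, are elided here):

	notrace static inline u64 vgetsns(int *mode, u32 mult)
	{
		/* shared cycle_last/mask; only the multiplier differs */
		u64 v, cycles = vread_tsc();

		v = (cycles - gtod->cycle_last) & gtod->mask;
		return v * mult;
	}

	/* call sites:
	 *	ns += vgetsns(&mode, gtod->mult);	CLOCK_MONOTONIC
	 *	ns += vgetsns(&mode, gtod->raw_mult);	CLOCK_MONOTONIC_RAW
	 */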

>  
> +	vdata->raw_cycle_last	= tk->tkr_raw.cycle_last;
> +	vdata->raw_mask		= tk->tkr_raw.mask;
> +	vdata->raw_mult		= tk->tkr_raw.mult;
> +	vdata->raw_shift	= tk->tkr_raw.shift;

Only the raw_mult/shift value needs to be stored.

Thanks,

	tglx

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH v4.16-rc5 2/3] x86/vdso: on Intel, VDSO should handle CLOCK_MONOTONIC_RAW
  2018-03-14  4:20 ` [PATCH v4.16-rc5 2/3] " jason.vas.dias
@ 2018-03-14 14:48   ` Thomas Gleixner
  0 siblings, 0 replies; 8+ messages in thread
From: Thomas Gleixner @ 2018-03-14 14:48 UTC (permalink / raw)
  To: jason.vas.dias; +Cc: linux-kernel, x86, mingo, peterz, andi

On Wed, 14 Mar 2018, jason.vas.dias@gmail.com wrote:

Again: Read and comply with Documentation/process/ and fix the complaints
of checkpatch.pl.

> diff --git a/arch/x86/entry/vdso/vclock_gettime.c b/arch/x86/entry/vdso/vclock_gettime.c
> index fbc7371..2c46675 100644
> --- a/arch/x86/entry/vdso/vclock_gettime.c
> +++ b/arch/x86/entry/vdso/vclock_gettime.c
> @@ -184,10 +184,9 @@ notrace static u64 vread_tsc(void)
>  
>  notrace static u64 vread_tsc_raw(void)
>  {
> -	u64 tsc
> +	u64 tsc  = (gtod->has_rdtscp ? rdtscp((void*)0) : rdtsc_ordered())
>  	  , last = gtod->raw_cycle_last;

Aside of the totally broken coding style including usage of (void*)0 :

Did you ever benchmark rdtscp() against rdtsc_ordered()?

If so, then the results want to be documented in the changelog and this
change only makes sense when rdtscp() is actually faster.

Please document how you measured that so others can actually run the same
tests and make their own judgement.

If it would turn out that rdtscp() is faster, which I doubt, then the
conditional is the wrong way to do that. It wants to be patched at boot
time which completely avoids conditionals.
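
Boot time patching could look roughly like this sketch (x86_64 only, using
the ALTERNATIVE() asm macro; the fallback barrier choice is illustrative):

	static __always_inline u64 vread_cycles_ordered(void)
	{
		u64 lo, hi;

		/* Patched once at boot by the alternatives machinery:
		 * RDTSCP when X86_FEATURE_RDTSCP is set, otherwise a
		 * barrier + RDTSC sequence. No runtime conditional.
		 */
		asm volatile(ALTERNATIVE("lfence; rdtsc",
					 "rdtscp", X86_FEATURE_RDTSCP)
			     : "=a" (lo), "=d" (hi)
			     :: "ecx");	/* RDTSCP clobbers ECX */
		return (hi << 32) | lo;
	}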

Thanks,

	tglx

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH v4.16-rc5 1/3] x86/vdso: on Intel, VDSO should handle CLOCK_MONOTONIC_RAW
  2018-03-14  4:20 ` [PATCH v4.16-rc5 1/3] " jason.vas.dias
  2018-03-14 14:27   ` Thomas Gleixner
@ 2018-03-16  6:11   ` kbuild test robot
  1 sibling, 0 replies; 8+ messages in thread
From: kbuild test robot @ 2018-03-16  6:11 UTC (permalink / raw)
  To: jason.vas.dias; +Cc: kbuild-all, linux-kernel, x86, tglx, mingo, peterz, andi

[-- Attachment #1: Type: text/plain, Size: 1336 bytes --]

Hi,

Thank you for the patch! Yet something to improve:

[auto build test ERROR on v4.16-rc4]
[also build test ERROR on next-20180315]
[if your patch is applied to the wrong git tree, please drop us a note to help improve the system]

url:    https://github.com/0day-ci/linux/commits/jason-vas-dias-gmail-com/x86-vdso-on-Intel-VDSO-should-handle-CLOCK_MONOTONIC_RAW/20180316-070319
config: x86_64-rhel (attached as .config)
compiler: gcc-7 (Debian 7.3.0-1) 7.3.0
reproduce:
        # save the attached .config to linux build tree
        make ARCH=x86_64 

All errors (new ones prefixed by >>):

   arch/x86/entry/vdso/vclock_gettime.o: In function `__vdso_clock_gettime':
   vclock_gettime.c:(.text+0xf7): undefined reference to `__x86_indirect_thunk_rax'
   /usr/bin/ld: arch/x86/entry/vdso/vclock_gettime.o: relocation R_X86_64_PC32 against undefined symbol `__x86_indirect_thunk_rax' can not be used when making a shared object; recompile with -fPIC
   /usr/bin/ld: final link failed: Bad value
>> collect2: error: ld returned 1 exit status
--
>> arch/x86/entry/vdso/vdso32.so.dbg: undefined symbols found
--
>> objcopy: 'arch/x86/entry/vdso/vdso64.so.dbg': No such file

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all                   Intel Corporation

[-- Attachment #2: .config.gz --]
[-- Type: application/gzip, Size: 40788 bytes --]

^ permalink raw reply	[flat|nested] 8+ messages in thread

* [PATCH v4.16-rc5 1/3] x86/vdso: on Intel, VDSO should handle CLOCK_MONOTONIC_RAW
  2018-03-15 16:00 [PATCH v4.16-rc5 (3)] " jason.vas.dias
@ 2018-03-15 16:00 ` jason.vas.dias
  0 siblings, 0 replies; 8+ messages in thread
From: jason.vas.dias @ 2018-03-15 16:00 UTC (permalink / raw)
  To: linux-kernel; +Cc: x86, tglx, mingo, peterz, andi

diff --git a/arch/x86/entry/vdso/vclock_gettime.c b/arch/x86/entry/vdso/vclock_gettime.c
index f19856d..fbc7371 100644
--- a/arch/x86/entry/vdso/vclock_gettime.c
+++ b/arch/x86/entry/vdso/vclock_gettime.c
@@ -182,6 +182,18 @@ notrace static u64 vread_tsc(void)
 	return last;
 }
 
+notrace static u64 vread_tsc_raw(void)
+{
+	u64 tsc
+	  , last = gtod->raw_cycle_last;
+
+	tsc	      = rdtsc_ordered();
+	if (likely(tsc >= last))
+		return tsc;
+	asm volatile ("");
+	return last;
+}
+
 notrace static inline u64 vgetsns(int *mode)
 {
 	u64 v;
@@ -203,6 +215,27 @@ notrace static inline u64 vgetsns(int *mode)
 	return v * gtod->mult;
 }
 
+notrace static inline u64 vgetsns_raw(int *mode)
+{
+	u64 v;
+	cycles_t cycles;
+
+	if (gtod->vclock_mode == VCLOCK_TSC)
+		cycles = vread_tsc_raw();
+#ifdef CONFIG_PARAVIRT_CLOCK
+	else if (gtod->vclock_mode == VCLOCK_PVCLOCK)
+		cycles = vread_pvclock(mode);
+#endif
+#ifdef CONFIG_HYPERV_TSCPAGE
+	else if (gtod->vclock_mode == VCLOCK_HVCLOCK)
+		cycles = vread_hvclock(mode);
+#endif
+	else
+		return 0;
+	v = (cycles - gtod->raw_cycle_last) & gtod->raw_mask;
+	return v * gtod->raw_mult;
+}
+
 /* Code size doesn't matter (vdso is 4k anyway) and this is faster. */
 notrace static int __always_inline do_realtime(struct timespec *ts)
 {
@@ -246,6 +279,27 @@ notrace static int __always_inline do_monotonic(struct timespec *ts)
 	return mode;
 }
 
+notrace static __always_inline int do_monotonic_raw(struct timespec *ts)
+{
+	unsigned long seq;
+	u64 ns;
+	int mode;
+
+	do {
+		seq = gtod_read_begin(gtod);
+		mode = gtod->vclock_mode;
+		ts->tv_sec = gtod->monotonic_time_raw_sec;
+		ns = gtod->monotonic_time_raw_nsec;
+		ns += vgetsns_raw(&mode);
+		ns >>= gtod->raw_shift;
+	} while (unlikely(gtod_read_retry(gtod, seq)));
+
+	ts->tv_sec += __iter_div_u64_rem(ns, NSEC_PER_SEC, &ns);
+	ts->tv_nsec = ns;
+
+	return mode;
+}
+
 notrace static void do_realtime_coarse(struct timespec *ts)
 {
 	unsigned long seq;
@@ -277,6 +331,10 @@ notrace int __vdso_clock_gettime(clockid_t clock, struct timespec *ts)
 		if (do_monotonic(ts) == VCLOCK_NONE)
 			goto fallback;
 		break;
+	case CLOCK_MONOTONIC_RAW:
+		if (do_monotonic_raw(ts) == VCLOCK_NONE)
+			goto fallback;
+		break;
 	case CLOCK_REALTIME_COARSE:
 		do_realtime_coarse(ts);
 		break;
diff --git a/arch/x86/entry/vsyscall/vsyscall_gtod.c b/arch/x86/entry/vsyscall/vsyscall_gtod.c
index e1216dd..5af7093 100644
--- a/arch/x86/entry/vsyscall/vsyscall_gtod.c
+++ b/arch/x86/entry/vsyscall/vsyscall_gtod.c
@@ -45,6 +45,11 @@ void update_vsyscall(struct timekeeper *tk)
 	vdata->mult		= tk->tkr_mono.mult;
 	vdata->shift		= tk->tkr_mono.shift;
 
+	vdata->raw_cycle_last	= tk->tkr_raw.cycle_last;
+	vdata->raw_mask		= tk->tkr_raw.mask;
+	vdata->raw_mult		= tk->tkr_raw.mult;
+	vdata->raw_shift	= tk->tkr_raw.shift;
+
 	vdata->wall_time_sec		= tk->xtime_sec;
 	vdata->wall_time_snsec		= tk->tkr_mono.xtime_nsec;
 
@@ -74,5 +79,8 @@ void update_vsyscall(struct timekeeper *tk)
 		vdata->monotonic_time_coarse_sec++;
 	}
 
+	vdata->monotonic_time_raw_sec  = tk->raw_sec;
+	vdata->monotonic_time_raw_nsec = tk->tkr_raw.xtime_nsec;
+
 	gtod_write_end(vdata);
 }
diff --git a/arch/x86/include/asm/vgtod.h b/arch/x86/include/asm/vgtod.h
index fb856c9..24e4d45 100644
--- a/arch/x86/include/asm/vgtod.h
+++ b/arch/x86/include/asm/vgtod.h
@@ -22,6 +22,10 @@ struct vsyscall_gtod_data {
 	u64	mask;
 	u32	mult;
 	u32	shift;
+	u64	raw_cycle_last;
+	u64	raw_mask;
+	u32	raw_mult;
+	u32	raw_shift;
 
 	/* open coded 'struct timespec' */
 	u64		wall_time_snsec;
@@ -32,6 +36,8 @@ struct vsyscall_gtod_data {
 	gtod_long_t	wall_time_coarse_nsec;
 	gtod_long_t	monotonic_time_coarse_sec;
 	gtod_long_t	monotonic_time_coarse_nsec;
+	gtod_long_t	monotonic_time_raw_sec;
+	gtod_long_t	monotonic_time_raw_nsec;
 
 	int		tz_minuteswest;
 	int		tz_dsttime;

^ permalink raw reply related	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2018-03-16  6:12 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-03-14  4:20 [PATCH v4.16-rc5 (3)] x86/vdso: on Intel, VDSO should handle CLOCK_MONOTONIC_RAW jason.vas.dias
2018-03-14  4:20 ` [PATCH v4.16-rc5 1/3] " jason.vas.dias
2018-03-14 14:27   ` Thomas Gleixner
2018-03-16  6:11   ` kbuild test robot
2018-03-14  4:20 ` [PATCH v4.16-rc5 2/3] " jason.vas.dias
2018-03-14 14:48   ` Thomas Gleixner
2018-03-14  4:20 ` [PATCH v4.16-rc5 3/3] " jason.vas.dias
2018-03-15 16:00 [PATCH v4.16-rc5 (3)] " jason.vas.dias
2018-03-15 16:00 ` [PATCH v4.16-rc5 1/3] " jason.vas.dias
