linux-kernel.vger.kernel.org archive mirror
* [PATCH v4.16-rc4 1/1] x86/vdso: on Intel, VDSO should handle CLOCK_MONOTONIC_RAW
@ 2018-03-11 19:46 Jason Vas Dias
  2018-03-12  8:26 ` kbuild test robot
  0 siblings, 1 reply; 11+ messages in thread
From: Jason Vas Dias @ 2018-03-11 19:46 UTC (permalink / raw)
  To: x86, LKML, Thomas Gleixner, andi, Peter Zijlstra


  Currently the VDSO does not handle
     clock_gettime( CLOCK_MONOTONIC_RAW, &ts )
  on Intel / AMD - it calls
     vdso_fallback_gettime()
  for this clock, which issues a syscall with an unacceptably high
  latency (minimum measurable time, or time between measurements)
  of 300-700ns on two 2.8-3.9GHz Haswell x86_64 Family_Model 06_3C
  machines under various versions of Linux.

  Sometimes, particularly when correlating elapsed time to performance
  counter values, code needs to know elapsed time from the perspective
  of the CPU no matter how "hot" / fast or "cold" / slow it might be
  running with respect to NTP / PTP adjustments; when code needs this,
  the latency of a syscall is often unacceptably high.
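
  For illustration only (not part of the patch), a minimal user-space
  sketch of this kind of measurement - back-to-back readings of the
  clock, reporting the smallest interval it can observe:

     #include <stdio.h>
     #include <time.h>

     int main(void)
     {
             struct timespec t1, t2;
             long best = 1000000000L, d;
             int i;

             for (i = 0; i < 1000000; i++) {
                     clock_gettime(CLOCK_MONOTONIC_RAW, &t1);
                     clock_gettime(CLOCK_MONOTONIC_RAW, &t2);
                     d = (t2.tv_sec - t1.tv_sec) * 1000000000L
                         + (t2.tv_nsec - t1.tv_nsec);
                     if (d > 0 && d < best)
                             best = d;
             }
             printf("minimum measurable interval: %ld ns\n", best);
             return 0;
     }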

  I reported this as Bug #198961 :
    https://bugzilla.kernel.org/show_bug.cgi?id=198961
  and in previous posts with subjects matching 'CLOCK_MONOTONIC_RAW' .
     
  This patch handles CLOCK_MONOTONIC_RAW clock_gettime() in the VDSO,
  by exporting the raw clock calibration, last cycles, last xtime_nsec,
  and last raw_sec value in the vsyscall_gtod_data during update_vsyscall().

  Now the new do_monotonic_raw() function in the vDSO has a latency of
  about 24ns on average, and the test program:
   tools/testing/selftests/timers/inconsistency-check.c
  succeeds with arguments '-c 4 -t 120' or any arbitrary -t value.

  The patch is against Linus' latest 4.16-rc4 tree,
  current HEAD of :
    git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
  .

  The patch affects only files:
  
   arch/x86/include/asm/vgtod.h
   arch/x86/entry/vdso/vclock_gettime.c
   arch/x86/entry/vsyscall/vsyscall_gtod.c


  This is a resend of the original patch, fixing indentation issues
  after installing the emacs Lisp cc-mode hooks from
      Documentation/coding-style.rst
  and running 'indent-region' and 'tabify' (whitespace-only changes) -
  SORRY !
  (and even after that, somehow two stray '\t\n's were left in vgtod.h -
   now removed - sorry again!).

  Best Regards,
     Jason Vas Dias  .

  PATCH:
---     
diff -up linux-4.16-rc4/arch/x86/entry/vdso/vclock_gettime.c.4.16-rc4 linux-4.16-rc4/arch/x86/entry/vdso/vclock_gettime.c
--- linux-4.16-rc4/arch/x86/entry/vdso/vclock_gettime.c.4.16-rc4	2018-03-04 22:54:11.000000000 +0000
+++ linux-4.16-rc4/arch/x86/entry/vdso/vclock_gettime.c	2018-03-11 19:00:04.630019100 +0000
@@ -182,6 +182,29 @@ notrace static u64 vread_tsc(void)
 	return last;
 }
 
+notrace static u64 vread_tsc_raw(void)
+{
+	u64 tsc, last=gtod->raw_cycle_last;
+	if( likely( gtod->has_rdtscp ) ) {
+		u32	tsc_lo, tsc_hi,
+			tsc_cpu __attribute__((unused));
+		asm volatile
+			( "rdtscp"
+				/* ^- has built-in cancellation point / pipeline stall"barrier" */
+				:   "=a" (tsc_lo)
+				  , "=d" (tsc_hi)
+				  , "=c" (tsc_cpu)
+			); // since all variables 32-bit, eax, edx, ecx used - NOT rax, rdx, rcx
+		tsc	= ((((u64)tsc_hi) & 0xffffffffUL) << 32) | (((u64)tsc_lo) & 0xffffffffUL);
+	} else {
+		tsc	= rdtsc_ordered();
+	}
+	if (likely(tsc >= last))
+		return tsc;
+	asm volatile ("");
+	return last;
+}
+
 notrace static inline u64 vgetsns(int *mode)
 {
 	u64 v;
@@ -203,6 +226,27 @@ notrace static inline u64 vgetsns(int *m
 	return v * gtod->mult;
 }
 
+notrace static inline u64 vgetsns_raw(int *mode)
+{
+	u64 v;
+	cycles_t cycles;
+
+	if (gtod->vclock_mode == VCLOCK_TSC)
+		cycles = vread_tsc_raw();
+#ifdef CONFIG_PARAVIRT_CLOCK
+	else if (gtod->vclock_mode == VCLOCK_PVCLOCK)
+		cycles = vread_pvclock(mode);
+#endif
+#ifdef CONFIG_HYPERV_TSCPAGE
+	else if (gtod->vclock_mode == VCLOCK_HVCLOCK)
+		cycles = vread_hvclock(mode);
+#endif
+	else
+		return 0;
+	v = (cycles - gtod->raw_cycle_last) & gtod->raw_mask;
+	return v * gtod->raw_mult;
+}
+
 /* Code size doesn't matter (vdso is 4k anyway) and this is faster. */
 notrace static int __always_inline do_realtime(struct timespec *ts)
 {
@@ -246,6 +290,27 @@ notrace static int __always_inline do_mo
 	return mode;
 }
 
+notrace static int __always_inline do_monotonic_raw( struct timespec *ts)
+{
+	unsigned long seq;
+	u64 ns;
+	int mode;
+
+	do {
+		seq = gtod_read_begin(gtod);
+		mode = gtod->vclock_mode;
+		ts->tv_sec = gtod->monotonic_time_raw_sec;
+		ns = gtod->monotonic_time_raw_nsec;
+		ns += vgetsns_raw(&mode);
+		ns >>= gtod->raw_shift;
+	} while (unlikely(gtod_read_retry(gtod, seq)));
+
+	ts->tv_sec += __iter_div_u64_rem(ns, NSEC_PER_SEC, &ns);
+	ts->tv_nsec = ns;
+
+	return mode;
+}
+
 notrace static void do_realtime_coarse(struct timespec *ts)
 {
 	unsigned long seq;
@@ -277,6 +342,10 @@ notrace int __vdso_clock_gettime(clockid
 		if (do_monotonic(ts) == VCLOCK_NONE)
 			goto fallback;
 		break;
+	case CLOCK_MONOTONIC_RAW:
+		if (do_monotonic_raw(ts) == VCLOCK_NONE)
+			goto fallback;
+		break;
 	case CLOCK_REALTIME_COARSE:
 		do_realtime_coarse(ts);
 		break;
diff -up linux-4.16-rc4/arch/x86/entry/vsyscall/vsyscall_gtod.c.4.16-rc4 linux-4.16-rc4/arch/x86/entry/vsyscall/vsyscall_gtod.c
--- linux-4.16-rc4/arch/x86/entry/vsyscall/vsyscall_gtod.c.4.16-rc4	2018-03-04 22:54:11.000000000 +0000
+++ linux-4.16-rc4/arch/x86/entry/vsyscall/vsyscall_gtod.c	2018-03-11 19:06:36.584178867 +0000
@@ -16,6 +16,7 @@
 #include <linux/timekeeper_internal.h>
 #include <asm/vgtod.h>
 #include <asm/vvar.h>
+#include <asm/cpufeature.h>
 
 int vclocks_used __read_mostly;
 
@@ -45,6 +46,12 @@ void update_vsyscall(struct timekeeper *
 	vdata->mult		= tk->tkr_mono.mult;
 	vdata->shift		= tk->tkr_mono.shift;
 
+	vdata->raw_cycle_last	= tk->tkr_raw.cycle_last;
+	vdata->raw_mask		= tk->tkr_raw.mask;
+	vdata->raw_mult		= tk->tkr_raw.mult;
+	vdata->raw_shift	= tk->tkr_raw.shift;
+	vdata->has_rdtscp	= static_cpu_has(X86_FEATURE_RDTSCP);
+
 	vdata->wall_time_sec		= tk->xtime_sec;
 	vdata->wall_time_snsec		= tk->tkr_mono.xtime_nsec;
 
@@ -74,5 +81,8 @@ void update_vsyscall(struct timekeeper *
 		vdata->monotonic_time_coarse_sec++;
 	}
 
+	vdata->monotonic_time_raw_sec  = tk->raw_sec;
+	vdata->monotonic_time_raw_nsec = tk->tkr_raw.xtime_nsec;
+
 	gtod_write_end(vdata);
 }
diff -up linux-4.16-rc4/arch/x86/include/asm/vgtod.h.4.16-rc4 linux-4.16-rc4/arch/x86/include/asm/vgtod.h
--- linux-4.16-rc4/arch/x86/include/asm/vgtod.h.4.16-rc4	2018-03-04 22:54:11.000000000 +0000
+++ linux-4.16-rc4/arch/x86/include/asm/vgtod.h	2018-03-11 19:34:39.290617098 +0000
@@ -22,6 +22,11 @@ struct vsyscall_gtod_data {
 	u64	mask;
 	u32	mult;
 	u32	shift;
+	u64	raw_cycle_last;
+	u64	raw_mask;
+	u32	raw_mult;
+	u32	raw_shift;
+	u32	has_rdtscp;
 
 	/* open coded 'struct timespec' */
 	u64		wall_time_snsec;
@@ -32,6 +37,8 @@ struct vsyscall_gtod_data {
 	gtod_long_t	wall_time_coarse_nsec;
 	gtod_long_t	monotonic_time_coarse_sec;
 	gtod_long_t	monotonic_time_coarse_nsec;
+	gtod_long_t	monotonic_time_raw_sec;
+	gtod_long_t	monotonic_time_raw_nsec;
 
 	int		tz_minuteswest;
 	int		tz_dsttime;
---

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH v4.16-rc4 1/1] x86/vdso: on Intel, VDSO should handle CLOCK_MONOTONIC_RAW
  2018-03-11 19:46 [PATCH v4.16-rc4 1/1] x86/vdso: on Intel, VDSO should handle CLOCK_MONOTONIC_RAW Jason Vas Dias
@ 2018-03-12  8:26 ` kbuild test robot
  0 siblings, 0 replies; 11+ messages in thread
From: kbuild test robot @ 2018-03-12  8:26 UTC (permalink / raw)
  To: Jason Vas Dias
  Cc: kbuild-all, x86, LKML, Thomas Gleixner, andi, Peter Zijlstra

[-- Attachment #1: Type: text/plain, Size: 1557 bytes --]

Hi Jason,

Thank you for the patch! Yet something to improve:

[auto build test ERROR on v4.16-rc4]

url:    https://github.com/0day-ci/linux/commits/Jason-Vas-Dias/x86-vdso-on-Intel-VDSO-should-handle-CLOCK_MONOTONIC_RAW/20180312-161442
config: x86_64-randconfig-x010-201810 (attached as .config)
compiler: gcc-7 (Debian 7.3.0-1) 7.3.0
reproduce:
        # save the attached .config to linux build tree
        make ARCH=x86_64 

All errors (new ones prefixed by >>):

   arch/x86/entry/vdso/vclock_gettime.o: In function `__vdso_clock_gettime':
   arch/x86/entry/vdso/vclock_gettime.c:336: undefined reference to `__x86_indirect_thunk_rax'
   /usr/bin/ld: arch/x86/entry/vdso/vclock_gettime.o: relocation R_X86_64_PC32 against undefined symbol `__x86_indirect_thunk_rax' can not be used when making a shared object; recompile with -fPIC
   /usr/bin/ld: final link failed: Bad value
>> collect2: error: ld returned 1 exit status
--
   arch/x86//entry/vdso/vclock_gettime.o: In function `__vdso_clock_gettime':
   arch/x86//entry/vdso/vclock_gettime.c:336: undefined reference to `__x86_indirect_thunk_rax'
   /usr/bin/ld: arch/x86//entry/vdso/vclock_gettime.o: relocation R_X86_64_PC32 against undefined symbol `__x86_indirect_thunk_rax' can not be used when making a shared object; recompile with -fPIC
   /usr/bin/ld: final link failed: Bad value
>> collect2: error: ld returned 1 exit status

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all                   Intel Corporation

[-- Attachment #2: .config.gz --]
[-- Type: application/gzip, Size: 26111 bytes --]

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH v4.16-rc4 1/1] x86/vdso: on Intel, VDSO should handle CLOCK_MONOTONIC_RAW
  2018-03-11  6:25 Jason Vas Dias
  2018-03-12  6:37 ` kbuild test robot
@ 2018-03-12  6:41 ` kbuild test robot
  1 sibling, 0 replies; 11+ messages in thread
From: kbuild test robot @ 2018-03-12  6:41 UTC (permalink / raw)
  To: Jason Vas Dias
  Cc: kbuild-all, x86, LKML, Thomas Gleixner, andi, Peter Zijlstra

[-- Attachment #1: Type: text/plain, Size: 4716 bytes --]

Hi Jason,

Thank you for the patch! Yet something to improve:

[auto build test ERROR on v4.16-rc4]

url:    https://github.com/0day-ci/linux/commits/Jason-Vas-Dias/x86-vdso-on-Intel-VDSO-should-handle-CLOCK_MONOTONIC_RAW/20180312-141707
config: x86_64-randconfig-x002-201810 (attached as .config)
compiler: gcc-7 (Debian 7.3.0-1) 7.3.0
reproduce:
        # save the attached .config to linux build tree
        make ARCH=x86_64 

All errors (new ones prefixed by >>):

   arch/x86/entry/vdso/vclock_gettime.o: In function `__vdso_clock_gettime':
>> arch/x86/entry/vdso/vclock_gettime.c:336: undefined reference to `__x86_indirect_thunk_rax'
   /usr/bin/ld: arch/x86/entry/vdso/vclock_gettime.o: relocation R_X86_64_PC32 against undefined symbol `__x86_indirect_thunk_rax' can not be used when making a shared object; recompile with -fPIC
   /usr/bin/ld: final link failed: Bad value
   collect2: error: ld returned 1 exit status

vim +336 arch/x86/entry/vdso/vclock_gettime.c

da15cfda arch/x86/vdso/vclock_gettime.c       John Stultz     2009-08-19  333  
23adec55 arch/x86/vdso/vclock_gettime.c       Steven Rostedt  2008-05-12  334  notrace int __vdso_clock_gettime(clockid_t clock, struct timespec *ts)
2aae950b arch/x86_64/vdso/vclock_gettime.c    Andi Kleen      2007-07-21  335  {
2aae950b arch/x86_64/vdso/vclock_gettime.c    Andi Kleen      2007-07-21 @336  	switch (clock) {
2aae950b arch/x86_64/vdso/vclock_gettime.c    Andi Kleen      2007-07-21  337  	case CLOCK_REALTIME:
ce39c640 arch/x86/vdso/vclock_gettime.c       Stefani Seibold 2014-03-17  338  		if (do_realtime(ts) == VCLOCK_NONE)
ce39c640 arch/x86/vdso/vclock_gettime.c       Stefani Seibold 2014-03-17  339  			goto fallback;
da15cfda arch/x86/vdso/vclock_gettime.c       John Stultz     2009-08-19  340  		break;
2aae950b arch/x86_64/vdso/vclock_gettime.c    Andi Kleen      2007-07-21  341  	case CLOCK_MONOTONIC:
ce39c640 arch/x86/vdso/vclock_gettime.c       Stefani Seibold 2014-03-17  342  		if (do_monotonic(ts) == VCLOCK_NONE)
ce39c640 arch/x86/vdso/vclock_gettime.c       Stefani Seibold 2014-03-17  343  			goto fallback;
da15cfda arch/x86/vdso/vclock_gettime.c       John Stultz     2009-08-19  344  		break;
ff72916d arch/x86/entry/vdso/vclock_gettime.c Jason Vas Dias  2018-03-11  345  	case CLOCK_MONOTONIC_RAW:
ff72916d arch/x86/entry/vdso/vclock_gettime.c Jason Vas Dias  2018-03-11  346  		if (do_monotonic_raw(ts) == VCLOCK_NONE)
ff72916d arch/x86/entry/vdso/vclock_gettime.c Jason Vas Dias  2018-03-11  347  			goto fallback;
ff72916d arch/x86/entry/vdso/vclock_gettime.c Jason Vas Dias  2018-03-11  348  		break;
da15cfda arch/x86/vdso/vclock_gettime.c       John Stultz     2009-08-19  349  	case CLOCK_REALTIME_COARSE:
ce39c640 arch/x86/vdso/vclock_gettime.c       Stefani Seibold 2014-03-17  350  		do_realtime_coarse(ts);
ce39c640 arch/x86/vdso/vclock_gettime.c       Stefani Seibold 2014-03-17  351  		break;
da15cfda arch/x86/vdso/vclock_gettime.c       John Stultz     2009-08-19  352  	case CLOCK_MONOTONIC_COARSE:
ce39c640 arch/x86/vdso/vclock_gettime.c       Stefani Seibold 2014-03-17  353  		do_monotonic_coarse(ts);
ce39c640 arch/x86/vdso/vclock_gettime.c       Stefani Seibold 2014-03-17  354  		break;
ce39c640 arch/x86/vdso/vclock_gettime.c       Stefani Seibold 2014-03-17  355  	default:
ce39c640 arch/x86/vdso/vclock_gettime.c       Stefani Seibold 2014-03-17  356  		goto fallback;
2aae950b arch/x86_64/vdso/vclock_gettime.c    Andi Kleen      2007-07-21  357  	}
0d7b8547 arch/x86/vdso/vclock_gettime.c       Andy Lutomirski 2011-06-05  358  
a939e817 arch/x86/vdso/vclock_gettime.c       John Stultz     2012-03-01  359  	return 0;
ce39c640 arch/x86/vdso/vclock_gettime.c       Stefani Seibold 2014-03-17  360  fallback:
ce39c640 arch/x86/vdso/vclock_gettime.c       Stefani Seibold 2014-03-17  361  	return vdso_fallback_gettime(clock, ts);
2aae950b arch/x86_64/vdso/vclock_gettime.c    Andi Kleen      2007-07-21  362  }
2aae950b arch/x86_64/vdso/vclock_gettime.c    Andi Kleen      2007-07-21  363  int clock_gettime(clockid_t, struct timespec *)
2aae950b arch/x86_64/vdso/vclock_gettime.c    Andi Kleen      2007-07-21  364  	__attribute__((weak, alias("__vdso_clock_gettime")));
2aae950b arch/x86_64/vdso/vclock_gettime.c    Andi Kleen      2007-07-21  365  

:::::: The code at line 336 was first introduced by commit
:::::: 2aae950b21e4bc789d1fc6668faf67e8748300b7 x86_64: Add vDSO for x86-64 with gettimeofday/clock_gettime/getcpu

:::::: TO: Andi Kleen <ak@suse.de>
:::::: CC: Linus Torvalds <torvalds@woody.linux-foundation.org>

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all                   Intel Corporation

[-- Attachment #2: .config.gz --]
[-- Type: application/gzip, Size: 30107 bytes --]

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH v4.16-rc4 1/1] x86/vdso: on Intel, VDSO should handle CLOCK_MONOTONIC_RAW
  2018-03-11  6:25 Jason Vas Dias
@ 2018-03-12  6:37 ` kbuild test robot
  2018-03-12  6:41 ` kbuild test robot
  1 sibling, 0 replies; 11+ messages in thread
From: kbuild test robot @ 2018-03-12  6:37 UTC (permalink / raw)
  To: Jason Vas Dias
  Cc: kbuild-all, x86, LKML, Thomas Gleixner, andi, Peter Zijlstra

[-- Attachment #1: Type: text/plain, Size: 689 bytes --]

Hi Jason,

Thank you for the patch! Yet something to improve:

[auto build test ERROR on v4.16-rc4]

url:    https://github.com/0day-ci/linux/commits/Jason-Vas-Dias/x86-vdso-on-Intel-VDSO-should-handle-CLOCK_MONOTONIC_RAW/20180312-141707
config: i386-randconfig-x006-201810 (attached as .config)
compiler: gcc-7 (Debian 7.3.0-1) 7.3.0
reproduce:
        # save the attached .config to linux build tree
        make ARCH=i386 

All errors (new ones prefixed by >>):

>> arch/x86/entry/vdso/vdso32.so.dbg: undefined symbols found

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all                   Intel Corporation

[-- Attachment #2: .config.gz --]
[-- Type: application/gzip, Size: 28357 bytes --]

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH v4.16-rc4 1/1] x86/vdso: on Intel, VDSO should handle CLOCK_MONOTONIC_RAW
  2018-03-11 22:03   ` Jason Vas Dias
@ 2018-03-11 22:51     ` Thomas Gleixner
  0 siblings, 0 replies; 11+ messages in thread
From: Thomas Gleixner @ 2018-03-11 22:51 UTC (permalink / raw)
  To: Jason Vas Dias; +Cc: x86, LKML, andi, Peter Zijlstra

On Sun, 11 Mar 2018, Jason Vas Dias wrote:
> On 11/03/2018, Thomas Gleixner <tglx@linutronix.de> wrote:
> > On Sun, 11 Mar 2018, Jason Vas Dias wrote:
> >
> > This looks better now. Though running that patch through checkpatch.pl
> > results in:
> >
> > total: 28 errors, 20 warnings, 139 lines checked
> >
> 
> Hmm, I was unaware of that script, I'll run and find out why -

Documentation/process/* which I recommended for reading before definitely
mentions it.

> probably because whitespace is not visible in emacs with
> my monospace font and it is very difficult to see if tabs
> are used if somehow a '\t\ ' or ' \t' has slipped in .

It's not only about whitespace ....

> >> +notrace static u64 vread_tsc_raw(void)
> >
> > Why do you need a separate function? I asked you to use vread_tsc(). So you
> > might have reasons for doing that, but please then explain WHY and not just
> > throw the stuff in my direction w/o any comment.
> >
> 
> > mainly, because vread_tsc() makes its comparison against gtod->cycle_last,
> > a copy of tk->tkr_mono.cycle_last, while vread_tsc_raw() uses
> > gtod->raw_cycle_last, a copy of tk->tkr_raw.cycle_last.

Well, if you look at the timekeeping core then you'll notice that it's
actually the same value. It's updated in both tkr_mono and tkr_raw so its
available when either of the tkr_* structs is handed to the relevant
functions.

> A complete patch against 4.15.9 that I am using is attached,
> including a suggested '__vdso_linux_tsc_calibration()'
> function and an arch/x86/include/uapi/asm/vdso_tsc_calibration.h file
> that does not return any pointers into the VDSO.

I'm not going to look at attached complete patches. The development process
is well defined for a reason and it's not optional.

Thanks,

	tglx

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH v4.16-rc4 1/1] x86/vdso: on Intel, VDSO should handle CLOCK_MONOTONIC_RAW
  2018-03-11 18:01 ` Thomas Gleixner
@ 2018-03-11 22:03   ` Jason Vas Dias
  2018-03-11 22:51     ` Thomas Gleixner
  0 siblings, 1 reply; 11+ messages in thread
From: Jason Vas Dias @ 2018-03-11 22:03 UTC (permalink / raw)
  To: Thomas Gleixner; +Cc: x86, LKML, andi, Peter Zijlstra

[-- Attachment #1: Type: text/plain, Size: 3135 bytes --]

Thanks Thomas -

On 11/03/2018, Thomas Gleixner <tglx@linutronix.de> wrote:
> On Sun, 11 Mar 2018, Jason Vas Dias wrote:
>
> This looks better now. Though running that patch through checkpatch.pl
> results in:
>
> total: 28 errors, 20 warnings, 139 lines checked
>

Hmm, I was unaware of that script, I'll run and find out why -
probably because whitespace is not visible in emacs with
my monospace font and it is very difficult to see if tabs
are used if somehow a '\t\ ' or ' \t' has slipped in .

I'll run the script, fix the errors, and repost.

> ....
>
>> +notrace static u64 vread_tsc_raw(void)
>
> Why do you need a separate function? I asked you to use vread_tsc(). So you
> might have reasons for doing that, but please then explain WHY and not just
> throw the stuff in my direction w/o any comment.
>

mainly, because vread_tsc() makes its comparison against gtod->cycle_last,
a copy of tk->tkr_mono.cycle_last, while vread_tsc_raw() uses
gtod->raw_cycle_last, a copy of tk->tkr_raw.cycle_last.

And rdtscp has a built-in "barrier", as the comments explain, making
rdtsc_ordered()'s 'barrier()' unnecessary.


>> +{
>> +        u64 tsc, last=gtod->raw_cycle_last;
>> +        if( likely( gtod->has_rdtscp ) ) {
>> +                u32     tsc_lo, tsc_hi,
>> +                        tsc_cpu __attribute__((unused));
>> +                asm volatile
>> +                ( "rdtscp"
>> +                /* ^- has built-in cancellation point / pipeline stall
>> "barrier" */
>> +                : "=a" (tsc_lo)
>> +                , "=d" (tsc_hi)
>> +                , "=c" (tsc_cpu)
>> +                ); // since all variables 32-bit, eax, edx, ecx used -
>> NOT rax, rdx, rcx
>> +                tsc      = ((((u64)tsc_hi) & 0xffffffffUL) << 32) |
>> (((u64)tsc_lo) & 0xffffffffUL);
>
> This is not required to make the vdso accessor for monotonic raw work.
>
> If at all then the rdtscp support wants to be in a separate patch with a
> proper explanation.
>


> Aside of that the code for rdtscp wants to be in a proper inline helper in
> the relevant header file and written according to the coding style the
> kernel uses for asm inlines.
>

Sorry, I will put the function in the same header as rdtsc_ordered(),
in a separate patch.
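
For what it's worth, a rough sketch of what such an inline helper might
look like (the name and placement below are my guesses, purely
illustrative, not the existing kernel implementation):

	/* e.g. next to rdtsc_ordered() in arch/x86/include/asm/msr.h */
	static __always_inline u64 rdtscp_ordered(void)
	{
		u32 lo, hi, cpu;

		/* rdtscp waits for prior instructions to complete
		 * (the "built-in barrier" discussed above), so no
		 * separate barrier is issued before reading the TSC.
		 */
		asm volatile("rdtscp" : "=a" (lo), "=d" (hi), "=c" (cpu));
		return ((u64)hi << 32) | lo;
	}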

> The rest looks ok.
>
> Thanks,
>
> 	tglx
>

I'll re-generate patches and resend .

A complete patch against 4.15.9 that I am using is attached,
including a suggested '__vdso_linux_tsc_calibration()'
function and an arch/x86/include/uapi/asm/vdso_tsc_calibration.h file
that does not return any pointers into the VDSO.

Presuming this was split into separate patches as you suggest,
and was against the latest HEAD branch (4.16-rcX), would it be OK to
include the vdso_linux_tsc_calibration() work?
It does enable user-space code to develop accurate TSC readers
which are free to use different structures and picosecond resolution.
The actual user-space clock_gettime(CLOCK_MONOTONIC_RAW)
replacement I am using for work just reads the TSC, with a latency of
< 8ns, and uses the linux_tsc_calibration to convert, using
floating-point as required.
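
Roughly, a user-space sketch of that approach (this assumes the attached
patch is applied, and that 'get_tsc_cal' has already been resolved from
the vDSO as "__vdso_linux_tsc_calibration", e.g. with
tools/testing/selftests/vDSO/parse_vdso.c):

	static unsigned int (*get_tsc_cal)(struct linux_tsc_calibration *);

	/* nanoseconds elapsed since prev_tsc, per the kernel's raw calibration */
	static inline unsigned long long tsc_delta_ns(unsigned long long prev_tsc)
	{
		struct linux_tsc_calibration cal;
		unsigned int lo, hi, cpu;
		unsigned long long tsc, delta;

		if (!get_tsc_cal || !get_tsc_cal(&cal))
			return 0;	/* TSC is not the clocksource */
		asm volatile("rdtscp" : "=a" (lo), "=d" (hi), "=c" (cpu));
		tsc   = ((unsigned long long)hi << 32) | lo;
		delta = tsc - prev_tsc;
		/* fixed point, as the kernel does; in floating point,
		 * delta * 1e6 / cal.tsc_khz gives nearly the same value
		 */
		return (delta * cal.mult) >> cal.shift;
	}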

Thanks & Regards,
Jason

[-- Attachment #2: vdso_gettime_monotonic_raw-4.15.9.patch --]
[-- Type: application/octet-stream, Size: 10347 bytes --]

diff --git a/arch/x86/entry/vdso/vclock_gettime.c b/arch/x86/entry/vdso/vclock_gettime.c
index f19856d..050aea7 100644
--- a/arch/x86/entry/vdso/vclock_gettime.c
+++ b/arch/x86/entry/vdso/vclock_gettime.c
@@ -21,6 +21,7 @@
 #include <linux/math64.h>
 #include <linux/time.h>
 #include <linux/kernel.h>
+#include <uapi/asm/vdso_tsc_calibration.h>
 
 #define gtod (&VVAR(vsyscall_gtod_data))
 
@@ -57,7 +58,6 @@ notrace static long vdso_fallback_gtod(struct timeval *tv, struct timezone *tz)
 	return ret;
 }
 
-
 #else
 
 notrace static long vdso_fallback_gettime(long clock, struct timespec *ts)
@@ -182,6 +182,29 @@ notrace static u64 vread_tsc(void)
 	return last;
 }
 
+notrace static u64 vread_tsc_raw(void)
+{
+        u64 tsc, last=gtod->raw_cycle_last;
+        if( likely( gtod->has_rdtscp ) ) {
+                u32     tsc_lo, tsc_hi,
+                        tsc_cpu __attribute__((unused));
+                asm volatile
+                ( "rdtscp"
+                /* ^- has built-in cancellation point / pipeline stall "barrier" */
+                : "=a" (tsc_lo)
+                , "=d" (tsc_hi)
+                , "=c" (tsc_cpu)
+                ); // since all variables 32-bit, eax, edx, ecx used - NOT rax, rdx, rcx
+                tsc      = ((((u64)tsc_hi) & 0xffffffffUL) << 32) | (((u64)tsc_lo) & 0xffffffffUL);
+        } else {
+                tsc      = rdtsc_ordered();
+        }
+	if (likely(tsc >= last))
+		return tsc;
+        asm volatile ("");
+        return last;
+}
+
 notrace static inline u64 vgetsns(int *mode)
 {
 	u64 v;
@@ -203,6 +226,27 @@ notrace static inline u64 vgetsns(int *mode)
 	return v * gtod->mult;
 }
 
+notrace static inline u64 vgetsns_raw(int *mode)
+{
+	u64 v;
+	cycles_t cycles;
+
+	if (gtod->vclock_mode == VCLOCK_TSC)
+		cycles = vread_tsc_raw();
+#ifdef CONFIG_PARAVIRT_CLOCK
+	else if (gtod->vclock_mode == VCLOCK_PVCLOCK)
+		cycles = vread_pvclock(mode);
+#endif
+#ifdef CONFIG_HYPERV_TSCPAGE
+	else if (gtod->vclock_mode == VCLOCK_HVCLOCK)
+		cycles = vread_hvclock(mode);
+#endif
+	else
+		return 0;
+	v = (cycles - gtod->raw_cycle_last) & gtod->raw_mask;
+	return v * gtod->raw_mult;
+}
+
 /* Code size doesn't matter (vdso is 4k anyway) and this is faster. */
 notrace static int __always_inline do_realtime(struct timespec *ts)
 {
@@ -246,6 +290,27 @@ notrace static int __always_inline do_monotonic(struct timespec *ts)
 	return mode;
 }
 
+notrace static int __always_inline do_monotonic_raw( struct timespec *ts)
+{
+	unsigned long seq;
+	u64 ns;
+	int mode;
+
+	do {
+		seq = gtod_read_begin(gtod);
+		mode = gtod->vclock_mode;
+		ts->tv_sec = gtod->monotonic_time_raw_sec;
+		ns = gtod->monotonic_time_raw_nsec;
+		ns += vgetsns_raw(&mode);
+		ns >>= gtod->raw_shift;
+	} while (unlikely(gtod_read_retry(gtod, seq)));
+
+	ts->tv_sec += __iter_div_u64_rem(ns, NSEC_PER_SEC, &ns);
+	ts->tv_nsec = ns;
+
+	return mode;
+}
+
 notrace static void do_realtime_coarse(struct timespec *ts)
 {
 	unsigned long seq;
@@ -277,6 +342,10 @@ notrace int __vdso_clock_gettime(clockid_t clock, struct timespec *ts)
 		if (do_monotonic(ts) == VCLOCK_NONE)
 			goto fallback;
 		break;
+	case CLOCK_MONOTONIC_RAW:
+		if (do_monotonic_raw(ts) == VCLOCK_NONE)
+			goto fallback;
+		break;
 	case CLOCK_REALTIME_COARSE:
 		do_realtime_coarse(ts);
 		break;
@@ -326,3 +395,23 @@ notrace time_t __vdso_time(time_t *t)
 }
 time_t time(time_t *t)
 	__attribute__((weak, alias("__vdso_time")));
+
+extern unsigned
+__vdso_linux_tsc_calibration(struct linux_tsc_calibration *);
+
+notrace	unsigned
+__vdso_linux_tsc_calibration(struct linux_tsc_calibration *tsc_cal)
+{
+	if ( (gtod->vclock_mode == VCLOCK_TSC) && (tsc_cal != ((void*)0UL)) )
+	{
+		tsc_cal -> tsc_khz = gtod->tsc_khz;
+		tsc_cal -> mult    = gtod->raw_mult;
+		tsc_cal -> shift   = gtod->raw_shift;
+		return 1;
+	}
+	return 0;
+}
+
+const struct linux_tsc_calibration * linux_tsc_calibration(void)
+	__attribute((weak, alias("__vdso_linux_tsc_calibration")));
+
diff --git a/arch/x86/entry/vdso/vdso.lds.S b/arch/x86/entry/vdso/vdso.lds.S
index d3a2dce..e0b5cce 100644
--- a/arch/x86/entry/vdso/vdso.lds.S
+++ b/arch/x86/entry/vdso/vdso.lds.S
@@ -25,6 +25,8 @@ VERSION {
 		__vdso_getcpu;
 		time;
 		__vdso_time;
+		linux_tsc_calibration;
+		__vdso_linux_tsc_calibration;
 	local: *;
 	};
 }
diff --git a/arch/x86/entry/vdso/vdso32/vdso32.lds.S b/arch/x86/entry/vdso/vdso32/vdso32.lds.S
index 422764a..17fd07f 100644
--- a/arch/x86/entry/vdso/vdso32/vdso32.lds.S
+++ b/arch/x86/entry/vdso/vdso32/vdso32.lds.S
@@ -26,6 +26,7 @@ VERSION
 		__vdso_clock_gettime;
 		__vdso_gettimeofday;
 		__vdso_time;
+		__vdso_linux_tsc_calibration;
 	};
 
 	LINUX_2.5 {
diff --git a/arch/x86/entry/vdso/vdsox32.lds.S b/arch/x86/entry/vdso/vdsox32.lds.S
index 05cd1c5..b69b0bf 100644
--- a/arch/x86/entry/vdso/vdsox32.lds.S
+++ b/arch/x86/entry/vdso/vdsox32.lds.S
@@ -21,6 +21,7 @@ VERSION {
 		__vdso_gettimeofday;
 		__vdso_getcpu;
 		__vdso_time;
+		__vdso_linux_tsc_calibration ;
 	local: *;
 	};
 }
diff --git a/arch/x86/entry/vsyscall/vsyscall_gtod.c b/arch/x86/entry/vsyscall/vsyscall_gtod.c
index e1216dd..5fc49b9b 100644
--- a/arch/x86/entry/vsyscall/vsyscall_gtod.c
+++ b/arch/x86/entry/vsyscall/vsyscall_gtod.c
@@ -16,11 +16,14 @@
 #include <linux/timekeeper_internal.h>
 #include <asm/vgtod.h>
 #include <asm/vvar.h>
+#include <asm/cpufeature.h>
 
 int vclocks_used __read_mostly;
 
 DEFINE_VVAR(struct vsyscall_gtod_data, vsyscall_gtod_data);
 
+extern unsigned int tsc_khz;
+
 void update_vsyscall_tz(void)
 {
 	vsyscall_gtod_data.tz_minuteswest = sys_tz.tz_minuteswest;
@@ -45,6 +48,12 @@ void update_vsyscall(struct timekeeper *tk)
 	vdata->mult		= tk->tkr_mono.mult;
 	vdata->shift		= tk->tkr_mono.shift;
 
+	vdata->raw_cycle_last   = tk->tkr_raw.cycle_last;
+	vdata->raw_mask         = tk->tkr_raw.mask;
+	vdata->raw_mult         = tk->tkr_raw.mult;
+	vdata->raw_shift	= tk->tkr_raw.shift;
+	vdata->has_rdtscp	= static_cpu_has(X86_FEATURE_RDTSCP);
+
 	vdata->wall_time_sec		= tk->xtime_sec;
 	vdata->wall_time_snsec		= tk->tkr_mono.xtime_nsec;
 
@@ -74,5 +83,9 @@ void update_vsyscall(struct timekeeper *tk)
 		vdata->monotonic_time_coarse_sec++;
 	}
 
+	vdata->monotonic_time_raw_sec  = tk->raw_sec;
+	vdata->monotonic_time_raw_nsec = tk->tkr_raw.xtime_nsec;
+	vdata->tsc_khz = tsc_khz;
+
 	gtod_write_end(vdata);
 }
diff --git a/arch/x86/include/asm/vgtod.h b/arch/x86/include/asm/vgtod.h
index fb856c9..4c474fa 100644
--- a/arch/x86/include/asm/vgtod.h
+++ b/arch/x86/include/asm/vgtod.h
@@ -22,7 +22,12 @@ struct vsyscall_gtod_data {
 	u64	mask;
 	u32	mult;
 	u32	shift;
-
+	u64	raw_cycle_last;
+	u64	raw_mask;
+	u32	raw_mult;
+	u32	raw_shift;
+	u32	has_rdtscp;
+	u32     tsc_khz;
 	/* open coded 'struct timespec' */
 	u64		wall_time_snsec;
 	gtod_long_t	wall_time_sec;
@@ -32,6 +37,8 @@ struct vsyscall_gtod_data {
 	gtod_long_t	wall_time_coarse_nsec;
 	gtod_long_t	monotonic_time_coarse_sec;
 	gtod_long_t	monotonic_time_coarse_nsec;
+	gtod_long_t	monotonic_time_raw_sec;
+	gtod_long_t	monotonic_time_raw_nsec;
 
 	int		tz_minuteswest;
 	int		tz_dsttime;
diff --git a/arch/x86/include/uapi/asm/vdso_tsc_calibration.h b/arch/x86/include/uapi/asm/vdso_tsc_calibration.h
new file mode 100644
index 0000000..3eded59
--- /dev/null
+++ b/arch/x86/include/uapi/asm/vdso_tsc_calibration.h
@@ -0,0 +1,73 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _ASM_X86_VDSO_TSC_CALIBRATION_H
+#define _ASM_X86_VDSO_TSC_CALIBRATION_H
+/* 
+ * Programs that want to use rdtsc / rdtscp instructions
+ * from user-space can make use of the Linux kernel TSC calibration
+ * by calling :
+ *    __vdso_linux_tsc_calibration(struct linux_tsc_calibration_s *);
+ * ( one has to resolve this symbol as in 
+ *   tools/testing/selftests/vDSO/parse_vdso.c
+ * )
+ * which fills in a structure
+ * with the following layout :
+ */
+
+/** struct linux_tsc_calibration -
+ * mult:    amount to multiply 64-bit TSC value by
+ * shift:   the right shift to apply to (mult*TSC) yielding nanoseconds
+ * tsc_khz: the calibrated TSC frequency in KHz from which previous members calculated
+ */
+struct linux_tsc_calibration
+{
+        unsigned int mult;
+        unsigned int shift;
+        unsigned int tsc_khz;
+};
+
+/* To use:
+ *
+ *  static unsigned
+ *  (*linux_tsc_cal)(struct linux_tsc_calibration *linux_tsc_cal) = vdso_sym("LINUX_2.6", "__vdso_linux_tsc_calibration");
+ *  if( linux_tsc_cal == 0UL )
+ *  { fprintf(stderr,"the patch providing __vdso_linux_tsc_calibration is not applied to the kernel.\n");
+ *    return ERROR;
+ *  }
+ *  static const struct linux_tsc_calibration *clock_source=(void*)0UL;
+ *  if( ! (*linux_tsc_cal)(&clock_source) )
+ *    fprintf(stderr,"TSC is not the system clocksource.\n");
+ *  unsigned int tsc_lo, tsc_hi, tsc_cpu;
+ *  asm volatile
+ *  ( "rdtscp" : (=a) tsc_hi,  (=d) tsc_lo, (=c) tsc_cpu );
+ *  unsigned long tsc = (((unsigned long)tsc_hi) << 32) | tsc_lo;
+ *  unsigned long nanoseconds =
+ *   (( clock_source -> mult ) * tsc ) >> (clock_source -> shift);
+ *
+ *  nanoseconds is now TSC value converted to nanoseconds,
+ *  according to Linux' clocksource calibration values.
+ *  Incidentally, 'tsc_cpu' is the number of the CPU the task is running on.
+ *
+ * But better results are obtained by applying this to the difference (delta)
+ * and adding this to some previous timespec value:
+ *   static u64 previous_tsc=0, previous_nsec=0, previous_sec=0;
+ *   u64  tsc      = rdtscp();
+ *   u64  delta    = tsc - previous_tsc;
+ *   u64  nsec     = ((delta * clock_source->mult) + previous_nsec )
+ *	           >> clock_source->shift;
+ *   ts->tv_sec    = previous_sec + (nsec / NSEC_PER_SEC);
+ *   ts->tv_nsec   = nsec % NSEC_PER_SEC;
+ *   previous_tsc  = tsc
+ *   previous_sec  = ts->tv_sec;
+ *   previous_nsec = ts->tv_nsec << clock_source->shift;
+ *   return ts;
+ * This is the approach taken by Linux kernel & in VDSO .
+ *
+ * Or, in user-space, with floating point, one could use the rdtscp value as number of picoseconds :
+ *     u64 ns = lround( ((double)rdtscp()) / (((double)clock_source->tsc_khz) / 1e3) );
+ * (ie. if tsc_khz is 3000 , there are 3 tsc ticks per nanosecond, so divide tsc ticks by 3).
+ *
+ * There should actually be very little difference between the two values obtained (@ 0.02% )
+ * by either method.
+ */
+
+#endif

^ permalink raw reply related	[flat|nested] 11+ messages in thread

* Re: [PATCH v4.16-rc4 1/1] x86/vdso: on Intel, VDSO should handle CLOCK_MONOTONIC_RAW
  2018-03-11  6:25 Jason Vas Dias
@ 2018-03-11 18:01 ` Thomas Gleixner
  2018-03-11 22:03   ` Jason Vas Dias
  0 siblings, 1 reply; 11+ messages in thread
From: Thomas Gleixner @ 2018-03-11 18:01 UTC (permalink / raw)
  To: Jason Vas Dias; +Cc: x86, LKML, andi, Peter Zijlstra

On Sun, 11 Mar 2018, Jason Vas Dias wrote:

This looks better now. Though running that patch through checkpatch.pl
results in:

total: 28 errors, 20 warnings, 139 lines checked

....

> +notrace static u64 vread_tsc_raw(void)

Why do you need a separate function? I asked you to use vread_tsc(). So you
might have reasons for doing that, but please then explain WHY and not just
throw the stuff in my direction w/o any comment.

> +{
> +        u64 tsc, last=gtod->raw_cycle_last;
> +        if( likely( gtod->has_rdtscp ) ) {
> +                u32     tsc_lo, tsc_hi,
> +                        tsc_cpu __attribute__((unused));
> +                asm volatile
> +                ( "rdtscp"
> +                /* ^- has built-in cancellation point / pipeline stall "barrier" */
> +                : "=a" (tsc_lo)
> +                , "=d" (tsc_hi)
> +                , "=c" (tsc_cpu)
> +                ); // since all variables 32-bit, eax, edx, ecx used - NOT rax, rdx, rcx                 
> +                tsc      = ((((u64)tsc_hi) & 0xffffffffUL) << 32) | (((u64)tsc_lo) & 0xffffffffUL);

This is not required to make the vdso accessor for monotonic raw work.

If at all then the rdtscp support wants to be in a separate patch with a
proper explanation.

Aside of that the code for rdtscp wants to be in a proper inline helper in
the relevant header file and written according to the coding style the
kernel uses for asm inlines.

> +        } else {
> +                tsc      = rdtsc_ordered();
> +        }
> +	if (likely(tsc >= last))
> +		return tsc;        
> +        asm volatile ("");                                
> +        return last;
> +}

The rest looks ok.

Thanks,

	tglx

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH v4.16-rc4 1/1] x86/vdso: on Intel, VDSO should handle CLOCK_MONOTONIC_RAW
  2018-03-11  6:25 Jason Vas Dias
@ 2018-03-11  6:35 ` Jason Vas Dias
  0 siblings, 0 replies; 11+ messages in thread
From: Jason Vas Dias @ 2018-03-11  6:35 UTC (permalink / raw)
  To: x86, LKML, Thomas Gleixner, andi, Peter Zijlstra

[-- Attachment #1: Type: text/plain, Size: 243 bytes --]

Oops, please disregard the 1st mail on $subject - I guess use of Quoted
Printable is not a way of getting past the email line length.
The patch I tried to send is attached, and I will resend it inline using
another method.

Sorry, Regards, Jason

[-- Attachment #2: vdso_monotonic_raw-v4.16-rc4.patch --]
[-- Type: application/octet-stream, Size: 5065 bytes --]

diff -up linux-4.16-rc4/arch/x86/entry/vdso/vclock_gettime.c.4.16-rc4 linux-4.16-rc4/arch/x86/entry/vdso/vclock_gettime.c
--- linux-4.16-rc4/arch/x86/entry/vdso/vclock_gettime.c.4.16-rc4	2018-03-04 22:54:11.000000000 +0000
+++ linux-4.16-rc4/arch/x86/entry/vdso/vclock_gettime.c	2018-03-11 05:08:31.137681337 +0000
@@ -182,6 +182,29 @@ notrace static u64 vread_tsc(void)
 	return last;
 }
 
+notrace static u64 vread_tsc_raw(void)
+{
+        u64 tsc, last=gtod->raw_cycle_last;
+        if( likely( gtod->has_rdtscp ) ) {
+                u32     tsc_lo, tsc_hi,
+                        tsc_cpu __attribute__((unused));
+                asm volatile
+                ( "rdtscp"
+                /* ^- has built-in cancellation point / pipeline stall "barrier" */
+                : "=a" (tsc_lo)
+                , "=d" (tsc_hi)
+                , "=c" (tsc_cpu)
+                ); // since all variables 32-bit, eax, edx, ecx used - NOT rax, rdx, rcx                 
+                tsc      = ((((u64)tsc_hi) & 0xffffffffUL) << 32) | (((u64)tsc_lo) & 0xffffffffUL);
+        } else {
+                tsc      = rdtsc_ordered();
+        }
+	if (likely(tsc >= last))
+		return tsc;        
+        asm volatile ("");                                
+        return last;
+}
+
 notrace static inline u64 vgetsns(int *mode)
 {
 	u64 v;
@@ -203,6 +226,27 @@ notrace static inline u64 vgetsns(int *m
 	return v * gtod->mult;
 }
 
+notrace static inline u64 vgetsns_raw(int *mode)
+{
+	u64 v;
+	cycles_t cycles;
+
+	if (gtod->vclock_mode == VCLOCK_TSC)
+		cycles = vread_tsc_raw();
+#ifdef CONFIG_PARAVIRT_CLOCK
+	else if (gtod->vclock_mode == VCLOCK_PVCLOCK)
+		cycles = vread_pvclock(mode);
+#endif
+#ifdef CONFIG_HYPERV_TSCPAGE
+	else if (gtod->vclock_mode == VCLOCK_HVCLOCK)
+		cycles = vread_hvclock(mode);
+#endif
+	else
+		return 0;
+	v = (cycles - gtod->raw_cycle_last) & gtod->raw_mask;
+	return v * gtod->raw_mult;
+}
+
 /* Code size doesn't matter (vdso is 4k anyway) and this is faster. */
 notrace static int __always_inline do_realtime(struct timespec *ts)
 {
@@ -246,6 +290,27 @@ notrace static int __always_inline do_mo
 	return mode;
 }
 
+notrace static int __always_inline do_monotonic_raw( struct timespec *ts)
+{
+	unsigned long seq;
+	u64 ns;
+	int mode;
+
+	do {
+		seq = gtod_read_begin(gtod);
+		mode = gtod->vclock_mode;
+		ts->tv_sec = gtod->monotonic_time_raw_sec;
+		ns = gtod->monotonic_time_raw_nsec;
+		ns += vgetsns_raw(&mode);
+		ns >>= gtod->raw_shift;
+	} while (unlikely(gtod_read_retry(gtod, seq)));
+
+	ts->tv_sec += __iter_div_u64_rem(ns, NSEC_PER_SEC, &ns);
+	ts->tv_nsec = ns;
+
+	return mode;
+}
+
 notrace static void do_realtime_coarse(struct timespec *ts)
 {
 	unsigned long seq;
@@ -277,6 +342,10 @@ notrace int __vdso_clock_gettime(clockid
 		if (do_monotonic(ts) == VCLOCK_NONE)
 			goto fallback;
 		break;
+	case CLOCK_MONOTONIC_RAW:
+		if (do_monotonic_raw(ts) == VCLOCK_NONE)
+			goto fallback;
+		break;
 	case CLOCK_REALTIME_COARSE:
 		do_realtime_coarse(ts);
 		break;
diff -up linux-4.16-rc4/arch/x86/entry/vsyscall/vsyscall_gtod.c.4.16-rc4 linux-4.16-rc4/arch/x86/entry/vsyscall/vsyscall_gtod.c
--- linux-4.16-rc4/arch/x86/entry/vsyscall/vsyscall_gtod.c.4.16-rc4	2018-03-04 22:54:11.000000000 +0000
+++ linux-4.16-rc4/arch/x86/entry/vsyscall/vsyscall_gtod.c	2018-03-11 05:10:57.371197747 +0000
@@ -16,6 +16,7 @@
 #include <linux/timekeeper_internal.h>
 #include <asm/vgtod.h>
 #include <asm/vvar.h>
+#include <asm/cpufeature.h>
 
 int vclocks_used __read_mostly;
 
@@ -45,6 +46,12 @@ void update_vsyscall(struct timekeeper *
 	vdata->mult		= tk->tkr_mono.mult;
 	vdata->shift		= tk->tkr_mono.shift;
 
+	vdata->raw_cycle_last   = tk->tkr_raw.cycle_last;
+	vdata->raw_mask         = tk->tkr_raw.mask;
+	vdata->raw_mult         = tk->tkr_raw.mult;
+	vdata->raw_shift	= tk->tkr_raw.shift;
+	vdata->has_rdtscp	= static_cpu_has(X86_FEATURE_RDTSCP);
+
 	vdata->wall_time_sec		= tk->xtime_sec;
 	vdata->wall_time_snsec		= tk->tkr_mono.xtime_nsec;
 
@@ -74,5 +81,8 @@ void update_vsyscall(struct timekeeper *
 		vdata->monotonic_time_coarse_sec++;
 	}
 
+	vdata->monotonic_time_raw_sec  = tk->raw_sec;
+	vdata->monotonic_time_raw_nsec = tk->tkr_raw.xtime_nsec;
+
 	gtod_write_end(vdata);
 }
diff -up linux-4.16-rc4/arch/x86/include/asm/vgtod.h.4.16-rc4 linux-4.16-rc4/arch/x86/include/asm/vgtod.h
--- linux-4.16-rc4/arch/x86/include/asm/vgtod.h.4.16-rc4	2018-03-04 22:54:11.000000000 +0000
+++ linux-4.16-rc4/arch/x86/include/asm/vgtod.h	2018-03-11 05:12:35.916338703 +0000
@@ -22,6 +22,11 @@ struct vsyscall_gtod_data {
 	u64	mask;
 	u32	mult;
 	u32	shift;
+	u64	raw_cycle_last;
+	u64     raw_mask;
+	u32     raw_mult;
+	u32     raw_shift;
+	u32     has_rdtscp;
 
 	/* open coded 'struct timespec' */
 	u64		wall_time_snsec;
@@ -32,6 +37,8 @@ struct vsyscall_gtod_data {
 	gtod_long_t	wall_time_coarse_nsec;
 	gtod_long_t	monotonic_time_coarse_sec;
 	gtod_long_t	monotonic_time_coarse_nsec;
+	gtod_long_t	monotonic_time_raw_sec;
+	gtod_long_t	monotonic_time_raw_nsec;
 
 	int		tz_minuteswest;
 	int		tz_dsttime;

^ permalink raw reply	[flat|nested] 11+ messages in thread

* [PATCH v4.16-rc4 1/1] x86/vdso: on Intel, VDSO should handle CLOCK_MONOTONIC_RAW
@ 2018-03-11  6:25 Jason Vas Dias
  2018-03-11  6:35 ` Jason Vas Dias
  0 siblings, 1 reply; 11+ messages in thread
From: Jason Vas Dias @ 2018-03-11  6:25 UTC (permalink / raw)
  To: x86, LKML, Thomas Gleixner, andi, Peter Zijlstra


  Currently the VDSO does not handle
     clock_gettime( CLOCK_MONOTONIC_RAW, &ts )
  on Intel / AMD - it calls
     vdso_fallback_gettime()
  for this clock, which issues a syscall with an unacceptably high
  latency (minimum measurable time, or time between measurements)
  of 300-700ns on two 2.8-3.9GHz Haswell x86_64 Family_Model 06_3C
  machines under various versions of Linux.
     
  This patch handles CLOCK_MONOTONIC_RAW clock_gettime() in the VDSO,
  by exporting the raw clock calibration, last cycles, last xtime_nsec,
  and last raw_sec value in the vsyscall_gtod_data during update_vsyscall().

  Now the new do_monotonic_raw() function in the vDSO has a latency of
  about 24ns on average, and the test program:
   tools/testing/selftests/timers/inconsistency-check.c
  succeeds with arguments '-c 4 -t 120' or any arbitrary -t value.

  The patch is against Linus' latest 4.16-rc4 tree,
  current HEAD of :
    git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
  .

  The patch affects only files:
  
   arch/x86/include/asm/vgtod.h
   arch/x86/entry/vdso/vclock_gettime.c
   arch/x86/entry/vsyscall/vsyscall_gtod.c


  Best Regards,
     Jason Vas Dias  .
     
---
diff -up linux-4.16-rc4/arch/x86/entry/vdso/vclock_gettime.c.4.16-rc4 linux-4.16-rc4/arch/x86/entry/vdso/vclock_gettime.c
--- linux-4.16-rc4/arch/x86/entry/vdso/vclock_gettime.c.4.16-rc4	2018-03-04 22:54:11.000000000 +0000
+++ linux-4.16-rc4/arch/x86/entry/vdso/vclock_gettime.c	2018-03-11 05:08:31.137681337 +0000
@@ -182,6 +182,29 @@ notrace static u64 vread_tsc(void)
 	return last;
 }
 
+notrace static u64 vread_tsc_raw(void)
+{
+        u64 tsc, last=gtod->raw_cycle_last;
+        if( likely( gtod->has_rdtscp ) ) {
+                u32     tsc_lo, tsc_hi,
+                        tsc_cpu __attribute__((unused));
+                asm volatile
+                ( "rdtscp"
+                /* ^- has built-in cancellation point / pipeline stall "barrier" */
+                : "=a" (tsc_lo)
+                , "=d" (tsc_hi)
+                , "=c" (tsc_cpu)
+                ); // since all variables 32-bit, eax, edx, ecx used - NOT rax, rdx, rcx
+                tsc      = ((((u64)tsc_hi) & 0xffffffffUL) << 32) | (((u64)tsc_lo) & 0xffffffffUL);
+        } else {
+                tsc      = rdtsc_ordered();
+        }
+	if (likely(tsc >= last))
+		return tsc;
+        asm volatile ("");
+        return last;
+}
+
 notrace static inline u64 vgetsns(int *mode)
 {
 	u64 v;
@@ -203,6 +226,27 @@ notrace static inline u64 vgetsns(int *m
 	return v * gtod->mult;
 }
 
+notrace static inline u64 vgetsns_raw(int *mode)
+{
+	u64 v;
+	cycles_t cycles;
+
+	if (gtod->vclock_mode == VCLOCK_TSC)
+		cycles = vread_tsc_raw();
+#ifdef CONFIG_PARAVIRT_CLOCK
+	else if (gtod->vclock_mode == VCLOCK_PVCLOCK)
+		cycles = vread_pvclock(mode);
+#endif
+#ifdef CONFIG_HYPERV_TSCPAGE
+	else if (gtod->vclock_mode == VCLOCK_HVCLOCK)
+		cycles = vread_hvclock(mode);
+#endif
+	else
+		return 0;
+	v = (cycles - gtod->raw_cycle_last) & gtod->raw_mask;
+	return v * gtod->raw_mult;
+}
+
 /* Code size doesn't matter (vdso is 4k anyway) and this is faster. */
 notrace static int __always_inline do_realtime(struct timespec *ts)
 {
@@ -246,6 +290,27 @@ notrace static int __always_inline do_mo
 	return mode;
 }
 
+notrace static int __always_inline do_monotonic_raw( struct timespec *ts)
+{
+	unsigned long seq;
+	u64 ns;
+	int mode;
+
+	do {
+		seq = gtod_read_begin(gtod);
+		mode = gtod->vclock_mode;
+		ts->tv_sec = gtod->monotonic_time_raw_sec;
+		ns = gtod->monotonic_time_raw_nsec;
+		ns += vgetsns_raw(&mode);
+		ns >>= gtod->raw_shift;
+	} while (unlikely(gtod_read_retry(gtod, seq)));
+
+	ts->tv_sec += __iter_div_u64_rem(ns, NSEC_PER_SEC, &ns);
+	ts->tv_nsec = ns;
+
+	return mode;
+}
+
 notrace static void do_realtime_coarse(struct timespec *ts)
 {
 	unsigned long seq;
@@ -277,6 +342,10 @@ notrace int __vdso_clock_gettime(clockid
 		if (do_monotonic(ts) == VCLOCK_NONE)
 			goto fallback;
 		break;
+	case CLOCK_MONOTONIC_RAW:
+		if (do_monotonic_raw(ts) == VCLOCK_NONE)
+			goto fallback;
+		break;
 	case CLOCK_REALTIME_COARSE:
 		do_realtime_coarse(ts);
 		break;
diff -up linux-4.16-rc4/arch/x86/entry/vsyscall/vsyscall_gtod.c.4.16-rc4 linux-4.16-rc4/arch/x86/entry/vsyscall/vsyscall_gtod.c
--- linux-4.16-rc4/arch/x86/entry/vsyscall/vsyscall_gtod.c.4.16-rc4	2018-03-04 22:54:11.000000000 +0000
+++ linux-4.16-rc4/arch/x86/entry/vsyscall/vsyscall_gtod.c	2018-03-11 05:10:57.371197747 +0000
@@ -16,6 +16,7 @@
 #include <linux/timekeeper_internal.h>
 #include <asm/vgtod.h>
 #include <asm/vvar.h>
+#include <asm/cpufeature.h>
 
 int vclocks_used __read_mostly;
 
@@ -45,6 +46,12 @@ void update_vsyscall(struct timekeeper *
 	vdata->mult		= tk->tkr_mono.mult;
 	vdata->shift		= tk->tkr_mono.shift;
 
+	vdata->raw_cycle_last   = tk->tkr_raw.cycle_last;
+	vdata->raw_mask         = tk->tkr_raw.mask;
+	vdata->raw_mult         = tk->tkr_raw.mult;
+	vdata->raw_shift	= tk->tkr_raw.shift;
+	vdata->has_rdtscp	= static_cpu_has(X86_FEATURE_RDTSCP);
+
 	vdata->wall_time_sec		= tk->xtime_sec;
 	vdata->wall_time_snsec		= tk->tkr_mono.xtime_nsec;
 
@@ -74,5 +81,8 @@ void update_vsyscall(struct timekeeper *
 		vdata->monotonic_time_coarse_sec++;
 	}
 
+	vdata->monotonic_time_raw_sec  = tk->raw_sec;
+	vdata->monotonic_time_raw_nsec = tk->tkr_raw.xtime_nsec;
+
 	gtod_write_end(vdata);
 }
diff -up linux-4.16-rc4/arch/x86/include/asm/vgtod.h.4.16-rc4 linux-4.16-rc4/arch/x86/include/asm/vgtod.h
--- linux-4.16-rc4/arch/x86/include/asm/vgtod.h.4.16-rc4	2018-03-04 22:54:11.000000000 +0000
+++ linux-4.16-rc4/arch/x86/include/asm/vgtod.h	2018-03-11 05:12:35.916338703 +0000
@@ -22,6 +22,11 @@ struct vsyscall_gtod_data {
 	u64	mask;
 	u32	mult;
 	u32	shift;
+	u64	raw_cycle_last;
+	u64     raw_mask;
+	u32     raw_mult;
+	u32     raw_shift;
+	u32     has_rdtscp;
 
 	/* open coded 'struct timespec' */
 	u64		wall_time_snsec;
@@ -32,6 +37,8 @@ struct vsyscall_gtod_data {
 	gtod_long_t	wall_time_coarse_nsec;
 	gtod_long_t	monotonic_time_coarse_sec;
 	gtod_long_t	monotonic_time_coarse_nsec;
+	gtod_long_t	monotonic_time_raw_sec;
+	gtod_long_t	monotonic_time_raw_nsec;
 
 	int		tz_minuteswest;
 	int		tz_dsttime;

---

^ permalink raw reply	[flat|nested] 11+ messages in thread

* [PATCH v4.16-rc4 1/1] x86/vdso: on Intel, VDSO should handle CLOCK_MONOTONIC_RAW
@ 2018-03-11  6:25 Jason Vas Dias
  2018-03-11 18:01 ` Thomas Gleixner
  0 siblings, 1 reply; 11+ messages in thread
From: Jason Vas Dias @ 2018-03-11  6:25 UTC (permalink / raw)
  To: x86, LKML, Thomas Gleixner, andi, Peter Zijlstra


  Currently the VDSO does not handle
     clock_gettime( CLOCK_MONOTONIC_RAW, &ts )
  on Intel / AMD - it calls
     vdso_fallback_gettime()
  for this clock, which issues a syscall with an unacceptably high
  latency (minimum measurable time, or time between measurements)
  of 300-700ns on two 2.8-3.9GHz Haswell x86_64 Family_Model 06_3C
  machines under various versions of Linux.
     
  This patch handles CLOCK_MONOTONIC_RAW clock_gettime() in the VDSO,
  by exporting the raw clock calibration, last cycles, last xtime_nsec,
  and last raw_sec value in the vsyscall_gtod_data during update_vsyscall().

  Now the new do_monotonic_raw() function in the vDSO has a latency of
  about 24ns on average, and the test program:
   tools/testing/selftests/timers/inconsistency-check.c
  succeeds with arguments '-c 4 -t 120' or any arbitrary -t value.

  The patch is against Linus' latest 4.16-rc4 tree,
  current HEAD of :
    git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
  .

  The patch affects only files:
  
   arch/x86/include/asm/vgtod.h
   arch/x86/entry/vdso/vclock_gettime.c
   arch/x86/entry/vsyscall/vsyscall_gtod.c


  Best Regards,
     Jason Vas Dias  .
     
---
diff -up linux-4.16-rc4/arch/x86/entry/vdso/vclock_gettime.c.4.16-rc4 linux-4.16-rc4/arch/x86/entry/vdso/vclock_gettime.c
--- linux-4.16-rc4/arch/x86/entry/vdso/vclock_gettime.c.4.16-rc4	2018-03-04 22:54:11.000000000 +0000
+++ linux-4.16-rc4/arch/x86/entry/vdso/vclock_gettime.c	2018-03-11 05:08:31.137681337 +0000
@@ -182,6 +182,29 @@ notrace static u64 vread_tsc(void)
 	return last;
 }
 
+notrace static u64 vread_tsc_raw(void)
+{
+        u64 tsc, last=gtod->raw_cycle_last;
+        if( likely( gtod->has_rdtscp ) ) {
+                u32     tsc_lo, tsc_hi,
+                        tsc_cpu __attribute__((unused));
+                asm volatile
+                ( "rdtscp"
+                /* ^- has built-in cancellation point / pipeline stall "barrier" */
+                : "=a" (tsc_lo)
+                , "=d" (tsc_hi)
+                , "=c" (tsc_cpu)
+                ); // since all variables 32-bit, eax, edx, ecx used - NOT rax, rdx, rcx                 
+                tsc      = ((((u64)tsc_hi) & 0xffffffffUL) << 32) | (((u64)tsc_lo) & 0xffffffffUL);
+        } else {
+                tsc      = rdtsc_ordered();
+        }
+	if (likely(tsc >= last))
+		return tsc;        
+        asm volatile ("");                                
+        return last;
+}
+
 notrace static inline u64 vgetsns(int *mode)
 {
 	u64 v;
@@ -203,6 +226,27 @@ notrace static inline u64 vgetsns(int *m
 	return v * gtod->mult;
 }
 
+notrace static inline u64 vgetsns_raw(int *mode)
+{
+	u64 v;
+	cycles_t cycles;
+
+	if (gtod->vclock_mode == VCLOCK_TSC)
+		cycles = vread_tsc_raw();
+#ifdef CONFIG_PARAVIRT_CLOCK
+	else if (gtod->vclock_mode == VCLOCK_PVCLOCK)
+		cycles = vread_pvclock(mode);
+#endif
+#ifdef CONFIG_HYPERV_TSCPAGE
+	else if (gtod->vclock_mode == VCLOCK_HVCLOCK)
+		cycles = vread_hvclock(mode);
+#endif
+	else
+		return 0;
+	v = (cycles - gtod->raw_cycle_last) & gtod->raw_mask;
+	return v * gtod->raw_mult;
+}
+
 /* Code size doesn't matter (vdso is 4k anyway) and this is faster. */
 notrace static int __always_inline do_realtime(struct timespec *ts)
 {
@@ -246,6 +290,27 @@ notrace static int __always_inline do_mo
 	return mode;
 }
 
+notrace static int __always_inline do_monotonic_raw( struct timespec *ts)
+{
+	unsigned long seq;
+	u64 ns;
+	int mode;
+
+	do {
+		seq = gtod_read_begin(gtod);
+		mode = gtod->vclock_mode;
+		ts->tv_sec = gtod->monotonic_time_raw_sec;
+		ns = gtod->monotonic_time_raw_nsec;
+		ns += vgetsns_raw(&mode);
+		ns >>= gtod->raw_shift;
+	} while (unlikely(gtod_read_retry(gtod, seq)));
+
+	ts->tv_sec += __iter_div_u64_rem(ns, NSEC_PER_SEC, &ns);
+	ts->tv_nsec = ns;
+
+	return mode;
+}
+
 notrace static void do_realtime_coarse(struct timespec *ts)
 {
 	unsigned long seq;
@@ -277,6 +342,10 @@ notrace int __vdso_clock_gettime(clockid
 		if (do_monotonic(ts) == VCLOCK_NONE)
 			goto fallback;
 		break;
+	case CLOCK_MONOTONIC_RAW:
+		if (do_monotonic_raw(ts) == VCLOCK_NONE)
+			goto fallback;
+		break;
 	case CLOCK_REALTIME_COARSE:
 		do_realtime_coarse(ts);
 		break;
diff -up linux-4.16-rc4/arch/x86/entry/vsyscall/vsyscall_gtod.c.4.16-rc4 linux-4.16-rc4/arch/x86/entry/vsyscall/vsyscall_gtod.c
--- linux-4.16-rc4/arch/x86/entry/vsyscall/vsyscall_gtod.c.4.16-rc4	2018-03-04 22:54:11.000000000 +0000
+++ linux-4.16-rc4/arch/x86/entry/vsyscall/vsyscall_gtod.c	2018-03-11 05:10:57.371197747 +0000
@@ -16,6 +16,7 @@
 #include <linux/timekeeper_internal.h>
 #include <asm/vgtod.h>
 #include <asm/vvar.h>
+#include <asm/cpufeature.h>
 
 int vclocks_used __read_mostly;
 
@@ -45,6 +46,12 @@ void update_vsyscall(struct timekeeper *
 	vdata->mult		= tk->tkr_mono.mult;
 	vdata->shift		= tk->tkr_mono.shift;
 
+	vdata->raw_cycle_last   = tk->tkr_raw.cycle_last;
+	vdata->raw_mask         = tk->tkr_raw.mask;
+	vdata->raw_mult         = tk->tkr_raw.mult;
+	vdata->raw_shift	= tk->tkr_raw.shift;
+	vdata->has_rdtscp	= static_cpu_has(X86_FEATURE_RDTSCP);
+
 	vdata->wall_time_sec		= tk->xtime_sec;
 	vdata->wall_time_snsec		= tk->tkr_mono.xtime_nsec;
 
@@ -74,5 +81,8 @@ void update_vsyscall(struct timekeeper *
 		vdata->monotonic_time_coarse_sec++;
 	}
 
+	vdata->monotonic_time_raw_sec  = tk->raw_sec;
+	vdata->monotonic_time_raw_nsec = tk->tkr_raw.xtime_nsec;
+
 	gtod_write_end(vdata);
 }
diff -up linux-4.16-rc4/arch/x86/include/asm/vgtod.h.4.16-rc4 linux-4.16-rc4/arch/x86/include/asm/vgtod.h
--- linux-4.16-rc4/arch/x86/include/asm/vgtod.h.4.16-rc4	2018-03-04 22:54:11.000000000 +0000
+++ linux-4.16-rc4/arch/x86/include/asm/vgtod.h	2018-03-11 05:12:35.916338703 +0000
@@ -22,6 +22,11 @@ struct vsyscall_gtod_data {
 	u64	mask;
 	u32	mult;
 	u32	shift;
+	u64	raw_cycle_last;
+	u64     raw_mask;
+	u32     raw_mult;
+	u32     raw_shift;
+	u32     has_rdtscp;
 
 	/* open coded 'struct timespec' */
 	u64		wall_time_snsec;
@@ -32,6 +37,8 @@ struct vsyscall_gtod_data {
 	gtod_long_t	wall_time_coarse_nsec;
 	gtod_long_t	monotonic_time_coarse_sec;
 	gtod_long_t	monotonic_time_coarse_nsec;
+	gtod_long_t	monotonic_time_raw_sec;
+	gtod_long_t	monotonic_time_raw_nsec;
 
 	int		tz_minuteswest;
 	int		tz_dsttime;
---

^ permalink raw reply	[flat|nested] 11+ messages in thread

* [PATCH v4.16-rc4 1/1] x86/vdso: on Intel, VDSO should handle CLOCK_MONOTONIC_RAW
@ 2018-03-11  6:25 Jason Vas Dias
  2018-03-12  6:37 ` kbuild test robot
  2018-03-12  6:41 ` kbuild test robot
  0 siblings, 2 replies; 11+ messages in thread
From: Jason Vas Dias @ 2018-03-11  6:25 UTC (permalink / raw)
  To: x86, LKML, Thomas Gleixner, andi, Peter Zijlstra


  Currently the VDSO does not handle
     clock_gettime( CLOCK_MONOTONIC_RAW, &ts )
  on Intel / AMD - it calls
     vdso_fallback_gettime()
  for this clock, which issues a syscall with an unacceptably high
  latency (minimum measurable time, or time between measurements)
  of 300-700ns on two 2.8-3.9GHz Haswell x86_64 Family_Model 06_3C
  machines under various versions of Linux.

  Sometimes, particularly when correlating elapsed time to performance
  counter values, code needs to know elapsed time from the perspective
  of the CPU no matter how "hot" / fast or "cold" / slow it might be
  running with respect to NTP / PTP adjustments; when code needs this,
  the latency of a syscall is often unacceptably high.

  I reported this as Bug #198961 :
    https://bugzilla.kernel.org/show_bug.cgi?id=198961
  and in previous posts with subjects matching 'CLOCK_MONOTONIC_RAW' .
     
  This patch handles CLOCK_MONOTONIC_RAW clock_gettime() in the VDSO,
  by exporting the raw clock calibration, last cycles, last xtime_nsec,
  and last raw_sec value in the vsyscall_gtod_data during update_vsyscall().

  Now the new do_monotonic_raw() function in the vDSO has a latency of
  about 24ns on average, and the test program:
   tools/testing/selftests/timers/inconsistency-check.c
  succeeds with arguments '-c 4 -t 120' or any arbitrary -t value.

  The patch is against Linus' latest 4.16-rc4 tree,
  current HEAD of :
    git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
  .

  The patch affects only files:
  
   arch/x86/include/asm/vgtod.h
   arch/x86/entry/vdso/vclock_gettime.c
   arch/x86/entry/vsyscall/vsyscall_gtod.c


  This is a resend of the original patch, fixing indentation issues
  after installing the emacs Lisp cc-mode hooks from
      Documentation/coding-style.rst
  and running 'indent-region' and 'tabify' (whitespace-only changes) - SORRY !

  Best Regards,
     Jason Vas Dias  .
     
---
diff -up linux-4.16-rc4/arch/x86/entry/vdso/vclock_gettime.c.4.16-rc4 linux-4.16-rc4/arch/x86/entry/vdso/vclock_gettime.c
--- linux-4.16-rc4/arch/x86/entry/vdso/vclock_gettime.c.4.16-rc4	2018-03-04 22:54:11.000000000 +0000
+++ linux-4.16-rc4/arch/x86/entry/vdso/vclock_gettime.c	2018-03-11 19:00:04.630019100 +0000
@@ -182,6 +182,29 @@ notrace static u64 vread_tsc(void)
 	return last;
 }
 
+notrace static u64 vread_tsc_raw(void)
+{
+	u64 tsc, last=gtod->raw_cycle_last;
+	if( likely( gtod->has_rdtscp ) ) {
+		u32	tsc_lo, tsc_hi,
+			tsc_cpu __attribute__((unused));
+		asm volatile
+			( "rdtscp"
+				/* ^- has built-in cancellation point / pipeline stall"barrier" */
+				:   "=a" (tsc_lo)
+				  , "=d" (tsc_hi)
+				  , "=c" (tsc_cpu)
+			); // since all variables 32-bit, eax, edx, ecx used - NOT rax, rdx, rcx
+		tsc	= ((((u64)tsc_hi) & 0xffffffffUL) << 32) | (((u64)tsc_lo) & 0xffffffffUL);
+	} else {
+		tsc	= rdtsc_ordered();
+	}
+	if (likely(tsc >= last))
+		return tsc;
+	asm volatile ("");
+	return last;
+}
+
 notrace static inline u64 vgetsns(int *mode)
 {
 	u64 v;
@@ -203,6 +226,27 @@ notrace static inline u64 vgetsns(int *m
 	return v * gtod->mult;
 }
 
+notrace static inline u64 vgetsns_raw(int *mode)
+{
+	u64 v;
+	cycles_t cycles;
+
+	if (gtod->vclock_mode == VCLOCK_TSC)
+		cycles = vread_tsc_raw();
+#ifdef CONFIG_PARAVIRT_CLOCK
+	else if (gtod->vclock_mode == VCLOCK_PVCLOCK)
+		cycles = vread_pvclock(mode);
+#endif
+#ifdef CONFIG_HYPERV_TSCPAGE
+	else if (gtod->vclock_mode == VCLOCK_HVCLOCK)
+		cycles = vread_hvclock(mode);
+#endif
+	else
+		return 0;
+	v = (cycles - gtod->raw_cycle_last) & gtod->raw_mask;
+	return v * gtod->raw_mult;
+}
+
 /* Code size doesn't matter (vdso is 4k anyway) and this is faster. */
 notrace static int __always_inline do_realtime(struct timespec *ts)
 {
@@ -246,6 +290,27 @@ notrace static int __always_inline do_mo
 	return mode;
 }
 
+notrace static int __always_inline do_monotonic_raw( struct timespec *ts)
+{
+	unsigned long seq;
+	u64 ns;
+	int mode;
+
+	do {
+		seq = gtod_read_begin(gtod);
+		mode = gtod->vclock_mode;
+		ts->tv_sec = gtod->monotonic_time_raw_sec;
+		ns = gtod->monotonic_time_raw_nsec;
+		ns += vgetsns_raw(&mode);
+		ns >>= gtod->raw_shift;
+	} while (unlikely(gtod_read_retry(gtod, seq)));
+
+	ts->tv_sec += __iter_div_u64_rem(ns, NSEC_PER_SEC, &ns);
+	ts->tv_nsec = ns;
+
+	return mode;
+}
+
 notrace static void do_realtime_coarse(struct timespec *ts)
 {
 	unsigned long seq;
@@ -277,6 +342,10 @@ notrace int __vdso_clock_gettime(clockid
 		if (do_monotonic(ts) == VCLOCK_NONE)
 			goto fallback;
 		break;
+	case CLOCK_MONOTONIC_RAW:
+		if (do_monotonic_raw(ts) == VCLOCK_NONE)
+			goto fallback;
+		break;
 	case CLOCK_REALTIME_COARSE:
 		do_realtime_coarse(ts);
 		break;
diff -up linux-4.16-rc4/arch/x86/entry/vsyscall/vsyscall_gtod.c.4.16-rc4 linux-4.16-rc4/arch/x86/entry/vsyscall/vsyscall_gtod.c
--- linux-4.16-rc4/arch/x86/entry/vsyscall/vsyscall_gtod.c.4.16-rc4	2018-03-04 22:54:11.000000000 +0000
+++ linux-4.16-rc4/arch/x86/entry/vsyscall/vsyscall_gtod.c	2018-03-11 19:06:36.584178867 +0000
@@ -16,6 +16,7 @@
 #include <linux/timekeeper_internal.h>
 #include <asm/vgtod.h>
 #include <asm/vvar.h>
+#include <asm/cpufeature.h>
 
 int vclocks_used __read_mostly;
 
@@ -45,6 +46,12 @@ void update_vsyscall(struct timekeeper *
 	vdata->mult		= tk->tkr_mono.mult;
 	vdata->shift		= tk->tkr_mono.shift;
 
+	vdata->raw_cycle_last	= tk->tkr_raw.cycle_last;
+	vdata->raw_mask		= tk->tkr_raw.mask;
+	vdata->raw_mult		= tk->tkr_raw.mult;
+	vdata->raw_shift	= tk->tkr_raw.shift;
+	vdata->has_rdtscp	= static_cpu_has(X86_FEATURE_RDTSCP);
+
 	vdata->wall_time_sec		= tk->xtime_sec;
 	vdata->wall_time_snsec		= tk->tkr_mono.xtime_nsec;
 
@@ -74,5 +81,8 @@ void update_vsyscall(struct timekeeper *
 		vdata->monotonic_time_coarse_sec++;
 	}
 
+	vdata->monotonic_time_raw_sec  = tk->raw_sec;
+	vdata->monotonic_time_raw_nsec = tk->tkr_raw.xtime_nsec;
+
 	gtod_write_end(vdata);
 }
diff -up linux-4.16-rc4/arch/x86/include/asm/vgtod.h.4.16-rc4 linux-4.16-rc4/arch/x86/include/asm/vgtod.h
--- linux-4.16-rc4/arch/x86/include/asm/vgtod.h.4.16-rc4	2018-03-04 22:54:11.000000000 +0000
+++ linux-4.16-rc4/arch/x86/include/asm/vgtod.h	2018-03-11 18:58:36.670840250 +0000
@@ -22,6 +22,11 @@ struct vsyscall_gtod_data {
 	u64	mask;
 	u32	mult;
 	u32	shift;
+	u64	raw_cycle_last;
+	u64	raw_mask;
+	u32	raw_mult;
+	u32	raw_shift;
+	u32	has_rdtscp;
 
 	/* open coded 'struct timespec' */
 	u64		wall_time_snsec;
@@ -32,6 +37,8 @@ struct vsyscall_gtod_data {
 	gtod_long_t	wall_time_coarse_nsec;
 	gtod_long_t	monotonic_time_coarse_sec;
 	gtod_long_t	monotonic_time_coarse_nsec;
+	gtod_long_t	monotonic_time_raw_sec;
+	gtod_long_t	monotonic_time_raw_nsec;
 
 	int		tz_minuteswest;
 	int		tz_dsttime;
---

^ permalink raw reply	[flat|nested] 11+ messages in thread

end of thread, other threads:[~2018-03-12  8:27 UTC | newest]

Thread overview: 11+ messages
2018-03-11 19:46 [PATCH v4.16-rc4 1/1] x86/vdso: on Intel, VDSO should handle CLOCK_MONOTONIC_RAW Jason Vas Dias
2018-03-12  8:26 ` kbuild test robot
  -- strict thread matches above, loose matches on Subject: below --
2018-03-11  6:25 Jason Vas Dias
2018-03-12  6:37 ` kbuild test robot
2018-03-12  6:41 ` kbuild test robot
2018-03-11  6:25 Jason Vas Dias
2018-03-11  6:35 ` Jason Vas Dias
2018-03-11  6:25 Jason Vas Dias
2018-03-11 18:01 ` Thomas Gleixner
2018-03-11 22:03   ` Jason Vas Dias
2018-03-11 22:51     ` Thomas Gleixner
