All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 8/16] LTTng 0.6.36 for 2.6.18 : Timestamp
@ 2006-11-24 21:59 Mathieu Desnoyers
       [not found] ` <1164475747.5196.5.camel@localhost.localdomain>
  0 siblings, 1 reply; 20+ messages in thread
From: Mathieu Desnoyers @ 2006-11-24 21:59 UTC (permalink / raw)
  To: linux-kernel, Christoph Hellwig, Andrew Morton, Ingo Molnar,
	Greg Kroah-Hartman, Thomas Gleixner, Tom Zanussi, Karim Yaghmour,
	Paul Mundt, Jes Sorensen, Richard J Moore, Martin J. Bligh,
	Michel Dagenais, Douglas Niehaus, ltt-dev, systemtap

Architecture specific timestamping primitives.

patch08-2.6.18-lttng-core-0.6.36-timestamp.diff

Signed-off-by : Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>

--BEGIN--
--- /dev/null
+++ b/include/asm-alpha/ltt.h
@@ -0,0 +1,15 @@
+#ifndef _ASM_ALPHA_LTT_H
+#define _ASM_ALPHA_LTT_H
+/*
+ * linux/include/asm-alpha/ltt.h
+ *
+ * Copyright (C) 2005 - Mathieu Desnoyers (mathieu.desnoyers@polymtl.ca)
+ * Copyright (C) 2002, 2003 - Tom Zanussi (zanussi@us.ibm.com), IBM Corp
+ * Copyright (C) 2002 - Karim Yaghmour (karim@opersys.com)
+ *
+ * alpha architecture specific definitions for ltt
+ */
+
+#define LTT_HAS_TSC
+
+#endif
--- /dev/null
+++ b/include/asm-arm26/ltt.h
@@ -0,0 +1,5 @@
+#ifndef _ASM_ARM26_LTT_H
+#define _ASM_ARM26_LTT_H
+
+#include <asm-generic/ltt.h>
+#endif
--- /dev/null
+++ b/include/asm-arm/ltt.h
@@ -0,0 +1,82 @@
+/*
+ * linux/include/asm-arm/ltt.h
+ *
+ * Copyright (C) 2005, Mathieu Desnoyers
+ *
+ * ARM definitions for tracing system
+ */
+
+#ifndef _ASM_ARM_LTT_H
+#define _ASM_ARM_LTT_H
+
+#include <linux/jiffies.h>
+#include <linux/seqlock.h>
+
+#define LTT_ARCH_TYPE LTT_ARCH_TYPE_ARM
+#define LTT_ARCH_VARIANT LTT_ARCH_VARIANT_NONE
+
+#undef LTT_HAS_TSC
+
+#define LTTNG_LOGICAL_SHIFT 13
+
+extern atomic_t lttng_logical_clock;
+
+static inline u32 ltt_get_timestamp32(void)
+{
+	unsigned long seq;
+	unsigned long try = 5;
+	u32 ret;
+
+	do {
+		seq = read_seqbegin(&xtime_lock);
+		ret = (jiffies << LTTNG_LOGICAL_SHIFT) 
+			| (atomic_add_return(1, &lttng_logical_clock));
+	} while (read_seqretry(&xtime_lock, seq) && (--try) > 0);
+
+	if (try == 0)
+		return 0;
+	else
+		return ret;
+}
+
+
+/* The shift overflow doesn't matter */
+static inline u64 ltt_get_timestamp64(void)
+{
+	unsigned long seq;
+	unsigned long try = 5;
+	u64 ret;
+
+	do {
+		seq = read_seqbegin(&xtime_lock);
+		ret = (jiffies_64 << LTTNG_LOGICAL_SHIFT) 
+			| (atomic_add_return(1, &lttng_logical_clock));
+	} while (read_seqretry(&xtime_lock, seq) && (--try) > 0);
+
+	if (try == 0)
+		return 0;
+	else
+		return ret;
+}
+
+/* this has to be called with the write seqlock held */
+static inline void ltt_reset_timestamp(void)
+{
+	atomic_set(&lttng_logical_clock, 0);
+}
+
+
+static inline unsigned int ltt_frequency(void)
+{
+  return HZ << LTTNG_LOGICAL_SHIFT;
+}
+
+
+static inline u32 ltt_freq_scale(void)
+{
+  return 1;
+}
+
+
+
+#endif
--- /dev/null
+++ b/include/asm-cris/ltt.h
@@ -0,0 +1,5 @@
+#ifndef _ASM_CRIS_LTT_H
+#define _ASM_CRIS_LTT_H
+
+#include <asm-generic/ltt.h>
+#endif
--- /dev/null
+++ b/include/asm-frv/ltt.h
@@ -0,0 +1,5 @@
+#ifndef _ASM_FRV_LTT_H
+#define _ASM_FRV_LTT_H
+
+#include <asm-generic/ltt.h>
+#endif
--- /dev/null
+++ b/include/asm-generic/ltt.h
@@ -0,0 +1,12 @@
+#ifndef _ASM_GENERIC_LTT_H
+#define _ASM_GENERIC_LTT_H
+/*
+ * linux/include/asm-generic/ltt.h
+ *
+ * Copyright (C) 2005 - Mathieu Desnoyers (mathieu.desnoyers@polymtl.ca)
+ *
+ * Architecture dependent definitions for ltt
+ * Architecture without TSC
+ */
+
+#endif
--- /dev/null
+++ b/include/asm-h8300/ltt.h
@@ -0,0 +1,5 @@
+#ifndef _ASM_H8300_LTT_H
+#define _ASM_H8300_LTT_H
+
+#include <asm-generic/ltt.h>
+#endif
--- /dev/null
+++ b/include/asm-i386/ltt.h
@@ -0,0 +1,154 @@
+#ifndef _ASM_I386_LTT_H
+#define _ASM_I386_LTT_H
+/*
+ * linux/include/asm-i386/ltt.h
+ *
+ * Copyright (C) 2005,2006 - Mathieu Desnoyers (mathieu.desnoyers@polymtl.ca)
+ *
+ * i386 time and TSC definitions for ltt
+ */
+
+#include <linux/jiffies.h>
+#include <linux/seqlock.h>
+
+#include <asm/timex.h>
+#include <asm/processor.h>
+
+#define LTT_ARCH_TYPE LTT_ARCH_TYPE_I386
+#define LTT_ARCH_VARIANT LTT_ARCH_VARIANT_NONE
+
+#define LTTNG_LOGICAL_SHIFT 13
+
+extern atomic_t lttng_logical_clock;
+
+/* The shift overflow doesn't matter
+ * We use the xtime seq_lock to protect 64 bits clock and
+ * 32 bits ltt logical clock coherency.
+ *
+ * try 5 times. If it still fails, we are cleary in a NMI nested over
+ * the seq_lock. Return 0 -> error.
+ *
+ * 0 is considered an erroneous value.
+ */
+
+static inline u32 ltt_timestamp_no_tsc32(void)
+{
+	unsigned long seq;
+	unsigned long try = 5;
+	u32 ret;
+
+	do {
+		seq = read_seqbegin(&xtime_lock);
+		ret = (jiffies << LTTNG_LOGICAL_SHIFT) 
+			| (atomic_add_return(1, &lttng_logical_clock));
+	} while (read_seqretry(&xtime_lock, seq) && (--try) > 0);
+
+	if (try == 0)
+		return 0;
+	else
+		return ret;
+}
+
+
+static inline u64 ltt_timestamp_no_tsc64(void)
+{
+	unsigned long seq;
+	unsigned long try = 5;
+	u64 ret;
+
+	do {
+		seq = read_seqbegin(&xtime_lock);
+		ret = (jiffies_64 << LTTNG_LOGICAL_SHIFT) 
+			| (atomic_add_return(1, &lttng_logical_clock));
+	} while (read_seqretry(&xtime_lock, seq) && (--try) > 0);
+
+	if (try == 0)
+		return 0;
+	else
+		return ret;
+}
+
+#ifdef CONFIG_LTT_SYNTHETIC_TSC
+u64 ltt_heartbeat_read_synthetic_tsc(void);
+#endif //CONFIG_LTT_SYNTHETIC_TSC
+
+static inline u32 ltt_get_timestamp32(void)
+{
+#ifndef CONFIG_X86_TSC
+	if (!cpu_has_tsc)
+		return ltt_timestamp_no_tsc32();
+#endif
+
+#if defined(CONFIG_X86_GENERIC) || defined(CONFIG_X86_TSC)
+	return get_cycles(); /* only need the 32 LSB */
+#else
+	return ltt_timestamp_no_tsc32();
+#endif
+}
+
+static inline u64 ltt_get_timestamp64(void)
+{
+#ifndef CONFIG_X86_TSC
+	if (!cpu_has_tsc)
+		return ltt_timestamp_no_tsc64();
+#endif
+
+#if defined(CONFIG_X86_GENERIC) || defined(CONFIG_X86_TSC)
+#ifdef CONFIG_LTT_SYNTHETIC_TSC
+	return ltt_heartbeat_read_synthetic_tsc();
+#else
+	return get_cycles();
+#endif //CONFIG_LTT_SYNTHETIC_TSC
+#else
+	return ltt_timestamp_no_tsc64();
+#endif
+}
+
+/* this has to be called with the write seqlock held */
+static inline void ltt_reset_timestamp(void)
+{
+#ifndef CONFIG_X86_TSC
+	if (!cpu_has_tsc) {
+		atomic_set(&lttng_logical_clock, 0);
+		return;
+	}
+#endif
+
+#if defined(CONFIG_X86_GENERIC) || defined(CONFIG_X86_TSC)
+	return;
+#else
+	atomic_set(&lttng_logical_clock, 0);
+	return;
+#endif
+}
+
+static inline unsigned int ltt_frequency(void)
+{
+#ifndef CONFIG_X86_TSC
+	if (!cpu_has_tsc)
+  	return HZ << LTTNG_LOGICAL_SHIFT;
+#endif
+
+#if defined(CONFIG_X86_GENERIC) || defined(CONFIG_X86_TSC)
+	return cpu_khz;
+#else
+	return HZ << LTTNG_LOGICAL_SHIFT;
+#endif
+}
+
+static inline u32 ltt_freq_scale(void)
+{
+#ifndef CONFIG_X86_TSC
+	if (!cpu_has_tsc)
+  	return 1;
+#endif
+
+#if defined(CONFIG_X86_GENERIC) || defined(CONFIG_X86_TSC)
+	return 1000;
+#else
+	return 1;
+#endif
+
+}
+
+#endif //_ASM_I386_LTT_H
--- /dev/null
+++ b/include/asm-ia64/ltt.h
@@ -0,0 +1,6 @@
+#ifndef _ASM_IA64_LTT_H
+#define _ASM_IA64_LTT_H
+
+#define LTT_HAS_TSC
+
+#endif
--- /dev/null
+++ b/include/asm-m32r/ltt.h
@@ -0,0 +1,5 @@
+#ifndef _ASM_M32R_LTT_H
+#define _ASM_M32R_LTT_H
+
+#include <asm-generic/ltt.h>
+#endif
--- /dev/null
+++ b/include/asm-m68k/ltt.h
@@ -0,0 +1,5 @@
+#ifndef _ASM_M68K_LTT_H
+#define _ASM_M68K_LTT_H
+
+#include <asm-generic/ltt.h>
+#endif
--- /dev/null
+++ b/include/asm-m68knommu/ltt.h
@@ -0,0 +1,5 @@
+#ifndef _ASM_M68KNOMMU_LTT_H
+#define _ASM_M68KNOMMU_LTT_H
+
+#include <asm-generic/ltt.h>
+#endif
--- /dev/null
+++ b/include/asm-mips/ltt.h
@@ -0,0 +1,50 @@
+/*
+ * linux/include/asm-mips/ltt.h
+ *
+ * Copyright (C) 2005, Mathieu Desnoyers
+ *
+ * MIPS definitions for tracing system
+ */
+
+#ifndef _ASM_MIPS_LTT_H
+#define _ASM_MIPS_LTT_H
+
+#define LTT_HAS_TSC
+
+/* Current arch type */
+#define LTT_ARCH_TYPE LTT_ARCH_TYPE_MIPS
+
+/* Current variant type */
+#define LTT_ARCH_VARIANT LTT_ARCH_VARIANT_NONE
+
+#include <linux/ltt-core.h>
+#include <asm/timex.h>
+#include <asm/processor.h>
+
+u64 ltt_heartbeat_read_synthetic_tsc(void);
+
+/* MIPS get_cycles only returns a 32 bits TSC (see timex.h). The assumption
+ * there is that the reschedule is done every 8 seconds or so, so we must
+ * make sure there is at least an event (heartbeat) between  each TSC wrap
+ * around. We use the LTT synthetic TSC exactly for this. */
+static inline u32 ltt_get_timestamp32(void)
+{
+	return get_cycles();
+}
+
+static inline u64 ltt_get_timestamp64(void)
+{
+	return ltt_heartbeat_read_synthetic_tsc();
+}
+
+static inline unsigned int ltt_frequency(void)
+{
+	return mips_hpt_frequency;
+}
+
+static inline u32 ltt_freq_scale(void)
+{
+	return 1;
+}
+
+#endif //_ASM_MIPS_LTT_H
--- a/include/asm-mips/mipsregs.h
+++ b/include/asm-mips/mipsregs.h
@@ -383,6 +383,7 @@ #define ST0_XX			0x80000000	/* MIPS IV n
  */
 #define  CAUSEB_EXCCODE		2
 #define  CAUSEF_EXCCODE		(_ULCAST_(31)  <<  2)
+#define  CAUSE_EXCCODE(cause)	(((cause) & CAUSEF_EXCCODE) >> CAUSEB_EXCCODE)
 #define  CAUSEB_IP		8
 #define  CAUSEF_IP		(_ULCAST_(255) <<  8)
 #define  CAUSEB_IP0		8
--- a/include/asm-mips/timex.h
+++ b/include/asm-mips/timex.h
@@ -51,4 +51,6 @@ static inline cycles_t get_cycles (void)
 	return read_c0_count();
 }
 
+extern unsigned int mips_hpt_frequency;
+
 #endif /*  _ASM_TIMEX_H */
--- a/arch/mips/kernel/time.c
+++ b/arch/mips/kernel/time.c
@@ -771,6 +771,7 @@ EXPORT_SYMBOL(rtc_lock);
 EXPORT_SYMBOL(to_tm);
 EXPORT_SYMBOL(rtc_mips_set_time);
 EXPORT_SYMBOL(rtc_mips_get_time);
+EXPORT_SYMBOL(mips_hpt_frequency);
 
 unsigned long long sched_clock(void)
 {
--- /dev/null
+++ b/include/asm-parisc/ltt.h
@@ -0,0 +1,6 @@
+#ifndef _ASM_PARISC_LTT_H
+#define _ASM_PARISC_LTT_H
+
+#define LTT_HAS_TSC
+
+#endif
--- /dev/null
+++ b/include/asm-powerpc/ltt.h
@@ -0,0 +1,47 @@
+/*
+ * linux/include/asm-powerpc/ltt.h
+ *
+ * Copyright (C) 2005, Mathieu Desnoyers
+ *
+ * POWERPC definitions for tracing system
+ */
+
+#ifndef _ASM_POWERPC_LTT_H
+#define _ASM_POWERPC_LTT_H
+
+#define LTT_HAS_TSC
+
+/* Current arch type */
+#define LTT_ARCH_TYPE LTT_ARCH_TYPE_POWERPC
+
+/* Current variant type */
+#define LTT_ARCH_VARIANT LTT_ARCH_VARIANT_NONE
+
+#include <linux/ltt-core.h>
+#include <asm/timex.h>
+#include <asm/time.h>
+#include <asm/processor.h>
+
+u64 ltt_heartbeat_read_synthetic_tsc(void);
+
+static inline u32 ltt_get_timestamp32(void)
+{
+	return get_tbl();
+}
+
+static inline u64 ltt_get_timestamp64(void)
+{
+	return get_tb();
+}
+
+static inline unsigned int ltt_frequency(void)
+{
+	return tb_ticks_per_sec;
+}
+
+static inline u32 ltt_freq_scale(void)
+{
+	return 1;
+}
+
+#endif //_ASM_POWERPC_LTT_H
--- /dev/null
+++ b/include/asm-ppc/ltt.h
@@ -0,0 +1,150 @@
+/*
+ * linux/include/asm-ppc/ltt.h
+ *
+ * Copyright (C)	2002, Karim Yaghmour
+ *		 	2005, Mathieu Desnoyers
+ *
+ * PowerPC definitions for tracing system
+ */
+
+#ifndef _ASM_PPC_LTT_H
+#define _ASM_PPC_LTT_H
+
+#include <linux/config.h>
+#include <linux/jiffies.h>
+
+/* Current arch type */
+#define LTT_ARCH_TYPE LTT_ARCH_TYPE_PPC
+
+/* PowerPC variants */
+#define LTT_ARCH_VARIANT_PPC_4xx 1	/* 4xx systems (IBM embedded series) */
+#define LTT_ARCH_VARIANT_PPC_6xx 2	/* 6xx/7xx/74xx/8260/POWER3 systems
+					   (desktop flavor) */
+#define LTT_ARCH_VARIANT_PPC_8xx 3	/* 8xx system (Motoral embedded series)
+					 */
+#define LTT_ARCH_VARIANT_PPC_ISERIES 4	/* 8xx system (iSeries) */
+
+/* Current variant type */
+#if defined(CONFIG_4xx)
+#define LTT_ARCH_VARIANT LTT_ARCH_VARIANT_PPC_4xx
+#elif defined(CONFIG_6xx)
+#define LTT_ARCH_VARIANT LTT_ARCH_VARIANT_PPC_6xx
+#elif defined(CONFIG_8xx)
+#define LTT_ARCH_VARIANT LTT_ARCH_VARIANT_PPC_8xx
+#elif defined(CONFIG_PPC_ISERIES)
+#define LTT_ARCH_VARIANT LTT_ARCH_VARIANT_PPC_ISERIES
+#else
+#define LTT_ARCH_VARIANT LTT_ARCH_VARIANT_NONE
+#endif
+
+#define LTTNG_LOGICAL_SHIFT 13
+
+extern atomic_t lttng_logical_clock;
+
+
+/* The shift overflow doesn't matter */
+static inline u32 _ltt_get_timestamp32(void)
+{	
+	unsigned long seq;
+	unsigned long try = 5;
+	u32 ret;
+
+	do {
+		seq = read_seqbegin(&xtime_lock);
+		ret = (jiffies << LTTNG_LOGICAL_SHIFT) 
+			| (atomic_add_return(1, &lttng_logical_clock));
+	} while (read_seqretry(&xtime_lock, seq) && (--try) > 0);
+
+	if (try == 0)
+		return 0;
+	else
+		return ret;
+}
+
+static inline _ltt_get_tb32(u32 *p)
+{
+	unsigned lo;
+	asm volatile("mftb %0"
+		 : "=r" (lo));
+	p[0] = lo;
+}
+
+static inline u32 ltt_get_timestamp32(void)
+{
+	u32 ret;
+	if ((get_pvr() >> 16) == 1)
+		ret = _ltt_get_timestamp32();
+	else
+		_ltt_get_tb32((u32*)&ret);
+	return ret;
+}
+
+/* The shift overflow doesn't matter */
+static inline u64 _ltt_get_timestamp64(void)
+{	
+	unsigned long seq;
+	unsigned long try = 5;
+	u64 ret;
+
+	do {
+		seq = read_seqbegin(&xtime_lock);
+		ret = (jiffies_64 << LTTNG_LOGICAL_SHIFT) 
+			| (atomic_add_return(1, &lttng_logical_clock));
+	} while (read_seqretry(&xtime_lock, seq) && (--try) > 0);
+
+	if (try == 0)
+		return 0;
+	else
+		return ret;
+}
+
+#ifdef SMP
+#define pvr_ver (PVR_VER(current_cpu_data.pvr))
+#else
+#define pvr_ver (PVR_VER(mfspr(SPRN_PVR)))
+#endif
+
+/* from arch/ppc/xmon/xmon.c */
+static inline void _ltt_get_tb64(unsigned *p)
+{
+	unsigned hi, lo, hiagain;
+
+	do {
+		asm volatile("mftbu %0; mftb %1; mftbu %2"
+			 : "=r" (hi), "=r" (lo), "=r" (hiagain));
+	} while (hi != hiagain);
+	p[0] = hi;
+	p[1] = lo;
+}
+
+static inline u64 ltt_get_timestamp64(void)
+{
+	u64 ret;
+	if (pvr_ver == 1)
+  		ret = _ltt_get_timestamp64();
+	else
+		_ltt_get_tb64((unsigned*)&ret);
+	return ret;
+}
+
+/* this has to be called with the write seqlock held */
+static inline void ltt_reset_timestamp(void)
+{
+	if (pvr_ver == 1)
+		atomic_set(&lttng_logical_clock, 0);
+}
+
+static inline unsigned int ltt_frequency(void)
+{
+	if (pvr_ver == 1)
+		return HZ << LTTNG_LOGICAL_SHIFT;
+	else
+		return (tb_ticks_per_jiffy * HZ);
+}
+
+static inline u32 ltt_freq_scale(void)
+{
+	return 1;
+}
+
+#endif //_ASM_PPC_LTT_H
--- /dev/null
+++ b/include/asm-s390/ltt.h
@@ -0,0 +1,20 @@
+/*
+ * linux/include/asm-s390/ltt.h
+ *
+ * Copyright (C) 2002, Karim Yaghmour
+ *
+ * S/390 definitions for tracing system
+ */
+
+#ifndef _ASM_S390_LTT_H
+#define _ASM_S390_LTT_H
+
+#define LTT_HAS_TSC
+
+/* Current arch type */
+#define LTT_ARCH_TYPE LTT_ARCH_TYPE_S390
+
+/* Current variant type */
+#define LTT_ARCH_VARIANT LTT_ARCH_VARIANT_NONE
+
+#endif//_ASM_S390_LTT_H
--- /dev/null
+++ b/include/asm-sh64/ltt.h
@@ -0,0 +1,14 @@
+/*
+ * linux/include/asm-sh64/ltt.h
+ *
+ * Copyright (C) 2002, Karim Yaghmour
+ *
+ * SuperH definitions for tracing system
+ */
+
+#ifndef _ASM_SH_LTT_H
+#define _ASM_SH_LTT_H
+
+#include <asm-generic/ltt.h>
+
+#endif
--- /dev/null
+++ b/include/asm-sh/ltt.h
@@ -0,0 +1,14 @@
+/*
+ * linux/include/asm-sh/ltt.h
+ *
+ * Copyright (C) 2002, Karim Yaghmour
+ *
+ * SuperH definitions for tracing system
+ */
+
+#ifndef _ASM_SH_LTT_H
+#define _ASM_SH_LTT_H
+
+#include <asm-generic/ltt.h>
+
+#endif
--- /dev/null
+++ b/include/asm-sparc64/ltt.h
@@ -0,0 +1,15 @@
+#ifndef _ASM_SPARC64_LTT_H
+#define _ASM_SPARC64_LTT_H
+/*
+ * linux/include/asm-sparc64/ltt.h
+ *
+ * Copyright (C) 2005 - Mathieu Desnoyers (mathieu.desnoyers@polymtl.ca)
+ * Copyright (C) 2002, 2003 - Tom Zanussi (zanussi@us.ibm.com), IBM Corp
+ * Copyright (C) 2002 - Karim Yaghmour (karim@opersys.com)
+ *
+ * sparc64 time and TSC definitions for ltt
+ */
+
+#define LTT_HAS_TSC
+
+#endif
--- /dev/null
+++ b/include/asm-sparc/ltt.h
@@ -0,0 +1,5 @@
+#ifndef _ASM_SPARC_LTT_H
+#define _ASM_SPARC_LTT_H
+
+#include <asm-generic/ltt.h>
+#endif
--- /dev/null
+++ b/include/asm-um/ltt.h
@@ -0,0 +1,13 @@
+/*
+ * linux/include/asm-um/ltt.h
+ *
+ * Copyright (C) 2002, Karim Yaghmour
+ * 
+ */
+
+#ifndef _ASM_SH_LTT_H
+#define _ASM_SH_LTT_H
+
+#include <asm-generic/ltt.h>
+
+#endif
--- /dev/null
+++ b/include/asm-v850/ltt.h
@@ -0,0 +1,5 @@
+#ifndef _ASM_V850_LTT_H
+#define _ASM_V850_LTT_H
+
+#include <asm-generic/ltt.h>
+#endif
--- /dev/null
+++ b/include/asm-x86_64/ltt.h
@@ -0,0 +1,149 @@
+#ifndef _ASM_X86_64_LTT_H
+#define _ASM_X86_64_LTT_H
+/*
++ * linux/include/asm-x86_64/ltt.h
++ *
++ * x86_64 time and TSC definitions for ltt
++ */
+
+#include <asm/timex.h>
+#include <asm/processor.h>
+
+#define LTT_ARCH_TYPE LTT_ARCH_TYPE_X86_64
+#define LTT_ARCH_VARIANT LTT_ARCH_VARIANT_NONE
+
+#define LTTNG_LOGICAL_SHIFT 13
+
+extern atomic_t lttng_logical_clock;
+
+/* The shift overflow doesn't matter
+ * We use the xtime seq_lock to protect 64 bits clock and
+ * 32 bits ltt logical clock coherency.
+ *
+ * try 5 times. If it still fails, we are cleary in a NMI nested over
+ * the seq_lock. Return 0 -> error.
+ *
+ * 0 is considered an erroneous value.
+ */
+
+static inline u32 ltt_timestamp_no_tsc_32(void)
+{
+	unsigned long seq;
+	unsigned long try = 5;
+	u32 ret;
+
+	do {
+		seq = read_seqbegin(&xtime_lock);
+		ret = (jiffies << LTTNG_LOGICAL_SHIFT) 
+			| (atomic_add_return(1, &lttng_logical_clock));
+	} while (read_seqretry(&xtime_lock, seq) && (--try) > 0);
+
+	if (try == 0)
+		return 0;
+	else
+		return ret;
+}
+
+
+static inline u64 ltt_timestamp_no_tsc(void)
+{
+	unsigned long seq;
+	unsigned long try = 5;
+	u64 ret;
+
+	do {
+		seq = read_seqbegin(&xtime_lock);
+		ret = (jiffies_64 << LTTNG_LOGICAL_SHIFT) 
+			| (atomic_add_return(1, &lttng_logical_clock));
+	} while (read_seqretry(&xtime_lock, seq) && (--try) > 0);
+
+	if (try == 0)
+		return 0;
+	else
+		return ret;
+}
+
+#ifdef CONFIG_LTT_SYNTHETIC_TSC
+u64 ltt_heartbeat_read_synthetic_tsc(void);
+#endif //CONFIG_LTT_SYNTHETIC_TSC
+
+static inline u32 ltt_get_timestamp32(void)
+{
+#ifndef CONFIG_X86_TSC
+	if (!cpu_has_tsc)
+		return ltt_timestamp_no_tsc32();
+#endif
+
+#if defined(CONFIG_X86_GENERIC) || defined(CONFIG_X86_TSC)
+	return get_cycles(); /* only need the 32 LSB */
+#else
+	return ltt_timestamp_no_tsc32();
+#endif
+}
+
+static inline u64 ltt_get_timestamp64(void)
+{
+#ifndef CONFIG_X86_TSC
+	if (!cpu_has_tsc)
+		return ltt_timestamp_no_tsc64();
+#endif
+
+#if defined(CONFIG_X86_GENERIC) || defined(CONFIG_X86_TSC)
+#ifdef CONFIG_LTT_SYNTHETIC_TSC
+	return ltt_heartbeat_read_synthetic_tsc();
+#else
+	return get_cycles();
+#endif //CONFIG_LTT_SYNTHETIC_TSC
+#else
+	return ltt_timestamp_no_tsc64();
+#endif
+}
+
+/* this has to be called with the write seqlock held */
+static inline void ltt_reset_timestamp(void)
+{
+#ifndef CONFIG_X86_TSC
+	if (!cpu_has_tsc) {
+		atomic_set(&lttng_logical_clock, 0);
+		return;
+	}
+#endif
+
+#if defined(CONFIG_X86_GENERIC) || defined(CONFIG_X86_TSC)
+	return;
+#else
+	atomic_set(&lttng_logical_clock, 0);
+	return;
+#endif
+}
+
+static inline unsigned int ltt_frequency(void)
+{
+#ifndef CONFIG_X86_TSC
+	if (!cpu_has_tsc)
+  	return HZ << LTTNG_LOGICAL_SHIFT;
+#endif
+
+#if defined(CONFIG_X86_GENERIC) || defined(CONFIG_X86_TSC)
+	return cpu_khz;
+#else
+	return HZ << LTTNG_LOGICAL_SHIFT;
+#endif
+}
+
+static inline u32 ltt_freq_scale(void)
+{
+#ifndef CONFIG_X86_TSC
+	if (!cpu_has_tsc)
+  	return 1;
+#endif
+
+#if defined(CONFIG_X86_GENERIC) || defined(CONFIG_X86_TSC)
+	return 1000;
+#else
+	return 1;
+#endif
+
+}
+
+#endif //_ASM_X86_64_LTT_H
--- /dev/null
+++ b/include/asm-xtensa/ltt.h
@@ -0,0 +1,5 @@
+#ifndef _ASM_XTENSA_LTT_H
+#define _ASM_XTENSA_LTT_H
+
+#include <asm-generic/ltt.h>
+#endif
--END--

OpenPGP public key:              http://krystal.dyndns.org:8080/key/compudj.gpg
Key fingerprint:     8CD5 52C3 8E3C 4140 715F  BA06 3F25 A8FE 3BAE 9A68 

^ permalink raw reply	[flat|nested] 20+ messages in thread

* [RFC] Fast assurate clock readable from user space and NMI handler
       [not found]         ` <1164585589.16871.52.camel@localhost.localdomain>
@ 2007-02-24 16:19           ` Mathieu Desnoyers
  2007-02-24 18:06             ` Daniel Walker
  0 siblings, 1 reply; 20+ messages in thread
From: Mathieu Desnoyers @ 2007-02-24 16:19 UTC (permalink / raw)
  To: mbligh, Daniel Walker; +Cc: linux-kernel

Hi,

I am trying to improve the Linux kernel time source so it can be read
without seqlock from NMI handlers. I have also seen some interest for
such an accurate monotonic clock readable from user space. It mainly
implies an atomic update of the time value. I am also trying to figure a
way to support architectures with multiple CPUs with non-synchronized
TSCs.

I would like to have your comments on the following idea.

Thanks in advance,

Mathieu


Monotonic accurate time

The goal of this design is to provide a monotonic time :

Readable from userspace without a system call
Readable from NMI handler
Readable without disabling interrupts
Readable without disabling preemption
Only one clock source (most precise available : tsc)
Support architectures with variable TSC frequency.

Main difference with wall time currently implemented in the Linux kernel : the
time update is done atomically instead of using a write seqlock. It permits
reading time from NMI handler and from userspace.

struct time_info {
	u64 tsc;
	u64 freq;
	u64 walltime;
}

static struct time_struct {
	struct time_info time_sel[2];
	long update_count;
}

DECLARE_PERCPU(struct time_struct, cpu_time);

/* Number of times the scheduler is called on each CPU */
DECLARE_PERCPU(unsigned long, sched_nr);

/* On frequency change event */
/* In irq context */
void freq_change_cb(unsigned int new_freq)
{
	struct time_struct this_cpu_time = 
		per_cpu(cpu_time, smp_processor_id());
	struct time_info *write_time, *current_time;
	write_time =
		this_cpu_time->time_sel[(this_cpu_time->update_count+1)&1];
	current_time =
		this_cpu_time->time_sel[(this_cpu_time->update_count)&1];
	write_time->tsc = get_cycles();
	write_time->freq = new_freq;
	/* We cumulate the division imprecision. This is the downside of using
	 * the TSC with variable frequency as a time base. */
	write_time->walltime = 
		current_time->walltime + 
			(write_time->tsc - current_time->tsc) /
			current_time->freq;
	wmb();
	this_cpu_time->update_count++;
}


/* Init cpu freq */
init_cpu_freq()
{
	struct time_struct this_cpu_time = 
		per_cpu(cpu_time, smp_processor_id());
	struct time_info *current_time;
	memset(this_cpu_time, 0, sizeof(this_cpu_time));
	current_time = this_cpu_time->time_sel[this_cpu_time->update_count&1];
	/* Init current time */
	/* Get frequency */
	/* Reset cpus to 0 ns, 0 tsc, start their tsc. */
}


/* After a CPU comes back from hlt */
/* The trick is to sync all the other CPUs on the first CPU up when they come
 * up. If all CPUs are down, then there is no need to increment the walltime :
 * let's simply define the useful walltime on a machine as the time elapsed
 * while there is a CPU running. If we want, when no cpu is active, we can use
 * a lower resolution clock to somehow keep track of walltime. */

wake_from_hlt()
{
	/* TODO */
}



/* Read time from anywhere in the kernel. Return time in walltime. (ns) */
/* If the update_count changes while we read the context, it may be invalid.
 * This would happen if we are scheduled out for a period of time long enough to
 * permit 2 frequency changes. We simply start the loop again if it happens.
 * We detect it by comparing the update_count running counter.
 * We detect preemption by incrementing a counter sched_nr within schedule(). 
 * This counter is readable by user space through the vsyscall page. */
u64 read_time(void)
{
	u64 walltime;
	long update_count;
	struct time_struct this_cpu_time;
	struct time_info *current_time;
	unsigned int cpu;
	long prev_sched_nr;
	do {
		cpu = _smp_processor_id();
		prev_sched_nr = per_cpu(sched_nr, cpu);
		if(cpu != _smp_processor_id())
			continue;	/* changed CPU between CPUID and getting
					   sched_nr */
		this_cpu_time = per_cpu(cpu_time, cpu);
		update_count = this_cpu_time->update_count;
		current_time = this_cpu_time->time_sel[update_count&1];
		walltime = current_time->walltime + 
				(get_cycles() - current_time->tsc) /
				current_time->freq;
		if(per_cpu(sched_nr, cpu) != prev_sched_nr)
			continue;	/* been preempted */
	} while(this_cpu_time->update_count != update_count);
	return walltime;
}

/* Userspace */
/* Export all this data to user space through the vsyscall page. Use a function
 * like read_time to read the walltime. This function can be implemented as-is
 * because it doesn't need to disable preemption. */




-- 
Mathieu Desnoyers
Computer Engineering Ph.D. Candidate, Ecole Polytechnique de Montreal
OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F  BA06 3F25 A8FE 3BAE 9A68

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [RFC] Fast assurate clock readable from user space and NMI handler
  2007-02-24 16:19           ` [RFC] Fast assurate clock readable from user space and NMI handler Mathieu Desnoyers
@ 2007-02-24 18:06             ` Daniel Walker
  2007-02-26 20:53               ` Mathieu Desnoyers
  0 siblings, 1 reply; 20+ messages in thread
From: Daniel Walker @ 2007-02-24 18:06 UTC (permalink / raw)
  To: Mathieu Desnoyers; +Cc: mbligh, linux-kernel, johnstul

On Sat, 2007-02-24 at 11:19 -0500, Mathieu Desnoyers wrote:
> Hi,
> 
> I am trying to improve the Linux kernel time source so it can be read
> without seqlock from NMI handlers. I have also seen some interest for
> such an accurate monotonic clock readable from user space. It mainly
> implies an atomic update of the time value. I am also trying to figure a
> way to support architectures with multiple CPUs with non-synchronized
> TSCs.
> 
> I would like to have your comments on the following idea.
> 
> Thanks in advance,
> 
> Mathieu
> 
> 
> Monotonic accurate time
> 
> The goal of this design is to provide a monotonic time :
> 
> Readable from userspace without a system call
> Readable from NMI handler
> Readable without disabling interrupts
> Readable without disabling preemption
> Only one clock source (most precise available : tsc)
> Support architectures with variable TSC frequency.

I don't think you could use only the tsc. From reviewing John's and
Thomas's work it's pretty clear the TSC isn't going to work correctly all
the time.

> /* On frequency change event */
> /* In irq context */
> void freq_change_cb(unsigned int new_freq)
> {

It's possible for the TSC to change frequencies without notification. It
can also completely stop when the system goes idle.

> /* Userspace */
> /* Export all this data to user space through the vsyscall page. Use a function
>  * like read_time to read the walltime. This function can be implemented as-is
>  * because it doesn't need to disable preemption. */

What would be the benefit of using this over the vsyscall gettimeofday()
from userspace ? 

Daniel


^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [RFC] Fast assurate clock readable from user space and NMI handler
  2007-02-24 18:06             ` Daniel Walker
@ 2007-02-26 20:53               ` Mathieu Desnoyers
  2007-02-26 21:27                 ` Daniel Walker
  0 siblings, 1 reply; 20+ messages in thread
From: Mathieu Desnoyers @ 2007-02-26 20:53 UTC (permalink / raw)
  To: Daniel Walker; +Cc: mbligh, linux-kernel, johnstul

* Daniel Walker (dwalker@mvista.com) wrote:
> On Sat, 2007-02-24 at 11:19 -0500, Mathieu Desnoyers wrote:
> > Hi,
> > 
> > I am trying to improve the Linux kernel time source so it can be read
> > without seqlock from NMI handlers. I have also seen some interest for
> > such an accurate monotonic clock readable from user space. It mainly
> > implies an atomic update of the time value. I am also trying to figure a
> > way to support architectures with multiple CPUs with non-synchronized
> > TSCs.
> > 
> > I would like to have your comments on the following idea.
> > 
> > Thanks in advance,
> > 
> > Mathieu
> > 
> > 
> > Monotonic accurate time
> > 
> > The goal of this design is to provide a monotonic time :
> > 
> > Readable from userspace without a system call
> > Readable from NMI handler
> > Readable without disabling interrupts
> > Readable without disabling preemption
> > Only one clock source (most precise available : tsc)
> > Support architectures with variable TSC frequency.
> 
> I don't think you could use only the tsc. From reviewing John's and
> Thomas's work it's pretty clear the TSC isn't going to work correctly all
> the time.
> 

Ok, if there are other high precision timers we can use, I guess it may
be better to fall back on them.

> > /* On frequency change event */
> > /* In irq context */
> > void freq_change_cb(unsigned int new_freq)
> > {
> 
> It's possible for the TSC to change frequencies without notification. It
> can also completely stop when the system goes idle.
> 

Hrm, I see. I thought those frequency changes without notification would happen
rarely and could be dealt with by resynchronizing the CPUs. I guess I was
wrong.

> > /* Userspace */
> > /* Export all this data to user space through the vsyscall page. Use a function
> >  * like read_time to read the walltime. This function can be implemented as-is
> >  * because it doesn't need to disable preemption. */
> 
> What would be the benefit of using this over the vsyscall gettimeofday()
> from userspace ? 
> 

So we can get a monotonic, non NTP corrected timestamp quickly from user
space without going through a system call. Are there other alternatives ?

Thanks,

Mathieu

-- 
Mathieu Desnoyers
Computer Engineering Ph.D. Candidate, Ecole Polytechnique de Montreal
OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F  BA06 3F25 A8FE 3BAE 9A68

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [RFC] Fast assurate clock readable from user space and NMI handler
  2007-02-26 20:53               ` Mathieu Desnoyers
@ 2007-02-26 21:27                 ` Daniel Walker
  2007-02-26 22:14                   ` Mathieu Desnoyers
  0 siblings, 1 reply; 20+ messages in thread
From: Daniel Walker @ 2007-02-26 21:27 UTC (permalink / raw)
  To: Mathieu Desnoyers; +Cc: mbligh, linux-kernel, johnstul

On Mon, 2007-02-26 at 15:53 -0500, Mathieu Desnoyers wrote:

> > > /* On frequency change event */
> > > /* In irq context */
> > > void freq_change_cb(unsigned int new_freq)
> > > {
> > 
> > It's possible for the TSC to change frequencies without notification. It
> > can also completely stop when the system goes idle.
> > 
> 
> Hrm, I see. I thought those frequency changes without notification would happen
> rarely and could be dealt with by resynchronizing the CPUs. I guess I was
> wrong.

The system could be UP .. I don't think tracking this kind of thing is
trivial, and given the TSC track record you have to assume there will
be other issues in the future.

> > > /* Userspace */
> > > /* Export all this data to user space through the vsyscall page. Use a function
> > >  * like read_time to read the walltime. This function can be implemented as-is
> > >  * because it doesn't need to disable preemption. */
> > 
> > What would be the benefit of using this over the vsyscall gettimeofday()
> > from userspace ? 
> > 
> 
> So we can get a monotonic, non NTP corrected timestamp quickly from user
> space without going through a system call. Are there other alternatives ?

The NTP daemon needs to be active AFAIK before you would start observing
weird time jumps. There are adjustments made without NTP but they are
only seen over short periods.. So I still think gettimeofday() would be
an alternative ..

Have you considered adding something to glibc? You could access only the
TSC from userspace.. I don't think the addition of a vsyscall/syscall
for this would go over too well considering that there are other ways to
get timestamps. It might help if you tell us what you think this would
be used for in userspace ?

Daniel


^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [RFC] Fast assurate clock readable from user space and NMI handler
  2007-02-26 21:27                 ` Daniel Walker
@ 2007-02-26 22:14                   ` Mathieu Desnoyers
  2007-02-26 23:12                     ` Daniel Walker
  0 siblings, 1 reply; 20+ messages in thread
From: Mathieu Desnoyers @ 2007-02-26 22:14 UTC (permalink / raw)
  To: Daniel Walker; +Cc: mbligh, linux-kernel, johnstul

* Daniel Walker (dwalker@mvista.com) wrote:
> On Mon, 2007-02-26 at 15:53 -0500, Mathieu Desnoyers wrote:
> 
> > > > /* On frequency change event */
> > > > /* In irq context */
> > > > void freq_change_cb(unsigned int new_freq)
> > > > {
> > > 
> > > It's possible for the TSC to change frequencies without notification. It
> > > can also completely stop when the system goes idle.
> > > 
> > 
> > Hrm, I see. I thought those freq changes without notification would happen
> > rarely and could be dealt with by resynchronizing the CPUs. I guess I was
> > wrong.
> 
> The system could be UP .. I don't think tracking this kind of thing is
> trivial, and given the TSC track record you have to assume there will
> be other issues in the future.
> 

The other solution, good for UP, would be to sychronize the TSC with
another clock source (HPET or timer), but it starts to look pretty much
like what is done right now.

> > > > /* Userspace */
> > > > /* Export all this data to user space through the vsyscall page. Use a function
> > > >  * like read_time to read the walltime. This function can be implemented as-is
> > > >  * because it doesn't need to disable preemption. */
> > > 
> > > What would be the benefit of using this over the vsyscall gettimeofday()
> > > from userspace ? 
> > > 
> > 
> > So we can get a monotonic, non NTP corrected timestamp quickly from user
> > space without going through a system call. Are there other alternatives ?
> 
> The NTP daemon needs to be active AFAIK before you would start observing
> weird time jumps. There are adjustments made without NTP but they are
> only seen over short periods.. So I still think gettimeofday() would be
> an alternative ..
> 
> Have you considered adding something to glibc? You could access only the
> TSC from userspace.. I don't think the addition of a vsyscall/syscall
> for this would go over too well considering that there are other ways to
> get timestamps. It might help if you tell us what you think this would
> be used for in userspace ?
> 

For kernel and user space tracing, those small jumps are very annoying :
it can show, in a trace, that a fork() appears on a CPU after the first
schedule() of the thread on the other CPU : scheduling causality relationship
can become very hard to follow. This is only a sample case. Inaccuracy and
periodical modification of the clock time (non monotonic) can cause important
inaccuracy in performance tests, even on UP systems. A monotonic clock,
accessible from anywhere in kernel space (including NMI handler) and
from user space is very useful for performance analysis and, more
generally, for timestamping data in per cpu buffers so it can be later
reordered correctly.

Mathieu

> Daniel
> 

-- 
Mathieu Desnoyers
Computer Engineering Ph.D. Candidate, Ecole Polytechnique de Montreal
OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F  BA06 3F25 A8FE 3BAE 9A68

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [RFC] Fast assurate clock readable from user space and NMI handler
  2007-02-26 22:14                   ` Mathieu Desnoyers
@ 2007-02-26 23:12                     ` Daniel Walker
  2007-02-27  3:54                       ` Mathieu Desnoyers
  0 siblings, 1 reply; 20+ messages in thread
From: Daniel Walker @ 2007-02-26 23:12 UTC (permalink / raw)
  To: Mathieu Desnoyers; +Cc: mbligh, linux-kernel, johnstul, mingo

On Mon, 2007-02-26 at 17:14 -0500, Mathieu Desnoyers wrote:


> For kernel and user space tracing, those small jumps are very annoying :
> it can show, in a trace, that a fork() appears on a CPU after the first
> schedule() of the thread on the other CPU : scheduling causality relationship
> can become very hard to follow. This is only a sample case. Inaccuracy and
> periodical modification of the clock time (non monotonic) can cause important
> inaccuracy in performance tests, even on UP systems. A monotonic clock,
> accessible from anywhere in kernel space (including NMI handler) and
> from user space is very useful for performance analysis and, more
> generally, for timestamping data in per cpu buffers so it can be later
> reordered correctly.

What about adding a layer below do_gettimeofday() which just scheds the
adjustment process? That might be reasonable .. The NMI, and userspace
cases aren't very compelling right now, at least I'm not convinced a
whole new timing interface is needed ..

The latency tracing system in the -rt branch modifies the gettimeofday
facilities , I'm not sure of the correctness of it but it gets called
from anyplace in the kernel including NMI's . 

Here's the function,

cycle_t notrace get_monotonic_cycles(void)
{
        cycle_t cycle_now, cycle_delta;

        /* read clocksource: */
        cycle_now = clocksource_read(clock);

        /* calculate the delta since the last update_wall_time: */
        cycle_delta = (cycle_now - clock->cycle_last) & clock->mask;

        return clock->cycle_last + cycle_delta;
}

That looks safe. When converting this to nanoseconds you would still get
the time adjustments but it would be all at once instead of in little
increments ..

Daniel


^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [RFC] Fast assurate clock readable from user space and NMI handler
  2007-02-26 23:12                     ` Daniel Walker
@ 2007-02-27  3:54                       ` Mathieu Desnoyers
  2007-02-27  4:22                         ` Daniel Walker
  0 siblings, 1 reply; 20+ messages in thread
From: Mathieu Desnoyers @ 2007-02-27  3:54 UTC (permalink / raw)
  To: Daniel Walker; +Cc: mbligh, linux-kernel, johnstul, mingo

* Daniel Walker (dwalker@mvista.com) wrote:
> On Mon, 2007-02-26 at 17:14 -0500, Mathieu Desnoyers wrote:
> 
> 
> > For kernel and user space tracing, those small jumps are very annoying :
> > it can show, in a trace, that a fork() appears on a CPU after the first
> > schedule() of the thread on the other CPU : scheduling causality relationship
> > can become very hard to follow. This is only a sample case. Inaccuracy and
> > periodical modification of the clock time (non monotonic) can cause important
> > inaccuracy in performance tests, even on UP systems. A monotonic clock,
> > accessible from anywhere in kernel space (including NMI handler) and
> > from user space is very useful for performance analysis and, more
> > generally, for timestamping data in per cpu buffers so it can be later
> > reordered correctly.
> 
> What about adding a layer below do_gettimeofday() which just scheds the
> adjustment process? That might be reasonable .. The NMI, and userspace
> cases aren't very compelling right now, at least I'm not convinced a
> whole new timing interface is needed ..
> 
> The latency tracing system in the -rt branch modifies the gettimeofday
> facilities , I'm not sure of the correctness of it but it gets called
> from anyplace in the kernel including NMI's . 
> 
> Here's the function,
> 
> cycle_t notrace get_monotonic_cycles(void)
> {
>         cycle_t cycle_now, cycle_delta;
> 
>         /* read clocksource: */
>         cycle_now = clocksource_read(clock);
> 
>         /* calculate the delta since the last update_wall_time: */
>         cycle_delta = (cycle_now - clock->cycle_last) & clock->mask;
> 
>         return clock->cycle_last + cycle_delta;
> }
> 
> That looks safe. When converting this to nanoseconds you would still get
> the time adjustments but it would be all at once instead of in little
> increments ..
> 

ouch... if the clocksource used is the PIT on x86 :

static cycle_t pit_read(void)
{
        unsigned long flags;
        int count;
        u32 jifs;
        static int old_count;
        static u32 old_jifs;

        spin_lock_irqsave(&i8253_lock, flags);

If an NMI nests over the spinlock, we have a deadlock.

In addition, clock->cycle_last is a cycle_t, defined as 64 bits on
x86. It is therefore not updated atomically by change_clocksource,
timekeeping_init, timekeeping_resume and update_wall_time. If an NMI
fires right on top of the update, especially around the 32 bits wrap
around, the time will be really fuzzy.

Mathieu

> Daniel
> 

-- 
Mathieu Desnoyers
Computer Engineering Ph.D. Student, Ecole Polytechnique de Montreal
OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F  BA06 3F25 A8FE 3BAE 9A68

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [RFC] Fast assurate clock readable from user space and NMI handler
  2007-02-27  3:54                       ` Mathieu Desnoyers
@ 2007-02-27  4:22                         ` Daniel Walker
  2007-02-27  4:47                           ` Mathieu Desnoyers
  2007-02-27  6:29                           ` Ingo Molnar
  0 siblings, 2 replies; 20+ messages in thread
From: Daniel Walker @ 2007-02-27  4:22 UTC (permalink / raw)
  To: Mathieu Desnoyers; +Cc: mbligh, linux-kernel, johnstul, mingo

On Mon, 2007-02-26 at 22:54 -0500, Mathieu Desnoyers wrote:
> If an NMI nests over the spinlock, we have a deadlock.

Maybe not completely safe ...

> In addition, clock->cycle_last is a cycle_t, defined as 64 bits on
> x86. It is therefore not updated atomically by change_clocksource,
> timekeeping_init, timekeeping_resume and update_wall_time. If an NMI
> fires right on top of the update, especially around the 32 bits wrap
> around, the time will be really fuzzy.

I'm not sure that is particularly significant considering that it's just
a possible bad timestamp, and the probability of that happening seems
rather low .. You could also modify NMI calls so they use a different
time stamping method, like reading the clocksource directly .

The pit clocksource could be dropped pretty easy with my clocksource
update patches, which I'm still working on but you could easily drop
clock sources that aren't atomic like the pit .. Also the pit is
generally undesirable, so it's not going to be missed.

Daniel


^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [RFC] Fast assurate clock readable from user space and NMI handler
  2007-02-27  4:22                         ` Daniel Walker
@ 2007-02-27  4:47                           ` Mathieu Desnoyers
  2007-02-27  6:29                           ` Ingo Molnar
  1 sibling, 0 replies; 20+ messages in thread
From: Mathieu Desnoyers @ 2007-02-27  4:47 UTC (permalink / raw)
  To: Daniel Walker; +Cc: mbligh, linux-kernel, johnstul, mingo

* Daniel Walker (dwalker@mvista.com) wrote:
> On Mon, 2007-02-26 at 22:54 -0500, Mathieu Desnoyers wrote:
> > If an NMI nests over the spinlock, we have a deadlock.
> 
> Maybe not completely safe ...
> 
> > In addition, clock->cycle_last is a cycle_t, defined as 64 bits on
> > x86. It is therefore not updated atomically by change_clocksource,
> > timekeeping_init, timekeeping_resume and update_wall_time. If an NMI
> > fires right on top of the update, especially around the 32 bits wrap
> > around, the time will be really fuzzy.
> 
> I'm not sure that is particularly significant considering that it's just
> a possible bad timestamp, and the probability of that happening seems
> rather low .. You could also modify NMI calls so they use a different
> time stamping method, like reading the clocksource directly .
> 

Since you do not disable interrupts around the clocksource read, the
same problem applies to interrupt handlers of higher priority than the
cycle_last updating code.

A bad timestamp can make a trace quite hard to follow and more
error-prone. When someone is looking for _the_ failing case in a system,
the infrastructure used to debug must be reliable. Sometimes error cases
takes days before showing up : we can't afford to be unsure about the
precision of the tracer.

Also, the goal is to have a generic monotonic timestamp readable from
everywhere. Excluding execution contexts doesn't seem like such a great
idea (it just replicates the same problem somewhere else).

> The pit clocksource could be dropped pretty easy with my clocksource
> update patches, which I'm still working on but you could easily drop
> clock sources that aren't atomic like the pit .. Also the pit is
> generally undesirable, so it's not going to be missed.
> 

Still important for old architectures where PIT is the only available
clock source I guess. However, the clocksource struct should at least 
tell if the time can be read atomically and offer a different API for
atomic vs non atomic read of time source, returning an error if no
atomic time source is available.

Mathieu

-- 
Mathieu Desnoyers
Computer Engineering Ph.D. Student, Ecole Polytechnique de Montreal
OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F  BA06 3F25 A8FE 3BAE 9A68

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [RFC] Fast assurate clock readable from user space and NMI handler
  2007-02-27  4:22                         ` Daniel Walker
  2007-02-27  4:47                           ` Mathieu Desnoyers
@ 2007-02-27  6:29                           ` Ingo Molnar
  2007-02-27  7:38                             ` Mathieu Desnoyers
  2007-02-27  9:59                             ` Daniel Walker
  1 sibling, 2 replies; 20+ messages in thread
From: Ingo Molnar @ 2007-02-27  6:29 UTC (permalink / raw)
  To: Daniel Walker
  Cc: Mathieu Desnoyers, mbligh, linux-kernel, johnstul, Thomas Gleixner


* Daniel Walker <dwalker@mvista.com> wrote:

> The pit clocksource could be dropped pretty easy with my clocksource 
> update patches, which I'm still working on but you could easily drop 
> clock sources that aren't atomic like the pit .. Also the pit is 
> generally undesirable, so it's not going to be missed.

that's totally unacceptable, and i'm amazed you are even suggesting it - 
often the PIT ends up being the most reliable hardware clock in a PC. 
Btw., what's wrong with the spinlock that is protecting PIT access? It 
expresses the non-atomic property of the PIT just fine.

	Ingo

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [RFC] Fast assurate clock readable from user space and NMI handler
  2007-02-27  6:29                           ` Ingo Molnar
@ 2007-02-27  7:38                             ` Mathieu Desnoyers
  2007-02-27  8:48                               ` Thomas Gleixner
  2007-02-27 10:18                               ` Daniel Walker
  2007-02-27  9:59                             ` Daniel Walker
  1 sibling, 2 replies; 20+ messages in thread
From: Mathieu Desnoyers @ 2007-02-27  7:38 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Daniel Walker, mbligh, linux-kernel, johnstul, Thomas Gleixner

* Ingo Molnar (mingo@elte.hu) wrote:
> 
> * Daniel Walker <dwalker@mvista.com> wrote:
> 
> > The pit clocksource could be dropped pretty easy with my clocksource 
> > update patches, which I'm still working on but you could easily drop 
> > clock sources that aren't atomic like the pit .. Also the pit is 
> > generally undesirable, so it's not going to be missed.
> 
> that's totally unacceptable, and i'm amazed you are even suggesting it - 
> often the PIT ends up being the most reliable hardware clock in a PC. 
> Btw., what's wrong with the spinlock that is protecting PIT access? It 
> expresses the non-atomic property of the PIT just fine.
> 

I am concerned about the automatic fallback to the PIT when no other
clock source is available. A clocksource read would be atomic when TSC
or HPET are available, but would fall back on PIT otherwise. There
should be some way to specify that a caller is only interested in atomic
clock sources (if none are available, the call should simply return an
error, or 0).

I still think that an RCU style update mechanism would be a good way  to
fix the current clocksource read issue. Another, slower and non NMI
safe way to do this would be with a read seqlock and with IRQ disabling.

Mathieu

-- 
Mathieu Desnoyers
Computer Engineering Ph.D. Student, Ecole Polytechnique de Montreal
OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F  BA06 3F25 A8FE 3BAE 9A68

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [RFC] Fast assurate clock readable from user space and NMI handler
  2007-02-27  7:38                             ` Mathieu Desnoyers
@ 2007-02-27  8:48                               ` Thomas Gleixner
  2007-02-27 10:18                               ` Daniel Walker
  1 sibling, 0 replies; 20+ messages in thread
From: Thomas Gleixner @ 2007-02-27  8:48 UTC (permalink / raw)
  To: Mathieu Desnoyers
  Cc: Ingo Molnar, Daniel Walker, mbligh, linux-kernel, johnstul

On Tue, 2007-02-27 at 02:38 -0500, Mathieu Desnoyers wrote:
> > > The pit clocksource could be dropped pretty easy with my clocksource 
> > > update patches, which I'm still working on but you could easily drop 
> > > clock sources that aren't atomic like the pit .. Also the pit is 
> > > generally undesirable, so it's not going to be missed.
> > 
> > that's totally unacceptable, and i'm amazed you are even suggesting it - 
> > often the PIT ends up being the most reliable hardware clock in a PC. 
> > Btw., what's wrong with the spinlock that is protecting PIT access? It 
> > expresses the non-atomic property of the PIT just fine.
> > 
> 
> I am concerned about the automatic fallback to the PIT when no other
> clock source is available. 

And what are you going to use then ? Just kill the box, when nothing
else than PIT is there ? 

	tglx



^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [RFC] Fast assurate clock readable from user space and NMI handler
  2007-02-27  6:29                           ` Ingo Molnar
  2007-02-27  7:38                             ` Mathieu Desnoyers
@ 2007-02-27  9:59                             ` Daniel Walker
  1 sibling, 0 replies; 20+ messages in thread
From: Daniel Walker @ 2007-02-27  9:59 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Mathieu Desnoyers, mbligh, linux-kernel, johnstul, Thomas Gleixner

On Tue, 2007-02-27 at 07:29 +0100, Ingo Molnar wrote:
> * Daniel Walker <dwalker@mvista.com> wrote:
> 
> > The pit clocksource could be dropped pretty easy with my clocksource 
> > update patches, which I'm still working on but you could easily drop 
> > clock sources that aren't atomic like the pit .. Also the pit is 
> > generally undesirable, so it's not going to be missed.
> 
> that's totally unacceptable, and i'm amazed you are even suggesting it - 
> often the PIT ends up being the most reliable hardware clock in a PC. 
> Btw., what's wrong with the spinlock that is protecting PIT access? It 
> expresses the non-atomic property of the PIT just fine.

Just considering the rating is lower than the acpi_pm (and the TSC), and
it's not even considered on SMP systems is enough for me .. It's just a
problematic clock.. 

Again, I'm not suggesting we drop it all the time, just for a special
case when Mathieu needs it dropped.

Daniel


^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [RFC] Fast assurate clock readable from user space and NMI handler
  2007-02-27  7:38                             ` Mathieu Desnoyers
  2007-02-27  8:48                               ` Thomas Gleixner
@ 2007-02-27 10:18                               ` Daniel Walker
  2007-02-27 16:02                                 ` Mathieu Desnoyers
  1 sibling, 1 reply; 20+ messages in thread
From: Daniel Walker @ 2007-02-27 10:18 UTC (permalink / raw)
  To: Mathieu Desnoyers
  Cc: Ingo Molnar, mbligh, linux-kernel, johnstul, Thomas Gleixner

On Tue, 2007-02-27 at 02:38 -0500, Mathieu Desnoyers wrote:

> 
> I am concerned about the automatic fallback to the PIT when no other
> clock source is available. A clocksource read would be atomic when TSC
> or HPET are available, but would fall back on PIT otherwise. There
> should be some way to specify that a caller is only interested in atomic
> clock sources (if none are available, the call should simply return an
> error, or 0).
> 
> I still think that an RCU style update mechanism would be a good way  to
> fix the current clocksource read issue. Another, slower and non NMI
> safe way to do this would be with a read seqlock and with IRQ disabling.

I'm not sure what you mean by using the RCU, but the pit clocksource
does disable interrupts with a spin_lock_irqsave().

Daniel


^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [RFC] Fast assurate clock readable from user space and NMI handler
  2007-02-27 10:18                               ` Daniel Walker
@ 2007-02-27 16:02                                 ` Mathieu Desnoyers
  2007-02-27 17:24                                   ` Daniel Walker
  0 siblings, 1 reply; 20+ messages in thread
From: Mathieu Desnoyers @ 2007-02-27 16:02 UTC (permalink / raw)
  To: Daniel Walker
  Cc: Ingo Molnar, mbligh, linux-kernel, johnstul, Thomas Gleixner

* Daniel Walker (dwalker@mvista.com) wrote:
> On Tue, 2007-02-27 at 02:38 -0500, Mathieu Desnoyers wrote:
> 
> > 
> > I am concerned about the automatic fallback to the PIT when no other
> > clock source is available. A clocksource read would be atomic when TSC
> > or HPET are available, but would fall back on PIT otherwise. There
> > should be some way to specify that a caller is only interested in atomic
> > clock sources (if none are available, the call should simply return an
> > error, or 0).
> > 
> I'm not sure what you mean by using the RCU

The original proposal of this thread uses an RCU (read-copy-update) style
update of the previous 64 bits counter : it swaps a pointer (atomically)
upon update by incrementing a word-sized counter that is used, by the
reader, to get the offset in the array (with a modulo operation) for the
current readable data and as a way to detect incorrect reads of
overwritten information (we re-read the word-sized counter after having
read the data structure to make sure it has not been incremented. If we
detect an increment, we redo the whole operation).

> > I still think that an RCU style update mechanism would be a good way  to
> > fix the current clocksource read issue. Another, slower and non NMI
> > safe way to do this would be with a read seqlock and with IRQ disabling.
> 
> , but the pit clocksource
> does disable interrupts with a spin_lock_irqsave().
> 

When I say "clocksource read issue", I am talking about
race between the function you proposed earlier, which you say is used in
-rt kernels for latency tracing (get_monotonic_cycles), and HPET and TSC
"last cycles" updates.

Mathieu

-- 
Mathieu Desnoyers
Computer Engineering Ph.D. Student, Ecole Polytechnique de Montreal
OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F  BA06 3F25 A8FE 3BAE 9A68

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [RFC] Fast assurate clock readable from user space and NMI handler
  2007-02-27 16:02                                 ` Mathieu Desnoyers
@ 2007-02-27 17:24                                   ` Daniel Walker
  2007-02-27 19:04                                     ` Mathieu Desnoyers
  0 siblings, 1 reply; 20+ messages in thread
From: Daniel Walker @ 2007-02-27 17:24 UTC (permalink / raw)
  To: Mathieu Desnoyers
  Cc: Ingo Molnar, mbligh, linux-kernel, johnstul, Thomas Gleixner

On Tue, 2007-02-27 at 11:02 -0500, Mathieu Desnoyers wrote:
> * Daniel Walker (dwalker@mvista.com) wrote:
> > On Tue, 2007-02-27 at 02:38 -0500, Mathieu Desnoyers wrote:
> > 
> > > 
> > > I am concerned about the automatic fallback to the PIT when no other
> > > clock source is available. A clocksource read would be atomic when TSC
> > > or HPET are available, but would fall back on PIT otherwise. There
> > > should be some way to specify that a caller is only interested in atomic
> > > clock sources (if none are available, the call should simply return an
> > > error, or 0).
> > > 
> > I'm not sure what you mean by using the RCU
> 
> The original proposal of this thread uses an RCU (read-copy-update) style
> update of the previous 64 bits counter : it swaps a pointer (atomically)
> upon update by incrementing a word-sized counter that is used, by the
> reader, to get the offset in the array (with a modulo operation) for the
> current readable data and as a way to detect incorrect reads of
> overwritten information (we re-read the word-sized counter after having
> read the data structure to make sure it has not been incremented. If we
> detect an increment, we redo the whole operation).

I didn't see RCU at all in your original message, so I'm not sure how
you propose to use it .. My understanding of the RCU was that it
couldn't be used from interrupt context, that could be totally wrong so
I'll let you explain how you planed to use it.

> > > I still think that an RCU style update mechanism would be a good way  to
> > > fix the current clocksource read issue. Another, slower and non NMI
> > > safe way to do this would be with a read seqlock and with IRQ disabling.
> > 
> > , but the pit clocksource
> > does disable interrupts with a spin_lock_irqsave().
> > 
> 
> When I say "clocksource read issue", I am talking about
> race between the function you proposed earlier, which you say is used in
> -rt kernels for latency tracing (get_monotonic_cycles), and HPET and TSC
> "last cycles" updates.

Right .. You said that regular interrupts would cause this non-atomic
64-bit update race , but the pit disabled interrupts, and the
last_cycles update is done with interrupts off .. So I think we're back
to only the NMI case ..

Did you have another scenario ?

Daniel


^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [RFC] Fast assurate clock readable from user space and NMI handler
  2007-02-27 17:24                                   ` Daniel Walker
@ 2007-02-27 19:04                                     ` Mathieu Desnoyers
  2007-02-27 19:40                                       ` john stultz
  2007-02-27 20:09                                       ` Daniel Walker
  0 siblings, 2 replies; 20+ messages in thread
From: Mathieu Desnoyers @ 2007-02-27 19:04 UTC (permalink / raw)
  To: Daniel Walker
  Cc: Ingo Molnar, mbligh, linux-kernel, johnstul, Thomas Gleixner

* Daniel Walker (dwalker@mvista.com) wrote:
> On Tue, 2007-02-27 at 11:02 -0500, Mathieu Desnoyers wrote:
> > * Daniel Walker (dwalker@mvista.com) wrote:
> > > On Tue, 2007-02-27 at 02:38 -0500, Mathieu Desnoyers wrote:
> > > 
> > > > 
> > > > I am concerned about the automatic fallback to the PIT when no other
> > > > clock source is available. A clocksource read would be atomic when TSC
> > > > or HPET are available, but would fall back on PIT otherwise. There
> > > > should be some way to specify that a caller is only interested in atomic
> > > > clock sources (if none are available, the call should simply return an
> > > > error, or 0).
> > > > 
> > > I'm not sure what you mean by using the RCU
> > 
> > The original proposal of this thread uses an RCU (read-copy-update) style
> > update of the previous 64 bits counter : it swaps a pointer (atomically)
> > upon update by incrementing a word-sized counter that is used, by the
> > reader, to get the offset in the array (with a modulo operation) for the
> > current readable data and as a way to detect incorrect reads of
> > overwritten information (we re-read the word-sized counter after having
> > read the data structure to make sure it has not been incremented. If we
> > detect an increment, we redo the whole operation).
> 
> I didn't see RCU at all in your original message, so I'm not sure how
> you propose to use it .. My understanding of the RCU was that it
> couldn't be used from interrupt context, that could be totally wrong so
> I'll let you explain how you planed to use it.
> 

1 - I do not plan to use the rcupdate.h API, because it is oriented
towards allocating/freeing data structures after a quiescent state. I
don't need that. I only want to have a 64 bits data structure valid for
reading, with atomic update. Therefore, I keep an array of 2 64 bits
structures. At all times, there is one used as "readable" value and the other
as "writeable". The role is exchanged at each update. The word-sized
counter is used to select the current read and write pointers through a
mask, and is also used to detect bad reads (if a read is preempted, and
then we have 2 updates, the reader could read a bad value without
knowing it). By keeping a word-sized counter of the number of updates,
we have 32 (or 64) bits (depending on the architecture) before the wrap
around, which should not happen even in a far future.



> > > > I still think that an RCU style update mechanism would be a good way  to
> > > > fix the current clocksource read issue. Another, slower and non NMI
> > > > safe way to do this would be with a read seqlock and with IRQ disabling.
> > > 
> > > , but the pit clocksource
> > > does disable interrupts with a spin_lock_irqsave().
> > > 
> > 
> > When I say "clocksource read issue", I am talking about
> > race between the function you proposed earlier, which you say is used in
> > -rt kernels for latency tracing (get_monotonic_cycles), and HPET and TSC
> > "last cycles" updates.
> 
> Right .. You said that regular interrupts would cause this non-atomic
> 64-bit update race , but the pit disabled interrupts, and the
> last_cycles update is done with interrupts off .. So I think we're back
> to only the NMI case ..
> 
> Did you have another scenario ?
> 

__get_nsec_offset : reads clock->cycle_last. Should be called with
xtime_lock held. (ok so far, but see below)

change_clocksource
clock->cycle_last = now; (non atomic 64 bits update. Not protected by
any lock ?) -> this would race with __get_nsec_offset ?

update_wall_time
Called from timer interrupt. Holds xtime_lock and has a priority higher
than other interrupts. Other clock->cycle_last protected by
write_seqlock_irqsave.

get_monotonic_cycles (as you proposed, in -rt kernels) :
reads clock->cycle_last. Not protected by any read seqlock and does not
disable interrupts. Races with change_clocksource, update_wall_time and
all other time update functions. For instance, is someone uses
get_monotonic_cycles in process context and the timer interrupt fires
update_wall_time right at the middle of the 2 32 bits read, the value
will be wrong.

Mathieu

-- 
Mathieu Desnoyers
Computer Engineering Ph.D. Student, Ecole Polytechnique de Montreal
OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F  BA06 3F25 A8FE 3BAE 9A68

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [RFC] Fast assurate clock readable from user space and NMI handler
  2007-02-27 19:04                                     ` Mathieu Desnoyers
@ 2007-02-27 19:40                                       ` john stultz
  2007-02-27 20:09                                       ` Daniel Walker
  1 sibling, 0 replies; 20+ messages in thread
From: john stultz @ 2007-02-27 19:40 UTC (permalink / raw)
  To: Mathieu Desnoyers
  Cc: Daniel Walker, Ingo Molnar, mbligh, linux-kernel, Thomas Gleixner

On Tue, 2007-02-27 at 14:04 -0500, Mathieu Desnoyers wrote:
> __get_nsec_offset : reads clock->cycle_last. Should be called with
> xtime_lock held. (ok so far, but see below)
> 
> change_clocksource
> clock->cycle_last = now; (non atomic 64 bits update. Not protected by
> any lock ?) -> this would race with __get_nsec_offset ?

Minor nit (it could probably use a comment fixup): its only called from
update_wall_time, which holds xtime_lock. 

> update_wall_time
> Called from timer interrupt. Holds xtime_lock and has a priority higher
> than other interrupts. Other clock->cycle_last protected by
> write_seqlock_irqsave.

-john


^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [RFC] Fast assurate clock readable from user space and NMI handler
  2007-02-27 19:04                                     ` Mathieu Desnoyers
  2007-02-27 19:40                                       ` john stultz
@ 2007-02-27 20:09                                       ` Daniel Walker
  1 sibling, 0 replies; 20+ messages in thread
From: Daniel Walker @ 2007-02-27 20:09 UTC (permalink / raw)
  To: Mathieu Desnoyers
  Cc: Ingo Molnar, mbligh, linux-kernel, johnstul, Thomas Gleixner

On Tue, 2007-02-27 at 14:04 -0500, Mathieu Desnoyers wrote:

> 1 - I do not plan to use the rcupdate.h API, because it is oriented
> towards allowing/freeing data structures after a quiescent state. I
> don't need that. I only want to have a 64 bits data structure valid for
> reading, with atomic update. Therefore, I keep an array of 2 64 bits
> structures. At all time, there is one used as "readable" value and the other
> as "writeable". The role is exchanged at each update. The word-sized
> counter is used to select the current read and write pointers through a
> mask, and is also used to detect bad reads (if a read is preempted, and
> then we have 2 updates, the reader could read a bad value without
> knowing it). By keeping a word-sized counter of the number of updates,
> we have 32 (or 64) bits (depending on the architecture) before the wrap
> around, which should not happen even in a far future.

Sounds like a special case RCU system .. If you wanted to add this time
stamping system to Linux, the only acceptable way to add it, IMO, would
be to replace or extend gettimeofday .. You would also need a
justification for the changes, which right now is likely only LTT .. 

> __get_nsec_offset : reads clock->cycle_last. Should be called with
> xtime_lock held. (ok so far, but see below)
> 
> change_clocksource
> clock->cycle_last = now; (non atomic 64 bits update. Not protected by
> any lock ?) -> this would race with __get_nsec_offset ?
> 
> update_wall_time
> Called from timer interrupt. Holds xtime_lock and has a priority higher
> than other interrupts. Other clock->cycle_last protected by
> write_seqlock_irqsave.

update_wall_time, change_clocksource, __get_nsec_offset all hold the
xtime_lock w/ interrupts disabled.

> get_monotonic_cycles (as you proposed, in -rt kernels) :
> reads clock->cycle_last. Not protected by any read seqlock and does not
> disable interrupts. Races with change_clocksource, update_wall_time and
> all other time update functions. For instance, if someone uses
> get_monotonic_cycles in process context and the timer interrupt fires
> update_wall_time right in the middle of the two 32-bit reads, the value
> will be wrong.

I guess that's true.. You also have to assume that the upper 32 bits
have actually changed, the TSC is the only 64-bit clock in linux right
now.. 

Daniel


^ permalink raw reply	[flat|nested] 20+ messages in thread

end of thread, other threads:[~2007-02-27 20:11 UTC | newest]

Thread overview: 20+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2006-11-24 21:59 [PATCH 8/16] LTTng 0.6.36 for 2.6.18 : Timestamp Mathieu Desnoyers
     [not found] ` <1164475747.5196.5.camel@localhost.localdomain>
     [not found]   ` <20061126170542.GA30771@Krystal>
     [not found]     ` <1164561427.16871.14.camel@localhost.localdomain>
     [not found]       ` <20061126231833.GA22241@Krystal>
     [not found]         ` <1164585589.16871.52.camel@localhost.localdomain>
2007-02-24 16:19           ` [RFC] Fast assurate clock readable from user space and NMI handler Mathieu Desnoyers
2007-02-24 18:06             ` Daniel Walker
2007-02-26 20:53               ` Mathieu Desnoyers
2007-02-26 21:27                 ` Daniel Walker
2007-02-26 22:14                   ` Mathieu Desnoyers
2007-02-26 23:12                     ` Daniel Walker
2007-02-27  3:54                       ` Mathieu Desnoyers
2007-02-27  4:22                         ` Daniel Walker
2007-02-27  4:47                           ` Mathieu Desnoyers
2007-02-27  6:29                           ` Ingo Molnar
2007-02-27  7:38                             ` Mathieu Desnoyers
2007-02-27  8:48                               ` Thomas Gleixner
2007-02-27 10:18                               ` Daniel Walker
2007-02-27 16:02                                 ` Mathieu Desnoyers
2007-02-27 17:24                                   ` Daniel Walker
2007-02-27 19:04                                     ` Mathieu Desnoyers
2007-02-27 19:40                                       ` john stultz
2007-02-27 20:09                                       ` Daniel Walker
2007-02-27  9:59                             ` Daniel Walker

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.