From: jbohac@suse.cz
To: Andi Kleen <ak@suse.de>
Cc: linux-kernel@vger.kernel.org, Jiri Bohac <jbohac@suse.cz>,
Vojtech Pavlik <vojtech@suse.cz>,
ssouhlal@freebsd.org, arjan@infradead.org, tglx@linutronix.de,
johnstul@us.ibm.com, zippel@linux-m68k.org, andrea@suse.de
Subject: [patch 9/9] Make use of the Master Timer
Date: Thu, 01 Feb 2007 11:00:01 +0100 [thread overview]
Message-ID: <20070201103754.281474000@jet.suse.cz> (raw)
In-Reply-To: 20070201095952.589234000@jet.suse.cz
[-- Attachment #1: use_master_timer --]
[-- Type: text/plain, Size: 12584 bytes --]
Make use of the whole Master Timer infrastructure in gettimeofday,
monotonic_clock, etc.
Also make the vsyscall version of gettimeofday use the guess_mt() if
possible.
Signed-off-by: Jiri Bohac <jbohac@suse.cz>
Index: linux-2.6.20-rc5/arch/x86_64/kernel/time.c
===================================================================
--- linux-2.6.20-rc5.orig/arch/x86_64/kernel/time.c
+++ linux-2.6.20-rc5/arch/x86_64/kernel/time.c
@@ -341,27 +341,48 @@ inline u64 mt_to_nsec(u64 mt)
}
/*
- * do_gettimeoffset() returns microseconds since last timer interrupt was
+ * do_gettimeoffset() returns nanoseconds since last timer interrupt was
* triggered by hardware. A memory read of HPET is slower than a register read
* of TSC, but much more reliable. It's also synchronized to the timer
* interrupt. Note that do_gettimeoffset() may return more than hpet_tick, if a
* timer interrupt has happened already, but vxtime.trigger wasn't updated yet.
* This is not a problem, because jiffies hasn't updated either. They are bound
* together by xtime_lock.
+ *
+ * If used_mt is not null, it will be filled with the master timer value
+ * used for the calculation
*/
-static inline unsigned int do_gettimeoffset_tsc(void)
+static inline s64 do_gettimeoffset(u64 *used_mt)
{
- unsigned long t;
- unsigned long x;
- t = get_cycles_sync();
- if (t < vxtime.last_tsc)
- t = vxtime.last_tsc; /* hack */
- x = ((t - vxtime.last_tsc) * vxtime.tsc_quot) >> US_SCALE;
- return x;
-}
+ int cpu = 0;
+ u64 tsc = 0, mt;
+ switch (vxtime.mode) {
+
+ case VXTIME_TSC:
+ rdtscll(tsc);
+ break;
+
+ case VXTIME_TSCP:
+ rdtscpll(tsc, cpu);
+ cpu &= 0xfff;
+ break;
-unsigned int (*do_gettimeoffset)(void) = do_gettimeoffset_tsc;
+ case VXTIME_TSCS:
+ case VXTIME_TSCM:
+ preempt_disable();
+ cpu = smp_processor_id();
+ rdtscll(tsc);
+ preempt_enable();
+ break;
+ }
+
+ mt = guess_mt(tsc, cpu);
+ if (used_mt)
+ *used_mt = mt;
+
+ return (((s64)(mt - vxtime.mt_wall)) * (s64)vxtime.mt_q) >> 32;
+}
/*
* This version of gettimeofday() has microsecond resolution and better than
@@ -372,28 +393,32 @@ unsigned int (*do_gettimeoffset)(void) =
void do_gettimeofday(struct timeval *tv)
{
unsigned long seq;
- unsigned int sec, usec;
+ unsigned int sec;
+ int nsec;
+ u64 mt;
do {
seq = read_seqbegin(&xtime_lock);
sec = xtime.tv_sec;
- usec = xtime.tv_nsec / NSEC_PER_USEC;
+ nsec = xtime.tv_nsec;
- /* i386 does some correction here to keep the clock
- monotonous even when ntpd is fixing drift.
- But they didn't work for me, there is a non monotonic
- clock anyways with ntp.
- I dropped all corrections now until a real solution can
- be found. Note when you fix it here you need to do the same
- in arch/x86_64/kernel/vsyscall.c and export all needed
- variables in vmlinux.lds. -AK */
- usec += do_gettimeoffset();
+ nsec += max(do_gettimeoffset(&mt), vxtime.ns_drift);
} while (read_seqretry(&xtime_lock, seq));
- tv->tv_sec = sec + usec / USEC_PER_SEC;
- tv->tv_usec = usec % USEC_PER_SEC;
+ /* this must be done outside the seqlock loop. Until the loop has finished,
+ the mt may be completely wrong, calculated from inconsistent data */
+ update_monotonic_mt(mt);
+
+ sec += nsec / NSEC_PER_SEC;
+ nsec %= NSEC_PER_SEC;
+ if (nsec < 0) {
+ --sec;
+ nsec += NSEC_PER_SEC;
+ }
+ tv->tv_sec = sec;
+ tv->tv_usec = nsec / NSEC_PER_USEC;
}
EXPORT_SYMBOL(do_gettimeofday);
@@ -408,13 +433,13 @@ int do_settimeofday(struct timespec *tv)
{
time_t wtm_sec, sec = tv->tv_sec;
long wtm_nsec, nsec = tv->tv_nsec;
+ unsigned long flags;
if ((unsigned long)tv->tv_nsec >= NSEC_PER_SEC)
return -EINVAL;
+ write_seqlock_irqsave(&xtime_lock, flags);
- write_seqlock_irq(&xtime_lock);
-
- nsec -= do_gettimeoffset() * NSEC_PER_USEC;
+ nsec -= do_gettimeoffset(NULL);
wtm_sec = wall_to_monotonic.tv_sec + (xtime.tv_sec - sec);
wtm_nsec = wall_to_monotonic.tv_nsec + (xtime.tv_nsec - nsec);
@@ -424,7 +449,7 @@ int do_settimeofday(struct timespec *tv)
ntp_clear();
- write_sequnlock_irq(&xtime_lock);
+ write_sequnlock_irqrestore(&xtime_lock, flags);
clock_was_set();
return 0;
}
@@ -519,27 +544,32 @@ static void set_rtc_mmss(unsigned long n
spin_unlock(&rtc_lock);
}
-
/* monotonic_clock(): returns # of nanoseconds passed since time_init()
* Note: This function is required to return accurate
* time even in the absence of multiple timer ticks.
*/
-static inline unsigned long long cycles_2_ns(unsigned long long cyc);
unsigned long long monotonic_clock(void)
{
- unsigned long seq;
- u32 last_offset, this_offset, offset;
- unsigned long long base;
+ int cpu;
+ unsigned long flags;
+ u64 t;
- do {
- seq = read_seqbegin(&xtime_lock);
+ /* any code that modifies the per-CPU variables used in guess_mt
+ will always run on this CPU, so we don't need to lock the xtime_lock
+ here. If we did, it would create a deadlock on debug printks (and
+ possibly elsewhere) called from other critical sections protected by
+ the lock */
- last_offset = vxtime.last_tsc;
- base = monotonic_base;
- } while (read_seqretry(&xtime_lock, seq));
- this_offset = get_cycles_sync();
- offset = cycles_2_ns(this_offset - last_offset);
- return base + offset;
+ local_irq_save(flags);
+
+ cpu = smp_processor_id();
+ rdtscll(t);
+ t = guess_mt(t, cpu);
+ update_monotonic_mt(t);
+
+ local_irq_restore(flags);
+
+ return mt_to_nsec(t);
}
EXPORT_SYMBOL(monotonic_clock);
@@ -573,62 +603,54 @@ static noinline void handle_lost_ticks(i
void main_timer_handler(void)
{
static unsigned long rtc_update = 0;
- unsigned long tsc;
- int delay = 0, offset = 0, lost = 0;
-
-/*
- * Here we are in the timer irq handler. We have irqs locally disabled (so we
- * don't need spin_lock_irqsave()) but we don't know if the timer_bh is running
- * on the other CPU, so we need a lock. We also need to lock the vsyscall
- * variables, because both do_timer() and us change them -arca+vojtech
- */
-
- write_seqlock(&xtime_lock);
+ unsigned long flags;
+ u64 mt;
+ int ticks, i;
+ u64 xtime_nsecs, mt_ticks;
- if (vxtime.hpet_address)
- offset = hpet_readl(HPET_COUNTER);
+ write_seqlock_irqsave(&xtime_lock, flags);
- if (hpet_use_timer) {
- /* if we're using the hpet timer functionality,
- * we can more accurately know the counter value
- * when the timer interrupt occured.
- */
- offset = hpet_readl(HPET_T0_CMP) - hpet_tick;
- delay = hpet_readl(HPET_COUNTER) - offset;
+ mt = update_master_timer64();
+ ticks = (mt - vxtime.mt_wall + mt_per_tick / 2) / mt_per_tick;
+ mt_ticks = ticks * mt_per_tick;
+
+ if (ticks > 1) {
+ handle_lost_ticks(ticks - 1);
+ jiffies += ticks - 1;
}
- tsc = get_cycles_sync();
-
- offset = (((tsc - vxtime.last_tsc) *
- vxtime.tsc_quot) >> US_SCALE) - USEC_PER_TICK;
- if (offset < 0)
- offset = 0;
+/*
+ * Do the timer stuff.
+ * NTP will cause the actual increment of xtime to be slightly different from
+ * NSEC_PER_TICK, so we set xtime.ns_drift to the difference. This will be used
+ * by do_gettimeofday() to make sure the time stays monotonic.
+ */
- if (offset > USEC_PER_TICK) {
- lost = offset / USEC_PER_TICK;
- offset %= USEC_PER_TICK;
+ xtime_nsecs = xtime.tv_sec * NSEC_PER_SEC + xtime.tv_nsec;
+ for (i = 0; i < ticks; ++i)
+ do_timer(1);
+ xtime_nsecs = xtime.tv_sec * NSEC_PER_SEC + xtime.tv_nsec - xtime_nsecs;
- monotonic_base += cycles_2_ns(tsc - vxtime.last_tsc);
+ vxtime.ns_drift = (mt_ticks * mtq >> 32) - xtime_nsecs;
+ vxtime.mt_wall += mt_ticks;
- vxtime.last_tsc = tsc - vxtime.quot * delay / vxtime.tsc_quot;
+/*
+ * If we have an externally synchronized Linux clock, then update CMOS clock
+ * accordingly every ~11 minutes. set_rtc_mmss() will be called in the jiffy
+ * closest to exactly 500 ms before the next second. If the update fails, we
+ * don't care, as it'll be updated on the next turn, and the problem (time way
+ * off) isn't likely to go away much sooner anyway.
+ */
- if ((((tsc - vxtime.last_tsc) *
- vxtime.tsc_quot) >> US_SCALE) < offset)
- vxtime.last_tsc = tsc -
- (((long) offset << US_SCALE) / vxtime.tsc_quot) - 1;
+ if (ntp_synced() && xtime.tv_sec > rtc_update &&
+ abs(xtime.tv_nsec - 500000000) <= tick_nsec / 2) {
+ set_rtc_mmss(xtime.tv_sec);
+ rtc_update = xtime.tv_sec + 660;
}
- if (lost > 0)
- handle_lost_ticks(lost);
- else
- lost = 0;
-
-/*
- * Do the timer stuff.
- */
+ write_sequnlock_irqrestore(&xtime_lock, flags);
- do_timer(lost + 1);
#ifndef CONFIG_SMP
update_process_times(user_mode(get_irq_regs()));
#endif
@@ -642,21 +664,6 @@ void main_timer_handler(void)
if (!using_apic_timer)
smp_local_timer_interrupt();
-/*
- * If we have an externally synchronized Linux clock, then update CMOS clock
- * accordingly every ~11 minutes. set_rtc_mmss() will be called in the jiffy
- * closest to exactly 500 ms before the next second. If the update fails, we
- * don't care, as it'll be updated on the next turn, and the problem (time way
- * off) isn't likely to go away much sooner anyway.
- */
-
- if (ntp_synced() && xtime.tv_sec > rtc_update &&
- abs(xtime.tv_nsec - 500000000) <= tick_nsec / 2) {
- set_rtc_mmss(xtime.tv_sec);
- rtc_update = xtime.tv_sec + 660;
- }
-
- write_sequnlock(&xtime_lock);
}
static irqreturn_t timer_interrupt(int irq, void *dev_id)
@@ -669,24 +676,9 @@ static irqreturn_t timer_interrupt(int i
return IRQ_HANDLED;
}
-static unsigned int cyc2ns_scale __read_mostly;
-
-static inline void set_cyc2ns_scale(unsigned long cpu_khz)
-{
- cyc2ns_scale = (NSEC_PER_MSEC << NS_SCALE) / cpu_khz;
-}
-
-static inline unsigned long long cycles_2_ns(unsigned long long cyc)
-{
- return (cyc * cyc2ns_scale) >> NS_SCALE;
-}
-
unsigned long long sched_clock(void)
{
- unsigned long a = 0;
-
- rdtscll(a);
- return cycles_2_ns(a);
+ return monotonic_clock();
}
static unsigned long get_cmos_time(void)
Index: linux-2.6.20-rc5/arch/x86_64/kernel/vsyscall.c
===================================================================
--- linux-2.6.20-rc5.orig/arch/x86_64/kernel/vsyscall.c
+++ linux-2.6.20-rc5/arch/x86_64/kernel/vsyscall.c
@@ -61,24 +61,35 @@ static __always_inline void timeval_norm
}
}
-static __always_inline void do_vgettimeofday(struct timeval * tv)
+static __always_inline u64 __guess_mt(u64 tsc, int cpu)
{
- long sequence, t;
- unsigned long sec, usec;
+ return (((tsc - __vxtime.cpu[cpu].tsc_last) * __vxtime.cpu[cpu].tsc_slope)
+ >> TSC_SLOPE_SCALE) + __vxtime.cpu[cpu].mt_base;
+}
+
+#define USEC_PER_TICK (USEC_PER_SEC / HZ)
+static __always_inline s64 __do_gettimeoffset(u64 tsc, int cpu)
+{
+ return (((s64)(__guess_mt(tsc, cpu) - __vxtime.mt_wall)) * (s64)__vxtime.mt_q) >> 32;
+}
+
+static __always_inline void do_vgettimeofday(struct timeval * tv, u64 tsc, int cpu)
+{
+ unsigned int sec;
+ s64 nsec;
- do {
- sequence = read_seqbegin(&__xtime_lock);
-
- sec = __xtime.tv_sec;
- usec = __xtime.tv_nsec / 1000;
-
- usec += ((readl((void __iomem *)
- fix_to_virt(VSYSCALL_HPET) + 0xf0) -
- __vxtime.last) * __vxtime.quot) >> 32;
- } while (read_seqretry(&__xtime_lock, sequence));
+ sec = __xtime.tv_sec;
+ nsec = __xtime.tv_nsec;
+ nsec += max(__do_gettimeoffset(tsc, cpu), __vxtime.drift);
- tv->tv_sec = sec + usec / 1000000;
- tv->tv_usec = usec % 1000000;
+ sec += nsec / NSEC_PER_SEC;
+ nsec %= NSEC_PER_SEC;
+ if (nsec < 0) {
+ --sec;
+ nsec += NSEC_PER_SEC;
+ }
+ tv->tv_sec = sec;
+ tv->tv_usec = nsec / NSEC_PER_USEC;
}
/* RED-PEN may want to readd seq locking, but then the variable should be write-once. */
@@ -107,10 +118,39 @@ static __always_inline long time_syscall
int __vsyscall(0) vgettimeofday(struct timeval * tv, struct timezone * tz)
{
- if (!__sysctl_vsyscall)
+ int cpu = 0;
+ u64 tsc;
+ unsigned long seq;
+ int do_syscall = !__sysctl_vsyscall;
+
+ if (tv && !do_syscall)
+ switch (__vxtime.mode) {
+ case VXTIME_TSC:
+ case VXTIME_TSCP:
+ do {
+ seq = read_seqbegin(&__xtime_lock);
+
+ if (__vxtime.mode == VXTIME_TSC)
+ rdtscll(tsc);
+ else {
+ rdtscpll(tsc, cpu);
+ cpu &= 0xfff;
+ }
+
+ if (unlikely(__vxtime.cpu[cpu].tsc_invalid))
+ do_syscall = 1;
+ else
+ do_vgettimeofday(tv, tsc, cpu);
+
+ } while (read_seqretry(&__xtime_lock, seq));
+ break;
+ default:
+ do_syscall = 1;
+ }
+
+ if (do_syscall)
return gettimeofday(tv,tz);
- if (tv)
- do_vgettimeofday(tv);
+
if (tz)
do_get_tz(tz);
return 0;
--
next prev parent reply other threads:[~2007-02-01 11:14 UTC|newest]
Thread overview: 68+ messages / expand[flat|nested] mbox.gz Atom feed top
2007-02-01 9:59 [patch 0/9] x86_64: reliable TSC-based gettimeofday jbohac
2007-02-01 9:59 ` [patch 1/9] Fix HPET init race jbohac
2007-02-02 2:34 ` Andrew Morton
2007-02-06 16:44 ` Jiri Bohac
2007-02-07 0:12 ` Andrew Morton
2007-02-10 12:31 ` Andi Kleen
2007-07-26 20:58 ` Robin Holt
2007-02-01 9:59 ` [patch 2/9] Remove the support for the VXTIME_PMTMR timer mode jbohac
2007-02-01 11:13 ` Andi Kleen
2007-02-01 13:13 ` Jiri Bohac
2007-02-01 13:13 ` Andi Kleen
2007-02-01 13:59 ` Jiri Bohac
2007-02-01 14:18 ` Andi Kleen
2007-02-01 9:59 ` [patch 3/9] Remove the support for the VXTIME_HPET " jbohac
2007-02-01 9:59 ` [patch 4/9] Remove the TSC synchronization on SMP machines jbohac
2007-02-01 11:14 ` Andi Kleen
2007-02-01 13:17 ` Jiri Bohac
2007-02-01 15:16 ` Vojtech Pavlik
2007-02-02 7:14 ` Andi Kleen
2007-02-13 0:34 ` Christoph Lameter
2007-02-13 6:40 ` Arjan van de Ven
2007-02-13 8:28 ` Andi Kleen
2007-02-13 8:41 ` Arjan van de Ven
2007-02-13 17:09 ` Christoph Lameter
2007-02-13 17:20 ` Andi Kleen
2007-02-13 22:18 ` Vojtech Pavlik
2007-02-13 22:38 ` Andrea Arcangeli
2007-02-14 6:59 ` Vojtech Pavlik
2007-02-13 23:55 ` Christoph Lameter
2007-02-14 0:18 ` Paul Mackerras
2007-02-14 0:25 ` john stultz
2007-02-02 7:13 ` Andi Kleen
2007-02-01 21:05 ` mbligh
2007-02-03 1:16 ` H. Peter Anvin
2007-02-01 9:59 ` [patch 5/9] Add all the necessary structures to the vsyscall page jbohac
2007-02-01 11:17 ` Andi Kleen
2007-02-01 9:59 ` [patch 6/9] Add the "Master Timer" jbohac
2007-02-01 11:22 ` Andi Kleen
2007-02-01 13:29 ` Jiri Bohac
2007-02-01 9:59 ` [patch 7/9] Adapt the time initialization code jbohac
2007-02-01 11:26 ` Andi Kleen
2007-02-01 13:41 ` Jiri Bohac
2007-02-01 10:00 ` [patch 8/9] Add time_update_mt_guess() jbohac
2007-02-01 11:28 ` Andi Kleen
2007-02-01 13:54 ` Jiri Bohac
2007-02-01 10:00 ` jbohac [this message]
2007-02-01 11:36 ` [patch 9/9] Make use of the Master Timer Andi Kleen
2007-02-01 14:29 ` Jiri Bohac
2007-02-01 15:23 ` Vojtech Pavlik
2007-02-02 7:05 ` Andi Kleen
2007-02-02 7:04 ` Andi Kleen
2007-02-01 11:20 ` [patch 0/9] x86_64: reliable TSC-based gettimeofday Andi Kleen
2007-02-01 11:53 ` Andrea Arcangeli
2007-02-01 12:02 ` Andi Kleen
2007-02-01 12:54 ` Andrea Arcangeli
2007-02-01 12:17 ` Ingo Molnar
2007-02-01 14:52 ` Jiri Bohac
2007-02-01 16:56 ` john stultz
2007-02-01 19:41 ` Vojtech Pavlik
2007-02-01 11:34 ` Ingo Molnar
2007-02-01 11:46 ` [-mm patch] x86_64 GTOD: offer scalable vgettimeofday Ingo Molnar
2007-02-01 12:01 ` Andi Kleen
2007-02-01 12:14 ` Ingo Molnar
2007-02-01 12:17 ` [-mm patch] x86_64 GTOD: offer scalable vgettimeofday II Andi Kleen
2007-02-01 12:24 ` Ingo Molnar
2007-02-01 12:45 ` Andi Kleen
2007-02-02 4:22 ` [patch 0/9] x86_64: reliable TSC-based gettimeofday Andrew Morton
2007-02-02 7:07 ` Andi Kleen
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20070201103754.281474000@jet.suse.cz \
--to=jbohac@suse.cz \
--cc=ak@suse.de \
--cc=andrea@suse.de \
--cc=arjan@infradead.org \
--cc=johnstul@us.ibm.com \
--cc=linux-kernel@vger.kernel.org \
--cc=ssouhlal@freebsd.org \
--cc=tglx@linutronix.de \
--cc=vojtech@suse.cz \
--cc=zippel@linux-m68k.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).