linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: george anzinger <george@mvista.com>
To: john stultz <johnstul@us.ibm.com>
Cc: lkml <linux-kernel@vger.kernel.org>,
	Joel.Becker@oracle.com, "Martin J. Bligh" <mbligh@aracnet.com>,
	wim.coekaerts@oracle.com
Subject: Re: [RFC][PATCH] linux-2.5.64_monotonic-clock_A0
Date: Thu, 06 Mar 2003 17:32:54 -0800	[thread overview]
Message-ID: <3E67F6C6.6090708@mvista.com> (raw)
In-Reply-To: <1046996126.16608.509.camel@w-jstultz2.beaverton.ibm.com>

john stultz wrote:
> All,
> 	Recently I've been working with Joel Becker, author of the
> hangcheck-timer code (already accepted into 2.5) to resolve issues when
> running his code on systems without synced TSCs. 
> 
> The basic problem is that the hangcheck-timer code (Required for Oracle)
> needs a accurate hard clock which can be used to detect OS stalls (due
> to udelay() or pci bus hangs) that would cause system time to skew (its
> sort of a sanity check that insures the system's notion of time is
> accurate). However, currently they are using get_cycles() to fetch the
> cpu's TSC register, thus this does not work on systems w/o a synced TSC.
> As suggested by Andi Kleen (see thread here:
> http://www.uwsg.iu.edu/hypermail/linux/kernel/0302.0/1234.html ) I've
> worked with Joel and others to implement the monotonic_clock()
> interface.
> 
> This interface returns a unsigned long long representing the number of
> nanoseconds that has passed since time_init(). 
> 
> Since we're dealing with 64bit values the cost of the math required to
> do the cycles->ns conversion is a big concern. I'd be very happy if
> someone could suggest a faster way. 

IMHO I solved this problem in the high-res timers patch.  I have 
posted the core of the conversion stuff as a path (last night at 
23:24) with this subject "[PATCH] Functions to do easy scaled math." 
It consists of a header file containing a handful of inline asm code 
to do the messy stuff (i.e. the stuff C can't do due to standard 
restrictions).  I also provided an asm-generic version to fill the gap 
on other archs.  Oh, and there is even a text file to explain it all.

-g

> 
> Future plans to the interface include properly handling cpu_freq changes
> and porting to the different arches.
> 
> Comments and suggestions requested, flames expected :)
> 
> thanks
> -john
> 
> diff -Nru a/arch/i386/kernel/time.c b/arch/i386/kernel/time.c
> --- a/arch/i386/kernel/time.c	Thu Mar  6 16:12:27 2003
> +++ b/arch/i386/kernel/time.c	Thu Mar  6 16:12:27 2003
> @@ -138,6 +138,19 @@
>  	clock_was_set();
>  }
>  
> +unsigned long long monotonic_clock(void)
> +{
> +	unsigned long long ret;
> +	unsigned long seq;
> +	do {
> +		seq = read_seqbegin(&xtime_lock);
> +		ret = timer->monotonic_clock();
> +	} while (read_seqretry(&xtime_lock, seq));
> +	return ret;
> +}
> +EXPORT_SYMBOL(monotonic_clock);
> +
> +
>  /*
>   * In order to set the CMOS clock precisely, set_rtc_mmss has to be
>   * called 500 ms after the second nowtime has started, because when
> diff -Nru a/arch/i386/kernel/timers/timer_cyclone.c b/arch/i386/kernel/timers/timer_cyclone.c
> --- a/arch/i386/kernel/timers/timer_cyclone.c	Thu Mar  6 16:12:27 2003
> +++ b/arch/i386/kernel/timers/timer_cyclone.c	Thu Mar  6 16:12:27 2003
> @@ -27,19 +27,24 @@
>  #define CYCLONE_MPMC_OFFSET 0x51D0
>  #define CYCLONE_MPCS_OFFSET 0x51A8
>  #define CYCLONE_TIMER_FREQ 100000000
> -
> +#define CYCLONE_TIMER_MASK (((u64)1<<40)-1) /*40 bit mask*/
>  int use_cyclone = 0;
>  
>  static u32* volatile cyclone_timer;	/* Cyclone MPMC0 register */
> -static u32 last_cyclone_timer;
> +static u32 last_cyclone_low;
> +static u32 last_cyclone_high;
> +static unsigned long long monotonic_base;
>  
>  static void mark_offset_cyclone(void)
>  {
>  	int count;
> +	unsigned long long this_offset, last_offset;
> +	last_offset = ((unsigned long long)last_cyclone_high<<32)|last_cyclone_low;
> +	
>  	spin_lock(&i8253_lock);
>  	/* quickly read the cyclone timer */
> -	if(cyclone_timer)
> -		last_cyclone_timer = cyclone_timer[0];
> +	last_cyclone_high = cyclone_timer[1];
> +	last_cyclone_low = cyclone_timer[0];
>  
>  	/* calculate delay_at_last_interrupt */
>  	outb_p(0x00, 0x43);     /* latch the count ASAP */
> @@ -50,6 +55,10 @@
>  
>  	count = ((LATCH-1) - count) * TICK_SIZE;
>  	delay_at_last_interrupt = (count + LATCH/2) / LATCH;
> +
> +	/* update the monotonic base value */
> +	this_offset = ((unsigned long long)last_cyclone_high<<32)|last_cyclone_low;
> +	monotonic_base += (this_offset - last_offset) & CYCLONE_TIMER_MASK;
>  }
>  
>  static unsigned long get_offset_cyclone(void)
> @@ -63,7 +72,7 @@
>  	offset = cyclone_timer[0];
>  
>  	/* .. relative to previous jiffy */
> -	offset = offset - last_cyclone_timer;
> +	offset = offset - last_cyclone_low;
>  
>  	/* convert cyclone ticks to microseconds */	
>  	/* XXX slow, can we speed this up? */
> @@ -73,6 +82,21 @@
>  	return delay_at_last_interrupt + offset;
>  }
>  
> +static unsigned long long monotonic_clock_cyclone(void)
> +{
> +	
> +	u32 now_low = cyclone_timer[0];
> +	u32 now_high = cyclone_timer[1];
> +	unsigned long long last_offset, this_offset;
> +	unsigned long long ret;
> +	last_offset = ((unsigned long long)last_cyclone_high<<32)|last_cyclone_low;
> +	this_offset = ((unsigned long long)now_high<<32)|now_low;
> +	
> +	ret = monotonic_base + ((this_offset - last_offset)&CYCLONE_TIMER_MASK);
> +	ret = ret * (1000000000 / CYCLONE_TIMER_FREQ);
> +	return ret;
> +}
> +
>  static int init_cyclone(void)
>  {
>  	u32* reg;	
> @@ -190,5 +214,6 @@
>  	.init = init_cyclone, 
>  	.mark_offset = mark_offset_cyclone, 
>  	.get_offset = get_offset_cyclone,
> +	.monotonic_clock =	monotonic_clock_cyclone,
>  	.delay = delay_cyclone,
>  };
> diff -Nru a/arch/i386/kernel/timers/timer_none.c b/arch/i386/kernel/timers/timer_none.c
> --- a/arch/i386/kernel/timers/timer_none.c	Thu Mar  6 16:12:27 2003
> +++ b/arch/i386/kernel/timers/timer_none.c	Thu Mar  6 16:12:27 2003
> @@ -15,6 +15,11 @@
>  	return 0;
>  }
>  
> +static unsigned long long monotonic_clock_none(void)
> +{
> +	return 0;
> +}
> +
>  static void delay_none(unsigned long loops)
>  {
>  	int d0;
> @@ -33,5 +38,6 @@
>  	.init =		init_none, 
>  	.mark_offset =	mark_offset_none, 
>  	.get_offset =	get_offset_none,
> +	.monotonic_clock =	monotonic_clock_none,
>  	.delay = delay_none,
>  };
> diff -Nru a/arch/i386/kernel/timers/timer_pit.c b/arch/i386/kernel/timers/timer_pit.c
> --- a/arch/i386/kernel/timers/timer_pit.c	Thu Mar  6 16:12:27 2003
> +++ b/arch/i386/kernel/timers/timer_pit.c	Thu Mar  6 16:12:27 2003
> @@ -27,6 +27,11 @@
>  	/* nothing needed */
>  }
>  
> +static unsigned long long monotonic_clock_pit(void)
> +{
> +	return 0;
> +}
> +
>  static void delay_pit(unsigned long loops)
>  {
>  	int d0;
> @@ -141,5 +146,6 @@
>  	.init =		init_pit, 
>  	.mark_offset =	mark_offset_pit, 
>  	.get_offset =	get_offset_pit,
> +	.monotonic_clock = monotonic_clock_pit,
>  	.delay = delay_pit,
>  };
> diff -Nru a/arch/i386/kernel/timers/timer_tsc.c b/arch/i386/kernel/timers/timer_tsc.c
> --- a/arch/i386/kernel/timers/timer_tsc.c	Thu Mar  6 16:12:27 2003
> +++ b/arch/i386/kernel/timers/timer_tsc.c	Thu Mar  6 16:12:27 2003
> @@ -23,6 +23,45 @@
>  static int delay_at_last_interrupt;
>  
>  static unsigned long last_tsc_low; /* lsb 32 bits of Time Stamp Counter */
> +static unsigned long last_tsc_high; /* msb 32 bits of Time Stamp Counter */
> +static unsigned long long monotonic_base;
> +
> +
> +/*
> + * accurate 64-bit/32-bit division, stolen from smpboot.c
> + */
> +unsigned long long div64 (unsigned long long a, unsigned long b0)
> +{
> +	unsigned int a1, a2;
> +	unsigned long long res;
> +
> +	a1 = ((unsigned int*)&a)[0];
> +	a2 = ((unsigned int*)&a)[1];
> +
> +	res = a1/b0 +
> +		(unsigned long long)a2 * (unsigned long long)(0xffffffff/b0) +
> +		a2 / b0 +
> +		(a2 * (0xffffffff % b0)) / b0;
> +
> +	return res;
> +}
> +
> +static inline unsigned long long cycles_2_ns(unsigned long long cyc)
> +{
> +	unsigned long long ret;
> +	unsigned long cpu_mhz = cpu_khz/1000;
> +
> +	/* convert from cycles(64bits) => nanoseconds (64bits)
> +	 *  basic equation:
> +	 *    cycles / ((cycles / sec) * (1sec / 10^9ns)) = ns
> +	 *    cycles / ((cpu_mhz * 1000000) / 10^9)) = ns
> +	 *    cycles / (cpu_mhz / 10^3) = ns
> +	 *    cycles * 10^3 / cpu_mhz = ns
> +	 */
> +	ret = cyc * 1000;
> +	ret = div64(ret,cpu_mhz);
> +	return ret;
> +}
>  
>  /* Cached *multiplier* to convert TSC counts to microseconds.
>   * (see the equation below).
> @@ -60,11 +99,25 @@
>  	return delay_at_last_interrupt + edx;
>  }
>  
> +static unsigned long long monotonic_clock_tsc(void)
> +{
> +	unsigned long long last_offset, this_offset;
> +	last_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low;
> +
> +	/* Read the Time Stamp Counter */
> +	rdtscll(this_offset);
> +
> +	/* return the value in ns */
> +	return  monotonic_base + cycles_2_ns(this_offset - last_offset);
> +}
> +
>  static void mark_offset_tsc(void)
>  {
>  	int count;
>  	int countmp;
>  	static int count1=0, count2=LATCH;
> +	unsigned long long this_offset, last_offset;
> +	last_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low;
>  	/*
>  	 * It is important that these two operations happen almost at
>  	 * the same time. We do the RDTSC stuff first, since it's
> @@ -79,7 +132,7 @@
>  	
>  	/* read Pentium cycle counter */
>  
> -	rdtscl(last_tsc_low);
> +	rdtsc(last_tsc_low, last_tsc_high);
>  
>  	spin_lock(&i8253_lock);
>  	outb_p(0x00, 0x43);     /* latch the count ASAP */
> @@ -104,6 +157,11 @@
>  
>  	count = ((LATCH-1) - count) * TICK_SIZE;
>  	delay_at_last_interrupt = (count + LATCH/2) / LATCH;
> +	
> +	/* update the monotonic base value */
> +	this_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low;
> +	monotonic_base += cycles_2_ns(this_offset - last_offset);
> +
>  }
>  
>  static void delay_tsc(unsigned long loops)
> @@ -326,5 +384,6 @@
>  	.init =		init_tsc,
>  	.mark_offset =	mark_offset_tsc, 
>  	.get_offset =	get_offset_tsc,
> +	.monotonic_clock =	monotonic_clock_tsc,
>  	.delay = delay_tsc,
>  };
> diff -Nru a/drivers/char/hangcheck-timer.c b/drivers/char/hangcheck-timer.c
> --- a/drivers/char/hangcheck-timer.c	Thu Mar  6 16:12:27 2003
> +++ b/drivers/char/hangcheck-timer.c	Thu Mar  6 16:12:27 2003
> @@ -78,11 +78,13 @@
>  static struct timer_list hangcheck_ticktock =
>  		TIMER_INITIALIZER(hangcheck_fire, 0, 0);
>  
> +extern unsigned long long monotonic_clock(void);
> +
>  static void hangcheck_fire(unsigned long data)
>  {
>  	unsigned long long cur_tsc, tsc_diff;
>  
> -	cur_tsc = get_cycles();
> +	cur_tsc = monotonic_clock();
>  
>  	if (cur_tsc > hangcheck_tsc)
>  		tsc_diff = cur_tsc - hangcheck_tsc;
> @@ -98,7 +100,7 @@
>  		}
>  	}
>  	mod_timer(&hangcheck_ticktock, jiffies + (hangcheck_tick*HZ));
> -	hangcheck_tsc = get_cycles();
> +	hangcheck_tsc = monotonic_clock();
>  }
>  
>  
> @@ -108,10 +110,10 @@
>  	       VERSION_STR, hangcheck_tick, hangcheck_margin);
>  
>  	hangcheck_tsc_margin = hangcheck_margin + hangcheck_tick;
> -	hangcheck_tsc_margin *= HZ;
> -	hangcheck_tsc_margin *= current_cpu_data.loops_per_jiffy;
> +	hangcheck_tsc_margin *= 1000000000;
> +
>  
> -	hangcheck_tsc = get_cycles();
> +	hangcheck_tsc = monotonic_clock();
>  	mod_timer(&hangcheck_ticktock, jiffies + (hangcheck_tick*HZ));
>  
>  	return 0;
> diff -Nru a/include/asm-i386/timer.h b/include/asm-i386/timer.h
> --- a/include/asm-i386/timer.h	Thu Mar  6 16:12:27 2003
> +++ b/include/asm-i386/timer.h	Thu Mar  6 16:12:27 2003
> @@ -14,6 +14,7 @@
>  	int (*init)(void);
>  	void (*mark_offset)(void);
>  	unsigned long (*get_offset)(void);
> +	unsigned long long (*monotonic_clock)(void);
>  	void (*delay)(unsigned long);
>  };
>  
> 
> 

-- 
George Anzinger   george@mvista.com
High-res-timers:  http://sourceforge.net/projects/high-res-timers/
Preemption patch: http://www.kernel.org/pub/linux/kernel/people/rml


  reply	other threads:[~2003-03-07  1:23 UTC|newest]

Thread overview: 4+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2003-03-07  0:15 [RFC][PATCH] linux-2.5.64_monotonic-clock_A0 john stultz
2003-03-07  1:32 ` george anzinger [this message]
2003-03-07  1:48   ` john stultz
2003-03-07  8:26     ` george anzinger

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=3E67F6C6.6090708@mvista.com \
    --to=george@mvista.com \
    --cc=Joel.Becker@oracle.com \
    --cc=johnstul@us.ibm.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mbligh@aracnet.com \
    --cc=wim.coekaerts@oracle.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).