linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: jbohac@suse.cz
To: Andi Kleen <ak@suse.de>
Cc: linux-kernel@vger.kernel.org, Jiri Bohac <jbohac@suse.cz>,
	Vojtech Pavlik <vojtech@suse.cz>,
	ssouhlal@freebsd.org, arjan@infradead.org, tglx@linutronix.de,
	johnstul@us.ibm.com, zippel@linux-m68k.org, andrea@suse.de
Subject: [patch 4/9] Remove the TSC synchronization on SMP machines
Date: Thu, 01 Feb 2007 10:59:56 +0100	[thread overview]
Message-ID: <20070201103753.753349000@jet.suse.cz> (raw)
In-Reply-To: 20070201095952.589234000@jet.suse.cz

[-- Attachment #1: remove_tsc_synchronization --]
[-- Type: text/plain, Size: 7305 bytes --]

TSC is either synchronized by design or not reliable
to be used for anything, let alone timekeeping.

Signed-off-by: Jiri Bohac <jbohac@suse.cz>

Index: linux-2.6.20-rc5/arch/x86_64/kernel/smpboot.c
===================================================================
--- linux-2.6.20-rc5.orig/arch/x86_64/kernel/smpboot.c
+++ linux-2.6.20-rc5/arch/x86_64/kernel/smpboot.c
@@ -148,217 +148,6 @@ static void __cpuinit smp_store_cpu_info
 	print_cpu_info(c);
 }
 
-/*
- * New Funky TSC sync algorithm borrowed from IA64.
- * Main advantage is that it doesn't reset the TSCs fully and
- * in general looks more robust and it works better than my earlier
- * attempts. I believe it was written by David Mosberger. Some minor
- * adjustments for x86-64 by me -AK
- *
- * Original comment reproduced below.
- *
- * Synchronize TSC of the current (slave) CPU with the TSC of the
- * MASTER CPU (normally the time-keeper CPU).  We use a closed loop to
- * eliminate the possibility of unaccounted-for errors (such as
- * getting a machine check in the middle of a calibration step).  The
- * basic idea is for the slave to ask the master what itc value it has
- * and to read its own itc before and after the master responds.  Each
- * iteration gives us three timestamps:
- *
- *	slave		master
- *
- *	t0 ---\
- *             ---\
- *		   --->
- *			tm
- *		   /---
- *	       /---
- *	t1 <---
- *
- *
- * The goal is to adjust the slave's TSC such that tm falls exactly
- * half-way between t0 and t1.  If we achieve this, the clocks are
- * synchronized provided the interconnect between the slave and the
- * master is symmetric.  Even if the interconnect were asymmetric, we
- * would still know that the synchronization error is smaller than the
- * roundtrip latency (t0 - t1).
- *
- * When the interconnect is quiet and symmetric, this lets us
- * synchronize the TSC to within one or two cycles.  However, we can
- * only *guarantee* that the synchronization is accurate to within a
- * round-trip time, which is typically in the range of several hundred
- * cycles (e.g., ~500 cycles).  In practice, this means that the TSCs
- * are usually almost perfectly synchronized, but we shouldn't assume
- * that the accuracy is much better than half a micro second or so.
- *
- * [there are other errors like the latency of RDTSC and of the
- * WRMSR. These can also account to hundreds of cycles. So it's
- * probably worse. It claims 153 cycles error on a dual Opteron,
- * but I suspect the numbers are actually somewhat worse -AK]
- */
-
-#define MASTER	0
-#define SLAVE	(SMP_CACHE_BYTES/8)
-
-/* Intentionally don't use cpu_relax() while TSC synchronization
-   because we don't want to go into funky power save modi or cause
-   hypervisors to schedule us away.  Going to sleep would likely affect
-   latency and low latency is the primary objective here. -AK */
-#define no_cpu_relax() barrier()
-
-static __cpuinitdata DEFINE_SPINLOCK(tsc_sync_lock);
-static volatile __cpuinitdata unsigned long go[SLAVE + 1];
-static int notscsync __cpuinitdata;
-
-#undef DEBUG_TSC_SYNC
-
-#define NUM_ROUNDS	64	/* magic value */
-#define NUM_ITERS	5	/* likewise */
-
-/* Callback on boot CPU */
-static __cpuinit void sync_master(void *arg)
-{
-	unsigned long flags, i;
-
-	go[MASTER] = 0;
-
-	local_irq_save(flags);
-	{
-		for (i = 0; i < NUM_ROUNDS*NUM_ITERS; ++i) {
-			while (!go[MASTER])
-				no_cpu_relax();
-			go[MASTER] = 0;
-			rdtscll(go[SLAVE]);
-		}
-	}
-	local_irq_restore(flags);
-}
-
-/*
- * Return the number of cycles by which our tsc differs from the tsc
- * on the master (time-keeper) CPU.  A positive number indicates our
- * tsc is ahead of the master, negative that it is behind.
- */
-static inline long
-get_delta(long *rt, long *master)
-{
-	unsigned long best_t0 = 0, best_t1 = ~0UL, best_tm = 0;
-	unsigned long tcenter, t0, t1, tm;
-	int i;
-
-	for (i = 0; i < NUM_ITERS; ++i) {
-		rdtscll(t0);
-		go[MASTER] = 1;
-		while (!(tm = go[SLAVE]))
-			no_cpu_relax();
-		go[SLAVE] = 0;
-		rdtscll(t1);
-
-		if (t1 - t0 < best_t1 - best_t0)
-			best_t0 = t0, best_t1 = t1, best_tm = tm;
-	}
-
-	*rt = best_t1 - best_t0;
-	*master = best_tm - best_t0;
-
-	/* average best_t0 and best_t1 without overflow: */
-	tcenter = (best_t0/2 + best_t1/2);
-	if (best_t0 % 2 + best_t1 % 2 == 2)
-		++tcenter;
-	return tcenter - best_tm;
-}
-
-static __cpuinit void sync_tsc(unsigned int master)
-{
-	int i, done = 0;
-	long delta, adj, adjust_latency = 0;
-	unsigned long flags, rt, master_time_stamp, bound;
-#ifdef DEBUG_TSC_SYNC
-	static struct syncdebug {
-		long rt;	/* roundtrip time */
-		long master;	/* master's timestamp */
-		long diff;	/* difference between midpoint and master's timestamp */
-		long lat;	/* estimate of tsc adjustment latency */
-	} t[NUM_ROUNDS] __cpuinitdata;
-#endif
-
-	printk(KERN_INFO "CPU %d: Syncing TSC to CPU %u.\n",
-		smp_processor_id(), master);
-
-	go[MASTER] = 1;
-
-	/* It is dangerous to broadcast IPI as cpus are coming up,
-	 * as they may not be ready to accept them.  So since
-	 * we only need to send the ipi to the boot cpu direct
-	 * the message, and avoid the race.
-	 */
-	smp_call_function_single(master, sync_master, NULL, 1, 0);
-
-	while (go[MASTER])	/* wait for master to be ready */
-		no_cpu_relax();
-
-	spin_lock_irqsave(&tsc_sync_lock, flags);
-	{
-		for (i = 0; i < NUM_ROUNDS; ++i) {
-			delta = get_delta(&rt, &master_time_stamp);
-			if (delta == 0) {
-				done = 1;	/* let's lock on to this... */
-				bound = rt;
-			}
-
-			if (!done) {
-				unsigned long t;
-				if (i > 0) {
-					adjust_latency += -delta;
-					adj = -delta + adjust_latency/4;
-				} else
-					adj = -delta;
-
-				rdtscll(t);
-				wrmsrl(MSR_IA32_TSC, t + adj);
-			}
-#ifdef DEBUG_TSC_SYNC
-			t[i].rt = rt;
-			t[i].master = master_time_stamp;
-			t[i].diff = delta;
-			t[i].lat = adjust_latency/4;
-#endif
-		}
-	}
-	spin_unlock_irqrestore(&tsc_sync_lock, flags);
-
-#ifdef DEBUG_TSC_SYNC
-	for (i = 0; i < NUM_ROUNDS; ++i)
-		printk("rt=%5ld master=%5ld diff=%5ld adjlat=%5ld\n",
-		       t[i].rt, t[i].master, t[i].diff, t[i].lat);
-#endif
-
-	printk(KERN_INFO
-	       "CPU %d: synchronized TSC with CPU %u (last diff %ld cycles, "
-	       "maxerr %lu cycles)\n",
-	       smp_processor_id(), master, delta, rt);
-}
-
-static void __cpuinit tsc_sync_wait(void)
-{
-	/*
-	 * When the CPU has synchronized TSCs assume the BIOS
-  	 * or the hardware already synced.  Otherwise we could
-	 * mess up a possible perfect synchronization with a
-	 * not-quite-perfect algorithm.
-	 */
-	if (notscsync || !cpu_has_tsc || !unsynchronized_tsc())
-		return;
-	sync_tsc(0);
-}
-
-static __init int notscsync_setup(char *s)
-{
-	notscsync = 1;
-	return 1;
-}
-__setup("notscsync", notscsync_setup);
-
 static atomic_t init_deasserted __cpuinitdata;
 
 /*
@@ -565,14 +354,6 @@ void __cpuinit start_secondary(void)
 	 */
 	set_cpu_sibling_map(smp_processor_id());
 
-	/* 
-  	 * Wait for TSC sync to not schedule things before.
-	 * We still process interrupts, which could see an inconsistent
-	 * time in that window unfortunately. 
-	 * Do this here because TSC sync has global unprotected state.
- 	 */
-	tsc_sync_wait();
-
 	/*
 	 * We need to hold call_lock, so there is no inconsistency
 	 * between the time smp_call_function() determines number of

--

  parent reply	other threads:[~2007-02-01 11:13 UTC|newest]

Thread overview: 68+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2007-02-01  9:59 [patch 0/9] x86_64: reliable TSC-based gettimeofday jbohac
2007-02-01  9:59 ` [patch 1/9] Fix HPET init race jbohac
2007-02-02  2:34   ` Andrew Morton
2007-02-06 16:44     ` Jiri Bohac
2007-02-07  0:12       ` Andrew Morton
2007-02-10 12:31         ` Andi Kleen
2007-07-26 20:58           ` Robin Holt
2007-02-01  9:59 ` [patch 2/9] Remove the support for the VXTIME_PMTMR timer mode jbohac
2007-02-01 11:13   ` Andi Kleen
2007-02-01 13:13     ` Jiri Bohac
2007-02-01 13:13       ` Andi Kleen
2007-02-01 13:59         ` Jiri Bohac
2007-02-01 14:18           ` Andi Kleen
2007-02-01  9:59 ` [patch 3/9] Remove the support for the VXTIME_HPET " jbohac
2007-02-01  9:59 ` jbohac [this message]
2007-02-01 11:14   ` [patch 4/9] Remove the TSC synchronization on SMP machines Andi Kleen
2007-02-01 13:17     ` Jiri Bohac
2007-02-01 15:16       ` Vojtech Pavlik
2007-02-02  7:14         ` Andi Kleen
2007-02-13  0:34           ` Christoph Lameter
2007-02-13  6:40             ` Arjan van de Ven
2007-02-13  8:28               ` Andi Kleen
2007-02-13  8:41                 ` Arjan van de Ven
2007-02-13 17:09               ` Christoph Lameter
2007-02-13 17:20                 ` Andi Kleen
2007-02-13 22:18                   ` Vojtech Pavlik
2007-02-13 22:38                     ` Andrea Arcangeli
2007-02-14  6:59                       ` Vojtech Pavlik
2007-02-13 23:55                     ` Christoph Lameter
2007-02-14  0:18                   ` Paul Mackerras
2007-02-14  0:25                     ` john stultz
2007-02-02  7:13       ` Andi Kleen
2007-02-01 21:05     ` mbligh
2007-02-03  1:16   ` H. Peter Anvin
2007-02-01  9:59 ` [patch 5/9] Add all the necessary structures to the vsyscall page jbohac
2007-02-01 11:17   ` Andi Kleen
2007-02-01  9:59 ` [patch 6/9] Add the "Master Timer" jbohac
2007-02-01 11:22   ` Andi Kleen
2007-02-01 13:29     ` Jiri Bohac
2007-02-01  9:59 ` [patch 7/9] Adapt the time initialization code jbohac
2007-02-01 11:26   ` Andi Kleen
2007-02-01 13:41     ` Jiri Bohac
2007-02-01 10:00 ` [patch 8/9] Add time_update_mt_guess() jbohac
2007-02-01 11:28   ` Andi Kleen
2007-02-01 13:54     ` Jiri Bohac
2007-02-01 10:00 ` [patch 9/9] Make use of the Master Timer jbohac
2007-02-01 11:36   ` Andi Kleen
2007-02-01 14:29     ` Jiri Bohac
2007-02-01 15:23       ` Vojtech Pavlik
2007-02-02  7:05         ` Andi Kleen
2007-02-02  7:04       ` Andi Kleen
2007-02-01 11:20 ` [patch 0/9] x86_64: reliable TSC-based gettimeofday Andi Kleen
2007-02-01 11:53   ` Andrea Arcangeli
2007-02-01 12:02     ` Andi Kleen
2007-02-01 12:54       ` Andrea Arcangeli
2007-02-01 12:17   ` Ingo Molnar
2007-02-01 14:52   ` Jiri Bohac
2007-02-01 16:56     ` john stultz
2007-02-01 19:41       ` Vojtech Pavlik
2007-02-01 11:34 ` Ingo Molnar
2007-02-01 11:46 ` [-mm patch] x86_64 GTOD: offer scalable vgettimeofday Ingo Molnar
2007-02-01 12:01   ` Andi Kleen
2007-02-01 12:14     ` Ingo Molnar
2007-02-01 12:17   ` [-mm patch] x86_64 GTOD: offer scalable vgettimeofday II Andi Kleen
2007-02-01 12:24     ` Ingo Molnar
2007-02-01 12:45       ` Andi Kleen
2007-02-02  4:22 ` [patch 0/9] x86_64: reliable TSC-based gettimeofday Andrew Morton
2007-02-02  7:07   ` Andi Kleen

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20070201103753.753349000@jet.suse.cz \
    --to=jbohac@suse.cz \
    --cc=ak@suse.de \
    --cc=andrea@suse.de \
    --cc=arjan@infradead.org \
    --cc=johnstul@us.ibm.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=ssouhlal@freebsd.org \
    --cc=tglx@linutronix.de \
    --cc=vojtech@suse.cz \
    --cc=zippel@linux-m68k.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).