linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: jbohac@suse.cz
To: Andi Kleen <ak@suse.de>
Cc: linux-kernel@vger.kernel.org, Jiri Bohac <jbohac@suse.cz>,
	Vojtech Pavlik <vojtech@suse.cz>,
	ssouhlal@freebsd.org, arjan@infradead.org, tglx@linutronix.de,
	johnstul@us.ibm.com, zippel@linux-m68k.org, andrea@suse.de
Subject: [patch 8/9] Add time_update_mt_guess()
Date: Thu, 01 Feb 2007 11:00:00 +0100	[thread overview]
Message-ID: <20070201103754.136630000@jet.suse.cz> (raw)
In-Reply-To: 20070201095952.589234000@jet.suse.cz

[-- Attachment #1: update_mt_guess --]
[-- Type: text/plain, Size: 9019 bytes --]

time_update_mt_guess() is the core of the TSC->MT approximation magic.

Called periodically from the LAPIC timer interrupt handler, it fine-tunes 
all the per-CPU offsets and ratios needed by guess_mt() to approximate the
MT using any processor's TSC.

We also need to update these from the cpufreq notifiers. Because a frequency
change makes the approximation unreliable (we don't know _exactly_ when it
happens), the approximation is disabled for a while after a frequency change
and is not re-enabled until the approximation stabilises again.

Signed-off-by: Jiri Bohac <jbohac@suse.cz>


Index: linux-2.6.20-rc5/arch/x86_64/kernel/apic.c
===================================================================
--- linux-2.6.20-rc5.orig/arch/x86_64/kernel/apic.c
+++ linux-2.6.20-rc5/arch/x86_64/kernel/apic.c
@@ -63,6 +63,9 @@ int using_apic_timer __read_mostly = 0;
 
 static void apic_pm_activate(void);
 
+extern void time_update_mt_guess(void);
+
+
 void enable_NMI_through_LVT0 (void * dummy)
 {
 	unsigned int v;
@@ -986,6 +989,8 @@ void smp_local_timer_interrupt(void)
 	 * Currently this isn't too much of an issue (performance wise),
 	 * we can take more than 100K local irqs per second on a 100 MHz P5.
 	 */
+
+	 time_update_mt_guess();
 }
 
 /*
Index: linux-2.6.20-rc5/arch/x86_64/kernel/time.c
===================================================================
--- linux-2.6.20-rc5.orig/arch/x86_64/kernel/time.c
+++ linux-2.6.20-rc5/arch/x86_64/kernel/time.c
@@ -221,6 +221,126 @@ static u32 read_master_timer_pm(void)
 }
 
 /*
+ * Called from the LAPIC timer interrupt; under xtime_lock and with local IRQs
+ * off, updates this CPU's vxtime.cpu[] values needed by guess_mt(): mt_base,
+ * tsc_slope, tsc_slope_avg, tsc_last, mt_last (and may clear tsc_invalid).
+ */
+void time_update_mt_guess(void)
+{
+	u64 t, delta_t, delta_mt, mt;
+	s64 guess_mt_err, guess_mt_err_nsec, tsc_per_tick, tsc_slope_corr,
+	    current_slope, old_mt_err;
+	int cpu = smp_processor_id(), resync;
+	unsigned long flags;
+
+	if (vxtime.mode == VXTIME_TSC && cpu != 0)	/* in VXTIME_TSC mode only CPU 0 updates */
+		return;
+
+	local_irq_save(flags);
+
+	/* if a frequency change is in progress, don't recalculate anything
+	   as this would destroy the fine-tuned slope. We don't rely on the TSC
+	   during this time, so we don't care about the accuracy at all */
+	if (vxtime.cpu[cpu].tsc_invalid == VXTIME_TSC_CPUFREQ) {
+		local_irq_restore(flags);
+		return;
+	}
+
+	mt = get_master_timer64();
+	t = get_cycles_sync();
+
+	write_seqlock(&xtime_lock);
+
+	/* get the error of the estimated MT value */
+	delta_t = t - vxtime.cpu[cpu].tsc_last;
+	delta_mt = mt - vxtime.cpu[cpu].mt_last;
+	tsc_per_tick = ((mt_per_tick << 32) / delta_mt * delta_t) >> 32;	/* ~ mt_per_tick * delta_t / delta_mt; NOTE(review): delta_mt == 0 is not guarded */
+
+	vxtime.cpu[cpu].mt_base = __guess_mt(t, cpu);	/* rebase to the current guess at TSC value t */
+
+	guess_mt_err = mt - vxtime.cpu[cpu].mt_base;	/* > 0: the guess is behind the MT */
+	guess_mt_err_nsec = (guess_mt_err * (s64)vxtime.mt_q) >> 32;	/* same error scaled by mt_q; compared against NSEC_PER_SEC below */
+	old_mt_err =  ((s64)(vxtime.cpu[cpu].tsc_slope_avg - vxtime.cpu[cpu].tsc_slope)
+			* tsc_per_tick) >> TSC_SLOPE_SCALE;
+	current_slope = (delta_mt << TSC_SLOPE_SCALE) / delta_t;	/* NOTE(review): delta_t == 0 is not guarded */
+
+	/* calculate a long time average to attenuate oscillation */
+	vxtime.cpu[cpu].tsc_slope_avg = ((TSC_SLOPE_DECAY - 1) * vxtime.cpu[cpu].tsc_slope_avg +
+			current_slope) / TSC_SLOPE_DECAY;
+
+	tsc_slope_corr = ((s64)(guess_mt_err << TSC_SLOPE_SCALE)) / tsc_per_tick;	/* NOTE(review): tsc_per_tick == 0 is not guarded */
+	vxtime.cpu[cpu].tsc_slope = vxtime.cpu[cpu].tsc_slope_avg + tsc_slope_corr;
+
+	if ((s64)vxtime.cpu[cpu].tsc_slope < 0) {	/* corrected slope went negative: freeze the guess instead */
+		vxtime.cpu[cpu].tsc_slope = 0;
+		vxtime.cpu[cpu].tsc_slope_avg = current_slope;
+	}
+
+	if (abs(guess_mt_err) > (mt_per_tick >> 2))	/* NOTE(review): abs() is applied to s64 values in this function - confirm the kernel's abs() does not truncate to int */
+		printk(KERN_DEBUG "Master Timer guess on cpu %d off by %lld.%.6ld seconds\n",
+			cpu, guess_mt_err_nsec / NSEC_PER_SEC,
+			(abs(guess_mt_err_nsec) % NSEC_PER_SEC) / 1000);
+
+	resync = 0;
+	/* if the guess is off by more than a second, something has gone very
+	   wrong; we'll break monotonicity and re-sync the guess with the MT */
+	if (abs(guess_mt_err_nsec) > NSEC_PER_SEC) {
+		resync = 1;
+		if (vxtime.mode != VXTIME_MT && guess_mt_err < 0)
+			printk(KERN_ERR "time not monotonic on cpu %d\n", cpu);
+	}
+	/* else if the guess is off by more than a jiffy, only synchronize the
+	   guess with the MT if the guess is behind (won't break monotonicity);
+	   if the guess is ahead, stop the timer by setting slope to zero */
+	else if (abs(guess_mt_err) > mt_per_tick) {
+		if (guess_mt_err > 0)
+			resync = 1;
+		else {
+			vxtime.cpu[cpu].tsc_slope = 0;
+			vxtime.cpu[cpu].tsc_slope_avg = current_slope;
+		}
+	}
+	/* good enough to switch back from temporary MT mode? */
+	else if (vxtime.cpu[cpu].tsc_invalid &&
+		    abs(guess_mt_err) < mt_per_tick / USEC_PER_TICK &&
+		    abs(old_mt_err) < mt_per_tick / USEC_PER_TICK &&
+		    mt > vxtime.cpu[cpu].last_mt_guess) {
+			vxtime.cpu[cpu].tsc_invalid = 0;
+			vxtime.cpu[cpu].mt_base = mt;
+			vxtime.cpu[cpu].tsc_slope = vxtime.cpu[cpu].tsc_slope_avg;
+	}
+
+	/* hard re-sync of the guess to the current value of the MT */
+	if (resync) {
+		vxtime.cpu[cpu].mt_base = mt;
+		vxtime.cpu[cpu].tsc_slope = vxtime.cpu[cpu].tsc_slope_avg = current_slope;
+
+		printk(KERN_INFO "Master Timer re-syncing on cpu %d (mt=%lld, slope=%lld)\n",
+			cpu, mt, vxtime.cpu[cpu].tsc_slope);
+	}
+
+	if (vxtime.cpu[cpu].tsc_slope == 0)
+		printk(KERN_INFO "timer on cpu %d frozen, waiting for time to catch up\n", cpu);
+
+	vxtime.cpu[cpu].tsc_last = t;	/* reference points for the next call's deltas */
+	vxtime.cpu[cpu].mt_last = mt;
+
+	write_sequnlock(&xtime_lock);
+	local_irq_restore(flags);
+}
+
+inline u64 mt_to_nsec(u64 mt)	/* scales mt by vxtime.mt_q / 2^32, working on 24-bit chunks of mt */
+{
+	u64 ret;
+	ret  = ((mt & 0xffffff) * vxtime.mt_q) >> 32;	/* bits 0..23 of mt */
+	mt >>= 24;
+	ret += ((mt & 0xffffff) * vxtime.mt_q) >> 8;	/* bits 24..47: (x << 24) >> 32 == x >> 8 */
+	mt >>= 24;
+	ret += ( mt             * vxtime.mt_q) << 16;	/* bits 48..63: (x << 48) >> 32 == x << 16 */
+	return ret;	/* presumably chunked so each product fits in 64 bits - depends on mt_q's width, TODO confirm */
+}
+
+/*
  * do_gettimeoffset() returns microseconds since last timer interrupt was
  * triggered by hardware. A memory read of HPET is slower than a register read
  * of TSC, but much more reliable. It's also synchronized to the timer
@@ -666,50 +786,83 @@ static void cpufreq_delayed_get(void)
 }
 
 static unsigned int  ref_freq = 0;
-static unsigned long loops_per_jiffy_ref = 0;
 
 static unsigned long cpu_khz_ref = 0;
 
+struct cpufreq_notifier_data {	/* argument bundle handed to time_cpufreq_notifier_on_cpu() */
+	struct cpufreq_freqs *freq;	/* the notifier's data pointer (old/new frequency, cpu) */
+	unsigned long val;	/* the notifier's event value (CPUFREQ_*CHANGE) */
+};
+
+/* called on the CPU that changed frequency; data is a struct cpufreq_notifier_data *; updates that CPU's vxtime.cpu[] entry under xtime_lock */
+static void time_cpufreq_notifier_on_cpu(void *data)
+{
+	unsigned long flags;
+	int cpu;
+	struct cpufreq_notifier_data *cnd = data;
+
+	write_seqlock_irqsave(&xtime_lock, flags);
+
+	cpu = smp_processor_id();
+	switch (cnd->val) {	/* no default: any other event value leaves the state untouched */
+
+		case CPUFREQ_PRECHANGE:
+		case CPUFREQ_SUSPENDCHANGE:
+			if (!vxtime.cpu[cpu].tsc_invalid)	/* only record a guess while the TSC is still marked valid */
+				vxtime.cpu[cpu].last_mt_guess = __guess_mt(get_cycles_sync(), cpu);
+			vxtime.cpu[cpu].tsc_invalid = VXTIME_TSC_CPUFREQ;	/* makes time_update_mt_guess() return early until the change completes */
+			break;
+
+		case CPUFREQ_POSTCHANGE:
+		case CPUFREQ_RESUMECHANGE:
+			vxtime.cpu[cpu].tsc_slope = ((vxtime.cpu[cpu].tsc_slope >> 4) * cnd->freq->old / cnd->freq->new) << 4;	/* rescale by old/new; the >> 4 ... << 4 presumably avoids overflow in the multiply - TODO confirm */
+			vxtime.cpu[cpu].tsc_slope_avg = ((vxtime.cpu[cpu].tsc_slope_avg >> 4) * cnd->freq->old / cnd->freq->new) << 4;
+
+			vxtime.cpu[cpu].mt_base = vxtime.cpu[cpu].mt_last = get_master_timer64();	/* restart the reference points from "now" */
+			vxtime.cpu[cpu].tsc_last = get_cycles_sync();
+
+			vxtime.cpu[cpu].tsc_invalid = VXTIME_TSC_INVALID;	/* still not trusted; time_update_mt_guess() clears this once the error is small */
+			break;
+	}
+
+	write_sequnlock_irqrestore(&xtime_lock, flags);
+}
+
 static int time_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
 				 void *data)
 {
-        struct cpufreq_freqs *freq = data;
-	unsigned long *lpj, dummy;
+	struct cpufreq_notifier_data cnd = {
+		.freq = data,
+		.val = val,
+	};
 
-	if (cpu_has(&cpu_data[freq->cpu], X86_FEATURE_CONSTANT_TSC))
+	if (cpu_has(&cpu_data[cnd.freq->cpu], X86_FEATURE_CONSTANT_TSC))
 		return 0;
 
-	lpj = &dummy;
-	if (!(freq->flags & CPUFREQ_CONST_LOOPS))
-#ifdef CONFIG_SMP
-		lpj = &cpu_data[freq->cpu].loops_per_jiffy;
-#else
-		lpj = &boot_cpu_data.loops_per_jiffy;
-#endif
-
 	if (!ref_freq) {
-		ref_freq = freq->old;
-		loops_per_jiffy_ref = *lpj;
+		ref_freq = cnd.freq->old;
 		cpu_khz_ref = cpu_khz;
 	}
-        if ((val == CPUFREQ_PRECHANGE  && freq->old < freq->new) ||
-            (val == CPUFREQ_POSTCHANGE && freq->old > freq->new) ||
+
+	if ((val == CPUFREQ_PRECHANGE  && cnd.freq->old < cnd.freq->new) ||
+	    (val == CPUFREQ_POSTCHANGE && cnd.freq->old > cnd.freq->new) ||
 	    (val == CPUFREQ_RESUMECHANGE)) {
-                *lpj =
-		cpufreq_scale(loops_per_jiffy_ref, ref_freq, freq->new);
 
-		cpu_khz = cpufreq_scale(cpu_khz_ref, ref_freq, freq->new);
-		if (!(freq->flags & CPUFREQ_CONST_LOOPS))
-			vxtime.tsc_quot = (USEC_PER_MSEC << US_SCALE) / cpu_khz;
+		cpu_khz = cpufreq_scale(cpu_khz_ref, ref_freq, cnd.freq->new);
+
 	}
-	
-	set_cyc2ns_scale(cpu_khz_ref);
+
+	preempt_disable();
+	if (smp_processor_id() == cnd.freq->cpu)
+		time_cpufreq_notifier_on_cpu(&cnd);
+	else smp_call_function_single(cnd.freq->cpu, time_cpufreq_notifier_on_cpu, &cnd, 0, 1);
+	preempt_enable();
 
 	return 0;
 }
- 
+
 static struct notifier_block time_cpufreq_notifier_block = {
-         .notifier_call  = time_cpufreq_notifier
+	 .notifier_call  = time_cpufreq_notifier
 };
 
 static int __init cpufreq_tsc(void)

--

  parent reply	other threads:[~2007-02-01 11:14 UTC|newest]

Thread overview: 68+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2007-02-01  9:59 [patch 0/9] x86_64: reliable TSC-based gettimeofday jbohac
2007-02-01  9:59 ` [patch 1/9] Fix HPET init race jbohac
2007-02-02  2:34   ` Andrew Morton
2007-02-06 16:44     ` Jiri Bohac
2007-02-07  0:12       ` Andrew Morton
2007-02-10 12:31         ` Andi Kleen
2007-07-26 20:58           ` Robin Holt
2007-02-01  9:59 ` [patch 2/9] Remove the support for the VXTIME_PMTMR timer mode jbohac
2007-02-01 11:13   ` Andi Kleen
2007-02-01 13:13     ` Jiri Bohac
2007-02-01 13:13       ` Andi Kleen
2007-02-01 13:59         ` Jiri Bohac
2007-02-01 14:18           ` Andi Kleen
2007-02-01  9:59 ` [patch 3/9] Remove the support for the VXTIME_HPET " jbohac
2007-02-01  9:59 ` [patch 4/9] Remove the TSC synchronization on SMP machines jbohac
2007-02-01 11:14   ` Andi Kleen
2007-02-01 13:17     ` Jiri Bohac
2007-02-01 15:16       ` Vojtech Pavlik
2007-02-02  7:14         ` Andi Kleen
2007-02-13  0:34           ` Christoph Lameter
2007-02-13  6:40             ` Arjan van de Ven
2007-02-13  8:28               ` Andi Kleen
2007-02-13  8:41                 ` Arjan van de Ven
2007-02-13 17:09               ` Christoph Lameter
2007-02-13 17:20                 ` Andi Kleen
2007-02-13 22:18                   ` Vojtech Pavlik
2007-02-13 22:38                     ` Andrea Arcangeli
2007-02-14  6:59                       ` Vojtech Pavlik
2007-02-13 23:55                     ` Christoph Lameter
2007-02-14  0:18                   ` Paul Mackerras
2007-02-14  0:25                     ` john stultz
2007-02-02  7:13       ` Andi Kleen
2007-02-01 21:05     ` mbligh
2007-02-03  1:16   ` H. Peter Anvin
2007-02-01  9:59 ` [patch 5/9] Add all the necessary structures to the vsyscall page jbohac
2007-02-01 11:17   ` Andi Kleen
2007-02-01  9:59 ` [patch 6/9] Add the "Master Timer" jbohac
2007-02-01 11:22   ` Andi Kleen
2007-02-01 13:29     ` Jiri Bohac
2007-02-01  9:59 ` [patch 7/9] Adapt the time initialization code jbohac
2007-02-01 11:26   ` Andi Kleen
2007-02-01 13:41     ` Jiri Bohac
2007-02-01 10:00 ` jbohac [this message]
2007-02-01 11:28   ` [patch 8/9] Add time_update_mt_guess() Andi Kleen
2007-02-01 13:54     ` Jiri Bohac
2007-02-01 10:00 ` [patch 9/9] Make use of the Master Timer jbohac
2007-02-01 11:36   ` Andi Kleen
2007-02-01 14:29     ` Jiri Bohac
2007-02-01 15:23       ` Vojtech Pavlik
2007-02-02  7:05         ` Andi Kleen
2007-02-02  7:04       ` Andi Kleen
2007-02-01 11:20 ` [patch 0/9] x86_64: reliable TSC-based gettimeofday Andi Kleen
2007-02-01 11:53   ` Andrea Arcangeli
2007-02-01 12:02     ` Andi Kleen
2007-02-01 12:54       ` Andrea Arcangeli
2007-02-01 12:17   ` Ingo Molnar
2007-02-01 14:52   ` Jiri Bohac
2007-02-01 16:56     ` john stultz
2007-02-01 19:41       ` Vojtech Pavlik
2007-02-01 11:34 ` Ingo Molnar
2007-02-01 11:46 ` [-mm patch] x86_64 GTOD: offer scalable vgettimeofday Ingo Molnar
2007-02-01 12:01   ` Andi Kleen
2007-02-01 12:14     ` Ingo Molnar
2007-02-01 12:17   ` [-mm patch] x86_64 GTOD: offer scalable vgettimeofday II Andi Kleen
2007-02-01 12:24     ` Ingo Molnar
2007-02-01 12:45       ` Andi Kleen
2007-02-02  4:22 ` [patch 0/9] x86_64: reliable TSC-based gettimeofday Andrew Morton
2007-02-02  7:07   ` Andi Kleen

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20070201103754.136630000@jet.suse.cz \
    --to=jbohac@suse.cz \
    --cc=ak@suse.de \
    --cc=andrea@suse.de \
    --cc=arjan@infradead.org \
    --cc=johnstul@us.ibm.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=ssouhlal@freebsd.org \
    --cc=tglx@linutronix.de \
    --cc=vojtech@suse.cz \
    --cc=zippel@linux-m68k.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).