All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH RFC] x86, tsc: Allow for high latency in quick_pit_calibrate()
@ 2015-05-21  7:55 Adrian Hunter
  2015-06-01  7:57 ` Adrian Hunter
  2015-06-02 19:33 ` Thomas Gleixner
  0 siblings, 2 replies; 48+ messages in thread
From: Adrian Hunter @ 2015-05-21  7:55 UTC (permalink / raw)
  To: Thomas Gleixner
  Cc: linux-kernel, Andy Lutomirski, Linus Torvalds, Andi Kleen, x86,
	H. Peter Anvin

If it takes longer than 12us to read the PIT counter lsb/msb,
then the error margin will never fall below 500ppm within 50ms,
and Fast TSC calibration will always fail.

This patch detects when that will happen and switches to using
a slightly different algorithm that takes advantage of the PIT's
latch comand.

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
---
 arch/x86/kernel/tsc.c | 133 +++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 132 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index 5054497..0035682 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -553,6 +553,40 @@ static inline int pit_expect_msb(unsigned char val, u64 *tscp, unsigned long *de
 }
 
 /*
+ * High latency version of pit_expect_msb(). Instead of using the read latency
+ * as the error margin, latch the counter and use that latency. The counter
+ * latches at the current value which means there is an addition error margin
+ * of 1 timer tick which is not accounted for here.
+ */
+static inline int pit_expect_msb_hl(unsigned char val, u64 *tscp,
+				    unsigned long *deltap, u16 *cnt)
+{
+	u64 tsc0, tsc1;
+	int count;
+	u8 lsb, msb;
+
+	for (count = 0; count < 50000; count++) {
+		tsc0 = get_cycles();
+		/* Latch counter 2 */
+		outb(0x80, 0x43);
+		tsc1 = get_cycles();
+		lsb = inb(0x42);
+		msb = inb(0x42);
+		if (msb != val)
+			break;
+	}
+	*deltap = tsc1 - tsc0;
+	*tscp = tsc0;
+	*cnt = lsb | ((u16)msb << 8);
+
+	/*
+	 * We require _some_ success, but the quality control
+	 * will be based on the error terms on the TSC values.
+	 */
+	return count > 5;
+}
+
+/*
  * How many MSB values do we want to see? We aim for
  * a maximum error rate of 500ppm (in practice the
  * real error is much smaller), but refuse to spend
@@ -561,6 +595,94 @@ static inline int pit_expect_msb(unsigned char val, u64 *tscp, unsigned long *de
 #define MAX_QUICK_PIT_MS 50
 #define MAX_QUICK_PIT_ITERATIONS (MAX_QUICK_PIT_MS * PIT_TICK_RATE / 1000 / 256)
 
+/*
+ * High latency version of quick_pit_calibrate() that works even if there is a
+ * high latency reading the PIT lsb/msb. This is needed if it takes longer than
+ * 12us to read the lsb/msb because then the error margin will never fall below
+ * 500ppm within 50ms.
+ */
+static unsigned long quick_pit_calibrate_hl(void)
+{
+	int i;
+	u64 tsc, delta;
+	unsigned long d1, d2;
+	u16 cnt0, cnt1, dc;
+
+	/* Re-start at 0xffff */
+	outb(0xff, 0x42);
+	outb(0xff, 0x42);
+
+	/*
+	 * The PIT starts counting at the next edge, so we
+	 * need to delay for a microsecond. The easiest way
+	 * to do that is to just read back the 16-bit counter
+	 * once from the PIT.
+	 */
+	pit_verify_msb(0);
+
+	/*
+	 * Iterate until the error is less than 500 ppm. The error margin due to
+	 * the time to latch the counter is d1 + d2. The counter latches the
+	 * current count which introduces a one tick error at the start and end.
+	 * So the total error is  d1 + d2 + 2 timer ticks. A timer tick is
+	 * approximately the TSC delta divided by the timer delta. So the error
+	 * margin is too high while:
+	 *      d1 + d2 + 2 * (delta / dc) >= delta >> 11
+	 *   => d1 + d2 >= (delta >> 11) - 2 * (delta / dc)
+	 *   => d1 + d2 >= (delta - 4096 * (delta / dc)) / 2048
+	 *   => (d1 + d2) * dc >= (dc * delta - 4096 * delta) / 2048
+	 *   => (d1 + d2) * dc >= (dc - 4096) * delta >> 11
+	*/
+	if (pit_expect_msb_hl(0xff, &tsc, &d1, &cnt0)) {
+		for (i = 1; i <= MAX_QUICK_PIT_ITERATIONS; i++) {
+			if (!pit_expect_msb_hl(0xff - i, &delta, &d2, &cnt1))
+				break;
+
+			dc = cnt0 - cnt1;
+			if (dc < 4096)
+				continue;
+
+			delta -= tsc;
+
+			if ((d1 + d2) * (u64)dc >= (dc - 4096) * delta >> 11)
+				continue;
+
+			/*
+			 * Check the PIT one more time to verify that
+			 * all TSC reads were stable wrt the PIT.
+			 *
+			 * This also guarantees serialization of the
+			 * last cycle read ('d2') in pit_expect_msb.
+			 */
+			if (!pit_verify_msb(0xfe - i))
+				break;
+			goto success;
+		}
+	}
+	pr_info("Fast TSC calibration (with latch) failed\n");
+	return 0;
+
+success:
+	/*
+	 * Ok, if we get here, then we've seen the
+	 * MSB of the PIT decrement 'i' times, and the
+	 * error has shrunk to less than 500 ppm.
+	 *
+	 * As a result, we can depend on there not being
+	 * any odd delays anywhere, and the TSC reads are
+	 * reliable (within the error).
+	 *
+	 * kHz = ticks / time-in-seconds / 1000;
+	 * kHz = ticks / (PIT count / PIT_TICK_RATE) / 1000
+	 * kHz = delta / (dc / PIT_TICK_RATE) / 1000
+	 * kHz = (delta * PIT_TICK_RATE) / (dc * 1000)
+	 */
+	delta *= PIT_TICK_RATE;
+	do_div(delta, dc * 1000);
+	pr_info("Fast TSC calibration (with latch) using PIT\n");
+	return delta;
+}
+
 static unsigned long quick_pit_calibrate(void)
 {
 	int i;
@@ -598,10 +720,19 @@ static unsigned long quick_pit_calibrate(void)
 			if (!pit_expect_msb(0xff-i, &delta, &d2))
 				break;
 
+			delta -= tsc;
+
+			/*
+			 * Extrapolate the error and switch to high-latency
+			 * algorithm if the error will never be below 500 ppm.
+			 */
+			if (i == 1 &&
+			    d1 + d2 >= (delta * MAX_QUICK_PIT_ITERATIONS) >> 11)
+				return quick_pit_calibrate_hl();
+
 			/*
 			 * Iterate until the error is less than 500 ppm
 			 */
-			delta -= tsc;
 			if (d1+d2 >= delta >> 11)
 				continue;
 
-- 
1.9.1


^ permalink raw reply related	[flat|nested] 48+ messages in thread

* Re: [PATCH RFC] x86, tsc: Allow for high latency in quick_pit_calibrate()
  2015-05-21  7:55 [PATCH RFC] x86, tsc: Allow for high latency in quick_pit_calibrate() Adrian Hunter
@ 2015-06-01  7:57 ` Adrian Hunter
  2015-06-02 13:58   ` Thomas Gleixner
  2015-06-02 19:33 ` Thomas Gleixner
  1 sibling, 1 reply; 48+ messages in thread
From: Adrian Hunter @ 2015-06-01  7:57 UTC (permalink / raw)
  To: Thomas Gleixner
  Cc: linux-kernel, Andy Lutomirski, Linus Torvalds, Andi Kleen, x86,
	H. Peter Anvin

On 21/05/15 10:55, Adrian Hunter wrote:
> If it takes longer than 12us to read the PIT counter lsb/msb,
> then the error margin will never fall below 500ppm within 50ms,
> and Fast TSC calibration will always fail.

Hi

This does happen, so it would be nice to have this fix.
Are there any comments?

Regards
Adrian


> 
> This patch detects when that will happen and switches to using
> a slightly different algorithm that takes advantage of the PIT's
> latch comand.
> 
> Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
> ---
>  arch/x86/kernel/tsc.c | 133 +++++++++++++++++++++++++++++++++++++++++++++++++-
>  1 file changed, 132 insertions(+), 1 deletion(-)
> 
> diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
> index 5054497..0035682 100644
> --- a/arch/x86/kernel/tsc.c
> +++ b/arch/x86/kernel/tsc.c
> @@ -553,6 +553,40 @@ static inline int pit_expect_msb(unsigned char val, u64 *tscp, unsigned long *de
>  }
>  
>  /*
> + * High latency version of pit_expect_msb(). Instead of using the read latency
> + * as the error margin, latch the counter and use that latency. The counter
> + * latches at the current value which means there is an addition error margin
> + * of 1 timer tick which is not accounted for here.
> + */
> +static inline int pit_expect_msb_hl(unsigned char val, u64 *tscp,
> +				    unsigned long *deltap, u16 *cnt)
> +{
> +	u64 tsc0, tsc1;
> +	int count;
> +	u8 lsb, msb;
> +
> +	for (count = 0; count < 50000; count++) {
> +		tsc0 = get_cycles();
> +		/* Latch counter 2 */
> +		outb(0x80, 0x43);
> +		tsc1 = get_cycles();
> +		lsb = inb(0x42);
> +		msb = inb(0x42);
> +		if (msb != val)
> +			break;
> +	}
> +	*deltap = tsc1 - tsc0;
> +	*tscp = tsc0;
> +	*cnt = lsb | ((u16)msb << 8);
> +
> +	/*
> +	 * We require _some_ success, but the quality control
> +	 * will be based on the error terms on the TSC values.
> +	 */
> +	return count > 5;
> +}
> +
> +/*
>   * How many MSB values do we want to see? We aim for
>   * a maximum error rate of 500ppm (in practice the
>   * real error is much smaller), but refuse to spend
> @@ -561,6 +595,94 @@ static inline int pit_expect_msb(unsigned char val, u64 *tscp, unsigned long *de
>  #define MAX_QUICK_PIT_MS 50
>  #define MAX_QUICK_PIT_ITERATIONS (MAX_QUICK_PIT_MS * PIT_TICK_RATE / 1000 / 256)
>  
> +/*
> + * High latency version of quick_pit_calibrate() that works even if there is a
> + * high latency reading the PIT lsb/msb. This is needed if it takes longer than
> + * 12us to read the lsb/msb because then the error margin will never fall below
> + * 500ppm within 50ms.
> + */
> +static unsigned long quick_pit_calibrate_hl(void)
> +{
> +	int i;
> +	u64 tsc, delta;
> +	unsigned long d1, d2;
> +	u16 cnt0, cnt1, dc;
> +
> +	/* Re-start at 0xffff */
> +	outb(0xff, 0x42);
> +	outb(0xff, 0x42);
> +
> +	/*
> +	 * The PIT starts counting at the next edge, so we
> +	 * need to delay for a microsecond. The easiest way
> +	 * to do that is to just read back the 16-bit counter
> +	 * once from the PIT.
> +	 */
> +	pit_verify_msb(0);
> +
> +	/*
> +	 * Iterate until the error is less than 500 ppm. The error margin due to
> +	 * the time to latch the counter is d1 + d2. The counter latches the
> +	 * current count which introduces a one tick error at the start and end.
> +	 * So the total error is  d1 + d2 + 2 timer ticks. A timer tick is
> +	 * approximately the TSC delta divided by the timer delta. So the error
> +	 * margin is too high while:
> +	 *      d1 + d2 + 2 * (delta / dc) >= delta >> 11
> +	 *   => d1 + d2 >= (delta >> 11) - 2 * (delta / dc)
> +	 *   => d1 + d2 >= (delta - 4096 * (delta / dc)) / 2048
> +	 *   => (d1 + d2) * dc >= (dc * delta - 4096 * delta) / 2048
> +	 *   => (d1 + d2) * dc >= (dc - 4096) * delta >> 11
> +	*/
> +	if (pit_expect_msb_hl(0xff, &tsc, &d1, &cnt0)) {
> +		for (i = 1; i <= MAX_QUICK_PIT_ITERATIONS; i++) {
> +			if (!pit_expect_msb_hl(0xff - i, &delta, &d2, &cnt1))
> +				break;
> +
> +			dc = cnt0 - cnt1;
> +			if (dc < 4096)
> +				continue;
> +
> +			delta -= tsc;
> +
> +			if ((d1 + d2) * (u64)dc >= (dc - 4096) * delta >> 11)
> +				continue;
> +
> +			/*
> +			 * Check the PIT one more time to verify that
> +			 * all TSC reads were stable wrt the PIT.
> +			 *
> +			 * This also guarantees serialization of the
> +			 * last cycle read ('d2') in pit_expect_msb.
> +			 */
> +			if (!pit_verify_msb(0xfe - i))
> +				break;
> +			goto success;
> +		}
> +	}
> +	pr_info("Fast TSC calibration (with latch) failed\n");
> +	return 0;
> +
> +success:
> +	/*
> +	 * Ok, if we get here, then we've seen the
> +	 * MSB of the PIT decrement 'i' times, and the
> +	 * error has shrunk to less than 500 ppm.
> +	 *
> +	 * As a result, we can depend on there not being
> +	 * any odd delays anywhere, and the TSC reads are
> +	 * reliable (within the error).
> +	 *
> +	 * kHz = ticks / time-in-seconds / 1000;
> +	 * kHz = ticks / (PIT count / PIT_TICK_RATE) / 1000
> +	 * kHz = delta / (dc / PIT_TICK_RATE) / 1000
> +	 * kHz = (delta * PIT_TICK_RATE) / (dc * 1000)
> +	 */
> +	delta *= PIT_TICK_RATE;
> +	do_div(delta, dc * 1000);
> +	pr_info("Fast TSC calibration (with latch) using PIT\n");
> +	return delta;
> +}
> +
>  static unsigned long quick_pit_calibrate(void)
>  {
>  	int i;
> @@ -598,10 +720,19 @@ static unsigned long quick_pit_calibrate(void)
>  			if (!pit_expect_msb(0xff-i, &delta, &d2))
>  				break;
>  
> +			delta -= tsc;
> +
> +			/*
> +			 * Extrapolate the error and switch to high-latency
> +			 * algorithm if the error will never be below 500 ppm.
> +			 */
> +			if (i == 1 &&
> +			    d1 + d2 >= (delta * MAX_QUICK_PIT_ITERATIONS) >> 11)
> +				return quick_pit_calibrate_hl();
> +
>  			/*
>  			 * Iterate until the error is less than 500 ppm
>  			 */
> -			delta -= tsc;
>  			if (d1+d2 >= delta >> 11)
>  				continue;
>  
> 


^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH RFC] x86, tsc: Allow for high latency in quick_pit_calibrate()
  2015-06-01  7:57 ` Adrian Hunter
@ 2015-06-02 13:58   ` Thomas Gleixner
  0 siblings, 0 replies; 48+ messages in thread
From: Thomas Gleixner @ 2015-06-02 13:58 UTC (permalink / raw)
  To: Adrian Hunter
  Cc: linux-kernel, Andy Lutomirski, Linus Torvalds, Andi Kleen, x86,
	H. Peter Anvin

On Mon, 1 Jun 2015, Adrian Hunter wrote:

> On 21/05/15 10:55, Adrian Hunter wrote:
> > If it takes longer than 12us to read the PIT counter lsb/msb,
> > then the error margin will never fall below 500ppm within 50ms,
> > and Fast TSC calibration will always fail.
> 
> Hi
> 
> This does happen, so it would be nice to have this fix.
> Are there any comments?

Yes. Its on my review list...

Thanks,

	tglx


^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH RFC] x86, tsc: Allow for high latency in quick_pit_calibrate()
  2015-05-21  7:55 [PATCH RFC] x86, tsc: Allow for high latency in quick_pit_calibrate() Adrian Hunter
  2015-06-01  7:57 ` Adrian Hunter
@ 2015-06-02 19:33 ` Thomas Gleixner
  2015-06-02 19:41   ` Andy Lutomirski
  2015-06-03  4:20   ` [PATCH RFC] x86, tsc: Allow for high latency " Linus Torvalds
  1 sibling, 2 replies; 48+ messages in thread
From: Thomas Gleixner @ 2015-06-02 19:33 UTC (permalink / raw)
  To: Adrian Hunter
  Cc: LKML, Andy Lutomirski, Linus Torvalds, Andi Kleen, x86,
	H. Peter Anvin, Len Brown

On Thu, 21 May 2015, Adrian Hunter wrote:

> If it takes longer than 12us to read the PIT counter lsb/msb,
> then the error margin will never fall below 500ppm within 50ms,
> and Fast TSC calibration will always fail.

So finally the legacy PIT emulation takes longer than the 30 years old
hardware implementation. Progress!
 
> This patch detects when that will happen and switches to using
> a slightly different algorithm that takes advantage of the PIT's
> latch comand.

Is there really no smarter way to figure out the TSC frequency on
modern systems?

Thanks,

	tglx

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH RFC] x86, tsc: Allow for high latency in quick_pit_calibrate()
  2015-06-02 19:33 ` Thomas Gleixner
@ 2015-06-02 19:41   ` Andy Lutomirski
  2015-06-02 19:43     ` Andi Kleen
  2015-06-03  4:20   ` [PATCH RFC] x86, tsc: Allow for high latency " Linus Torvalds
  1 sibling, 1 reply; 48+ messages in thread
From: Andy Lutomirski @ 2015-06-02 19:41 UTC (permalink / raw)
  To: Thomas Gleixner
  Cc: Adrian Hunter, LKML, Linus Torvalds, Andi Kleen, X86 ML,
	H. Peter Anvin, Len Brown

On Tue, Jun 2, 2015 at 12:33 PM, Thomas Gleixner <tglx@linutronix.de> wrote:
> On Thu, 21 May 2015, Adrian Hunter wrote:
>
>> If it takes longer than 12us to read the PIT counter lsb/msb,
>> then the error margin will never fall below 500ppm within 50ms,
>> and Fast TSC calibration will always fail.
>
> So finally the legacy PIT emulation takes longer than the 30 years old
> hardware implementation. Progress!
>
>> This patch detects when that will happen and switches to using
>> a slightly different algorithm that takes advantage of the PIT's
>> latch comand.
>
> Is there really no smarter way to figure out the TSC frequency on
> modern systems?

I just asked Len this question yesterday.  intel_pstate can do it,
although the algorithm is a bit gross.

--Andy

>
> Thanks,
>
>         tglx



-- 
Andy Lutomirski
AMA Capital Management, LLC

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH RFC] x86, tsc: Allow for high latency in quick_pit_calibrate()
  2015-06-02 19:41   ` Andy Lutomirski
@ 2015-06-02 19:43     ` Andi Kleen
  2015-06-02 19:58       ` Thomas Gleixner
  0 siblings, 1 reply; 48+ messages in thread
From: Andi Kleen @ 2015-06-02 19:43 UTC (permalink / raw)
  To: Andy Lutomirski
  Cc: Thomas Gleixner, Adrian Hunter, LKML, Linus Torvalds, X86 ML,
	H. Peter Anvin, Len Brown

On Tue, Jun 02, 2015 at 12:41:27PM -0700, Andy Lutomirski wrote:
> On Tue, Jun 2, 2015 at 12:33 PM, Thomas Gleixner <tglx@linutronix.de> wrote:
> > On Thu, 21 May 2015, Adrian Hunter wrote:
> >
> >> If it takes longer than 12us to read the PIT counter lsb/msb,
> >> then the error margin will never fall below 500ppm within 50ms,
> >> and Fast TSC calibration will always fail.
> >
> > So finally the legacy PIT emulation takes longer than the 30 years old
> > hardware implementation. Progress!
> >
> >> This patch detects when that will happen and switches to using
> >> a slightly different algorithm that takes advantage of the PIT's
> >> latch comand.
> >
> > Is there really no smarter way to figure out the TSC frequency on
> > modern systems?
> 
> I just asked Len this question yesterday.  intel_pstate can do it,
> although the algorithm is a bit gross.

intel_pstate needs to know the model number. If you know the 
model number sure you can do a lot better (e.g. using the 
ref-clock fixed counter or some other methods)

But if you don't you need something else. And at some point
the only thing left over is the PIT.

-Andi

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH RFC] x86, tsc: Allow for high latency in quick_pit_calibrate()
  2015-06-02 19:43     ` Andi Kleen
@ 2015-06-02 19:58       ` Thomas Gleixner
  2015-06-02 20:03         ` Andy Lutomirski
  0 siblings, 1 reply; 48+ messages in thread
From: Thomas Gleixner @ 2015-06-02 19:58 UTC (permalink / raw)
  To: Andi Kleen
  Cc: Andy Lutomirski, Adrian Hunter, LKML, Linus Torvalds, X86 ML,
	H. Peter Anvin, Len Brown

On Tue, 2 Jun 2015, Andi Kleen wrote:
> On Tue, Jun 02, 2015 at 12:41:27PM -0700, Andy Lutomirski wrote:
> > On Tue, Jun 2, 2015 at 12:33 PM, Thomas Gleixner <tglx@linutronix.de> wrote:
> > > On Thu, 21 May 2015, Adrian Hunter wrote:
> > >
> > >> If it takes longer than 12us to read the PIT counter lsb/msb,
> > >> then the error margin will never fall below 500ppm within 50ms,
> > >> and Fast TSC calibration will always fail.
> > >
> > > So finally the legacy PIT emulation takes longer than the 30 years old
> > > hardware implementation. Progress!
> > >
> > >> This patch detects when that will happen and switches to using
> > >> a slightly different algorithm that takes advantage of the PIT's
> > >> latch comand.
> > >
> > > Is there really no smarter way to figure out the TSC frequency on
> > > modern systems?
> > 
> > I just asked Len this question yesterday.  intel_pstate can do it,
> > although the algorithm is a bit gross.

Well, the algorithm for that slow PIT emulation thing is definitely
not from the straight forward kind either.

> intel_pstate needs to know the model number. If you know the 
> model number sure you can do a lot better (e.g. using the 
> ref-clock fixed counter or some other methods)
> 
> But if you don't you need something else. And at some point
> the only thing left over is the PIT.

Right, and if we dont have a way to readout the stuff because the
kernel does not yet have support for that particular model it falls
back to PIT slow path in the worst case.

That's better than having another weird calibration routine which
might be outdated with the next generation of PIT emulation. We've
been there with the HPET deferred writes already. No need to have more
of this nonsense.

It's about time that Intel gets its act together and provides a proper
architected way to figure that out. Though I fear that will require
another 15 years of yelling (IIRC that was the time it took to get a
constant frequency TSC).

Thanks,

	tglx





^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH RFC] x86, tsc: Allow for high latency in quick_pit_calibrate()
  2015-06-02 19:58       ` Thomas Gleixner
@ 2015-06-02 20:03         ` Andy Lutomirski
  2015-06-02 20:20           ` Andi Kleen
  0 siblings, 1 reply; 48+ messages in thread
From: Andy Lutomirski @ 2015-06-02 20:03 UTC (permalink / raw)
  To: Thomas Gleixner
  Cc: Andi Kleen, Adrian Hunter, LKML, Linus Torvalds, X86 ML,
	H. Peter Anvin, Len Brown

On Tue, Jun 2, 2015 at 12:58 PM, Thomas Gleixner <tglx@linutronix.de> wrote:
> On Tue, 2 Jun 2015, Andi Kleen wrote:
>> On Tue, Jun 02, 2015 at 12:41:27PM -0700, Andy Lutomirski wrote:
>> > On Tue, Jun 2, 2015 at 12:33 PM, Thomas Gleixner <tglx@linutronix.de> wrote:
>> > > On Thu, 21 May 2015, Adrian Hunter wrote:
>> > >
>> > >> If it takes longer than 12us to read the PIT counter lsb/msb,
>> > >> then the error margin will never fall below 500ppm within 50ms,
>> > >> and Fast TSC calibration will always fail.
>> > >
>> > > So finally the legacy PIT emulation takes longer than the 30 years old
>> > > hardware implementation. Progress!
>> > >
>> > >> This patch detects when that will happen and switches to using
>> > >> a slightly different algorithm that takes advantage of the PIT's
>> > >> latch comand.
>> > >
>> > > Is there really no smarter way to figure out the TSC frequency on
>> > > modern systems?
>> >
>> > I just asked Len this question yesterday.  intel_pstate can do it,
>> > although the algorithm is a bit gross.
>
> Well, the algorithm for that slow PIT emulation thing is definitely
> not from the straight forward kind either.
>
>> intel_pstate needs to know the model number. If you know the
>> model number sure you can do a lot better (e.g. using the
>> ref-clock fixed counter or some other methods)
>>
>> But if you don't you need something else. And at some point
>> the only thing left over is the PIT.
>
> Right, and if we dont have a way to readout the stuff because the
> kernel does not yet have support for that particular model it falls
> back to PIT slow path in the worst case.
>
> That's better than having another weird calibration routine which
> might be outdated with the next generation of PIT emulation. We've
> been there with the HPET deferred writes already. No need to have more
> of this nonsense.
>
> It's about time that Intel gets its act together and provides a proper
> architected way to figure that out. Though I fear that will require
> another 15 years of yelling (IIRC that was the time it took to get a
> constant frequency TSC).

There's the code in tsc_msr.c.  It should be relatively
straightforward to extend it to cover everything that intel_pstate
supports.

But yes, Intel should add an MSR that gives the frequency in Hz.

--Andy

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH RFC] x86, tsc: Allow for high latency in quick_pit_calibrate()
  2015-06-02 20:03         ` Andy Lutomirski
@ 2015-06-02 20:20           ` Andi Kleen
  2015-06-02 21:03             ` Thomas Gleixner
  0 siblings, 1 reply; 48+ messages in thread
From: Andi Kleen @ 2015-06-02 20:20 UTC (permalink / raw)
  To: Andy Lutomirski
  Cc: Thomas Gleixner, Adrian Hunter, LKML, Linus Torvalds, X86 ML,
	H. Peter Anvin, Len Brown

> There's the code in tsc_msr.c.  It should be relatively
> straightforward to extend it to cover everything that intel_pstate
> supports.

That's a good idea, but we still need an always working fallback when the
model number is not available. So Adrian's patch is needed in any
case.

-Andi

-- 
ak@linux.intel.com -- Speaking for myself only

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH RFC] x86, tsc: Allow for high latency in quick_pit_calibrate()
  2015-06-02 20:20           ` Andi Kleen
@ 2015-06-02 21:03             ` Thomas Gleixner
  2015-06-02 23:38               ` Andi Kleen
  0 siblings, 1 reply; 48+ messages in thread
From: Thomas Gleixner @ 2015-06-02 21:03 UTC (permalink / raw)
  To: Andi Kleen
  Cc: Andy Lutomirski, Adrian Hunter, LKML, Linus Torvalds, X86 ML,
	H. Peter Anvin, Len Brown



On Tue, 2 Jun 2015, Andi Kleen wrote:

> > There's the code in tsc_msr.c.  It should be relatively
> > straightforward to extend it to cover everything that intel_pstate
> > supports.
> 
> That's a good idea, but we still need an always working fallback when the
> model number is not available. So Adrian's patch is needed in any
> case.
 
Nonsense. The slow calibration is already a working fallback.

Thanks,

	tglx
 

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH RFC] x86, tsc: Allow for high latency in quick_pit_calibrate()
  2015-06-02 21:03             ` Thomas Gleixner
@ 2015-06-02 23:38               ` Andi Kleen
  2015-06-03  0:21                 ` Andy Lutomirski
  0 siblings, 1 reply; 48+ messages in thread
From: Andi Kleen @ 2015-06-02 23:38 UTC (permalink / raw)
  To: Thomas Gleixner
  Cc: Andy Lutomirski, Adrian Hunter, LKML, Linus Torvalds, X86 ML,
	H. Peter Anvin, Len Brown

On Tue, Jun 02, 2015 at 11:03:26PM +0200, Thomas Gleixner wrote:
> 
> 
> On Tue, 2 Jun 2015, Andi Kleen wrote:
> 
> > > There's the code in tsc_msr.c.  It should be relatively
> > > straightforward to extend it to cover everything that intel_pstate
> > > supports.
> > 
> > That's a good idea, but we still need an always working fallback when the
> > model number is not available. So Adrian's patch is needed in any
> > case.
>  
> Nonsense. The slow calibration is already a working fallback.

Please read Adrian's description again. It's not working when the PIT read is
too slow. That is when the new algorithm is needed.

-Andi

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH RFC] x86, tsc: Allow for high latency in quick_pit_calibrate()
  2015-06-02 23:38               ` Andi Kleen
@ 2015-06-03  0:21                 ` Andy Lutomirski
  2015-06-03  0:39                   ` Andi Kleen
  0 siblings, 1 reply; 48+ messages in thread
From: Andy Lutomirski @ 2015-06-03  0:21 UTC (permalink / raw)
  To: Andi Kleen
  Cc: Thomas Gleixner, Adrian Hunter, LKML, Linus Torvalds, X86 ML,
	H. Peter Anvin, Len Brown

On Tue, Jun 2, 2015 at 4:38 PM, Andi Kleen <ak@linux.intel.com> wrote:
> On Tue, Jun 02, 2015 at 11:03:26PM +0200, Thomas Gleixner wrote:
>>
>>
>> On Tue, 2 Jun 2015, Andi Kleen wrote:
>>
>> > > There's the code in tsc_msr.c.  It should be relatively
>> > > straightforward to extend it to cover everything that intel_pstate
>> > > supports.
>> >
>> > That's a good idea, but we still need an always working fallback when the
>> > model number is not available. So Adrian's patch is needed in any
>> > case.
>>
>> Nonsense. The slow calibration is already a working fallback.
>
> Please read Adrian's description again. It's not working when the PIT read is
> too slow. That is when the new algorithm is needed.
>

tglx's suggestion was to use slow calibration as a fallback.

--Andy

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH RFC] x86, tsc: Allow for high latency in quick_pit_calibrate()
  2015-06-03  0:21                 ` Andy Lutomirski
@ 2015-06-03  0:39                   ` Andi Kleen
  2015-06-03  0:58                     ` Andy Lutomirski
  0 siblings, 1 reply; 48+ messages in thread
From: Andi Kleen @ 2015-06-03  0:39 UTC (permalink / raw)
  To: Andy Lutomirski
  Cc: Thomas Gleixner, Adrian Hunter, LKML, Linus Torvalds, X86 ML,
	H. Peter Anvin, Len Brown

On Tue, Jun 02, 2015 at 05:21:27PM -0700, Andy Lutomirski wrote:
> On Tue, Jun 2, 2015 at 4:38 PM, Andi Kleen <ak@linux.intel.com> wrote:
> > On Tue, Jun 02, 2015 at 11:03:26PM +0200, Thomas Gleixner wrote:
> >>
> >>
> >> On Tue, 2 Jun 2015, Andi Kleen wrote:
> >>
> >> > > There's the code in tsc_msr.c.  It should be relatively
> >> > > straightforward to extend it to cover everything that intel_pstate
> >> > > supports.
> >> >
> >> > That's a good idea, but we still need an always working fallback when the
> >> > model number is not available. So Adrian's patch is needed in any
> >> > case.
> >>
> >> Nonsense. The slow calibration is already a working fallback.
> >
> > Please read Adrian's description again. It's not working when the PIT read is
> > too slow. That is when the new algorithm is needed.
> >
> 
> tglx's suggestion was to use slow calibration as a fallback.

You mean the last fallback we have today?

That one doesn't work if the PIT read is too slow.

And Adrian's patch is fixing that.

-Andi

-- 
ak@linux.intel.com -- Speaking for myself only

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH RFC] x86, tsc: Allow for high latency in quick_pit_calibrate()
  2015-06-03  0:39                   ` Andi Kleen
@ 2015-06-03  0:58                     ` Andy Lutomirski
  2015-06-03  3:30                       ` Andi Kleen
  0 siblings, 1 reply; 48+ messages in thread
From: Andy Lutomirski @ 2015-06-03  0:58 UTC (permalink / raw)
  To: Andi Kleen
  Cc: Thomas Gleixner, Adrian Hunter, LKML, Linus Torvalds, X86 ML,
	H. Peter Anvin, Len Brown

On Tue, Jun 2, 2015 at 5:39 PM, Andi Kleen <ak@linux.intel.com> wrote:
> On Tue, Jun 02, 2015 at 05:21:27PM -0700, Andy Lutomirski wrote:
>> On Tue, Jun 2, 2015 at 4:38 PM, Andi Kleen <ak@linux.intel.com> wrote:
>> > On Tue, Jun 02, 2015 at 11:03:26PM +0200, Thomas Gleixner wrote:
>> >>
>> >>
>> >> On Tue, 2 Jun 2015, Andi Kleen wrote:
>> >>
>> >> > > There's the code in tsc_msr.c.  It should be relatively
>> >> > > straightforward to extend it to cover everything that intel_pstate
>> >> > > supports.
>> >> >
>> >> > That's a good idea, but we still need an always working fallback when the
>> >> > model number is not available. So Adrian's patch is needed in any
>> >> > case.
>> >>
>> >> Nonsense. The slow calibration is already a working fallback.
>> >
>> > Please read Adrian's description again. It's not working when the PIT read is
>> > too slow. That is when the new algorithm is needed.
>> >
>>
>> tglx's suggestion was to use slow calibration as a fallback.
>
> You mean the last fallback we have today?
>
> That one doesn't work if the PIT read is too slow.
>
> And Adrian's patch is fixing that.

Then the changelog should say that I think.  The current text says
"Fast TSC calibration will always fail", which, to me, suggests that
either the slow calibration will work or that the changelog message
should be changed.

--Andy

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH RFC] x86, tsc: Allow for high latency in quick_pit_calibrate()
  2015-06-03  0:58                     ` Andy Lutomirski
@ 2015-06-03  3:30                       ` Andi Kleen
  2015-06-03  8:13                         ` Adrian Hunter
  0 siblings, 1 reply; 48+ messages in thread
From: Andi Kleen @ 2015-06-03  3:30 UTC (permalink / raw)
  To: Andy Lutomirski
  Cc: Thomas Gleixner, Adrian Hunter, LKML, Linus Torvalds, X86 ML,
	H. Peter Anvin, Len Brown

> Then the changelog should say that I think.  The current text says
> "Fast TSC calibration will always fail", which, to me, suggests that
> either the slow calibration will work or that the changelog message
> should be changed.

Ok. No, the slow calibration works I believe.

-Andi

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH RFC] x86, tsc: Allow for high latency in quick_pit_calibrate()
  2015-06-02 19:33 ` Thomas Gleixner
  2015-06-02 19:41   ` Andy Lutomirski
@ 2015-06-03  4:20   ` Linus Torvalds
  2015-06-03  6:20     ` Ingo Molnar
  1 sibling, 1 reply; 48+ messages in thread
From: Linus Torvalds @ 2015-06-03  4:20 UTC (permalink / raw)
  To: Thomas Gleixner
  Cc: Adrian Hunter, LKML, Andy Lutomirski, Andi Kleen,
	the arch/x86 maintainers, H. Peter Anvin, Len Brown

On Tue, Jun 2, 2015 at 12:33 PM, Thomas Gleixner <tglx@linutronix.de> wrote:
>
> Is there really no smarter way to figure out the TSC frequency on
> modern systems?

Sadly, if there is, we have yet to find it.

I don't think the mentioned intel_pstate thing gets it right either.
Yes, it gets some "theoretical frequency", given the pstate and the
scaling factor, but that assumes the base clock (BCLK) is something
trustworthy. That's a big assumption, and I can pretty much guarantee
it's not a valid assumption.

I'm sure that's the *common* case, but how much do you want to bet
that some motherboard makers noticed that they get good press from
good benchmark numbers, and that they can tweak BCLK a bit higher than
the competition? "Oooh, motherboard from Xyz is really good, it's 1%
faster than the competition".

Or all those overclockers? Yes, some of them buy unlocked CPU's and
leave BCLK alone. Or maybe they do both. Or maybe they just buy locked
CPU's and tweak BCLK.

The only *well-defined* clock in a modern PC seems to still remain the
PIT. Yes, it's very sad.  But all the other clocks tend to be
untrustworthy for various reasons, like "frequency is stated in the
ACPI tables", which we know is basically just a fantasy that may be
right *most* of the time, but I can almost guarantee that some BIOS
guys decided that they can get slightly better random benchmark
numbers by "tweaking" the truth a bit. It's the "BIOS version" of
overclocking by increasing BCLK - lie about the PM frequency, so that
any benchmark that times things that way will give better numbers..

But pretty much nobody messes up the PIT. That will change, just
because it's clearly getting to the point where people are just
emulating it, and it's probably not going to be any more trustworthy
than anything else.

So rather than get a new and truly trustworthy reference clock, the
patch makes me suspect we're losing the old one...

              Linus

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH RFC] x86, tsc: Allow for high latency in quick_pit_calibrate()
  2015-06-03  4:20   ` [PATCH RFC] x86, tsc: Allow for high latency " Linus Torvalds
@ 2015-06-03  6:20     ` Ingo Molnar
  2015-06-03 13:43       ` Linus Torvalds
  0 siblings, 1 reply; 48+ messages in thread
From: Ingo Molnar @ 2015-06-03  6:20 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: Thomas Gleixner, Adrian Hunter, LKML, Andy Lutomirski,
	Andi Kleen, the arch/x86 maintainers, H. Peter Anvin, Len Brown


* Linus Torvalds <torvalds@linux-foundation.org> wrote:

> On Tue, Jun 2, 2015 at 12:33 PM, Thomas Gleixner <tglx@linutronix.de> wrote:
> >
> > Is there really no smarter way to figure out the TSC frequency on
> > modern systems?
> 
> Sadly, if there is, we have yet to find it.
> 
> I don't think the mentioned intel_pstate thing gets it right either. Yes, it 
> gets some "theoretical frequency", given the pstate and the scaling factor, but 
> that assumes the base clock (BCLK) is something trustworthy. That's a big 
> assumption, and I can pretty much guarantee it's not a valid assumption.
> 
> I'm sure that's the *common* case, but how much do you want to bet that some 
> motherboard makers noticed that they get good press from good benchmark numbers, 
> and that they can tweak BCLK a bit higher than the competition? "Oooh, 
> motherboard from Xyz is really good, it's 1% faster than the competition".
> 
> Or all those overclockers? Yes, some of them buy unlocked CPU's and leave BCLK 
> alone. Or maybe they do both. Or maybe they just buy locked CPU's and tweak 
> BCLK.
>
> The only *well-defined* clock in a modern PC seems to still remain the PIT. 
> [...]

and the HPET, which is pretty good as well, when available. In fact given that 
it's required to have a frequency of at least 10 MHz, and (unlike the PIT) has a 
pretty wide counter, it could be used for pretty accurate calibration as well that 
runs a lot shorter than PIT calibration.

HPETs have some quirks, and are sometimes emulated (on early hardware, when Window 
did not require HPET and the value of HPET was yet unclear?), but I'd say on 
modern x86 systems it ought to be a pretty good clock for calibration as well.

> [...] Yes, it's very sad.  But all the other clocks tend to be untrustworthy for 
> various reasons, like "frequency is stated in the ACPI tables", which we know is 
> basically just a fantasy that may be right *most* of the time, but I can almost 
> guarantee that some BIOS guys decided that they can get slightly better random 
> benchmark numbers by "tweaking" the truth a bit. It's the "BIOS version" of 
> overclocking by increasing BCLK - lie about the PM frequency, so that any 
> benchmark that times things that way will give better numbers..

So the HPET frequency comes straight from a hardware register, via:

        hpet_period = hpet_readl(HPET_PERIOD);

now you are right that the 'hardware' is in some cases is implemented via SMM 
virtualization - but so is the PIT I suspect. Given that Windows relies on the 
HPET for timekeeping, it might get more attention than the PIT?

> But pretty much nobody messes up the PIT. That will change, just because it's 
> clearly getting to the point where people are just emulating it, and it's 
> probably not going to be any more trustworthy than anything else.

So I think the reason why these tend to be real hardware most of the time is that 
both the PIT and the HPET are supposed to generate interrupts - which is not that 
easy to 'emulate' via SMM: you have to have a real irq-generating clock in the 
hardware _somewhere_, and that comes with counter and all, so not much left to 
emulate after that point.

What I think might happen is to emulate the PIT via HPET, the lower frequency 
clock with a higher frequency clock.

Thanks,

	Ingo

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH RFC] x86, tsc: Allow for high latency in quick_pit_calibrate()
  2015-06-03  3:30                       ` Andi Kleen
@ 2015-06-03  8:13                         ` Adrian Hunter
  2015-06-03 13:45                           ` Linus Torvalds
                                             ` (2 more replies)
  0 siblings, 3 replies; 48+ messages in thread
From: Adrian Hunter @ 2015-06-03  8:13 UTC (permalink / raw)
  To: Andi Kleen, Thomas Gleixner
  Cc: Andy Lutomirski, LKML, Linus Torvalds, X86 ML, H. Peter Anvin, Len Brown

On 03/06/15 06:30, Andi Kleen wrote:
>> Then the changelog should say that I think.  The current text says
>> "Fast TSC calibration will always fail", which, to me, suggests that
>> either the slow calibration will work or that the changelog message
>> should be changed.
> 
> Ok. No, the slow calibration works I believe.

Yeah, so the (only?) downside is the 50ms wasted on Fast TSC
calibration.  What about this?


From: Adrian Hunter <adrian.hunter@intel.com>
Date: Wed, 3 Jun 2015 10:39:46 +0300
Subject: [PATCH] x86, tsc: Let high latency PIT fail fast in
 quick_pit_calibrate()

If it takes longer than 12us to read the PIT counter lsb/msb,
then the error margin will never fall below 500ppm within 50ms,
and Fast TSC calibration will always fail.

This patch detects when that will happen and fails fast. Note
the failure message is not printed in that case because:
1. it will always happen on that class of hardware
2. the absence of the message is more informative than its
presence

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
---
 arch/x86/kernel/tsc.c | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index 7114c86af35e..673c4f25021f 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -598,10 +598,19 @@ static unsigned long quick_pit_calibrate(void)
 			if (!pit_expect_msb(0xff-i, &delta, &d2))
 				break;
 
+			delta -= tsc;
+
+			/*
+			 * Extrapolate the error and fail fast if the error will
+			 * never be below 500 ppm.
+			 */
+			if (i == 1 &&
+			    d1 + d2 >= (delta * MAX_QUICK_PIT_ITERATIONS) >> 11)
+				return 0;
+
 			/*
 			 * Iterate until the error is less than 500 ppm
 			 */
-			delta -= tsc;
 			if (d1+d2 >= delta >> 11)
 				continue;
 
-- 
1.9.1



^ permalink raw reply related	[flat|nested] 48+ messages in thread

* Re: [PATCH RFC] x86, tsc: Allow for high latency in quick_pit_calibrate()
  2015-06-03  6:20     ` Ingo Molnar
@ 2015-06-03 13:43       ` Linus Torvalds
  2015-06-03 16:47         ` Thomas Gleixner
  0 siblings, 1 reply; 48+ messages in thread
From: Linus Torvalds @ 2015-06-03 13:43 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Thomas Gleixner, Adrian Hunter, LKML, Andy Lutomirski,
	Andi Kleen, the arch/x86 maintainers, H. Peter Anvin, Len Brown

On Tue, Jun 2, 2015 at 11:20 PM, Ingo Molnar <mingo@kernel.org> wrote:
>
> and the HPET, which is pretty good as well, when available. In fact given that
> it's required to have a frequency of at least 10 MHz, and (unlike the PIT) has a
> pretty wide counter, it could be used for pretty accurate calibration as well that
> runs a lot shorter than PIT calibration.

Yeah, you're probably right that we should look into at least having
the option to use the HPET.

That said, the fact that we can read the period from HPET_PERIOD does
*not* make me trust it all that much. I suspect that register is just
another "filled in by firmware" piece of data.

> Given that Windows relies on the
> HPET for timekeeping, it might get more attention than the PIT?

Does Windows actually do that? Becuase if so, that's just about the
strongest argument for HPET there is - it's likely to work, and the
frequency is likely to be correct.

We've had issues with HPET, but for calibration it might very well be
the right thing to do.

Does anybody know what the base oscillator for HPET tends to be? Also,
some googling shows a vmware paper that is not that impressed with the
HPET.

The good thing about the PIT is that it's just *specified* to be
driven off a real crystal running at a very fixed frequency. There's
no gray areas there. Sure, virtualization can screw it up (but will
likely screw up other higher-resolution clocks even more), and hw
designers can cause problems, but it's been pretty reliable.

(Yeah, the CMOS RTC clock should be too, as George Spelvin points out.
That might be worth looking at too).

                           Linus

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH RFC] x86, tsc: Allow for high latency in quick_pit_calibrate()
  2015-06-03  8:13                         ` Adrian Hunter
@ 2015-06-03 13:45                           ` Linus Torvalds
  2015-06-04 11:28                             ` Adrian Hunter
  2015-06-03 16:23                           ` Thomas Gleixner
  2015-07-06  7:42                           ` [tip:x86/urgent] x86/tsc: Let high latency PIT fail fast " tip-bot for Adrian Hunter
  2 siblings, 1 reply; 48+ messages in thread
From: Linus Torvalds @ 2015-06-03 13:45 UTC (permalink / raw)
  To: Adrian Hunter
  Cc: Andi Kleen, Thomas Gleixner, Andy Lutomirski, LKML, X86 ML,
	H. Peter Anvin, Len Brown

On Wed, Jun 3, 2015 at 1:13 AM, Adrian Hunter <adrian.hunter@intel.com> wrote:
>
> Yeah, so the (only?) downside is the 50ms wasted on Fast TSC
> calibration.  What about this?

That's certainly simpler.

What platform is this? Do you have access to hw people you can shake
down and ask what clock we can really _trust_?

                    Linus

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH RFC] x86, tsc: Allow for high latency in quick_pit_calibrate()
  2015-06-03  8:13                         ` Adrian Hunter
  2015-06-03 13:45                           ` Linus Torvalds
@ 2015-06-03 16:23                           ` Thomas Gleixner
  2015-06-22 11:21                             ` Adrian Hunter
  2015-07-06  7:42                           ` [tip:x86/urgent] x86/tsc: Let high latency PIT fail fast " tip-bot for Adrian Hunter
  2 siblings, 1 reply; 48+ messages in thread
From: Thomas Gleixner @ 2015-06-03 16:23 UTC (permalink / raw)
  To: Adrian Hunter
  Cc: Andi Kleen, Andy Lutomirski, LKML, Linus Torvalds, X86 ML,
	H. Peter Anvin, Len Brown

On Wed, 3 Jun 2015, Adrian Hunter wrote:

> On 03/06/15 06:30, Andi Kleen wrote:
> >> Then the changelog should say that I think.  The current text says
> >> "Fast TSC calibration will always fail", which, to me, suggests that
> >> either the slow calibration will work or that the changelog message
> >> should be changed.
> > 
> > Ok. No, the slow calibration works I believe.
> 
> Yeah, so the (only?) downside is the 50ms wasted on Fast TSC
> calibration.  What about this?

I'm certainly happy to apply this one. 

Thanks,

	tglx

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH RFC] x86, tsc: Allow for high latency in quick_pit_calibrate()
  2015-06-03 13:43       ` Linus Torvalds
@ 2015-06-03 16:47         ` Thomas Gleixner
  2015-06-03 17:04           ` Linus Torvalds
  2015-06-03 17:06           ` Ingo Molnar
  0 siblings, 2 replies; 48+ messages in thread
From: Thomas Gleixner @ 2015-06-03 16:47 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: Ingo Molnar, Adrian Hunter, LKML, Andy Lutomirski, Andi Kleen,
	the arch/x86 maintainers, H. Peter Anvin, Len Brown

On Wed, 3 Jun 2015, Linus Torvalds wrote:
> On Tue, Jun 2, 2015 at 11:20 PM, Ingo Molnar <mingo@kernel.org> wrote:
> > Given that Windows relies on the
> > HPET for timekeeping, it might get more attention than the PIT?
> 
> Does Windows actually do that? Becuase if so, that's just about the
> strongest argument for HPET there is - it's likely to work, and the
> frequency is likely to be correct.

At least it used to. Not sure if it still does. 
 
> We've had issues with HPET, but for calibration it might very well be
> the right thing to do.

Right. The issues we had were on the clock events side caused by the
match register delayed writes. I've never seen a bug report about the
frequency being completely wrong, except for crap values which we
filter out. Though, HPET period can be off by more than 500ppm, but I
don't think that matters anymore for timekeeping as we switch to TSC
only after the long term calibration. The quick calibration is just
for getting the TSC frequency roughly correct for udelay.

> Does anybody know what the base oscillator for HPET tends to be? Also,

The most common frequency is 14.318180 MHz.

> some googling shows a vmware paper that is not that impressed with the
> HPET.

Yeah, they complain about period being off by 600ppm and therefor
being useless for timekeeping, but that's not what we want to use it
for.
 
> The good thing about the PIT is that it's just *specified* to be
> driven off a real crystal running at a very fixed frequency. There's
> no gray areas there. Sure, virtualization can screw it up (but will
> likely screw up other higher-resolution clocks even more), and hw
> designers can cause problems, but it's been pretty reliable.

The other known frequency clock is pmtimer. We use hpet and pmtimer in
the slow calibration fallback already.
 
> (Yeah, the CMOS RTC clock should be too, as George Spelvin points out.
> That might be worth looking at too).

Indeed.

Thanks,

	tglx

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH RFC] x86, tsc: Allow for high latency in quick_pit_calibrate()
  2015-06-03 16:47         ` Thomas Gleixner
@ 2015-06-03 17:04           ` Linus Torvalds
  2015-06-03 17:50             ` H. Peter Anvin
  2015-06-03 17:06           ` Ingo Molnar
  1 sibling, 1 reply; 48+ messages in thread
From: Linus Torvalds @ 2015-06-03 17:04 UTC (permalink / raw)
  To: Thomas Gleixner
  Cc: Ingo Molnar, Adrian Hunter, LKML, Andy Lutomirski, Andi Kleen,
	the arch/x86 maintainers, H. Peter Anvin, Len Brown

On Wed, Jun 3, 2015 at 9:47 AM, Thomas Gleixner <tglx@linutronix.de> wrote:
>
>> Does anybody know what the base oscillator for HPET tends to be? Also,
>
> The most common frequency is 14.318180 MHz.

I was more thinking along the lines of "which actually crystal is
driving it" than the frequency.

That said, that particular frequency does give a clue. That's exactly
12x the PIT frequency. So they presumably share the same basic
crystal, just different PLL's.

Which is a good indication that we should get very similar clock
stability with the HPET as with the PIC.

We might even use that kind of knowledge to decide "ok, let's use the
HPET as the reference clock". IOW, only use the HPET if we find it
listed, _and_ we get the expected frequency from the HPET_PERIOD
register.

Anybody who gives odd HPET_PERIOD values is likely doing somehing
different and possibly questionable.

            Linus

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH RFC] x86, tsc: Allow for high latency in quick_pit_calibrate()
  2015-06-03 16:47         ` Thomas Gleixner
  2015-06-03 17:04           ` Linus Torvalds
@ 2015-06-03 17:06           ` Ingo Molnar
  1 sibling, 0 replies; 48+ messages in thread
From: Ingo Molnar @ 2015-06-03 17:06 UTC (permalink / raw)
  To: Thomas Gleixner
  Cc: Linus Torvalds, Adrian Hunter, LKML, Andy Lutomirski, Andi Kleen,
	the arch/x86 maintainers, H. Peter Anvin, Len Brown


* Thomas Gleixner <tglx@linutronix.de> wrote:

> On Wed, 3 Jun 2015, Linus Torvalds wrote:
> > On Tue, Jun 2, 2015 at 11:20 PM, Ingo Molnar <mingo@kernel.org> wrote:
> > >
> > > Given that Windows relies on the HPET for timekeeping, it might get more 
> > > attention than the PIT?
> > 
> > Does Windows actually do that? Becuase if so, that's just about the strongest 
> > argument for HPET there is - it's likely to work, and the frequency is likely 
> > to be correct.
> 
> At least it used to. Not sure if it still does.

So I googled around a bit, and it's not as clear-cut as I thought initially: it 
appears that if the BIOS enables the HPET then modern Windows (Windows 7 and 
later) will use it, but there are problems and Windows XP (the largest installed 
base) used the TSC+acpipm. (two other lovely clocks)

> > We've had issues with HPET, but for calibration it might very well be the 
> > right thing to do.
> 
> Right. The issues we had were on the clock events side caused by the match 
> register delayed writes. I've never seen a bug report about the frequency being 
> completely wrong, except for crap values which we filter out. Though, HPET 
> period can be off by more than 500ppm, but I don't think that matters anymore 
> for timekeeping as we switch to TSC only after the long term calibration. The 
> quick calibration is just for getting the TSC frequency roughly correct for 
> udelay.

Yes - so the frequency being off due to production variances (or sheer firmware 
incompetence) isn't a big deal in itself: having boot-to-boot jitter during fast 
calibration would be, and that's the big question.

The nice thing about the PIT calibration is that it usually provides very little 
jitter, so that driver delays/etc. are as boot-to-boot identical as possible.

Anyway, the HPET might be worth an attempt - but I'd not be surprised if we 
discovered another rotten apple ...

Thanks,

	Ingo

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH RFC] x86, tsc: Allow for high latency in quick_pit_calibrate()
  2015-06-03 17:04           ` Linus Torvalds
@ 2015-06-03 17:50             ` H. Peter Anvin
  2015-06-04 12:32               ` Ingo Molnar
  0 siblings, 1 reply; 48+ messages in thread
From: H. Peter Anvin @ 2015-06-03 17:50 UTC (permalink / raw)
  To: Linus Torvalds, Thomas Gleixner
  Cc: Ingo Molnar, Adrian Hunter, LKML, Andy Lutomirski, Andi Kleen,
	the arch/x86 maintainers, Len Brown

On 06/03/2015 10:04 AM, Linus Torvalds wrote:
> On Wed, Jun 3, 2015 at 9:47 AM, Thomas Gleixner <tglx@linutronix.de> wrote:
>>
>>> Does anybody know what the base oscillator for HPET tends to be? Also,
>>
>> The most common frequency is 14.318180 MHz.
> 
> I was more thinking along the lines of "which actually crystal is
> driving it" than the frequency.
> 
> That said, that particular frequency does give a clue. That's exactly
> 12x the PIT frequency. So they presumably share the same basic
> crystal, just different PLL's.

Here are the gory details as far as I know them:

This is the default time keeping crystal -- it is 4x the NTSC color
burst frequency, which has been used since the original PC.  It is
actually supposed to be 157.5/11 MHz = 14.31818... MHz ±40 Hz, there is
no zero even though it is often seen in docs.

However, the HPET isn't guaranteed to run at this frequency.  The HPET
frequency is advertised via a control register in HPET space (at offset
0x04, 32-bit COUNTER_CLK_PERIOD in fs).  This register is documented as
readonly

The RTC is probably the most reliable reference clock, in part because
32 kHz crystals are generally calibrated and extremely stable.  However,
to get more than 1 Hz frequency out of it you have to enable interrupts
(which gets you to 8192 Hz).  I have heard claims that this is actually
what Windows uses for calibration, but I don't know.

The ACPI PM_TMR is also fed from the 14.31818 MHz clock () with a
divisor of 4 (putting it exactly at the NTSC color burst frequency of
3.579545... MHz ± 10 Hz).  The PM_TMR, if present, is required to run at
this frequency -- there are no provisions in ACPI for specifying the
frequency.

> Which is a good indication that we should get very similar clock
> stability with the HPET as with the PIC.
> 
> We might even use that kind of knowledge to decide "ok, let's use the
> HPET as the reference clock". IOW, only use the HPET if we find it
> listed, _and_ we get the expected frequency from the HPET_PERIOD
> register.
> 
> Anybody who gives odd HPET_PERIOD values is likely doing somehing
> different and possibly questionable.

It would definitely be fishy if PM_TMR is also supported, since PM_TMR
has to run at a fixed frequency.

The HPET frequency register would be 0x429b176 if run off the 14 MHz clock.

However, if someone does something different the PIC/PM_TMR frequency
might very well be synthesized (or worse, approximated!) and the HPET
might be run off the the real BCLK.  This is a legitimate design.
PIC/PM_TMR being synthesized is fine (might introduce some jitter, but
shouldn't affect calibration), approximated is not...

	-hpa


^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH RFC] x86, tsc: Allow for high latency in quick_pit_calibrate()
  2015-06-03 13:45                           ` Linus Torvalds
@ 2015-06-04 11:28                             ` Adrian Hunter
  0 siblings, 0 replies; 48+ messages in thread
From: Adrian Hunter @ 2015-06-04 11:28 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: Andi Kleen, Thomas Gleixner, Andy Lutomirski, LKML, X86 ML,
	H. Peter Anvin, Len Brown

On 03/06/15 16:45, Linus Torvalds wrote:
> On Wed, Jun 3, 2015 at 1:13 AM, Adrian Hunter <adrian.hunter@intel.com> wrote:
>>
>> Yeah, so the (only?) downside is the 50ms wasted on Fast TSC
>> calibration.  What about this?
> 
> That's certainly simpler.
> 
> What platform is this?

It's a prototype.



^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH RFC] x86, tsc: Allow for high latency in quick_pit_calibrate()
  2015-06-03 17:50             ` H. Peter Anvin
@ 2015-06-04 12:32               ` Ingo Molnar
  0 siblings, 0 replies; 48+ messages in thread
From: Ingo Molnar @ 2015-06-04 12:32 UTC (permalink / raw)
  To: H. Peter Anvin
  Cc: Linus Torvalds, Thomas Gleixner, Adrian Hunter, LKML,
	Andy Lutomirski, Andi Kleen, the arch/x86 maintainers, Len Brown


* H. Peter Anvin <hpa@zytor.com> wrote:

> [...]
> 
> The RTC is probably the most reliable reference clock, in part because 32 kHz 
> crystals are generally calibrated and extremely stable.  However, to get more 
> than 1 Hz frequency out of it you have to enable interrupts (which gets you to 
> 8192 Hz).  I have heard claims that this is actually what Windows uses for 
> calibration, but I don't know.

So I ran a few tests to determine how reliable the RTC clock is at 8192 Hz, and 
running it during early bootup gives this type of jitter:

[    0.000000] tsc: RTC IRQ     0, at  30566853736, delta:           30, jitter:           30
[    0.000000] tsc: RTC IRQ     1, at  30567333663, delta:       479927, jitter:       479897
[    0.000000] tsc: RTC IRQ     2, at  30567578651, delta:       244988, jitter:      -234939
[    0.000000] tsc: RTC IRQ     3, at  30567825147, delta:       246496, jitter:         1508
[    0.000000] tsc: RTC IRQ     4, at  30568070180, delta:       245033, jitter:        -1463
[    0.000000] tsc: RTC IRQ     5, at  30568315220, delta:       245040, jitter:            7
[    0.000000] tsc: RTC IRQ     6, at  30568561747, delta:       246527, jitter:         1487
[    0.000000] tsc: RTC IRQ     7, at  30568806794, delta:       245047, jitter:        -1480
[    0.000000] tsc: RTC IRQ     8, at  30569051789, delta:       244995, jitter:          -52
[    0.000000] tsc: RTC IRQ     9, at  30569296824, delta:       245035, jitter:           40
[    0.000000] tsc: RTC IRQ    10, at  30569543304, delta:       246480, jitter:         1445
[    0.000000] tsc: RTC IRQ    11, at  30569788346, delta:       245042, jitter:        -1438
[    0.000000] tsc: RTC IRQ    12, at  30570033393, delta:       245047, jitter:            5
[    0.000000] tsc: RTC IRQ    13, at  30570278433, delta:       245040, jitter:           -7
[    0.000000] tsc: RTC IRQ    14, at  30570524894, delta:       246461, jitter:         1421
[    0.000000] tsc: RTC IRQ    15, at  30570769950, delta:       245056, jitter:        -1405
[    0.000000] tsc: RTC IRQ    16, at  30571014993, delta:       245043, jitter:          -13
[    0.000000] tsc: RTC IRQ    17, at  30571260024, delta:       245031, jitter:          -12
[    0.000000] tsc: RTC IRQ    18, at  30571506504, delta:       246480, jitter:         1449
[    0.000000] tsc: RTC IRQ    19, at  30571751560, delta:       245056, jitter:        -1424
[    0.000000] tsc: RTC IRQ    20, at  30571996577, delta:       245017, jitter:          -39
[    0.000000] tsc: RTC IRQ    21, at  30572243072, delta:       246495, jitter:         1478
[    0.000000] tsc: RTC IRQ    22, at  30572488121, delta:       245049, jitter:        -1446
[    0.000000] tsc: RTC IRQ    23, at  30572733125, delta:       245004, jitter:          -45
[    0.000000] tsc: RTC IRQ    24, at  30572978181, delta:       245056, jitter:           52
[    0.000000] tsc: RTC IRQ    25, at  30573224673, delta:       246492, jitter:         1436
[    0.000000] tsc: RTC IRQ    26, at  30573469712, delta:       245039, jitter:        -1453
[    0.000000] tsc: RTC IRQ    27, at  30573714760, delta:       245048, jitter:            9
[    0.000000] tsc: RTC IRQ    28, at  30573959800, delta:       245040, jitter:           -8
[    0.000000] tsc: RTC IRQ    29, at  30574206272, delta:       246472, jitter:         1432
[    0.000000] tsc: RTC IRQ    30, at  30574451320, delta:       245048, jitter:        -1424
[    0.000000] tsc: RTC IRQ    31, at  30574696335, delta:       245015, jitter:          -33
[    0.000000] tsc: RTC IRQ    32, at  30574941390, delta:       245055, jitter:           40
[    0.000000] tsc: RTC IRQ    33, at  30575187872, delta:       246482, jitter:         1427
[    0.000000] tsc: RTC IRQ    34, at  30575432905, delta:       245033, jitter:        -1449
[    0.000000] tsc: RTC IRQ    35, at  30575677944, delta:       245039, jitter:            6
[    0.000000] tsc: RTC IRQ    36, at  30575924434, delta:       246490, jitter:         1451
[    0.000000] tsc: RTC IRQ    37, at  30576169465, delta:       245031, jitter:        -1459
[    0.000000] tsc: RTC IRQ    38, at  30576414504, delta:       245039, jitter:            8
[    0.000000] tsc: RTC IRQ    39, at  30576659542, delta:       245038, jitter:           -1
[    0.000000] tsc: RTC IRQ    40, at  30576906030, delta:       246488, jitter:         1450
[    0.000000] tsc: RTC IRQ    41, at  30577151072, delta:       245042, jitter:        -1446
[    0.000000] tsc: RTC IRQ    42, at  30577396105, delta:       245033, jitter:           -9
[    0.000000] tsc: RTC IRQ    43, at  30577641144, delta:       245039, jitter:            6
[    0.000000] tsc: RTC IRQ    44, at  30577887632, delta:       246488, jitter:         1449
[    0.000000] tsc: RTC IRQ    45, at  30578132665, delta:       245033, jitter:        -1455
[    0.000000] tsc: RTC IRQ    46, at  30578377704, delta:       245039, jitter:            6
[    0.000000] tsc: RTC IRQ    47, at  30578622749, delta:       245045, jitter:            6
[    0.000000] tsc: RTC IRQ    48, at  30578869230, delta:       246481, jitter:         1436
[    0.000000] tsc: RTC IRQ    49, at  30579114272, delta:       245042, jitter:        -1439
[    0.000000] tsc: RTC IRQ    50, at  30581814501, delta:      2700229, jitter:      2455187
[    0.000000] tsc: RTC IRQ    51, at  30582059549, delta:       245048, jitter:     -2455181
[    0.000000] tsc: RTC IRQ    52, at  30582304584, delta:       245035, jitter:          -13
[    0.000000] tsc: RTC IRQ    53, at  30582549631, delta:       245047, jitter:           12
[    0.000000] tsc: RTC IRQ    54, at  30582796117, delta:       246486, jitter:         1439
[    0.000000] tsc: RTC IRQ    55, at  30583041136, delta:       245019, jitter:        -1467
[    0.000000] tsc: RTC IRQ    56, at  30583286184, delta:       245048, jitter:           29
[    0.000000] tsc: RTC IRQ    57, at  30583532682, delta:       246498, jitter:         1450
[    0.000000] tsc: RTC IRQ    58, at  30583777720, delta:       245038, jitter:        -1460
[    0.000000] tsc: RTC IRQ    59, at  30584022745, delta:       245025, jitter:          -13
[    0.000000] tsc: RTC IRQ    60, at  30584267784, delta:       245039, jitter:           14
[    0.000000] tsc: RTC IRQ    61, at  30584514273, delta:       246489, jitter:         1450
[    0.000000] tsc: RTC IRQ    62, at  30584759311, delta:       245038, jitter:        -1451
[    0.000000] tsc: RTC IRQ    63, at  30585004357, delta:       245046, jitter:            8
[    0.000000] tsc: RTC IRQ    64, at  30585249385, delta:       245028, jitter:          -18
[    0.000000] tsc: RTC IRQ    65, at  30585495880, delta:       246495, jitter:         1467
[    0.000000] tsc: RTC IRQ    66, at  30585740908, delta:       245028, jitter:        -1467
[    0.000000] tsc: RTC IRQ    67, at  30585985953, delta:       245045, jitter:           17
[    0.000000] tsc: RTC IRQ    68, at  30586232425, delta:       246472, jitter:         1427
[    0.000000] tsc: RTC IRQ    69, at  30586477464, delta:       245039, jitter:        -1433
[    0.000000] tsc: RTC IRQ    70, at  30586722493, delta:       245029, jitter:          -10
[    0.000000] tsc: RTC IRQ    71, at  30586967516, delta:       245023, jitter:           -6
[    0.000000] tsc: RTC IRQ    72, at  30587213989, delta:       246473, jitter:         1450
[    0.000000] tsc: RTC IRQ    73, at  30587459029, delta:       245040, jitter:        -1433
[    0.000000] tsc: RTC IRQ    74, at  30587704077, delta:       245048, jitter:            8
[    0.000000] tsc: RTC IRQ    75, at  30587949125, delta:       245048, jitter:            0
[    0.000000] tsc: RTC IRQ    76, at  30588195597, delta:       246472, jitter:         1424
[    0.000000] tsc: RTC IRQ    77, at  30588440629, delta:       245032, jitter:        -1440
[    0.000000] tsc: RTC IRQ    78, at  30588685660, delta:       245031, jitter:           -1
[    0.000000] tsc: RTC IRQ    79, at  30588930716, delta:       245056, jitter:           25
[    0.000000] tsc: RTC IRQ    80, at  30589177189, delta:       246473, jitter:         1417
[    0.000000] tsc: RTC IRQ    81, at  30589422229, delta:       245040, jitter:        -1433
[    0.000000] tsc: RTC IRQ    82, at  30589667285, delta:       245056, jitter:           16
[    0.000000] tsc: RTC IRQ    83, at  30589913760, delta:       246475, jitter:         1419
[    0.000000] tsc: RTC IRQ    84, at  30590158798, delta:       245038, jitter:        -1437
[    0.000000] tsc: RTC IRQ    85, at  30590403837, delta:       245039, jitter:            1
[    0.000000] tsc: RTC IRQ    86, at  30590648866, delta:       245029, jitter:          -10
[    0.000000] tsc: RTC IRQ    87, at  30590895346, delta:       246480, jitter:         1451
[    0.000000] tsc: RTC IRQ    88, at  30591140397, delta:       245051, jitter:        -1429
[    0.000000] tsc: RTC IRQ    89, at  30591385446, delta:       245049, jitter:           -2
[    0.000000] tsc: RTC IRQ    90, at  30591630477, delta:       245031, jitter:          -18
[    0.000000] tsc: RTC IRQ    91, at  30591876949, delta:       246472, jitter:         1441
[    0.000000] tsc: RTC IRQ    92, at  30592121998, delta:       245049, jitter:        -1423
[    0.000000] tsc: RTC IRQ    93, at  30592367029, delta:       245031, jitter:          -18
[    0.000000] tsc: RTC IRQ    94, at  30592612075, delta:       245046, jitter:           15
[    0.000000] tsc: RTC IRQ    95, at  30592858555, delta:       246480, jitter:         1434
[    0.000000] tsc: RTC IRQ    96, at  30593103597, delta:       245042, jitter:        -1438
[    0.000000] tsc: RTC IRQ    97, at  30593348630, delta:       245033, jitter:           -9
[    0.000000] tsc: RTC IRQ    98, at  30593595101, delta:       246471, jitter:         1438
[    0.000000] tsc: RTC IRQ    99, at  30593840157, delta:       245056, jitter:        -1415
[    0.000000] tsc: RTC IRQ   100, at  30594085197, delta:       245040, jitter:          -16

The time measurements are in cycles, the TSC on this box runs at 2.010 GHz.

What you can see is that the jitter is pretty stable, with some harmonics in it.

There's outliners that occur at about 80 Hz frequency:

[    0.000000] tsc: RTC IRQ    50, at  30581814501, delta:      2700229, jitter:      2455187
[    0.000000] tsc: RTC IRQ    51, at  30582059549, delta:       245048, jitter:     -2455181

That's too frequent to be the RTC update delay - but its length, 1.3 msecs, comes 
close to that delay - so maybe that is it. We could filter out these outliner 
during calibration.

With that I think we could get down to a jitter of around 10 ppm with just a few 
dozen samples - i.e. collect it all in just 2-3 milliseconds.

So that seems desirable and in would bring our calibration accuracy to the same 
level as our delayed-work based 'refined TSC' calibration method is.

The big practical problem I found is implementational. Boot dependencies are 
horrible:

 - one problem is that the IO-APIC is not enabled yet when we calibrate these 
   values. With vile hacks I was able to enable the RTC during early boot by 
   forcing an early init of the IO-APIC, but I found no robust method.

 - there's also a catch-22 with time init: IO-APIC init relies on time init. I 
   wasn't successful at untangling it.

 - I also tried to activate the RTC IRQ in the i8259 PIC during early boot, so 
   that we can just use it to sample the RTC and then forget about that state, but
   didn't succeed.

all in one, receiving RTC IRQs so early in the bootup was a pretty fragile affair.

 - Alternatively, I also tried a different method: to set up the RTC periodic IRQ 
   during early boot, but not have an IRQ handler, polling RTC_PF in the 
   rtc_cmos_read(RTC_INTR_FLAGS) IRQ status byte.

   Unfortunately when I do this then PIO based RTC accesses can take tens of
   thousands of cycles, and the resulting jitter is pretty bad and hard to filter:

[    0.000000] tsc: RTC edge 57 from  0 to 64, at  29694678517, delta:         246360, jitter:         2456, loops:            7,        35194 cycles/loop
[    0.000000] tsc: RTC edge 58 from  0 to 64, at  29695169485, delta:         490968, jitter:       244608, loops:          118,         4160 cycles/loop
[    0.000000] tsc: RTC edge 59 from  0 to 64, at  29695413981, delta:         244496, jitter:      -246472, loops:            6,        40749 cycles/loop
[    0.000000] tsc: RTC edge 60 from  0 to 64, at  29695660661, delta:         246680, jitter:         2184, loops:            7,        35240 cycles/loop
[    0.000000] tsc: RTC edge 61 from  0 to 64, at  29695904853, delta:         244192, jitter:        -2488, loops:            6,        40698 cycles/loop
[    0.000000] tsc: RTC edge 62 from  0 to 64, at  29696151141, delta:         246288, jitter:         2096, loops:            7,        35184 cycles/loop
[    0.000000] tsc: RTC edge 63 from  0 to 64, at  29696396445, delta:         245304, jitter:         -984, loops:            6,        40884 cycles/loop
[    0.000000] tsc: RTC edge 64 from  0 to 64, at  29696642669, delta:         246224, jitter:          920, loops:            7,        35174 cycles/loop
[    0.000000] tsc: RTC edge 65 from  0 to 64, at  29696887245, delta:         244576, jitter:        -1648, loops:            6,        40762 cycles/loop
[    0.000000] tsc: RTC edge 66 from  0 to 64, at  29697377909, delta:         490664, jitter:       246088, loops:          117,         4193 cycles/loop
[    0.000000] tsc: RTC edge 67 from  0 to 64, at  29697622701, delta:         244792, jitter:      -245872, loops:            6,        40798 cycles/loop
[    0.000000] tsc: RTC edge 68 from  0 to 64, at  29697868773, delta:         246072, jitter:         1280, loops:            7,        35153 cycles/loop
[    0.000000] tsc: RTC edge 69 from  0 to 64, at  29700569301, delta:        2700528, jitter:      2454456, loops:           13,       207732 cycles/loop
[    0.000000] tsc: RTC edge 70 from  0 to 64, at  29700813805, delta:         244504, jitter:     -2456024, loops:            6,        40750 cycles/loop
[    0.000000] tsc: RTC edge 71 from  0 to 64, at  29701060125, delta:         246320, jitter:         1816, loops:            7,        35188 cycles/loop
[    0.000000] tsc: RTC edge 72 from  0 to 64, at  29701550189, delta:         490064, jitter:       243744, loops:          117,         4188 cycles/loop
[    0.000000] tsc: RTC edge 73 from  0 to 64, at  29701796677, delta:         246488, jitter:      -243576, loops:            7,        35212 cycles/loop
[    0.000000] tsc: RTC edge 74 from  0 to 64, at  29702040829, delta:         244152, jitter:        -2336, loops:            6,        40692 cycles/loop
[    0.000000] tsc: RTC edge 75 from  0 to 64, at  29702287597, delta:         246768, jitter:         2616, loops:            7,        35252 cycles/loop
[    0.000000] tsc: RTC edge 76 from  0 to 64, at  29702531741, delta:         244144, jitter:        -2624, loops:            6,        40690 cycles/loop
[    0.000000] tsc: RTC edge 77 from  0 to 64, at  29702778341, delta:         246600, jitter:         2456, loops:            7,        35228 cycles/loop
[    0.000000] tsc: RTC edge 78 from  0 to 64, at  29703022661, delta:         244320, jitter:        -2280, loops:            6,        40720 cycles/loop
[    0.000000] tsc: RTC edge 79 from  0 to 64, at  29703514245, delta:         491584, jitter:       247264, loops:          118,         4165 cycles/loop
[    0.000000] tsc: RTC edge 80 from  0 to 64, at  29703759165, delta:         244920, jitter:      -246664, loops:            6,        40820 cycles/loop
[    0.000000] tsc: RTC edge 81 from  0 to 64, at  29704005397, delta:         246232, jitter:         1312, loops:            7,        35176 cycles/loop
[    0.000000] tsc: RTC edge 82 from  0 to 64, at  29704249589, delta:         244192, jitter:        -2040, loops:            6,        40698 cycles/loop

   and this was on a pretty standard whitebox PC, I'm pessimistic about how this 
   would work on more exotic machines.

So I think we should stay with the PIT fast-calibration method, it's very fast in 
most cases.

Thanks,

	Ingo

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH RFC] x86, tsc: Allow for high latency in quick_pit_calibrate()
  2015-06-03 16:23                           ` Thomas Gleixner
@ 2015-06-22 11:21                             ` Adrian Hunter
  2015-06-22 13:14                               ` Thomas Gleixner
  2015-06-22 14:12                               ` George Spelvin
  0 siblings, 2 replies; 48+ messages in thread
From: Adrian Hunter @ 2015-06-22 11:21 UTC (permalink / raw)
  To: Thomas Gleixner
  Cc: Andi Kleen, Andy Lutomirski, LKML, Linus Torvalds, X86 ML,
	H. Peter Anvin, Len Brown, George Spelvin, Ingo Molnar,
	a.p.zijlstra, arjan, bp, penberg, akpm

On 03/06/15 19:23, Thomas Gleixner wrote:
> On Wed, 3 Jun 2015, Adrian Hunter wrote:
> 
>> On 03/06/15 06:30, Andi Kleen wrote:
>>>> Then the changelog should say that I think.  The current text says
>>>> "Fast TSC calibration will always fail", which, to me, suggests that
>>>> either the slow calibration will work or that the changelog message
>>>> should be changed.
>>>
>>> Ok. No, the slow calibration works I believe.
>>
>> Yeah, so the (only?) downside is the 50ms wasted on Fast TSC
>> calibration.  What about this?
> 
> I'm certainly happy to apply this one. 

George Spelvin began investigating improving quick_pit_calibrate() but Ingo
anyway suggested this patch as the first, so can this be applied?

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
Please read the FAQ at  http://www.tux.org/lkml/

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH RFC] x86, tsc: Allow for high latency in quick_pit_calibrate()
  2015-06-22 11:21                             ` Adrian Hunter
@ 2015-06-22 13:14                               ` Thomas Gleixner
  2015-07-06  6:48                                 ` Adrian Hunter
  2015-06-22 14:12                               ` George Spelvin
  1 sibling, 1 reply; 48+ messages in thread
From: Thomas Gleixner @ 2015-06-22 13:14 UTC (permalink / raw)
  To: Adrian Hunter
  Cc: Andi Kleen, Andy Lutomirski, LKML, Linus Torvalds, X86 ML,
	H. Peter Anvin, Len Brown, George Spelvin, Ingo Molnar,
	a.p.zijlstra, arjan, bp, penberg, akpm

On Mon, 22 Jun 2015, Adrian Hunter wrote:
> On 03/06/15 19:23, Thomas Gleixner wrote:
> > On Wed, 3 Jun 2015, Adrian Hunter wrote:
> > 
> >> On 03/06/15 06:30, Andi Kleen wrote:
> >>>> Then the changelog should say that I think.  The current text says
> >>>> "Fast TSC calibration will always fail", which, to me, suggests that
> >>>> either the slow calibration will work or that the changelog message
> >>>> should be changed.
> >>>
> >>> Ok. No, the slow calibration works I believe.
> >>
> >> Yeah, so the (only?) downside is the 50ms wasted on Fast TSC
> >> calibration.  What about this?
> > 
> > I'm certainly happy to apply this one. 
> 
> George Spelvin began investigating improving quick_pit_calibrate() but Ingo
> anyway suggested this patch as the first, so can this be applied?

Oh, yes. I simply forgot to pick it up. Thanks for the reminder.

    tglx
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
Please read the FAQ at  http://www.tux.org/lkml/

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH RFC] x86, tsc: Allow for high latency in quick_pit_calibrate()
  2015-06-22 11:21                             ` Adrian Hunter
  2015-06-22 13:14                               ` Thomas Gleixner
@ 2015-06-22 14:12                               ` George Spelvin
  1 sibling, 0 replies; 48+ messages in thread
From: George Spelvin @ 2015-06-22 14:12 UTC (permalink / raw)
  To: adrian.hunter, tglx
  Cc: a.p.zijlstra, ak, akpm, arjan, bp, hpa, lenb, linux-kernel,
	linux, luto, mingo, penberg, torvalds, x86

Adrian Hunter <adrian.hunter@intel.com> wrote:
> On 03/06/15 19:23, Thomas Gleixner wrote:
>> I'm certainly happy to apply this one. 
> 
> George Spelvin began investigating improving quick_pit_calibrate() but Ingo
> anyway suggested this patch as the first, so can this be applied?

Acked-by: George Spelvin <linux@horizon.com>

It's currently the first patch in the series I'm developing, but
if someone wants to beat me to it, no objection.
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
Please read the FAQ at  http://www.tux.org/lkml/

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH RFC] x86, tsc: Allow for high latency in quick_pit_calibrate()
  2015-06-22 13:14                               ` Thomas Gleixner
@ 2015-07-06  6:48                                 ` Adrian Hunter
  2015-07-06  7:42                                   ` Thomas Gleixner
  0 siblings, 1 reply; 48+ messages in thread
From: Adrian Hunter @ 2015-07-06  6:48 UTC (permalink / raw)
  To: Thomas Gleixner
  Cc: Andi Kleen, Andy Lutomirski, LKML, Linus Torvalds, X86 ML,
	H. Peter Anvin, Len Brown, George Spelvin, Ingo Molnar,
	a.p.zijlstra, arjan, bp, penberg, akpm

On 22/06/15 16:14, Thomas Gleixner wrote:
> On Mon, 22 Jun 2015, Adrian Hunter wrote:
>> On 03/06/15 19:23, Thomas Gleixner wrote:
>>> On Wed, 3 Jun 2015, Adrian Hunter wrote:
>>>
>>>> On 03/06/15 06:30, Andi Kleen wrote:
>>>>>> Then the changelog should say that I think.  The current text says
>>>>>> "Fast TSC calibration will always fail", which, to me, suggests that
>>>>>> either the slow calibration will work or that the changelog message
>>>>>> should be changed.
>>>>>
>>>>> Ok. No, the slow calibration works I believe.
>>>>
>>>> Yeah, so the (only?) downside is the 50ms wasted on Fast TSC
>>>> calibration.  What about this?
>>>
>>> I'm certainly happy to apply this one. 
>>
>> George Spelvin began investigating improving quick_pit_calibrate() but Ingo
>> anyway suggested this patch as the first, so can this be applied?
> 
> Oh, yes. I simply forgot to pick it up. Thanks for the reminder.

Sorry to bother you, but I don't still don't see the patch anywhere?


^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH RFC] x86, tsc: Allow for high latency in quick_pit_calibrate()
  2015-07-06  6:48                                 ` Adrian Hunter
@ 2015-07-06  7:42                                   ` Thomas Gleixner
  0 siblings, 0 replies; 48+ messages in thread
From: Thomas Gleixner @ 2015-07-06  7:42 UTC (permalink / raw)
  To: Adrian Hunter
  Cc: Andi Kleen, Andy Lutomirski, LKML, Linus Torvalds, X86 ML,
	H. Peter Anvin, Len Brown, George Spelvin, Ingo Molnar,
	a.p.zijlstra, arjan, bp, penberg, akpm

On Mon, 6 Jul 2015, Adrian Hunter wrote:

> On 22/06/15 16:14, Thomas Gleixner wrote:
> > On Mon, 22 Jun 2015, Adrian Hunter wrote:
> >> On 03/06/15 19:23, Thomas Gleixner wrote:
> >>> On Wed, 3 Jun 2015, Adrian Hunter wrote:
> >>>
> >>>> On 03/06/15 06:30, Andi Kleen wrote:
> >>>>>> Then the changelog should say that I think.  The current text says
> >>>>>> "Fast TSC calibration will always fail", which, to me, suggests that
> >>>>>> either the slow calibration will work or that the changelog message
> >>>>>> should be changed.
> >>>>>
> >>>>> Ok. No, the slow calibration works I believe.
> >>>>
> >>>> Yeah, so the (only?) downside is the 50ms wasted on Fast TSC
> >>>> calibration.  What about this?
> >>>
> >>> I'm certainly happy to apply this one. 
> >>
> >> George Spelvin began investigating improving quick_pit_calibrate() but Ingo
> >> anyway suggested this patch as the first, so can this be applied?
> > 
> > Oh, yes. I simply forgot to pick it up. Thanks for the reminder.
> 
> Sorry to bother you, but I don't still don't see the patch anywhere?

That patch seems to be highly merge resistant. It's queued now.

Thanks,

	tglx

^ permalink raw reply	[flat|nested] 48+ messages in thread

* [tip:x86/urgent] x86/tsc: Let high latency PIT fail fast in quick_pit_calibrate()
  2015-06-03  8:13                         ` Adrian Hunter
  2015-06-03 13:45                           ` Linus Torvalds
  2015-06-03 16:23                           ` Thomas Gleixner
@ 2015-07-06  7:42                           ` tip-bot for Adrian Hunter
  2 siblings, 0 replies; 48+ messages in thread
From: tip-bot for Adrian Hunter @ 2015-07-06  7:42 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: linux-kernel, torvalds, luto, adrian.hunter, tglx, mingo, ak, hpa, lenb

Commit-ID:  5aac644a9944bea93b4f05ced1883a902a2535f6
Gitweb:     http://git.kernel.org/tip/5aac644a9944bea93b4f05ced1883a902a2535f6
Author:     Adrian Hunter <adrian.hunter@intel.com>
AuthorDate: Wed, 3 Jun 2015 10:39:46 +0300
Committer:  Thomas Gleixner <tglx@linutronix.de>
CommitDate: Mon, 6 Jul 2015 09:41:00 +0200

x86/tsc: Let high latency PIT fail fast in quick_pit_calibrate()

If it takes longer than 12us to read the PIT counter lsb/msb,
then the error margin will never fall below 500ppm within 50ms,
and Fast TSC calibration will always fail.

This patch detects when that will happen and fails fast. Note
the failure message is not printed in that case because:
1. it will always happen on that class of hardware
2. the absence of the message is more informative than its
presence

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Len Brown <lenb@kernel.org>
Cc: Andi Kleen <ak@linux.intel.com>
Link: http://lkml.kernel.org/r/556EB717.9070607@intel.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/tsc.c | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index 5054497..7437b41 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -598,10 +598,19 @@ static unsigned long quick_pit_calibrate(void)
 			if (!pit_expect_msb(0xff-i, &delta, &d2))
 				break;
 
+			delta -= tsc;
+
+			/*
+			 * Extrapolate the error and fail fast if the error will
+			 * never be below 500 ppm.
+			 */
+			if (i == 1 &&
+			    d1 + d2 >= (delta * MAX_QUICK_PIT_ITERATIONS) >> 11)
+				return 0;
+
 			/*
 			 * Iterate until the error is less than 500 ppm
 			 */
-			delta -= tsc;
 			if (d1+d2 >= delta >> 11)
 				continue;
 

^ permalink raw reply related	[flat|nested] 48+ messages in thread

* Re: [PATCH RFC] x86, tsc: Allow for high latency in quick_pit_calibrate()
  2015-06-05 20:17               ` George Spelvin
@ 2015-06-06 21:50                 ` George Spelvin
  0 siblings, 0 replies; 48+ messages in thread
From: George Spelvin @ 2015-06-06 21:50 UTC (permalink / raw)
  To: linux, mingo; +Cc: adrian.hunter, ak, hpa, linux-kernel, luto, tglx, torvalds

And, for interest's sake, I went to a somewhat older machine: a 2008
MSI K9A2 platinum with DDR2, a Socket 939 AMD processor, SB600 south
bridge and (most importantly for this) a Fintek F71882FG super-IO
chip providing the PIT and RTC.

Anyway, the results are similar, again with the RTC being faster to
access than the PIT (by 14% this time).

BUT!  Two significant things failed, and I need to dig into why.
First, although my main RTC loop saw the PF edges at the expected
intervals of 2.5 GHz/8192 Hz = 305176, the second loop didn't; it
hit the safety timeout of 10,000 samples.

Second, fast RTC calibration failed for some reason.
I need to dig deeper into why.

Anyway, here's the kernel log, with "MEAN" "Std. Dev" lines
added at the end of each group.

It's interesting that the standard deviation for two PIT reads is lower
than that for one.  If the times were independent, you'd expect it to
be a factor of sqrt(2) higher.

hpet clockevent registered
** 2-byte PIT timing
PIT edge delta  538765, range  12453
PIT edge delta  535720, range  12493
PIT edge delta  535605, range  12418
PIT edge delta  535640, range  12493
PIT edge delta  535760, range  12413
PIT edge delta  535520, range  12493
PIT edge delta  535680, range  12453
PIT edge delta  535715, range  12413
PIT edge delta  541920, range  12493
PIT edge delta  535640, range  12533
PIT edge delta  535565, range  12373
PIT edge delta  535680, range  12458
PIT edge delta  535600, range  12413
PIT edge delta  535760, range  12373
PIT edge delta  535640, range  12493
PIT edge delta  535675, range  12413
PIT edge delta  535840, range  12413
PIT edge delta  541725, range  12373
PIT edge delta  535635, range  12413
PIT edge delta  535565, range  12453
PIT edge delta  535795, range  12453
PIT edge delta  535645, range  12493
PIT edge delta  535635, range  12573
PIT edge delta  535720, range  12493
PIT edge delta  535560, range  12533
PIT edge delta  542000, range  12493
PIT edge delta  535600, range  12493
PIT edge delta  535645, range  12493
PIT edge delta  535715, range  12533
PIT edge delta  535475, range  12493
PIT edge delta  535805, range  12493
PIT edge delta  535600, range  12493
PIT edge delta  535605, range  12453
PIT edge delta  541995, range  12493
PIT edge delta  535675, range  12488
PIT edge delta  535650, range  12493
PIT edge delta  535635, range  12458
PIT edge delta  535680, range  12453
PIT edge delta  535605, range  12413
PIT edge delta  535675, range  12373
PIT edge delta  535725, range  12493
PIT edge delta  541835, range  12413
PIT edge delta  535685, range  12493
PIT edge delta  535635, range  12493
PIT edge delta  535680, range  12413
MEAN:           536420.55      12462.11
Std.Dev:          2012.8          47.95

** 1-byte PIT timing
PIT edge delta  535757, range   6295, iter 166
PIT edge delta  536680, range   6295, iter 169
PIT edge delta  536810, range   6295, iter 169
PIT edge delta  536715, range   6455, iter 169
PIT edge delta  536675, range   6335, iter 169
PIT edge delta  536085, range   6295, iter 169
PIT edge delta  536760, range   6415, iter 169
PIT edge delta  536520, range   6415, iter 169
PIT edge delta  536520, range   6292, iter 169
PIT edge delta  537235, range   6300, iter 169
PIT edge delta  536640, range   6255, iter 169
PIT edge delta  536440, range   6375, iter 169
PIT edge delta  536560, range   6295, iter 169
PIT edge delta  534398, range   6375, iter 168
PIT edge delta  536800, range   6337, iter 169
PIT edge delta  536325, range   6335, iter 169
PIT edge delta  537075, range   6375, iter 169
PIT edge delta  536680, range   6370, iter 169
PIT edge delta  536725, range   6330, iter 169
PIT edge delta  534042, range   6375, iter 168
PIT edge delta  536990, range   6540, iter 169
PIT edge delta  536570, range   6335, iter 169
PIT edge delta  536910, range   6255, iter 169
PIT edge delta  536605, range   6255, iter 169
PIT edge delta  536645, range   6335, iter 169
PIT edge delta  536995, range   6260, iter 169
PIT edge delta  536480, range   6255, iter 169
PIT edge delta  536485, range   6255, iter 169
PIT edge delta  533908, range   6295, iter 168
PIT edge delta  536610, range   6377, iter 169
PIT edge delta  536802, range   6335, iter 169
PIT edge delta  536668, range   6375, iter 169
PIT edge delta  536605, range   6295, iter 169
PIT edge delta  536800, range   6415, iter 169
PIT edge delta  536525, range   6410, iter 169
PIT edge delta  536920, range   6416, iter 169
PIT edge delta  536435, range   6413, iter 169
PIT edge delta  537000, range   6255, iter 169
PIT edge delta  536525, range   6250, iter 169
MEAN:           536459.49       6336.92    168.85
Std.Dev:           735.73         66.94      0.54

** RTC timing
RTC edge delta  303365, range   5535, iter 109
RTC edge delta  306037, range   5735, iter 110
RTC edge delta  306438, range   5615, iter 110
RTC edge delta  303685, range   5535, iter 109
RTC edge delta  306047, range   5535, iter 110
RTC edge delta  609673, range   5495, iter 220
RTC edge delta  306209, range   5535, iter 110
RTC edge delta  303708, range   5535, iter 109
RTC edge delta  305838, range   5535, iter 110
RTC edge delta  306202, range   5535, iter 110
RTC edge delta  609643, range   5495, iter 220
RTC edge delta  306522, range   5535, iter 110
RTC edge delta  303280, range   5535, iter 109
RTC edge delta  306273, range   5537, iter 110
RTC edge delta  303640, range   5535, iter 109
RTC edge delta  306442, range   5490, iter 110
RTC edge delta  306438, range   5535, iter 110
RTC edge delta  303640, range   5535, iter 109
RTC edge delta  306322, range   5535, iter 110
RTC edge delta  303125, range   5455, iter 109
RTC edge delta  306513, range   5535, iter 110
RTC edge delta  306527, range   5575, iter 110
RTC edge delta  303435, range   5535, iter 109
RTC edge delta  306243, range   5535, iter 110
RTC edge delta  303680, range   5695, iter 109
RTC edge delta  306202, range   5535, iter 110
RTC edge delta  306153, range   5540, iter 110
RTC edge delta  303485, range   5535, iter 109
RTC edge delta  306197, range   5497, iter 110
RTC edge delta  303690, range   5535, iter 109
RTC edge delta  306273, range   5535, iter 110
RTC edge delta  306282, range   5535, iter 110
RTC edge delta  303035, range   5535, iter 109
RTC edge delta  306283, range   5495, iter 110
RTC edge delta  306277, range   5535, iter 110
RTC edge delta  303560, range   5535, iter 109
RTC edge delta  305918, range   5530, iter 110
RTC edge delta  306247, range   5535, iter 110
RTC edge delta  303870, range   5535, iter 109
RTC edge delta  306448, range   5535, iter 110
RTC edge delta  303205, range   5695, iter 109
RTC edge delta  306432, range   5735, iter 110
RTC edge delta  303600, range   5535, iter 109
RTC edge delta  306233, range   5455, iter 110
RTC edge delta  306327, range   5535, iter 110
RTC edge delta  303440, range   5535, iter 109
RTC edge delta  306523, range   5535, iter 110
RTC edge delta  303710, range   5535, iter 109
RTC edge delta  306172, range   5495, iter 110
RTC edge delta  306078, range   5535, iter 110
RTC edge delta  303395, range   5535, iter 109
RTC edge delta  306244, range   5655, iter 110
RTC edge delta  306316, range   5535, iter 110
RTC edge delta  303360, range   5575, iter 109
RTC edge delta  306442, range   5655, iter 110
RTC edge delta  305758, range   5535, iter 110
RTC edge delta  303725, range   5540, iter 109
RTC edge delta  306437, range   5495, iter 110
RTC edge delta  303645, range   5535, iter 109
RTC edge delta  306473, range   5535, iter 110
RTC edge delta  303645, range   5535, iter 109
RTC edge delta  306397, range   5495, iter 110
RTC edge delta  306003, range   5655, iter 110
MEAN:           314895.32       5547.13    113.13
Std.Dev:         53818.41         56.89     19.51

** RTC timing details
RTC sample 9937:    2759
RTC sample 9938:    2761
RTC sample 9939:    2759
RTC sample 9940:    2881
RTC sample 9941:    2759
RTC sample 9942:    2756
RTC sample 9943:    2759
RTC sample 9944:    2761
RTC sample 9945:    2759
RTC sample 9946:    2761
RTC sample 9947:    2759
RTC sample 9948:    2761
RTC sample 9949:    2759
RTC sample 9950:    2761
RTC sample 9951:    2759
RTC sample 9952:    2721
RTC sample 9953:    2719
RTC sample 9954:    2761
RTC sample 9955:    2759
RTC sample 9956:    2761
RTC sample 9957:    2799
RTC sample 9958:    2761
RTC sample 9959:    2759
RTC sample 9960:    2761
RTC sample 9961:    2719
RTC sample 9962:    2721
RTC sample 9963:    2759
RTC sample 9964:    2761
RTC sample 9965:    2759
RTC sample 9966:    2761
RTC sample 9967:    2879
RTC sample 9968:    2761 *
RTC sample 9969:    2759
RTC sample 9970:    2761
RTC sample 9971:    2759
RTC sample 9972:    2761
RTC sample 9973:    2759
RTC sample 9974:    2761
RTC sample 9975:    2759
RTC sample 9976:    2761
RTC sample 9977:    2759
RTC sample 9978:    2761
RTC sample 9979:    2759
RTC sample 9980:    2761
RTC sample 9981:    2759
RTC sample 9982:    2761
RTC sample 9983:    2719
RTC sample 9984:    2721
RTC sample 9985:    2764
RTC sample 9986:    2876
RTC sample 9987:    2764
RTC sample 9988:    2801
RTC sample 9989:    2759
RTC sample 9990:    2761
RTC sample 9991:    2759
RTC sample 9992:    2761
RTC sample 9993:    2759
RTC sample 9994:    2761
RTC sample 9995:    2754
RTC sample 9996:    2761
RTC sample 9997:    2759
RTC sample 9998:    2761
RTC sample 9999:    2759
MEAN:               2763.08
Std.Dev:              29.68

tsc: Fast TSC calibration failed
tsc: Unable to calibrate against PIT
tsc: using HPET reference calibration
Using user defined TSC freq: 2500.210 MHz
tsc: Detected 2500.210 MHz processor
Calibrating delay loop (skipped), value calculated using timer frequency.. 5000.42 BogoMIPS (lpj=25002100)

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH RFC] x86, tsc: Allow for high latency in quick_pit_calibrate()
  2015-06-05  8:31             ` Ingo Molnar
@ 2015-06-05 20:17               ` George Spelvin
  2015-06-06 21:50                 ` George Spelvin
  0 siblings, 1 reply; 48+ messages in thread
From: George Spelvin @ 2015-06-05 20:17 UTC (permalink / raw)
  To: linux, mingo; +Cc: adrian.hunter, ak, hpa, linux-kernel, luto, tglx, torvalds

> I'll run your code as well, to make sure it's not something bad in my code.

Here's a modified version that uses less stack space (no need to store
all 64 bits of a timestamp), and captures a window around an RTC periodic
flag edge to explore WTF is going on there.

commit 769eba0b589141edca3541cfb1e30e01b806e5cb
Author: George Spelvin <linux@horizon.com>
Date:   Thu Jun 4 22:04:19 2015 -0400

    x86, tsc: Add test code.

diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index a00f35be..00ff0359 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -22,6 +22,8 @@
 #include <asm/nmi.h>
 #include <asm/x86_init.h>
 
+#include <asm/mc146818rtc.h>
+
 unsigned int __read_mostly cpu_khz;	/* TSC clocks / usec, not used here */
 EXPORT_SYMBOL(cpu_khz);
 
@@ -533,15 +535,15 @@ static inline int pit_verify_msb(unsigned char val)
 
 static inline int pit_expect_msb(unsigned char val, u64 *tscp, unsigned long *deltap)
 {
-	int count;
-	u64 tsc = 0, prev_tsc = 0;
+	int count = 0;
+	u64 prev_tsc, tsc = 0;
 
-	for (count = 0; count < 50000; count++) {
-		if (!pit_verify_msb(val))
-			break;
+	do {
+		if (++count > 50000)
+			return 0;
 		prev_tsc = tsc;
 		tsc = get_cycles();
-	}
+	} while (pit_verify_msb(val));
 	*deltap = get_cycles() - prev_tsc;
 	*tscp = tsc;
 
@@ -552,6 +554,177 @@ static inline int pit_expect_msb(unsigned char val, u64 *tscp, unsigned long *de
 	return count > 5;
 }
 
+/* Similar, but only a single read.  And returns the number of reads. */
+static inline unsigned
+pit_expect_msb1(unsigned char val, unsigned *tscp, unsigned *deltap)
+{
+	int count = 0;
+	unsigned prev_tsc, tsc = 0;
+
+	do {
+		if (++count > 50000)
+			return 0;
+		prev_tsc = tsc;
+		tsc = (unsigned)get_cycles();
+	} while (inb(0x42) == val);
+	*deltap = (unsigned)get_cycles() - prev_tsc;
+	*tscp = tsc;
+
+	return count;
+}
+
+static inline unsigned
+rtc_wait_bit(unsigned *tscp, unsigned *deltap)
+{
+	int count = 0;
+	unsigned prev_tsc, tsc = 0;
+
+	do {
+		if (++count > 5000)
+			return 0;
+		prev_tsc = tsc;
+		tsc = (unsigned)get_cycles();
+	} while (~inb(RTC_PORT(1)) & RTC_PF);	/* Wait for bit 6 to be set */
+	*deltap = (unsigned)get_cycles() - prev_tsc;
+	*tscp = tsc;
+
+	/*
+	 * We require _some_ success, but the quality control
+	 * will be based on the error terms on the TSC values.
+	 */
+	return count;
+}
+
+#define SAMPLES 64
+
+static void noinline_for_stack
+pit_test(void)
+{
+	unsigned tsc[SAMPLES+1];	/* How long since rpevious edge */
+	unsigned range[SAMPLES+1];	/* Range of uncertainty */
+	unsigned iter[SAMPLES];	/*& Number of iterations for capture */
+	int i, j;
+	unsigned char saved_a, saved_b;
+	unsigned long flags;
+	extern spinlock_t rtc_lock;
+
+	outb(0xb0, 0x43);
+
+	/* Start at 0xffff */
+	outb(0xff, 0x42);
+	outb(0xff, 0x42);
+
+	pit_verify_msb(0);
+
+	/*
+	 * Among the evil non-portable hacks this code does, it exploits
+	 * the fact that x86 is little-endian and allows unaligned stores
+	 * to store 64-bit values into an array of 32-bit values, where
+	 * each one overwrites the high half of the one before.
+	 */
+	if (pit_expect_msb(0xff, (u64 *)tsc, (unsigned long *)range)) {
+		for (i = 1; i < SAMPLES; i++) {
+			if (!pit_expect_msb(0xff - i, (u64 *)(tsc+i), (unsigned long *)(range+i)))
+				break;
+			if (!pit_verify_msb(0xfe - i))
+				break;
+		}
+		printk("** 2-byte PIT timing\n");
+		for (j = 1; j < i; j++)
+			printk("PIT edge delta %7u, range %6u\n",
+				tsc[j] - tsc[j-1], range[j]);
+	}
+
+	/* Try again, with one-byte reads */
+	outb(0xa0, 0x43);
+	outb(0xff, 0x42);	/* Start at 0xff00 */
+
+	/* Wait until we reach 0xfe (should be very fast) */
+	pit_verify_msb(0);
+	for (i = 0; i < 1000; i++)
+		if (inb(0x42) == 0xfe)
+			break;
+
+	if (i < 1000) {
+		for (i = 0; i < SAMPLES; i++) {
+			iter[i] = pit_expect_msb1(0xfe - i, tsc+i, range+i);
+			if (iter[i] <= 5)
+				break;
+			if (inb(0x42) != 0xfd - i)
+				break;
+		}
+		printk("** 1-byte PIT timing\n");
+		for (j = 1; j < i; j++)
+			printk("PIT edge delta %7u, range %6u, iter %u\n",
+				tsc[j] - tsc[j-1], range[j], iter[j]);
+	}
+
+	/* Once more, with the RTC */
+	spin_lock_irqsave(&rtc_lock, flags);
+
+	lock_cmos_prefix(RTC_REG_C);
+/* This is skanky stuff that requries rewritten RTC locking to do properly */
+	outb(RTC_REG_B, RTC_PORT(0));
+	saved_b = inb(RTC_PORT(1));
+	outb(saved_b & 7, RTC_PORT(1));	/* Clear interrupt enables */
+
+	outb(RTC_REG_A, RTC_PORT(0));
+	saved_a = inb(RTC_PORT(1));
+	outb((saved_a & 0xf0) | 3, RTC_PORT(1));	/* Set 8 kHz rate */
+/* End of skanky stuff */
+
+	outb(RTC_REG_C, RTC_PORT(0));
+	inb(RTC_PORT(1));	/* Clear any pending */
+
+	for (i = 0; i < SAMPLES; i++) {
+		iter[i] = rtc_wait_bit(tsc+i, range+i);
+		if (iter[i] <= 3)
+			break;
+		if (inb(RTC_PORT(1)) & RTC_PF)
+			break;
+	}
+
+	lock_cmos_suffix(RTC_REG_C);
+	spin_unlock_irqrestore(&rtc_lock, flags);
+
+	printk("** RTC timing\n");
+	for (j = 1; j < i; j++) {
+		printk("RTC edge delta %7u, range %6u, iter %u\n",
+			tsc[j] - tsc[j-1],  range[j], iter[j]);
+	}
+
+	/* Collect different statistics: per-read timing */
+	spin_lock_irqsave(&rtc_lock, flags);
+	lock_cmos_prefix(RTC_REG_C);
+	outb(RTC_REG_C, RTC_PORT(0));
+	inb(RTC_PORT(1));	/* Clear any pending */
+
+	/* Capture a series of timestamps straddling a bit change */
+	j = 10000;
+	for (i = 0; i < j; i++) {
+		tsc[i % (unsigned)SAMPLES] = (unsigned)get_cycles();
+		if (inb(RTC_PORT(1)) & RTC_PF && i >= SAMPLES/2 && j < 10000)
+			j = i + SAMPLES/2;
+	}
+
+	/* Restore the RTC state */
+	outb(RTC_REG_A, RTC_PORT(0));
+	outb(saved_a, RTC_PORT(1));
+	outb(RTC_REG_B, RTC_PORT(0));
+	outb(saved_b, RTC_PORT(1));
+
+	lock_cmos_suffix(RTC_REG_C);
+	spin_unlock_irqrestore(&rtc_lock, flags);
+
+	printk("** RTC timing details\n");
+	for (j = 1; j < SAMPLES; j++) {
+		unsigned k = i + j - SAMPLES;
+		printk("RTC sample %3d: %7u%s\n", k,
+			tsc[k % SAMPLES] - tsc[(k-1) % SAMPLES],
+			j == SAMPLES/2 ? " *" : "");
+	}
+}
+
 /*
  * How many MSB values do we want to see? We aim for
  * a maximum error rate of 500ppm (in practice the
@@ -570,6 +743,8 @@ static unsigned long quick_pit_calibrate(void)
 	/* Set the Gate high, disable speaker */
 	outb((inb(0x61) & ~0x02) | 0x01, 0x61);
 
+pit_test();
+
 	/*
 	 * Counter 2, mode 0 (one-shot), binary count
 	 *

^ permalink raw reply related	[flat|nested] 48+ messages in thread

* Re: [PATCH RFC] x86, tsc: Allow for high latency in quick_pit_calibrate()
  2015-06-05  8:24           ` George Spelvin
@ 2015-06-05  8:31             ` Ingo Molnar
  2015-06-05 20:17               ` George Spelvin
  0 siblings, 1 reply; 48+ messages in thread
From: Ingo Molnar @ 2015-06-05  8:31 UTC (permalink / raw)
  To: George Spelvin; +Cc: adrian.hunter, ak, hpa, linux-kernel, luto, tglx, torvalds


* George Spelvin <linux@horizon.com> wrote:

> > Ingo Molnar wrote:
> >* George Spelvin <linux@horizon.com> wrote:
> >> Did you use rtc_cmos_read()?  [...]
> 
> > Yeah, so initially I did, but then after I noticed the overhead I introduced:
> > which compiles to a single INB instruction.
> >
> > This didn't change the delay/cost behavior.
> >
> > The numbers I cited, with tens of thousands of cycles per iteration,
> > were from such an optimized poll loop already.
> 
> Apologies for doubting you!

No apologies needed: I should really have posted my code, but the boot 
dependencies hackery I had to perform was way too embarrasing to post ...

> > note the 'loops' column. When it's around 117, then the read cost corresponds 
> > roughly to the cheap-ish INB cost you have measured: 4188 cycles/loop.
> > 
> > But note the frequent 30-40k cycles/loop outliers. They dominate the 
> > measurement so filtering might not help.
> 
> I don't quite understand hoe the numbers are derived.  Why does 200K
> cycles/loop give 13 loops, while 35K cycles/loop gives 7?  Is cycles/loop
> a maximum?

it's delta/loops. So the 200K line:

[    0.000000] tsc: RTC edge 69 from  0 to 64, at  29700569301, delta:        2700528, jitter:      2454456, loops:           13,       207732 cycles/loop

had a very big 'delta' outlier, ~1.3 msecs when we did not manage to detect any 
RTC edge.

I'll run your code as well, to make sure it's not something bad in my code.

Thanks,

	Ingo

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH RFC] x86, tsc: Allow for high latency in quick_pit_calibrate()
  2015-06-05  5:58         ` Ingo Molnar
@ 2015-06-05  8:24           ` George Spelvin
  2015-06-05  8:31             ` Ingo Molnar
  0 siblings, 1 reply; 48+ messages in thread
From: George Spelvin @ 2015-06-05  8:24 UTC (permalink / raw)
  To: linux, mingo; +Cc: adrian.hunter, ak, hpa, linux-kernel, luto, tglx, torvalds

> Ingo Molnar wrote:
>* George Spelvin <linux@horizon.com> wrote:
>> Did you use rtc_cmos_read()?  [...]

> Yeah, so initially I did, but then after I noticed the overhead I introduced:
> which compiles to a single INB instruction.
>
> This didn't change the delay/cost behavior.
>
> The numbers I cited, with tens of thousands of cycles per iteration,
> were from such an optimized poll loop already.

Apologies for doubting you!

>> /* This is skanky stuff that requries rewritten RTC locking to do properly */

> [ Note that no RTC locking is needed so early during bootup: this is
>   the boot CPU only, with only a single task running, guaranteed. ]

Yes, I guessed I could get away with it, but I hadn't traced the code
far enough to be sure.  But obviously I should either completely omit the
locking, or do it right.  Half-assed is all-wrong.

> note the 'loops' column. When it's around 117, then the read cost corresponds 
> roughly to the cheap-ish INB cost you have measured: 4188 cycles/loop.
> 
> But note the frequent 30-40k cycles/loop outliers. They dominate the measurement 
> so filtering might not help.

I don't quite understand hoe the numbers are derived.  Why does 200K
cycles/loop give 13 loops, while 35K cycles/loop gives 7?  Is cycles/loop
a maximum?

> And this is on a 'boring' 10 years old PC (Nvidia CK804 southbridge), with no HPET 
> and nothing particularly fancy that I'm aware of. I tried this system first 
> because I expected it to work and expected problems (with RTCs being emulated via 
> the HPET) on more modern systems.
> 
> If the RTC polling method is not reliable here, it might be doubly problematic on 
> other systems.

This is definitely an "if it ain't broke, don't fix it" area.
Trying things is interesting; actually changing the kernel is not
to be undertaken lightly.

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH RFC] x86, tsc: Allow for high latency in quick_pit_calibrate()
  2015-06-05  5:52           ` George Spelvin
@ 2015-06-05  6:16             ` Ingo Molnar
  0 siblings, 0 replies; 48+ messages in thread
From: Ingo Molnar @ 2015-06-05  6:16 UTC (permalink / raw)
  To: George Spelvin; +Cc: adrian.hunter, ak, hpa, linux-kernel, luto, tglx, torvalds


* George Spelvin <linux@horizon.com> wrote:

> It's running at 3.4 GHz, so I expect 729478 ticks per 256 PIT counts, and 415039 
> ticks per 8192 Hz RTC tick.

> (PIT reads are 1353 ns each, while RTC reads are 1142 ns.)
> 
> RTC edge at  99172986783, delta       0, range   7764, iter 7
> RTC edge at  99173401719, delta  414936, range   7764, iter 106
> RTC edge at  99173816543, delta  414824, range   7764, iter 106
> RTC edge at  99174231391, delta  414848, range   7764, iter 106
> RTC edge at  99174646119, delta  414728, range   7740, iter 106

> +static inline unsigned
> +rtc_wait_bit(u64 *tscp, unsigned long *deltap)
> +{
> +	int count = 0;
> +	u64 prev_tsc, tsc = 0;
> +
> +	do {
> +		if (++count > 5000)
> +			return 0;
> +		prev_tsc = tsc;
> +		tsc = get_cycles();
> +	} while (~inb(RTC_PORT(1)) & RTC_PF);	/* Wait for bit 6 to be set */
> +	*deltap = get_cycles() - prev_tsc;
> +	*tscp = tsc;

> +/* This is skanky stuff that requries rewritten RTC locking to do properly */

[ Note that no RTC locking is needed so early during bootup: this is the boot CPU 
  only, with only a single task running, guaranteed. ]

So your code is very close to how I did the RTC sampling, except that I got:

[    0.000000] tsc: RTC edge 57 from  0 to 64, at  29694678517, delta:         246360, jitter:         2456, loops:            7,        35194 cycles/loop
[    0.000000] tsc: RTC edge 58 from  0 to 64, at  29695169485, delta:         490968, jitter:       244608, loops:          118,         4160 cycles/loop
[    0.000000] tsc: RTC edge 59 from  0 to 64, at  29695413981, delta:         244496, jitter:      -246472, loops:            6,        40749 cycles/loop
[    0.000000] tsc: RTC edge 60 from  0 to 64, at  29695660661, delta:         246680, jitter:         2184, loops:            7,        35240 cycles/loop
[    0.000000] tsc: RTC edge 61 from  0 to 64, at  29695904853, delta:         244192, jitter:        -2488, loops:            6,        40698 cycles/loop
[    0.000000] tsc: RTC edge 62 from  0 to 64, at  29696151141, delta:         246288, jitter:         2096, loops:            7,        35184 cycles/loop
[    0.000000] tsc: RTC edge 63 from  0 to 64, at  29696396445, delta:         245304, jitter:         -984, loops:            6,        40884 cycles/loop
[    0.000000] tsc: RTC edge 64 from  0 to 64, at  29696642669, delta:         246224, jitter:          920, loops:            7,        35174 cycles/loop
[    0.000000] tsc: RTC edge 65 from  0 to 64, at  29696887245, delta:         244576, jitter:        -1648, loops:            6,        40762 cycles/loop
[    0.000000] tsc: RTC edge 66 from  0 to 64, at  29697377909, delta:         490664, jitter:       246088, loops:          117,         4193 cycles/loop
[    0.000000] tsc: RTC edge 67 from  0 to 64, at  29697622701, delta:         244792, jitter:      -245872, loops:            6,        40798 cycles/loop
[    0.000000] tsc: RTC edge 68 from  0 to 64, at  29697868773, delta:         246072, jitter:         1280, loops:            7,        35153 cycles/loop
[    0.000000] tsc: RTC edge 69 from  0 to 64, at  29700569301, delta:        2700528, jitter:      2454456, loops:           13,       207732 cycles/loop
[    0.000000] tsc: RTC edge 70 from  0 to 64, at  29700813805, delta:         244504, jitter:     -2456024, loops:            6,        40750 cycles/loop
[    0.000000] tsc: RTC edge 71 from  0 to 64, at  29701060125, delta:         246320, jitter:         1816, loops:            7,        35188 cycles/loop
[    0.000000] tsc: RTC edge 72 from  0 to 64, at  29701550189, delta:         490064, jitter:       243744, loops:          117,         4188 cycles/loop
[    0.000000] tsc: RTC edge 73 from  0 to 64, at  29701796677, delta:         246488, jitter:      -243576, loops:            7,        35212 cycles/loop
[    0.000000] tsc: RTC edge 74 from  0 to 64, at  29702040829, delta:         244152, jitter:        -2336, loops:            6,        40692 cycles/loop
[    0.000000] tsc: RTC edge 75 from  0 to 64, at  29702287597, delta:         246768, jitter:         2616, loops:            7,        35252 cycles/loop
[    0.000000] tsc: RTC edge 76 from  0 to 64, at  29702531741, delta:         244144, jitter:        -2624, loops:            6,        40690 cycles/loop
[    0.000000] tsc: RTC edge 77 from  0 to 64, at  29702778341, delta:         246600, jitter:         2456, loops:            7,        35228 cycles/loop
[    0.000000] tsc: RTC edge 78 from  0 to 64, at  29703022661, delta:         244320, jitter:        -2280, loops:            6,        40720 cycles/loop
[    0.000000] tsc: RTC edge 79 from  0 to 64, at  29703514245, delta:         491584, jitter:       247264, loops:          118,         4165 cycles/loop
[    0.000000] tsc: RTC edge 80 from  0 to 64, at  29703759165, delta:         244920, jitter:      -246664, loops:            6,        40820 cycles/loop
[    0.000000] tsc: RTC edge 81 from  0 to 64, at  29704005397, delta:         246232, jitter:         1312, loops:            7,        35176 cycles/loop
[    0.000000] tsc: RTC edge 82 from  0 to 64, at  29704249589, delta:         244192, jitter:        -2040, loops:            6,        40698 cycles/loop

note the 'loops' column. When it's around 117, then the read cost corresponds 
roughly to the cheap-ish INB cost you have measured: 4188 cycles/loop.

But note the frequent 30-40k cycles/loop outliers. They dominate the measurement 
so filtering might not help.

And this is on a 'boring' 10 years old PC (Nvidia CK804 southbridge), with no HPET 
and nothing particularly fancy that I'm aware of. I tried this system first 
because I expected it to work and expected problems (with RTCs being emulated via 
the HPET) on more modern systems.

If the RTC polling method is not reliable here, it might be doubly problematic on 
other systems.

IRQ based RTC calibration worked a lot better, but is problematic from the boot 
dependencing POV. It's not unsolvable, but not trivial either - and I'd rather 
prefer trivial (polling) methods so early during the bootup!

Thanks,

	Ingo

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH RFC] x86, tsc: Allow for high latency in quick_pit_calibrate()
  2015-06-04 16:38       ` George Spelvin
  2015-06-04 16:52         ` Linus Torvalds
@ 2015-06-05  5:58         ` Ingo Molnar
  2015-06-05  8:24           ` George Spelvin
  1 sibling, 1 reply; 48+ messages in thread
From: Ingo Molnar @ 2015-06-05  5:58 UTC (permalink / raw)
  To: George Spelvin; +Cc: adrian.hunter, ak, hpa, linux-kernel, luto, tglx, torvalds


* George Spelvin <linux@horizon.com> wrote:

> Ingo Molnar wrote:
> > - Alternatively, I also tried a different method: to set up the RTC
> >   periodic IRQ during early boot, but not have an IRQ handler, polling
> >   RTC_PF in the rtc_cmos_read(RTC_INTR_FLAGS) IRQ status byte.
> >
> >   Unfortunately when I do this then PIO based RTC accesses can take
> >   tens of thousands of cycles, and the resulting jitter is pretty bad
> >   and hard to filter:
> 
> Did you use rtc_cmos_read()?  [...]

Yeah, so initially I did, but then after I noticed the overhead I introduced:

+unsigned char rtc_cmos_read_again(void)
+{
+       return inb(RTC_PORT(1));
+}
+

which compiles to a single INB instruction.

This didn't change the delay/cost behavior.

The numbers I cited, with tens of thousands of cycles per iteration, were from 
such an optimized poll loop already.

Thanks,

	Ingo

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH RFC] x86, tsc: Allow for high latency in quick_pit_calibrate()
  2015-06-04 16:52         ` Linus Torvalds
  2015-06-04 17:54           ` George Spelvin
@ 2015-06-05  5:52           ` George Spelvin
  2015-06-05  6:16             ` Ingo Molnar
  1 sibling, 1 reply; 48+ messages in thread
From: George Spelvin @ 2015-06-05  5:52 UTC (permalink / raw)
  To: mingo; +Cc: adrian.hunter, ak, hpa, linux, linux-kernel, luto, tglx, torvalds

FWIW, I wrote my own test routine, with some interesting results.
It's a rude bodge and obviously not kernel-quality, but included if
anyone wants to mess with it.

My machine is an X79 motherboard with a common ITE IT8728F SuperIO
chip providing both RTC and PIT.

The intersting bit is that I can double the speed of the PIT code, and
the really interesting part is that the RTC code is 18% faster still
(85% of the time).

It's running at 3.4 GHz, so I expect 729478 ticks per 256 PIT
counts, and 415039 ticks per 8192 Hz RTC tick.


Anyway, here are the results using the current pit_expect_msb().
The values printed are the returned tsc, the delta to the previous one,
and the uncertainty range, which is the time for two reads
94 inb() operations).


PIT edge at  99066193034, delta       0, range  18372
PIT edge at  99066918775, delta  725741, range  18372
PIT edge at  99067644199, delta  725424, range  18372
PIT edge at  99068379191, delta  734992, range  18372
PIT edge at  99069104615, delta  725424, range  18372
PIT edge at  99069839127, delta  734512, range  18372
PIT edge at  99070564551, delta  725424, range  18372
PIT edge at  99071299583, delta  735032, range  18348
PIT edge at  99072025530, delta  725947, range  18372
PIT edge at  99072750839, delta  725309, range  18372
PIT edge at  99073485447, delta  734608, range  18372
PIT edge at  99074210778, delta  725331, range  18372
PIT edge at  99074945471, delta  734693, range  18372
PIT edge at  99075670807, delta  725336, range  18372
PIT edge at  99076406543, delta  735736, range  18372
PIT edge at  99077132874, delta  726331, range  18372
PIT edge at  99077858095, delta  725221, range  18372
PIT edge at  99078593719, delta  735624, range  18372
PIT edge at  99079319255, delta  725536, range  18372
PIT edge at  99080053767, delta  734512, range  18372
PIT edge at  99080779079, delta  725312, range  18372
PIT edge at  99081504322, delta  725243, range  18372
PIT edge at  99082239311, delta  734989, range  18372
PIT edge at  99082964554, delta  725243, range  18372
PIT edge at  99083699543, delta  734989, range  18372
PIT edge at  99084425602, delta  726059, range  18372
PIT edge at  99085160095, delta  734493, range  18372
PIT edge at  99085885311, delta  725216, range  18372
PIT edge at  99086610535, delta  725224, range  18372
PIT edge at  99087345751, delta  735216, range  18372
PIT edge at  99088071399, delta  725648, range  18372
PIT edge at  99088805911, delta  734512, range  18372
PIT edge at  99089531519, delta  725608, range  18372
PIT edge at  99090266327, delta  734808, range  18372
PIT edge at  99090991567, delta  725240, range  18372
PIT edge at  99091716767, delta  725200, range  18372
PIT edge at  99092451279, delta  734512, range  18372
PIT edge at  99093176615, delta  725336, range  18487
PIT edge at  99093911423, delta  734808, range  18372
PIT edge at  99094636847, delta  725424, range  18372
PIT edge at  99095371447, delta  734600, range  18372
PIT edge at  99096096671, delta  725224, range  18372
PIT edge at  99096831703, delta  735032, range  18372
PIT edge at  99097557535, delta  725832, range  18372
PIT edge at  99098282959, delta  725424, range  18372
PIT edge at  99099018063, delta  735104, range  18372
PIT edge at  99099743303, delta  725240, range  18372
PIT edge at  99100477703, delta  734400, range  18372
PIT edge at  99101203015, delta  725312, range  18372
PIT edge at  99101937415, delta  734400, range  18372

Here's the same for an optimized PIT routine, which places the PIT in
msbyte-only mode, so only needs one read to poll the PIT.

It also prints the number of iterations inside the PIT spin loop.

Note that it's exactly twice the speed, but the variance
is much higher.

PIT edge at  99131203367, delta       0, range   9215, iter 158
PIT edge at  99131929383, delta  726016, range   9172, iter 157
PIT edge at  99132659519, delta  730136, range   9215, iter 158
PIT edge at  99133389546, delta  730027, range   9172, iter 158
PIT edge at  99134120047, delta  730501, range   9188, iter 158
PIT edge at  99134850095, delta  730048, range   9508, iter 158
PIT edge at  99135580303, delta  730208, range   9188, iter 158
PIT edge at  99136310623, delta  730320, range   9188, iter 158
PIT edge at  99137035935, delta  725312, range   9193, iter 157
PIT edge at  99137765754, delta  729819, range   9172, iter 158
PIT edge at  99138495666, delta  729912, range   9172, iter 158
PIT edge at  99139225578, delta  729912, range   9172, iter 158
PIT edge at  99139955511, delta  729933, range   9172, iter 158
PIT edge at  99140685311, delta  729800, range   9212, iter 158
PIT edge at  99141415743, delta  730432, range   9215, iter 158
PIT edge at  99142146247, delta  730504, range   9169, iter 158
PIT edge at  99142872303, delta  726056, range   9215, iter 157
PIT edge at  99143603031, delta  730728, range   9215, iter 158
PIT edge at  99144333559, delta  730528, range   9169, iter 158
PIT edge at  99145063879, delta  730320, range   9193, iter 158
PIT edge at  99145793791, delta  729912, range   9193, iter 158
PIT edge at  99146519823, delta  726032, range   9191, iter 157
PIT edge at  99147249735, delta  729912, range   9191, iter 158
PIT edge at  99147979559, delta  729824, range   9212, iter 158
PIT edge at  99148709674, delta  730115, range   9172, iter 158
PIT edge at  99149440311, delta  730637, range   9212, iter 158
PIT edge at  99150170743, delta  730432, range   9172, iter 158
PIT edge at  99150896055, delta  725312, range   9212, iter 157
PIT edge at  99151626487, delta  730432, range   9215, iter 158
PIT edge at  99152357103, delta  730616, range   9212, iter 158
PIT edge at  99153087242, delta  730139, range   9172, iter 158
PIT edge at  99153817154, delta  729912, range   9172, iter 158
PIT edge at  99154547271, delta  730117, range   9188, iter 158
PIT edge at  99155272671, delta  725400, range   9188, iter 157
PIT edge at  99156003103, delta  730432, range   9191, iter 158
PIT edge at  99156733423, delta  730320, range   9191, iter 158
PIT edge at  99157464063, delta  730640, range   9188, iter 158
PIT edge at  99158194087, delta  730024, range   9191, iter 158
PIT edge at  99158924498, delta  730411, range   9172, iter 158
PIT edge at  99159650239, delta  725741, range   9191, iter 157
PIT edge at  99160380039, delta  729800, range   9212, iter 158
PIT edge at  99161110583, delta  730544, range   9172, iter 158
PIT edge at  99161840495, delta  729912, range   9215, iter 158
PIT edge at  99162570815, delta  730320, range   9215, iter 158
PIT edge at  99163300911, delta  730096, range   9169, iter 158
PIT edge at  99164030823, delta  729912, range   9169, iter 158
PIT edge at  99164756767, delta  725944, range   9188, iter 157
PIT edge at  99165486703, delta  729936, range   9188, iter 158
PIT edge at  99166216818, delta  730115, range   9196, iter 158
PIT edge at  99166946730, delta  729912, range   9196, iter 158

Now here's the RTC version.  Because the events being times are
faster, the delta is expected to be 415039.  But what's interesting
is that the read time is singificantly lower and (with the
exception of one outlier) more stable.

Is the simpler metastability handling (we're only reading
one bit, rather than an entire counter) causing the difference?

(PIT reads are 1353 ns each, while RTC reads are 1142 ns.)

RTC edge at  99172986783, delta       0, range   7764, iter 7
RTC edge at  99173401719, delta  414936, range   7764, iter 106
RTC edge at  99173816543, delta  414824, range   7764, iter 106
RTC edge at  99174231391, delta  414848, range   7764, iter 106
RTC edge at  99174646119, delta  414728, range   7740, iter 106
RTC edge at  99175061351, delta  415232, range   7764, iter 106
RTC edge at  99175476399, delta  415048, range   7764, iter 106
RTC edge at  99175891223, delta  414824, range   7764, iter 106
RTC edge at  99176306071, delta  414848, range   7764, iter 106
RTC edge at  99176721415, delta  415344, range   7764, iter 106
RTC edge at  99177136551, delta  415136, range   7764, iter 106
RTC edge at  99177552010, delta  415459, range   7764, iter 106
RTC edge at  99177966831, delta  414821, range   7764, iter 106
RTC edge at  99178381567, delta  414736, range   7764, iter 106
RTC edge at  99178797226, delta  415659, range   7764, iter 106
RTC edge at  99179211959, delta  414733, range   7764, iter 106
RTC edge at  99179627007, delta  415048, range   7764, iter 106
RTC edge at  99180041943, delta  414936, range   7764, iter 106
RTC edge at  99180457378, delta  415435, range   7764, iter 106
RTC edge at  99180872335, delta  414957, range   7764, iter 106
RTC edge at  99181287271, delta  414936, range   7764, iter 106
RTC edge at  99181702095, delta  414824, range   7764, iter 106
RTC edge at  99182120815, delta  418720, range   7764, iter 107
RTC edge at  99182535935, delta  415120, range   7764, iter 106
RTC edge at  99182951095, delta  415160, range   7764, iter 106
RTC edge at  99183366215, delta  415120, range   7764, iter 106
RTC edge at  99183781082, delta  414867, range   7764, iter 106
RTC edge at  99184192639, delta  411557, range   7764, iter 105
RTC edge at  99184611431, delta  418792, range   7764, iter 107
RTC edge at  99185026391, delta  414960, range   7764, iter 106
RTC edge at  99185441527, delta  415136, range   7764, iter 106
RTC edge at  99185853135, delta  411608, range   8172, iter 105
RTC edge at  99186268658, delta  415523, range   7764, iter 106
RTC edge at  99186683687, delta  415029, range   7764, iter 106
RTC edge at  99187098415, delta  414728, range   7764, iter 106
RTC edge at  99187513351, delta  414936, range   7764, iter 106
RTC edge at  99187928287, delta  414936, range   7764, iter 106
RTC edge at  99188346895, delta  418608, range   7764, iter 107
RTC edge at  99188761946, delta  415051, range   7764, iter 106
RTC edge at  99189176679, delta  414733, range   7764, iter 106
RTC edge at  99189591818, delta  415139, range   7764, iter 106
RTC edge at  99190006959, delta  415141, range   7764, iter 106
RTC edge at  99190422303, delta  415344, range   7764, iter 106
RTC edge at  99190837463, delta  415160, range   7764, iter 106
RTC edge at  99191249135, delta  411672, range   7764, iter 105
RTC edge at  99191664071, delta  414936, range   7764, iter 106
RTC edge at  99192082679, delta  418608, range   7764, iter 107
RTC edge at  99192494127, delta  411448, range   7764, iter 105
RTC edge at  99192912735, delta  418608, range   7764, iter 107
RTC edge at  99193327602, delta  414867, range   7764, iter 106
tsc: Fast TSC calibration using PIT
Using user defined TSC freq: 3399.965 MHz
tsc: Detected 3399.965 MHz processor
Calibrating delay loop (skipped), value calculated using timer frequency.. 6802.26 BogoMIPS (lpj=11333216)


Here's my ugly code.  The "skanky stuff" is a race condition that will
break if an NMI comes along, but I'm not sure that['s possible this
early in the boot.


diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index a00f35be..7399d4c6 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -22,6 +22,8 @@
 #include <asm/nmi.h>
 #include <asm/x86_init.h>
 
+#include <asm/mc146818rtc.h>
+
 unsigned int __read_mostly cpu_khz;	/* TSC clocks / usec, not used here */
 EXPORT_SYMBOL(cpu_khz);
 
@@ -533,15 +535,15 @@ static inline int pit_verify_msb(unsigned char val)
 
 static inline int pit_expect_msb(unsigned char val, u64 *tscp, unsigned long *deltap)
 {
-	int count;
-	u64 tsc = 0, prev_tsc = 0;
+	int count = 0;
+	u64 prev_tsc, tsc = 0;
 
-	for (count = 0; count < 50000; count++) {
-		if (!pit_verify_msb(val))
-			break;
+	do {
+		if (++count > 50000)
+			return 0;
 		prev_tsc = tsc;
 		tsc = get_cycles();
-	}
+	} while (pit_verify_msb(val));
 	*deltap = get_cycles() - prev_tsc;
 	*tscp = tsc;
 
@@ -552,6 +554,158 @@ static inline int pit_expect_msb(unsigned char val, u64 *tscp, unsigned long *de
 	return count > 5;
 }
 
+/* Similar, but only a single read.  And returns the number of reads. */
+static inline unsigned
+pit_expect_msb1(unsigned char val, u64 *tscp, unsigned long *deltap)
+{
+	int count = 0;
+	u64 prev_tsc, tsc = 0;
+
+	do {
+		if (++count > 50000)
+			return 0;
+		prev_tsc = tsc;
+		tsc = get_cycles();
+	} while (inb(0x42) == val);
+	*deltap = get_cycles() - prev_tsc;
+	*tscp = tsc;
+
+	return count;
+}
+
+static inline unsigned
+rtc_wait_bit(u64 *tscp, unsigned long *deltap)
+{
+	int count = 0;
+	u64 prev_tsc, tsc = 0;
+
+	do {
+		if (++count > 5000)
+			return 0;
+		prev_tsc = tsc;
+		tsc = get_cycles();
+	} while (~inb(RTC_PORT(1)) & RTC_PF);	/* Wait for bit 6 to be set */
+	*deltap = get_cycles() - prev_tsc;
+	*tscp = tsc;
+
+	/*
+	 * We require _some_ success, but the quality control
+	 * will be based on the error terms on the TSC values.
+	 */
+	return count;
+}
+
+#define SAMPLES 50
+static void noinline_for_stack
+pit_test(void)
+{
+	u64 tsc[SAMPLES];
+	unsigned long delta[SAMPLES];
+	unsigned count[SAMPLES];
+	int i;
+	unsigned char saved_a, saved_b;
+	unsigned long flags;
+	extern spinlock_t rtc_lock;
+
+	outb(0xb0, 0x43);
+
+	/* Start at 0xffff */
+	outb(0xff, 0x42);
+	outb(0xff, 0x42);
+
+	pit_verify_msb(0);
+
+	if (pit_expect_msb(0xff, tsc+0, delta+0)) {
+		int j;
+		u64 prev;
+		for (i = 1; i < SAMPLES; i++) {
+			if (!pit_expect_msb(0xff-i, tsc+i, delta+i))
+				break;
+			if (!pit_verify_msb(0xfe - i))
+				break;
+		}
+		prev = tsc[0];
+		for (j = 0; j < i; j++) {
+			printk("PIT edge at %12llu, delta %7llu, range %6lu\n",
+				tsc[j], tsc[j] - prev,  delta[j]);
+			prev = tsc[j];
+		}
+	}
+
+	/* Try again, with one-byte reads */
+	outb(0xa0, 0x43);
+	outb(0xff, 0x42);	/* Start at 0xff00 */
+
+	/* Wait until we reach 0xfe (should be very fast) */
+	pit_verify_msb(0);
+	for (i = 0; i < 1000; i++)
+		if (inb(0x42) == 0xfe)
+			break;
+
+	if (i < 1000) {
+		int j;
+		u64 prev;
+
+		for (i = 0; i < SAMPLES; i++) {
+			count[i] = pit_expect_msb1(0xfe -i, tsc+i, delta+i);
+			if (count[i] <= 5)
+				break;
+			if (inb(0x42) != 0xfd - i)
+				break;
+		}
+		prev = tsc[0];
+		for (j = 0; j < i; j++) {
+			printk("PIT edge at %12llu, delta %7llu, range %6lu, iter %u\n",
+				tsc[j], tsc[j] - prev,  delta[j], count[j]);
+			prev = tsc[j];
+		}
+	}
+
+	/* Once more, with the RTC */
+        spin_lock_irqsave(&rtc_lock, flags);
+
+	lock_cmos_prefix(RTC_REG_C);
+/* This is skanky stuff that requries rewritten RTC locking to do properly */
+	outb(RTC_REG_B, RTC_PORT(0));
+	saved_b = inb(RTC_PORT(1));
+	outb(saved_b & 7, RTC_PORT(1));	/* Clear interrupt enables */
+
+	outb(RTC_REG_A, RTC_PORT(0));
+	saved_a = inb(RTC_PORT(1));
+	outb((saved_a & 0xf0) | 3, RTC_PORT(1));	/* Set 8 kHz rate */
+/* End of skanky stuff */
+
+	outb(RTC_REG_C, RTC_PORT(0));
+	inb(RTC_PORT(1));	/* Clear any pending */
+
+	for (i = 0; i < SAMPLES; i++) {
+		count[i] = rtc_wait_bit(tsc+i, delta+i);
+		if (count[i] <= 3)
+			break;
+		if (inb(RTC_PORT(1)) & RTC_PF)
+			break;
+	}
+
+	outb(RTC_REG_A, RTC_PORT(0));
+	outb(saved_a, RTC_PORT(1));
+	outb(RTC_REG_B, RTC_PORT(0));
+	outb(saved_b, RTC_PORT(1));
+
+	lock_cmos_suffix(RTC_REG_C);
+
+        spin_unlock_irqrestore(&rtc_lock, flags);
+
+	{
+		int j;
+		u64 prev = tsc[0];
+		for (j = 0; j < i; j++) {
+			printk("RTC edge at %12llu, delta %7llu, range %6lu, iter %u\n",
+				tsc[j], tsc[j] - prev,  delta[j], count[j]);
+			prev = tsc[j];
+		}
+	}
+}
+
 /*
  * How many MSB values do we want to see? We aim for
  * a maximum error rate of 500ppm (in practice the
@@ -570,6 +724,8 @@ static unsigned long quick_pit_calibrate(void)
 	/* Set the Gate high, disable speaker */
 	outb((inb(0x61) & ~0x02) | 0x01, 0x61);
 
+pit_test();
+
 	/*
 	 * Counter 2, mode 0 (one-shot), binary count
 	 *

^ permalink raw reply related	[flat|nested] 48+ messages in thread

* Re: [PATCH RFC] x86, tsc: Allow for high latency in quick_pit_calibrate()
  2015-06-04 17:54           ` George Spelvin
@ 2015-06-04 18:07             ` Linus Torvalds
  0 siblings, 0 replies; 48+ messages in thread
From: Linus Torvalds @ 2015-06-04 18:07 UTC (permalink / raw)
  To: George Spelvin
  Cc: Adrian Hunter, Andi Kleen, Peter Anvin,
	Linux Kernel Mailing List, Andy Lutomirski, Ingo Molnar,
	Thomas Gleixner

On Thu, Jun 4, 2015 at 10:54 AM, George Spelvin <linux@horizon.com> wrote:
>
> Actually, the current code uses three timestamps: one before the last
> read of the unwanted value, one in the middle, and one after the
> read of the target value (bit set in this case).

Yes, right you are. All are valid and interesting.

                   Linus

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH RFC] x86, tsc: Allow for high latency in quick_pit_calibrate()
  2015-06-04 16:52         ` Linus Torvalds
@ 2015-06-04 17:54           ` George Spelvin
  2015-06-04 18:07             ` Linus Torvalds
  2015-06-05  5:52           ` George Spelvin
  1 sibling, 1 reply; 48+ messages in thread
From: George Spelvin @ 2015-06-04 17:54 UTC (permalink / raw)
  To: linux, torvalds; +Cc: adrian.hunter, ak, hpa, linux-kernel, luto, mingo, tglx

On Thu, 4 Jun 2015 at 09:52:34, Linus Torvalds wrote:
> With the usual "have timestamp both before the read that shows the bit
> set, and after the read" so that you can estimate how big the error
> window is.

Actually, the current code uses three timestamps: one before the last
read of the unwanted value, one in the middle, and one after the
read of the target value (bit set in this case).

The delta beween the outer two is used for error estimaion, and the
middle timestamp is used as the guess for when the clock ticked.

Because, if you properly factor out the common code, an RTC read
is just one inb(), as opposed to two for the PIT, I would hope
it could do better.

> And you definitely want to disable interrupts and make sure nobody
> else reads that register, since reading it will clear it. Although I
> guess that during early boot there shouldn't be any other RTC
> activity..

That's the other reason to factor out the CMOS locking.

There's code in e.g.: arch/x86/kernel/rtc.c:mach_get_cmos_time() or
drivers/rtc/rtc-cmos.c:cmos_nvram_read() that would also benefit from
not acquiring and releasing the lock around every single one-byte read.

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH RFC] x86, tsc: Allow for high latency in quick_pit_calibrate()
  2015-06-04 16:38       ` George Spelvin
@ 2015-06-04 16:52         ` Linus Torvalds
  2015-06-04 17:54           ` George Spelvin
  2015-06-05  5:52           ` George Spelvin
  2015-06-05  5:58         ` Ingo Molnar
  1 sibling, 2 replies; 48+ messages in thread
From: Linus Torvalds @ 2015-06-04 16:52 UTC (permalink / raw)
  To: George Spelvin
  Cc: Ingo Molnar, Adrian Hunter, Andi Kleen, Peter Anvin,
	Linux Kernel Mailing List, Andy Lutomirski, Thomas Gleixner

On Thu, Jun 4, 2015 at 9:38 AM, George Spelvin <linux@horizon.com> wrote:
>
> Also, you don't have to enable interrupts in the RTC to get the PF bit
> set periodically.  You only program the interval (register A lsbits),
> not the IRQ (register B bit 6).  In fact, disabling the interrupt is
> probably safer.

Also, I don't know what Ingo's test-code looked like, but it is
probably best to set the RTC to 8kHz, and then time a few iterations
of "count cycles between PF gets set" rather than "wait for bit to get
set after programming". With the usual "have timestamp both before the
read that shows the bit set, and after the read" so that you can
estimate how big the error window is.

Maybe that's what Ingo's jitter numbers already are, without seeing
what the test-code was it's hard to guess.

And you definitely want to disable interrupts and make sure nobody
else reads that register, since reading it will clear it. Although I
guess that during early boot there shouldn't be any other RTC
activity..

                        Linus

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH RFC] x86, tsc: Allow for high latency in quick_pit_calibrate()
  2015-06-03 19:07     ` George Spelvin
@ 2015-06-04 16:38       ` George Spelvin
  2015-06-04 16:52         ` Linus Torvalds
  2015-06-05  5:58         ` Ingo Molnar
  0 siblings, 2 replies; 48+ messages in thread
From: George Spelvin @ 2015-06-04 16:38 UTC (permalink / raw)
  To: mingo; +Cc: adrian.hunter, ak, hpa, linux-kernel, luto, tglx, torvalds

Ingo Molnar wrote:
> - Alternatively, I also tried a different method: to set up the RTC
>   periodic IRQ during early boot, but not have an IRQ handler, polling
>   RTC_PF in the rtc_cmos_read(RTC_INTR_FLAGS) IRQ status byte.
>
>   Unfortunately when I do this then PIO based RTC accesses can take
>   tens of thousands of cycles, and the resulting jitter is pretty bad
>   and hard to filter:

Did you use rtc_cmos_read()?  Because that's a rather complex function with
lots of locking, to allow NMI access to CMOS.  (Basically, it keeps
a shadow copy of the address register, which the NMI puts back after
it does its nested access.)

You want to do the locking and selection of the register to read
just once, and have just the raw inb() in the loop.  The resultant
code looks like this:


	lock_cmos_prefix(RTC_REG_C);
	outb(RTC_REG_C, RTC_PORT(0));

	for (lots of iterations) {
		flag = inb(RTC_PORT(1)) & 0x40;
	}

	lock_cmos_suffix(RTC_REG_C);

That should be a *lot* better.

Also, you don't have to enable interrupts in the RTC to get the PF bit
set periodically.  You only program the interval (register A lsbits),
not the IRQ (register B bit 6).  In fact, disabling the interrupt is
probably safer.

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH RFC] x86, tsc: Allow for high latency in quick_pit_calibrate()
  2015-06-03 18:48   ` H. Peter Anvin
@ 2015-06-03 19:07     ` George Spelvin
  2015-06-04 16:38       ` George Spelvin
  0 siblings, 1 reply; 48+ messages in thread
From: George Spelvin @ 2015-06-03 19:07 UTC (permalink / raw)
  To: hpa, linux; +Cc: adrian.hunter, ak, linux-kernel, luto, tglx, torvalds

> Not guaranteed either, and I know for a fact there are platforms out
> there which synthesize the RTC clock.

Interesting!  And surprising.  Doesn't that take more battery power?

> Ah, I wasn't aware of the PF (not PE) bit.  That suddenly makes it a lot
> more interesting.  So polling for the PF bit suddenly makes sense, and
> is probably the single best option for calibration.

I had forgotten about it, too.  But adapting the current calibration
loop to it is trivial.  You lose the ability to detect drasitcally
delayed reads, but the current code barely uses that.

> Well, on x86 hopefully the entropy problem should soon be history...

And the installed base will be replaced when?  You may recall a discussion
in December 2012 to *not* drop 486 support because people were still
using embedded clones of it with modern kernels.

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH RFC] x86, tsc: Allow for high latency in quick_pit_calibrate()
  2015-06-03 18:29 ` George Spelvin
@ 2015-06-03 18:48   ` H. Peter Anvin
  2015-06-03 19:07     ` George Spelvin
  0 siblings, 1 reply; 48+ messages in thread
From: H. Peter Anvin @ 2015-06-03 18:48 UTC (permalink / raw)
  To: George Spelvin; +Cc: adrian.hunter, ak, linux-kernel, luto, tglx, torvalds

On 06/03/2015 11:29 AM, George Spelvin wrote:
> 
> Indeed, it's the only one which is guaranteed a separate crystal.
> Many low-cost chipsets generate ALL other frequencies from one crystal
> with PLLs.
> 

Not guaranteed either, and I know for a fact there are platforms out
there which synthesize the RTC clock.

> But as I mentioned earlier, you *can* get higher frequencies with
> interrupts *or* polling.  When you program the periodic event frequency
> (from 2 to 8192 Hz), it does three things at that rate:
> 
> 1) Periodic interrupts (if enabled),
> 2) Square wave output (if enabled, and relevant to discrete chips only), and
> 3) Sets the PE bit (register C, bit 6), which is auto-cleared on read.

Ah, I wasn't aware of the PF (not PE) bit.  That suddenly makes it a lot
more interesting.  So polling for the PF bit suddenly makes sense, and
is probably the single best option for calibration.

> So if you're willing to poll the device (which the TSC calibration does
> already), you can get high resolution tick edges without interrupts.
> 
> Because it's only one read (port 0x71), it's slightly faster than the PIT.
> 
> (I also wish we could use all those TSC reads for initial entropy seeding
> somehow.)

Well, on x86 hopefully the entropy problem should soon be history...

	-hpa


^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH RFC] x86, tsc: Allow for high latency in quick_pit_calibrate()
  2015-06-03  6:27 George Spelvin
@ 2015-06-03 18:29 ` George Spelvin
  2015-06-03 18:48   ` H. Peter Anvin
  0 siblings, 1 reply; 48+ messages in thread
From: George Spelvin @ 2015-06-03 18:29 UTC (permalink / raw)
  To: hpa; +Cc: adrian.hunter, ak, linux-kernel, linux, luto, tglx, torvalds

H. Peter Anvin wrote:
> The RTC is probably the most reliable reference clock, in part because
> 32 kHz crystals are generally calibrated and extremely stable.  However,
> to get more than 1 Hz frequency out of it you have to enable interrupts
> (which gets you to 8192 Hz).

Indeed, it's the only one which is guaranteed a separate crystal.
Many low-cost chipsets generate ALL other frequencies from one crystal
with PLLs.

(This is why I'm keen on using the RTC interrupt for an entropy
source.)

But as I mentioned earlier, you *can* get higher frequencies with
interrupts *or* polling.  When you program the periodic event frequency
(from 2 to 8192 Hz), it does three things at that rate:

1) Periodic interrupts (if enabled),
2) Square wave output (if enabled, and relevant to discrete chips only), and
3) Sets the PE bit (register C, bit 6), which is auto-cleared on read.

So if you're willing to poll the device (which the TSC calibration does
already), you can get high resolution tick edges without interrupts.

Because it's only one read (port 0x71), it's slightly faster than the PIT.

(I also wish we could use all those TSC reads for initial entropy seeding
somehow.)

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH RFC] x86, tsc: Allow for high latency in quick_pit_calibrate()
@ 2015-06-03  6:27 George Spelvin
  2015-06-03 18:29 ` George Spelvin
  0 siblings, 1 reply; 48+ messages in thread
From: George Spelvin @ 2015-06-03  6:27 UTC (permalink / raw)
  To: torvalds; +Cc: adrian.hunter, ak, linux, linux-kernel, luto, tglx

Linus wrote:
> The only *well-defined* clock in a modern PC seems to still remain the
> PIT. Yes, it's very sad.  But all the other clocks tend to be
> untrustworthy for various reasons

Actually, there is one more: the CMOS RTC clock is quite reliably 32768 Hz.

The reas process is very similar, although you only have a single PE bit
rather than a count for sanity checking.  You can program a rate between
2 Hz and 8192 Hz at which the PE bit (register C, 0x06) will be set.

A rate of 4096 Hz would work similarly to the current PIT-based
1193182/256 = 4661 Hz code.

Then you just poll until you capture the transition (it's cleared
automatically by read) and do similar filtering.


(It would also be very nifty to use some of the values collected by
this calibration to seed boot-time entropy.)

^ permalink raw reply	[flat|nested] 48+ messages in thread

end of thread, other threads:[~2015-07-06  7:42 UTC | newest]

Thread overview: 48+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-05-21  7:55 [PATCH RFC] x86, tsc: Allow for high latency in quick_pit_calibrate() Adrian Hunter
2015-06-01  7:57 ` Adrian Hunter
2015-06-02 13:58   ` Thomas Gleixner
2015-06-02 19:33 ` Thomas Gleixner
2015-06-02 19:41   ` Andy Lutomirski
2015-06-02 19:43     ` Andi Kleen
2015-06-02 19:58       ` Thomas Gleixner
2015-06-02 20:03         ` Andy Lutomirski
2015-06-02 20:20           ` Andi Kleen
2015-06-02 21:03             ` Thomas Gleixner
2015-06-02 23:38               ` Andi Kleen
2015-06-03  0:21                 ` Andy Lutomirski
2015-06-03  0:39                   ` Andi Kleen
2015-06-03  0:58                     ` Andy Lutomirski
2015-06-03  3:30                       ` Andi Kleen
2015-06-03  8:13                         ` Adrian Hunter
2015-06-03 13:45                           ` Linus Torvalds
2015-06-04 11:28                             ` Adrian Hunter
2015-06-03 16:23                           ` Thomas Gleixner
2015-06-22 11:21                             ` Adrian Hunter
2015-06-22 13:14                               ` Thomas Gleixner
2015-07-06  6:48                                 ` Adrian Hunter
2015-07-06  7:42                                   ` Thomas Gleixner
2015-06-22 14:12                               ` George Spelvin
2015-07-06  7:42                           ` [tip:x86/urgent] x86/tsc: Let high latency PIT fail fast " tip-bot for Adrian Hunter
2015-06-03  4:20   ` [PATCH RFC] x86, tsc: Allow for high latency " Linus Torvalds
2015-06-03  6:20     ` Ingo Molnar
2015-06-03 13:43       ` Linus Torvalds
2015-06-03 16:47         ` Thomas Gleixner
2015-06-03 17:04           ` Linus Torvalds
2015-06-03 17:50             ` H. Peter Anvin
2015-06-04 12:32               ` Ingo Molnar
2015-06-03 17:06           ` Ingo Molnar
2015-06-03  6:27 George Spelvin
2015-06-03 18:29 ` George Spelvin
2015-06-03 18:48   ` H. Peter Anvin
2015-06-03 19:07     ` George Spelvin
2015-06-04 16:38       ` George Spelvin
2015-06-04 16:52         ` Linus Torvalds
2015-06-04 17:54           ` George Spelvin
2015-06-04 18:07             ` Linus Torvalds
2015-06-05  5:52           ` George Spelvin
2015-06-05  6:16             ` Ingo Molnar
2015-06-05  5:58         ` Ingo Molnar
2015-06-05  8:24           ` George Spelvin
2015-06-05  8:31             ` Ingo Molnar
2015-06-05 20:17               ` George Spelvin
2015-06-06 21:50                 ` George Spelvin

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.