* [PATCH v2 0/6] x86/time: improve cross-CPU clock monotonicity (and more)
@ 2016-08-03 12:55 Jan Beulich
  2016-08-03 13:00 ` [PATCH v2 1/6] x86/time: calibrate TSC against platform timer Jan Beulich
                   ` (5 more replies)
  0 siblings, 6 replies; 11+ messages in thread
From: Jan Beulich @ 2016-08-03 12:55 UTC (permalink / raw)
  To: xen-devel; +Cc: Andrew Cooper

1: calibrate TSC against platform timer
2: correctly honor late clearing of TSC related feature flags
3: support 32-bit wide ACPI PM timer
4: fold recurring code
5: group time stamps into a structure
6: relax barriers

Signed-off-by: Jan Beulich <jbeulich@suse.com>
---
v2: Only patch 1 changed (see there) and a new 6th patch got added.



* [PATCH v2 1/6] x86/time: calibrate TSC against platform timer
  2016-08-03 12:55 [PATCH v2 0/6] x86/time: improve cross-CPU clock monotonicity (and more) Jan Beulich
@ 2016-08-03 13:00 ` Jan Beulich
  2016-08-03 13:07   ` Andrew Cooper
  2016-08-03 13:00 ` [PATCH v2 2/6] x86/time: correctly honor late clearing of TSC related feature flags Jan Beulich
                   ` (4 subsequent siblings)
  5 siblings, 1 reply; 11+ messages in thread
From: Jan Beulich @ 2016-08-03 13:00 UTC (permalink / raw)
  To: xen-devel; +Cc: Andrew Cooper

[-- Attachment #1: Type: text/plain, Size: 9378 bytes --]

... instead of unconditionally against the PIT. This allows local and
master system times to remain in better sync (which matters even
though, on any modern system, the master time is really used only
during secondary CPU bringup, as the error between the two is in fact
noticeable in cross-CPU NOW() invocation monotonicity).

This involves moving the init_platform_timer() invocation into
early_time_init(), splitting out the few things which really need to be
done in init_xen_time(). That in turn allows dropping the open-coded
PIT initialization from init_IRQ() (it was needed for APIC clock
calibration, which runs between early_time_init() and init_xen_time()).

In the course of this re-ordering, also set the timer channel 2 gate
low after having finished calibration. This should be benign to overall
system operation, but appears to be the cleaner state.

Also do away with the open-coded 8254 register manipulation in the 8259
code.
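
For illustration only (this is not Xen code; clock_gettime() merely
stands in for the HPET or ACPI PM timer that the reworked init hooks
read), a minimal user-space sketch of the same calibration idea: read
the reference clock and the TSC, wait until the reference has advanced
by 1/CALIBRATE_FRAC of a second, read the TSC again, and scale the TSC
delta up to ticks per second.

/* Illustration only -- not Xen code. */
#define _POSIX_C_SOURCE 199309L
#include <stdint.h>
#include <stdio.h>
#include <time.h>
#include <x86intrin.h>

#define CALIBRATE_FRAC 20 /* calibrate over 50ms, as in the patch */

static uint64_t ref_ns(void)
{
    struct timespec ts;

    clock_gettime(CLOCK_MONOTONIC, &ts);
    return (uint64_t)ts.tv_sec * 1000000000ull + ts.tv_nsec;
}

int main(void)
{
    uint64_t ref_start = ref_ns();
    uint64_t tsc_start = __rdtsc();

    /* Busy-wait until the reference clock has advanced by 50ms. */
    while ( ref_ns() - ref_start < 1000000000ull / CALIBRATE_FRAC )
        continue;

    uint64_t tsc_end = __rdtsc();

    /* Scale the observed TSC delta up to ticks per second. */
    printf("TSC frequency ~ %llu Hz\n",
           (unsigned long long)((tsc_end - tsc_start) * CALIBRATE_FRAC));
    return 0;
}

Run on an x86 box this prints an estimate of the TSC frequency; the
hypervisor variant below does the same against the selected platform
timesource, using rdtsc_ordered() for serialization.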

Signed-off-by: Jan Beulich <jbeulich@suse.com>
---
v2: Ditch preinit_pit() call from init_IRQ(). Adjust style.

--- a/xen/arch/x86/i8259.c
+++ b/xen/arch/x86/i8259.c
@@ -359,13 +359,6 @@ void __init init_IRQ(void)
 
     apic_intr_init();
 
-    /* Set the clock to HZ Hz */
-#define CLOCK_TICK_RATE 1193182 /* crystal freq (Hz) */
-#define LATCH (((CLOCK_TICK_RATE)+(HZ/2))/HZ)
-    outb_p(0x34, PIT_MODE);        /* binary, mode 2, LSB/MSB, ch 0 */
-    outb_p(LATCH & 0xff, PIT_CH0); /* LSB */
-    outb(LATCH >> 8, PIT_CH0);     /* MSB */
-
     setup_irq(2, 0, &cascade);
 }
 
--- a/xen/arch/x86/time.c
+++ b/xen/arch/x86/time.c
@@ -59,7 +59,7 @@ struct platform_timesource {
     char *name;
     u64 frequency;
     u64 (*read_counter)(void);
-    int (*init)(struct platform_timesource *);
+    s64 (*init)(struct platform_timesource *);
     void (*resume)(struct platform_timesource *);
     int counter_bits;
 };
@@ -224,49 +224,18 @@ static struct irqaction __read_mostly ir
     timer_interrupt, "timer", NULL
 };
 
-/* ------ Calibrate the TSC ------- 
- * Return processor ticks per second / CALIBRATE_FRAC.
- */
-
 #define CLOCK_TICK_RATE 1193182 /* system crystal frequency (Hz) */
 #define CALIBRATE_FRAC  20      /* calibrate over 50ms */
-#define CALIBRATE_LATCH ((CLOCK_TICK_RATE+(CALIBRATE_FRAC/2))/CALIBRATE_FRAC)
+#define CALIBRATE_VALUE(freq) (((freq) + CALIBRATE_FRAC / 2) / CALIBRATE_FRAC)
 
-static u64 init_pit_and_calibrate_tsc(void)
+static void preinit_pit(void)
 {
-    u64 start, end;
-    unsigned long count;
-
     /* Set PIT channel 0 to HZ Hz. */
 #define LATCH (((CLOCK_TICK_RATE)+(HZ/2))/HZ)
     outb_p(0x34, PIT_MODE);        /* binary, mode 2, LSB/MSB, ch 0 */
     outb_p(LATCH & 0xff, PIT_CH0); /* LSB */
     outb(LATCH >> 8, PIT_CH0);     /* MSB */
-
-    /* Set the Gate high, disable speaker */
-    outb((inb(0x61) & ~0x02) | 0x01, 0x61);
-
-    /*
-     * Now let's take care of CTC channel 2
-     *
-     * Set the Gate high, program CTC channel 2 for mode 0, (interrupt on
-     * terminal count mode), binary count, load 5 * LATCH count, (LSB and MSB)
-     * to begin countdown.
-     */
-    outb(0xb0, PIT_MODE);           /* binary, mode 0, LSB/MSB, Ch 2 */
-    outb(CALIBRATE_LATCH & 0xff, PIT_CH2); /* LSB of count */
-    outb(CALIBRATE_LATCH >> 8, PIT_CH2);   /* MSB of count */
-
-    start = rdtsc_ordered();
-    for ( count = 0; (inb(0x61) & 0x20) == 0; count++ )
-        continue;
-    end = rdtsc_ordered();
-
-    /* Error if the CTC doesn't behave itself. */
-    if ( count == 0 )
-        return 0;
-
-    return ((end - start) * (u64)CALIBRATE_FRAC);
+#undef LATCH
 }
 
 void set_time_scale(struct time_scale *ts, u64 ticks_per_sec)
@@ -327,10 +296,49 @@ static u64 read_pit_count(void)
     return count32;
 }
 
-static int __init init_pit(struct platform_timesource *pts)
+static s64 __init init_pit(struct platform_timesource *pts)
 {
+    u8 portb = inb(0x61);
+    u64 start, end;
+    unsigned long count;
+
     using_pit = 1;
-    return 1;
+
+    /* Set the Gate high, disable speaker. */
+    outb((portb & ~0x02) | 0x01, 0x61);
+
+    /*
+     * Now let's take care of CTC channel 2: mode 0, (interrupt on
+     * terminal count mode), binary count, load CALIBRATE_LATCH count,
+     * (LSB and MSB) to begin countdown.
+     */
+#define CALIBRATE_LATCH CALIBRATE_VALUE(CLOCK_TICK_RATE)
+    outb(0xb0, PIT_MODE);                  /* binary, mode 0, LSB/MSB, Ch 2 */
+    outb(CALIBRATE_LATCH & 0xff, PIT_CH2); /* LSB of count */
+    outb(CALIBRATE_LATCH >> 8, PIT_CH2);   /* MSB of count */
+#undef CALIBRATE_LATCH
+
+    start = rdtsc_ordered();
+    for ( count = 0; !(inb(0x61) & 0x20); ++count )
+        continue;
+    end = rdtsc_ordered();
+
+    /* Set the Gate low, disable speaker. */
+    outb(portb & ~0x03, 0x61);
+
+    /* Error if the CTC doesn't behave itself. */
+    if ( count == 0 )
+        return 0;
+
+    return (end - start) * CALIBRATE_FRAC;
+}
+
+static void resume_pit(struct platform_timesource *pts)
+{
+    /* Set CTC channel 2 to mode 0 again; initial value does not matter. */
+    outb(0xb0, PIT_MODE); /* binary, mode 0, LSB/MSB, Ch 2 */
+    outb(0, PIT_CH2);     /* LSB of count */
+    outb(0, PIT_CH2);     /* MSB of count */
 }
 
 static struct platform_timesource __initdata plt_pit =
@@ -340,7 +348,8 @@ static struct platform_timesource __init
     .frequency = CLOCK_TICK_RATE,
     .read_counter = read_pit_count,
     .counter_bits = 32,
-    .init = init_pit
+    .init = init_pit,
+    .resume = resume_pit,
 };
 
 /************************************************************
@@ -352,15 +361,26 @@ static u64 read_hpet_count(void)
     return hpet_read32(HPET_COUNTER);
 }
 
-static int __init init_hpet(struct platform_timesource *pts)
+static s64 __init init_hpet(struct platform_timesource *pts)
 {
-    u64 hpet_rate = hpet_setup();
+    u64 hpet_rate = hpet_setup(), start;
+    u32 count, target;
 
     if ( hpet_rate == 0 )
         return 0;
 
     pts->frequency = hpet_rate;
-    return 1;
+
+    count = hpet_read32(HPET_COUNTER);
+    start = rdtsc_ordered();
+    target = count + CALIBRATE_VALUE(hpet_rate);
+    if ( target < count )
+        while ( hpet_read32(HPET_COUNTER) >= count )
+            continue;
+    while ( hpet_read32(HPET_COUNTER) < target )
+        continue;
+
+    return (rdtsc_ordered() - start) * CALIBRATE_FRAC;
 }
 
 static void resume_hpet(struct platform_timesource *pts)
@@ -392,12 +412,24 @@ static u64 read_pmtimer_count(void)
     return inl(pmtmr_ioport);
 }
 
-static int __init init_pmtimer(struct platform_timesource *pts)
+static s64 __init init_pmtimer(struct platform_timesource *pts)
 {
+    u64 start;
+    u32 count, target, mask = 0xffffff;
+
     if ( pmtmr_ioport == 0 )
         return 0;
 
-    return 1;
+    count = inl(pmtmr_ioport) & mask;
+    start = rdtsc_ordered();
+    target = count + CALIBRATE_VALUE(ACPI_PM_FREQUENCY);
+    if ( target < count )
+        while ( (inl(pmtmr_ioport) & mask) >= count )
+            continue;
+    while ( (inl(pmtmr_ioport) & mask) < target )
+        continue;
+
+    return (rdtsc_ordered() - start) * CALIBRATE_FRAC;
 }
 
 static struct platform_timesource __initdata plt_pmtimer =
@@ -533,14 +565,15 @@ static void resume_platform_timer(void)
     plt_stamp = plt_src.read_counter();
 }
 
-static void __init init_platform_timer(void)
+static u64 __init init_platform_timer(void)
 {
     static struct platform_timesource * __initdata plt_timers[] = {
         &plt_hpet, &plt_pmtimer, &plt_pit
     };
 
     struct platform_timesource *pts = NULL;
-    int i, rc = -1;
+    unsigned int i;
+    s64 rc = -1;
 
     if ( opt_clocksource[0] != '\0' )
     {
@@ -578,15 +611,12 @@ static void __init init_platform_timer(v
 
     plt_overflow_period = scale_delta(
         1ull << (pts->counter_bits-1), &plt_scale);
-    init_timer(&plt_overflow_timer, plt_overflow, NULL, 0);
     plt_src = *pts;
-    plt_overflow(NULL);
-
-    platform_timer_stamp = plt_stamp64;
-    stime_platform_stamp = NOW();
 
     printk("Platform timer is %s %s\n",
            freq_string(pts->frequency), pts->name);
+
+    return rc;
 }
 
 u64 stime2tsc(s_time_t stime)
@@ -1474,7 +1504,11 @@ int __init init_xen_time(void)
     /* NB. get_cmos_time() can take over one second to execute. */
     do_settime(get_cmos_time(), 0, NOW());
 
-    init_platform_timer();
+    /* Finish platform timer initialization. */
+    init_timer(&plt_overflow_timer, plt_overflow, NULL, 0);
+    plt_overflow(NULL);
+    platform_timer_stamp = plt_stamp64;
+    stime_platform_stamp = NOW();
 
     init_percpu_time();
 
@@ -1489,7 +1523,10 @@ int __init init_xen_time(void)
 void __init early_time_init(void)
 {
     struct cpu_time *t = &this_cpu(cpu_time);
-    u64 tmp = init_pit_and_calibrate_tsc();
+    u64 tmp;
+
+    preinit_pit();
+    tmp = init_platform_timer();
 
     set_time_scale(&t->tsc_scale, tmp);
     t->local_tsc_stamp = boot_tsc_stamp;
@@ -1598,7 +1635,7 @@ int time_suspend(void)
 
 int time_resume(void)
 {
-    init_pit_and_calibrate_tsc();
+    preinit_pit();
 
     resume_platform_timer();
 




* [PATCH v2 2/6] x86/time: correctly honor late clearing of TSC related feature flags
  2016-08-03 12:55 [PATCH v2 0/6] x86/time: improve cross-CPU clock monotonicity (and more) Jan Beulich
  2016-08-03 13:00 ` [PATCH v2 1/6] x86/time: calibrate TSC against platform timer Jan Beulich
@ 2016-08-03 13:00 ` Jan Beulich
  2016-08-17 13:41   ` Ping: " Jan Beulich
  2016-08-03 13:01 ` [PATCH v2 3/6] x86/time: support 32-bit wide ACPI PM timer Jan Beulich
                   ` (3 subsequent siblings)
  5 siblings, 1 reply; 11+ messages in thread
From: Jan Beulich @ 2016-08-03 13:00 UTC (permalink / raw)
  To: xen-devel; +Cc: Andrew Cooper

[-- Attachment #1: Type: text/plain, Size: 3233 bytes --]

Since such clearing of flags may have an impact on the selected
rendezvous function, handle it in a central place.

But don't allow such feature flags to be cleared during CPU hotplug:
platform and local system times may have diverged significantly by
then, potentially causing noticeably (even if only temporarily) strange
behavior. As we anyway expect only sufficiently similar CPUs to appear
during hotplug, this shouldn't introduce new limitations.
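
A condensed sketch of the pattern the diff below introduces (the types,
flags and names here are simplified stand-ins, not the real Xen
declarations): all late clearing goes through one helper, which then
re-derives the rendezvous choice from the remaining flags.

/* Simplified stand-ins -- not the real Xen declarations. */
#include <stdbool.h>
#include <stdio.h>

static bool constant_tsc = true;
static bool tsc_reliable = true;

static void std_rendezvous(void *r) { (void)r; }
static void tsc_rendezvous(void *r) { (void)r; }

static void (*rendezvous_fn)(void *) = std_rendezvous;

/* The one central place that clears a flag and re-derives dependent state. */
static void clear_tsc_reliable(void)
{
    tsc_reliable = false;

    /* Constant-rate but unreliable TSCs need the re-syncing rendezvous. */
    rendezvous_fn = (constant_tsc && !tsc_reliable) ? tsc_rendezvous
                                                    : std_rendezvous;
}

int main(void)
{
    clear_tsc_reliable();
    printf("using %s rendezvous\n",
           rendezvous_fn == tsc_rendezvous ? "TSC-resyncing" : "standard");
    return 0;
}

init_xen_time() then merely invokes the helper with no flag to clear in
order to compute the initial choice, which is what the clear_tsc_cap(0)
call in the diff does.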

Reported-by: Joao Martins <joao.m.martins@oracle.com>
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Tested-by: Dario Faggioli <dario.faggioli@citrix.com>
Tested-by: Joao Martins <joao.m.martins@oracle.com>

--- a/xen/arch/x86/cpu/mwait-idle.c
+++ b/xen/arch/x86/cpu/mwait-idle.c
@@ -1162,8 +1162,8 @@ static int mwait_idle_cpu_init(struct no
 		}
 
 		if (state > 2 && !boot_cpu_has(X86_FEATURE_NONSTOP_TSC) &&
-		    !pm_idle_save)
-			setup_clear_cpu_cap(X86_FEATURE_TSC_RELIABLE);
+		    !pm_idle_save && system_state < SYS_STATE_active)
+			clear_tsc_cap(X86_FEATURE_TSC_RELIABLE);
 
 		cx = dev->states + dev->count;
 		cx->type = state;
--- a/xen/arch/x86/time.c
+++ b/xen/arch/x86/time.c
@@ -1353,6 +1353,24 @@ static void time_calibration(void *unuse
                      &r, 1);
 }
 
+void __init clear_tsc_cap(unsigned int feature)
+{
+    void (*rendezvous_fn)(void *) = time_calibration_std_rendezvous;
+
+    if ( feature )
+        setup_clear_cpu_cap(feature);
+
+    /* If we have constant-rate TSCs then scale factor can be shared. */
+    if ( boot_cpu_has(X86_FEATURE_CONSTANT_TSC) )
+    {
+        /* If TSCs are not marked as 'reliable', re-sync during rendezvous. */
+        if ( !boot_cpu_has(X86_FEATURE_TSC_RELIABLE) )
+            rendezvous_fn = time_calibration_tsc_rendezvous;
+    }
+
+    time_calibration_rendezvous_fn = rendezvous_fn;
+}
+
 static struct {
     s_time_t local_stime, master_stime;
 } ap_bringup_ref;
@@ -1478,7 +1496,7 @@ static int __init verify_tsc_reliability
         {
             printk("%s: TSC warp detected, disabling TSC_RELIABLE\n",
                    __func__);
-            setup_clear_cpu_cap(X86_FEATURE_TSC_RELIABLE);
+            clear_tsc_cap(X86_FEATURE_TSC_RELIABLE);
         }
     }
 
@@ -1491,13 +1509,7 @@ int __init init_xen_time(void)
 {
     tsc_check_writability();
 
-    /* If we have constant-rate TSCs then scale factor can be shared. */
-    if ( boot_cpu_has(X86_FEATURE_CONSTANT_TSC) )
-    {
-        /* If TSCs are not marked as 'reliable', re-sync during rendezvous. */
-        if ( !boot_cpu_has(X86_FEATURE_TSC_RELIABLE) )
-            time_calibration_rendezvous_fn = time_calibration_tsc_rendezvous;
-    }
+    clear_tsc_cap(0);
 
     open_softirq(TIME_CALIBRATE_SOFTIRQ, local_time_calibration);
 
--- a/xen/include/asm-x86/time.h
+++ b/xen/include/asm-x86/time.h
@@ -70,6 +70,7 @@ void tsc_get_info(struct domain *d, uint
 void force_update_vcpu_system_time(struct vcpu *v);
 
 int host_tsc_is_safe(void);
+void clear_tsc_cap(unsigned int feature);
 void cpuid_time_leaf(uint32_t sub_idx, uint32_t *eax, uint32_t *ebx,
                      uint32_t *ecx, uint32_t *edx);
 





* [PATCH v2 3/6] x86/time: support 32-bit wide ACPI PM timer
  2016-08-03 12:55 [PATCH v2 0/6] x86/time: improve cross-CPU clock monotonicity (and more) Jan Beulich
  2016-08-03 13:00 ` [PATCH v2 1/6] x86/time: calibrate TSC against platform timer Jan Beulich
  2016-08-03 13:00 ` [PATCH v2 2/6] x86/time: correctly honor late clearing of TSC related feature flags Jan Beulich
@ 2016-08-03 13:01 ` Jan Beulich
  2016-08-03 13:02 ` [PATCH v2 4/6] x86/time: fold recurring code Jan Beulich
                   ` (2 subsequent siblings)
  5 siblings, 0 replies; 11+ messages in thread
From: Jan Beulich @ 2016-08-03 13:01 UTC (permalink / raw)
  To: xen-devel; +Cc: Andrew Cooper

[-- Attachment #1: Type: text/plain, Size: 2578 bytes --]

I have no idea why we didn't do so from the beginning.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
Tested-by: Dario Faggioli <dario.faggioli@citrix.com>
Tested-by: Joao Martins <joao.m.martins@oracle.com>
Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>

--- a/xen/arch/x86/acpi/boot.c
+++ b/xen/arch/x86/acpi/boot.c
@@ -481,22 +481,23 @@ static int __init acpi_parse_fadt(struct
 	if (fadt->header.revision >= FADT2_REVISION_ID) {
 		/* FADT rev. 2 */
 		if (fadt->xpm_timer_block.space_id ==
-		    ACPI_ADR_SPACE_SYSTEM_IO)
+		    ACPI_ADR_SPACE_SYSTEM_IO) {
 			pmtmr_ioport = fadt->xpm_timer_block.address;
-		/*
-		 * "X" fields are optional extensions to the original V1.0
-		 * fields, so we must selectively expand V1.0 fields if the
-		 * corresponding X field is zero.
-	 	 */
-		if (!pmtmr_ioport)
-			pmtmr_ioport = fadt->pm_timer_block;
-	} else {
-		/* FADT rev. 1 */
+			pmtmr_width = fadt->xpm_timer_block.bit_width;
+		}
+	}
+	/*
+	 * "X" fields are optional extensions to the original V1.0
+	 * fields, so we must selectively expand V1.0 fields if the
+	 * corresponding X field is zero.
+ 	 */
+	if (!pmtmr_ioport) {
 		pmtmr_ioport = fadt->pm_timer_block;
+		pmtmr_width = fadt->pm_timer_length == 4 ? 24 : 0;
 	}
 	if (pmtmr_ioport)
-		printk(KERN_INFO PREFIX "PM-Timer IO Port: %#x\n",
-		       pmtmr_ioport);
+		printk(KERN_INFO PREFIX "PM-Timer IO Port: %#x (%u bits)\n",
+		       pmtmr_ioport, pmtmr_width);
 #endif
 
 	acpi_smi_cmd       = fadt->smi_command;
--- a/xen/arch/x86/time.c
+++ b/xen/arch/x86/time.c
@@ -403,6 +403,7 @@ static struct platform_timesource __init
  */
 
 u32 __read_mostly pmtmr_ioport;
+unsigned int __initdata pmtmr_width;
 
 /* ACPI PM timer ticks at 3.579545 MHz. */
 #define ACPI_PM_FREQUENCY 3579545
@@ -417,9 +418,15 @@ static s64 __init init_pmtimer(struct pl
     u64 start;
     u32 count, target, mask = 0xffffff;
 
-    if ( pmtmr_ioport == 0 )
+    if ( !pmtmr_ioport || !pmtmr_width )
         return 0;
 
+    if ( pmtmr_width == 32 )
+    {
+        pts->counter_bits = 32;
+        mask = 0xffffffff;
+    }
+
     count = inl(pmtmr_ioport) & mask;
     start = rdtsc_ordered();
     target = count + CALIBRATE_VALUE(ACPI_PM_FREQUENCY);
--- a/xen/include/asm-x86/acpi.h
+++ b/xen/include/asm-x86/acpi.h
@@ -146,6 +146,7 @@ extern u32 x86_acpiid_to_apicid[];
 #define INVALID_ACPIID		(-1U)
 
 extern u32 pmtmr_ioport;
+extern unsigned int pmtmr_width;
 
 int acpi_dmar_init(void);
 void acpi_mmcfg_init(void);





* [PATCH v2 4/6] x86/time: fold recurring code
  2016-08-03 12:55 [PATCH v2 0/6] x86/time: improve cross-CPU clock monotonicity (and more) Jan Beulich
                   ` (2 preceding siblings ...)
  2016-08-03 13:01 ` [PATCH v2 3/6] x86/time: support 32-bit wide ACPI PM timer Jan Beulich
@ 2016-08-03 13:02 ` Jan Beulich
  2016-08-03 13:03 ` [PATCH v2 5/6] x86/time: group time stamps into a structure Jan Beulich
  2016-08-03 13:04 ` [PATCH v2 6/6] x86/time: relax barriers Jan Beulich
  5 siblings, 0 replies; 11+ messages in thread
From: Jan Beulich @ 2016-08-03 13:02 UTC (permalink / raw)
  To: xen-devel; +Cc: Andrew Cooper

[-- Attachment #1: Type: text/plain, Size: 2421 bytes --]

Common code between time_calibration_{std,tsc}_rendezvous() can better
live in a single place, eliminating the risk of adjusting one without
the other.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
Tested-by: Dario Faggioli <dario.faggioli@citrix.com>
Tested-by: Joao Martins <joao.m.martins@oracle.com>
Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>

--- a/xen/arch/x86/time.c
+++ b/xen/arch/x86/time.c
@@ -1258,6 +1258,18 @@ struct calibration_rendezvous {
     u64 master_tsc_stamp;
 };
 
+static void
+time_calibration_rendezvous_tail(const struct calibration_rendezvous *r)
+{
+    struct cpu_calibration *c = &this_cpu(cpu_calibration);
+
+    c->local_tsc_stamp = rdtsc_ordered();
+    c->stime_local_stamp = get_s_time_fixed(c->local_tsc_stamp);
+    c->stime_master_stamp = r->master_stime;
+
+    raise_softirq(TIME_CALIBRATE_SOFTIRQ);
+}
+
 /*
  * Keep TSCs in sync when they run at the same rate, but may stop in
  * deep-sleep C states.
@@ -1265,7 +1277,6 @@ struct calibration_rendezvous {
 static void time_calibration_tsc_rendezvous(void *_r)
 {
     int i;
-    struct cpu_calibration *c = &this_cpu(cpu_calibration);
     struct calibration_rendezvous *r = _r;
     unsigned int total_cpus = cpumask_weight(&r->cpu_calibration_map);
 
@@ -1306,17 +1317,12 @@ static void time_calibration_tsc_rendezv
         }
     }
 
-    c->local_tsc_stamp = rdtsc_ordered();
-    c->stime_local_stamp = get_s_time_fixed(c->local_tsc_stamp);
-    c->stime_master_stamp = r->master_stime;
-
-    raise_softirq(TIME_CALIBRATE_SOFTIRQ);
+    time_calibration_rendezvous_tail(r);
 }
 
 /* Ordinary rendezvous function which does not modify TSC values. */
 static void time_calibration_std_rendezvous(void *_r)
 {
-    struct cpu_calibration *c = &this_cpu(cpu_calibration);
     struct calibration_rendezvous *r = _r;
     unsigned int total_cpus = cpumask_weight(&r->cpu_calibration_map);
 
@@ -1336,11 +1342,7 @@ static void time_calibration_std_rendezv
         mb(); /* receive signal /then/ read r->master_stime */
     }
 
-    c->local_tsc_stamp = rdtsc_ordered();
-    c->stime_local_stamp = get_s_time_fixed(c->local_tsc_stamp);
-    c->stime_master_stamp = r->master_stime;
-
-    raise_softirq(TIME_CALIBRATE_SOFTIRQ);
+    time_calibration_rendezvous_tail(r);
 }
 
 static void (*time_calibration_rendezvous_fn)(void *) =





* [PATCH v2 5/6] x86/time: group time stamps into a structure
  2016-08-03 12:55 [PATCH v2 0/6] x86/time: improve cross-CPU clock monotonicity (and more) Jan Beulich
                   ` (3 preceding siblings ...)
  2016-08-03 13:02 ` [PATCH v2 4/6] x86/time: fold recurring code Jan Beulich
@ 2016-08-03 13:03 ` Jan Beulich
  2016-08-03 13:04 ` [PATCH v2 6/6] x86/time: relax barriers Jan Beulich
  5 siblings, 0 replies; 11+ messages in thread
From: Jan Beulich @ 2016-08-03 13:03 UTC (permalink / raw)
  To: xen-devel; +Cc: Andrew Cooper

[-- Attachment #1: Type: text/plain, Size: 12194 bytes --]

If that had been done from the beginning, mistakes like the one
corrected in commit b64438c7c1 ("x86/time: use correct (local) time
stamp in constant-TSC calibration fast path") would likely never have
happened.

Also add a few "const" to make it more obvious when things aren't
expected to change.
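
A minimal sketch of the benefit (simplified declarations, not the real
Xen ones, and take_over_calibration() is a made-up name): with the
stamps grouped, producer and consumer copy them as one unit, so a field
added later cannot be forgotten at one of the copy sites.

/* Simplified stand-ins -- not the real Xen declarations. */
#include <stdint.h>

struct cpu_time_stamp {
    uint64_t local_tsc;
    int64_t  local_stime;   /* s_time_t in Xen */
    int64_t  master_stime;
};

struct cpu_time {
    struct cpu_time_stamp stamp;
    /* ... per-CPU scale factors ... */
};

static void take_over_calibration(struct cpu_time *t,
                                  const struct cpu_time_stamp *c)
{
    t->stamp = *c;   /* one structure assignment instead of three copies */
}

The t->stamp = *c assignment in local_time_calibration() in the diff
below is exactly this kind of whole-structure copy.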

Signed-off-by: Jan Beulich <jbeulich@suse.com>
Tested-by: Dario Faggioli <dario.faggioli@citrix.com>
Tested-by: Joao Martins <joao.m.martins@oracle.com>
Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>

--- a/xen/arch/x86/time.c
+++ b/xen/arch/x86/time.c
@@ -47,10 +47,14 @@ unsigned long __read_mostly cpu_khz;  /*
 DEFINE_SPINLOCK(rtc_lock);
 unsigned long pit0_ticks;
 
+struct cpu_time_stamp {
+    u64 local_tsc;
+    s_time_t local_stime;
+    s_time_t master_stime;
+};
+
 struct cpu_time {
-    u64 local_tsc_stamp;
-    s_time_t stime_local_stamp;
-    s_time_t stime_master_stamp;
+    struct cpu_time_stamp stamp;
     struct time_scale tsc_scale;
 };
 
@@ -118,7 +122,7 @@ static inline u32 mul_frac(u32 multiplic
  * Scale a 64-bit delta by scaling and multiplying by a 32-bit fraction,
  * yielding a 64-bit result.
  */
-u64 scale_delta(u64 delta, struct time_scale *scale)
+u64 scale_delta(u64 delta, const struct time_scale *scale)
 {
     u64 product;
 
@@ -635,11 +639,11 @@ u64 stime2tsc(s_time_t stime)
     t = &this_cpu(cpu_time);
     sys_to_tsc = scale_reciprocal(t->tsc_scale);
 
-    stime_delta = stime - t->stime_local_stamp;
+    stime_delta = stime - t->stamp.local_stime;
     if ( stime_delta < 0 )
         stime_delta = 0;
 
-    return t->local_tsc_stamp + scale_delta(stime_delta, &sys_to_tsc);
+    return t->stamp.local_tsc + scale_delta(stime_delta, &sys_to_tsc);
 }
 
 void cstate_restore_tsc(void)
@@ -790,7 +794,7 @@ static unsigned long get_cmos_time(void)
 
 s_time_t get_s_time_fixed(u64 at_tsc)
 {
-    struct cpu_time *t = &this_cpu(cpu_time);
+    const struct cpu_time *t = &this_cpu(cpu_time);
     u64 tsc, delta;
     s_time_t now;
 
@@ -798,8 +802,8 @@ s_time_t get_s_time_fixed(u64 at_tsc)
         tsc = at_tsc;
     else
         tsc = rdtsc_ordered();
-    delta = tsc - t->local_tsc_stamp;
-    now = t->stime_local_stamp + scale_delta(delta, &t->tsc_scale);
+    delta = tsc - t->stamp.local_tsc;
+    now = t->stamp.local_stime + scale_delta(delta, &t->tsc_scale);
 
     return now;
 }
@@ -818,7 +822,7 @@ uint64_t tsc_ticks2ns(uint64_t ticks)
 
 static void __update_vcpu_system_time(struct vcpu *v, int force)
 {
-    struct cpu_time       *t;
+    const struct cpu_time *t;
     struct vcpu_time_info *u, _u = {};
     struct domain *d = v->domain;
     s_time_t tsc_stamp;
@@ -831,7 +835,7 @@ static void __update_vcpu_system_time(st
 
     if ( d->arch.vtsc )
     {
-        s_time_t stime = t->stime_local_stamp;
+        s_time_t stime = t->stamp.local_stime;
 
         if ( is_hvm_domain(d) )
         {
@@ -853,20 +857,20 @@ static void __update_vcpu_system_time(st
     {
         if ( has_hvm_container_domain(d) && hvm_tsc_scaling_supported )
         {
-            tsc_stamp            = hvm_scale_tsc(d, t->local_tsc_stamp);
+            tsc_stamp            = hvm_scale_tsc(d, t->stamp.local_tsc);
             _u.tsc_to_system_mul = d->arch.vtsc_to_ns.mul_frac;
             _u.tsc_shift         = d->arch.vtsc_to_ns.shift;
         }
         else
         {
-            tsc_stamp            = t->local_tsc_stamp;
+            tsc_stamp            = t->stamp.local_tsc;
             _u.tsc_to_system_mul = t->tsc_scale.mul_frac;
             _u.tsc_shift         = t->tsc_scale.shift;
         }
     }
 
     _u.tsc_timestamp = tsc_stamp;
-    _u.system_time   = t->stime_local_stamp;
+    _u.system_time   = t->stamp.local_stime;
 
     if ( is_hvm_domain(d) )
         _u.tsc_timestamp += v->arch.hvm_vcpu.cache_tsc_offset;
@@ -966,12 +970,12 @@ int cpu_frequency_change(u64 freq)
 
     local_irq_disable();
     /* Platform time /first/, as we may be delayed by platform_timer_lock. */
-    t->stime_master_stamp = read_platform_stime();
-    /* TSC-extrapolated time may be bogus after frequency change. */
-    /*t->stime_local_stamp = get_s_time();*/
-    t->stime_local_stamp = t->stime_master_stamp;
+    t->stamp.master_stime = read_platform_stime();
     curr_tsc = rdtsc_ordered();
-    t->local_tsc_stamp = curr_tsc;
+    /* TSC-extrapolated time may be bogus after frequency change. */
+    /*t->stamp.local_stime = get_s_time_fixed(curr_tsc);*/
+    t->stamp.local_stime = t->stamp.master_stime;
+    t->stamp.local_tsc = curr_tsc;
     set_time_scale(&t->tsc_scale, freq);
     local_irq_enable();
 
@@ -988,28 +992,19 @@ int cpu_frequency_change(u64 freq)
 }
 
 /* Per-CPU communication between rendezvous IRQ and softirq handler. */
-struct cpu_calibration {
-    u64 local_tsc_stamp;
-    s_time_t stime_local_stamp;
-    s_time_t stime_master_stamp;
-};
-static DEFINE_PER_CPU(struct cpu_calibration, cpu_calibration);
+static DEFINE_PER_CPU(struct cpu_time_stamp, cpu_calibration);
 
 /* Softirq handler for per-CPU time calibration. */
 static void local_time_calibration(void)
 {
     struct cpu_time *t = &this_cpu(cpu_time);
-    struct cpu_calibration *c = &this_cpu(cpu_calibration);
+    const struct cpu_time_stamp *c = &this_cpu(cpu_calibration);
 
     /*
-     * System timestamps, extrapolated from local and master oscillators,
-     * taken during this calibration and the previous calibration.
+     * System (extrapolated from local and master oscillators) and TSC
+     * timestamps, taken during this calibration and the previous one.
      */
-    s_time_t prev_local_stime, curr_local_stime;
-    s_time_t prev_master_stime, curr_master_stime;
-
-    /* TSC timestamps taken during this calibration and prev calibration. */
-    u64 prev_tsc, curr_tsc;
+    struct cpu_time_stamp prev, curr;
 
     /*
      * System time and TSC ticks elapsed during the previous calibration
@@ -1018,9 +1013,6 @@ static void local_time_calibration(void)
     u64 stime_elapsed64, tsc_elapsed64;
     u32 stime_elapsed32, tsc_elapsed32;
 
-    /* The accumulated error in the local estimate. */
-    u64 local_stime_err;
-
     /* Error correction to slow down a fast local clock. */
     u32 error_factor = 0;
 
@@ -1034,40 +1026,34 @@ static void local_time_calibration(void)
     {
         /* Atomically read cpu_calibration struct and write cpu_time struct. */
         local_irq_disable();
-        t->local_tsc_stamp    = c->local_tsc_stamp;
-        t->stime_local_stamp  = c->stime_local_stamp;
-        t->stime_master_stamp = c->stime_master_stamp;
+        t->stamp = *c;
         local_irq_enable();
         update_vcpu_system_time(current);
         goto out;
     }
 
-    prev_tsc          = t->local_tsc_stamp;
-    prev_local_stime  = t->stime_local_stamp;
-    prev_master_stime = t->stime_master_stamp;
+    prev = t->stamp;
 
     /* Disabling IRQs ensures we atomically read cpu_calibration struct. */
     local_irq_disable();
-    curr_tsc          = c->local_tsc_stamp;
-    curr_local_stime  = c->stime_local_stamp;
-    curr_master_stime = c->stime_master_stamp;
+    curr = *c;
     local_irq_enable();
 
 #if 0
     printk("PRE%d: tsc=%"PRIu64" stime=%"PRIu64" master=%"PRIu64"\n",
-           smp_processor_id(), prev_tsc, prev_local_stime, prev_master_stime);
+           smp_processor_id(), prev.local_tsc, prev.local_stime, prev.master_stime);
     printk("CUR%d: tsc=%"PRIu64" stime=%"PRIu64" master=%"PRIu64
            " -> %"PRId64"\n",
-           smp_processor_id(), curr_tsc, curr_local_stime, curr_master_stime,
-           curr_master_stime - curr_local_stime);
+           smp_processor_id(), curr.local_tsc, curr.local_stime, curr.master_stime,
+           curr.master_stime - curr.local_stime);
 #endif
 
     /* Local time warps forward if it lags behind master time. */
-    if ( curr_local_stime < curr_master_stime )
-        curr_local_stime = curr_master_stime;
+    if ( curr.local_stime < curr.master_stime )
+        curr.local_stime = curr.master_stime;
 
-    stime_elapsed64 = curr_master_stime - prev_master_stime;
-    tsc_elapsed64   = curr_tsc - prev_tsc;
+    stime_elapsed64 = curr.master_stime - prev.master_stime;
+    tsc_elapsed64   = curr.local_tsc - prev.local_tsc;
 
     /*
      * Weirdness can happen if we lose sync with the platform timer.
@@ -1081,9 +1067,10 @@ static void local_time_calibration(void)
      * clock (slow clocks are warped forwards). The scale factor is clamped
      * to >= 0.5.
      */
-    if ( curr_local_stime != curr_master_stime )
+    if ( curr.local_stime != curr.master_stime )
     {
-        local_stime_err = curr_local_stime - curr_master_stime;
+        u64 local_stime_err = curr.local_stime - curr.master_stime;
+
         if ( local_stime_err > EPOCH )
             local_stime_err = EPOCH;
         error_factor = div_frac(EPOCH, EPOCH + (u32)local_stime_err);
@@ -1136,9 +1123,7 @@ static void local_time_calibration(void)
     local_irq_disable();
     t->tsc_scale.mul_frac = calibration_mul_frac;
     t->tsc_scale.shift    = tsc_shift;
-    t->local_tsc_stamp    = curr_tsc;
-    t->stime_local_stamp  = curr_local_stime;
-    t->stime_master_stamp = curr_master_stime;
+    t->stamp              = curr;
     local_irq_enable();
 
     update_vcpu_system_time(current);
@@ -1261,11 +1246,11 @@ struct calibration_rendezvous {
 static void
 time_calibration_rendezvous_tail(const struct calibration_rendezvous *r)
 {
-    struct cpu_calibration *c = &this_cpu(cpu_calibration);
+    struct cpu_time_stamp *c = &this_cpu(cpu_calibration);
 
-    c->local_tsc_stamp = rdtsc_ordered();
-    c->stime_local_stamp = get_s_time_fixed(c->local_tsc_stamp);
-    c->stime_master_stamp = r->master_stime;
+    c->local_tsc    = rdtsc_ordered();
+    c->local_stime  = get_s_time_fixed(c->local_tsc);
+    c->master_stime = r->master_stime;
 
     raise_softirq(TIME_CALIBRATE_SOFTIRQ);
 }
@@ -1380,21 +1365,18 @@ void __init clear_tsc_cap(unsigned int f
     time_calibration_rendezvous_fn = rendezvous_fn;
 }
 
-static struct {
-    s_time_t local_stime, master_stime;
-} ap_bringup_ref;
+static struct cpu_time_stamp ap_bringup_ref;
 
 void time_latch_stamps(void)
 {
     unsigned long flags;
-    u64 tsc;
 
     local_irq_save(flags);
     ap_bringup_ref.master_stime = read_platform_stime();
-    tsc = rdtsc_ordered();
+    ap_bringup_ref.local_tsc = rdtsc_ordered();
     local_irq_restore(flags);
 
-    ap_bringup_ref.local_stime = get_s_time_fixed(tsc);
+    ap_bringup_ref.local_stime = get_s_time_fixed(ap_bringup_ref.local_tsc);
 }
 
 void init_percpu_time(void)
@@ -1412,7 +1394,7 @@ void init_percpu_time(void)
     tsc = rdtsc_ordered();
     local_irq_restore(flags);
 
-    t->stime_master_stamp = now;
+    t->stamp.master_stime = now;
     /*
      * To avoid a discontinuity (TSC and platform clock can't be expected
      * to be in perfect sync), initialization here needs to match up with
@@ -1425,8 +1407,8 @@ void init_percpu_time(void)
         else
             now += ap_bringup_ref.local_stime - ap_bringup_ref.master_stime;
     }
-    t->local_tsc_stamp    = tsc;
-    t->stime_local_stamp  = now;
+    t->stamp.local_tsc   = tsc;
+    t->stamp.local_stime = now;
 }
 
 /*
@@ -1550,7 +1532,7 @@ void __init early_time_init(void)
     tmp = init_platform_timer();
 
     set_time_scale(&t->tsc_scale, tmp);
-    t->local_tsc_stamp = boot_tsc_stamp;
+    t->stamp.local_tsc = boot_tsc_stamp;
 
     do_div(tmp, 1000);
     cpu_khz = (unsigned long)tmp;
--- a/xen/include/asm-x86/time.h
+++ b/xen/include/asm-x86/time.h
@@ -78,6 +78,6 @@ u64 stime2tsc(s_time_t stime);
 
 struct time_scale;
 void set_time_scale(struct time_scale *ts, u64 ticks_per_sec);
-u64 scale_delta(u64 delta, struct time_scale *scale);
+u64 scale_delta(u64 delta, const struct time_scale *scale);
 
 #endif /* __X86_TIME_H__ */



      * to >= 0.5.
      */
-    if ( curr_local_stime != curr_master_stime )
+    if ( curr.local_stime != curr.master_stime )
     {
-        local_stime_err = curr_local_stime - curr_master_stime;
+        u64 local_stime_err = curr.local_stime - curr.master_stime;
+
         if ( local_stime_err > EPOCH )
             local_stime_err = EPOCH;
         error_factor = div_frac(EPOCH, EPOCH + (u32)local_stime_err);
@@ -1136,9 +1123,7 @@ static void local_time_calibration(void)
     local_irq_disable();
     t->tsc_scale.mul_frac = calibration_mul_frac;
     t->tsc_scale.shift    = tsc_shift;
-    t->local_tsc_stamp    = curr_tsc;
-    t->stime_local_stamp  = curr_local_stime;
-    t->stime_master_stamp = curr_master_stime;
+    t->stamp              = curr;
     local_irq_enable();
 
     update_vcpu_system_time(current);
@@ -1261,11 +1246,11 @@ struct calibration_rendezvous {
 static void
 time_calibration_rendezvous_tail(const struct calibration_rendezvous *r)
 {
-    struct cpu_calibration *c = &this_cpu(cpu_calibration);
+    struct cpu_time_stamp *c = &this_cpu(cpu_calibration);
 
-    c->local_tsc_stamp = rdtsc_ordered();
-    c->stime_local_stamp = get_s_time_fixed(c->local_tsc_stamp);
-    c->stime_master_stamp = r->master_stime;
+    c->local_tsc    = rdtsc_ordered();
+    c->local_stime  = get_s_time_fixed(c->local_tsc);
+    c->master_stime = r->master_stime;
 
     raise_softirq(TIME_CALIBRATE_SOFTIRQ);
 }
@@ -1380,21 +1365,18 @@ void __init clear_tsc_cap(unsigned int f
     time_calibration_rendezvous_fn = rendezvous_fn;
 }
 
-static struct {
-    s_time_t local_stime, master_stime;
-} ap_bringup_ref;
+static struct cpu_time_stamp ap_bringup_ref;
 
 void time_latch_stamps(void)
 {
     unsigned long flags;
-    u64 tsc;
 
     local_irq_save(flags);
     ap_bringup_ref.master_stime = read_platform_stime();
-    tsc = rdtsc_ordered();
+    ap_bringup_ref.local_tsc = rdtsc_ordered();
     local_irq_restore(flags);
 
-    ap_bringup_ref.local_stime = get_s_time_fixed(tsc);
+    ap_bringup_ref.local_stime = get_s_time_fixed(ap_bringup_ref.local_tsc);
 }
 
 void init_percpu_time(void)
@@ -1412,7 +1394,7 @@ void init_percpu_time(void)
     tsc = rdtsc_ordered();
     local_irq_restore(flags);
 
-    t->stime_master_stamp = now;
+    t->stamp.master_stime = now;
     /*
      * To avoid a discontinuity (TSC and platform clock can't be expected
      * to be in perfect sync), initialization here needs to match up with
@@ -1425,8 +1407,8 @@ void init_percpu_time(void)
         else
             now += ap_bringup_ref.local_stime - ap_bringup_ref.master_stime;
     }
-    t->local_tsc_stamp    = tsc;
-    t->stime_local_stamp  = now;
+    t->stamp.local_tsc   = tsc;
+    t->stamp.local_stime = now;
 }
 
 /*
@@ -1550,7 +1532,7 @@ void __init early_time_init(void)
     tmp = init_platform_timer();
 
     set_time_scale(&t->tsc_scale, tmp);
-    t->local_tsc_stamp = boot_tsc_stamp;
+    t->stamp.local_tsc = boot_tsc_stamp;
 
     do_div(tmp, 1000);
     cpu_khz = (unsigned long)tmp;
--- a/xen/include/asm-x86/time.h
+++ b/xen/include/asm-x86/time.h
@@ -78,6 +78,6 @@ u64 stime2tsc(s_time_t stime);
 
 struct time_scale;
 void set_time_scale(struct time_scale *ts, u64 ticks_per_sec);
-u64 scale_delta(u64 delta, struct time_scale *scale);
+u64 scale_delta(u64 delta, const struct time_scale *scale);
 
 #endif /* __X86_TIME_H__ */
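
A reading aid for this excerpt, which starts mid-patch: the hunks above fold
the previously separate per-CPU stamp fields into a single struct
cpu_time_stamp. The sketch below is inferred only from the field accesses in
this excerpt; the actual definition (using Xen's u64/s_time_t typedefs) is
introduced earlier in the patch and is not shown here.

#include <stdint.h>

typedef uint64_t u64;        /* Xen typedef, reproduced for the sketch */
typedef int64_t  s_time_t;   /* Xen typedef: signed nanoseconds */

struct cpu_time_stamp {
    u64      local_tsc;      /* TSC value at the time of the stamp */
    s_time_t local_stime;    /* system time extrapolated from the local TSC */
    s_time_t master_stime;   /* system time from the platform timer */
};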

[-- Attachment #3: Type: text/plain, Size: 127 bytes --]

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 11+ messages in thread

* [PATCH v2 6/6] x86/time: relax barriers
  2016-08-03 12:55 [PATCH v2 0/6] x86/time: improve cross-CPU clock monotonicity (and more) Jan Beulich
                   ` (4 preceding siblings ...)
  2016-08-03 13:03 ` [PATCH v2 5/6] x86/time: group time stamps into a structure Jan Beulich
@ 2016-08-03 13:04 ` Jan Beulich
  2016-08-03 13:11   ` Andrew Cooper
  5 siblings, 1 reply; 11+ messages in thread
From: Jan Beulich @ 2016-08-03 13:04 UTC (permalink / raw)
  To: xen-devel; +Cc: Andrew Cooper

[-- Attachment #1: Type: text/plain, Size: 2549 bytes --]

On x86 there's no need for full barriers in loops waiting for some
memory location to change. Nor do we need full barriers between two
reads and two writes - SMP ones fully suffice (and I actually think
they could in fact be dropped, since atomic_*() operations should
already provide enough ordering).
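
Purely as an illustration of the wait-loop pattern the hunks below switch to
(a minimal standalone sketch, assuming an x86 build with GCC or Clang;
wait_for_flag() and the local cpu_relax() stand-in are this sketch's own,
not Xen's definitions):

#include <immintrin.h>

/* Stand-in for Xen's cpu_relax(): a PAUSE hint to the CPU, not a fence. */
static inline void cpu_relax(void)
{
    _mm_pause();
}

/* Spin until another CPU sets *flag. On x86 the repeated volatile load
 * needs no extra fencing for this purpose, so cpu_relax() replaces the
 * full barrier mb() inside the loop. */
static void wait_for_flag(const volatile int *flag)
{
    while ( !*flag )
        cpu_relax();
}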

--- a/xen/arch/x86/time.c
+++ b/xen/arch/x86/time.c
@@ -1206,7 +1206,7 @@ static void tsc_check_slave(void *unused
     unsigned int cpu = smp_processor_id();
     local_irq_disable();
     while ( !cpumask_test_cpu(cpu, &tsc_check_cpumask) )
-        mb();
+        cpu_relax();
     check_tsc_warp(cpu_khz, &tsc_max_warp);
     cpumask_clear_cpu(cpu, &tsc_check_cpumask);
     local_irq_enable();
@@ -1271,7 +1271,7 @@ static void time_calibration_tsc_rendezv
         if ( smp_processor_id() == 0 )
         {
             while ( atomic_read(&r->semaphore) != (total_cpus - 1) )
-                mb();
+                cpu_relax();
 
             if ( r->master_stime == 0 )
             {
@@ -1284,21 +1284,21 @@ static void time_calibration_tsc_rendezv
                 write_tsc(r->master_tsc_stamp);
 
             while ( atomic_read(&r->semaphore) != (2*total_cpus - 1) )
-                mb();
+                cpu_relax();
             atomic_set(&r->semaphore, 0);
         }
         else
         {
             atomic_inc(&r->semaphore);
             while ( atomic_read(&r->semaphore) < total_cpus )
-                mb();
+                cpu_relax();
 
             if ( i == 0 )
                 write_tsc(r->master_tsc_stamp);
 
             atomic_inc(&r->semaphore);
             while ( atomic_read(&r->semaphore) > total_cpus )
-                mb();
+                cpu_relax();
         }
     }
 
@@ -1316,7 +1316,7 @@ static void time_calibration_std_rendezv
         while ( atomic_read(&r->semaphore) != (total_cpus - 1) )
             cpu_relax();
         r->master_stime = read_platform_stime();
-        mb(); /* write r->master_stime /then/ signal */
+        smp_wmb(); /* write r->master_stime /then/ signal */
         atomic_inc(&r->semaphore);
     }
     else
@@ -1324,7 +1324,7 @@ static void time_calibration_std_rendezv
         atomic_inc(&r->semaphore);
         while ( atomic_read(&r->semaphore) != total_cpus )
             cpu_relax();
-        mb(); /* receive signal /then/ read r->master_stime */
+        smp_rmb(); /* receive signal /then/ read r->master_stime */
     }
 
     time_calibration_rendezvous_tail(r);



[-- Attachment #3: Type: text/plain, Size: 127 bytes --]

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH v2 1/6] x86/time: calibrate TSC against platform timer
  2016-08-03 13:00 ` [PATCH v2 1/6] x86/time: calibrate TSC against platform timer Jan Beulich
@ 2016-08-03 13:07   ` Andrew Cooper
  0 siblings, 0 replies; 11+ messages in thread
From: Andrew Cooper @ 2016-08-03 13:07 UTC (permalink / raw)
  To: Jan Beulich, xen-devel


[-- Attachment #1.1: Type: text/plain, Size: 1118 bytes --]

On 03/08/16 14:00, Jan Beulich wrote:
> ... instead of unconditionally against the PIT. This allows for local
> and master system times to remain in better sync (which matters even
> when, on any modern system, the master time is really used only during
> secondary CPU bringup, as the error between the two is in fact
> noticeable in cross-CPU NOW() invocation monotonicity).
>
> This involves moving the init_platform_timer() invocation into
> early_time_init(), splitting out the few things which really need to be
> done in init_xen_time(). That in turn allows dropping the open coded
> PIT initialization from init_IRQ() (it was needed for APIC clock
> calibration, which runs between early_time_init() and init_xen_time()).
>
> In the course of this re-ordering also set the timer channel 2 gate low
> after having finished calibration. This should be benign to overall
> system operation, but appears to be the more clean state.
>
> Also do away with open coded 8254 register manipulation from 8259 code.
>
> Signed-off-by: Jan Beulich <jbeulich@suse.com>

Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>

[-- Attachment #1.2: Type: text/html, Size: 1644 bytes --]

[-- Attachment #2: Type: text/plain, Size: 127 bytes --]

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH v2 6/6] x86/time: relax barriers
  2016-08-03 13:04 ` [PATCH v2 6/6] x86/time: relax barriers Jan Beulich
@ 2016-08-03 13:11   ` Andrew Cooper
  2016-08-03 13:22     ` Jan Beulich
  0 siblings, 1 reply; 11+ messages in thread
From: Andrew Cooper @ 2016-08-03 13:11 UTC (permalink / raw)
  To: Jan Beulich, xen-devel

On 03/08/16 14:04, Jan Beulich wrote:
> On x86 there's no need for full barriers in loops waiting for some
> memory location to change. Nor do we need full barriers between two
> reads and two writes - SMP ones fully suffice (and I actually think
> they could in fact be dropped, since atomic_*() operations should
> already provide enough ordering).

Missing a SoB,

Which "ones" are you referring to?  atomic_*() is only ordered with
respect to the atomic_t used.

Overall, I think the change is correct, so Reviewed-by: Andrew Cooper
<andrew.cooper3@citrix.com>

There are definitely more mis-uses of mandatory barriers in Xen,
although I haven't done a full audit yet.

~Andrew

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH v2 6/6] x86/time: relax barriers
  2016-08-03 13:11   ` Andrew Cooper
@ 2016-08-03 13:22     ` Jan Beulich
  0 siblings, 0 replies; 11+ messages in thread
From: Jan Beulich @ 2016-08-03 13:22 UTC (permalink / raw)
  To: Andrew Cooper; +Cc: xen-devel

>>> On 03.08.16 at 15:11, <andrew.cooper3@citrix.com> wrote:
> On 03/08/16 14:04, Jan Beulich wrote:
>> On x86 there's no need for full barriers in loops waiting for some
>> memory location to change. Nor do we need full barriers between two
>> reads and two writes - SMP ones fully suffice (and I actually think
>> they could in fact be dropped, since atomic_*() operations should
>> already provide enough ordering).
> 
> Missing a SoB,

Oops.

> Which "ones" are you referring to?  atomic_*() is only ordered with
> respect to the atomic_t used.

Oh, right - the one followed by atomic_inc() could be converted
to plain barrier() (but not dropped entirely), while the other one
follows an atomic_read() only, and hence can't be dropped. I'll
remove that part of the description.
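
A portable C11 sketch of the pairing being discussed (struct rendezvous,
publish() and wait_and_read() are this sketch's own names, and the C11
fences merely stand in for Xen's smp_wmb()/smp_rmb(); the real code is
time_calibration_std_rendezvous() plus time_calibration_rendezvous_tail()):

#include <stdatomic.h>
#include <stdint.h>

struct rendezvous {
    atomic_uint semaphore;
    int64_t master_stime;    /* plain field, published via the fences below */
};

/* CPU0: wait for the others, write master_stime, /then/ signal. */
static void publish(struct rendezvous *r, int64_t now, unsigned int total_cpus)
{
    while ( atomic_load_explicit(&r->semaphore, memory_order_relaxed)
            != total_cpus - 1 )
        ;                                         /* cpu_relax() in Xen */
    r->master_stime = now;
    /* smp_wmb() analogue; per the remark above, in Xen it precedes a locked
     * increment and so could even be a plain barrier(), but not vanish. */
    atomic_thread_fence(memory_order_release);
    atomic_fetch_add_explicit(&r->semaphore, 1, memory_order_relaxed);
}

/* Other CPUs: announce arrival, wait for CPU0's signal, /then/ read. */
static int64_t wait_and_read(struct rendezvous *r, unsigned int total_cpus)
{
    atomic_fetch_add_explicit(&r->semaphore, 1, memory_order_relaxed);
    while ( atomic_load_explicit(&r->semaphore, memory_order_relaxed)
            != total_cpus )
        ;                                         /* cpu_relax() in Xen */
    /* smp_rmb() analogue; it only follows a plain read of the semaphore,
     * so it cannot be dropped. */
    atomic_thread_fence(memory_order_acquire);
    return r->master_stime;
}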

> Overall, I think the change is correct, so Reviewed-by: Andrew Cooper
> <andrew.cooper3@citrix.com>

Thanks.

Jan


_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Ping: [PATCH v2 2/6] x86/time: correctly honor late clearing of TSC related feature flags
  2016-08-03 13:00 ` [PATCH v2 2/6] x86/time: correctly honor late clearing of TSC related feature flags Jan Beulich
@ 2016-08-17 13:41   ` Jan Beulich
  0 siblings, 0 replies; 11+ messages in thread
From: Jan Beulich @ 2016-08-17 13:41 UTC (permalink / raw)
  To: Andrew Cooper; +Cc: xen-devel

Ping?

>>> On 03.08.16 at 15:00, <JBeulich@suse.com> wrote:
> As such clearing of flags may have an impact on the selected rendezvous
> function, handle such in a central place.
> 
> But don't allow such feature flags to be cleared during CPU hotplug:
> Platform and local system times may have diverged significantly by
> then, potentially causing noticeably (even if only temporary) strange
> behavior. As we're anyway expecting only sufficiently similar CPUs to
> appear during hotplug, this shouldn't be introducing new limitations.
> 
> Reported-by: Joao Martins <joao.m.martins@oracle.com>
> Signed-off-by: Jan Beulich <jbeulich@suse.com>
> Tested-by: Dario Faggioli <dario.faggioli@citrix.com>
> Tested-by: Joao Martins <joao.m.martins@oracle.com>
> 
> --- a/xen/arch/x86/cpu/mwait-idle.c
> +++ b/xen/arch/x86/cpu/mwait-idle.c
> @@ -1162,8 +1162,8 @@ static int mwait_idle_cpu_init(struct no
>  		}
>  
>  		if (state > 2 && !boot_cpu_has(X86_FEATURE_NONSTOP_TSC) &&
> -		    !pm_idle_save)
> -			setup_clear_cpu_cap(X86_FEATURE_TSC_RELIABLE);
> +		    !pm_idle_save && system_state < SYS_STATE_active)
> +			clear_tsc_cap(X86_FEATURE_TSC_RELIABLE);
>  
>  		cx = dev->states + dev->count;
>  		cx->type = state;
> --- a/xen/arch/x86/time.c
> +++ b/xen/arch/x86/time.c
> @@ -1353,6 +1353,24 @@ static void time_calibration(void *unuse
>                       &r, 1);
>  }
>  
> +void __init clear_tsc_cap(unsigned int feature)
> +{
> +    void (*rendezvous_fn)(void *) = time_calibration_std_rendezvous;
> +
> +    if ( feature )
> +        setup_clear_cpu_cap(feature);
> +
> +    /* If we have constant-rate TSCs then scale factor can be shared. */
> +    if ( boot_cpu_has(X86_FEATURE_CONSTANT_TSC) )
> +    {
> +        /* If TSCs are not marked as 'reliable', re-sync during rendezvous. */
> +        if ( !boot_cpu_has(X86_FEATURE_TSC_RELIABLE) )
> +            rendezvous_fn = time_calibration_tsc_rendezvous;
> +    }
> +
> +    time_calibration_rendezvous_fn = rendezvous_fn;
> +}
> +
>  static struct {
>      s_time_t local_stime, master_stime;
>  } ap_bringup_ref;
> @@ -1478,7 +1496,7 @@ static int __init verify_tsc_reliability
>          {
>              printk("%s: TSC warp detected, disabling TSC_RELIABLE\n",
>                     __func__);
> -            setup_clear_cpu_cap(X86_FEATURE_TSC_RELIABLE);
> +            clear_tsc_cap(X86_FEATURE_TSC_RELIABLE);
>          }
>      }
>  
> @@ -1491,13 +1509,7 @@ int __init init_xen_time(void)
>  {
>      tsc_check_writability();
>  
> -    /* If we have constant-rate TSCs then scale factor can be shared. */
> -    if ( boot_cpu_has(X86_FEATURE_CONSTANT_TSC) )
> -    {
> -        /* If TSCs are not marked as 'reliable', re-sync during rendezvous. */
> -        if ( !boot_cpu_has(X86_FEATURE_TSC_RELIABLE) )
> -            time_calibration_rendezvous_fn = time_calibration_tsc_rendezvous;
> -    }
> +    clear_tsc_cap(0);
>  
>      open_softirq(TIME_CALIBRATE_SOFTIRQ, local_time_calibration);
>  
> --- a/xen/include/asm-x86/time.h
> +++ b/xen/include/asm-x86/time.h
> @@ -70,6 +70,7 @@ void tsc_get_info(struct domain *d, uint
>  void force_update_vcpu_system_time(struct vcpu *v);
>  
>  int host_tsc_is_safe(void);
> +void clear_tsc_cap(unsigned int feature);
>  void cpuid_time_leaf(uint32_t sub_idx, uint32_t *eax, uint32_t *ebx,
>                       uint32_t *ecx, uint32_t *edx);
>  




_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 11+ messages in thread

end of thread, other threads:[~2016-08-17 13:41 UTC | newest]

Thread overview: 11+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-08-03 12:55 [PATCH v2 0/6] x86/time: improve cross-CPU clock monotonicity (and more) Jan Beulich
2016-08-03 13:00 ` [PATCH v2 1/6] x86/time: calibrate TSC against platform timer Jan Beulich
2016-08-03 13:07   ` Andrew Cooper
2016-08-03 13:00 ` [PATCH v2 2/6] x86/time: correctly honor late clearing of TSC related feature flags Jan Beulich
2016-08-17 13:41   ` Ping: " Jan Beulich
2016-08-03 13:01 ` [PATCH v2 3/6] x86/time: support 32-bit wide ACPI PM timer Jan Beulich
2016-08-03 13:02 ` [PATCH v2 4/6] x86/time: fold recurring code Jan Beulich
2016-08-03 13:03 ` [PATCH v2 5/6] x86/time: group time stamps into a structure Jan Beulich
2016-08-03 13:04 ` [PATCH v2 6/6] x86/time: relax barriers Jan Beulich
2016-08-03 13:11   ` Andrew Cooper
2016-08-03 13:22     ` Jan Beulich

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).