All of lore.kernel.org
 help / color / mirror / Atom feed
From: "Jan Beulich" <JBeulich@suse.com>
To: xen-devel <xen-devel@lists.xenproject.org>
Cc: Andrew Cooper <andrew.cooper3@citrix.com>,
	Dario Faggioli <dario.faggioli@citrix.com>,
	Joao Martins <joao.m.martins@oracle.com>
Subject: [PATCH 1/8] x86/time: improve cross-CPU clock monotonicity (and more)
Date: Wed, 15 Jun 2016 04:26:16 -0600	[thread overview]
Message-ID: <5761496802000078000F5395@prv-mh.provo.novell.com> (raw)
In-Reply-To: <576140F302000078000F52FE@prv-mh.provo.novell.com>

[-- Attachment #1: Type: text/plain, Size: 3420 bytes --]

Using the bare return value from read_platform_stime() is not suitable
when local_time_calibration() is going to use its fast path: NOW()
return values on different CPUs can then diverge by several dozen
microseconds whenever platform and local time don't stay in close sync.

Latch local and platform time on the CPU initiating AP bringup, such
that the AP can use these values to seed its stime_local_stamp with as
little error as possible. The boot CPU, on the other hand, can simply
calculate the correct initial value (other CPUs could do so too, with
even greater accuracy than the approach being introduced, but that can
work only if all CPUs' TSCs start ticking at the same time, which
generally can't be assumed to be the case on multi-socket systems).

This slightly defers init_percpu_time() (moved ahead by commit
dd2658f966 ["x86/time: initialise time earlier during
start_secondary()"]) in order to keep the gap between populating the
stamps (in time_latch_stamps()) and consuming them (in
init_percpu_time()) as small as possible.

Signed-off-by: Jan Beulich <jbeulich@suse.com>

--- a/xen/arch/x86/smpboot.c
+++ b/xen/arch/x86/smpboot.c
@@ -328,12 +328,12 @@ void start_secondary(void *unused)
 
     percpu_traps_init();
 
-    init_percpu_time();
-
     cpu_init();
 
     smp_callin();
 
+    init_percpu_time();
+
     setup_secondary_APIC_clock();
 
     /*
@@ -996,6 +996,8 @@ int __cpu_up(unsigned int cpu)
     if ( (ret = do_boot_cpu(apicid, cpu)) != 0 )
         return ret;
 
+    time_latch_stamps();
+
     set_cpu_state(CPU_STATE_ONLINE);
     while ( !cpu_online(cpu) )
     {
--- a/xen/arch/x86/time.c
+++ b/xen/arch/x86/time.c
@@ -1328,21 +1328,51 @@ static void time_calibration(void *unuse
                      &r, 1);
 }
 
+static struct {
+    s_time_t local_stime, master_stime;
+} ap_bringup_ref;
+
+void time_latch_stamps(void) {
+    unsigned long flags;
+    u64 tsc;
+
+    local_irq_save(flags);
+    ap_bringup_ref.master_stime = read_platform_stime();
+    tsc = rdtsc();
+    local_irq_restore(flags);
+
+    ap_bringup_ref.local_stime = get_s_time_fixed(tsc);
+}
+
 void init_percpu_time(void)
 {
     struct cpu_time *t = &this_cpu(cpu_time);
     unsigned long flags;
+    u64 tsc;
     s_time_t now;
 
     /* Initial estimate for TSC rate. */
     t->tsc_scale = per_cpu(cpu_time, 0).tsc_scale;
 
     local_irq_save(flags);
-    t->local_tsc_stamp = rdtsc();
     now = read_platform_stime();
+    tsc = rdtsc();
     local_irq_restore(flags);
 
     t->stime_master_stamp = now;
+    /*
+     * To avoid a discontinuity (TSC and platform clock can't be expected
+     * to be in perfect sync), initialization here needs to match up with
+     * local_time_calibration()'s decision whether to use its fast path.
+     */
+    if ( boot_cpu_has(X86_FEATURE_CONSTANT_TSC) )
+    {
+        if ( system_state < SYS_STATE_smp_boot )
+            now = get_s_time_fixed(tsc);
+        else
+            now += ap_bringup_ref.local_stime - ap_bringup_ref.master_stime;
+    }
+    t->local_tsc_stamp    = tsc;
     t->stime_local_stamp  = now;
 }
 
--- a/xen/include/asm-x86/time.h
+++ b/xen/include/asm-x86/time.h
@@ -40,6 +40,7 @@ int time_suspend(void);
 int time_resume(void);
 
 void init_percpu_time(void);
+void time_latch_stamps(void);
 
 struct ioreq;
 int hwdom_pit_access(struct ioreq *ioreq);




[-- Attachment #2: x86-time-init-local-stime.patch --]
[-- Type: text/plain, Size: 3467 bytes --]

x86/time: adjust local system time initialization

Using the bare return value from read_platform_stime() is not suitable
when local_time_calibration() is going to use its fast path: NOW()
return values on different CPUs can then diverge by several dozen
microseconds whenever platform and local time don't stay in close sync.

Latch local and platform time on the CPU initiating AP bringup, such
that the AP can use these values to seed its stime_local_stamp with as
little error as possible. The boot CPU, on the other hand, can simply
calculate the correct initial value (other CPUs could do so too, with
even greater accuracy than the approach being introduced, but that can
work only if all CPUs' TSCs start ticking at the same time, which
generally can't be assumed to be the case on multi-socket systems).

This slightly defers init_percpu_time() (moved ahead by commit
dd2658f966 ["x86/time: initialise time earlier during
start_secondary()"]) in order to keep the gap between populating the
stamps (in time_latch_stamps()) and consuming them (in
init_percpu_time()) as small as possible.

Signed-off-by: Jan Beulich <jbeulich@suse.com>

--- a/xen/arch/x86/smpboot.c
+++ b/xen/arch/x86/smpboot.c
@@ -328,12 +328,12 @@ void start_secondary(void *unused)
 
     percpu_traps_init();
 
-    init_percpu_time();
-
     cpu_init();
 
     smp_callin();
 
+    init_percpu_time();
+
     setup_secondary_APIC_clock();
 
     /*
@@ -996,6 +996,8 @@ int __cpu_up(unsigned int cpu)
     if ( (ret = do_boot_cpu(apicid, cpu)) != 0 )
         return ret;
 
+    time_latch_stamps();
+
     set_cpu_state(CPU_STATE_ONLINE);
     while ( !cpu_online(cpu) )
     {
--- a/xen/arch/x86/time.c
+++ b/xen/arch/x86/time.c
@@ -1328,21 +1328,51 @@ static void time_calibration(void *unuse
                      &r, 1);
 }
 
+static struct {
+    s_time_t local_stime, master_stime;
+} ap_bringup_ref;
+
+void time_latch_stamps(void) {
+    unsigned long flags;
+    u64 tsc;
+
+    local_irq_save(flags);
+    ap_bringup_ref.master_stime = read_platform_stime();
+    tsc = rdtsc();
+    local_irq_restore(flags);
+
+    ap_bringup_ref.local_stime = get_s_time_fixed(tsc);
+}
+
 void init_percpu_time(void)
 {
     struct cpu_time *t = &this_cpu(cpu_time);
     unsigned long flags;
+    u64 tsc;
     s_time_t now;
 
     /* Initial estimate for TSC rate. */
     t->tsc_scale = per_cpu(cpu_time, 0).tsc_scale;
 
     local_irq_save(flags);
-    t->local_tsc_stamp = rdtsc();
     now = read_platform_stime();
+    tsc = rdtsc();
     local_irq_restore(flags);
 
     t->stime_master_stamp = now;
+    /*
+     * To avoid a discontinuity (TSC and platform clock can't be expected
+     * to be in perfect sync), initialization here needs to match up with
+     * local_time_calibration()'s decision whether to use its fast path.
+     */
+    if ( boot_cpu_has(X86_FEATURE_CONSTANT_TSC) )
+    {
+        if ( system_state < SYS_STATE_smp_boot )
+            now = get_s_time_fixed(tsc);
+        else
+            now += ap_bringup_ref.local_stime - ap_bringup_ref.master_stime;
+    }
+    t->local_tsc_stamp    = tsc;
     t->stime_local_stamp  = now;
 }
 
--- a/xen/include/asm-x86/time.h
+++ b/xen/include/asm-x86/time.h
@@ -40,6 +40,7 @@ int time_suspend(void);
 int time_resume(void);
 
 void init_percpu_time(void);
+void time_latch_stamps(void);
 
 struct ioreq;
 int hwdom_pit_access(struct ioreq *ioreq);

[-- Attachment #3: Type: text/plain, Size: 126 bytes --]

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

  reply	other threads:[~2016-06-15 10:26 UTC|newest]

Thread overview: 39+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-06-15  9:50 [PATCH 0/8] x86/time: improve cross-CPU clock monotonicity (and more) Jan Beulich
2016-06-15 10:26 ` Jan Beulich [this message]
2016-06-15 10:32   ` [PATCH 1/8] " Jan Beulich
2016-06-15 22:51   ` Joao Martins
2016-06-16  8:27     ` Jan Beulich
2016-06-16 20:27       ` Joao Martins
2016-06-17  7:32         ` Jan Beulich
2016-06-21 12:05           ` Joao Martins
2016-06-21 12:28             ` Jan Beulich
2016-06-21 13:57               ` Joao Martins
2016-08-02 19:30   ` Andrew Cooper
2016-06-15 10:26 ` [PATCH 2/8] x86: also generate assembler usable equates for synthesized features Jan Beulich
2016-06-20 12:50   ` Andrew Cooper
2016-06-15 10:27 ` [PATCH 3/8] x86/time: introduce and use rdtsc_ordered() Jan Beulich
2016-06-20 12:59   ` Andrew Cooper
2016-06-20 13:06     ` Jan Beulich
2016-06-20 13:07       ` Andrew Cooper
2016-07-11 11:39     ` Dario Faggioli
2016-06-15 10:28 ` [PATCH 4/8] x86/time: calibrate TSC against platform timer Jan Beulich
2016-06-20 14:20   ` Andrew Cooper
2016-06-20 15:19     ` Jan Beulich
2016-08-02 19:25       ` Andrew Cooper
2016-08-03  9:32         ` Jan Beulich
2016-06-15 10:28 ` [PATCH 5/8] x86/time: correctly honor late clearing of TSC related feature flags Jan Beulich
2016-06-20 14:32   ` Andrew Cooper
2016-06-20 15:20     ` Jan Beulich
2016-07-04 15:39       ` Andrew Cooper
2016-07-04 15:53         ` Jan Beulich
2016-08-02 19:08           ` Andrew Cooper
2016-08-03  9:43             ` Jan Beulich
2016-08-31 13:42               ` Andrew Cooper
2016-08-31 14:03                 ` Jan Beulich
2016-06-15 10:29 ` [PATCH 6/8] x86/time: support 32-bit wide ACPI PM timer Jan Beulich
2016-07-04 15:40   ` Andrew Cooper
2016-06-15 10:30 ` [PATCH 7/8] x86/time: fold recurring code Jan Beulich
2016-07-04 15:43   ` Andrew Cooper
2016-06-15 10:30 ` [PATCH 8/8] x86/time: group time stamps into a structure Jan Beulich
2016-07-04 15:57   ` Andrew Cooper
2016-07-01  7:44 ` Ping: [PATCH 0/8] x86/time: improve cross-CPU clock monotonicity (and more) Jan Beulich

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=5761496802000078000F5395@prv-mh.provo.novell.com \
    --to=jbeulich@suse.com \
    --cc=andrew.cooper3@citrix.com \
    --cc=dario.faggioli@citrix.com \
    --cc=joao.m.martins@oracle.com \
    --cc=xen-devel@lists.xenproject.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.