All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] [PVOPS] dom0 sync xen wallclock
@ 2010-02-09 18:12 Stefano Stabellini
  2010-02-09 18:20 ` Stefano Stabellini
  2010-02-10 23:09 ` Jeremy Fitzhardinge
  0 siblings, 2 replies; 17+ messages in thread
From: Stefano Stabellini @ 2010-02-09 18:12 UTC (permalink / raw)
  To: Jeremy Fitzhardinge; +Cc: xen-devel, Ian Campbell

Hi Jeremy,
this patch removes clock_was_set and adds an atomic notification chain
instead so that not only hrtimers but other parts of the kernel can be
notified of a time change as well.
In fact xen/time.c needs to be notified so that can update xen
wallclock time to keep it in sync; this is necessary otherwise other
PV guests will get a wrong wallclock time at boot.
Please let me know if my approach is suitable for upstream, I am willing
to do any change needed and to send the patch upstream myself if you
give me some directions.
Thanks,

Stefano

Signed-off-by: Stefano Stabellini <stefano.stabellini@eu.citrix.com>

---

diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c
index 0b56fd4..a0532ec 100644
--- a/arch/x86/xen/time.c
+++ b/arch/x86/xen/time.c
@@ -13,6 +13,7 @@
 #include <linux/clockchips.h>
 #include <linux/kernel_stat.h>
 #include <linux/math64.h>
+#include <linux/notifier.h>
 
 #include <asm/pvclock.h>
 #include <asm/xen/hypervisor.h>
@@ -526,6 +527,43 @@ static int __init xen_setup_vsyscall_timeinfo(int cpu)
 }
 #endif	/* CONFIG_PARAVIRT_CLOCK_VSYSCALL */
 
+static void sync_xen_wallclock(unsigned long dummy);
+static DEFINE_TIMER(sync_xen_wallclock_timer, sync_xen_wallclock, 0, 0);
+static void sync_xen_wallclock(unsigned long dummy)
+{
+	struct xen_platform_op op;
+	struct timespec ts;
+
+	write_seqlock_irq(&xtime_lock);
+
+	set_normalized_timespec(&ts, xtime.tv_sec, xtime.tv_nsec);
+
+	op.cmd = XENPF_settime;
+	op.u.settime.secs        = ts.tv_sec;
+	op.u.settime.nsecs       = ts.tv_nsec;
+	op.u.settime.system_time = xen_clocksource_read();
+	WARN_ON(HYPERVISOR_dom0_op(&op));
+
+	write_sequnlock_irq(&xtime_lock);
+
+	/* Once per minute. */
+	mod_timer(&sync_xen_wallclock_timer, jiffies + 60*HZ);
+}
+
+int xen_update_persistent_clock(struct notifier_block *this,
+		unsigned long event, void *ptr)
+{
+	if (!xen_initial_domain())
+		return -1;
+	mod_timer(&sync_xen_wallclock_timer, jiffies + 1);
+	return 0;
+}
+
+static struct notifier_block xen_clock_was_set = {
+	.notifier_call  = xen_update_persistent_clock,
+};
+
+
 __init void xen_time_init(void)
 {
 	int cpu = smp_processor_id();
@@ -549,6 +587,8 @@ __init void xen_time_init(void)
 	xen_setup_runstate_info(cpu);
 	xen_setup_timer(cpu);
 	xen_setup_cpu_clockevents();
+	if (xen_initial_domain())
+		atomic_notifier_chain_register(&clockset_notifier_list, &xen_clock_was_set);
 
 #ifdef CONFIG_PARAVIRT_CLOCK_VSYSCALL
 	if (xen_setup_vsyscall_timeinfo(cpu) == 0)
diff --git a/drivers/xen/manage.c b/drivers/xen/manage.c
index 5d42d55..422e8ef 100644
--- a/drivers/xen/manage.c
+++ b/drivers/xen/manage.c
@@ -130,7 +130,7 @@ out_resume:
 	dpm_resume_end(PMSG_RESUME);
 
 	/* Make sure timer events get retriggered on all CPUs */
-	clock_was_set();
+	atomic_notifier_call_chain(&clockset_notifier_list, 0, NULL);
 
 out_thaw:
 #ifdef CONFIG_PREEMPT
diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h
index 4759917..c46a4e5 100644
--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@ -247,7 +247,6 @@ static inline ktime_t hrtimer_expires_remaining(const struct hrtimer *timer)
 #ifdef CONFIG_HIGH_RES_TIMERS
 struct clock_event_device;
 
-extern void clock_was_set(void);
 extern void hres_timers_resume(void);
 extern void hrtimer_interrupt(struct clock_event_device *dev);
 
@@ -282,12 +281,6 @@ extern void hrtimer_peek_ahead_timers(void);
 # define MONOTONIC_RES_NSEC	LOW_RES_NSEC
 # define KTIME_MONOTONIC_RES	KTIME_LOW_RES
 
-/*
- * clock_was_set() is a NOP for non- high-resolution systems. The
- * time-sorted order guarantees that a timer does not expire early and
- * is expired in the next softirq when the clock was advanced.
- */
-static inline void clock_was_set(void) { }
 static inline void hrtimer_peek_ahead_timers(void) { }
 
 static inline void hres_timers_resume(void) { }
diff --git a/include/linux/time.h b/include/linux/time.h
index ea16c1a..5bb7e4a 100644
--- a/include/linux/time.h
+++ b/include/linux/time.h
@@ -131,6 +131,7 @@ static inline u32 arch_gettimeoffset(void) { return 0; }
 extern void do_gettimeofday(struct timeval *tv);
 extern int do_settimeofday(struct timespec *tv);
 extern int do_sys_settimeofday(struct timespec *tv, struct timezone *tz);
+extern struct atomic_notifier_head clockset_notifier_list;
 #define do_posix_clock_monotonic_gettime(ts) ktime_get_ts(ts)
 extern long do_utimes(int dfd, char __user *filename, struct timespec *times, int flags);
 struct itimerval;
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index 49da79a..c170b65 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -665,7 +665,8 @@ static void retrigger_next_event(void *arg)
  * resolution timer interrupts. On UP we just disable interrupts and
  * call the high resolution interrupt code.
  */
-void clock_was_set(void)
+static int clock_was_set(struct notifier_block *this, unsigned long event,
+		void *ptr)
 {
 	/* Retrigger the CPU local events everywhere */
 	on_each_cpu(retrigger_next_event, NULL, 1);
@@ -772,7 +773,11 @@ static inline int hrtimer_enqueue_reprogram(struct hrtimer *timer,
 }
 static inline void hrtimer_init_hres(struct hrtimer_cpu_base *base) { }
 static inline void hrtimer_init_timer_hres(struct hrtimer *timer) { }
-
+static inline int clock_was_set(struct notifier_block *this, unsigned long event,
+		void *ptr)
+{
+	return 0;
+}
 #endif /* CONFIG_HIGH_RES_TIMERS */
 
 #ifdef CONFIG_TIMER_STATS
@@ -1720,11 +1725,16 @@ static struct notifier_block __cpuinitdata hrtimers_nb = {
 	.notifier_call = hrtimer_cpu_notify,
 };
 
+static struct notifier_block hrt_clock_was_set = {
+	.notifier_call  = clock_was_set,
+};
+
 void __init hrtimers_init(void)
 {
 	hrtimer_cpu_notify(&hrtimers_nb, (unsigned long)CPU_UP_PREPARE,
 			  (void *)(long)smp_processor_id());
 	register_cpu_notifier(&hrtimers_nb);
+	atomic_notifier_chain_register(&clockset_notifier_list, &hrt_clock_was_set);
 #ifdef CONFIG_HIGH_RES_TIMERS
 	open_softirq(HRTIMER_SOFTIRQ, run_hrtimer_softirq);
 #endif
diff --git a/kernel/time.c b/kernel/time.c
index 2951194..667b959 100644
--- a/kernel/time.c
+++ b/kernel/time.c
@@ -138,7 +138,7 @@ static inline void warp_clock(void)
 	xtime.tv_sec += sys_tz.tz_minuteswest * 60;
 	update_xtime_cache(0);
 	write_sequnlock_irq(&xtime_lock);
-	clock_was_set();
+	atomic_notifier_call_chain(&clockset_notifier_list, 0, NULL);
 }
 
 /*
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index e8c77d9..284539f 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -18,7 +18,11 @@
 #include <linux/jiffies.h>
 #include <linux/time.h>
 #include <linux/tick.h>
+#include <linux/notifier.h>
 
+ATOMIC_NOTIFIER_HEAD(clockset_notifier_list);
+
+EXPORT_SYMBOL(clockset_notifier_list);
 
 /*
  * This read-write spinlock protects us from races in SMP while
@@ -174,8 +178,7 @@ int do_settimeofday(struct timespec *tv)
 
 	write_sequnlock_irqrestore(&xtime_lock, flags);
 
-	/* signal hrtimers about time change */
-	clock_was_set();
+	atomic_notifier_call_chain(&clockset_notifier_list, 0, NULL);
 
 	return 0;
 }

^ permalink raw reply related	[flat|nested] 17+ messages in thread

* Re: [PATCH] [PVOPS] dom0 sync xen wallclock
  2010-02-09 18:12 [PATCH] [PVOPS] dom0 sync xen wallclock Stefano Stabellini
@ 2010-02-09 18:20 ` Stefano Stabellini
  2010-02-10 23:09 ` Jeremy Fitzhardinge
  1 sibling, 0 replies; 17+ messages in thread
From: Stefano Stabellini @ 2010-02-09 18:20 UTC (permalink / raw)
  To: Stefano Stabellini; +Cc: Ian Campbell, Jeremy Fitzhardinge, xen-devel

On Tue, 9 Feb 2010, Stefano Stabellini wrote:
> Hi Jeremy,
> this patch removes clock_was_set and adds an atomic notification chain
> instead so that not only hrtimers but other parts of the kernel can be
> notified of a time change as well.
> In fact xen/time.c needs to be notified so that can update xen
> wallclock time to keep it in sync; this is necessary otherwise other
> PV guests will get a wrong wallclock time at boot.
> Please let me know if my approach is suitable for upstream, I am willing
> to do any change needed and to send the patch upstream myself if you
> give me some directions.

actually I just noticed that I missed few others clock_was_set calls
used in other architectures (cris, blackfin, ...), I'll resend another
patch soon.

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH] [PVOPS] dom0 sync xen wallclock
  2010-02-09 18:12 [PATCH] [PVOPS] dom0 sync xen wallclock Stefano Stabellini
  2010-02-09 18:20 ` Stefano Stabellini
@ 2010-02-10 23:09 ` Jeremy Fitzhardinge
  2010-02-11  0:06   ` Keir Fraser
  2010-02-11 11:24   ` Stefano Stabellini
  1 sibling, 2 replies; 17+ messages in thread
From: Jeremy Fitzhardinge @ 2010-02-10 23:09 UTC (permalink / raw)
  To: Stefano Stabellini; +Cc: xen-devel, Ian Campbell

On 02/09/2010 10:12 AM, Stefano Stabellini wrote:
> Hi Jeremy,
> this patch removes clock_was_set and adds an atomic notification chain
> instead so that not only hrtimers but other parts of the kernel can be
> notified of a time change as well.
> In fact xen/time.c needs to be notified so that can update xen
> wallclock time to keep it in sync; this is necessary otherwise other
> PV guests will get a wrong wallclock time at boot.
> Please let me know if my approach is suitable for upstream, I am willing
> to do any change needed and to send the patch upstream myself if you
> give me some directions.
>    

I'm not sure this is the right thing to do.  We have a set_wallclock 
pvop, which Xen currently implements as a no-op, but it should do the 
appropriate hypercall to set Xen's time if privileged enough.

Conceptually the Xen persistent time is the same as the platform CMOS 
clock, so I don't think we should update it any differently.  Your patch 
may make sense, but it should also address the native case.   At the 
moment it happens via sync_cmos_clock(), which is called periodically (I 
think) independently of whether the clock has actually been changed.

There is one big difference between the Xen clock and the CMOS clock, 
which is that the Xen clock is being concurrently accessed by other 
domains.  If it is being updated periodically, then there will be 
discontinuities in time which may affect other domains.  But since 
there's no time-warp ABI to Xen, I don't think this can really be 
avoided; anyone reading periodically the Xen clock needs to be able to 
deal with any discontinuities.  pvops kernels only inspect it at boot 
time, and so won't see any subsequent time adjustments anyway.

Thanks,
     J

> Thanks,
>
> Stefano
>
> Signed-off-by: Stefano Stabellini<stefano.stabellini@eu.citrix.com>
>
> ---
>
> diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c
> index 0b56fd4..a0532ec 100644
> --- a/arch/x86/xen/time.c
> +++ b/arch/x86/xen/time.c
> @@ -13,6 +13,7 @@
>   #include<linux/clockchips.h>
>   #include<linux/kernel_stat.h>
>   #include<linux/math64.h>
> +#include<linux/notifier.h>
>
>   #include<asm/pvclock.h>
>   #include<asm/xen/hypervisor.h>
> @@ -526,6 +527,43 @@ static int __init xen_setup_vsyscall_timeinfo(int cpu)
>   }
>   #endif	/* CONFIG_PARAVIRT_CLOCK_VSYSCALL */
>
> +static void sync_xen_wallclock(unsigned long dummy);
> +static DEFINE_TIMER(sync_xen_wallclock_timer, sync_xen_wallclock, 0, 0);
> +static void sync_xen_wallclock(unsigned long dummy)
> +{
> +	struct xen_platform_op op;
> +	struct timespec ts;
> +
> +	write_seqlock_irq(&xtime_lock);
> +
> +	set_normalized_timespec(&ts, xtime.tv_sec, xtime.tv_nsec);
> +
> +	op.cmd = XENPF_settime;
> +	op.u.settime.secs        = ts.tv_sec;
> +	op.u.settime.nsecs       = ts.tv_nsec;
> +	op.u.settime.system_time = xen_clocksource_read();
> +	WARN_ON(HYPERVISOR_dom0_op(&op));
> +
> +	write_sequnlock_irq(&xtime_lock);
> +
> +	/* Once per minute. */
> +	mod_timer(&sync_xen_wallclock_timer, jiffies + 60*HZ);
> +}
> +
> +int xen_update_persistent_clock(struct notifier_block *this,
> +		unsigned long event, void *ptr)
> +{
> +	if (!xen_initial_domain())
> +		return -1;
> +	mod_timer(&sync_xen_wallclock_timer, jiffies + 1);
> +	return 0;
> +}
> +
> +static struct notifier_block xen_clock_was_set = {
> +	.notifier_call  = xen_update_persistent_clock,
> +};
> +
> +
>   __init void xen_time_init(void)
>   {
>   	int cpu = smp_processor_id();
> @@ -549,6 +587,8 @@ __init void xen_time_init(void)
>   	xen_setup_runstate_info(cpu);
>   	xen_setup_timer(cpu);
>   	xen_setup_cpu_clockevents();
> +	if (xen_initial_domain())
> +		atomic_notifier_chain_register(&clockset_notifier_list,&xen_clock_was_set);
>
>   #ifdef CONFIG_PARAVIRT_CLOCK_VSYSCALL
>   	if (xen_setup_vsyscall_timeinfo(cpu) == 0)
> diff --git a/drivers/xen/manage.c b/drivers/xen/manage.c
> index 5d42d55..422e8ef 100644
> --- a/drivers/xen/manage.c
> +++ b/drivers/xen/manage.c
> @@ -130,7 +130,7 @@ out_resume:
>   	dpm_resume_end(PMSG_RESUME);
>
>   	/* Make sure timer events get retriggered on all CPUs */
> -	clock_was_set();
> +	atomic_notifier_call_chain(&clockset_notifier_list, 0, NULL);
>
>   out_thaw:
>   #ifdef CONFIG_PREEMPT
> diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h
> index 4759917..c46a4e5 100644
> --- a/include/linux/hrtimer.h
> +++ b/include/linux/hrtimer.h
> @@ -247,7 +247,6 @@ static inline ktime_t hrtimer_expires_remaining(const struct hrtimer *timer)
>   #ifdef CONFIG_HIGH_RES_TIMERS
>   struct clock_event_device;
>
> -extern void clock_was_set(void);
>   extern void hres_timers_resume(void);
>   extern void hrtimer_interrupt(struct clock_event_device *dev);
>
> @@ -282,12 +281,6 @@ extern void hrtimer_peek_ahead_timers(void);
>   # define MONOTONIC_RES_NSEC	LOW_RES_NSEC
>   # define KTIME_MONOTONIC_RES	KTIME_LOW_RES
>
> -/*
> - * clock_was_set() is a NOP for non- high-resolution systems. The
> - * time-sorted order guarantees that a timer does not expire early and
> - * is expired in the next softirq when the clock was advanced.
> - */
> -static inline void clock_was_set(void) { }
>   static inline void hrtimer_peek_ahead_timers(void) { }
>
>   static inline void hres_timers_resume(void) { }
> diff --git a/include/linux/time.h b/include/linux/time.h
> index ea16c1a..5bb7e4a 100644
> --- a/include/linux/time.h
> +++ b/include/linux/time.h
> @@ -131,6 +131,7 @@ static inline u32 arch_gettimeoffset(void) { return 0; }
>   extern void do_gettimeofday(struct timeval *tv);
>   extern int do_settimeofday(struct timespec *tv);
>   extern int do_sys_settimeofday(struct timespec *tv, struct timezone *tz);
> +extern struct atomic_notifier_head clockset_notifier_list;
>   #define do_posix_clock_monotonic_gettime(ts) ktime_get_ts(ts)
>   extern long do_utimes(int dfd, char __user *filename, struct timespec *times, int flags);
>   struct itimerval;
> diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
> index 49da79a..c170b65 100644
> --- a/kernel/hrtimer.c
> +++ b/kernel/hrtimer.c
> @@ -665,7 +665,8 @@ static void retrigger_next_event(void *arg)
>    * resolution timer interrupts. On UP we just disable interrupts and
>    * call the high resolution interrupt code.
>    */
> -void clock_was_set(void)
> +static int clock_was_set(struct notifier_block *this, unsigned long event,
> +		void *ptr)
>   {
>   	/* Retrigger the CPU local events everywhere */
>   	on_each_cpu(retrigger_next_event, NULL, 1);
> @@ -772,7 +773,11 @@ static inline int hrtimer_enqueue_reprogram(struct hrtimer *timer,
>   }
>   static inline void hrtimer_init_hres(struct hrtimer_cpu_base *base) { }
>   static inline void hrtimer_init_timer_hres(struct hrtimer *timer) { }
> -
> +static inline int clock_was_set(struct notifier_block *this, unsigned long event,
> +		void *ptr)
> +{
> +	return 0;
> +}
>   #endif /* CONFIG_HIGH_RES_TIMERS */
>
>   #ifdef CONFIG_TIMER_STATS
> @@ -1720,11 +1725,16 @@ static struct notifier_block __cpuinitdata hrtimers_nb = {
>   	.notifier_call = hrtimer_cpu_notify,
>   };
>
> +static struct notifier_block hrt_clock_was_set = {
> +	.notifier_call  = clock_was_set,
> +};
> +
>   void __init hrtimers_init(void)
>   {
>   	hrtimer_cpu_notify(&hrtimers_nb, (unsigned long)CPU_UP_PREPARE,
>   			  (void *)(long)smp_processor_id());
>   	register_cpu_notifier(&hrtimers_nb);
> +	atomic_notifier_chain_register(&clockset_notifier_list,&hrt_clock_was_set);
>   #ifdef CONFIG_HIGH_RES_TIMERS
>   	open_softirq(HRTIMER_SOFTIRQ, run_hrtimer_softirq);
>   #endif
> diff --git a/kernel/time.c b/kernel/time.c
> index 2951194..667b959 100644
> --- a/kernel/time.c
> +++ b/kernel/time.c
> @@ -138,7 +138,7 @@ static inline void warp_clock(void)
>   	xtime.tv_sec += sys_tz.tz_minuteswest * 60;
>   	update_xtime_cache(0);
>   	write_sequnlock_irq(&xtime_lock);
> -	clock_was_set();
> +	atomic_notifier_call_chain(&clockset_notifier_list, 0, NULL);
>   }
>
>   /*
> diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
> index e8c77d9..284539f 100644
> --- a/kernel/time/timekeeping.c
> +++ b/kernel/time/timekeeping.c
> @@ -18,7 +18,11 @@
>   #include<linux/jiffies.h>
>   #include<linux/time.h>
>   #include<linux/tick.h>
> +#include<linux/notifier.h>
>
> +ATOMIC_NOTIFIER_HEAD(clockset_notifier_list);
> +
> +EXPORT_SYMBOL(clockset_notifier_list);
>
>   /*
>    * This read-write spinlock protects us from races in SMP while
> @@ -174,8 +178,7 @@ int do_settimeofday(struct timespec *tv)
>
>   	write_sequnlock_irqrestore(&xtime_lock, flags);
>
> -	/* signal hrtimers about time change */
> -	clock_was_set();
> +	atomic_notifier_call_chain(&clockset_notifier_list, 0, NULL);
>
>   	return 0;
>   }
>
>    

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: Re: [PATCH] [PVOPS] dom0 sync xen wallclock
  2010-02-10 23:09 ` Jeremy Fitzhardinge
@ 2010-02-11  0:06   ` Keir Fraser
  2010-02-11  0:47     ` Jeremy Fitzhardinge
  2010-02-11 11:24   ` Stefano Stabellini
  1 sibling, 1 reply; 17+ messages in thread
From: Keir Fraser @ 2010-02-11  0:06 UTC (permalink / raw)
  To: Jeremy Fitzhardinge, Stefano Stabellini; +Cc: Ian Campbell, xen-devel

On 10/02/2010 23:09, "Jeremy Fitzhardinge" <jeremy@goop.org> wrote:

> There is one big difference between the Xen clock and the CMOS clock,
> which is that the Xen clock is being concurrently accessed by other
> domains.  If it is being updated periodically, then there will be
> discontinuities in time which may affect other domains.  But since
> there's no time-warp ABI to Xen, I don't think this can really be
> avoided; anyone reading periodically the Xen clock needs to be able to
> deal with any discontinuities.  pvops kernels only inspect it at boot
> time, and so won't see any subsequent time adjustments anyway.

A Xen ABI for adjtime()-alike warping of Xen system time is something we
should add at some point, as that's the logical thing for ntp in dom0 to
affect, keeping the system's 'nanosecond ticker' in sync with the wider
world. That'd keep all domU clocks in sync, all except for fixed wallclock
offsets. But it doesn't currently exist, so instead it gets bunged in with
Xen's existing settimeofday()-alike interface: hence domUs either can see
discontinuities, or don't get the automatic benefits of ntp sync at all,
because they only read the wallclock offset at boot time.

 -- Keir

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: Re: [PATCH] [PVOPS] dom0 sync xen wallclock
  2010-02-11  0:06   ` Keir Fraser
@ 2010-02-11  0:47     ` Jeremy Fitzhardinge
  2010-02-11  8:03       ` Keir Fraser
  0 siblings, 1 reply; 17+ messages in thread
From: Jeremy Fitzhardinge @ 2010-02-11  0:47 UTC (permalink / raw)
  To: Keir Fraser; +Cc: Ian Campbell, xen-devel, Stefano Stabellini

On 02/10/2010 04:06 PM, Keir Fraser wrote:
> A Xen ABI for adjtime()-alike warping of Xen system time is something we
> should add at some point, as that's the logical thing for ntp in dom0 to
> affect, keeping the system's 'nanosecond ticker' in sync with the wider
> world.

Hm.  I think it would be best to keep the ticker unchanged, but perhaps 
offer a mechanism so that dom0 can set separate offset and drift 
parameters which other domains can query to may system time to global 
time.  But I don't think even that is a very compelling idea.

>   That'd keep all domU clocks in sync, all except for fixed wallclock
> offsets. But it doesn't currently exist, so instead it gets bunged in with
> Xen's existing settimeofday()-alike interface: hence domUs either can see
> discontinuities, or don't get the automatic benefits of ntp sync at all,
> because they only read the wallclock offset at boot time.
>    

I suppose.  All the domains can already run ntp if you really want to 
keep them in sync with each other and/or dom0 and/or the outside 
world.   I'm not sure there's much point in making Xen ABI changes in 
this area; anything we do will come down to being a partial and probably 
inadequate reimplementation of ntp (esp when you consider how to deal 
with migration).

     J

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: Re: [PATCH] [PVOPS] dom0 sync xen wallclock
  2010-02-11  0:47     ` Jeremy Fitzhardinge
@ 2010-02-11  8:03       ` Keir Fraser
  2010-02-11 19:16         ` Jeremy Fitzhardinge
  0 siblings, 1 reply; 17+ messages in thread
From: Keir Fraser @ 2010-02-11  8:03 UTC (permalink / raw)
  To: Jeremy Fitzhardinge; +Cc: Ian Campbell, xen-devel, Stefano Stabellini

On 11/02/2010 00:47, "Jeremy Fitzhardinge" <jeremy@goop.org> wrote:

> On 02/10/2010 04:06 PM, Keir Fraser wrote:
>> A Xen ABI for adjtime()-alike warping of Xen system time is something we
>> should add at some point, as that's the logical thing for ntp in dom0 to
>> affect, keeping the system's 'nanosecond ticker' in sync with the wider
>> world.
> 
> Hm.  I think it would be best to keep the ticker unchanged, but perhaps
> offer a mechanism so that dom0 can set separate offset and drift
> parameters which other domains can query to may system time to global
> time.  But I don't think even that is a very compelling idea.

Hm, well your suggestion is certainly less work. :-D

 -- Keir

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH] [PVOPS] dom0 sync xen wallclock
  2010-02-10 23:09 ` Jeremy Fitzhardinge
  2010-02-11  0:06   ` Keir Fraser
@ 2010-02-11 11:24   ` Stefano Stabellini
  2010-02-11 11:36     ` Ian Campbell
  2010-02-18 23:43     ` Jeremy Fitzhardinge
  1 sibling, 2 replies; 17+ messages in thread
From: Stefano Stabellini @ 2010-02-11 11:24 UTC (permalink / raw)
  To: Jeremy Fitzhardinge; +Cc: Ian Campbell, xen-devel, Stefano Stabellini

On Wed, 10 Feb 2010, Jeremy Fitzhardinge wrote:
> I'm not sure this is the right thing to do.  We have a set_wallclock 
> pvop, which Xen currently implements as a no-op, but it should do the 
> appropriate hypercall to set Xen's time if privileged enough.
> 
> Conceptually the Xen persistent time is the same as the platform CMOS 
> clock, so I don't think we should update it any differently.  Your patch 
> may make sense, but it should also address the native case.   At the 
> moment it happens via sync_cmos_clock(), which is called periodically (I 
> think) independently of whether the clock has actually been changed.
> 
> There is one big difference between the Xen clock and the CMOS clock, 
> which is that the Xen clock is being concurrently accessed by other 
> domains.  If it is being updated periodically, then there will be 
> discontinuities in time which may affect other domains.  But since 
> there's no time-warp ABI to Xen, I don't think this can really be 
> avoided; anyone reading periodically the Xen clock needs to be able to 
> deal with any discontinuities.  pvops kernels only inspect it at boot 
> time, and so won't see any subsequent time adjustments anyway.
> 

Linux 2.6.18 does consider xen persistent time as the platform
CMOS clock, but I don't think this is what we actually want: if we run
ntpd in dom0 we probably want to sync xen time with dom0 time more often
than linux usually update the CMOS clock.
In particular we want that as soon as ntpd in dom0 set the right time,
it gets propagated in xen so that all the PV guests created after that
moment can read the right wallclock at boot.
I think that the right approach to achieve that is to break the
assumption that xen persistent time is like the CMOS and treat it more
like xtime instead.

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH] [PVOPS] dom0 sync xen wallclock
  2010-02-11 11:24   ` Stefano Stabellini
@ 2010-02-11 11:36     ` Ian Campbell
  2010-02-18 23:43     ` Jeremy Fitzhardinge
  1 sibling, 0 replies; 17+ messages in thread
From: Ian Campbell @ 2010-02-11 11:36 UTC (permalink / raw)
  To: Stefano Stabellini; +Cc: Jeremy Fitzhardinge, xen-devel

On Thu, 2010-02-11 at 11:24 +0000, Stefano Stabellini wrote:
> On Wed, 10 Feb 2010, Jeremy Fitzhardinge wrote:
> > I'm not sure this is the right thing to do.  We have a set_wallclock 
> > pvop, which Xen currently implements as a no-op, but it should do the 
> > appropriate hypercall to set Xen's time if privileged enough.
> > 
> > Conceptually the Xen persistent time is the same as the platform CMOS 
> > clock, so I don't think we should update it any differently.  Your patch 
> > may make sense, but it should also address the native case.   At the 
> > moment it happens via sync_cmos_clock(), which is called periodically (I 
> > think) independently of whether the clock has actually been changed.
> > 
> > There is one big difference between the Xen clock and the CMOS clock, 
> > which is that the Xen clock is being concurrently accessed by other 
> > domains.  If it is being updated periodically, then there will be 
> > discontinuities in time which may affect other domains.  But since 
> > there's no time-warp ABI to Xen, I don't think this can really be 
> > avoided; anyone reading periodically the Xen clock needs to be able to 
> > deal with any discontinuities.  pvops kernels only inspect it at boot 
> > time, and so won't see any subsequent time adjustments anyway.
> > 
> 
> Linux 2.6.18 does consider xen persistent time as the platform
> CMOS clock, but I don't think this is what we actually want: if we run
> ntpd in dom0 we probably want to sync xen time with dom0 time more often
> than linux usually update the CMOS clock.
> In particular we want that as soon as ntpd in dom0 set the right time,
> it gets propagated in xen so that all the PV guests created after that
> moment can read the right wallclock at boot.
> I think that the right approach to achieve that is to break the
> assumption that xen persistent time is like the CMOS and treat it more
> like xtime instead.

I agree, guests which have a dependent wallclock are reading this
variable from the hypervisor as their xtime, not their CMOS RTC. Having
domain 0 write it as if it were the CMOS seems asymmetric with this
approach.

Any explicit setting of the time in domain 0 should be immediately
propagated through to the hypervisor, with all the usual caveats that
calling settimeofday has on any normal Linux system, but it appears it
is necessary for the same reasons we usually allow ntpd (or ntpdate) to
step the time once at start of day. Without this we have to wait some
time after boot for the time setting to be propagated, this delay is
often long enough to have started several VMs which will now be running
with the wrong time and/or see even worse discontinuities when the time
is eventually set.

As for propagating variations derived from the ntp drift calculations a
drift based interface to the hypervisor would be an improvement but I
think what Stefano proposes is basically inline with historical domain 0
behaviour. Guests which have a dependent wallclock already handle any
discontinuities by adding their own monotonicity checks.

One tricky issue with pushing drift parameters into the hypervisor is
that presumably ntp would need to calculate them based on the
hypervisor's idea of time, calculating based on domain 0's idea of time
when the two are allowed to drift seems like it would simply lead to
using the wrong drift paramters and making things worse for the
hypervisor.

Ian.

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: Re: [PATCH] [PVOPS] dom0 sync xen wallclock
  2010-02-11  8:03       ` Keir Fraser
@ 2010-02-11 19:16         ` Jeremy Fitzhardinge
  0 siblings, 0 replies; 17+ messages in thread
From: Jeremy Fitzhardinge @ 2010-02-11 19:16 UTC (permalink / raw)
  To: Keir Fraser; +Cc: Ian Campbell, xen-devel, Stefano Stabellini

On 02/11/2010 12:03 AM, Keir Fraser wrote:
>> Hm.  I think it would be best to keep the ticker unchanged, but perhaps
>> offer a mechanism so that dom0 can set separate offset and drift
>> parameters which other domains can query to may system time to global
>> time.  But I don't think even that is a very compelling idea.
>>      
> Hm, well your suggestion is certainly less work. :-D
>    

Yep, always a plus.

     J

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH] [PVOPS] dom0 sync xen wallclock
  2010-02-11 11:24   ` Stefano Stabellini
  2010-02-11 11:36     ` Ian Campbell
@ 2010-02-18 23:43     ` Jeremy Fitzhardinge
  2010-02-19  1:02       ` Dan Magenheimer
  2010-02-19  9:10       ` Ian Campbell
  1 sibling, 2 replies; 17+ messages in thread
From: Jeremy Fitzhardinge @ 2010-02-18 23:43 UTC (permalink / raw)
  To: Stefano Stabellini; +Cc: Ian Campbell, xen-devel

On 02/11/2010 03:24 AM, Stefano Stabellini wrote:
> On Wed, 10 Feb 2010, Jeremy Fitzhardinge wrote:
>    
>> I'm not sure this is the right thing to do.  We have a set_wallclock
>> pvop, which Xen currently implements as a no-op, but it should do the
>> appropriate hypercall to set Xen's time if privileged enough.
>>
>> Conceptually the Xen persistent time is the same as the platform CMOS
>> clock, so I don't think we should update it any differently.  Your patch
>> may make sense, but it should also address the native case.   At the
>> moment it happens via sync_cmos_clock(), which is called periodically (I
>> think) independently of whether the clock has actually been changed.
>>
>> There is one big difference between the Xen clock and the CMOS clock,
>> which is that the Xen clock is being concurrently accessed by other
>> domains.  If it is being updated periodically, then there will be
>> discontinuities in time which may affect other domains.  But since
>> there's no time-warp ABI to Xen, I don't think this can really be
>> avoided; anyone reading periodically the Xen clock needs to be able to
>> deal with any discontinuities.  pvops kernels only inspect it at boot
>> time, and so won't see any subsequent time adjustments anyway.
>>
>>      
> Linux 2.6.18 does consider xen persistent time as the platform
> CMOS clock, but I don't think this is what we actually want: if we run
> ntpd in dom0 we probably want to sync xen time with dom0 time more often
> than linux usually update the CMOS clock.
> In particular we want that as soon as ntpd in dom0 set the right time,
> it gets propagated in xen so that all the PV guests created after that
> moment can read the right wallclock at boot.
>    

If you're making the assumption that a guest will query the Xen 
wallclock time precisely once, then I guess that will work OK.  But if 
you expect them to query it more than once, then you need to add a 
notion of drift correction to Xen's system clock so that you can warp it 
in a continuous fashion.  But that complicates things because dom0 
itself is using the Xen system clock as its own timebase which it is 
also applying drift correction to.  So to make that work you either need 
to introduce distinct notions of corrected and uncorrected Xen clocks 
and make domains query them appropriately, or more deeply PV time in the 
domains so they directly use/adjust the Xen drift factor rather than 
maintaining their own.


> I think that the right approach to achieve that is to break the
> assumption that xen persistent time is like the CMOS and treat it more
> like xtime instead.
>    

I don't really see why wallclock time is any of Xen's business.  It 
seems like something that could be completely dropped with no loss of 
functionality.  It's somewhat useful for domains to be able to fetch an 
approximately correct wallclock time early in boot, but aside from that 
its fairly useless.

I would prefer to see:

    * dom0 gets time of day from CMOS or whatever hardware mechanism it
      wants
    * PV domUs get a rough time of day at boot either from a simple
      "seconds since 1970" in the startup info put there by the domain
      builder from dom0's clock (so there's no illusion that it is
      anything other than a one-time start time for the domain)
    * HVM domains similarly get an emulated CMOS implemented in qemu
    * anyone who wants continuously tracked accurate time needs to run ntp

and deprecate any notion of wallclock time from Xen.

     J

^ permalink raw reply	[flat|nested] 17+ messages in thread

* RE: Re: [PATCH] [PVOPS] dom0 sync xen wallclock
  2010-02-18 23:43     ` Jeremy Fitzhardinge
@ 2010-02-19  1:02       ` Dan Magenheimer
  2010-02-19  9:10       ` Ian Campbell
  1 sibling, 0 replies; 17+ messages in thread
From: Dan Magenheimer @ 2010-02-19  1:02 UTC (permalink / raw)
  To: Jeremy Fitzhardinge, Stefano Stabellini; +Cc: Ian Campbell, xen-devel

> I don't really see why wallclock time is any of Xen's business.

While I agree in principal, I ran across a case recently where
it could/would be very useful:  Recent Linux kernels running
on very latest processors use the TSC as a clocksource.  VMware
can and does support this, even across save/restore/migration.
Supporting it across migration requires the hypervisor to
have some equivalent of wallclock time that can be synchronized
across machines in a "migration pool".  I *think* VMware
can use NTP to do this (because their hypervisor is closer
to a full OS than Xen is).

Without this, Xen cannot allow an OS to select the TSC as
a clocksource unless it disallows save/restore/migration.
(Thus the "no_migrate" flag recently added in Xen.)  The
result is a small but not insignificant performance
advantage for VMware on some benchmarks, which may appear
in upcoming 2.6.32-based distros.

Of course, it may also be possible to get this from dom0
but for performance reasons some concept of wallclock "offset"
must reside in the hypervisor.

> -----Original Message-----
> From: Jeremy Fitzhardinge [mailto:jeremy@goop.org]
> Sent: Thursday, February 18, 2010 4:44 PM
> To: Stefano Stabellini
> Cc: Ian Campbell; xen-devel@lists.xensource.com
> Subject: [Xen-devel] Re: [PATCH] [PVOPS] dom0 sync xen wallclock
> 
> On 02/11/2010 03:24 AM, Stefano Stabellini wrote:
> > On Wed, 10 Feb 2010, Jeremy Fitzhardinge wrote:
> >
> >> I'm not sure this is the right thing to do.  We have a set_wallclock
> >> pvop, which Xen currently implements as a no-op, but it should do
> the
> >> appropriate hypercall to set Xen's time if privileged enough.
> >>
> >> Conceptually the Xen persistent time is the same as the platform
> CMOS
> >> clock, so I don't think we should update it any differently.  Your
> patch
> >> may make sense, but it should also address the native case.   At the
> >> moment it happens via sync_cmos_clock(), which is called
> periodically (I
> >> think) independently of whether the clock has actually been changed.
> >>
> >> There is one big difference between the Xen clock and the CMOS
> clock,
> >> which is that the Xen clock is being concurrently accessed by other
> >> domains.  If it is being updated periodically, then there will be
> >> discontinuities in time which may affect other domains.  But since
> >> there's no time-warp ABI to Xen, I don't think this can really be
> >> avoided; anyone reading periodically the Xen clock needs to be able
> to
> >> deal with any discontinuities.  pvops kernels only inspect it at
> boot
> >> time, and so won't see any subsequent time adjustments anyway.
> >>
> >>
> > Linux 2.6.18 does consider xen persistent time as the platform
> > CMOS clock, but I don't think this is what we actually want: if we
> run
> > ntpd in dom0 we probably want to sync xen time with dom0 time more
> often
> > than linux usually update the CMOS clock.
> > In particular we want that as soon as ntpd in dom0 set the right
> time,
> > it gets propagated in xen so that all the PV guests created after
> that
> > moment can read the right wallclock at boot.
> >
> 
> If you're making the assumption that a guest will query the Xen
> wallclock time precisely once, then I guess that will work OK.  But if
> you expect them to query it more than once, then you need to add a
> notion of drift correction to Xen's system clock so that you can warp
> it
> in a continuous fashion.  But that complicates things because dom0
> itself is using the Xen system clock as its own timebase which it is
> also applying drift correction to.  So to make that work you either
> need
> to introduce distinct notions of corrected and uncorrected Xen clocks
> and make domains query them appropriately, or more deeply PV time in
> the
> domains so they directly use/adjust the Xen drift factor rather than
> maintaining their own.
> 
> 
> > I think that the right approach to achieve that is to break the
> > assumption that xen persistent time is like the CMOS and treat it
> more
> > like xtime instead.
> >
> 
> I don't really see why wallclock time is any of Xen's business.  It
> seems like something that could be completely dropped with no loss of
> functionality.  It's somewhat useful for domains to be able to fetch an
> approximately correct wallclock time early in boot, but aside from that
> its fairly useless.
> 
> I would prefer to see:
> 
>     * dom0 gets time of day from CMOS or whatever hardware mechanism it
>       wants
>     * PV domUs get a rough time of day at boot either from a simple
>       "seconds since 1970" in the startup info put there by the domain
>       builder from dom0's clock (so there's no illusion that it is
>       anything other than a one-time start time for the domain)
>     * HVM domains similarly get an emulated CMOS implemented in qemu
>     * anyone who wants continuously tracked accurate time needs to run
> ntp
> 
> and deprecate any notion of wallclock time from Xen.
> 
>      J
> 
> 
> _______________________________________________
> Xen-devel mailing list
> Xen-devel@lists.xensource.com
> http://lists.xensource.com/xen-devel

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH] [PVOPS] dom0 sync xen wallclock
  2010-02-18 23:43     ` Jeremy Fitzhardinge
  2010-02-19  1:02       ` Dan Magenheimer
@ 2010-02-19  9:10       ` Ian Campbell
  2010-02-19  9:39         ` Keir Fraser
  2010-02-19 23:05         ` Jeremy Fitzhardinge
  1 sibling, 2 replies; 17+ messages in thread
From: Ian Campbell @ 2010-02-19  9:10 UTC (permalink / raw)
  To: Jeremy Fitzhardinge; +Cc: xen-devel, Stefano Stabellini

On Thu, 2010-02-18 at 23:43 +0000, Jeremy Fitzhardinge wrote:
>     * PV domUs get a rough time of day at boot either from a simple
>       "seconds since 1970" in the startup info put there by the domain
>       builder from dom0's clock (so there's no illusion that it is
>       anything other than a one-time start time for the domain)

Unfortunately there is an existing large installed base of guests which
use the dependent wallclock mode since it is the default in oldstyle Xen
kernels.

>     * HVM domains similarly get an emulated CMOS implemented in qemu 

Isn't this currently emulated within the hypervisor using the wallclock
time within Xen? I assume that it was pulled into the h/v for a specific
reason? I'm not sure what reason though, the emulated RTC doesn't seem
especially performance critical...

Ian.

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: Re: [PATCH] [PVOPS] dom0 sync xen wallclock
  2010-02-19  9:10       ` Ian Campbell
@ 2010-02-19  9:39         ` Keir Fraser
  2010-02-23 12:13           ` Stefano Stabellini
  2010-02-23 14:25           ` Ian Campbell
  2010-02-19 23:05         ` Jeremy Fitzhardinge
  1 sibling, 2 replies; 17+ messages in thread
From: Keir Fraser @ 2010-02-19  9:39 UTC (permalink / raw)
  To: Ian Campbell, Jeremy Fitzhardinge; +Cc: xen-devel, Stefano Stabellini

On 19/02/2010 09:10, "Ian Campbell" <Ian.Campbell@citrix.com> wrote:

> On Thu, 2010-02-18 at 23:43 +0000, Jeremy Fitzhardinge wrote:
>>     * PV domUs get a rough time of day at boot either from a simple
>>       "seconds since 1970" in the startup info put there by the domain
>>       builder from dom0's clock (so there's no illusion that it is
>>       anything other than a one-time start time for the domain)
> 
> Unfortunately there is an existing large installed base of guests which
> use the dependent wallclock mode since it is the default in oldstyle Xen
> kernels.

That still plays okay with dom0 setting Xen wallclock only once, or not at
all (since Xen primes its WC values from CMOS). The wallclock values would
then never change and old kernels naturally just pick up WC at boot and
never again. And their default mode is also compatible with them running
ntpd -- if the kernel detects ntp sync then it changes/reduces how it syncs
with Xen.

 -- Keir

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH] [PVOPS] dom0 sync xen wallclock
  2010-02-19  9:10       ` Ian Campbell
  2010-02-19  9:39         ` Keir Fraser
@ 2010-02-19 23:05         ` Jeremy Fitzhardinge
  2010-02-23 14:25           ` Ian Campbell
  1 sibling, 1 reply; 17+ messages in thread
From: Jeremy Fitzhardinge @ 2010-02-19 23:05 UTC (permalink / raw)
  To: Ian Campbell; +Cc: xen-devel, Stefano Stabellini

On 02/19/2010 01:10 AM, Ian Campbell wrote:
> Unfortunately there is an existing large installed base of guests which
> use the dependent wallclock mode since it is the default in oldstyle Xen
> kernels.
>    

Sure.  I'm not suggesting that we remove the existing hypercall.  I just 
don't think we should extend it or encourage its use.

>>      * HVM domains similarly get an emulated CMOS implemented in qemu
>>      
> Isn't this currently emulated within the hypervisor using the wallclock
> time within Xen? I assume that it was pulled into the h/v for a specific
> reason? I'm not sure what reason though, the emulated RTC doesn't seem
> especially performance critical...
>    

It is?  I wonder why we bothered adding a pvop for it then...

     J

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: Re: [PATCH] [PVOPS] dom0 sync xen wallclock
  2010-02-19  9:39         ` Keir Fraser
@ 2010-02-23 12:13           ` Stefano Stabellini
  2010-02-23 14:25           ` Ian Campbell
  1 sibling, 0 replies; 17+ messages in thread
From: Stefano Stabellini @ 2010-02-23 12:13 UTC (permalink / raw)
  To: Keir Fraser
  Cc: Ian Campbell, Jeremy Fitzhardinge, xen-devel, Stefano Stabellini

on fri, 19 feb 2010, keir fraser wrote:
> on 19/02/2010 09:10, "ian campbell" <ian.campbell@citrix.com> wrote:
> 
> > on thu, 2010-02-18 at 23:43 +0000, jeremy fitzhardinge wrote:
> >>     * pv domus get a rough time of day at boot either from a simple
> >>       "seconds since 1970" in the startup info put there by the domain
> >>       builder from dom0's clock (so there's no illusion that it is
> >>       anything other than a one-time start time for the domain)
> > 
> > unfortunately there is an existing large installed base of guests which
> > use the dependent wallclock mode since it is the default in oldstyle xen
> > kernels.
> 
> that still plays okay with dom0 setting xen wallclock only once, or not at
> all (since xen primes its wc values from cmos). the wallclock values would
> then never change and old kernels naturally just pick up wc at boot and
> never again. and their default mode is also compatible with them running
> ntpd -- if the kernel detects ntp sync then it changes/reduces how it syncs
> with xen.
> 

Even if we decide that dom0 should sync the wallclock only once with
xen, ideally right after ntp sync's, we still need to hook into
clock_was_set and therefore do something very similar to what my patch
does (apart from the timer to keep updating the xen wallclock) because
otherwise if we just use the pvop set_wallclock it might take several
minutes before having a single wallclock update.
The reason for this is that the wallclock update policy of the kernel is
very lax and I don't think it is suitable for our use case; in
particular the classic initial time sync done with ntpdate at boot time
by most distros doesn't propagate the wallclock to the hardware and\or
xen.

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH] [PVOPS] dom0 sync xen wallclock
  2010-02-19 23:05         ` Jeremy Fitzhardinge
@ 2010-02-23 14:25           ` Ian Campbell
  0 siblings, 0 replies; 17+ messages in thread
From: Ian Campbell @ 2010-02-23 14:25 UTC (permalink / raw)
  To: Jeremy Fitzhardinge; +Cc: xen-devel, Stefano Stabellini

On Fri, 2010-02-19 at 23:05 +0000, Jeremy Fitzhardinge wrote:
> 
> > Unfortunately there is an existing large installed base of guests
> which
> > use the dependent wallclock mode since it is the default in oldstyle
> Xen
> > kernels.
> >    
> 
> Sure.  I'm not suggesting that we remove the existing hypercall.  I
> just don't think we should extend it or encourage its use. 

We currently have a large installed base of guests which use dependent
wallclock and therefore expect the Xen wallclock to be correct when they
start and to be remain roughly in sync thereafter (and these guests cope
with the sawtoothing this implies).

I think the first half of that behaviour (correct when they start) is
most crucial here. As it stands the Xen wallclock time doesn't get
updated for several minutes after boot because ntpdate doesn't cause a
sync to the hypervisor nor hardware RTC and ntpd takes ages to decide it
has syncd which means that any PV guests which start in that interval
and use dependent wallclock (which is currently most) will see a large
time discontinuity whenever ntpd in domain 0 is happy enough to actually
sync the RTC (which as a side-effect pushes a new time into Xen) not to
mention they will be running with some bogus time for that entire
interval. Hooking clock_was_set fixes this issue

If we choose not to include the timer aspect which keeps the hypervisor
wallclock roughly in sync we need to communicate this change to. They
now need to run ntpd in their guest where previously we advised the
opposite (for example in
http://wiki.xensource.com/xenwiki/InstallationNotes but this also
impacts distro documentation and google-fu).

Ian.

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: Re: [PATCH] [PVOPS] dom0 sync xen wallclock
  2010-02-19  9:39         ` Keir Fraser
  2010-02-23 12:13           ` Stefano Stabellini
@ 2010-02-23 14:25           ` Ian Campbell
  1 sibling, 0 replies; 17+ messages in thread
From: Ian Campbell @ 2010-02-23 14:25 UTC (permalink / raw)
  To: Keir Fraser; +Cc: Jeremy Fitzhardinge, xen-devel, Stefano Stabellini

On Fri, 2010-02-19 at 09:39 +0000, Keir Fraser wrote:
> On 19/02/2010 09:10, "Ian Campbell" <Ian.Campbell@citrix.com> wrote:
> 
> > On Thu, 2010-02-18 at 23:43 +0000, Jeremy Fitzhardinge wrote:
> >>     * PV domUs get a rough time of day at boot either from a simple
> >>       "seconds since 1970" in the startup info put there by the domain
> >>       builder from dom0's clock (so there's no illusion that it is
> >>       anything other than a one-time start time for the domain)
> > 
> > Unfortunately there is an existing large installed base of guests which
> > use the dependent wallclock mode since it is the default in oldstyle Xen
> > kernels.
> 
> That still plays okay with dom0 setting Xen wallclock only once, or not at
> all (since Xen primes its WC values from CMOS).

I think we should try and arrange to do it once so we cope more
gracefully with wildly incorrect CMOS clocks.

>  The wallclock values would
> then never change and old kernels naturally just pick up WC at boot and
> never again. And their default mode is also compatible with them running
> ntpd -- if the kernel detects ntp sync then it changes/reduces how it syncs
> with Xen.

But these guests won't be running ntpd because they expect domain 0 to
be doing it for them, via the dependent wallclock mode.

I don't disagree that we could transition to a better solution but we
are currently not providing the host level behaviour that existing
guests expect.

Ian.

^ permalink raw reply	[flat|nested] 17+ messages in thread

end of thread, other threads:[~2010-02-23 14:25 UTC | newest]

Thread overview: 17+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2010-02-09 18:12 [PATCH] [PVOPS] dom0 sync xen wallclock Stefano Stabellini
2010-02-09 18:20 ` Stefano Stabellini
2010-02-10 23:09 ` Jeremy Fitzhardinge
2010-02-11  0:06   ` Keir Fraser
2010-02-11  0:47     ` Jeremy Fitzhardinge
2010-02-11  8:03       ` Keir Fraser
2010-02-11 19:16         ` Jeremy Fitzhardinge
2010-02-11 11:24   ` Stefano Stabellini
2010-02-11 11:36     ` Ian Campbell
2010-02-18 23:43     ` Jeremy Fitzhardinge
2010-02-19  1:02       ` Dan Magenheimer
2010-02-19  9:10       ` Ian Campbell
2010-02-19  9:39         ` Keir Fraser
2010-02-23 12:13           ` Stefano Stabellini
2010-02-23 14:25           ` Ian Campbell
2010-02-19 23:05         ` Jeremy Fitzhardinge
2010-02-23 14:25           ` Ian Campbell

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.