All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] Align periodic vpts
@ 2009-02-11 11:05 Wei, Gang
  2009-02-11 11:23 ` Keir Fraser
  2009-02-11 11:33 ` Keir Fraser
  0 siblings, 2 replies; 14+ messages in thread
From: Wei, Gang @ 2009-02-11 11:05 UTC (permalink / raw)
  To: xen-devel; +Cc: Keir Fraser

[-- Attachment #1: Type: text/plain, Size: 2533 bytes --]

Hi,

Aligned periodic vpts can improve the HVM guest power consumption a lot, especially while the guest using high HZ such as 1000HZ.
This patch aligns all periodic vpts except vlapic to the period bound. For vlapic, only make it aligned while using the new option "align_periodic_vpt".

Signed-off-by: Wei Gang <gang.wei@intel.com>

diff -r 4ac8bc60c000 xen/arch/x86/hvm/vpt.c
--- a/xen/arch/x86/hvm/vpt.c	Tue Feb 10 05:51:00 2009 +0000
+++ b/xen/arch/x86/hvm/vpt.c	Wed Feb 11 18:12:27 2009 +0800
@@ -354,6 +354,22 @@ void pt_migrate(struct vcpu *v)
     spin_unlock(&v->arch.hvm_vcpu.tm_lock);
 }
 
+/* 
+ * option "align_periodic_vpt" will make vlapic's expires aligned with other
+ * vpts while possible.
+ *
+ * CAUTION:
+ * While vlapic timer ticking too close to the pit.  We saw a userspace
+ * application getting the wrong answer because long CPU bound sequences
+ * appeared to run with zero CPU time. This only showed up with old Linux
+ * kernels (IIRC, it was with Red Hat 3 U8). So this option may cause a
+ * regression in this case.
+ */
+static int opt_align_periodic_vpt = 0;
+boolean_param("align_periodic_vpt", opt_align_periodic_vpt);
+
+extern s_time_t align_timer(s_time_t firsttick, uint64_t period);
+
 void create_periodic_time(
     struct vcpu *v, struct periodic_time *pt, uint64_t delta,
     uint64_t period, uint8_t irq, time_cb *cb, void *data)
@@ -389,8 +405,13 @@ void create_periodic_time(
      * LAPIC ticks for process accounting can see long sequences of process
      * ticks incorrectly accounted to interrupt processing.
      */
-    if ( !pt->one_shot && (pt->source == PTSRC_lapic) )
-        pt->scheduled += delta >> 1;
+    if ( !pt->one_shot )
+    {
+        pt->scheduled = align_timer(pt->scheduled, pt->period);
+        if ( !opt_align_periodic_vpt && (pt->source == PTSRC_lapic) )
+            pt->scheduled += delta >> 1;
+    }
+
     pt->cb = cb;
     pt->priv = data;
 
diff -r 4ac8bc60c000 xen/common/timer.c
--- a/xen/common/timer.c	Tue Feb 10 05:51:00 2009 +0000
+++ b/xen/common/timer.c	Wed Feb 11 18:12:27 2009 +0800
@@ -473,6 +473,14 @@ void process_pending_timers(void)
         timer_softirq_action();
 }
 
+/* calculate the aligned first tick time for the given periodic vpt */ 
+s_time_t align_timer(s_time_t firsttick, uint64_t period)
+{
+    if ( !period )
+        return firsttick;
+
+    return firsttick + period - (firsttick % period);
+}
 
 static void dump_timerq(unsigned char key)
 {

[-- Attachment #2: vpt-align-0211-2.patch --]
[-- Type: application/octet-stream, Size: 2484 bytes --]

Align periodic vpts

Aligned periodic vpts can improve the HVM guest power consumption a lot, especially while the guest using high HZ such as 1000HZ.
This patch aligns all periodic vpts except vlapic to the period bound. For vlapic, only make it aligned while using the new option "align_periodic_vpt".

Signed-off-by: Wei Gang <gang.wei@intel.com>

diff -r 4ac8bc60c000 xen/arch/x86/hvm/vpt.c
--- a/xen/arch/x86/hvm/vpt.c	Tue Feb 10 05:51:00 2009 +0000
+++ b/xen/arch/x86/hvm/vpt.c	Wed Feb 11 18:12:27 2009 +0800
@@ -354,6 +354,22 @@ void pt_migrate(struct vcpu *v)
     spin_unlock(&v->arch.hvm_vcpu.tm_lock);
 }
 
+/* 
+ * option "align_periodic_vpt" will make vlapic's expires aligned with other
+ * vpts while possible.
+ *
+ * CAUTION:
+ * While vlapic timer ticking too close to the pit.  We saw a userspace
+ * application getting the wrong answer because long CPU bound sequences
+ * appeared to run with zero CPU time. This only showed up with old Linux
+ * kernels (IIRC, it was with Red Hat 3 U8). So this option may cause a
+ * regression in this case.
+ */
+static int opt_align_periodic_vpt = 0;
+boolean_param("align_periodic_vpt", opt_align_periodic_vpt);
+
+extern s_time_t align_timer(s_time_t firsttick, uint64_t period);
+
 void create_periodic_time(
     struct vcpu *v, struct periodic_time *pt, uint64_t delta,
     uint64_t period, uint8_t irq, time_cb *cb, void *data)
@@ -389,8 +405,13 @@ void create_periodic_time(
      * LAPIC ticks for process accounting can see long sequences of process
      * ticks incorrectly accounted to interrupt processing.
      */
-    if ( !pt->one_shot && (pt->source == PTSRC_lapic) )
-        pt->scheduled += delta >> 1;
+    if ( !pt->one_shot )
+    {
+        pt->scheduled = align_timer(pt->scheduled, pt->period);
+        if ( !opt_align_periodic_vpt && (pt->source == PTSRC_lapic) )
+            pt->scheduled += delta >> 1;
+    }
+
     pt->cb = cb;
     pt->priv = data;
 
diff -r 4ac8bc60c000 xen/common/timer.c
--- a/xen/common/timer.c	Tue Feb 10 05:51:00 2009 +0000
+++ b/xen/common/timer.c	Wed Feb 11 18:12:27 2009 +0800
@@ -473,6 +473,14 @@ void process_pending_timers(void)
         timer_softirq_action();
 }
 
+/* calculate the aligned first tick time for the given periodic vpt */ 
+s_time_t align_timer(s_time_t firsttick, uint64_t period)
+{
+    if ( !period )
+        return firsttick;
+
+    return firsttick + period - (firsttick % period);
+}
 
 static void dump_timerq(unsigned char key)
 {

[-- Attachment #3: Type: text/plain, Size: 138 bytes --]

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] Align periodic vpts
  2009-02-11 11:05 [PATCH] Align periodic vpts Wei, Gang
@ 2009-02-11 11:23 ` Keir Fraser
  2009-02-11 11:41   ` Tian, Kevin
  2009-02-11 14:55   ` Dan Magenheimer
  2009-02-11 11:33 ` Keir Fraser
  1 sibling, 2 replies; 14+ messages in thread
From: Keir Fraser @ 2009-02-11 11:23 UTC (permalink / raw)
  To: Wei, Gang, xen-devel

On 11/02/2009 11:05, "Wei, Gang" <gang.wei@intel.com> wrote:

> + * CAUTION:
> + * While vlapic timer ticking too close to the pit.  We saw a userspace
> + * application getting the wrong answer because long CPU bound sequences
> + * appeared to run with zero CPU time. This only showed up with old Linux
> + * kernels (IIRC, it was with Red Hat 3 U8). So this option may cause a
> + * regression in this case.
> + */
> +static int opt_align_periodic_vpt = 0;
> +boolean_param("align_periodic_vpt", opt_align_periodic_vpt);
> +

Presumably there are common cases where not aligning vlapic too has
significant power overheads? Personally I'm not sure I care too much about a
minor regression on RH3, if this patch is worthwhile at all I think it
should be always on and at most have a domain config option. I think a boot
option will never ever be used.

 -- Keir

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] Align periodic vpts
  2009-02-11 11:05 [PATCH] Align periodic vpts Wei, Gang
  2009-02-11 11:23 ` Keir Fraser
@ 2009-02-11 11:33 ` Keir Fraser
  2009-02-11 11:48   ` Tian, Kevin
  1 sibling, 1 reply; 14+ messages in thread
From: Keir Fraser @ 2009-02-11 11:33 UTC (permalink / raw)
  To: Wei, Gang, xen-devel

On 11/02/2009 11:05, "Wei, Gang" <gang.wei@intel.com> wrote:

> Aligned periodic vpts can improve the HVM guest power consumption a lot,
> especially while the guest using high HZ such as 1000HZ.
> This patch aligns all periodic vpts except vlapic to the period bound. For
> vlapic, only make it aligned while using the new option "align_periodic_vpt".
> 
> Signed-off-by: Wei Gang <gang.wei@intel.com>

Also, Intel already contributed code to merge up timers. It's the
expiry-range patch in common/timer.c. This could be used by vpt.c to add a
per-domain acceptable range on vpt expiries. High-frequency timers would
then naturally fire together. Having a per-domain config option for this
would be something that would actually seem more generically useful (could
be used perhaps for other timers beyond vpt.c even).

This seems to me a more intuitive and gracefully selectable/de-selectable
alternative to this proposed patch, which really looks like a hardcoded
hack.

 -- Keir

^ permalink raw reply	[flat|nested] 14+ messages in thread

* RE: [PATCH] Align periodic vpts
  2009-02-11 11:23 ` Keir Fraser
@ 2009-02-11 11:41   ` Tian, Kevin
  2009-02-11 14:55   ` Dan Magenheimer
  1 sibling, 0 replies; 14+ messages in thread
From: Tian, Kevin @ 2009-02-11 11:41 UTC (permalink / raw)
  To: 'Keir Fraser', Wei, Gang, xen-devel

>From: Keir Fraser
>Sent: Wednesday, February 11, 2009 7:23 PM
>
>On 11/02/2009 11:05, "Wei, Gang" <gang.wei@intel.com> wrote:
>
>> + * CAUTION:
>> + * While vlapic timer ticking too close to the pit.  We saw 
>a userspace
>> + * application getting the wrong answer because long CPU 
>bound sequences
>> + * appeared to run with zero CPU time. This only showed up 
>with old Linux
>> + * kernels (IIRC, it was with Red Hat 3 U8). So this option 
>may cause a
>> + * regression in this case.
>> + */
>> +static int opt_align_periodic_vpt = 0;
>> +boolean_param("align_periodic_vpt", opt_align_periodic_vpt);
>> +
>
>Presumably there are common cases where not aligning vlapic too has
>significant power overheads? Personally I'm not sure I care 

yes, it's necessary as average C-state residency is almost halved 
if not aligning, and thus draw higher power

Thanks
Kevin

>too much about a
>minor regression on RH3, if this patch is worthwhile at all I think it
>should be always on and at most have a domain config option. I 
>think a boot
>option will never ever be used.
>
> -- Keir
>
>
>
>_______________________________________________
>Xen-devel mailing list
>Xen-devel@lists.xensource.com
>http://lists.xensource.com/xen-devel
>

^ permalink raw reply	[flat|nested] 14+ messages in thread

* RE: [PATCH] Align periodic vpts
  2009-02-11 11:33 ` Keir Fraser
@ 2009-02-11 11:48   ` Tian, Kevin
  2009-02-11 12:00     ` Tian, Kevin
  0 siblings, 1 reply; 14+ messages in thread
From: Tian, Kevin @ 2009-02-11 11:48 UTC (permalink / raw)
  To: 'Keir Fraser', Wei, Gang, xen-devel

>From: Keir Fraser
>Sent: Wednesday, February 11, 2009 7:34 PM
>
>On 11/02/2009 11:05, "Wei, Gang" <gang.wei@intel.com> wrote:
>
>> Aligned periodic vpts can improve the HVM guest power 
>consumption a lot,
>> especially while the guest using high HZ such as 1000HZ.
>> This patch aligns all periodic vpts except vlapic to the 
>period bound. For
>> vlapic, only make it aligned while using the new option 
>"align_periodic_vpt".
>> 
>> Signed-off-by: Wei Gang <gang.wei@intel.com>
>
>Also, Intel already contributed code to merge up timers. It's the
>expiry-range patch in common/timer.c. This could be used by 
>vpt.c to add a
>per-domain acceptable range on vpt expiries. High-frequency 
>timers would
>then naturally fire together. Having a per-domain config 
>option for this
>would be something that would actually seem more generically 
>useful (could
>be used perhaps for other timers beyond vpt.c even).
>
>This seems to me a more intuitive and gracefully 
>selectable/de-selectable
>alternative to this proposed patch, which really looks like a hardcoded
>hack.
>

nice idea. But one quick think in my mind leads to one issue. Now
Xen timer doesn't differentiate single-shot or periodic timer. Then such
per-domain range option could also impact single-shot timer servicing
same domain... Of course current global slop option has same effect.
But it'd be better to mitigate the side effect on single-shot timer. Is it
feasible to add a new set_timer_range interface for explicit invocation,
e.g. by vpt, while still keeping original global slop option applying to all?

Thanks,
Kevin

^ permalink raw reply	[flat|nested] 14+ messages in thread

* RE: [PATCH] Align periodic vpts
  2009-02-11 11:48   ` Tian, Kevin
@ 2009-02-11 12:00     ` Tian, Kevin
  2009-02-11 13:05       ` Keir Fraser
  0 siblings, 1 reply; 14+ messages in thread
From: Tian, Kevin @ 2009-02-11 12:00 UTC (permalink / raw)
  To: Tian, Kevin, 'Keir Fraser', Wei, Gang, xen-devel

>From: Tian, Kevin
>Sent: Wednesday, February 11, 2009 7:48 PM
>
>>From: Keir Fraser
>>Sent: Wednesday, February 11, 2009 7:34 PM
>>
>>On 11/02/2009 11:05, "Wei, Gang" <gang.wei@intel.com> wrote:
>>
>>> Aligned periodic vpts can improve the HVM guest power 
>>consumption a lot,
>>> especially while the guest using high HZ such as 1000HZ.
>>> This patch aligns all periodic vpts except vlapic to the 
>>period bound. For
>>> vlapic, only make it aligned while using the new option 
>>"align_periodic_vpt".
>>> 
>>> Signed-off-by: Wei Gang <gang.wei@intel.com>
>>
>>Also, Intel already contributed code to merge up timers. It's the
>>expiry-range patch in common/timer.c. This could be used by 
>>vpt.c to add a
>>per-domain acceptable range on vpt expiries. High-frequency 
>>timers would
>>then naturally fire together. Having a per-domain config 
>>option for this
>>would be something that would actually seem more generically 
>>useful (could
>>be used perhaps for other timers beyond vpt.c even).
>>
>>This seems to me a more intuitive and gracefully 
>>selectable/de-selectable
>>alternative to this proposed patch, which really looks like a 
>hardcoded
>>hack.
>>
>
>nice idea. But one quick think in my mind leads to one issue. Now
>Xen timer doesn't differentiate single-shot or periodic timer. 
>Then such
>per-domain range option could also impact single-shot timer servicing
>same domain... Of course current global slop option has same effect.
>But it'd be better to mitigate the side effect on single-shot 
>timer. Is it
>feasible to add a new set_timer_range interface for explicit 
>invocation,
>e.g. by vpt, while still keeping original global slop option 
>applying to all?
>

Think it more, I think that Jimmy's patch is simpler and more 
accurate for the purpose. It's just a one-time adjustment for
periodical timer, and no harm to single-shot timer. It can be
enabled by default, while per-domain range has side-effect 
unless adding more code to differentiate timers which is not
worthy. 

Of course per-domain switch is still required to disable it as
your previous comment, for old guest.

Thanks,
Kevin

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] Align periodic vpts
  2009-02-11 12:00     ` Tian, Kevin
@ 2009-02-11 13:05       ` Keir Fraser
  2009-02-11 13:20         ` Tian, Kevin
  0 siblings, 1 reply; 14+ messages in thread
From: Keir Fraser @ 2009-02-11 13:05 UTC (permalink / raw)
  To: Tian, Kevin, Wei, Gang, xen-devel

On 11/02/2009 12:00, "Tian, Kevin" <kevin.tian@intel.com> wrote:

> Think it more, I think that Jimmy's patch is simpler and more
> accurate for the purpose. It's just a one-time adjustment for
> periodical timer, and no harm to single-shot timer. It can be
> enabled by default, while per-domain range has side-effect
> unless adding more code to differentiate timers which is not
> worthy. 
> 
> Of course per-domain switch is still required to disable it as
> your previous comment, for old guest.

I'd actually be interested in knowing how just bumping Xen cmdline option
timer_slop= would influence power usage and guest timers. No new code
needed, a nice sliding dial (per host) for power usage versus timer
accuracy.

 -- Keir

^ permalink raw reply	[flat|nested] 14+ messages in thread

* RE: [PATCH] Align periodic vpts
  2009-02-11 13:05       ` Keir Fraser
@ 2009-02-11 13:20         ` Tian, Kevin
  2009-02-12  2:09           ` Wei, Gang
  0 siblings, 1 reply; 14+ messages in thread
From: Tian, Kevin @ 2009-02-11 13:20 UTC (permalink / raw)
  To: 'Keir Fraser', Wei, Gang, xen-devel

>From: Keir Fraser [mailto:keir.fraser@eu.citrix.com] 
>Sent: Wednesday, February 11, 2009 9:06 PM
>
>On 11/02/2009 12:00, "Tian, Kevin" <kevin.tian@intel.com> wrote:
>
>> Think it more, I think that Jimmy's patch is simpler and more
>> accurate for the purpose. It's just a one-time adjustment for
>> periodical timer, and no harm to single-shot timer. It can be
>> enabled by default, while per-domain range has side-effect
>> unless adding more code to differentiate timers which is not
>> worthy. 
>> 
>> Of course per-domain switch is still required to disable it as
>> your previous comment, for old guest.
>
>I'd actually be interested in knowing how just bumping Xen 
>cmdline option
>timer_slop= would influence power usage and guest timers. No new code
>needed, a nice sliding dial (per host) for power usage versus timer
>accuracy.
>

We'll present some in-depth data in near summit. Basically for
a single 2-vcpu HVM RHEL5u1 on a two core mobile platform,
1ms slop, compared to default 50us, could bring 7.5% more power
saving by reducing timer interrupt by a factor of 3 (RHEL5u1 is
by default 1000HZ meaning 3000 virtual interrupts for 1 vPIT and 2
vAPIC, and then 1ms slop roughly drops interrupt to ~1000). By
running SPECpower, power efficiency score is also slightly improved.

However when we run iperf to check latency, the data became 
unstable.

So range timer does affects latency, but in general is a power
efficient feature to fit requirement where power matters more. It's
especially useful at cpu over-commitment where more chances
to align timers and reduce interrupts by a higher factor.

While range timer impacts all timers nondistinctively (xen timer
itself is in essential one shot), Jimmy's patch tends to reach
similar effect for periodical timer (since once align at 1st shot,
so does latter), while leaving single shot timer as it is w/o touching
global slop.

To me above two are not identical which reduces power in different
level.:-)

Thanks,
Kevin

^ permalink raw reply	[flat|nested] 14+ messages in thread

* RE: Re: [PATCH] Align periodic vpts
  2009-02-11 11:23 ` Keir Fraser
  2009-02-11 11:41   ` Tian, Kevin
@ 2009-02-11 14:55   ` Dan Magenheimer
  2009-02-11 15:24     ` Tian, Kevin
  1 sibling, 1 reply; 14+ messages in thread
From: Dan Magenheimer @ 2009-02-11 14:55 UTC (permalink / raw)
  To: Keir Fraser, Wei, Gang, xen-devel

> > + * CAUTION:
> > + * While vlapic timer ticking too close to the pit.  We 
> saw a userspace
> > + * application getting the wrong answer because long CPU 
> bound sequences
> > + * appeared to run with zero CPU time. This only showed up 
> with old Linux
> > + * kernels (IIRC, it was with Red Hat 3 U8). So this 
> option may cause a
> > + * regression in this case.
> > + */
> > +static int opt_align_periodic_vpt = 0;
> > +boolean_param("align_periodic_vpt", opt_align_periodic_vpt);
> > +
> 
> Presumably there are common cases where not aligning vlapic too has
> significant power overheads? Personally I'm not sure I care 
> too much about a
> minor regression on RH3, if this patch is worthwhile at all I think it
> should be always on and at most have a domain config option. 
> I think a boot
> option will never ever be used.

Given the wide variety of guests, and clocksource defaults/choices
in those guests, I'm leery about this change, especially turning it
on by default.  The consequences of guest clocks losing time or gaining
time or appearing to go backwards are significant and the potential
problems go well beyond "a minor regression on RH3" and IMHO are
much more impactful for customers than saving a watt or two.

It is difficult in a simple test environment to reproduce the
problem unless you know what you are looking for.
Virtual Iron had a rather extensive set of test cases and I'd
like to see those run before this is turned on by default.

^ permalink raw reply	[flat|nested] 14+ messages in thread

* RE: Re: [PATCH] Align periodic vpts
  2009-02-11 14:55   ` Dan Magenheimer
@ 2009-02-11 15:24     ` Tian, Kevin
  2009-02-11 15:35       ` Dan Magenheimer
  0 siblings, 1 reply; 14+ messages in thread
From: Tian, Kevin @ 2009-02-11 15:24 UTC (permalink / raw)
  To: 'Dan Magenheimer', Keir Fraser, Wei, Gang, xen-devel

>From: Dan Magenheimer
>Sent: Wednesday, February 11, 2009 10:56 PM
>
>> > + * CAUTION:
>> > + * While vlapic timer ticking too close to the pit.  We 
>> saw a userspace
>> > + * application getting the wrong answer because long CPU 
>> bound sequences
>> > + * appeared to run with zero CPU time. This only showed up 
>> with old Linux
>> > + * kernels (IIRC, it was with Red Hat 3 U8). So this 
>> option may cause a
>> > + * regression in this case.
>> > + */
>> > +static int opt_align_periodic_vpt = 0;
>> > +boolean_param("align_periodic_vpt", opt_align_periodic_vpt);
>> > +
>> 
>> Presumably there are common cases where not aligning vlapic too has
>> significant power overheads? Personally I'm not sure I care 
>> too much about a
>> minor regression on RH3, if this patch is worthwhile at all 
>I think it
>> should be always on and at most have a domain config option. 
>> I think a boot
>> option will never ever be used.
>
>Given the wide variety of guests, and clocksource defaults/choices
>in those guests, I'm leery about this change, especially turning it
>on by default.  The consequences of guest clocks losing time or gaining
>time or appearing to go backwards are significant and the potential
>problems go well beyond "a minor regression on RH3" and IMHO are
>much more impactful for customers than saving a watt or two.

I'm not sure why you count this feature as the cause for guest time
inaccuracy. This patch only shifts 1st expiration of periodical timer,
and later expirations are all exactly as expected relative to previous
one.

>
>It is difficult in a simple test environment to reproduce the
>problem unless you know what you are looking for.
>Virtual Iron had a rather extensive set of test cases and I'd
>like to see those run before this is turned on by default.
>

VI's issue is not about time inaccuracy or performance, which is
about accounting.

Thanks,
Kevin

^ permalink raw reply	[flat|nested] 14+ messages in thread

* RE: Re: [PATCH] Align periodic vpts
  2009-02-11 15:24     ` Tian, Kevin
@ 2009-02-11 15:35       ` Dan Magenheimer
  2009-02-11 15:40         ` Tian, Kevin
  0 siblings, 1 reply; 14+ messages in thread
From: Dan Magenheimer @ 2009-02-11 15:35 UTC (permalink / raw)
  To: Tian, Kevin, Keir Fraser, Wei, Gang, xen-devel

> I'm not sure why you count this feature as the cause for guest time
> inaccuracy. This patch only shifts 1st expiration of periodical timer,
> and later expirations are all exactly as expected relative to previous
> one.

OK, if that is correct, I have no problem with the patch.

> VI's issue is not about time inaccuracy or performance, which is
> about accounting.

Many of the problems (and Oracle has seen customers with similar
problems) are related to the timing of delivery of "ticks"
relative to each other, e.g. if consecutive ticks are sometimes
0.01s apart and sometimes 0.0099s apart and sometimes 0.0101s
apart, this causes different problems for different guests with
different default/chosen clocksources.

Dan

> -----Original Message-----
> From: Tian, Kevin [mailto:kevin.tian@intel.com]
> Sent: Wednesday, February 11, 2009 8:25 AM
> To: Dan Magenheimer; Keir Fraser; Wei, Gang; xen-devel
> Subject: RE: [Xen-devel] Re: [PATCH] Align periodic vpts
> 
> 
> >From: Dan Magenheimer
> >Sent: Wednesday, February 11, 2009 10:56 PM
> >
> >> > + * CAUTION:
> >> > + * While vlapic timer ticking too close to the pit.  We 
> >> saw a userspace
> >> > + * application getting the wrong answer because long CPU 
> >> bound sequences
> >> > + * appeared to run with zero CPU time. This only showed up 
> >> with old Linux
> >> > + * kernels (IIRC, it was with Red Hat 3 U8). So this 
> >> option may cause a
> >> > + * regression in this case.
> >> > + */
> >> > +static int opt_align_periodic_vpt = 0;
> >> > +boolean_param("align_periodic_vpt", opt_align_periodic_vpt);
> >> > +
> >> 
> >> Presumably there are common cases where not aligning vlapic too has
> >> significant power overheads? Personally I'm not sure I care 
> >> too much about a
> >> minor regression on RH3, if this patch is worthwhile at all 
> >I think it
> >> should be always on and at most have a domain config option. 
> >> I think a boot
> >> option will never ever be used.
> >
> >Given the wide variety of guests, and clocksource defaults/choices
> >in those guests, I'm leery about this change, especially turning it
> >on by default.  The consequences of guest clocks losing time 
> or gaining
> >time or appearing to go backwards are significant and the potential
> >problems go well beyond "a minor regression on RH3" and IMHO are
> >much more impactful for customers than saving a watt or two.
> 
> I'm not sure why you count this feature as the cause for guest time
> inaccuracy. This patch only shifts 1st expiration of periodical timer,
> and later expirations are all exactly as expected relative to previous
> one.
> 
> >
> >It is difficult in a simple test environment to reproduce the
> >problem unless you know what you are looking for.
> >Virtual Iron had a rather extensive set of test cases and I'd
> >like to see those run before this is turned on by default.
> >
> 
> VI's issue is not about time inaccuracy or performance, which is
> about accounting.
> 
> Thanks,
> Kevin

^ permalink raw reply	[flat|nested] 14+ messages in thread

* RE: Re: [PATCH] Align periodic vpts
  2009-02-11 15:35       ` Dan Magenheimer
@ 2009-02-11 15:40         ` Tian, Kevin
  0 siblings, 0 replies; 14+ messages in thread
From: Tian, Kevin @ 2009-02-11 15:40 UTC (permalink / raw)
  To: 'Dan Magenheimer', Keir Fraser, Wei, Gang, xen-devel

>From: Dan Magenheimer [mailto:dan.magenheimer@oracle.com] 
>Sent: Wednesday, February 11, 2009 11:36 PM
>
>> I'm not sure why you count this feature as the cause for guest time
>> inaccuracy. This patch only shifts 1st expiration of 
>periodical timer,
>> and later expirations are all exactly as expected relative 
>to previous
>> one.
>
>OK, if that is correct, I have no problem with the patch.
>
>> VI's issue is not about time inaccuracy or performance, which is
>> about accounting.
>
>Many of the problems (and Oracle has seen customers with similar
>problems) are related to the timing of delivery of "ticks"
>relative to each other, e.g. if consecutive ticks are sometimes
>0.01s apart and sometimes 0.0099s apart and sometimes 0.0101s
>apart, this causes different problems for different guests with
>different default/chosen clocksources.
>

Yep, I agree. It's a common issue from virtualization. :-)

Thanks,
Kevin

^ permalink raw reply	[flat|nested] 14+ messages in thread

* RE: [PATCH] Align periodic vpts
  2009-02-11 13:20         ` Tian, Kevin
@ 2009-02-12  2:09           ` Wei, Gang
  2009-02-13  9:21             ` Wei, Gang
  0 siblings, 1 reply; 14+ messages in thread
From: Wei, Gang @ 2009-02-12  2:09 UTC (permalink / raw)
  To: Tian, Kevin, 'Keir Fraser', xen-devel

On Wednesday, February 11, 2009 9:21 PM, Tian, Kevin wrote:
>> From: Keir Fraser [mailto:keir.fraser@eu.citrix.com]
>> Sent: Wednesday, February 11, 2009 9:06 PM
>> 
>> I'd actually be interested in knowing how just bumping Xen
>> cmdline option
>> timer_slop= would influence power usage and guest timers. No new code
>> needed, a nice sliding dial (per host) for power usage versus timer accuracy.
...
> So range timer does affects latency, but in general is a power
> efficient feature to fit requirement where power matters more. It's
> especially useful at cpu over-commitment where more chances
> to align timers and reduce interrupts by a higher factor.
> 
> While range timer impacts all timers nondistinctively (xen timer
> itself is in essential one shot), Jimmy's patch tends to reach
> similar effect for periodical timer (since once align at 1st shot,
> so does latter), while leaving single shot timer as it is w/o touching
> global slop.
> 
> To me above two are not identical which reduces power in different
> level.:-)

Just as Kevin explained, aligning periodic timer in the beginning could bring powe gain with less impact to timer expiry accuracy, so it is suitable for using before we dig out most of the side effect for large slop range timer. I will try to make the option per-domain and then resend the patch. Meanwhile, I am also prefer to make this option default on. Any further comments?

Jimmy

^ permalink raw reply	[flat|nested] 14+ messages in thread

* RE: [PATCH] Align periodic vpts
  2009-02-12  2:09           ` Wei, Gang
@ 2009-02-13  9:21             ` Wei, Gang
  0 siblings, 0 replies; 14+ messages in thread
From: Wei, Gang @ 2009-02-13  9:21 UTC (permalink / raw)
  To: Tian, Kevin, 'Keir Fraser', xen-devel

[-- Attachment #1: Type: text/plain, Size: 612 bytes --]

On Thursday, February 12, 2009 10:10 AM, Wei, Gang wrote:
> Just as Kevin explained, aligning periodic timer in the beginning could bring
> powe gain with less impact to timer expiry accuracy, so it is suitable for
> using before we dig out most of the side effect for large slop range timer. I
> will try to make the option per-domain and then resend the patch. Meanwhile,
> I am also prefer to make this option default on. Any further comments?    

Here is the updated patch which makes a per-domain option "vpt_align". The C3 average residency was doubled with vpt_align=1for RHEL5 hvm guest.

Jimmy

[-- Attachment #2: vpt-align-0213.patch --]
[-- Type: application/octet-stream, Size: 6792 bytes --]

Align periodic vpts

Aligned periodic vpts can improve the HVM guest power consumption a lot, especially while the guest using high HZ such as 1000HZ.
This patch aligns all periodic vpts to the period bound by default. It is still possible to cannel the forced alignment via giving the new per-domain config option "vpt_align=0".

Signed-off-by: Wei Gang <gang.wei@intel.com>

diff -r b999142bca8c tools/python/xen/xend/XendConfig.py
--- a/tools/python/xen/xend/XendConfig.py	Fri Jan 09 16:56:54 2009 +0000
+++ b/tools/python/xen/xend/XendConfig.py	Fri Feb 13 14:40:36 2009 +0800
@@ -157,6 +157,7 @@ XENAPI_PLATFORM_CFG_TYPES = {
     'vncdisplay': int,
     'vnclisten': str,
     'timer_mode': int,
+    'vpt_align': int,
     'viridian': int,
     'vncpasswd': str,
     'vncunused': int,
@@ -458,6 +459,8 @@ class XendConfig(dict):
                 self['platform']['rtc_timeoffset'] = 0
             if 'hpet' not in self['platform']:
                 self['platform']['hpet'] = 0
+            if 'vpt_align' not in self['platform']:
+                self['platform']['vpt_align'] = 1
             if 'loader' not in self['platform']:
                 # Old configs may have hvmloader set as PV_kernel param
                 if self.has_key('PV_kernel') and self['PV_kernel'] != '':
diff -r b999142bca8c tools/python/xen/xend/XendConstants.py
--- a/tools/python/xen/xend/XendConstants.py	Fri Jan 09 16:56:54 2009 +0000
+++ b/tools/python/xen/xend/XendConstants.py	Fri Feb 13 14:40:36 2009 +0800
@@ -50,6 +50,7 @@ HVM_PARAM_TIMER_MODE   = 10
 HVM_PARAM_TIMER_MODE   = 10
 HVM_PARAM_HPET_ENABLED = 11
 HVM_PARAM_ACPI_S_STATE = 14
+HVM_PARAM_VPT_ALIGN    = 16
 
 restart_modes = [
     "restart",
diff -r b999142bca8c tools/python/xen/xend/XendDomainInfo.py
--- a/tools/python/xen/xend/XendDomainInfo.py	Fri Jan 09 16:56:54 2009 +0000
+++ b/tools/python/xen/xend/XendDomainInfo.py	Fri Feb 13 14:40:36 2009 +0800
@@ -2236,6 +2236,12 @@ class XendDomainInfo:
         if hvm and hpet is not None:
             xc.hvm_set_param(self.domid, HVM_PARAM_HPET_ENABLED,
                              long(hpet))
+
+        # Optionally enable periodic vpt aligning
+        vpt_align = self.info["platform"].get("vpt_align")
+        if hvm and vpt_align is not None:
+            xc.hvm_set_param(self.domid, HVM_PARAM_VPT_ALIGN,
+                             long(vpt_align))
 
         # Set maximum number of vcpus in domain
         xc.domain_max_vcpus(self.domid, int(self.info['VCPUs_max']))
diff -r b999142bca8c tools/python/xen/xm/create.py
--- a/tools/python/xen/xm/create.py	Fri Jan 09 16:56:54 2009 +0000
+++ b/tools/python/xen/xm/create.py	Fri Feb 13 14:40:36 2009 +0800
@@ -218,6 +218,10 @@ gopts.var('timer_mode', val='TIMER_MODE'
           fn=set_int, default=1,
           use="""Timer mode (0=delay virtual time when ticks are missed;
           1=virtual time is always wallclock time.""")
+
+gopts.var('vpt_align', val='VPT_ALIGN',
+          fn=set_int, default=1,
+          use="Enable aligning all periodic vpt to reduce timer interrupts.")
 
 gopts.var('viridian', val='VIRIDIAN',
           fn=set_int, default=0,
@@ -889,7 +893,8 @@ def configure_hvm(config_image, vals):
              'sdl', 'display', 'xauthority', 'rtc_timeoffset', 'monitor',
              'acpi', 'apic', 'usb', 'usbdevice', 'keymap', 'pci', 'hpet',
              'guest_os_type', 'hap', 'opengl', 'cpuid', 'cpuid_check',
-             'viridian', 'xen_extended_power_mgmt', 'pci_msitranslate' ]
+             'viridian', 'xen_extended_power_mgmt', 'pci_msitranslate',
+             'vpt_align' ]
 
     for a in args:
         if a in vals.__dict__ and vals.__dict__[a] is not None:
diff -r b999142bca8c tools/python/xen/xm/xenapi_create.py
--- a/tools/python/xen/xm/xenapi_create.py	Fri Jan 09 16:56:54 2009 +0000
+++ b/tools/python/xen/xm/xenapi_create.py	Fri Feb 13 14:40:36 2009 +0800
@@ -1037,6 +1037,7 @@ class sxp2xml:
             'usbdevice',
             'hpet',
             'timer_mode',
+            'vpt_align',
             'viridian',
             'vhpt',
             'guest_os_type',
diff -r b999142bca8c xen/arch/x86/hvm/hvm.c
--- a/xen/arch/x86/hvm/hvm.c	Fri Jan 09 16:56:54 2009 +0000
+++ b/xen/arch/x86/hvm/hvm.c	Fri Feb 13 14:40:36 2009 +0800
@@ -306,6 +306,7 @@ int hvm_domain_initialise(struct domain 
     hvm_init_guest_time(d);
 
     d->arch.hvm_domain.params[HVM_PARAM_HPET_ENABLED] = 1;
+    d->arch.hvm_domain.params[HVM_PARAM_VPT_ALIGN]    = 1;
 
     hvm_init_cacheattr_region_list(d);
 
diff -r b999142bca8c xen/arch/x86/hvm/vpt.c
--- a/xen/arch/x86/hvm/vpt.c	Fri Jan 09 16:56:54 2009 +0000
+++ b/xen/arch/x86/hvm/vpt.c	Fri Feb 13 14:40:36 2009 +0800
@@ -389,8 +389,14 @@ void create_periodic_time(
      * LAPIC ticks for process accounting can see long sequences of process
      * ticks incorrectly accounted to interrupt processing.
      */
-    if ( !pt->one_shot && (pt->source == PTSRC_lapic) )
-        pt->scheduled += delta >> 1;
+    if ( !pt->one_shot )
+    {
+        if ( v->domain->arch.hvm_domain.params[HVM_PARAM_VPT_ALIGN] )
+            pt->scheduled = align_timer(pt->scheduled, pt->period);
+        else if ( pt->source == PTSRC_lapic )
+            pt->scheduled += delta >> 1;
+    }
+
     pt->cb = cb;
     pt->priv = data;
 
diff -r b999142bca8c xen/common/timer.c
--- a/xen/common/timer.c	Fri Jan 09 16:56:54 2009 +0000
+++ b/xen/common/timer.c	Fri Feb 13 14:40:36 2009 +0800
@@ -473,6 +473,13 @@ void process_pending_timers(void)
         timer_softirq_action();
 }
 
+s_time_t align_timer(s_time_t firsttick, uint64_t period)
+{
+    if ( !period )
+        return firsttick;
+
+    return firsttick + (period - 1) - ((firsttick - 1) % period);
+}
 
 static void dump_timerq(unsigned char key)
 {
diff -r b999142bca8c xen/include/public/hvm/params.h
--- a/xen/include/public/hvm/params.h	Fri Jan 09 16:56:54 2009 +0000
+++ b/xen/include/public/hvm/params.h	Fri Feb 13 14:40:36 2009 +0800
@@ -103,6 +103,9 @@
 /* TSS used on Intel when CR0.PE=0. */
 #define HVM_PARAM_VM86_TSS     15
 
-#define HVM_NR_PARAMS          16
+/* Boolean: Enable aligning all periodic vpts to reduce interrupts */
+#define HVM_PARAM_VPT_ALIGN    16
+
+#define HVM_NR_PARAMS          17
 
 #endif /* __XEN_PUBLIC_HVM_PARAMS_H__ */
diff -r b999142bca8c xen/include/xen/timer.h
--- a/xen/include/xen/timer.h	Fri Jan 09 16:56:54 2009 +0000
+++ b/xen/include/xen/timer.h	Fri Feb 13 14:40:36 2009 +0800
@@ -122,6 +122,9 @@ DECLARE_PER_CPU(s_time_t, timer_deadline
 /* Arch-defined function to reprogram timer hardware for new deadline. */
 extern int reprogram_timer(s_time_t timeout);
 
+/* calculate the aligned first tick time for a given periodic timer */ 
+extern s_time_t align_timer(s_time_t firsttick, uint64_t period);
+
 #endif /* _TIMER_H_ */
 
 /*

[-- Attachment #3: Type: text/plain, Size: 138 bytes --]

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel

^ permalink raw reply	[flat|nested] 14+ messages in thread

end of thread, other threads:[~2009-02-13  9:21 UTC | newest]

Thread overview: 14+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2009-02-11 11:05 [PATCH] Align periodic vpts Wei, Gang
2009-02-11 11:23 ` Keir Fraser
2009-02-11 11:41   ` Tian, Kevin
2009-02-11 14:55   ` Dan Magenheimer
2009-02-11 15:24     ` Tian, Kevin
2009-02-11 15:35       ` Dan Magenheimer
2009-02-11 15:40         ` Tian, Kevin
2009-02-11 11:33 ` Keir Fraser
2009-02-11 11:48   ` Tian, Kevin
2009-02-11 12:00     ` Tian, Kevin
2009-02-11 13:05       ` Keir Fraser
2009-02-11 13:20         ` Tian, Kevin
2009-02-12  2:09           ` Wei, Gang
2009-02-13  9:21             ` Wei, Gang

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.