linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Jeremy Fitzhardinge <jeremy@goop.org>
To: Dan Hecht <dhecht@vmware.com>
Cc: dwalker@mvista.com, cpufreq@lists.linux.org.uk,
	Linux Kernel Mailing List <linux-kernel@vger.kernel.org>,
	Con Kolivas <kernel@kolivas.org>,
	Chris Wright <chrisw@sous-sol.org>,
	Virtualization Mailing List <virtualization@lists.osdl.org>,
	john stultz <johnstul@us.ibm.com>, Ingo Molnar <mingo@elte.hu>,
	Thomas Gleixner <tglx@linutronix.de>,
	paulus@au.ibm.com, schwidefsky@de.ibm.com,
	Rik van Riel <riel@redhat.com>
Subject: Re: Stolen and degraded time and schedulers
Date: Wed, 14 Mar 2007 13:31:55 -0700	[thread overview]
Message-ID: <45F85BBB.70707@goop.org> (raw)
In-Reply-To: <45F85A62.8050001@vmware.com>

[-- Attachment #1: Type: text/plain, Size: 704 bytes --]

Dan Hecht wrote:
> Sounds good.  I don't see this in your patchset you sent yesterday
> though; did you add it after sending out those patches?

Yes.

>   if so, could you forward the new patch?  does it explicitly prevent
> stolen time from getting accounted as  user/system time or does it
> just rely on NO_HZ mode sort of happening to work that way (since the
> one shot timer is skipped ahead for missed ticks)?

Hm, not sure.  It doesn't care how often it gets called; it just
accumulates results up to that point, but I'm not sure if the time would
get double accounted.  Perhaps it doesn't matter when using
xen_sched_clock().

Did the get_scheduled_time -> sched_clock make sense to you?

    J

[-- Attachment #2: xen-stolen-time.patch --]
[-- Type: text/x-patch, Size: 3983 bytes --]

Signed-off-by: Jeremy Fitzhardinge <jeremy@xensource.com>
Cc: john stultz <johnstul@us.ibm.com>

---
 arch/i386/xen/time.c |  101 ++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 101 insertions(+)

===================================================================
--- a/arch/i386/xen/time.c
+++ b/arch/i386/xen/time.c
@@ -2,6 +2,7 @@
 #include <linux/interrupt.h>
 #include <linux/clocksource.h>
 #include <linux/clockchips.h>
+#include <linux/kernel_stat.h>
 
 #include <asm/xen/hypervisor.h>
 #include <asm/xen/hypercall.h>
@@ -14,6 +15,7 @@
 
 #define XEN_SHIFT 22
 #define TIMER_SLOP	100000	/* Xen may fire a timer up to this many ns early */
+#define NS_PER_TICK	(1000000000ll / HZ)
 
 static DEFINE_PER_CPU(struct clock_event_device, xen_clock_events);
 
@@ -28,6 +30,99 @@ struct shadow_time_info {
 
 static DEFINE_PER_CPU(struct shadow_time_info, shadow_time);
 
+/* runstate info updated by Xen */
+static DEFINE_PER_CPU(struct vcpu_runstate_info, runstate);
+
+/* snapshots of runstate info */
+static DEFINE_PER_CPU(struct vcpu_runstate_info, runstate_snapshot);
+
+/* unused ns of stolen and blocked time */
+static DEFINE_PER_CPU(u64, residual_stolen);
+static DEFINE_PER_CPU(u64, residual_blocked);
+
+/* Runstate accounting.  get_runstate_snapshot() copies this CPU's runstate
+   (which is updated by Xen) into *res, retrying if state_entry_time changed.
+ */
+static void get_runstate_snapshot(struct vcpu_runstate_info *res)
+{
+	u64 state_time;
+	struct vcpu_runstate_info *state;
+
+	preempt_disable();	/* stay on this CPU while reading its per-cpu area */
+
+	state = &__get_cpu_var(runstate);
+
+	do {
+		state_time = state->state_entry_time;
+		barrier();	/* keep the compiler from moving the copy above this read */
+		*res = *state;
+		barrier();	/* ...or below the re-read in the loop condition */
+	} while(state->state_entry_time != state_time);
+
+	preempt_enable();
+}
+
+static void setup_runstate_info(void)
+{
+	struct vcpu_register_runstate_memory_area area;
+	/* Register this CPU's runstate area with Xen, then take a first snapshot. */
+	area.addr.v = &__get_cpu_var(runstate);
+
+	if (HYPERVISOR_vcpu_op(VCPUOP_register_runstate_memory_area,
+			       smp_processor_id(), &area))	/* NOTE(review): always the current CPU; confirm xen_setup_timer(cpu) runs on 'cpu' */
+		BUG();	/* a non-zero hypercall result is treated as fatal */
+
+	get_runstate_snapshot(&__get_cpu_var(runstate_snapshot));	/* baseline for do_stolen_accounting() deltas */
+}
+
+static void do_stolen_accounting(void)
+{
+	struct vcpu_runstate_info state;
+	struct vcpu_runstate_info *snap;
+	u64 blocked, runnable, offline, stolen;
+	cputime_t ticks;
+	/* Charge non-running time accrued since the previous snapshot, in whole ticks. */
+	get_runstate_snapshot(&state);
+
+	WARN_ON(state.state != RUNSTATE_running);	/* called from the timer interrupt handlers */
+
+	snap = &__get_cpu_var(runstate_snapshot);
+
+	/* work out how much time the VCPU has not been runn*ing*  */
+	blocked = state.time[RUNSTATE_blocked] - snap->time[RUNSTATE_blocked];
+	runnable = state.time[RUNSTATE_runnable] - snap->time[RUNSTATE_runnable];
+	offline = state.time[RUNSTATE_offline] - snap->time[RUNSTATE_offline];
+
+	*snap = state;	/* current readings become the baseline for the next call */
+
+	/* Add the appropriate number of ticks of stolen time,
+	   including any left-overs from last time.  Passing NULL to
+	   account_steal_time accounts the time as stolen. */
+	stolen = runnable + offline + __get_cpu_var(residual_stolen);
+	ticks = 0;
+	while(stolen >= NS_PER_TICK) {	/* ns -> whole ticks by repeated subtraction */
+		ticks++;
+		stolen -= NS_PER_TICK;
+	}
+	__get_cpu_var(residual_stolen) = stolen;	/* carry the sub-tick remainder forward */
+	account_steal_time(NULL, ticks);
+
+	/* Add the appropriate number of ticks of blocked time,
+	   including any left-overs from last time.  Passing idle to
+	   account_steal_time accounts the time as idle/wait. */
+	blocked += __get_cpu_var(residual_blocked);
+	ticks = 0;
+	while(blocked >= NS_PER_TICK) {	/* same ns -> ticks conversion as above */
+		ticks++;
+		blocked -= NS_PER_TICK;
+	}
+	__get_cpu_var(residual_blocked) = blocked;	/* carry the sub-tick remainder forward */
+	account_steal_time(idle_task(smp_processor_id()), ticks);
+}
+
+
+
+/* Get the CPU speed from Xen */
 unsigned long xen_cpu_khz(void)
 {
 	u64 cpu_khz = 1000000ULL << 32;
@@ -264,6 +359,8 @@ static irqreturn_t xen_timerop_timer_int
 		ret = IRQ_HANDLED;
 	}
 
+	do_stolen_accounting();
+
 	return ret;
 }
 
@@ -338,6 +435,8 @@ static irqreturn_t xen_vcpuop_timer_inte
 		ret = IRQ_HANDLED;
 	}
 
+	do_stolen_accounting();
+
 	return ret;
 }
 
@@ -380,6 +479,8 @@ static void xen_setup_timer(int cpu)
 	evt->cpumask = cpumask_of_cpu(cpu);
 	evt->irq = irq;
 	clockevents_register_device(evt);
+
+	setup_runstate_info();
 
 	put_cpu_var(xen_clock_events);
 }

[-- Attachment #3: xen-sched-clock.patch --]
[-- Type: text/x-patch, Size: 2222 bytes --]

Subject: Implement xen_sched_clock

Implement xen_sched_clock, which returns the number of ns the current
vcpu has actually been in the running state (vs blocked,
runnable-but-not-running, or offline) since boot.

Signed-off-by: Jeremy Fitzhardinge <jeremy@xensource.com>
Cc: john stultz <johnstul@us.ibm.com>

---
 arch/i386/xen/enlighten.c |    2 +-
 arch/i386/xen/time.c      |   14 ++++++++++++++
 arch/i386/xen/xen-ops.h   |    1 +
 3 files changed, 16 insertions(+), 1 deletion(-)

===================================================================
--- a/arch/i386/xen/enlighten.c
+++ b/arch/i386/xen/enlighten.c
@@ -664,7 +664,7 @@ static const struct paravirt_ops xen_par
 	.set_wallclock = xen_set_wallclock,
 	.get_wallclock = xen_get_wallclock,
 	.get_cpu_khz = xen_cpu_khz,
-	.get_scheduled_cycles = native_read_tsc,
+	.sched_clock = xen_sched_clock,
 
 #ifdef CONFIG_X86_LOCAL_APIC
 	.apic_write = paravirt_nop,
===================================================================
--- a/arch/i386/xen/time.c
+++ b/arch/i386/xen/time.c
@@ -16,6 +16,8 @@
 #define XEN_SHIFT 22
 #define TIMER_SLOP	100000	/* Xen may fire a timer up to this many ns early */
 #define NS_PER_TICK	(1000000000ll / HZ)
+
+static cycle_t xen_clocksource_read(void);
 
 static DEFINE_PER_CPU(struct clock_event_device, xen_clock_events);
 
@@ -120,6 +122,18 @@ static void do_stolen_accounting(void)
 	account_steal_time(idle_task(smp_processor_id()), ticks);
 }
 
+/* Xen sched_clock implementation.  Returns the ns this vcpu has spent RUNNING */
+unsigned long long xen_sched_clock(void)
+{
+	struct vcpu_runstate_info state;
+	cycle_t now = xen_clocksource_read();	/* NOTE(review): assumed ns on the same timebase as state_entry_time -- confirm */
+
+	get_runstate_snapshot(&state);
+
+	WARN_ON(state.state != RUNSTATE_running);	/* the sum below only makes sense while running */
+
+	return state.time[RUNSTATE_running] + (now - state.state_entry_time);	/* accumulated running time + time in the current state */
+}
 
 
 /* Get the CPU speed from Xen */
===================================================================
--- a/arch/i386/xen/xen-ops.h
+++ b/arch/i386/xen/xen-ops.h
@@ -14,6 +14,7 @@ void __init xen_time_init(void);
 void __init xen_time_init(void);
 unsigned long xen_get_wallclock(void);
 int xen_set_wallclock(unsigned long time);
+unsigned long long xen_sched_clock(void);
 
 void xen_mark_init_mm_pinned(void);
 

  reply	other threads:[~2007-03-14 20:32 UTC|newest]

Thread overview: 51+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2007-03-13 16:31 Stolen and degraded time and schedulers Jeremy Fitzhardinge
2007-03-13 20:12 ` john stultz
2007-03-13 20:32   ` Jeremy Fitzhardinge
2007-03-13 21:27     ` Daniel Walker
2007-03-13 21:59       ` Jeremy Fitzhardinge
2007-03-14  0:43         ` Dan Hecht
2007-03-14  4:37           ` Jeremy Fitzhardinge
2007-03-14 13:58             ` Lennart Sorensen
2007-03-14 15:08               ` Jeremy Fitzhardinge
2007-03-14 15:12                 ` Lennart Sorensen
2007-03-14 19:02             ` Dan Hecht
2007-03-14 19:34               ` Jeremy Fitzhardinge
2007-03-14 19:45                 ` Rik van Riel
2007-03-14 19:47                   ` Jeremy Fitzhardinge
2007-03-14 20:02                     ` Rik van Riel
2007-03-14 20:26                 ` Dan Hecht
2007-03-14 20:31                   ` Jeremy Fitzhardinge [this message]
2007-03-14 20:46                     ` Dan Hecht
2007-03-14 21:18                       ` Jeremy Fitzhardinge
2007-03-15 19:09                         ` Dan Hecht
2007-03-15 19:18                           ` Jeremy Fitzhardinge
2007-03-15 19:48                           ` Rik van Riel
2007-03-15 19:53                           ` Jeremy Fitzhardinge
2007-03-15 20:07                             ` Dan Hecht
2007-03-15 20:14                               ` Rik van Riel
2007-03-15 20:35                                 ` Dan Hecht
2007-03-16  8:59                                   ` Martin Schwidefsky
2007-03-14 20:38                 ` Ingo Molnar
2007-03-14 20:59                   ` Jeremy Fitzhardinge
2007-03-16  8:38                     ` Ingo Molnar
2007-03-16 16:53                       ` Jeremy Fitzhardinge
2007-03-15  5:23                 ` Paul Mackerras
2007-03-15 19:33                   ` Jeremy Fitzhardinge
2007-03-14  2:00         ` Daniel Walker
2007-03-14  6:52           ` Jeremy Fitzhardinge
2007-03-14  8:20             ` Zan Lynx
2007-03-14 16:11             ` Daniel Walker
2007-03-14 16:37               ` Jeremy Fitzhardinge
2007-03-14 16:59                 ` Daniel Walker
2007-03-14 17:08                   ` Jeremy Fitzhardinge
2007-03-14 18:06                     ` Daniel Walker
2007-03-14 18:41                       ` Jeremy Fitzhardinge
2007-03-14 19:00                         ` Daniel Walker
2007-03-14 19:44                           ` Jeremy Fitzhardinge
2007-03-14 20:33                             ` Daniel Walker
2007-03-14 21:16                               ` Jeremy Fitzhardinge
2007-03-14 21:34                                 ` Daniel Walker
2007-03-14 21:42                                   ` Jeremy Fitzhardinge
2007-03-14 21:36 ` Con Kolivas
2007-03-14 21:38   ` Jeremy Fitzhardinge
2007-03-14 21:40   ` Con Kolivas

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=45F85BBB.70707@goop.org \
    --to=jeremy@goop.org \
    --cc=chrisw@sous-sol.org \
    --cc=cpufreq@lists.linux.org.uk \
    --cc=dhecht@vmware.com \
    --cc=dwalker@mvista.com \
    --cc=johnstul@us.ibm.com \
    --cc=kernel@kolivas.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mingo@elte.hu \
    --cc=paulus@au.ibm.com \
    --cc=riel@redhat.com \
    --cc=schwidefsky@de.ibm.com \
    --cc=tglx@linutronix.de \
    --cc=virtualization@lists.osdl.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).