From: Michael Wolf <mjw@linux.vnet.ibm.com>
To: linux-kernel@vger.kernel.org
Cc: riel@redhat.com, gleb@redhat.com, kvm@vger.kernel.org,
peterz@infradead.org, mtosatti@redhat.com, glommer@parallels.com,
mingo@redhat.com, anthony@codemonkey.ws
Subject: [PATCH 4/4] Add a timer to allow the separation of consigned from steal time.
Date: Tue, 05 Feb 2013 15:49:41 -0600 [thread overview]
Message-ID: <20130205214941.4615.29852.stgit@lambeau> (raw)
In-Reply-To: <20130205214818.4615.12937.stgit@lambeau>
Add a helper routine to kernel/sched/core.c that lets the kvm module
retrieve the CPU hard-limit (quota and period) settings. The values are
used to set up a per-vcpu timer that separates consigned time from
steal time.
Signed-off-by: Michael Wolf <mjw@linux.vnet.ibm.com>
---
arch/x86/include/asm/kvm_host.h | 9 ++++++
arch/x86/kvm/x86.c | 62 ++++++++++++++++++++++++++++++++++++++-
kernel/sched/core.c | 20 +++++++++++++
3 files changed, 90 insertions(+), 1 deletion(-)
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index fe5a37b..9518613 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -355,6 +355,15 @@ struct kvm_vcpu_arch {
bool tpr_access_reporting;
/*
+ * timer used to determine if the time should be counted as
+ * steal time or consigned time.
+ */
+ struct hrtimer steal_timer;
+ u64 current_consigned;
+ s64 consigned_quota;
+ s64 consigned_period;
+
+ /*
* Paging state of the vcpu
*
* If the vcpu runs in guest mode with two level paging this still saves
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 51b63d1..79d144d 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1848,13 +1848,32 @@ static void kvmclock_reset(struct kvm_vcpu *vcpu)
static void accumulate_steal_time(struct kvm_vcpu *vcpu)
{
u64 delta;
+ u64 steal_delta;
+ u64 consigned_delta;
if (!(vcpu->arch.st.msr_val & KVM_MSR_ENABLED))
return;
delta = current->sched_info.run_delay - vcpu->arch.st.last_steal;
vcpu->arch.st.last_steal = current->sched_info.run_delay;
- vcpu->arch.st.accum_steal = delta;
+
+ /* split the delta into steal and consigned */
+ if (vcpu->arch.current_consigned < vcpu->arch.consigned_quota) {
+ vcpu->arch.current_consigned += delta;
+ if (vcpu->arch.current_consigned > vcpu->arch.consigned_quota) {
+ steal_delta = vcpu->arch.current_consigned
+ - vcpu->arch.consigned_quota;
+ consigned_delta = delta - steal_delta;
+ } else {
+ consigned_delta = delta;
+ steal_delta = 0;
+ }
+ } else {
+ consigned_delta = 0;
+ steal_delta = delta;
+ }
+ vcpu->arch.st.accum_steal = steal_delta;
+ vcpu->arch.st.accum_consigned = consigned_delta;
}
static void record_steal_time(struct kvm_vcpu *vcpu)
@@ -2629,8 +2648,35 @@ static bool need_emulate_wbinvd(struct kvm_vcpu *vcpu)
!(vcpu->kvm->arch.iommu_flags & KVM_IOMMU_CACHE_COHERENCY);
}
+extern int sched_use_hard_capping(int cpuid, int num_vcpus, s64 *quota,
+ s64 *period);
+/*
+ * hrtimer callback for vcpu->arch.steal_timer.
+ *
+ * Re-reads the hard-cap quota and period for the vcpu, resets the consigned
+ * time accumulated so far, and re-arms the timer one consigned_period ahead.
+ * Always returns HRTIMER_RESTART.
+ */
+static enum hrtimer_restart steal_timer_fn(struct hrtimer *data)
+{
+	struct kvm_vcpu *vcpu;
+	int num_vcpus;
+
+	vcpu = container_of(data, struct kvm_vcpu, arch.steal_timer);
+	num_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
+	/*
+	 * Same guard as kvm_arch_vcpu_load(): the count is used as a divisor
+	 * by sched_use_hard_capping(), so it must never be zero.
+	 */
+	if (num_vcpus == 0)
+		num_vcpus = 1;
+	sched_use_hard_capping(vcpu->cpu, num_vcpus,
+			       &vcpu->arch.consigned_quota,
+			       &vcpu->arch.consigned_period);
+	/* a new period starts: nothing has been consigned yet */
+	vcpu->arch.current_consigned = 0;
+	hrtimer_forward_now(&vcpu->arch.steal_timer,
+			    ktime_set(0, vcpu->arch.consigned_period));
+
+	return HRTIMER_RESTART;
+}
+
void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
+ struct kvm *kvm;
+ int num_vcpus;
+ ktime_t ktime;
+
/* Address WBINVD may be executed by guest */
if (need_emulate_wbinvd(vcpu)) {
if (kvm_x86_ops->has_wbinvd_exit())
@@ -2670,6 +2716,18 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
kvm_migrate_timers(vcpu);
vcpu->cpu = cpu;
}
+ /* Initialize and start a timer to capture steal and consigned time */
+ kvm = vcpu->kvm;
+ num_vcpus = atomic_read(&kvm->online_vcpus);
+ num_vcpus = (num_vcpus == 0) ? 1 : num_vcpus;
+ sched_use_hard_capping(vcpu->cpu, num_vcpus,
+ &vcpu->arch.consigned_quota,
+ &vcpu->arch.consigned_period);
+ hrtimer_init(&vcpu->arch.steal_timer, CLOCK_MONOTONIC,
+ HRTIMER_MODE_REL);
+ vcpu->arch.steal_timer.function = &steal_timer_fn;
+ ktime = ktime_set(0, vcpu->arch.consigned_period);
+ hrtimer_start(&vcpu->arch.steal_timer, ktime, HRTIMER_MODE_REL);
accumulate_steal_time(vcpu);
kvm_make_request(KVM_REQ_STEAL_UPDATE, vcpu);
@@ -2680,6 +2738,7 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
kvm_x86_ops->vcpu_put(vcpu);
kvm_put_guest_fpu(vcpu);
vcpu->arch.last_host_tsc = native_read_tsc();
+ hrtimer_cancel(&vcpu->arch.steal_timer);
}
static int kvm_vcpu_ioctl_get_lapic(struct kvm_vcpu *vcpu,
@@ -6685,6 +6744,7 @@ void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
{
int idx;
+ hrtimer_cancel(&vcpu->arch.steal_timer);
kvm_pmu_destroy(vcpu);
kfree(vcpu->arch.mce_banks);
kvm_free_lapic(vcpu);
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index efc2652..133ee47 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -8154,6 +8154,26 @@ void cpuacct_charge(struct task_struct *tsk, u64 cputime)
rcu_read_unlock();
}
+/*
+ * Report the hard-cap settings of the task group that the task currently
+ * running on @cpuid belongs to.
+ *
+ * @cpuid:    cpu whose current task selects the task group
+ * @num_cpus: number of cpus the group quota is divided among; values
+ *            below 1 are treated as 1
+ * @quota:    out: the period minus the per-cpu share of the group quota,
+ *            never negative; set to 0 when no capping is configured
+ * @period:   out: the value tg_get_cfs_period() reports for the group
+ *
+ * return 1 if the scheduler is using some form of hard capping
+ * return 0 if there is no capping configured.
+ */
+int sched_use_hard_capping(int cpuid, int num_cpus, s64 *quota, s64 *period)
+{
+	struct rq *rq = cpu_rq(cpuid);
+	struct task_struct *curr = rq->curr;
+	struct task_group *tg = curr->sched_task_group;
+	s64 cfs_quota;
+	s64 total_time;
+
+	*period = tg_get_cfs_period(tg);
+
+	/* Test the group's own quota, not whatever the caller left in *quota. */
+	cfs_quota = tg_get_cfs_quota(tg);
+	if (cfs_quota == RUNTIME_INF || cfs_quota == -1) {
+		*quota = 0;
+		return 0;
+	}
+
+	/* Same guard kvm_arch_vcpu_load() applies; avoids dividing by zero. */
+	if (num_cpus < 1)
+		num_cpus = 1;
+
+	/*
+	 * NOTE(review): jiffies_to_usecs() scales by USEC_PER_SEC / HZ, so the
+	 * unit of the result depends on HZ, and the tg_get_cfs_*() values do
+	 * not appear to be jiffies counts. Callers also hand *period to
+	 * ktime_set() as nanoseconds without any conversion. Confirm the
+	 * intended units before merging.
+	 */
+	*quota = jiffies_to_usecs(cfs_quota) / num_cpus;
+	total_time = jiffies_to_usecs(*period);
+	*quota = total_time - *quota;
+	/* A per-cpu share larger than the period leaves nothing consigned. */
+	if (*quota < 0)
+		*quota = 0;
+	return 1;
+}
+EXPORT_SYMBOL_GPL(sched_use_hard_capping);
struct cgroup_subsys cpuacct_subsys = {
.name = "cpuacct",
next prev parent reply other threads:[~2013-02-05 21:50 UTC|newest]
Thread overview: 36+ messages / expand[flat|nested] mbox.gz Atom feed top
2013-02-05 21:49 [PATCH 0/4] Alter steal-time reporting in the guest Michael Wolf
2013-02-05 21:49 ` [PATCH 1/4] Alter the amount of steal time reported by " Michael Wolf
2013-02-05 21:49 ` [PATCH 2/4] Expand the steal time msr to also contain the consigned time Michael Wolf
2013-02-06 21:14 ` Rik van Riel
2013-02-07 14:25 ` Michael Wolf
2013-02-05 21:49 ` [PATCH 3/4] Add the code to send the consigned time from the host to the guest Michael Wolf
2013-02-06 21:18 ` Rik van Riel
2013-02-07 14:26 ` Michael Wolf
2013-02-05 21:49 ` Michael Wolf [this message]
2013-02-06 14:36 ` [PATCH 4/4] Add a timer to allow the separation of consigned from steal time Glauber Costa
2013-02-06 18:07 ` Michael Wolf
2013-02-07 8:46 ` Glauber Costa
2013-02-07 14:27 ` Michael Wolf
2013-02-18 23:57 ` Marcelo Tosatti
2013-03-05 20:17 ` Michael Wolf
2013-03-06 1:35 ` Marcelo Tosatti
2013-02-18 16:43 ` [PATCH 0/4] Alter steal-time reporting in the guest Frederic Weisbecker
2013-02-19 1:11 ` Marcelo Tosatti
2013-03-05 20:22 ` Michael Wolf
2013-03-06 1:41 ` Marcelo Tosatti
2013-03-06 8:13 ` Glauber Costa
2013-03-06 16:29 ` Michael Wolf
2013-03-07 0:52 ` Marcelo Tosatti
2013-03-07 3:11 ` Paul Mackerras
2013-03-07 20:23 ` Michael Wolf
2013-03-06 16:27 ` Michael Wolf
2013-03-07 2:30 ` Marcelo Tosatti
2013-03-07 21:09 ` Michael Wolf
2013-03-07 21:15 ` Michael Wolf
2013-03-07 21:25 ` Marcelo Tosatti
2013-03-07 22:34 ` Michael Wolf
2013-03-08 1:54 ` Marcelo Tosatti
2013-03-08 2:21 ` Marcelo Tosatti
2013-03-06 13:34 ` Frederic Weisbecker
2013-03-06 16:23 ` Michael Wolf
2013-03-06 13:20 ` Frederic Weisbecker
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20130205214941.4615.29852.stgit@lambeau \
--to=mjw@linux.vnet.ibm.com \
--cc=anthony@codemonkey.ws \
--cc=gleb@redhat.com \
--cc=glommer@parallels.com \
--cc=kvm@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=mingo@redhat.com \
--cc=mtosatti@redhat.com \
--cc=peterz@infradead.org \
--cc=riel@redhat.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).