From: <Anshul Makkar anshul.makkar@citrix.com>
To: xen-devel@lists.xen.org
Cc: george.dunlap@eu.citrix.com, dario.faggioli@citrix.com,
Anshul Makkar <anshul.makkar@citrix.com>
Subject: [PATCH] credi2-ratelimit: Implement rate limit for credit2 scheduler
Date: Wed, 6 Jul 2016 18:33:34 +0100 [thread overview]
Message-ID: <1467826414-17337-1-git-send-email-anshul.makkar@citrix.com> (raw)
From: Anshul Makkar <anshul.makkar@citrix.com>
Rate limit assures that a vcpu will execute for a minimum amount of time before
being put at the back of a queue or being preempted by higher priority thread.
It introduces a minimum amount of latency to enable a VM to batch its work and
it also ensures that system is not spending most of its time in
VMEXIT/VMENTRY because of VM that is waking/sleeping at high rate.
ratelimit can be disabled by setting it to 0.
Signed-off-by: Anshul Makkar <anshul.makkar@citrix.com>
---
---
xen/common/sched_credit2.c | 115 ++++++++++++++++++++++++++++++++++++++-------
1 file changed, 98 insertions(+), 17 deletions(-)
diff --git a/xen/common/sched_credit2.c b/xen/common/sched_credit2.c
index 1933ff1..6718574 100644
--- a/xen/common/sched_credit2.c
+++ b/xen/common/sched_credit2.c
@@ -171,6 +171,11 @@ integer_param("sched_credit2_migrate_resist", opt_migrate_resist);
#define c2r(_ops, _cpu) (CSCHED2_PRIV(_ops)->runq_map[(_cpu)])
/* CPU to runqueue struct macro */
#define RQD(_ops, _cpu) (&CSCHED2_PRIV(_ops)->rqd[c2r(_ops, _cpu)])
+/* Find the max of time slice */
+#define MAX_TSLICE(t1, t2) \
+ ({ typeof (t1) _t1 = (t1); \
+ typeof (t1) _t2 = (t2); \
+ _t1 > _t2 ? _t1 < 0 ? 0 : _t1 : _t2 < 0 ? 0 : _t2; })
/*
* Shifts for load average.
@@ -280,6 +285,7 @@ struct csched2_private {
struct csched2_runqueue_data rqd[NR_CPUS];
unsigned int load_window_shift;
+ unsigned ratelimit_us; /* each cpupool can have its onw ratelimit */
};
/*
@@ -1588,6 +1594,34 @@ csched2_dom_cntl(
return rc;
}
+static int csched2_sys_cntl(const struct scheduler *ops,
+ struct xen_sysctl_scheduler_op *sc)
+{
+ int rc = -EINVAL;
+ xen_sysctl_credit_schedule_t *params = &sc->u.sched_credit;
+ struct csched2_private *prv = CSCHED2_PRIV(ops);
+ unsigned long flags;
+
+ switch (sc->cmd )
+ {
+ case XEN_SYSCTL_SCHEDOP_putinfo:
+ if ( params->ratelimit_us &&
+ ( params->ratelimit_us < CSCHED2_MIN_TIMER ||
+ params->ratelimit_us > MICROSECS(CSCHED2_MAX_TIMER) ))
+ return rc;
+ spin_lock_irqsave(&prv->lock, flags);
+ prv->ratelimit_us = params->ratelimit_us;
+ spin_unlock_irqrestore(&prv->lock, flags);
+ break;
+
+ case XEN_SYSCTL_SCHEDOP_getinfo:
+ params->ratelimit_us = prv->ratelimit_us;
+ rc = 0;
+ break;
+ }
+ return rc;
+}
+
static void *
csched2_alloc_domdata(const struct scheduler *ops, struct domain *dom)
{
@@ -1657,12 +1691,15 @@ csched2_dom_destroy(const struct scheduler *ops, struct domain *dom)
/* How long should we let this vcpu run for? */
static s_time_t
-csched2_runtime(const struct scheduler *ops, int cpu, struct csched2_vcpu *snext)
+csched2_runtime(const struct scheduler *ops, int cpu,
+ struct csched2_vcpu *snext, s_time_t now)
{
- s_time_t time;
+ s_time_t time;
int rt_credit; /* Proposed runtime measured in credits */
struct csched2_runqueue_data *rqd = RQD(ops, cpu);
struct list_head *runq = &rqd->runq;
+ s_time_t runtime = 0;
+ struct csched2_private *prv = CSCHED2_PRIV(ops);
/*
* If we're idle, just stay so. Others (or external events)
@@ -1680,6 +1717,14 @@ csched2_runtime(const struct scheduler *ops, int cpu, struct csched2_vcpu *snext
/* 1) Basic time: Run until credit is 0. */
rt_credit = snext->credit;
+ if (snext->vcpu->is_running)
+ runtime = now - snext->vcpu->runstate.state_entry_time;
+ if ( runtime < 0 )
+ {
+ runtime = 0;
+ d2printk("%s: Time went backwards? now %"PRI_stime" state_entry_time %"PRI_stime"\n",
+ _func__, now, snext->runstate.state_entry_time);
+ }
/* 2) If there's someone waiting whose credit is positive,
* run until your credit ~= his */
@@ -1695,11 +1740,24 @@ csched2_runtime(const struct scheduler *ops, int cpu, struct csched2_vcpu *snext
}
/* The next guy may actually have a higher credit, if we've tried to
- * avoid migrating him from a different cpu. DTRT. */
+ * avoid migrating him from a different cpu. DTRT.
+ * Even if the next guy has higher credit and current vcpu has executed
+ * for less amount of time than rate limit, allow it to run for minimum
+ * amount of time.
+ */
if ( rt_credit <= 0 )
{
- time = CSCHED2_MIN_TIMER;
- SCHED_STAT_CRANK(runtime_min_timer);
+ if ( snext->vcpu->is_running && prv->ratelimit_us)
+ /* implies the current one has executed for time < ratelimit and thus
+ * it has neen selcted int runq_candidate to run next.
+ * No need to check for this condition again.
+ */
+ time = MAX_TSLICE(CSCHED2_MIN_TIMER,
+ MICROSECS(prv->ratelimit_us) - runtime);
+ else
+ time = MAX_TSLICE(CSCHED2_MIN_TIMER, MICROSECS(prv->ratelimit_us));
+
+ SCHED_STAT_CRANK(time);
}
else
{
@@ -1709,17 +1767,22 @@ csched2_runtime(const struct scheduler *ops, int cpu, struct csched2_vcpu *snext
* at a different rate. */
time = c2t(rqd, rt_credit, snext);
- /* Check limits */
- if ( time < CSCHED2_MIN_TIMER )
- {
- time = CSCHED2_MIN_TIMER;
- SCHED_STAT_CRANK(runtime_min_timer);
- }
- else if ( time > CSCHED2_MAX_TIMER )
+ /* Check limits.
+ * time > ratelimit --> time > MIN.
+ */
+ if ( time > CSCHED2_MAX_TIMER )
{
+
time = CSCHED2_MAX_TIMER;
SCHED_STAT_CRANK(runtime_max_timer);
}
+ else
+ {
+ time = MAX_TSLICE(MAX_TSLICE(CSCHED2_MIN_TIMER,
+ MICROSECS(prv->ratelimit_us)), time);
+ SCHED_STAT_CRANK(time);
+ }
+
}
return time;
@@ -1733,7 +1796,7 @@ void __dump_execstate(void *unused);
static struct csched2_vcpu *
runq_candidate(struct csched2_runqueue_data *rqd,
struct csched2_vcpu *scurr,
- int cpu, s_time_t now)
+ int cpu, s_time_t now, struct csched2_private *prv)
{
struct list_head *iter;
struct csched2_vcpu *snext = NULL;
@@ -1744,6 +1807,16 @@ runq_candidate(struct csched2_runqueue_data *rqd,
else
snext = CSCHED2_VCPU(idle_vcpu[cpu]);
+ /* return the current vcpu if it has executed for less than ratelimit.
+ * Adjuststment for the selected vcpu's credit and decision
+ * for how long it will run will be taken in csched2_runtime.
+ */
+ if ( prv->ratelimit_us && !is_idle_vcpu(scurr->vcpu) &&
+ vcpu_runnable(scurr->vcpu) &&
+ (now - scurr->vcpu->runstate.state_entry_time) <
+ MICROSECS(prv->ratelimit_us) )
+ return scurr;
+
list_for_each( iter, &rqd->runq )
{
struct csched2_vcpu * svc = list_entry(iter, struct csched2_vcpu, runq_elem);
@@ -1762,9 +1835,13 @@ runq_candidate(struct csched2_runqueue_data *rqd,
}
/* If the next one on the list has more credit than current
- * (or idle, if current is not runnable), choose it. */
+ * (or idle, if current is not runnable) and current one has already
+ * executed for more than ratelimit. choose it.
+ * Control has reached here means that current vcpu has executed >
+ * ratelimit_us or ratelimit is off, so chose the next one.
+ */
if ( svc->credit > snext->credit )
- snext = svc;
+ snext = svc;
/* In any case, if we got this far, break. */
break;
@@ -1787,6 +1864,7 @@ csched2_schedule(
struct csched2_vcpu * const scurr = CSCHED2_VCPU(current);
struct csched2_vcpu *snext = NULL;
struct task_slice ret;
+ struct csched2_private *prv = CSCHED2_PRIV(ops);
SCHED_STAT_CRANK(schedule);
CSCHED2_VCPU_CHECK(current);
@@ -1857,7 +1935,7 @@ csched2_schedule(
snext = CSCHED2_VCPU(idle_vcpu[cpu]);
}
else
- snext=runq_candidate(rqd, scurr, cpu, now);
+ snext=runq_candidate(rqd, scurr, cpu, now, prv);
/* If switching from a non-idle runnable vcpu, put it
* back on the runqueue. */
@@ -1921,7 +1999,7 @@ csched2_schedule(
/*
* Return task to run next...
*/
- ret.time = csched2_runtime(ops, cpu, snext);
+ ret.time = csched2_runtime(ops, cpu, snext, now);
ret.task = snext->vcpu;
CSCHED2_VCPU_CHECK(ret.task);
@@ -2353,6 +2431,8 @@ csched2_init(struct scheduler *ops)
prv->runq_map[i] = -1;
prv->rqd[i].id = -1;
}
+ /* initialize ratelimit */
+ prv->ratelimit_us = 1000 * CSCHED2_MIN_TIMER;
prv->load_window_shift = opt_load_window_shift;
@@ -2385,6 +2465,7 @@ static const struct scheduler sched_credit2_def = {
.wake = csched2_vcpu_wake,
.adjust = csched2_dom_cntl,
+ .adjust_global = csched2_sys_cntl,
.pick_cpu = csched2_cpu_pick,
.migrate = csched2_vcpu_migrate,
--
1.9.1
_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel
next reply other threads:[~2016-07-06 17:33 UTC|newest]
Thread overview: 6+ messages / expand[flat|nested] mbox.gz Atom feed top
2016-07-06 17:33 Anshul [this message]
2016-07-07 9:59 ` [PATCH] credi2-ratelimit: Implement rate limit for credit2 scheduler Dario Faggioli
2016-07-12 16:16 ` George Dunlap
2016-07-13 8:53 ` Dario Faggioli
2016-07-13 11:08 ` George Dunlap
2016-07-18 11:54 ` anshul makkar
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1467826414-17337-1-git-send-email-anshul.makkar@citrix.com \
--to=xen-devel@lists.xen.org \
--cc=anshul.makkar@citrix.com \
--cc=dario.faggioli@citrix.com \
--cc=george.dunlap@eu.citrix.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).