xen-devel.lists.xenproject.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] credi2-ratelimit: Implement rate limit for credit2 scheduler
@ 2016-07-06 17:33 Anshul
  2016-07-07  9:59 ` Dario Faggioli
  2016-07-12 16:16 ` George Dunlap
  0 siblings, 2 replies; 6+ messages in thread
From: Anshul @ 2016-07-06 17:33 UTC (permalink / raw)
  To: xen-devel; +Cc: george.dunlap, dario.faggioli, Anshul Makkar

From: Anshul Makkar <anshul.makkar@citrix.com>

Rate limit assures that a vcpu will execute for a minimum amount of time before
being put at the back of a queue or being preempted by higher priority thread.

It introduces a minimum amount of latency to enable a VM to batch its work and
it also ensures that system is not spending most of its time in
VMEXIT/VMENTRY because of VM that is waking/sleeping at high rate.

ratelimit can be disabled by setting it to 0.

Signed-off-by: Anshul Makkar <anshul.makkar@citrix.com>
---
---
 xen/common/sched_credit2.c | 115 ++++++++++++++++++++++++++++++++++++++-------
 1 file changed, 98 insertions(+), 17 deletions(-)

diff --git a/xen/common/sched_credit2.c b/xen/common/sched_credit2.c
index 1933ff1..6718574 100644
--- a/xen/common/sched_credit2.c
+++ b/xen/common/sched_credit2.c
@@ -171,6 +171,11 @@ integer_param("sched_credit2_migrate_resist", opt_migrate_resist);
 #define c2r(_ops, _cpu)     (CSCHED2_PRIV(_ops)->runq_map[(_cpu)])
 /* CPU to runqueue struct macro */
 #define RQD(_ops, _cpu)     (&CSCHED2_PRIV(_ops)->rqd[c2r(_ops, _cpu)])
+/* Find the max of time slice */
+#define MAX_TSLICE(t1, t2)  \
+                   ({ typeof (t1) _t1 = (t1); \
+                      typeof (t1) _t2 = (t2); \
+                      _t1 > _t2 ? _t1 < 0 ? 0 : _t1 : _t2 < 0 ? 0 : _t2; })
 
 /*
  * Shifts for load average.
@@ -280,6 +285,7 @@ struct csched2_private {
     struct csched2_runqueue_data rqd[NR_CPUS];
 
     unsigned int load_window_shift;
+    unsigned ratelimit_us; /* each cpupool can have its own ratelimit */
 };
 
 /*
@@ -1588,6 +1594,34 @@ csched2_dom_cntl(
     return rc;
 }
 
+static int csched2_sys_cntl(const struct scheduler *ops,
+                            struct xen_sysctl_scheduler_op *sc)
+{
+    int rc = -EINVAL;
+    xen_sysctl_credit_schedule_t *params = &sc->u.sched_credit;
+    struct csched2_private *prv = CSCHED2_PRIV(ops);
+    unsigned long flags;
+
+    switch (sc->cmd )
+    {
+        case XEN_SYSCTL_SCHEDOP_putinfo:
+            if ( params->ratelimit_us &&
+                ( params->ratelimit_us < CSCHED2_MIN_TIMER ||
+                  params->ratelimit_us > MICROSECS(CSCHED2_MAX_TIMER) ))
+                return rc;
+            spin_lock_irqsave(&prv->lock, flags);
+            prv->ratelimit_us = params->ratelimit_us;
+            spin_unlock_irqrestore(&prv->lock, flags);
+            break;
+
+        case XEN_SYSCTL_SCHEDOP_getinfo:
+            params->ratelimit_us = prv->ratelimit_us;
+            rc = 0;
+            break;
+    }
+    return rc;
+}
+
 static void *
 csched2_alloc_domdata(const struct scheduler *ops, struct domain *dom)
 {
@@ -1657,12 +1691,15 @@ csched2_dom_destroy(const struct scheduler *ops, struct domain *dom)
 
 /* How long should we let this vcpu run for? */
 static s_time_t
-csched2_runtime(const struct scheduler *ops, int cpu, struct csched2_vcpu *snext)
+csched2_runtime(const struct scheduler *ops, int cpu,
+                struct csched2_vcpu *snext, s_time_t now)
 {
-    s_time_t time; 
+    s_time_t time;
     int rt_credit; /* Proposed runtime measured in credits */
     struct csched2_runqueue_data *rqd = RQD(ops, cpu);
     struct list_head *runq = &rqd->runq;
+    s_time_t runtime = 0;
+    struct csched2_private *prv = CSCHED2_PRIV(ops);
 
     /*
      * If we're idle, just stay so. Others (or external events)
@@ -1680,6 +1717,14 @@ csched2_runtime(const struct scheduler *ops, int cpu, struct csched2_vcpu *snext
 
     /* 1) Basic time: Run until credit is 0. */
     rt_credit = snext->credit;
+    if (snext->vcpu->is_running)
+        runtime = now - snext->vcpu->runstate.state_entry_time;
+    if ( runtime < 0 )
+    {
+        runtime = 0;
+        d2printk("%s: Time went backwards? now %"PRI_stime" state_entry_time %"PRI_stime"\n",
+                  __func__, now, snext->vcpu->runstate.state_entry_time);
+    }
 
     /* 2) If there's someone waiting whose credit is positive,
      * run until your credit ~= his */
@@ -1695,11 +1740,24 @@ csched2_runtime(const struct scheduler *ops, int cpu, struct csched2_vcpu *snext
     }
 
     /* The next guy may actually have a higher credit, if we've tried to
-     * avoid migrating him from a different cpu.  DTRT.  */
+     * avoid migrating him from a different cpu.  DTRT.
+     * Even if the next guy has higher credit and current vcpu has executed
+     * for less amount of time than rate limit, allow it to run for minimum
+     * amount of time.
+     */
     if ( rt_credit <= 0 )
     {
-        time = CSCHED2_MIN_TIMER;
-        SCHED_STAT_CRANK(runtime_min_timer);
+        if ( snext->vcpu->is_running && prv->ratelimit_us)
+           /* implies the current one has executed for time < ratelimit and thus
+            * it has been selected in runq_candidate to run next.
+            * No need to check for this condition again.
+            */
+            time = MAX_TSLICE(CSCHED2_MIN_TIMER,
+                               MICROSECS(prv->ratelimit_us) - runtime);
+        else
+            time = MAX_TSLICE(CSCHED2_MIN_TIMER, MICROSECS(prv->ratelimit_us));
+
+        SCHED_STAT_CRANK(time);
     }
     else
     {
@@ -1709,17 +1767,22 @@ csched2_runtime(const struct scheduler *ops, int cpu, struct csched2_vcpu *snext
          * at a different rate. */
         time = c2t(rqd, rt_credit, snext);
 
-        /* Check limits */
-        if ( time < CSCHED2_MIN_TIMER )
-        {
-            time = CSCHED2_MIN_TIMER;
-            SCHED_STAT_CRANK(runtime_min_timer);
-        }
-        else if ( time > CSCHED2_MAX_TIMER )
+        /* Check limits.
+         * time > ratelimit --> time > MIN.
+         */
+        if ( time > CSCHED2_MAX_TIMER )
         {
+
             time = CSCHED2_MAX_TIMER;
             SCHED_STAT_CRANK(runtime_max_timer);
         }
+        else
+        {
+            time = MAX_TSLICE(MAX_TSLICE(CSCHED2_MIN_TIMER,
+                                          MICROSECS(prv->ratelimit_us)), time);
+            SCHED_STAT_CRANK(time);
+        }
+
     }
 
     return time;
@@ -1733,7 +1796,7 @@ void __dump_execstate(void *unused);
 static struct csched2_vcpu *
 runq_candidate(struct csched2_runqueue_data *rqd,
                struct csched2_vcpu *scurr,
-               int cpu, s_time_t now)
+               int cpu, s_time_t now, struct csched2_private *prv)
 {
     struct list_head *iter;
     struct csched2_vcpu *snext = NULL;
@@ -1744,6 +1807,16 @@ runq_candidate(struct csched2_runqueue_data *rqd,
     else
         snext = CSCHED2_VCPU(idle_vcpu[cpu]);
 
+    /* return the current vcpu if it has executed for less than ratelimit.
+     * Adjustment for the selected vcpu's credit and decision
+     * for how long it will run will be taken in csched2_runtime.
+     */
+    if ( prv->ratelimit_us && !is_idle_vcpu(scurr->vcpu) &&
+         vcpu_runnable(scurr->vcpu) &&
+         (now - scurr->vcpu->runstate.state_entry_time) <
+          MICROSECS(prv->ratelimit_us) )
+        return scurr;
+
     list_for_each( iter, &rqd->runq )
     {
         struct csched2_vcpu * svc = list_entry(iter, struct csched2_vcpu, runq_elem);
@@ -1762,9 +1835,13 @@ runq_candidate(struct csched2_runqueue_data *rqd,
         }
 
         /* If the next one on the list has more credit than current
-         * (or idle, if current is not runnable), choose it. */
+         * (or idle, if current is not runnable) and current one has already
+         * executed for more than ratelimit. choose it.
+         * Reaching here means that the current vcpu has executed for more than
+         * ratelimit_us or ratelimit is off, so choose the next one.
+         */
         if ( svc->credit > snext->credit )
-            snext = svc;
+                snext = svc;
 
         /* In any case, if we got this far, break. */
         break;
@@ -1787,6 +1864,7 @@ csched2_schedule(
     struct csched2_vcpu * const scurr = CSCHED2_VCPU(current);
     struct csched2_vcpu *snext = NULL;
     struct task_slice ret;
+    struct csched2_private *prv = CSCHED2_PRIV(ops);
 
     SCHED_STAT_CRANK(schedule);
     CSCHED2_VCPU_CHECK(current);
@@ -1857,7 +1935,7 @@ csched2_schedule(
         snext = CSCHED2_VCPU(idle_vcpu[cpu]);
     }
     else
-        snext=runq_candidate(rqd, scurr, cpu, now);
+        snext=runq_candidate(rqd, scurr, cpu, now, prv);
 
     /* If switching from a non-idle runnable vcpu, put it
      * back on the runqueue. */
@@ -1921,7 +1999,7 @@ csched2_schedule(
     /*
      * Return task to run next...
      */
-    ret.time = csched2_runtime(ops, cpu, snext);
+    ret.time = csched2_runtime(ops, cpu, snext, now);
     ret.task = snext->vcpu;
 
     CSCHED2_VCPU_CHECK(ret.task);
@@ -2353,6 +2431,8 @@ csched2_init(struct scheduler *ops)
         prv->runq_map[i] = -1;
         prv->rqd[i].id = -1;
     }
+    /* initialize ratelimit */
+    prv->ratelimit_us = 1000 * CSCHED2_MIN_TIMER;
 
     prv->load_window_shift = opt_load_window_shift;
 
@@ -2385,6 +2465,7 @@ static const struct scheduler sched_credit2_def = {
     .wake           = csched2_vcpu_wake,
 
     .adjust         = csched2_dom_cntl,
+    .adjust_global  = csched2_sys_cntl,
 
     .pick_cpu       = csched2_cpu_pick,
     .migrate        = csched2_vcpu_migrate,
-- 
1.9.1


_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel

^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: [PATCH] credi2-ratelimit: Implement rate limit for credit2 scheduler
  2016-07-06 17:33 [PATCH] credi2-ratelimit: Implement rate limit for credit2 scheduler Anshul
@ 2016-07-07  9:59 ` Dario Faggioli
  2016-07-12 16:16 ` George Dunlap
  1 sibling, 0 replies; 6+ messages in thread
From: Dario Faggioli @ 2016-07-07  9:59 UTC (permalink / raw)
  To: anshul.makkar, xen-devel; +Cc: george.dunlap


[-- Attachment #1.1: Type: text/plain, Size: 9516 bytes --]

On Wed, 2016-07-06 at 18:33 +0100, anshul.makkar@citrix.com wrote:
> From: Anshul Makkar <anshul.makkar@citrix.com>
> 
Hey, Anshul,

Thanks for doing this!

> Rate limit assures that a vcpu will execute for a minimum amount of
> time before
> being put at the back of a queue or being preempted by higher
> priority thread.
> 
I'd say "before a context switch occurs", or "before a context switch
is allowed".

> It introduces a minimum amount of latency to enable a VM to batch its
> work and
> it also ensures that system is not spending most of its time in
> VMEXIT/VMENTRY because of VM that is waking/sleeping at high rate.
> 
Again, something like, "ensures that context switches do not happen
too frequently" seems more accurate and useful as a description.

I also wouldn't use the word "latency", above, to describe the time a
VM has to carry on some work uninterrupted.

> ratelimit can be disabled by setting it to 0.
> 
Good.

> --- a/xen/common/sched_credit2.c
> +++ b/xen/common/sched_credit2.c
> @@ -171,6 +171,11 @@ integer_param("sched_credit2_migrate_resist",
> opt_migrate_resist);
>  #define c2r(_ops, _cpu)     (CSCHED2_PRIV(_ops)->runq_map[(_cpu)])
>  /* CPU to runqueue struct macro */
>  #define RQD(_ops, _cpu)     (&CSCHED2_PRIV(_ops)->rqd[c2r(_ops,
> _cpu)])
> +/* Find the max of time slice */
> +#define MAX_TSLICE(t1, t2)  \
> +                   ({ typeof (t1) _t1 = (t1); \
> +                      typeof (t1) _t2 = (t2); \
> +                      _t1 > _t2 ? _t1 < 0 ? 0 : _t1 : _t2 < 0 ? 0 :
> _t2; })
> 
I'm bad at this things, but isn't subtracting the two values a more
efficient, effective and easier to read way to do this?

Also, I think this macro can be called something like MAX_STIME(), or
STIME_BEFORE (or something like that) and be put in time.h.

> @@ -280,6 +285,7 @@ struct csched2_private {
>      struct csched2_runqueue_data rqd[NR_CPUS];
>  
>      unsigned int load_window_shift;
> +    unsigned ratelimit_us; /* each cpupool can have its onw
> ratelimit */
>  };
>  
>  /*
> @@ -1588,6 +1594,34 @@ csched2_dom_cntl(
>      return rc;
>  }
>  
> +static int csched2_sys_cntl(const struct scheduler *ops,
> +                            struct xen_sysctl_scheduler_op *sc)
> +{
> +    int rc = -EINVAL;
> +    xen_sysctl_credit_schedule_t *params = &sc->u.sched_credit;
> +    struct csched2_private *prv = CSCHED2_PRIV(ops);
> +    unsigned long flags;
> +
> +    switch (sc->cmd )
> +    {
> +        case XEN_SYSCTL_SCHEDOP_putinfo:
> +            if ( params->ratelimit_us &&
> +                ( params->ratelimit_us < CSCHED2_MIN_TIMER ||
> +                  params->ratelimit_us >
> MICROSECS(CSCHED2_MAX_TIMER) ))
>
I see the link with CSCHED2_MIN_TIMER. Still, I think we can just
reuse XEN_SYSCTL_SCHED_RATELIMIT_MAX and _MIN from sysctl.h.

In fact, the minimum interval of time for programming a timer and the
minimum (theoretical) value usable for rate-limiting are, logically,
two different things. Not to mention, that MIN_TIMER is an internal,
implementation detail, which we may want to change at some point, for
various reasons, and it's not good to have one user facing parameter
bound to something like that, IMO.

This also will make things more consistent (between Credit1 and
Credit2).

> +                return rc;
> +            spin_lock_irqsave(&prv->lock, flags);
> +            prv->ratelimit_us = params->ratelimit_us;
> +            spin_unlock_irqrestore(&prv->lock, flags);
> +            break;
> +
In Credit1, this break is not there, and putinfo also returns back the
value that it has just written. I don't feel like this is terribly
important, but, as said, there's value in keeping the two schedulers in
sync, with respect to the interface to this.

> @@ -1680,6 +1717,14 @@ csched2_runtime(const struct scheduler *ops,
> int cpu, struct csched2_vcpu *snext
>  
>      /* 1) Basic time: Run until credit is 0. */
>      rt_credit = snext->credit;
> +    if (snext->vcpu->is_running)
> +        runtime = now - snext->vcpu->runstate.state_entry_time;
> +    if ( runtime < 0 )
> +    {
> +        runtime = 0;
> +        d2printk("%s: Time went backwards? now %"PRI_stime"
> state_entry_time %"PRI_stime"\n",
> +                  _func__, now, snext->runstate.state_entry_time);
> +    }
>  
>      /* 2) If there's someone waiting whose credit is positive,
>       * run until your credit ~= his */
> @@ -1695,11 +1740,24 @@ csched2_runtime(const struct scheduler *ops,
> int cpu, struct csched2_vcpu *snext
>      }
>  
>      /* The next guy may actually have a higher credit, if we've
> tried to
> -     * avoid migrating him from a different cpu.  DTRT.  */
> +     * avoid migrating him from a different cpu.  DTRT.
> +     * Even if the next guy has higher credit and current vcpu has
> executed
> +     * for less amount of time than rate limit, allow it to run for
> minimum
> +     * amount of time.
> +     */
>      if ( rt_credit <= 0 )
>      {
> -        time = CSCHED2_MIN_TIMER;
> -        SCHED_STAT_CRANK(runtime_min_timer);
> +        if ( snext->vcpu->is_running && prv->ratelimit_us)
> +           /* implies the current one has executed for time <
> ratelimit and thus
> +            * it has neen selcted int runq_candidate to run next.
> +            * No need to check for this condition again.
> +            */
> +            time = MAX_TSLICE(CSCHED2_MIN_TIMER,
> +                               MICROSECS(prv->ratelimit_us) -
> runtime);
> +        else
> +            time = MAX_TSLICE(CSCHED2_MIN_TIMER, MICROSECS(prv-
> >ratelimit_us));
> +
> +        SCHED_STAT_CRANK(time);
>
If you want to define a new performance counter, you need more than
just this. But I think you should just not do that for now...

>      }
>      else
>      {
> @@ -1709,17 +1767,22 @@ csched2_runtime(const struct scheduler *ops,
> int cpu, struct csched2_vcpu *snext
>           * at a different rate. */
>          time = c2t(rqd, rt_credit, snext);
>  
> -        /* Check limits */
> -        if ( time < CSCHED2_MIN_TIMER )
> -        {
> -            time = CSCHED2_MIN_TIMER;
> -            SCHED_STAT_CRANK(runtime_min_timer);
> -        }
> -        else if ( time > CSCHED2_MAX_TIMER )
> +        /* Check limits.
> +         * time > ratelimit --> time > MIN.
> +         */
> +        if ( time > CSCHED2_MAX_TIMER )
>          {
> +
>              time = CSCHED2_MAX_TIMER;
>              SCHED_STAT_CRANK(runtime_max_timer);
>          }
> +        else
> +        {
> +            time = MAX_TSLICE(MAX_TSLICE(CSCHED2_MIN_TIMER,
> +                                          MICROSECS(prv-
> >ratelimit_us)), time);
> +            SCHED_STAT_CRANK(time);
> +        }
> +
>      }
>  
So, I've been thinking... Why don't we "just" fixup, right here, the
value that we are about to return?

I.e., basically we return the max between time and
prv->runtime_us - runtime (with runtime being =0 in case snext hasn't
run yet). Or am I missing something?

>      return time;

> @@ -1762,9 +1835,13 @@ runq_candidate(struct csched2_runqueue_data
> *rqd,
>          }
>  
>          /* If the next one on the list has more credit than current
> -         * (or idle, if current is not runnable), choose it. */
> +         * (or idle, if current is not runnable) and current one has
> already
> +         * executed for more than ratelimit. choose it.
> +         * Control has reached here means that current vcpu has
> executed >
> +         * ratelimit_us or ratelimit is off, so chose the next one.
> +         */
>          if ( svc->credit > snext->credit )
> -            snext = svc;
> +                snext = svc;
>  
I don't think I see the need for these changes (with this line here
above being more wrong than pointless, actually).

> @@ -2353,6 +2431,8 @@ csched2_init(struct scheduler *ops)
>          prv->runq_map[i] = -1;
>          prv->rqd[i].id = -1;
>      }
> +    /* initialize ratelimit */
> +    prv->ratelimit_us = 1000 * CSCHED2_MIN_TIMER;
>  
>      prv->load_window_shift = opt_load_window_shift;
>
Mmm... I'd rather set this to either 0 (so, disabled)
or XEN_SYSCTL_CSCHED_TSLICE_MIN (but I indeed prefer 0) by default.
Then do some experiments, see if it helps and try to come up with a
reasonable value.

Thanks and regards,
Dario
-- 
<<This happens because I choose it to happen!>> (Raistlin Majere)
-----------------------------------------------------------------
Dario Faggioli, Ph.D, http://about.me/dario.faggioli
Senior Software Engineer, Citrix Systems R&D Ltd., Cambridge (UK)


[-- Attachment #1.2: This is a digitally signed message part --]
[-- Type: application/pgp-signature, Size: 819 bytes --]

[-- Attachment #2: Type: text/plain, Size: 127 bytes --]

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] credi2-ratelimit: Implement rate limit for credit2 scheduler
  2016-07-06 17:33 [PATCH] credi2-ratelimit: Implement rate limit for credit2 scheduler Anshul
  2016-07-07  9:59 ` Dario Faggioli
@ 2016-07-12 16:16 ` George Dunlap
  2016-07-13  8:53   ` Dario Faggioli
  1 sibling, 1 reply; 6+ messages in thread
From: George Dunlap @ 2016-07-12 16:16 UTC (permalink / raw)
  To: Anshul Makkar; +Cc: Dario Faggioli, xen-devel

[-- Attachment #1: Type: text/plain, Size: 13894 bytes --]

On Wed, Jul 6, 2016 at 6:33 PM, Anshul Makkar <anshul.makkar@citrix.com> wrote:
> From: Anshul Makkar <anshul.makkar@citrix.com>
>
> Rate limit assures that a vcpu will execute for a minimum amount of time before
> being put at the back of a queue or being preempted by higher priority thread.
>
> It introduces a minimum amount of latency to enable a VM to batch its work and
> it also ensures that system is not spending most of its time in
> VMEXIT/VMENTRY because of VM that is waking/sleeping at high rate.
>
> ratelimit can be disabled by setting it to 0.
>
> Signed-off-by: Anshul Makkar <anshul.makkar@citrix.com>

Hey Anshul!  Thanks for submitting this.  A few comments.

> ---
> ---
>  xen/common/sched_credit2.c | 115 ++++++++++++++++++++++++++++++++++++++-------
>  1 file changed, 98 insertions(+), 17 deletions(-)
>
> diff --git a/xen/common/sched_credit2.c b/xen/common/sched_credit2.c
> index 1933ff1..6718574 100644
> --- a/xen/common/sched_credit2.c
> +++ b/xen/common/sched_credit2.c
> @@ -171,6 +171,11 @@ integer_param("sched_credit2_migrate_resist", opt_migrate_resist);
>  #define c2r(_ops, _cpu)     (CSCHED2_PRIV(_ops)->runq_map[(_cpu)])
>  /* CPU to runqueue struct macro */
>  #define RQD(_ops, _cpu)     (&CSCHED2_PRIV(_ops)->rqd[c2r(_ops, _cpu)])
> +/* Find the max of time slice */
> +#define MAX_TSLICE(t1, t2)  \
> +                   ({ typeof (t1) _t1 = (t1); \
> +                      typeof (t1) _t2 = (t2); \
> +                      _t1 > _t2 ? _t1 < 0 ? 0 : _t1 : _t2 < 0 ? 0 : _t2; })

Hiding the zero-checking behind this seems a bit strange to me.  I
think if the algorithm is properly laid out, we probably don't need
this at all (see my proposed alternate below).

>
>  /*
>   * Shifts for load average.
> @@ -280,6 +285,7 @@ struct csched2_private {
>      struct csched2_runqueue_data rqd[NR_CPUS];
>
>      unsigned int load_window_shift;
> +    unsigned ratelimit_us; /* each cpupool can have its onw ratelimit */
>  };
>
>  /*
> @@ -1588,6 +1594,34 @@ csched2_dom_cntl(
>      return rc;
>  }
>
> +static int csched2_sys_cntl(const struct scheduler *ops,
> +                            struct xen_sysctl_scheduler_op *sc)
> +{
> +    int rc = -EINVAL;
> +    xen_sysctl_credit_schedule_t *params = &sc->u.sched_credit;
> +    struct csched2_private *prv = CSCHED2_PRIV(ops);
> +    unsigned long flags;
> +
> +    switch (sc->cmd )
> +    {
> +        case XEN_SYSCTL_SCHEDOP_putinfo:
> +            if ( params->ratelimit_us &&
> +                ( params->ratelimit_us < CSCHED2_MIN_TIMER ||
> +                  params->ratelimit_us > MICROSECS(CSCHED2_MAX_TIMER) ))
> +                return rc;
> +            spin_lock_irqsave(&prv->lock, flags);
> +            prv->ratelimit_us = params->ratelimit_us;
> +            spin_unlock_irqrestore(&prv->lock, flags);
> +            break;
> +
> +        case XEN_SYSCTL_SCHEDOP_getinfo:
> +            params->ratelimit_us = prv->ratelimit_us;
> +            rc = 0;
> +            break;
> +    }
> +    return rc;
> +}
> +
>  static void *
>  csched2_alloc_domdata(const struct scheduler *ops, struct domain *dom)
>  {
> @@ -1657,12 +1691,15 @@ csched2_dom_destroy(const struct scheduler *ops, struct domain *dom)
>
>  /* How long should we let this vcpu run for? */
>  static s_time_t
> -csched2_runtime(const struct scheduler *ops, int cpu, struct csched2_vcpu *snext)
> +csched2_runtime(const struct scheduler *ops, int cpu,
> +                struct csched2_vcpu *snext, s_time_t now)
>  {
> -    s_time_t time;
> +    s_time_t time;
>      int rt_credit; /* Proposed runtime measured in credits */
>      struct csched2_runqueue_data *rqd = RQD(ops, cpu);
>      struct list_head *runq = &rqd->runq;
> +    s_time_t runtime = 0;
> +    struct csched2_private *prv = CSCHED2_PRIV(ops);
>
>      /*
>       * If we're idle, just stay so. Others (or external events)
> @@ -1680,6 +1717,14 @@ csched2_runtime(const struct scheduler *ops, int cpu, struct csched2_vcpu *snext
>
>      /* 1) Basic time: Run until credit is 0. */
>      rt_credit = snext->credit;
> +    if (snext->vcpu->is_running)
> +        runtime = now - snext->vcpu->runstate.state_entry_time;
> +    if ( runtime < 0 )
> +    {
> +        runtime = 0;
> +        d2printk("%s: Time went backwards? now %"PRI_stime" state_entry_time %"PRI_stime"\n",
> +                  _func__, now, snext->runstate.state_entry_time);
> +    }
>
>      /* 2) If there's someone waiting whose credit is positive,
>       * run until your credit ~= his */
> @@ -1695,11 +1740,24 @@ csched2_runtime(const struct scheduler *ops, int cpu, struct csched2_vcpu *snext
>      }
>
>      /* The next guy may actually have a higher credit, if we've tried to
> -     * avoid migrating him from a different cpu.  DTRT.  */
> +     * avoid migrating him from a different cpu.  DTRT.
> +     * Even if the next guy has higher credit and current vcpu has executed
> +     * for less amount of time than rate limit, allow it to run for minimum
> +     * amount of time.
> +     */
>      if ( rt_credit <= 0 )
>      {
> -        time = CSCHED2_MIN_TIMER;
> -        SCHED_STAT_CRANK(runtime_min_timer);
> +        if ( snext->vcpu->is_running && prv->ratelimit_us)
> +           /* implies the current one has executed for time < ratelimit and thus
> +            * it has neen selcted int runq_candidate to run next.
> +            * No need to check for this condition again.
> +            */
> +            time = MAX_TSLICE(CSCHED2_MIN_TIMER,
> +                               MICROSECS(prv->ratelimit_us) - runtime);
> +        else
> +            time = MAX_TSLICE(CSCHED2_MIN_TIMER, MICROSECS(prv->ratelimit_us));
> +
> +        SCHED_STAT_CRANK(time);
>      }
>      else
>      {
> @@ -1709,17 +1767,22 @@ csched2_runtime(const struct scheduler *ops, int cpu, struct csched2_vcpu *snext
>           * at a different rate. */
>          time = c2t(rqd, rt_credit, snext);
>
> -        /* Check limits */
> -        if ( time < CSCHED2_MIN_TIMER )
> -        {
> -            time = CSCHED2_MIN_TIMER;
> -            SCHED_STAT_CRANK(runtime_min_timer);
> -        }
> -        else if ( time > CSCHED2_MAX_TIMER )
> +        /* Check limits.
> +         * time > ratelimit --> time > MIN.
> +         */
> +        if ( time > CSCHED2_MAX_TIMER )
>          {
> +
>              time = CSCHED2_MAX_TIMER;
>              SCHED_STAT_CRANK(runtime_max_timer);
>          }
> +        else
> +        {
> +            time = MAX_TSLICE(MAX_TSLICE(CSCHED2_MIN_TIMER,
> +                                          MICROSECS(prv->ratelimit_us)), time);
> +            SCHED_STAT_CRANK(time);
> +        }
> +
>      }

This is all really confusing.  What about something like this (also
attached)?  The basic idea is to calculate min_time, then go through
the normal algorithm, then clip it once at the end.

@@ -1657,12 +1691,14 @@ csched2_dom_destroy(const struct scheduler
*ops, struct domain *dom)

 /* How long should we let this vcpu run for? */
 static s_time_t
-csched2_runtime(const struct scheduler *ops, int cpu, struct
csched2_vcpu *snext)
+csched2_runtime(const struct scheduler *ops, int cpu,
+                struct csched2_vcpu *snext, s_time_t now)
 {
-    s_time_t time;
+    s_time_t time, min_time;
     int rt_credit; /* Proposed runtime measured in credits */
     struct csched2_runqueue_data *rqd = RQD(ops, cpu);
     struct list_head *runq = &rqd->runq;
+    struct csched2_private *prv = CSCHED2_PRIV(ops);

     /*
      * If we're idle, just stay so. Others (or external events)
@@ -1675,9 +1711,19 @@ csched2_runtime(const struct scheduler *ops,
int cpu, struct csched2_vcpu *snext
      * 1) Run until snext's credit will be 0
      * 2) But if someone is waiting, run until snext's credit is equal
      * to his
-     * 3) But never run longer than MAX_TIMER or shorter than MIN_TIMER.
+     * 3) But never run longer than MAX_TIMER or shorter than MIN_TIMER
+     * or the ratelimit time.
      */

+    /* Calculate mintime */
+    min_time = CSCHED2_MIN_TIMER;
+    if ( prv->ratelimit_us ) {
+        s_time_t ratelimit_min = snext->vcpu->runstate.state_entry_time +
+            MICROSECS(prv->ratelimit_us) - now;
+        if ( ratelimit_min > min_time )
+            min_time = ratelimit_min;
+    }
+
     /* 1) Basic time: Run until credit is 0. */
     rt_credit = snext->credit;

@@ -1694,32 +1740,33 @@ csched2_runtime(const struct scheduler *ops,
int cpu, struct csched2_vcpu *snext
         }
     }

-    /* The next guy may actually have a higher credit, if we've tried to
-     * avoid migrating him from a different cpu.  DTRT.  */
-    if ( rt_credit <= 0 )
+    /*
+     * The next guy on the runqueue may actually have a higher credit,
+     * if we've tried to avoid migrating him from a different cpu.
+     * Setting time=0 will ensure the minimum timeslice is chosen.
+     *
+     * FIXME: See if we can eliminate this conversion if we know time
+     * will be outside (MIN,MAX).  Probably requires pre-calculating
+     * credit values of MIN,MAX per vcpu, since each vcpu burns credit
+     * at a different rate.
+     */
+    if ( rt_credit > 0 )
+        time = c2t(rqd, rt_credit, snext);
+    else
+        time = 0;
+
+
+    /* 3) But never run longer than MAX_TIMER or shorter than MIN_TIMER
+     * or your ratelimit time */
+    if ( time < min_time )
     {
-        time = CSCHED2_MIN_TIMER;
+        time = min_time;
         SCHED_STAT_CRANK(runtime_min_timer);
     }
-    else
+    else if ( time > CSCHED2_MAX_TIMER )
     {
-        /* FIXME: See if we can eliminate this conversion if we know time
-         * will be outside (MIN,MAX).  Probably requires pre-calculating
-         * credit values of MIN,MAX per vcpu, since each vcpu burns credit
-         * at a different rate. */
-        time = c2t(rqd, rt_credit, snext);
-
-        /* Check limits */
-        if ( time < CSCHED2_MIN_TIMER )
-        {
-            time = CSCHED2_MIN_TIMER;
-            SCHED_STAT_CRANK(runtime_min_timer);
-        }
-        else if ( time > CSCHED2_MAX_TIMER )
-        {
-            time = CSCHED2_MAX_TIMER;
-            SCHED_STAT_CRANK(runtime_max_timer);
-        }
+        time = CSCHED2_MAX_TIMER;
+        SCHED_STAT_CRANK(runtime_max_timer);
     }

     return time;


>
>      return time;
> @@ -1733,7 +1796,7 @@ void __dump_execstate(void *unused);
>  static struct csched2_vcpu *
>  runq_candidate(struct csched2_runqueue_data *rqd,
>                 struct csched2_vcpu *scurr,
> -               int cpu, s_time_t now)
> +               int cpu, s_time_t now, struct csched2_private *prv)

Since we have the cpu, I think we can get ops this way, without
cluttering things up with the extra argument:

    struct csched_private *prv = CSCHED_PRIV(per_cpu(scheduler, cpu));

>  {
>      struct list_head *iter;
>      struct csched2_vcpu *snext = NULL;
> @@ -1744,6 +1807,16 @@ runq_candidate(struct csched2_runqueue_data *rqd,
>      else
>          snext = CSCHED2_VCPU(idle_vcpu[cpu]);
>
> +    /* return the current vcpu if it has executed for less than ratelimit.
> +     * Adjuststment for the selected vcpu's credit and decision
> +     * for how long it will run will be taken in csched2_runtime.
> +     */
> +    if ( prv->ratelimit_us && !is_idle_vcpu(scurr->vcpu) &&
> +         vcpu_runnable(scurr->vcpu) &&
> +         (now - scurr->vcpu->runstate.state_entry_time) <
> +          MICROSECS(prv->ratelimit_us) )
> +        return scurr;
> +

This looks OK.  The comment should probably follow the general hypervisor style:
/*
 * blah blah blah
 */

I realize most of the file isn't like this, but that was probably a mistake. :-)

>      list_for_each( iter, &rqd->runq )
>      {
>          struct csched2_vcpu * svc = list_entry(iter, struct csched2_vcpu, runq_elem);
> @@ -1762,9 +1835,13 @@ runq_candidate(struct csched2_runqueue_data *rqd,
>          }
>
>          /* If the next one on the list has more credit than current
> -         * (or idle, if current is not runnable), choose it. */
> +         * (or idle, if current is not runnable) and current one has already
> +         * executed for more than ratelimit. choose it.
> +         * Control has reached here means that current vcpu has executed >
> +         * ratelimit_us or ratelimit is off, so chose the next one.
> +         */
>          if ( svc->credit > snext->credit )
> -            snext = svc;
> +                snext = svc;

I agree with Dario; this comment change isn't very helpful.

>
>          /* In any case, if we got this far, break. */
>          break;
> @@ -1787,6 +1864,7 @@ csched2_schedule(
>      struct csched2_vcpu * const scurr = CSCHED2_VCPU(current);
>      struct csched2_vcpu *snext = NULL;
>      struct task_slice ret;
> +    struct csched2_private *prv = CSCHED2_PRIV(ops);
>
>      SCHED_STAT_CRANK(schedule);
>      CSCHED2_VCPU_CHECK(current);
> @@ -1857,7 +1935,7 @@ csched2_schedule(
>          snext = CSCHED2_VCPU(idle_vcpu[cpu]);
>      }
>      else
> -        snext=runq_candidate(rqd, scurr, cpu, now);
> +        snext=runq_candidate(rqd, scurr, cpu, now, prv);
>
>      /* If switching from a non-idle runnable vcpu, put it
>       * back on the runqueue. */
> @@ -1921,7 +1999,7 @@ csched2_schedule(
>      /*
>       * Return task to run next...
>       */
> -    ret.time = csched2_runtime(ops, cpu, snext);
> +    ret.time = csched2_runtime(ops, cpu, snext, now);
>      ret.task = snext->vcpu;
>
>      CSCHED2_VCPU_CHECK(ret.task);
> @@ -2353,6 +2431,8 @@ csched2_init(struct scheduler *ops)
>          prv->runq_map[i] = -1;
>          prv->rqd[i].id = -1;
>      }
> +    /* initialize ratelimit */
> +    prv->ratelimit_us = 1000 * CSCHED2_MIN_TIMER;

Is there any reason this isn't using sched_ratelimit_us (the
hypervisor command-line option, which the credit scheduler is using)?

Thanks again!

 -George

[-- Attachment #2: example.diff --]
[-- Type: text/plain, Size: 3633 bytes --]

diff --git a/xen/common/sched_credit2.c b/xen/common/sched_credit2.c
index 1933ff1..97a5191 100644
--- a/xen/common/sched_credit2.c
+++ b/xen/common/sched_credit2.c
@@ -1657,12 +1691,14 @@ csched2_dom_destroy(const struct scheduler *ops, struct domain *dom)
 
 /* How long should we let this vcpu run for? */
 static s_time_t
-csched2_runtime(const struct scheduler *ops, int cpu, struct csched2_vcpu *snext)
+csched2_runtime(const struct scheduler *ops, int cpu,
+                struct csched2_vcpu *snext, s_time_t now)
 {
-    s_time_t time; 
+    s_time_t time, min_time; 
     int rt_credit; /* Proposed runtime measured in credits */
     struct csched2_runqueue_data *rqd = RQD(ops, cpu);
     struct list_head *runq = &rqd->runq;
+    struct csched2_private *prv = CSCHED2_PRIV(ops);
 
     /*
      * If we're idle, just stay so. Others (or external events)
@@ -1675,9 +1711,19 @@ csched2_runtime(const struct scheduler *ops, int cpu, struct csched2_vcpu *snext
      * 1) Run until snext's credit will be 0
      * 2) But if someone is waiting, run until snext's credit is equal
      * to his
-     * 3) But never run longer than MAX_TIMER or shorter than MIN_TIMER.
+     * 3) But never run longer than MAX_TIMER or shorter than MIN_TIMER
+     * or your the ratelimit time.
      */
 
+    /* Calculate mintime */
+    min_time = CSCHED2_MIN_TIMER;
+    if ( prv->ratelimit_us ) {
+        s_time_t ratelimit_min = snext->vcpu->runstate.state_entry_time +
+            MICROSECS(prv->ratelimit_us) - now;
+        if ( ratelimit_min > min_time )
+            min_time = ratelimit_min;
+    }
+    
     /* 1) Basic time: Run until credit is 0. */
     rt_credit = snext->credit;
 
@@ -1694,32 +1740,33 @@ csched2_runtime(const struct scheduler *ops, int cpu, struct csched2_vcpu *snext
         }
     }
 
-    /* The next guy may actually have a higher credit, if we've tried to
-     * avoid migrating him from a different cpu.  DTRT.  */
-    if ( rt_credit <= 0 )
+    /* 
+     * The next guy on the runqueue may actually have a higher credit,
+     * if we've tried to avoid migrating him from a different cpu.
+     * Setting time=0 will ensure the minimum timeslice is chosen.
+     *
+     * FIXME: See if we can eliminate this conversion if we know time
+     * will be outside (MIN,MAX).  Probably requires pre-calculating
+     * credit values of MIN,MAX per vcpu, since each vcpu burns credit
+     * at a different rate. 
+     */
+    if ( rt_credit > 0 )
+        time = c2t(rqd, rt_credit, snext);
+    else
+        time = 0;
+
+    
+    /* 3) But never run longer than MAX_TIMER or shorter than MIN_TIMER
+     * or your ratelimit time */
+    if ( time < min_time )
     {
-        time = CSCHED2_MIN_TIMER;
+        time = min_time;
         SCHED_STAT_CRANK(runtime_min_timer);
     }
-    else
+    else if ( time > CSCHED2_MAX_TIMER )
     {
-        /* FIXME: See if we can eliminate this conversion if we know time
-         * will be outside (MIN,MAX).  Probably requires pre-calculating
-         * credit values of MIN,MAX per vcpu, since each vcpu burns credit
-         * at a different rate. */
-        time = c2t(rqd, rt_credit, snext);
-
-        /* Check limits */
-        if ( time < CSCHED2_MIN_TIMER )
-        {
-            time = CSCHED2_MIN_TIMER;
-            SCHED_STAT_CRANK(runtime_min_timer);
-        }
-        else if ( time > CSCHED2_MAX_TIMER )
-        {
-            time = CSCHED2_MAX_TIMER;
-            SCHED_STAT_CRANK(runtime_max_timer);
-        }
+        time = CSCHED2_MAX_TIMER;
+        SCHED_STAT_CRANK(runtime_max_timer);
     }
 
     return time;

[-- Attachment #3: Type: text/plain, Size: 127 bytes --]

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel

^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: [PATCH] credi2-ratelimit: Implement rate limit for credit2 scheduler
  2016-07-12 16:16 ` George Dunlap
@ 2016-07-13  8:53   ` Dario Faggioli
  2016-07-13 11:08     ` George Dunlap
  2016-07-18 11:54     ` anshul makkar
  0 siblings, 2 replies; 6+ messages in thread
From: Dario Faggioli @ 2016-07-13  8:53 UTC (permalink / raw)
  To: George Dunlap, Anshul Makkar; +Cc: xen-devel


[-- Attachment #1.1: Type: text/plain, Size: 5419 bytes --]

On Tue, 2016-07-12 at 17:16 +0100, George Dunlap wrote:
> On Wed, Jul 6, 2016 at 6:33 PM, Makkar anshul.makkar@citrix.com
> <Anshul> wrote:
> > 
> > --- a/xen/common/sched_credit2.c
> > +++ b/xen/common/sched_credit2.c
> > @@ -171,6 +171,11 @@ integer_param("sched_credit2_migrate_resist",
> > opt_migrate_resist);
> >  #define c2r(_ops, _cpu)     (CSCHED2_PRIV(_ops)->runq_map[(_cpu)])
> >  /* CPU to runqueue struct macro */
> >  #define RQD(_ops, _cpu)     (&CSCHED2_PRIV(_ops)->rqd[c2r(_ops,
> > _cpu)])
> > +/* Find the max of time slice */
> > +#define MAX_TSLICE(t1, t2)  \
> > +                   ({ typeof (t1) _t1 = (t1); \
> > +                      typeof (t1) _t2 = (t2); \
> > +                      _t1 > _t2 ? _t1 < 0 ? 0 : _t1 : _t2 < 0 ? 0
> > : _t2; })
> Hiding the zero-checking behind this seems a bit strange to me.  I
> think if the algorithm is properly laid out, we probably don't need
> this at all (see my proposed alternate below).
> 
I think it's more "overflow-avoidance" than "zero-checking", and as I
said, that's probably better done by means of subtraction(s).

In any case, I agree that, if we don't need it, that's even better. :-)

> > @@ -1709,17 +1767,22 @@ csched2_runtime(const struct scheduler
> > *ops, int cpu, struct csched2_vcpu *snext
> >           * at a different rate. */
> >          time = c2t(rqd, rt_credit, snext);
> > 
> > -        /* Check limits */
> > -        if ( time < CSCHED2_MIN_TIMER )
> > -        {
> > -            time = CSCHED2_MIN_TIMER;
> > -            SCHED_STAT_CRANK(runtime_min_timer);
> > -        }
> > -        else if ( time > CSCHED2_MAX_TIMER )
> > +        /* Check limits.
> > +         * time > ratelimit --> time > MIN.
> > +         */
> > +        if ( time > CSCHED2_MAX_TIMER )
> >          {
> > +
> >              time = CSCHED2_MAX_TIMER;
> >              SCHED_STAT_CRANK(runtime_max_timer);
> >          }
> > +        else
> > +        {
> > +            time = MAX_TSLICE(MAX_TSLICE(CSCHED2_MIN_TIMER,
> > +                                          MICROSECS(prv-
> > >ratelimit_us)), time);
> > +            SCHED_STAT_CRANK(time);
> > +        }
> > +
> >      }
> This is all really confusing.  What about something like this (also
> attached)?  The basic idea is to calculate min_time, then go through
> the normal algorithm, then clip it once at the end.
> 
Yes, this matches my idea and my comment, and I think the code provided
by George makes sense. Only one thing:

> @@ -1675,9 +1711,19 @@ csched2_runtime(const struct scheduler *ops,
> int cpu, struct csched2_vcpu *snext
>       * 1) Run until snext's credit will be 0
>       * 2) But if someone is waiting, run until snext's credit is
> equal
>       * to his
> -     * 3) But never run longer than MAX_TIMER or shorter than
> MIN_TIMER.
> +     * 3) But never run longer than MAX_TIMER or shorter than
> MIN_TIMER
> +     * or your the ratelimit time.
>       */
> 
> +    /* Calculate mintime */
> +    min_time = CSCHED2_MIN_TIMER;
> +    if ( prv->ratelimit_us ) {
> +        s_time_t ratelimit_min = snext->vcpu-
> >runstate.state_entry_time +
> +            MICROSECS(prv->ratelimit_us) - now;
>
Here snext can indeed be someone which was running already (e.g., we're
choosing current again), in which case runstate.state_entry_time - now
would indeed tell us how long it's actually been running, and the
formula (coupled with the if below) is correct.

But it also can be someone which is runnable (e.g., we're choosing
someone from the runqueue and preempting current), in which case
runstate.state_entry_time tells when it became runnable, and
state_entry_time-now is how long it's been runnable, which is not what
we want here.

I think, in such a case, we want ratelimit_min to just be equal to
prv->ratelimit_us. So, maybe, something like this:

 /* Caluclate mintime */
 min_time = CSCHED2_MIN_TIMER;
 if ( prv->ratelimit_us )
 {
     s_time_t ratelimit_min = prv->ratelimit_us;
     if ( snext->vcpu->is_running )     // XXX or is it better snext == curr_on_cpu(cpu)
         ratelimit_min = snext->vcpu->runstate.state_entry_time +
                         MICROSECS(prv->ratelimit_us) - now;
     if ( ratelimit_min > min_time )
         min_time = ratelimit_min;
 }

> > @@ -2353,6 +2431,8 @@ csched2_init(struct scheduler *ops)
> >          prv->runq_map[i] = -1;
> >          prv->rqd[i].id = -1;
> >      }
> > +    /* initialize ratelimit */
> > +    prv->ratelimit_us = 1000 * CSCHED2_MIN_TIMER;
> Is there any reason this isn't using sched_ratelimit_us (the
> hypervisor command-line option, which the credit scheduler is using)?
> 
Yeah, I guess just using that is the best thing to start with.

Regards,
Dario
-- 
<<This happens because I choose it to happen!>> (Raistlin Majere)
-----------------------------------------------------------------
Dario Faggioli, Ph.D, http://about.me/dario.faggioli
Senior Software Engineer, Citrix Systems R&D Ltd., Cambridge (UK)


[-- Attachment #1.2: This is a digitally signed message part --]
[-- Type: application/pgp-signature, Size: 819 bytes --]

[-- Attachment #2: Type: text/plain, Size: 127 bytes --]

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] credi2-ratelimit: Implement rate limit for credit2 scheduler
  2016-07-13  8:53   ` Dario Faggioli
@ 2016-07-13 11:08     ` George Dunlap
  2016-07-18 11:54     ` anshul makkar
  1 sibling, 0 replies; 6+ messages in thread
From: George Dunlap @ 2016-07-13 11:08 UTC (permalink / raw)
  To: Dario Faggioli, George Dunlap, Anshul Makkar; +Cc: xen-devel

On 13/07/16 09:53, Dario Faggioli wrote:
>> @@ -1675,9 +1711,19 @@ csched2_runtime(const struct scheduler *ops,
>> int cpu, struct csched2_vcpu *snext
>>       * 1) Run until snext's credit will be 0
>>       * 2) But if someone is waiting, run until snext's credit is
>> equal
>>       * to his
>> -     * 3) But never run longer than MAX_TIMER or shorter than
>> MIN_TIMER.
>> +     * 3) But never run longer than MAX_TIMER or shorter than
>> MIN_TIMER
>> +     * or your the ratelimit time.
>>       */
>>
>> +    /* Calculate mintime */
>> +    min_time = CSCHED2_MIN_TIMER;
>> +    if ( prv->ratelimit_us ) {
>> +        s_time_t ratelimit_min = snext->vcpu-
>>> runstate.state_entry_time +
>> +            MICROSECS(prv->ratelimit_us) - now;
>>
> Here snext can indeed be someone which was running already (e.g., we're
> choosing current again), in which case runstate.state_entry-time-now
> would indeed tell us how long it's actually been running, and the
> formula (coupled with the if below) is correct.
> 
> But it also can be someone which is runnable (e.g., we're choosing
> someone from the runqueue and preempting current), in which case
> runstate.state_entry_time tells when it became runnable, and
> state_entry_time-now is how long it's been runnable, which is not what
> we want here.
> 
> In think, in such a case, we want ratelimit_min to just be equal to
> prv->ratelimit_us. So, maybe, something like this:
> 
>  /* Caluclate mintime */
>  min_time = CSCHED2_MIN_TIMER;
>  if ( prv->ratelimit_us )
>  {
>      s_time_t ratelimit_min = prv->ratelimit_us;
>      if ( snext->vcpu->is_running )     // XXX or is it better snext == curr_on_cpu(cpu)
>          ratelimit_min = snext->vcpu->runstate.state_entry_time +
>                          MICROSECS(prv->ratelimit_us) - now;
>      if ( ratelimit_min > min_time )
>          min_time = ratelimit_min;
>  }

+1

 -George

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] credi2-ratelimit: Implement rate limit for credit2 scheduler
  2016-07-13  8:53   ` Dario Faggioli
  2016-07-13 11:08     ` George Dunlap
@ 2016-07-18 11:54     ` anshul makkar
  1 sibling, 0 replies; 6+ messages in thread
From: anshul makkar @ 2016-07-18 11:54 UTC (permalink / raw)
  To: Dario Faggioli, George Dunlap; +Cc: xen-devel

On 13/07/16 09:53, Dario Faggioli wrote:
> On Tue, 2016-07-12 at 17:16 +0100, George Dunlap wrote:
>> On Wed, Jul 6, 2016 at 6:33 PM, Makkar anshul.makkar@citrix.com
>> <Anshul> wrote:
>>>
>>> --- a/xen/common/sched_credit2.c
>>> +++ b/xen/common/sched_credit2.c
>>> +#define MAX_TSLICE(t1, t2)  \
>>> +                   ({ typeof (t1) _t1 = (t1); \
>>> +                      typeof (t1) _t2 = (t2); \
>>> +                      _t1 > _t2 ? _t1 < 0 ? 0 : _t1 : _t2 < 0 ? 0
>>> : _t2; })
>> Hiding the zero-checking behind this seems a bit strange to me.  I
>> think if the algorithm is properly laid out, we probably don't need
>> this at all (see my proposed alternate below).
>>
> I think it's more "overflow-avoidance" than "zero-checking", and as I
> said, that's probably better done by means of subtraction(s).
>
> In any case, I agree that, if we don't need it, that's even better. :-)
Agreed. I am going to remove it. It's not needed with the modified algo.
>

>> This is all really confusing.  What about something like this (also
>> attached)?  The basic idea is to calculate min_time, then go through
>> the normal algorithm, then clip it once at the end.
>>
> Yes, this matches my idea and my comment, and I think the code provided
> by George makes sense. Only one thing:
>
>> @@ -1675,9 +1711,19 @@ csched2_runtime(const struct scheduler *ops,
>> int cpu, struct csched2_vcpu *snext
>> +     * or your the ratelimit time.
>>        */
>> +    /* Calculate mintime */
>> +    min_time = CSCHED2_MIN_TIMER;
>> +    if ( prv->ratelimit_us ) {
>> +        s_time_t ratelimit_min = snext->vcpu-
>>> runstate.state_entry_time +
>> +            MICROSECS(prv->ratelimit_us) - now;
>>
> Here snext can indeed be someone which was running already (e.g., we're
> choosing current again), in which case runstate.state_entry-time-now
> would indeed tell us how long it's actually been running, and the
> formula (coupled with the if below) is correct.
>
> But it also can be someone which is runnable (e.g., we're choosing
> someone from the runqueue and preempting current), in which case
> runstate.state_entry_time tells when it became runnable, and
> state_entry_time-now is how long it's been runnable, which is not what
> we want here.
>
> In think, in such a case, we want ratelimit_min to just be equal to
> prv->ratelimit_us. So, maybe, something like this:
>
>   /* Caluclate mintime */
>   min_time = CSCHED2_MIN_TIMER;
>   if ( prv->ratelimit_us )
>   {
>       s_time_t ratelimit_min = prv->ratelimit_us;
>       if ( snext->vcpu->is_running )     // XXX or is it better snext == curr_on_cpu(cpu)
>           ratelimit_min = snext->vcpu->runstate.state_entry_time +
>                           MICROSECS(prv->ratelimit_us) - now;
>       if ( ratelimit_min > min_time )
>           min_time = ratelimit_min;
>   }
>
Agreed.
>>> @@ -2353,6 +2431,8 @@ csched2_init(struct scheduler *ops)
>>>           prv->runq_map[i] = -1;
>>>           prv->rqd[i].id = -1;
>>>       }
>>> +    /* initialize ratelimit */
>>> +    prv->ratelimit_us = 1000 * CSCHED2_MIN_TIMER;
>> Is there any reason this isn't using sched_ratelimit_us (the
>> hypervisor command-line option, which the credit scheduler is using)?
>>
> Yeah, I guess just using that is the best thing to start with.
Agreed.
>
> Regards,
> Dario
>
Will post the reworked patch.

Anshul


_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2016-07-18 11:54 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-07-06 17:33 [PATCH] credi2-ratelimit: Implement rate limit for credit2 scheduler Anshul
2016-07-07  9:59 ` Dario Faggioli
2016-07-12 16:16 ` George Dunlap
2016-07-13  8:53   ` Dario Faggioli
2016-07-13 11:08     ` George Dunlap
2016-07-18 11:54     ` anshul makkar

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).