From: Dario Faggioli <dario.faggioli@citrix.com>
To: xen-devel@lists.xenproject.org
Cc: George Dunlap <george.dunlap@eu.citrix.com>
Subject: [PATCH 4/6] xen: credit1: treat pCPUs more evenly during balancing.
Date: Thu, 02 Mar 2017 11:38:27 +0100
Message-ID: <148845110684.23452.11988704692319209140.stgit@Solace.fritz.box>
In-Reply-To: <148844531279.23452.17528540110704914171.stgit@Solace.fritz.box>

Right now, we use cpumask_first() for going through
the busy pCPUs in csched_load_balance(). This means
not all pCPUs have equal chances of seeing their
pending work stolen. It also means there is more
runqueue lock pressure on the lower-ID pCPUs.

To avoid all this, let's record, for each NUMA
node, the pCPU that we stole work from last, and
start from the one right after it the next time.
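
To see the effect outside of the hypervisor, here is a
minimal, self-contained C sketch of the idea (this is
not Xen code: next_in_mask() is a toy stand-in for
cpumask_cycle(), and a plain bitmask stands in for a
real cpumask_t):

#include <stdio.h>
#include <stdint.h>

#define NR_CPUS 8

/* First set bit strictly after 'start', wrapping around;
 * -1 if the mask is empty. A toy cpumask_cycle(). */
static int next_in_mask(uint32_t mask, int start)
{
    for ( int i = 1; i <= NR_CPUS; i++ )
    {
        int cpu = (start + i) % NR_CPUS;
        if ( mask & (1u << cpu) )
            return cpu;
    }
    return -1;
}

int main(void)
{
    uint32_t busy = 0xf6; /* pCPUs 1,2,4,5,6,7 have work */
    int bias = 0;         /* balance_bias for this node  */

    /* Each pass starts right after the previous victim,
     * so the busy pCPUs take turns; cpumask_first()
     * would pick pCPU 1 every single time. */
    for ( int pass = 0; pass < 6; pass++ )
    {
        int victim = next_in_mask(busy, bias);
        printf("pass %d: steal from pCPU %d\n", pass, victim);
        bias = victim;
    }
    return 0;
}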

Signed-off-by: Dario Faggioli <dario.faggioli@citrix.com>
---
Cc: George Dunlap <george.dunlap@eu.citrix.com>
---
 xen/common/sched_credit.c |   37 +++++++++++++++++++++++++++++++++----
 1 file changed, 33 insertions(+), 4 deletions(-)

diff --git a/xen/common/sched_credit.c b/xen/common/sched_credit.c
index 529b6c7..bae29a7 100644
--- a/xen/common/sched_credit.c
+++ b/xen/common/sched_credit.c
@@ -229,6 +229,7 @@ struct csched_private {
     uint32_t credit;
     int credit_balance;
     uint32_t runq_sort;
+    uint32_t *balance_bias;
     unsigned ratelimit_us;
     /* Period of master and tick in milliseconds */
     unsigned tslice_ms, tick_period_us, ticks_per_tslice;
@@ -548,6 +549,7 @@ csched_deinit_pdata(const struct scheduler *ops, void *pcpu, int cpu)
 {
     struct csched_private *prv = CSCHED_PRIV(ops);
     struct csched_pcpu *spc = pcpu;
+    unsigned int node = cpu_to_node(cpu);
     unsigned long flags;
 
     /*
@@ -571,6 +573,12 @@ csched_deinit_pdata(const struct scheduler *ops, void *pcpu, int cpu)
         prv->master = cpumask_first(prv->cpus);
         migrate_timer(&prv->master_ticker, prv->master);
     }
+    if ( prv->balance_bias[node] == cpu )
+    {
+        cpumask_and(cpumask_scratch, prv->cpus, &node_to_cpumask(node));
+        if ( !cpumask_empty(cpumask_scratch) )
+            prv->balance_bias[node] = cpumask_first(cpumask_scratch);
+    }
     kill_timer(&spc->ticker);
     if ( prv->ncpus == 0 )
         kill_timer(&prv->master_ticker);
@@ -610,6 +618,10 @@ init_pdata(struct csched_private *prv, struct csched_pcpu *spc, int cpu)
                   NOW() + MILLISECS(prv->tslice_ms));
     }
 
+    cpumask_and(cpumask_scratch, prv->cpus, &node_to_cpumask(cpu_to_node(cpu)));
+    if ( cpumask_weight(cpumask_scratch) == 1 )
+        prv->balance_bias[cpu_to_node(cpu)] = cpu;
+
     init_timer(&spc->ticker, csched_tick, (void *)(unsigned long)cpu, cpu);
     set_timer(&spc->ticker, NOW() + MICROSECS(prv->tick_period_us) );
 
@@ -1696,7 +1708,7 @@ csched_load_balance(struct csched_private *prv, int cpu,
     struct csched_vcpu *speer;
     cpumask_t workers;
     cpumask_t *online;
-    int peer_cpu, peer_node, bstep;
+    int peer_cpu, first_cpu, peer_node, bstep;
     int node = cpu_to_node(cpu);
 
     BUG_ON( cpu != snext->vcpu->processor );
@@ -1740,9 +1752,10 @@ csched_load_balance(struct csched_private *prv, int cpu,
             cpumask_and(&workers, &workers, &node_to_cpumask(peer_node));
             __cpumask_clear_cpu(cpu, &workers);
 
-            peer_cpu = cpumask_first(&workers);
-            if ( peer_cpu >= nr_cpu_ids )
+            first_cpu = cpumask_cycle(prv->balance_bias[peer_node], &workers);
+            if ( first_cpu >= nr_cpu_ids )
                 goto next_node;
+            peer_cpu = first_cpu;
             do
             {
                 /*
@@ -1770,12 +1783,18 @@ csched_load_balance(struct csched_private *prv, int cpu,
                 if ( speer != NULL )
                 {
                     *stolen = 1;
+                    /*
+                     * Next time we look for work to steal on this node, we
+                     * will start from the pCPU right after this one, so we
+                     * don't risk always stealing from the same few pCPUs.
+                     */
+                    prv->balance_bias[peer_node] = peer_cpu;
                     return speer;
                 }
 
                 peer_cpu = cpumask_cycle(peer_cpu, &workers);
 
-            } while( peer_cpu != cpumask_first(&workers) );
+            } while( peer_cpu != first_cpu );
 
  next_node:
             peer_node = cycle_node(peer_node, node_online_map);
@@ -2126,6 +2145,14 @@ csched_init(struct scheduler *ops)
     prv = xzalloc(struct csched_private);
     if ( prv == NULL )
         return -ENOMEM;
+
+    prv->balance_bias = xzalloc_array(uint32_t, MAX_NUMNODES);
+    if ( prv->balance_bias == NULL )
+    {
+        xfree(prv);
+        return -ENOMEM;
+    }
+
     if ( !zalloc_cpumask_var(&prv->cpus) ||
          !zalloc_cpumask_var(&prv->idlers) ||
          !zalloc_cpumask_var(&prv->overloaded) )
@@ -2133,6 +2160,7 @@ csched_init(struct scheduler *ops)
         free_cpumask_var(prv->overloaded);
         free_cpumask_var(prv->idlers);
         free_cpumask_var(prv->cpus);
+        xfree(prv->balance_bias);
         xfree(prv);
         return -ENOMEM;
     }
@@ -2179,6 +2207,7 @@ csched_deinit(struct scheduler *ops)
         free_cpumask_var(prv->cpus);
         free_cpumask_var(prv->idlers);
         free_cpumask_var(prv->overloaded);
+        xfree(prv->balance_bias);
         xfree(prv);
     }
 }
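
For completeness, the shape of the new do/while loop in
csched_load_balance() -- start scanning from the pCPU
right after the remembered bias, and stop once we wrap
back to the first candidate -- can be sketched like this
(reusing the toy next_in_mask() from above; try_steal()
is a hypothetical placeholder for the work actually done
via csched_runq_steal()):

/* Visit every busy pCPU exactly once, starting just
 * after 'bias'; return the victim, or -1 so the caller
 * moves on to the next node. */
static int scan_node(uint32_t workers, int bias)
{
    int first = next_in_mask(workers, bias);
    int cpu;

    if ( first < 0 )
        return -1;      /* empty mask: like goto next_node */

    cpu = first;
    do {
        if ( try_steal(cpu) )
            return cpu; /* caller stores this as the new bias */
        cpu = next_in_mask(workers, cpu);
    } while ( cpu != first );

    return -1;
}

Terminating on 'cpu != first' (rather than on
cpumask_first(&workers), as before the patch) is what
makes the rotated starting point safe: the loop still
visits each worker exactly once, wherever it starts.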




Thread overview: 16+ messages
2017-03-02 10:37 [PATCH 0/6] xen: sched: improve scalability of Credit1, and optimize a bit both Credit1 and Credit2 Dario Faggioli
2017-03-02 10:38 ` [PATCH 1/6] xen: credit1: simplify csched_runq_steal() a little bit Dario Faggioli
2017-03-03  9:35   ` anshul makkar
2017-03-03 13:39     ` Dario Faggioli
2017-03-02 10:38 ` [PATCH 2/6] xen: credit: (micro) optimize csched_runq_steal() Dario Faggioli
2017-03-03  9:48   ` anshul makkar
2017-03-03 13:53     ` Dario Faggioli
2017-03-02 10:38 ` [PATCH 3/6] xen: credit1: increase efficiency and scalability of load balancing Dario Faggioli
2017-03-02 11:06   ` Andrew Cooper
2017-03-02 11:35     ` Dario Faggioli
2017-04-06  7:37     ` Dario Faggioli
2017-03-02 10:38 ` Dario Faggioli [this message]
2017-03-02 10:38 ` [PATCH 5/6] xen/tools: tracing: add record for credit1 runqueue stealing Dario Faggioli
2017-03-02 10:38 ` [PATCH 6/6] xen: credit2: avoid cpumask_any() in pick_cpu() Dario Faggioli
2017-03-02 10:58 ` [PATCH 0/6] xen: sched: improve scalability of Credit1, and optimize a bit both Credit1 and Credit2 Dario Faggioli
2017-03-27  9:08 ` Dario Faggioli
