From: Juergen Gross <jgross@suse.com>
To: xen-devel@lists.xenproject.org
Cc: Juergen Gross <jgross@suse.com>, Tim Deegan <tim@xen.org>,
	Stefano Stabellini <sstabellini@kernel.org>,
	Julien Grall <julien@xen.org>, Wei Liu <wl@xen.org>,
	Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>,
	George Dunlap <George.Dunlap@eu.citrix.com>,
	Andrew Cooper <andrew.cooper3@citrix.com>,
	Ian Jackson <ian.jackson@eu.citrix.com>,
	Dario Faggioli <dfaggioli@suse.com>,
	Jan Beulich <jbeulich@suse.com>
Subject: [Xen-devel] [PATCH v6 13/20] xen/sched: split schedule_cpu_switch()
Date: Wed,  2 Oct 2019 09:27:38 +0200
Message-ID: <20191002072745.24919-14-jgross@suse.com>
In-Reply-To: <20191002072745.24919-1-jgross@suse.com>

Instead of letting schedule_cpu_switch() handle moving cpus from and
to cpupools, split it into schedule_cpu_add() and schedule_cpu_rm().

This will allow us to drop the allocation and freeing of scheduler data
for free cpus, as the idle scheduler doesn't need such data.
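
As an illustration only (not part of the patch, error handling elided),
the two cpupool call sites changed below then boil down to:

    /* Moving a free cpu into cpupool c (cpu still marked as free): */
    ret = schedule_cpu_add(cpu, c);   /* was schedule_cpu_switch(cpu, c) */

    /* Moving a cpu out of its pool back to the free cpus: */
    ret = schedule_cpu_rm(cpu);       /* was schedule_cpu_switch(cpu, NULL) */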

Signed-off-by: Juergen Gross <jgross@suse.com>
Reviewed-by: Dario Faggioli <dfaggioli@suse.com>
---
V1: new patch
V4:
- rename sd -> sr (Jan Beulich)
---
 xen/common/cpupool.c    |   4 +-
 xen/common/schedule.c   | 133 +++++++++++++++++++++++++++---------------------
 xen/include/xen/sched.h |   3 +-
 3 files changed, 78 insertions(+), 62 deletions(-)

diff --git a/xen/common/cpupool.c b/xen/common/cpupool.c
index 51f0ff0d88..02825e779d 100644
--- a/xen/common/cpupool.c
+++ b/xen/common/cpupool.c
@@ -271,7 +271,7 @@ static int cpupool_assign_cpu_locked(struct cpupool *c, unsigned int cpu)
 
     if ( (cpupool_moving_cpu == cpu) && (c != cpupool_cpu_moving) )
         return -EADDRNOTAVAIL;
-    ret = schedule_cpu_switch(cpu, c);
+    ret = schedule_cpu_add(cpu, c);
     if ( ret )
         return ret;
 
@@ -321,7 +321,7 @@ static int cpupool_unassign_cpu_finish(struct cpupool *c)
      */
     if ( !ret )
     {
-        ret = schedule_cpu_switch(cpu, NULL);
+        ret = schedule_cpu_rm(cpu);
         if ( ret )
             cpumask_clear_cpu(cpu, &cpupool_free_cpus);
         else
diff --git a/xen/common/schedule.c b/xen/common/schedule.c
index 5257225050..a96fc82282 100644
--- a/xen/common/schedule.c
+++ b/xen/common/schedule.c
@@ -93,15 +93,6 @@ static struct scheduler __read_mostly ops;
 static void sched_set_affinity(
     struct sched_unit *unit, const cpumask_t *hard, const cpumask_t *soft);
 
-static spinlock_t *
-sched_idle_switch_sched(struct scheduler *new_ops, unsigned int cpu,
-                        void *pdata, void *vdata)
-{
-    sched_idle_unit(cpu)->priv = NULL;
-
-    return &sched_free_cpu_lock;
-}
-
 static struct sched_resource *
 sched_idle_res_pick(const struct scheduler *ops, const struct sched_unit *unit)
 {
@@ -141,7 +132,6 @@ static struct scheduler sched_idle_ops = {
 
     .alloc_udata    = sched_idle_alloc_udata,
     .free_udata     = sched_idle_free_udata,
-    .switch_sched   = sched_idle_switch_sched,
 };
 
 static inline struct vcpu *unit2vcpu_cpu(const struct sched_unit *unit,
@@ -2547,36 +2537,22 @@ void __init scheduler_init(void)
 }
 
 /*
- * Move a pCPU outside of the influence of the scheduler of its current
- * cpupool, or subject it to the scheduler of a new cpupool.
- *
- * For the pCPUs that are removed from their cpupool, their scheduler becomes
- * &sched_idle_ops (the idle scheduler).
+ * Move a pCPU from free cpus (running the idle scheduler) to a cpupool
+ * using any "real" scheduler.
+ * The cpu is still marked as "free" and not yet valid for its cpupool.
  */
-int schedule_cpu_switch(unsigned int cpu, struct cpupool *c)
+int schedule_cpu_add(unsigned int cpu, struct cpupool *c)
 {
     struct vcpu *idle;
-    void *ppriv, *ppriv_old, *vpriv, *vpriv_old;
-    struct scheduler *old_ops = get_sched_res(cpu)->scheduler;
-    struct scheduler *new_ops = (c == NULL) ? &sched_idle_ops : c->sched;
-    struct sched_resource *sd = get_sched_res(cpu);
-    struct cpupool *old_pool = sd->cpupool;
+    void *ppriv, *vpriv;
+    struct scheduler *new_ops = c->sched;
+    struct sched_resource *sr = get_sched_res(cpu);
     spinlock_t *old_lock, *new_lock;
     unsigned long flags;
 
-    /*
-     * pCPUs only move from a valid cpupool to free (i.e., out of any pool),
-     * or from free to a valid cpupool. In the former case (which happens when
-     * c is NULL), we want the CPU to have been marked as free already, as
-     * well as to not be valid for the source pool any longer, when we get to
-     * here. In the latter case (which happens when c is a valid cpupool), we
-     * want the CPU to still be marked as free, as well as to not yet be valid
-     * for the destination pool.
-     */
-    ASSERT(c != old_pool && (c != NULL || old_pool != NULL));
     ASSERT(cpumask_test_cpu(cpu, &cpupool_free_cpus));
-    ASSERT((c == NULL && !cpumask_test_cpu(cpu, old_pool->cpu_valid)) ||
-           (c != NULL && !cpumask_test_cpu(cpu, c->cpu_valid)));
+    ASSERT(!cpumask_test_cpu(cpu, c->cpu_valid));
+    ASSERT(get_sched_res(cpu)->cpupool == NULL);
 
     /*
      * To setup the cpu for the new scheduler we need:
@@ -2601,52 +2577,91 @@ int schedule_cpu_switch(unsigned int cpu, struct cpupool *c)
         return -ENOMEM;
     }
 
-    sched_do_tick_suspend(old_ops, cpu);
-
     /*
-     * The actual switch, including (if necessary) the rerouting of the
-     * scheduler lock to whatever new_ops prefers,  needs to happen in one
-     * critical section, protected by old_ops' lock, or races are possible.
-     * It is, in fact, the lock of another scheduler that we are taking (the
-     * scheduler of the cpupool that cpu still belongs to). But that is ok
-     * as, anyone trying to schedule on this cpu will spin until when we
-     * release that lock (bottom of this function). When he'll get the lock
-     * --thanks to the loop inside *_schedule_lock() functions-- he'll notice
-     * that the lock itself changed, and retry acquiring the new one (which
-     * will be the correct, remapped one, at that point).
+     * The actual switch, including the rerouting of the scheduler lock to
+     * whatever new_ops prefers, needs to happen in one critical section,
+     * protected by old_ops' lock, or races are possible.
+     * It is, in fact, the lock of the idle scheduler that we are taking.
+     * But that is ok: anyone trying to schedule on this cpu will spin until
+     * we release that lock (at the bottom of this function). When it finally
+     * gets the lock --thanks to the loop inside the *_schedule_lock()
+     * functions-- it will notice that the lock itself has changed and will
+     * retry acquiring the new (correct, remapped) one.
      */
     old_lock = pcpu_schedule_lock_irqsave(cpu, &flags);
 
-    vpriv_old = idle->sched_unit->priv;
-    ppriv_old = sd->sched_priv;
     new_lock = sched_switch_sched(new_ops, cpu, ppriv, vpriv);
 
-    sd->scheduler = new_ops;
-    sd->sched_priv = ppriv;
+    sr->scheduler = new_ops;
+    sr->sched_priv = ppriv;
 
     /*
-     * The data above is protected under new_lock, which may be unlocked.
-     * Another CPU can take new_lock as soon as sd->schedule_lock is visible,
-     * and must observe all prior initialisation.
+     * Reroute the lock to the per-pCPU lock as the /last/ thing. In fact,
+     * if it is free (and it can be), we want anyone that manages to take
+     * it to find all the initializations we've done above in place.
      */
     smp_wmb();
-    sd->schedule_lock = new_lock;
+    sr->schedule_lock = new_lock;
 
-    /* _Not_ pcpu_schedule_unlock(): schedule_lock may have changed! */
+    /* _Not_ pcpu_schedule_unlock(): schedule_lock has changed! */
     spin_unlock_irqrestore(old_lock, flags);
 
     sched_do_tick_resume(new_ops, cpu);
 
+    sr->granularity = cpupool_get_granularity(c);
+    sr->cpupool = c;
+    /* The cpu is added to a pool; trigger it to go pick up some work. */
+    cpu_raise_softirq(cpu, SCHEDULE_SOFTIRQ);
+
+    return 0;
+}
+
+/*
+ * Remove a pCPU from its cpupool. Its scheduler becomes &sched_idle_ops
+ * (the idle scheduler).
+ * The cpu is already marked as "free" and no longer valid for its
+ * cpupool.
+ */
+int schedule_cpu_rm(unsigned int cpu)
+{
+    struct vcpu *idle;
+    void *ppriv_old, *vpriv_old;
+    struct sched_resource *sr = get_sched_res(cpu);
+    struct scheduler *old_ops = sr->scheduler;
+    spinlock_t *old_lock;
+    unsigned long flags;
+
+    ASSERT(sr->cpupool != NULL);
+    ASSERT(cpumask_test_cpu(cpu, &cpupool_free_cpus));
+    ASSERT(!cpumask_test_cpu(cpu, sr->cpupool->cpu_valid));
+
+    idle = idle_vcpu[cpu];
+
+    sched_do_tick_suspend(old_ops, cpu);
+
+    /* See comment in schedule_cpu_add() regarding lock switching. */
+    old_lock = pcpu_schedule_lock_irqsave(cpu, &flags);
+
+    vpriv_old = idle->sched_unit->priv;
+    ppriv_old = sr->sched_priv;
+
+    idle->sched_unit->priv = NULL;
+    sr->scheduler = &sched_idle_ops;
+    sr->sched_priv = NULL;
+
+    smp_mb();
+    sr->schedule_lock = &sched_free_cpu_lock;
+
+    /* _Not_ pcpu_schedule_unlock(): schedule_lock may have changed! */
+    spin_unlock_irqrestore(old_lock, flags);
+
     sched_deinit_pdata(old_ops, ppriv_old, cpu);
 
     sched_free_udata(old_ops, vpriv_old);
     sched_free_pdata(old_ops, ppriv_old, cpu);
 
-    get_sched_res(cpu)->granularity = cpupool_get_granularity(c);
-    get_sched_res(cpu)->cpupool = c;
-    /* When a cpu is added to a pool, trigger it to go pick up some work */
-    if ( c != NULL )
-        cpu_raise_softirq(cpu, SCHEDULE_SOFTIRQ);
+    sr->granularity = 1;
+    sr->cpupool = NULL;
 
     return 0;
 }
diff --git a/xen/include/xen/sched.h b/xen/include/xen/sched.h
index aa8257edc9..a40bd5fb56 100644
--- a/xen/include/xen/sched.h
+++ b/xen/include/xen/sched.h
@@ -920,7 +920,8 @@ struct scheduler;
 struct scheduler *scheduler_get_default(void);
 struct scheduler *scheduler_alloc(unsigned int sched_id, int *perr);
 void scheduler_free(struct scheduler *sched);
-int schedule_cpu_switch(unsigned int cpu, struct cpupool *c);
+int schedule_cpu_add(unsigned int cpu, struct cpupool *c);
+int schedule_cpu_rm(unsigned int cpu);
 void vcpu_set_periodic_timer(struct vcpu *v, s_time_t value);
 int cpu_disable_scheduler(unsigned int cpu);
 void sched_setup_dom0_vcpus(struct domain *d);
-- 
2.16.4
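
As background for the lock rerouting done in both functions above, the
following is a minimal, self-contained C11 sketch of the same publish
pattern; all names and types here are hypothetical stand-ins, not Xen
code. The private data is switched first, the lock pointer is published
last, and acquirers re-check the pointer after taking the lock:

    #include <stdatomic.h>

    /* Hypothetical stand-ins for Xen's spinlock and struct sched_resource. */
    struct lock { atomic_flag held; };
    struct resource {
        struct lock *_Atomic schedule_lock; /* reroutable lock pointer */
        void *sched_priv;                   /* data protected by the lock */
    };

    static void lock_acquire(struct lock *l)
    {
        while ( atomic_flag_test_and_set_explicit(&l->held,
                                                  memory_order_acquire) )
            ;                               /* spin */
    }

    static void lock_release(struct lock *l)
    {
        atomic_flag_clear_explicit(&l->held, memory_order_release);
    }

    /*
     * Like pcpu_schedule_lock(): keep retrying until the lock we acquired
     * is still the one the resource points at.
     */
    static struct lock *resource_lock(struct resource *r)
    {
        for ( ; ; )
        {
            struct lock *l = atomic_load(&r->schedule_lock);

            lock_acquire(l);
            if ( l == atomic_load(&r->schedule_lock) )
                return l;                   /* still current: we own it */
            lock_release(l);                /* rerouted underneath us: retry */
        }
    }

    /*
     * Like the switch in schedule_cpu_add()/_rm(): swap the private data
     * under the old lock, publish the new lock pointer as the very last
     * store, then release the old lock (NOT whatever the pointer now says).
     */
    static void resource_switch(struct resource *r, void *new_priv,
                                struct lock *new_lock)
    {
        struct lock *old_lock = resource_lock(r);

        r->sched_priv = new_priv;
        /* seq_cst store: the write above is visible before the reroute. */
        atomic_store(&r->schedule_lock, new_lock);

        lock_release(old_lock);
    }

The re-check loop in resource_lock() is what makes releasing old_lock
safe: any spinner that raced with the reroute notices the stale pointer,
drops the old lock, and retries with the newly published one.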

