All of lore.kernel.org
 help / color / mirror / Atom feed
From: Juergen Gross <jgross@suse.com>
To: xen-devel@lists.xenproject.org
Cc: "Juergen Gross" <jgross@suse.com>,
	"Stefano Stabellini" <sstabellini@kernel.org>,
	"Wei Liu" <wl@xen.org>,
	"Konrad Rzeszutek Wilk" <konrad.wilk@oracle.com>,
	"George Dunlap" <George.Dunlap@eu.citrix.com>,
	"Andrew Cooper" <andrew.cooper3@citrix.com>,
	"Ian Jackson" <ian.jackson@eu.citrix.com>,
	"Tim Deegan" <tim@xen.org>, "Julien Grall" <julien.grall@arm.com>,
	"Jan Beulich" <jbeulich@suse.com>,
	"Dario Faggioli" <dfaggioli@suse.com>,
	"Roger Pau Monné" <roger.pau@citrix.com>
Subject: [PATCH 54/60] xen/sched: add minimalistic idle scheduler for free cpus
Date: Tue, 28 May 2019 12:33:07 +0200	[thread overview]
Message-ID: <20190528103313.1343-55-jgross@suse.com> (raw)
In-Reply-To: <20190528103313.1343-1-jgross@suse.com>

Instead of having a full blown scheduler running for the free cpus
add a very minimalistic scheduler for that purpose only ever scheduling
the related idle vcpu. This has the big advantage of not needing any
per-cpu, per-domain or per-scheduling unit data for free cpus and in
turn simplifying moving cpus to and from cpupools a lot.

As this new scheduler is not user selectable don't register it as an
official scheduler, but just include it in schedule.c.

Signed-off-by: Juergen Gross <jgross@suse.com>
---
V1: new patch
---
 xen/arch/arm/smpboot.c  |   2 -
 xen/arch/x86/smpboot.c  |   2 -
 xen/common/schedule.c   | 143 +++++++++++++++++++++++-------------------------
 xen/include/xen/sched.h |   1 -
 4 files changed, 67 insertions(+), 81 deletions(-)

diff --git a/xen/arch/arm/smpboot.c b/xen/arch/arm/smpboot.c
index 9a6582f2a6..f756444362 100644
--- a/xen/arch/arm/smpboot.c
+++ b/xen/arch/arm/smpboot.c
@@ -350,8 +350,6 @@ void start_secondary(unsigned long boot_phys_offset,
 
     setup_cpu_sibling_map(cpuid);
 
-    scheduler_percpu_init(cpuid);
-
     /* Run local notifiers */
     notify_cpu_starting(cpuid);
     /*
diff --git a/xen/arch/x86/smpboot.c b/xen/arch/x86/smpboot.c
index 7e95b2cdac..153bfbb4b7 100644
--- a/xen/arch/x86/smpboot.c
+++ b/xen/arch/x86/smpboot.c
@@ -382,8 +382,6 @@ void start_secondary(void *unused)
 
     set_cpu_sibling_map(cpu);
 
-    scheduler_percpu_init(cpu);
-
     init_percpu_time();
 
     setup_secondary_APIC_clock();
diff --git a/xen/common/schedule.c b/xen/common/schedule.c
index 7a5ab4b1b6..d3e4ae226c 100644
--- a/xen/common/schedule.c
+++ b/xen/common/schedule.c
@@ -83,6 +83,57 @@ extern const struct scheduler *__start_schedulers_array[], *__end_schedulers_arr
 
 static struct scheduler __read_mostly ops;
 
+static spinlock_t *
+sched_idle_switch_sched(struct scheduler *new_ops, unsigned int cpu,
+                        void *pdata, void *vdata)
+{
+    sched_idle_unit(cpu)->priv = NULL;
+
+    return &sched_free_cpu_lock;
+}
+
+static struct sched_resource *
+sched_idle_res_pick(const struct scheduler *ops, struct sched_unit *unit)
+{
+    return unit->res;
+}
+
+static void *
+sched_idle_alloc_vdata(const struct scheduler *ops, struct sched_unit *unit,
+                       void *dd)
+{
+    /* Any non-NULL pointer is fine here. */
+    return (void *)1UL;
+}
+
+static void
+sched_idle_free_vdata(const struct scheduler *ops, void *priv)
+{
+}
+
+static void sched_idle_schedule(
+    const struct scheduler *ops, struct sched_unit *unit, s_time_t now,
+    bool tasklet_work_scheduled)
+{
+    const unsigned int cpu = smp_processor_id();
+
+    unit->next_time = -1;
+    unit->next_task = sched_idle_unit(sched_get_resource_cpu(cpu));
+}
+
+static struct scheduler sched_idle_ops = {
+    .name           = "Idle Scheduler",
+    .opt_name       = "idle",
+    .sched_data     = NULL,
+
+    .pick_resource  = sched_idle_res_pick,
+    .do_schedule    = sched_idle_schedule,
+
+    .alloc_vdata    = sched_idle_alloc_vdata,
+    .free_vdata     = sched_idle_free_vdata,
+    .switch_sched   = sched_idle_switch_sched,
+};
+
 static inline struct vcpu *unit2vcpu_cpu(struct sched_unit *unit,
                                          unsigned int cpu)
 {
@@ -2141,7 +2192,6 @@ static void poll_timer_fn(void *data)
 static int cpu_schedule_up(unsigned int cpu)
 {
     struct sched_resource *sd;
-    void *sched_priv;
 
     sd = xzalloc(struct sched_resource);
     if ( sd == NULL )
@@ -2150,7 +2200,7 @@ static int cpu_schedule_up(unsigned int cpu)
     sd->cpus = cpumask_of(cpu);
     set_sched_res(cpu, sd);
 
-    sd->scheduler = &ops;
+    sd->scheduler = &sched_idle_ops;
     spin_lock_init(&sd->_lock);
     sd->schedule_lock = &sched_free_cpu_lock;
     init_timer(&sd->s_timer, s_timer_fn, NULL, cpu);
@@ -2171,20 +2221,10 @@ static int cpu_schedule_up(unsigned int cpu)
         struct sched_unit *unit = idle->sched_unit;
 
         /*
-         * During (ACPI?) suspend the idle vCPU for this pCPU is not freed,
-         * while its scheduler specific data (what is pointed by sched_priv)
-         * is. Also, at this stage of the resume path, we attach the pCPU
-         * to the default scheduler, no matter in what cpupool it was before
-         * suspend. To avoid inconsistency, let's allocate default scheduler
-         * data for the idle vCPU here. If the pCPU was in a different pool
-         * with a different scheduler, it is schedule_cpu_switch(), invoked
-         * later, that will set things up as appropriate.
+         * No need to allocate any scheduler data, as cpus coming online are
+         * free initially and the idle scheduler doesn't need any data areas
+         * allocated.
          */
-        ASSERT(unit->priv == NULL);
-
-        unit->priv = sched_alloc_vdata(&ops, unit, idle->domain->sched_priv);
-        if ( unit->priv == NULL )
-            return -ENOMEM;
 
         /* Update the resource pointer in the idle unit. */
         unit->res = sd;
@@ -2195,16 +2235,7 @@ static int cpu_schedule_up(unsigned int cpu)
     sd->curr = idle_vcpu[cpu]->sched_unit;
     sd->sched_unit_idle = idle_vcpu[cpu]->sched_unit;
 
-    /*
-     * We don't want to risk calling xfree() on an sd->sched_priv
-     * (e.g., inside free_pdata, from cpu_schedule_down() called
-     * during CPU_UP_CANCELLED) that contains an IS_ERR value.
-     */
-    sched_priv = sched_alloc_pdata(&ops, cpu);
-    if ( IS_ERR(sched_priv) )
-        return PTR_ERR(sched_priv);
-
-    sd->sched_priv = sched_priv;
+    sd->sched_priv = NULL;
 
     return 0;
 }
@@ -2212,13 +2243,6 @@ static int cpu_schedule_up(unsigned int cpu)
 static void cpu_schedule_down(unsigned int cpu)
 {
     struct sched_resource *sd = get_sched_res(cpu);
-    struct scheduler *sched = sd->scheduler;
-
-    sched_free_pdata(sched, sd->sched_priv, cpu);
-    sched_free_vdata(sched, idle_vcpu[cpu]->sched_unit->priv);
-
-    idle_vcpu[cpu]->sched_unit->priv = NULL;
-    sd->sched_priv = NULL;
 
     kill_timer(&sd->s_timer);
 
@@ -2226,26 +2250,14 @@ static void cpu_schedule_down(unsigned int cpu)
     xfree(sd);
 }
 
-void scheduler_percpu_init(unsigned int cpu)
-{
-    struct sched_resource *sd = get_sched_res(cpu);
-    struct scheduler *sched = sd->scheduler;
-
-    if ( system_state != SYS_STATE_resume )
-        sched_init_pdata(sched, sd->sched_priv, cpu);
-}
-
 void sched_rm_cpu(unsigned int cpu)
 {
     int rc;
-    struct sched_resource *sd = get_sched_res(cpu);
-    struct scheduler *sched = sd->scheduler;
 
     rcu_read_lock(&domlist_read_lock);
     rc = cpu_disable_scheduler(cpu);
     BUG_ON(rc);
     rcu_read_unlock(&domlist_read_lock);
-    sched_deinit_pdata(sched, sd->sched_priv, cpu);
     cpu_schedule_down(cpu);
 }
 
@@ -2260,32 +2272,22 @@ static int cpu_schedule_callback(
      * allocating and initializing the per-pCPU scheduler specific data,
      * as well as "registering" this pCPU to the scheduler (which may
      * involve modifying some scheduler wide data structures).
-     * This happens by calling the alloc_pdata and init_pdata hooks, in
-     * this order. A scheduler that does not need to allocate any per-pCPU
-     * data can avoid implementing alloc_pdata. init_pdata may, however, be
-     * necessary/useful in this case too (e.g., it can contain the "register
-     * the pCPU to the scheduler" part). alloc_pdata (if present) is called
-     * during CPU_UP_PREPARE. init_pdata (if present) is called before
-     * CPU_STARTING in scheduler_percpu_init().
+     * As new pCPUs always start as "free" cpus with the minimal idle
+     * scheduler being in charge, we don't need any of that.
      *
      * On the other hand, at teardown, we need to reverse what has been done
-     * during initialization, and then free the per-pCPU specific data. This
-     * happens by calling the deinit_pdata and free_pdata hooks, in this
+     * during initialization, and then free the per-pCPU specific data. A
+     * pCPU brought down is not forced through "free" cpus, so here we need to
+     * use the appropriate hooks.
+     *
+     * This happens by calling the deinit_pdata and free_pdata hooks, in this
      * order. If no per-pCPU memory was allocated, there is no need to
      * provide an implementation of free_pdata. deinit_pdata may, however,
      * be necessary/useful in this case too (e.g., it can undo something done
      * on scheduler wide data structure during init_pdata). Both deinit_pdata
      * and free_pdata are called during CPU_DEAD.
      *
-     * If someting goes wrong during bringup, we go to CPU_UP_CANCELLED
-     * *before* having called init_pdata. In this case, as there is no
-     * initialization needing undoing, only free_pdata should be called.
-     * This means it is possible to call free_pdata just after alloc_pdata,
-     * without a init_pdata/deinit_pdata "cycle" in between the two.
-     *
-     * So, in summary, the usage pattern should look either
-     *  - alloc_pdata-->init_pdata-->deinit_pdata-->free_pdata, or
-     *  - alloc_pdata-->free_pdata.
+     * If someting goes wrong during bringup, we go to CPU_UP_CANCELLED.
      */
     switch ( action )
     {
@@ -2402,9 +2404,6 @@ void __init scheduler_init(void)
         BUG();
     get_sched_res(0)->curr = idle_vcpu[0]->sched_unit;
     get_sched_res(0)->sched_unit_idle = idle_vcpu[0]->sched_unit;
-    get_sched_res(0)->sched_priv = sched_alloc_pdata(&ops, 0);
-    BUG_ON(IS_ERR(get_sched_res(0)->sched_priv));
-    scheduler_percpu_init(0);
 }
 
 /*
@@ -2412,18 +2411,14 @@ void __init scheduler_init(void)
  * cpupool, or subject it to the scheduler of a new cpupool.
  *
  * For the pCPUs that are removed from their cpupool, their scheduler becomes
- * &ops (the default scheduler, selected at boot, which also services the
- * default cpupool). However, as these pCPUs are not really part of any pool,
- * there won't be any scheduling event on them, not even from the default
- * scheduler. Basically, they will just sit idle until they are explicitly
- * added back to a cpupool.
+ * &sched_idle_ops (the idle scheduler).
  */
 int schedule_cpu_switch(unsigned int cpu, struct cpupool *c)
 {
     struct vcpu *idle;
     void *ppriv, *ppriv_old, *vpriv, *vpriv_old;
     struct scheduler *old_ops = get_sched_res(cpu)->scheduler;
-    struct scheduler *new_ops = (c == NULL) ? &ops : c->sched;
+    struct scheduler *new_ops = (c == NULL) ? &sched_idle_ops : c->sched;
     struct sched_resource *sd = get_sched_res(cpu);
     struct cpupool *old_pool = sd->cpupool;
     spinlock_t *old_lock, *new_lock;
@@ -2443,9 +2438,6 @@ int schedule_cpu_switch(unsigned int cpu, struct cpupool *c)
     ASSERT((c == NULL && !cpumask_test_cpu(cpu, old_pool->cpu_valid)) ||
            (c != NULL && !cpumask_test_cpu(cpu, c->cpu_valid)));
 
-    if ( old_ops == new_ops )
-        goto out;
-
     /*
      * To setup the cpu for the new scheduler we need:
      *  - a valid instance of per-CPU scheduler specific data, as it is
@@ -2498,7 +2490,7 @@ int schedule_cpu_switch(unsigned int cpu, struct cpupool *c)
      * taking it, finds all the initializations we've done above in place.
      */
     smp_mb();
-    sd->schedule_lock = c ? new_lock : &sched_free_cpu_lock;
+    sd->schedule_lock = new_lock;
 
     /* _Not_ pcpu_schedule_unlock(): schedule_lock may have changed! */
     spin_unlock_irqrestore(old_lock, flags);
@@ -2510,7 +2502,6 @@ int schedule_cpu_switch(unsigned int cpu, struct cpupool *c)
     sched_free_vdata(old_ops, vpriv_old);
     sched_free_pdata(old_ops, ppriv_old, cpu);
 
- out:
     get_sched_res(cpu)->granularity = c ? c->granularity : 1;
     get_sched_res(cpu)->cpupool = c;
     /* When a cpu is added to a pool, trigger it to go pick up some work */
diff --git a/xen/include/xen/sched.h b/xen/include/xen/sched.h
index 7dc63c449b..e689bba361 100644
--- a/xen/include/xen/sched.h
+++ b/xen/include/xen/sched.h
@@ -677,7 +677,6 @@ void __domain_crash(struct domain *d);
 void noreturn asm_domain_crash_synchronous(unsigned long addr);
 
 void scheduler_init(void);
-void scheduler_percpu_init(unsigned int cpu);
 int  sched_init_vcpu(struct vcpu *v);
 void sched_destroy_vcpu(struct vcpu *v);
 int  sched_init_domain(struct domain *d, int poolid);
-- 
2.16.4


_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

WARNING: multiple messages have this Message-ID (diff)
From: Juergen Gross <jgross@suse.com>
To: xen-devel@lists.xenproject.org
Cc: "Juergen Gross" <jgross@suse.com>,
	"Stefano Stabellini" <sstabellini@kernel.org>,
	"Wei Liu" <wl@xen.org>,
	"Konrad Rzeszutek Wilk" <konrad.wilk@oracle.com>,
	"George Dunlap" <George.Dunlap@eu.citrix.com>,
	"Andrew Cooper" <andrew.cooper3@citrix.com>,
	"Ian Jackson" <ian.jackson@eu.citrix.com>,
	"Tim Deegan" <tim@xen.org>, "Julien Grall" <julien.grall@arm.com>,
	"Jan Beulich" <jbeulich@suse.com>,
	"Dario Faggioli" <dfaggioli@suse.com>,
	"Roger Pau Monné" <roger.pau@citrix.com>
Subject: [Xen-devel] [PATCH 54/60] xen/sched: add minimalistic idle scheduler for free cpus
Date: Tue, 28 May 2019 12:33:07 +0200	[thread overview]
Message-ID: <20190528103313.1343-55-jgross@suse.com> (raw)
Message-ID: <20190528103307.PcJxJBaAdIXBHN2FbmAx5qOznqDwSwkgPdKb4CeSyqY@z> (raw)
In-Reply-To: <20190528103313.1343-1-jgross@suse.com>

Instead of having a full blown scheduler running for the free cpus
add a very minimalistic scheduler for that purpose only ever scheduling
the related idle vcpu. This has the big advantage of not needing any
per-cpu, per-domain or per-scheduling unit data for free cpus and in
turn simplifying moving cpus to and from cpupools a lot.

As this new scheduler is not user selectable don't register it as an
official scheduler, but just include it in schedule.c.

Signed-off-by: Juergen Gross <jgross@suse.com>
---
V1: new patch
---
 xen/arch/arm/smpboot.c  |   2 -
 xen/arch/x86/smpboot.c  |   2 -
 xen/common/schedule.c   | 143 +++++++++++++++++++++++-------------------------
 xen/include/xen/sched.h |   1 -
 4 files changed, 67 insertions(+), 81 deletions(-)

diff --git a/xen/arch/arm/smpboot.c b/xen/arch/arm/smpboot.c
index 9a6582f2a6..f756444362 100644
--- a/xen/arch/arm/smpboot.c
+++ b/xen/arch/arm/smpboot.c
@@ -350,8 +350,6 @@ void start_secondary(unsigned long boot_phys_offset,
 
     setup_cpu_sibling_map(cpuid);
 
-    scheduler_percpu_init(cpuid);
-
     /* Run local notifiers */
     notify_cpu_starting(cpuid);
     /*
diff --git a/xen/arch/x86/smpboot.c b/xen/arch/x86/smpboot.c
index 7e95b2cdac..153bfbb4b7 100644
--- a/xen/arch/x86/smpboot.c
+++ b/xen/arch/x86/smpboot.c
@@ -382,8 +382,6 @@ void start_secondary(void *unused)
 
     set_cpu_sibling_map(cpu);
 
-    scheduler_percpu_init(cpu);
-
     init_percpu_time();
 
     setup_secondary_APIC_clock();
diff --git a/xen/common/schedule.c b/xen/common/schedule.c
index 7a5ab4b1b6..d3e4ae226c 100644
--- a/xen/common/schedule.c
+++ b/xen/common/schedule.c
@@ -83,6 +83,57 @@ extern const struct scheduler *__start_schedulers_array[], *__end_schedulers_arr
 
 static struct scheduler __read_mostly ops;
 
+static spinlock_t *
+sched_idle_switch_sched(struct scheduler *new_ops, unsigned int cpu,
+                        void *pdata, void *vdata)
+{
+    sched_idle_unit(cpu)->priv = NULL;
+
+    return &sched_free_cpu_lock;
+}
+
+static struct sched_resource *
+sched_idle_res_pick(const struct scheduler *ops, struct sched_unit *unit)
+{
+    return unit->res;
+}
+
+static void *
+sched_idle_alloc_vdata(const struct scheduler *ops, struct sched_unit *unit,
+                       void *dd)
+{
+    /* Any non-NULL pointer is fine here. */
+    return (void *)1UL;
+}
+
+static void
+sched_idle_free_vdata(const struct scheduler *ops, void *priv)
+{
+}
+
+static void sched_idle_schedule(
+    const struct scheduler *ops, struct sched_unit *unit, s_time_t now,
+    bool tasklet_work_scheduled)
+{
+    const unsigned int cpu = smp_processor_id();
+
+    unit->next_time = -1;
+    unit->next_task = sched_idle_unit(sched_get_resource_cpu(cpu));
+}
+
+static struct scheduler sched_idle_ops = {
+    .name           = "Idle Scheduler",
+    .opt_name       = "idle",
+    .sched_data     = NULL,
+
+    .pick_resource  = sched_idle_res_pick,
+    .do_schedule    = sched_idle_schedule,
+
+    .alloc_vdata    = sched_idle_alloc_vdata,
+    .free_vdata     = sched_idle_free_vdata,
+    .switch_sched   = sched_idle_switch_sched,
+};
+
 static inline struct vcpu *unit2vcpu_cpu(struct sched_unit *unit,
                                          unsigned int cpu)
 {
@@ -2141,7 +2192,6 @@ static void poll_timer_fn(void *data)
 static int cpu_schedule_up(unsigned int cpu)
 {
     struct sched_resource *sd;
-    void *sched_priv;
 
     sd = xzalloc(struct sched_resource);
     if ( sd == NULL )
@@ -2150,7 +2200,7 @@ static int cpu_schedule_up(unsigned int cpu)
     sd->cpus = cpumask_of(cpu);
     set_sched_res(cpu, sd);
 
-    sd->scheduler = &ops;
+    sd->scheduler = &sched_idle_ops;
     spin_lock_init(&sd->_lock);
     sd->schedule_lock = &sched_free_cpu_lock;
     init_timer(&sd->s_timer, s_timer_fn, NULL, cpu);
@@ -2171,20 +2221,10 @@ static int cpu_schedule_up(unsigned int cpu)
         struct sched_unit *unit = idle->sched_unit;
 
         /*
-         * During (ACPI?) suspend the idle vCPU for this pCPU is not freed,
-         * while its scheduler specific data (what is pointed by sched_priv)
-         * is. Also, at this stage of the resume path, we attach the pCPU
-         * to the default scheduler, no matter in what cpupool it was before
-         * suspend. To avoid inconsistency, let's allocate default scheduler
-         * data for the idle vCPU here. If the pCPU was in a different pool
-         * with a different scheduler, it is schedule_cpu_switch(), invoked
-         * later, that will set things up as appropriate.
+         * No need to allocate any scheduler data, as cpus coming online are
+         * free initially and the idle scheduler doesn't need any data areas
+         * allocated.
          */
-        ASSERT(unit->priv == NULL);
-
-        unit->priv = sched_alloc_vdata(&ops, unit, idle->domain->sched_priv);
-        if ( unit->priv == NULL )
-            return -ENOMEM;
 
         /* Update the resource pointer in the idle unit. */
         unit->res = sd;
@@ -2195,16 +2235,7 @@ static int cpu_schedule_up(unsigned int cpu)
     sd->curr = idle_vcpu[cpu]->sched_unit;
     sd->sched_unit_idle = idle_vcpu[cpu]->sched_unit;
 
-    /*
-     * We don't want to risk calling xfree() on an sd->sched_priv
-     * (e.g., inside free_pdata, from cpu_schedule_down() called
-     * during CPU_UP_CANCELLED) that contains an IS_ERR value.
-     */
-    sched_priv = sched_alloc_pdata(&ops, cpu);
-    if ( IS_ERR(sched_priv) )
-        return PTR_ERR(sched_priv);
-
-    sd->sched_priv = sched_priv;
+    sd->sched_priv = NULL;
 
     return 0;
 }
@@ -2212,13 +2243,6 @@ static int cpu_schedule_up(unsigned int cpu)
 static void cpu_schedule_down(unsigned int cpu)
 {
     struct sched_resource *sd = get_sched_res(cpu);
-    struct scheduler *sched = sd->scheduler;
-
-    sched_free_pdata(sched, sd->sched_priv, cpu);
-    sched_free_vdata(sched, idle_vcpu[cpu]->sched_unit->priv);
-
-    idle_vcpu[cpu]->sched_unit->priv = NULL;
-    sd->sched_priv = NULL;
 
     kill_timer(&sd->s_timer);
 
@@ -2226,26 +2250,14 @@ static void cpu_schedule_down(unsigned int cpu)
     xfree(sd);
 }
 
-void scheduler_percpu_init(unsigned int cpu)
-{
-    struct sched_resource *sd = get_sched_res(cpu);
-    struct scheduler *sched = sd->scheduler;
-
-    if ( system_state != SYS_STATE_resume )
-        sched_init_pdata(sched, sd->sched_priv, cpu);
-}
-
 void sched_rm_cpu(unsigned int cpu)
 {
     int rc;
-    struct sched_resource *sd = get_sched_res(cpu);
-    struct scheduler *sched = sd->scheduler;
 
     rcu_read_lock(&domlist_read_lock);
     rc = cpu_disable_scheduler(cpu);
     BUG_ON(rc);
     rcu_read_unlock(&domlist_read_lock);
-    sched_deinit_pdata(sched, sd->sched_priv, cpu);
     cpu_schedule_down(cpu);
 }
 
@@ -2260,32 +2272,22 @@ static int cpu_schedule_callback(
      * allocating and initializing the per-pCPU scheduler specific data,
      * as well as "registering" this pCPU to the scheduler (which may
      * involve modifying some scheduler wide data structures).
-     * This happens by calling the alloc_pdata and init_pdata hooks, in
-     * this order. A scheduler that does not need to allocate any per-pCPU
-     * data can avoid implementing alloc_pdata. init_pdata may, however, be
-     * necessary/useful in this case too (e.g., it can contain the "register
-     * the pCPU to the scheduler" part). alloc_pdata (if present) is called
-     * during CPU_UP_PREPARE. init_pdata (if present) is called before
-     * CPU_STARTING in scheduler_percpu_init().
+     * As new pCPUs always start as "free" cpus with the minimal idle
+     * scheduler being in charge, we don't need any of that.
      *
      * On the other hand, at teardown, we need to reverse what has been done
-     * during initialization, and then free the per-pCPU specific data. This
-     * happens by calling the deinit_pdata and free_pdata hooks, in this
+     * during initialization, and then free the per-pCPU specific data. A
+     * pCPU brought down is not forced through "free" cpus, so here we need to
+     * use the appropriate hooks.
+     *
+     * This happens by calling the deinit_pdata and free_pdata hooks, in this
      * order. If no per-pCPU memory was allocated, there is no need to
      * provide an implementation of free_pdata. deinit_pdata may, however,
      * be necessary/useful in this case too (e.g., it can undo something done
      * on scheduler wide data structure during init_pdata). Both deinit_pdata
      * and free_pdata are called during CPU_DEAD.
      *
-     * If someting goes wrong during bringup, we go to CPU_UP_CANCELLED
-     * *before* having called init_pdata. In this case, as there is no
-     * initialization needing undoing, only free_pdata should be called.
-     * This means it is possible to call free_pdata just after alloc_pdata,
-     * without a init_pdata/deinit_pdata "cycle" in between the two.
-     *
-     * So, in summary, the usage pattern should look either
-     *  - alloc_pdata-->init_pdata-->deinit_pdata-->free_pdata, or
-     *  - alloc_pdata-->free_pdata.
+     * If someting goes wrong during bringup, we go to CPU_UP_CANCELLED.
      */
     switch ( action )
     {
@@ -2402,9 +2404,6 @@ void __init scheduler_init(void)
         BUG();
     get_sched_res(0)->curr = idle_vcpu[0]->sched_unit;
     get_sched_res(0)->sched_unit_idle = idle_vcpu[0]->sched_unit;
-    get_sched_res(0)->sched_priv = sched_alloc_pdata(&ops, 0);
-    BUG_ON(IS_ERR(get_sched_res(0)->sched_priv));
-    scheduler_percpu_init(0);
 }
 
 /*
@@ -2412,18 +2411,14 @@ void __init scheduler_init(void)
  * cpupool, or subject it to the scheduler of a new cpupool.
  *
  * For the pCPUs that are removed from their cpupool, their scheduler becomes
- * &ops (the default scheduler, selected at boot, which also services the
- * default cpupool). However, as these pCPUs are not really part of any pool,
- * there won't be any scheduling event on them, not even from the default
- * scheduler. Basically, they will just sit idle until they are explicitly
- * added back to a cpupool.
+ * &sched_idle_ops (the idle scheduler).
  */
 int schedule_cpu_switch(unsigned int cpu, struct cpupool *c)
 {
     struct vcpu *idle;
     void *ppriv, *ppriv_old, *vpriv, *vpriv_old;
     struct scheduler *old_ops = get_sched_res(cpu)->scheduler;
-    struct scheduler *new_ops = (c == NULL) ? &ops : c->sched;
+    struct scheduler *new_ops = (c == NULL) ? &sched_idle_ops : c->sched;
     struct sched_resource *sd = get_sched_res(cpu);
     struct cpupool *old_pool = sd->cpupool;
     spinlock_t *old_lock, *new_lock;
@@ -2443,9 +2438,6 @@ int schedule_cpu_switch(unsigned int cpu, struct cpupool *c)
     ASSERT((c == NULL && !cpumask_test_cpu(cpu, old_pool->cpu_valid)) ||
            (c != NULL && !cpumask_test_cpu(cpu, c->cpu_valid)));
 
-    if ( old_ops == new_ops )
-        goto out;
-
     /*
      * To setup the cpu for the new scheduler we need:
      *  - a valid instance of per-CPU scheduler specific data, as it is
@@ -2498,7 +2490,7 @@ int schedule_cpu_switch(unsigned int cpu, struct cpupool *c)
      * taking it, finds all the initializations we've done above in place.
      */
     smp_mb();
-    sd->schedule_lock = c ? new_lock : &sched_free_cpu_lock;
+    sd->schedule_lock = new_lock;
 
     /* _Not_ pcpu_schedule_unlock(): schedule_lock may have changed! */
     spin_unlock_irqrestore(old_lock, flags);
@@ -2510,7 +2502,6 @@ int schedule_cpu_switch(unsigned int cpu, struct cpupool *c)
     sched_free_vdata(old_ops, vpriv_old);
     sched_free_pdata(old_ops, ppriv_old, cpu);
 
- out:
     get_sched_res(cpu)->granularity = c ? c->granularity : 1;
     get_sched_res(cpu)->cpupool = c;
     /* When a cpu is added to a pool, trigger it to go pick up some work */
diff --git a/xen/include/xen/sched.h b/xen/include/xen/sched.h
index 7dc63c449b..e689bba361 100644
--- a/xen/include/xen/sched.h
+++ b/xen/include/xen/sched.h
@@ -677,7 +677,6 @@ void __domain_crash(struct domain *d);
 void noreturn asm_domain_crash_synchronous(unsigned long addr);
 
 void scheduler_init(void);
-void scheduler_percpu_init(unsigned int cpu);
 int  sched_init_vcpu(struct vcpu *v);
 void sched_destroy_vcpu(struct vcpu *v);
 int  sched_init_domain(struct domain *d, int poolid);
-- 
2.16.4


_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

  parent reply	other threads:[~2019-05-28 10:33 UTC|newest]

Thread overview: 202+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-05-28 10:32 [PATCH 00/60] xen: add core scheduling support Juergen Gross
2019-05-28 10:32 ` [Xen-devel] " Juergen Gross
2019-05-28 10:32 ` [PATCH 01/60] xen/sched: only allow schedulers with all mandatory functions available Juergen Gross
2019-05-28 10:32   ` [Xen-devel] " Juergen Gross
2019-06-11 16:03   ` Dario Faggioli
2019-05-28 10:32 ` [PATCH 02/60] xen/sched: add inline wrappers for calling per-scheduler functions Juergen Gross
2019-05-28 10:32   ` [Xen-devel] " Juergen Gross
2019-06-11 16:21   ` Dario Faggioli
2019-05-28 10:32 ` [PATCH 03/60] xen/sched: let sched_switch_sched() return new lock address Juergen Gross
2019-05-28 10:32   ` [Xen-devel] " Juergen Gross
2019-06-11 16:55   ` Dario Faggioli
2019-06-12  7:40     ` Jan Beulich
2019-06-12  8:06       ` Juergen Gross
2019-06-12  9:44         ` Dario Faggioli
2019-06-12  8:05   ` Andrew Cooper
2019-06-12  8:19     ` Juergen Gross
2019-06-12  9:32       ` Jan Beulich
     [not found]       ` <5D00C6960200007800237622@suse.com>
2019-06-12  9:56         ` Dario Faggioli
2019-06-12 10:14           ` Jan Beulich
2019-06-12 11:27             ` Andrew Cooper
2019-06-12 13:32               ` Dario Faggioli
2019-05-28 10:32 ` [PATCH 04/60] xen/sched: use new sched_unit instead of vcpu in scheduler interfaces Juergen Gross
2019-05-28 10:32   ` [Xen-devel] " Juergen Gross
2019-07-18 17:44   ` Dario Faggioli
2019-07-19  4:49     ` Juergen Gross
2019-07-19 17:01       ` Dario Faggioli
2019-05-28 10:32 ` [PATCH 05/60] xen/sched: alloc struct sched_unit for each vcpu Juergen Gross
2019-05-28 10:32   ` [Xen-devel] " Juergen Gross
2019-07-18 17:57   ` Dario Faggioli
2019-07-19  4:56     ` Juergen Gross
2019-07-19 17:04       ` Dario Faggioli
2019-05-28 10:32 ` [PATCH 06/60] xen/sched: move per-vcpu scheduler private data pointer to sched_unit Juergen Gross
2019-05-28 10:32   ` [Xen-devel] " Juergen Gross
2019-07-18 22:52   ` Dario Faggioli
2019-07-19  5:03     ` Juergen Gross
2019-07-19 17:10       ` Dario Faggioli
2019-05-28 10:32 ` [PATCH 07/60] xen/sched: build a linked list of struct sched_unit Juergen Gross
2019-05-28 10:32   ` [Xen-devel] " Juergen Gross
2019-07-19  0:01   ` Dario Faggioli
2019-07-19  5:07     ` Juergen Gross
2019-07-19 17:16       ` Dario Faggioli
2019-05-28 10:32 ` [PATCH 08/60] xen/sched: introduce struct sched_resource Juergen Gross
2019-05-28 10:32   ` [Xen-devel] " Juergen Gross
2019-07-19 17:43   ` Dario Faggioli
2019-07-19 17:49   ` Dario Faggioli
2019-05-28 10:32 ` [PATCH 09/60] xen/sched: let pick_cpu return a scheduler resource Juergen Gross
2019-05-28 10:32   ` [Xen-devel] " Juergen Gross
2019-07-19 18:06   ` Dario Faggioli
2019-05-28 10:32 ` [PATCH 10/60] xen/sched: switch schedule_data.curr to point at sched_unit Juergen Gross
2019-05-28 10:32   ` [Xen-devel] " Juergen Gross
2019-07-29 22:08   ` Dario Faggioli
2019-05-28 10:32 ` [PATCH 11/60] xen/sched: move per cpu scheduler private data into struct sched_resource Juergen Gross
2019-05-28 10:32   ` [Xen-devel] " Juergen Gross
2019-05-28 11:32   ` Jan Beulich
2019-05-28 11:32     ` [Xen-devel] " Jan Beulich
2019-07-29 22:22   ` Dario Faggioli
2019-05-28 10:32 ` [PATCH 12/60] xen/sched: switch vcpu_schedule_lock to unit_schedule_lock Juergen Gross
2019-05-28 10:32   ` [Xen-devel] " Juergen Gross
2019-05-28 10:32 ` [PATCH 13/60] xen/sched: move some per-vcpu items to struct sched_unit Juergen Gross
2019-05-28 10:32   ` [Xen-devel] " Juergen Gross
2019-06-13  7:18   ` Andrii Anisov
2019-06-13  7:29     ` Juergen Gross
2019-06-13  7:34       ` Andrii Anisov
2019-06-13  8:39         ` Juergen Gross
2019-06-13  8:49           ` Andrii Anisov
2019-05-28 10:32 ` [PATCH 14/60] xen/sched: add scheduler helpers hiding vcpu Juergen Gross
2019-05-28 10:32   ` [Xen-devel] " Juergen Gross
2019-05-28 10:32 ` [PATCH 15/60] xen/sched: add domain pointer to struct sched_unit Juergen Gross
2019-05-28 10:32   ` [Xen-devel] " Juergen Gross
2019-05-28 10:32 ` [PATCH 16/60] xen/sched: add id " Juergen Gross
2019-05-28 10:32   ` [Xen-devel] " Juergen Gross
2019-05-28 10:32 ` [PATCH 17/60] xen/sched: rename scheduler related perf counters Juergen Gross
2019-05-28 10:32   ` [Xen-devel] " Juergen Gross
2019-05-28 10:32 ` [PATCH 18/60] xen/sched: switch struct task_slice from vcpu to sched_unit Juergen Gross
2019-05-28 10:32   ` [Xen-devel] " Juergen Gross
2019-05-28 10:32 ` [PATCH 19/60] xen/sched: add is_running indicator to struct sched_unit Juergen Gross
2019-05-28 10:32   ` [Xen-devel] " Juergen Gross
2019-05-28 10:32 ` [PATCH 20/60] xen/sched: make null scheduler vcpu agnostic Juergen Gross
2019-05-28 10:32   ` [Xen-devel] " Juergen Gross
2019-05-28 10:32 ` [PATCH 21/60] xen/sched: make rt " Juergen Gross
2019-05-28 10:32   ` [Xen-devel] " Juergen Gross
2019-05-28 10:32 ` [PATCH 22/60] xen/sched: make credit " Juergen Gross
2019-05-28 10:32   ` [Xen-devel] " Juergen Gross
2019-05-28 10:32 ` [PATCH 23/60] xen/sched: make credit2 " Juergen Gross
2019-05-28 10:32   ` [Xen-devel] " Juergen Gross
2019-05-28 10:32 ` [PATCH 24/60] xen/sched: make arinc653 " Juergen Gross
2019-05-28 10:32   ` [Xen-devel] " Juergen Gross
2019-05-28 10:32 ` [PATCH 25/60] xen: add sched_unit_pause_nosync() and sched_unit_unpause() Juergen Gross
2019-05-28 10:32   ` [Xen-devel] " Juergen Gross
2019-05-28 10:32 ` [PATCH 26/60] xen: let vcpu_create() select processor Juergen Gross
2019-05-28 10:32   ` [Xen-devel] " Juergen Gross
2019-05-28 10:32 ` [PATCH 27/60] xen/sched: use sched_resource cpu instead smp_processor_id in schedulers Juergen Gross
2019-05-28 10:32   ` [Xen-devel] " Juergen Gross
2019-05-28 10:32 ` [PATCH 28/60] xen/sched: switch schedule() from vcpus to sched_units Juergen Gross
2019-05-28 10:32   ` [Xen-devel] " Juergen Gross
2019-05-28 10:32 ` [PATCH 29/60] xen/sched: switch sched_move_irqs() to take sched_unit as parameter Juergen Gross
2019-05-28 10:32   ` [Xen-devel] " Juergen Gross
2019-05-28 10:32 ` [PATCH 30/60] xen: switch from for_each_vcpu() to for_each_sched_unit() Juergen Gross
2019-05-28 10:32   ` [Xen-devel] " Juergen Gross
2019-05-28 10:32 ` [PATCH 31/60] xen/sched: add runstate counters to struct sched_unit Juergen Gross
2019-05-28 10:32   ` [Xen-devel] " Juergen Gross
2019-05-28 10:32 ` [PATCH 32/60] xen/sched: rework and rename vcpu_force_reschedule() Juergen Gross
2019-05-28 10:32   ` [Xen-devel] " Juergen Gross
2019-05-28 10:32 ` [PATCH 33/60] xen/sched: Change vcpu_migrate_*() to operate on schedule unit Juergen Gross
2019-05-28 10:32   ` [Xen-devel] " Juergen Gross
2019-05-28 10:32 ` [PATCH 34/60] xen/sched: move struct task_slice into struct sched_unit Juergen Gross
2019-05-28 10:32   ` [Xen-devel] " Juergen Gross
2019-05-28 10:32 ` [PATCH 35/60] xen/sched: add code to sync scheduling of all vcpus of a sched unit Juergen Gross
2019-05-28 10:32   ` [Xen-devel] " Juergen Gross
2019-05-28 10:32 ` [PATCH 36/60] xen/sched: introduce unit_runnable_state() Juergen Gross
2019-05-28 10:32   ` [Xen-devel] " Juergen Gross
2019-05-28 10:32 ` [PATCH 37/60] xen/sched: add support for multiple vcpus per sched unit where missing Juergen Gross
2019-05-28 10:32   ` [Xen-devel] " Juergen Gross
2019-05-28 10:32 ` [PATCH 38/60] x86: make loading of GDT at context switch more modular Juergen Gross
2019-05-28 10:32   ` [Xen-devel] " Juergen Gross
2019-07-02 15:38   ` Andrew Cooper
2019-05-28 10:32 ` [PATCH 39/60] x86: optimize loading of GDT at context switch Juergen Gross
2019-05-28 10:32   ` [Xen-devel] " Juergen Gross
2019-07-02 16:09   ` Andrew Cooper
2019-07-03  6:30     ` Juergen Gross
2019-07-03 12:21       ` Andrew Cooper
2019-07-05  7:30         ` Juergen Gross
2019-05-28 10:32 ` [PATCH 40/60] xen/sched: modify cpupool_domain_cpumask() to be an unit mask Juergen Gross
2019-05-28 10:32   ` [Xen-devel] " Juergen Gross
2019-05-28 10:32 ` [PATCH 41/60] xen/sched: support allocating multiple vcpus into one sched unit Juergen Gross
2019-05-28 10:32   ` [Xen-devel] " Juergen Gross
2019-05-28 10:32 ` [PATCH 42/60] xen/sched: add a scheduler_percpu_init() function Juergen Gross
2019-05-28 10:32   ` [Xen-devel] " Juergen Gross
2019-05-28 10:32 ` [PATCH 43/60] xen/sched: add a percpu resource index Juergen Gross
2019-05-28 10:32   ` [Xen-devel] " Juergen Gross
2019-05-28 10:32 ` [PATCH 44/60] xen/sched: add fall back to idle vcpu when scheduling unit Juergen Gross
2019-05-28 10:32   ` [Xen-devel] " Juergen Gross
2019-05-28 10:32 ` [PATCH 45/60] xen/sched: make vcpu_wake() and vcpu_sleep() core scheduling aware Juergen Gross
2019-05-28 10:32   ` [Xen-devel] " Juergen Gross
2019-05-28 10:32 ` [PATCH 46/60] xen/sched: carve out freeing sched_unit memory into dedicated function Juergen Gross
2019-05-28 10:32   ` [Xen-devel] " Juergen Gross
2019-05-28 10:33 ` [PATCH 47/60] xen/sched: move per-cpu variable scheduler to struct sched_resource Juergen Gross
2019-05-28 10:33   ` [Xen-devel] " Juergen Gross
2019-05-28 10:33 ` [PATCH 48/60] xen/sched: move per-cpu variable cpupool " Juergen Gross
2019-05-28 10:33   ` [Xen-devel] " Juergen Gross
2019-05-28 10:33 ` [PATCH 49/60] xen/sched: reject switching smt on/off with core scheduling active Juergen Gross
2019-05-28 10:33   ` [Xen-devel] " Juergen Gross
2019-05-28 11:44   ` Jan Beulich
2019-05-28 11:44     ` [Xen-devel] " Jan Beulich
2019-05-28 11:52     ` Juergen Gross
2019-05-28 11:52       ` [Xen-devel] " Juergen Gross
2019-06-12  9:36       ` Dario Faggioli
2019-05-28 10:33 ` [PATCH 50/60] xen/sched: prepare per-cpupool scheduling granularity Juergen Gross
2019-05-28 10:33   ` [Xen-devel] " Juergen Gross
2019-05-28 10:33 ` [PATCH 51/60] xen/sched: use one schedule lock for all free cpus Juergen Gross
2019-05-28 10:33   ` [Xen-devel] " Juergen Gross
2019-05-28 10:33 ` [PATCH 52/60] xen/sched: populate cpupool0 only after all cpus are up Juergen Gross
2019-05-28 10:33   ` [Xen-devel] " Juergen Gross
2019-05-28 10:33 ` [PATCH 53/60] xen/sched: remove cpu from pool0 before removing it Juergen Gross
2019-05-28 10:33   ` [Xen-devel] " Juergen Gross
2019-05-28 10:33 ` Juergen Gross [this message]
2019-05-28 10:33   ` [Xen-devel] [PATCH 54/60] xen/sched: add minimalistic idle scheduler for free cpus Juergen Gross
2019-05-28 11:47   ` Jan Beulich
2019-05-28 11:47     ` [Xen-devel] " Jan Beulich
2019-05-28 11:58     ` Juergen Gross
2019-05-28 11:58       ` [Xen-devel] " Juergen Gross
2019-05-31 14:15       ` Dario Faggioli
2019-05-31 14:15         ` [Xen-devel] " Dario Faggioli
2019-05-31 15:52   ` Dario Faggioli
2019-05-31 15:52     ` [Xen-devel] " Dario Faggioli
2019-05-31 16:44     ` Juergen Gross
2019-05-31 16:44       ` [Xen-devel] " Juergen Gross
2019-05-28 10:33 ` [PATCH 55/60] xen/sched: split schedule_cpu_switch() Juergen Gross
2019-05-28 10:33   ` [Xen-devel] " Juergen Gross
2019-05-28 10:33 ` [PATCH 56/60] xen/sched: protect scheduling resource via rcu Juergen Gross
2019-05-28 10:33   ` [Xen-devel] " Juergen Gross
2019-05-28 10:33 ` [PATCH 57/60] xen/sched: support multiple cpus per scheduling resource Juergen Gross
2019-05-28 10:33   ` [Xen-devel] " Juergen Gross
2019-05-28 10:33 ` [PATCH 58/60] xen/sched: support differing granularity in schedule_cpu_[add/rm]() Juergen Gross
2019-05-28 10:33   ` [Xen-devel] " Juergen Gross
2019-05-28 10:33 ` [PATCH 59/60] xen/sched: support core scheduling for moving cpus to/from cpupools Juergen Gross
2019-05-28 10:33   ` [Xen-devel] " Juergen Gross
2019-05-28 10:33 ` [PATCH 60/60] xen/sched: add scheduling granularity enum Juergen Gross
2019-05-28 10:33   ` [Xen-devel] " Juergen Gross
2019-05-28 11:51   ` Jan Beulich
2019-05-28 11:51     ` [Xen-devel] " Jan Beulich
2019-05-28 12:02     ` Juergen Gross
2019-05-28 12:02       ` [Xen-devel] " Juergen Gross
2019-07-19 18:31   ` Dario Faggioli
2019-07-05 13:17 ` [Xen-devel] [PATCH 00/60] xen: add core scheduling support Sergey Dyasli
2019-07-05 13:22   ` Juergen Gross
2019-07-05 13:56   ` Dario Faggioli
2019-07-15 14:08     ` Sergey Dyasli
2019-07-18 14:48       ` Juergen Gross
2019-07-18 15:14         ` Sergey Dyasli
2019-07-18 16:04           ` Dario Faggioli
2019-07-19  5:41           ` Juergen Gross
2019-07-19 11:24             ` Juergen Gross
2019-07-19 13:57           ` Juergen Gross
2019-07-22 14:22             ` Sergey Dyasli
2019-07-24  9:13               ` Juergen Gross
2019-07-24 14:54                 ` Sergey Dyasli
2019-07-24 15:11                   ` Juergen Gross
2019-07-16 15:45   ` Sergey Dyasli
2019-07-19 13:35     ` Juergen Gross
2019-07-25 16:01   ` Sergey Dyasli
2019-07-11 13:40 ` Dario Faggioli
     [not found] <20190528103313.1343„1„jgross@suse.com>
     [not found] ` <20190528103313.1343„4„jgross@suse.com>
     [not found] <20190528103313.13431jgross@suse.com>
     [not found] ` <20190528103313.13434jgross@suse.com>

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20190528103313.1343-55-jgross@suse.com \
    --to=jgross@suse.com \
    --cc=George.Dunlap@eu.citrix.com \
    --cc=andrew.cooper3@citrix.com \
    --cc=dfaggioli@suse.com \
    --cc=ian.jackson@eu.citrix.com \
    --cc=jbeulich@suse.com \
    --cc=julien.grall@arm.com \
    --cc=konrad.wilk@oracle.com \
    --cc=roger.pau@citrix.com \
    --cc=sstabellini@kernel.org \
    --cc=tim@xen.org \
    --cc=wl@xen.org \
    --cc=xen-devel@lists.xenproject.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.