xen-devel.lists.xenproject.org archive mirror
* [PATCH 1/3] xen: Some code motion to avoid having to do forward-declaration
@ 2016-07-15 18:02 George Dunlap
  2016-07-15 18:02 ` [PATCH 2/3] xen: Have schedulers revise initial placement George Dunlap
                   ` (3 more replies)
  0 siblings, 4 replies; 33+ messages in thread
From: George Dunlap @ 2016-07-15 18:02 UTC (permalink / raw)
  To: xen-devel; +Cc: Dario Faggioli, George Dunlap, Anshul Makkar, Meng Xu

For sched_credit2, move the vcpu insert / remove / free functions near the domain
insert / remove / alloc / free functions (and after cpu_pick).

For sched_rt, move rt_cpu_pick() further up.

This is pure code motion; no functional change.

Signed-off-by: George Dunlap <george.dunlap@citrix.com>
---
CC: Dario Faggioli <dario.faggioli@citrix.com>
CC: Anshul Makkar <anshul.makkar@citrix.com>
CC: Meng Xu <mengxu@cis.upenn.edu>
---
 xen/common/sched_credit2.c | 118 ++++++++++++++++++++++-----------------------
 xen/common/sched_rt.c      |  46 +++++++++---------
 2 files changed, 82 insertions(+), 82 deletions(-)

diff --git a/xen/common/sched_credit2.c b/xen/common/sched_credit2.c
index 8b95a47..3b9aa27 100644
--- a/xen/common/sched_credit2.c
+++ b/xen/common/sched_credit2.c
@@ -971,65 +971,6 @@ runq_deassign(const struct scheduler *ops, struct vcpu *vc)
 }
 
 static void
-csched2_vcpu_insert(const struct scheduler *ops, struct vcpu *vc)
-{
-    struct csched2_vcpu *svc = vc->sched_priv;
-    struct csched2_dom * const sdom = svc->sdom;
-    spinlock_t *lock;
-
-    printk("%s: Inserting %pv\n", __func__, vc);
-
-    BUG_ON(is_idle_vcpu(vc));
-
-    /* Add vcpu to runqueue of initial processor */
-    lock = vcpu_schedule_lock_irq(vc);
-
-    runq_assign(ops, vc);
-
-    vcpu_schedule_unlock_irq(lock, vc);
-
-    sdom->nr_vcpus++;
-
-    SCHED_STAT_CRANK(vcpu_insert);
-
-    CSCHED2_VCPU_CHECK(vc);
-}
-
-static void
-csched2_free_vdata(const struct scheduler *ops, void *priv)
-{
-    struct csched2_vcpu *svc = priv;
-
-    xfree(svc);
-}
-
-static void
-csched2_vcpu_remove(const struct scheduler *ops, struct vcpu *vc)
-{
-    struct csched2_vcpu * const svc = CSCHED2_VCPU(vc);
-    struct csched2_dom * const sdom = svc->sdom;
-
-    BUG_ON( sdom == NULL );
-    BUG_ON( !list_empty(&svc->runq_elem) );
-
-    if ( ! is_idle_vcpu(vc) )
-    {
-        spinlock_t *lock;
-
-        SCHED_STAT_CRANK(vcpu_remove);
-
-        /* Remove from runqueue */
-        lock = vcpu_schedule_lock_irq(vc);
-
-        runq_deassign(ops, vc);
-
-        vcpu_schedule_unlock_irq(lock, vc);
-
-        svc->sdom->nr_vcpus--;
-    }
-}
-
-static void
 csched2_vcpu_sleep(const struct scheduler *ops, struct vcpu *vc)
 {
     struct csched2_vcpu * const svc = CSCHED2_VCPU(vc);
@@ -1668,6 +1609,65 @@ csched2_dom_destroy(const struct scheduler *ops, struct domain *dom)
     csched2_free_domdata(ops, CSCHED2_DOM(dom));
 }
 
+static void
+csched2_vcpu_insert(const struct scheduler *ops, struct vcpu *vc)
+{
+    struct csched2_vcpu *svc = vc->sched_priv;
+    struct csched2_dom * const sdom = svc->sdom;
+    spinlock_t *lock;
+
+    printk("%s: Inserting %pv\n", __func__, vc);
+
+    BUG_ON(is_idle_vcpu(vc));
+
+    /* Add vcpu to runqueue of initial processor */
+    lock = vcpu_schedule_lock_irq(vc);
+
+    runq_assign(ops, vc);
+
+    vcpu_schedule_unlock_irq(lock, vc);
+
+    sdom->nr_vcpus++;
+
+    SCHED_STAT_CRANK(vcpu_insert);
+
+    CSCHED2_VCPU_CHECK(vc);
+}
+
+static void
+csched2_free_vdata(const struct scheduler *ops, void *priv)
+{
+    struct csched2_vcpu *svc = priv;
+
+    xfree(svc);
+}
+
+static void
+csched2_vcpu_remove(const struct scheduler *ops, struct vcpu *vc)
+{
+    struct csched2_vcpu * const svc = CSCHED2_VCPU(vc);
+    struct csched2_dom * const sdom = svc->sdom;
+
+    BUG_ON( sdom == NULL );
+    BUG_ON( !list_empty(&svc->runq_elem) );
+
+    if ( ! is_idle_vcpu(vc) )
+    {
+        spinlock_t *lock;
+
+        SCHED_STAT_CRANK(vcpu_remove);
+
+        /* Remove from runqueue */
+        lock = vcpu_schedule_lock_irq(vc);
+
+        runq_deassign(ops, vc);
+
+        vcpu_schedule_unlock_irq(lock, vc);
+
+        svc->sdom->nr_vcpus--;
+    }
+}
+
 /* How long should we let this vcpu run for? */
 static s_time_t
 csched2_runtime(const struct scheduler *ops, int cpu, struct csched2_vcpu *snext)
diff --git a/xen/common/sched_rt.c b/xen/common/sched_rt.c
index 98524a6..bd3a2a0 100644
--- a/xen/common/sched_rt.c
+++ b/xen/common/sched_rt.c
@@ -582,6 +582,29 @@ replq_reinsert(const struct scheduler *ops, struct rt_vcpu *svc)
 }
 
 /*
+ * Pick a valid CPU for the vcpu vc
+ * Valid CPU of a vcpu is intesection of vcpu's affinity
+ * and available cpus
+ */
+static int
+rt_cpu_pick(const struct scheduler *ops, struct vcpu *vc)
+{
+    cpumask_t cpus;
+    cpumask_t *online;
+    int cpu;
+
+    online = cpupool_domain_cpumask(vc->domain);
+    cpumask_and(&cpus, online, vc->cpu_hard_affinity);
+
+    cpu = cpumask_test_cpu(vc->processor, &cpus)
+            ? vc->processor
+            : cpumask_cycle(vc->processor, &cpus);
+    ASSERT( !cpumask_empty(&cpus) && cpumask_test_cpu(cpu, &cpus) );
+
+    return cpu;
+}
+
+/*
  * Init/Free related code
  */
 static int
@@ -894,29 +917,6 @@ rt_vcpu_remove(const struct scheduler *ops, struct vcpu *vc)
 }
 
 /*
- * Pick a valid CPU for the vcpu vc
- * Valid CPU of a vcpu is intesection of vcpu's affinity
- * and available cpus
- */
-static int
-rt_cpu_pick(const struct scheduler *ops, struct vcpu *vc)
-{
-    cpumask_t cpus;
-    cpumask_t *online;
-    int cpu;
-
-    online = cpupool_domain_cpumask(vc->domain);
-    cpumask_and(&cpus, online, vc->cpu_hard_affinity);
-
-    cpu = cpumask_test_cpu(vc->processor, &cpus)
-            ? vc->processor
-            : cpumask_cycle(vc->processor, &cpus);
-    ASSERT( !cpumask_empty(&cpus) && cpumask_test_cpu(cpu, &cpus) );
-
-    return cpu;
-}
-
-/*
  * Burn budget in nanosecond granularity
  */
 static void
-- 
2.1.4



* [PATCH 2/3] xen: Have schedulers revise initial placement
  2016-07-15 18:02 [PATCH 1/3] xen: Some code motion to avoid having to do forward-declaration George Dunlap
@ 2016-07-15 18:02 ` George Dunlap
  2016-07-15 18:07   ` Andrew Cooper
                     ` (3 more replies)
  2016-07-15 18:02 ` [PATCH 3/3] xen: Remove buggy initial placement algorithm George Dunlap
                   ` (2 subsequent siblings)
  3 siblings, 4 replies; 33+ messages in thread
From: George Dunlap @ 2016-07-15 18:02 UTC (permalink / raw)
  To: xen-devel
  Cc: Dario Faggioli, Jan Beulich, George Dunlap, Anshul Makkar, Meng Xu

The generic domain creation logic in
xen/common/domctl.c:default_vcpu0_location() attempts to do
initial placement load-balancing by placing vcpu 0 on the least-busy
non-primary hyperthread available.  Unfortunately, the logic can end
up picking a pcpu that's not in the online mask.  When this is passed
to a scheduler which assumes that the initial assignment is
valid, it causes a null pointer dereference when looking up the runqueue.

Furthermore, this initial placement doesn't take into account hard or
soft affinity, or any scheduler-specific knowledge (such as historic
runqueue load, as in credit2).

To solve this, when inserting a vcpu, always call the per-scheduler
"pick" function to revise the initial placement.  This will
automatically take all knowledge the scheduler has into account.

Signed-off-by: George Dunlap <george.dunlap@citrix.com>
---
CC: Dario Faggioli <dario.faggioli@citrix.com>
CC: Anshul Makkar <anshul.makkar@citrix.com>
CC: Meng Xu <mengxu@cis.upenn.edu>
CC: Jan Beulich <jbeulich@suse.com>
---
 xen/common/sched_credit.c  |  3 +++
 xen/common/sched_credit2.c | 12 ++++++++++--
 xen/common/sched_rt.c      |  3 +++
 3 files changed, 16 insertions(+), 2 deletions(-)

diff --git a/xen/common/sched_credit.c b/xen/common/sched_credit.c
index ac22746..a3ef00a 100644
--- a/xen/common/sched_credit.c
+++ b/xen/common/sched_credit.c
@@ -998,6 +998,9 @@ csched_vcpu_insert(const struct scheduler *ops, struct vcpu *vc)
 
     BUG_ON( is_idle_vcpu(vc) );
 
+    /* This is safe because vc isn't yet being scheduled */
+    vc->processor = csched_cpu_pick(ops, vc);
+
     lock = vcpu_schedule_lock_irq(vc);
 
     if ( !__vcpu_on_runq(svc) && vcpu_runnable(vc) && !vc->is_running )
diff --git a/xen/common/sched_credit2.c b/xen/common/sched_credit2.c
index 3b9aa27..5a04985 100644
--- a/xen/common/sched_credit2.c
+++ b/xen/common/sched_credit2.c
@@ -1620,15 +1620,23 @@ csched2_vcpu_insert(const struct scheduler *ops, struct vcpu *vc)
 
     BUG_ON(is_idle_vcpu(vc));
 
+    /* Locks in cpu_pick expect irqs to be disabled */
+    local_irq_disable();
+    
+    /* This is safe because vc isn't yet being scheduled */
+    vc->processor = csched2_cpu_pick(ops, vc);
+
     /* Add vcpu to runqueue of initial processor */
-    lock = vcpu_schedule_lock_irq(vc);
+    lock = vcpu_schedule_lock(vc);
 
     runq_assign(ops, vc);
 
-    vcpu_schedule_unlock_irq(lock, vc);
+    vcpu_schedule_unlock(lock, vc);
 
     sdom->nr_vcpus++;
 
+    local_irq_enable();
+
     SCHED_STAT_CRANK(vcpu_insert);
 
     CSCHED2_VCPU_CHECK(vc);
diff --git a/xen/common/sched_rt.c b/xen/common/sched_rt.c
index bd3a2a0..41c61a7 100644
--- a/xen/common/sched_rt.c
+++ b/xen/common/sched_rt.c
@@ -874,6 +874,9 @@ rt_vcpu_insert(const struct scheduler *ops, struct vcpu *vc)
 
     BUG_ON( is_idle_vcpu(vc) );
 
+    /* This is safe because vc isn't yet being scheduled */
+    vc->processor = rt_cpu_pick(ops, vc);
+
     lock = vcpu_schedule_lock_irq(vc);
 
     now = NOW();
-- 
2.1.4



* [PATCH 3/3] xen: Remove buggy initial placement algorithm
  2016-07-15 18:02 [PATCH 1/3] xen: Some code motion to avoid having to do forward-declaration George Dunlap
  2016-07-15 18:02 ` [PATCH 2/3] xen: Have schedulers revise initial placement George Dunlap
@ 2016-07-15 18:02 ` George Dunlap
  2016-07-15 18:10   ` Andrew Cooper
  2016-07-16 13:55   ` Dario Faggioli
  2016-07-16 15:48 ` [PATCH 1/3] xen: Some code motion to avoid having to do forward-declaration Meng Xu
  2016-07-18  9:58 ` Dario Faggioli
  3 siblings, 2 replies; 33+ messages in thread
From: George Dunlap @ 2016-07-15 18:02 UTC (permalink / raw)
  To: xen-devel
  Cc: Stefano Stabellini, Wei Liu, Andrew Cooper, Dario Faggioli,
	Ian Jackson, George Dunlap, Tim Deegan, Meng Xu, Jan Beulich,
	Anshul Makkar

The initial placement algorithm sometimes picks cpus outside of the
mask it's given, does a lot of unnecessary bitmasking, does its own
separate load calculation, and completely ignores vcpu hard and soft
affinities.  Just get rid of it and rely on the schedulers to do
initial placement.

Signed-off-by: George Dunlap <george.dunlap@citrix.com>
---
Since many of the schedulers' cpu_pick functions have a strong preference to
just leave the cpu where it is (in particular, credit1 and rt), this
may cause some cpus to be overloaded when creating a lot of domains.
Arguably this should be fixed in the schedulers themselves.

The core problem with default_vcpu0_location() is that it chooses its
initial cpu based on the sibling of pcpu 0, not the first available
sibling in the online mask; so if pcpu 1 ends up being less "busy"
than all the cpus in the pool, then it ends up being chosen even
though it's not in the pool.

Fixing the algorithm would involve starting with the sibling map of
cpumask_first(online) rather than 0, and then having all sibling
checks not only test that the result of cpumask_next() < nr_cpu_ids,
but that the result is in online.
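
A rough sketch of that first fix (hypothetical, of course, since this
patch removes the function rather than fixing it):

    /* Seed cpu and the exclude map from the first *online* cpu. */
    cpu = cpumask_first(online);
    cpumask_copy(&cpu_exclude_map, per_cpu(cpu_sibling_mask, cpu));
    i = cpumask_next(cpu, &cpu_exclude_map);
    if ( i < nr_cpu_ids && cpumask_test_cpu(i, online) )
        cpu = i; /* prefer a non-primary sibling, but only if online */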

Additionally, as far as I can tell, the cpumask_test_cpu(i,
&cpu_exclude_map) at the top of the for_each_cpu() loop can never
return false; and thus both this test and the cpumask_or() are
unnecessary and should be removed.

CC: Dario Faggioli <dario.faggioli@citrix.com>
CC: Anshul Makkar <anshul.makkar@citrix.com>
CC: Meng Xu <mengxu@cis.upenn.edu>
CC: Andrew Cooper <andrew.cooper3@citrix.com>
CC: Ian Jackson <ian.jackson@eu.citrix.com>
CC: Jan Beulich <jbeulich@suse.com>
CC: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
CC: Stefano Stabellini <sstabellini@kernel.org>
CC: Tim Deegan <tim@xen.org>
CC: Wei Liu <wei.liu2@citrix.com>
---
 xen/common/domctl.c | 50 +-------------------------------------------------
 1 file changed, 1 insertion(+), 49 deletions(-)

diff --git a/xen/common/domctl.c b/xen/common/domctl.c
index b784e6c..cc85e27 100644
--- a/xen/common/domctl.c
+++ b/xen/common/domctl.c
@@ -217,54 +217,6 @@ void getdomaininfo(struct domain *d, struct xen_domctl_getdomaininfo *info)
     memcpy(info->handle, d->handle, sizeof(xen_domain_handle_t));
 }
 
-static unsigned int default_vcpu0_location(cpumask_t *online)
-{
-    struct domain *d;
-    struct vcpu   *v;
-    unsigned int   i, cpu, nr_cpus, *cnt;
-    cpumask_t      cpu_exclude_map;
-
-    /* Do an initial CPU placement. Pick the least-populated CPU. */
-    nr_cpus = cpumask_last(&cpu_online_map) + 1;
-    cnt = xzalloc_array(unsigned int, nr_cpus);
-    if ( cnt )
-    {
-        rcu_read_lock(&domlist_read_lock);
-        for_each_domain ( d )
-            for_each_vcpu ( d, v )
-                if ( !(v->pause_flags & VPF_down)
-                     && ((cpu = v->processor) < nr_cpus) )
-                    cnt[cpu]++;
-        rcu_read_unlock(&domlist_read_lock);
-    }
-
-    /*
-     * If we're on a HT system, we only auto-allocate to a non-primary HT. We
-     * favour high numbered CPUs in the event of a tie.
-     */
-    cpumask_copy(&cpu_exclude_map, per_cpu(cpu_sibling_mask, 0));
-    cpu = cpumask_first(&cpu_exclude_map);
-    i = cpumask_next(cpu, &cpu_exclude_map);
-    if ( i < nr_cpu_ids )
-        cpu = i;
-    for_each_cpu(i, online)
-    {
-        if ( cpumask_test_cpu(i, &cpu_exclude_map) )
-            continue;
-        if ( (i == cpumask_first(per_cpu(cpu_sibling_mask, i))) &&
-             (cpumask_next(i, per_cpu(cpu_sibling_mask, i)) < nr_cpu_ids) )
-            continue;
-        cpumask_or(&cpu_exclude_map, &cpu_exclude_map,
-                   per_cpu(cpu_sibling_mask, i));
-        if ( !cnt || cnt[i] <= cnt[cpu] )
-            cpu = i;
-    }
-
-    xfree(cnt);
-
-    return cpu;
-}
-
 bool_t domctl_lock_acquire(void)
 {
     /*
@@ -691,7 +643,7 @@ long do_domctl(XEN_GUEST_HANDLE_PARAM(xen_domctl_t) u_domctl)
                 continue;
 
             cpu = (i == 0) ?
-                default_vcpu0_location(online) :
+                cpumask_first(online) :
                 cpumask_cycle(d->vcpu[i-1]->processor, online);
 
             if ( alloc_vcpu(d, i, cpu) == NULL )
-- 
2.1.4



* Re: [PATCH 2/3] xen: Have schedulers revise initial placement
  2016-07-15 18:02 ` [PATCH 2/3] xen: Have schedulers revise initial placement George Dunlap
@ 2016-07-15 18:07   ` Andrew Cooper
  2016-07-16 14:12     ` Dario Faggioli
  2016-07-18 10:28   ` Dario Faggioli
                     ` (2 subsequent siblings)
  3 siblings, 1 reply; 33+ messages in thread
From: Andrew Cooper @ 2016-07-15 18:07 UTC (permalink / raw)
  To: George Dunlap, xen-devel
  Cc: Dario Faggioli, Anshul Makkar, Jan Beulich, Meng Xu

On 15/07/16 19:02, George Dunlap wrote:
> diff --git a/xen/common/sched_credit2.c b/xen/common/sched_credit2.c
> index 3b9aa27..5a04985 100644
> --- a/xen/common/sched_credit2.c
> +++ b/xen/common/sched_credit2.c
> @@ -1620,15 +1620,23 @@ csched2_vcpu_insert(const struct scheduler *ops, struct vcpu *vc)
>  
>      BUG_ON(is_idle_vcpu(vc));
>  
> +    /* Locks in cpu_pick expect irqs to be disabled */
> +    local_irq_disable();

This doesn't make the problem much worse, but is there a plan to fix
this issue?

None of the scheduler-accounting functions should be disabling interrupts.

~Andrew


* Re: [PATCH 3/3] xen: Remove buggy initial placement algorithm
  2016-07-15 18:02 ` [PATCH 3/3] xen: Remove buggy initial placement algorithm George Dunlap
@ 2016-07-15 18:10   ` Andrew Cooper
  2016-07-16 13:55   ` Dario Faggioli
  1 sibling, 0 replies; 33+ messages in thread
From: Andrew Cooper @ 2016-07-15 18:10 UTC (permalink / raw)
  To: George Dunlap, xen-devel
  Cc: Stefano Stabellini, Wei Liu, Tim Deegan, Dario Faggioli,
	Ian Jackson, Meng Xu, Jan Beulich, Anshul Makkar

On 15/07/16 19:02, George Dunlap wrote:
> The initial placement algorithm sometimes picks cpus outside of the
> mask it's given, does a lot of unnecessary bitmasking, does its own
> separate load calculation, and completely ignores vcpu hard and soft
> affinities.  Just get rid of it and rely on the schedulers to do
> initial placement.
>
> Signed-off-by: George Dunlap <george.dunlap@citrix.com>
> ---
> Since many of the schedulers' cpu_pick functions have a strong preference to
> just leave the cpu where it is (in particular, credit1 and rt), this
> may cause some cpus to be overloaded when creating a lot of domains.
> Arguably this should be fixed in the schedulers themselves.
>
> The core problem with default_vcpu0_location() is that it chooses its
> initial cpu based on the sibling of pcpu 0, not the first available
> sibling in the online mask; so if pcpu 1 ends up being less "busy"
> than all the cpus in the pool, then it ends up being chosen even
> though it's not in the pool.
>
> Fixing the algorithm would involve starting with the sibling map of
> cpumask_first(online) rather than 0, and then having all sibling
> checks not only test that the result of cpumask_next() < nr_cpu_ids,
> but that the result is in online.
>
> Additionally, as far as I can tell, the cpumask_test_cpu(i,
> &cpu_exclude_map) at the top of the for_each_cpu() loop can never
> return false; and thus both this test and the cpumask_or() are
> unnecessary and should be removed.

Presumably the overloaded pcpu will quickly become less loaded as
work-stealing starts to happen?

As for default_vcpu0_location(), getting rid of it definitely looks like
a good move.

~Andrew


* Re: [PATCH 3/3] xen: Remove buggy initial placement algorithm
  2016-07-15 18:02 ` [PATCH 3/3] xen: Remove buggy initial placement algorithm George Dunlap
  2016-07-15 18:10   ` Andrew Cooper
@ 2016-07-16 13:55   ` Dario Faggioli
  2016-07-18 10:03     ` George Dunlap
  1 sibling, 1 reply; 33+ messages in thread
From: Dario Faggioli @ 2016-07-16 13:55 UTC (permalink / raw)
  To: George Dunlap, xen-devel
  Cc: Stefano Stabellini, Wei Liu, Andrew Cooper, Anshul Makkar,
	Ian Jackson, Tim Deegan, Meng Xu, Jan Beulich



On Fri, 2016-07-15 at 19:02 +0100, George Dunlap wrote:
> The initial placement algorithm sometimes picks cpus outside of the
> mask it's given, does a lot of unnecessary bitmasking, does its own
> separate load calculation, and completely ignores vcpu hard and soft
> affinities.  
>
Not to mention that I wouldn't even call what it does "load
calculation".

It just counts the number of vcpus that are executing (or have executed
their last instance) on each CPU, which says very little about load. And
it does that without any locking at all; I can see the reason why, but
the net effect is that the final result comes from a wrong calculation
done on unreliable data... I don't see how this could be more broken! :-P

> Just get rid of it and rely on the schedulers to do
> initial placement.
> 
This all makes a lot of sense to me, and I'm all for it, thanks for
doing it! :-)

> Signed-off-by: George Dunlap <george.dunlap@citrix.com>
> ---
>> Since many of the schedulers' cpu_pick functions have a strong preference
> to
> just leave the cpu where it is (in particular, credit1 and rt), this
> may cause some cpus to be overloaded when creating a lot of domains.
> Arguably this should be fixed in the schedulers themselves.
> 
Indeed. Still, maybe...

> @@ -691,7 +643,7 @@ long
> do_domctl(XEN_GUEST_HANDLE_PARAM(xen_domctl_t) u_domctl)
>                  continue;
>  
>              cpu = (i == 0) ?
> -                default_vcpu0_location(online) :
> +                cpumask_first(online) :
>                  cpumask_cycle(d->vcpu[i-1]->processor, online);
>
...cpumask_any() ?

It's a bit more expensive, but it at least would provide a less biased
(toward lower CPU indexes) basis to the schedulers?
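
I.e., concretely (just a sketch of what the suggestion amounts to):

    cpu = (i == 0) ?
        cpumask_any(online) :   /* random pick, less biased to low ids */
        cpumask_cycle(d->vcpu[i-1]->processor, online);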

Regards,
Dario
-- 
<<This happens because I choose it to happen!>> (Raistlin Majere)
-----------------------------------------------------------------
Dario Faggioli, Ph.D, http://about.me/dario.faggioli
Senior Software Engineer, Citrix Systems R&D Ltd., Cambridge (UK)



* Re: [PATCH 2/3] xen: Have schedulers revise initial placement
  2016-07-15 18:07   ` Andrew Cooper
@ 2016-07-16 14:12     ` Dario Faggioli
  2016-07-18 18:10       ` Andrew Cooper
  0 siblings, 1 reply; 33+ messages in thread
From: Dario Faggioli @ 2016-07-16 14:12 UTC (permalink / raw)
  To: Andrew Cooper, George Dunlap, xen-devel
  Cc: Anshul Makkar, Meng Xu, Jan Beulich



On Fri, 2016-07-15 at 19:07 +0100, Andrew Cooper wrote:
> On 15/07/16 19:02, George Dunlap wrote:
> > 
> > diff --git a/xen/common/sched_credit2.c
> > b/xen/common/sched_credit2.c
> > index 3b9aa27..5a04985 100644
> > --- a/xen/common/sched_credit2.c
> > +++ b/xen/common/sched_credit2.c
> > @@ -1620,15 +1620,23 @@ csched2_vcpu_insert(const struct scheduler
> > *ops, struct vcpu *vc)
> >  
> >      BUG_ON(is_idle_vcpu(vc));
> >  
> > +    /* Locks in cpu_pick expect irqs to be disabled */
> > +    local_irq_disable();
> This doesn't make the problem much worse, but is there a plan to fix
> this issue?
> 
There's a little bit more than a plan.

I've got a proof of concept implementation which was working (now I
need to refresh it), but for which I never really managed to evaluate
the performance impact as accurately as I wanted to.

In fact, I actually have a couple of variants implemented, that I was
comparing against each other, in addition to against 'vanilla'. The
problem was that I really was not seeing any impact at all, which
looked strange (I was expecting improvement, at least on some
workloads), and I wanted to investigate further.

I'm leaving here the links to two branches, where I stashed some of the
code that I have come up with so far. As I said, it's WIP and needs
refreshing and reworking.

> None of the scheduler-accounting functions should be disabling
> interrupts.
> 
They don't. But you can't keep irq disabled for some operations and
enabled for others, on the same lock (because of the irq-safety
spinlock checks/enforcement).

So you have to always keep IRQ enabled, for all scheduling operations,
which is ok for _almost_ all of them, with the only exception of the
wakeup of a vcpu.

So, the idea was to treat that one case specially, i.e., put the waking
vcpus in a queue, and then drain the queue somehow. The insertion into
the queue needs to be done with interrupts disabled, but the draining
--which is where the actual scheduling related hooks and operations are
done-- can be done with IRQs on, which is what we want.

What I was experimenting on was trying different ways of managing such
a queue, e.g., only one queue for all CPUs or per-CPU queues; or
whether to always drain the queue or only pick a couple of vcpus and
defer the rest again; or whether to allow concurrent draining of the
queue, or only have one CPU (at a time) doing that; etc etc.
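
Just to give the idea, the per-CPU queue variant is shaped roughly like
this (an illustrative sketch only, with hypothetical names -- wakeq,
wake_elem, wakeq_drain(); initialisation omitted; VCPU2OP as in
xen/common/schedule.c -- not the actual code from the branches below):

    static DEFINE_PER_CPU(struct list_head, wakeq);
    static DEFINE_PER_CPU(spinlock_t, wakeq_lock);

    void vcpu_wake(struct vcpu *v)
    {
        unsigned long flags;
        spinlock_t *lock = &per_cpu(wakeq_lock, v->processor);

        /* Only this short queue insertion runs with IRQs disabled. */
        spin_lock_irqsave(lock, flags);
        list_add_tail(&v->wake_elem, &per_cpu(wakeq, v->processor));
        spin_unlock_irqrestore(lock, flags);

        raise_softirq(SCHEDULE_SOFTIRQ);
    }

    /* Runs from softirq context, with IRQs enabled. */
    static void wakeq_drain(void)
    {
        struct list_head *q = &this_cpu(wakeq);
        spinlock_t *qlock = &this_cpu(wakeq_lock);

        for ( ; ; )
        {
            unsigned long flags;
            spinlock_t *lock;
            struct vcpu *v;

            spin_lock_irqsave(qlock, flags);
            if ( list_empty(q) )
            {
                spin_unlock_irqrestore(qlock, flags);
                break;
            }
            v = list_first_entry(q, struct vcpu, wake_elem);
            list_del(&v->wake_elem);
            spin_unlock_irqrestore(qlock, flags);

            /* The scheduler's wake hook now runs with IRQs on. */
            lock = vcpu_schedule_lock(v);
            SCHED_OP(VCPU2OP(v), wake, v);
            vcpu_schedule_unlock(lock, v);
        }
    }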

The "1 queue for all" and "per-CPU queues" is what's in the following
two branches:

git://xenbits.xen.org/people/dariof/xen.git  wip/sched/irq-enabled
http://xenbits.xen.org/gitweb/?p=people/dariof/xen.git;a=shortlog;h=refs/heads/wip/sched/irq-enabled

git://xenbits.xen.org/people/dariof/xen.git  wip/sched/irq-enabled-percpu
http://xenbits.xen.org/gitweb/?p=people/dariof/xen.git;a=shortlog;h=refs/heads/wip/sched/irq-enabled-percpu

I'll get back to this soon. In the meanwhile, feel free to comment,
toss ideas, criticize, whatever. :-D

Regards,
Dario
-- 
<<This happens because I choose it to happen!>> (Raistlin Majere)
-----------------------------------------------------------------
Dario Faggioli, Ph.D, http://about.me/dario.faggioli
Senior Software Engineer, Citrix Systems R&D Ltd., Cambridge (UK)



* Re: [PATCH 1/3] xen: Some code motion to avoid having to do forward-declaration
  2016-07-15 18:02 [PATCH 1/3] xen: Some code motion to avoid having to do forward-declaration George Dunlap
  2016-07-15 18:02 ` [PATCH 2/3] xen: Have schedulers revise initial placement George Dunlap
  2016-07-15 18:02 ` [PATCH 3/3] xen: Remove buggy initial placement algorithm George Dunlap
@ 2016-07-16 15:48 ` Meng Xu
  2016-07-18  9:58 ` Dario Faggioli
  3 siblings, 0 replies; 33+ messages in thread
From: Meng Xu @ 2016-07-16 15:48 UTC (permalink / raw)
  To: George Dunlap; +Cc: xen-devel, Dario Faggioli, Anshul Makkar



On Fri, Jul 15, 2016 at 2:02 PM, George Dunlap <george.dunlap@citrix.com>
wrote:

> For sched_credit2, move the vcpu insert / remove / free functions near the
> domain
> insert / remove / alloc / free functions (and after cpu_pick).
>
> For sched_rt, move rt_cpu_pick() further up.
>
> This is pure code motion; no functional change.
>
> Signed-off-by: George Dunlap <george.dunlap@citrix.com>
>

As to sched_rt.c,

Reviewed-by: Meng Xu <mengxu@cis.upenn.edu>

Thanks,

Meng

------------
Meng Xu
PhD Student in Computer and Information Science
University of Pennsylvania
http://www.cis.upenn.edu/~mengxu/


* Re: [PATCH 1/3] xen: Some code motion to avoid having to do forward-declaration
  2016-07-15 18:02 [PATCH 1/3] xen: Some code motion to avoid having to do forward-declaration George Dunlap
                   ` (2 preceding siblings ...)
  2016-07-16 15:48 ` [PATCH 1/3] xen: Some code motion to avoid having to do forward-declaration Meng Xu
@ 2016-07-18  9:58 ` Dario Faggioli
  2016-07-18 10:06   ` George Dunlap
  3 siblings, 1 reply; 33+ messages in thread
From: Dario Faggioli @ 2016-07-18  9:58 UTC (permalink / raw)
  To: George Dunlap, xen-devel; +Cc: Anshul Makkar, Meng Xu



On Fri, 2016-07-15 at 19:02 +0100, George Dunlap wrote:
> For sched_credit2, move the vcpu insert / remove / free functions
> near the domain
> insert / remove / alloc / free functions (and after cpu_pick).
> 
> For sched_rt, move rt_cpu_pick() further up.
> 
> This is pure code motion; no functional change.
> 
> Signed-off-by: George Dunlap <george.dunlap@citrix.com>
> ---
> CC: Dario Faggioli <dario.faggioli@citrix.com>
> CC: Anshul Makkar <anshul.makkar@citrix.com>
> CC: Meng Xu <mengxu@cis.upenn.edu>
>
Acked-by: Dario Faggioli <dario.faggioli@citrix.com>

If this goes in (and it should), I'll have to rebase my Credit2 series
and resend it.

I'm fine with that, but I'll wait to see if people have comments, so
I'll take care of both (comments and rebasing) in v3.

If there are no comments, I'll just resend/provide a git branch with
the rebased patches.

Regards,
Dario
-- 
<<This happens because I choose it to happen!>> (Raistlin Majere)
-----------------------------------------------------------------
Dario Faggioli, Ph.D, http://about.me/dario.faggioli
Senior Software Engineer, Citrix Systems R&D Ltd., Cambridge (UK)



* Re: [PATCH 3/3] xen: Remove buggy initial placement algorithm
  2016-07-16 13:55   ` Dario Faggioli
@ 2016-07-18 10:03     ` George Dunlap
  0 siblings, 0 replies; 33+ messages in thread
From: George Dunlap @ 2016-07-18 10:03 UTC (permalink / raw)
  To: Dario Faggioli, xen-devel
  Cc: Stefano Stabellini, Wei Liu, Andrew Cooper, Anshul Makkar,
	Ian Jackson, Tim Deegan, Meng Xu, Jan Beulich

On 16/07/16 14:55, Dario Faggioli wrote:
> On Fri, 2016-07-15 at 19:02 +0100, George Dunlap wrote:
>> The initial placement algorithm sometimes picks cpus outside of the
>> mask it's given, does a lot of unnecessary bitmasking, does its own
>> separate load calculation, and completely ignores vcpu hard and soft
>> affinities.  
>>
> Not to mention that I wouldn't even call what it does "load
> calculation".
> 
> It just counts the number of vcpus that are executing (or have executed
> their last instance) on each CPU, which says very little about load. And
> it does that without any locking at all; I can see the reason why, but
> the net effect is that the final result comes from a wrong calculation
> done on unreliable data... I don't see how this could be more broken! :-P
> 
>> Just get rid of it and rely on the schedulers to do
>> initial placement.
>>
> This all makes a lot of sense to me, and I'm all for it, thanks for
> doing it! :-)
> 
>> Signed-off-by: George Dunlap <george.dunlap@citrix.com>
>> ---
>> Since many of the schedulers' cpu_pick functions have a strong preference
>> to
>> just leave the cpu where it is (in particular, credit1 and rt), this
>> may cause some cpus to be overloaded when creating a lot of domains.
>> Arguably this should be fixed in the schedulers themselves.
>>
> Indeed. Still, maybe...
> 
>> @@ -691,7 +643,7 @@ long
>> do_domctl(XEN_GUEST_HANDLE_PARAM(xen_domctl_t) u_domctl)
>>                  continue;
>>  
>>              cpu = (i == 0) ?
>> -                default_vcpu0_location(online) :
>> +                cpumask_first(online) :
>>                  cpumask_cycle(d->vcpu[i-1]->processor, online);
>>
> ...cpumask_any() ?
> 
> It's a bit more expensive, but it at least would provide a less biased
> (toward lower CPU indexes) basis to the schedulers?

That sounds reasonable -- this is only called when vcpus are created;
and it's almost certainly faster than all the calculations done by
default_vcpu0_location().

 -George


* Re: [PATCH 1/3] xen: Some code motion to avoid having to do forward-declaration
  2016-07-18  9:58 ` Dario Faggioli
@ 2016-07-18 10:06   ` George Dunlap
  0 siblings, 0 replies; 33+ messages in thread
From: George Dunlap @ 2016-07-18 10:06 UTC (permalink / raw)
  To: Dario Faggioli, xen-devel; +Cc: Anshul Makkar, Meng Xu

On 18/07/16 10:58, Dario Faggioli wrote:
> On Fri, 2016-07-15 at 19:02 +0100, George Dunlap wrote:
>> For sched_credit2, move the vcpu insert / remove / free functions
>> near the domain
>> insert / remove / alloc / free functions (and after cpu_pick).
>>
>> For sched_rt, move rt_cpu_pick() further up.
>>
>> This is pure code motion; no functional change.
>>
>> Signed-off-by: George Dunlap <george.dunlap@citrix.com>
>> ---
>> CC: Dario Faggioli <dario.faggioli@citrix.com>
>> CC: Anshul Makkar <anshul.makkar@citrix.com>
>> CC: Meng Xu <mengxu@cis.upenn.edu>
>>
> Acked-by: Dario Faggioli <dario.faggioli@citrix.com>
> 
> If this goes in (and it should), I'll have to rebase my Credit2 series
> and resend it.
> 
> I'm fine with that, but I'll wait to see if people have comments, so
> I'll take care of both (comments and rebasing) in v3.
> 
> If there are no comments, I'll just resend/provide a git branch with
> the rebased patches.

So for one, your series was sent first, so if it's in decent shape it
should get priority.  For two, just rebasing this series should be a lot
easier than rebasing your entire series; so unless your series gets held
up for some reason, I think this should probably be rebased on top of yours.

 -George


* Re: [PATCH 2/3] xen: Have schedulers revise initial placement
  2016-07-15 18:02 ` [PATCH 2/3] xen: Have schedulers revise initial placement George Dunlap
  2016-07-15 18:07   ` Andrew Cooper
@ 2016-07-18 10:28   ` Dario Faggioli
  2016-07-25 11:17     ` George Dunlap
  2016-07-25 14:35   ` Meng Xu
  2016-08-01 10:40   ` Jan Beulich
  3 siblings, 1 reply; 33+ messages in thread
From: Dario Faggioli @ 2016-07-18 10:28 UTC (permalink / raw)
  To: George Dunlap, xen-devel; +Cc: Anshul Makkar, Meng Xu, Jan Beulich



On Fri, 2016-07-15 at 19:02 +0100, George Dunlap wrote:
> The generic domain creation logic in
> xen/common/domctl.c:default_vcpu0_location() attempts to do
> initial placement load-balancing by placing vcpu 0 on the least-busy
> non-primary hyperthread available.  Unfortunately, the logic can end
> up picking a pcpu that's not in the online mask.  When this is passed
> to a scheduler which assumes that the initial assignment is
> valid, it causes a null pointer dereference when looking up the runqueue.
> 
> Furthermore, this initial placement doesn't take into account hard or
> soft affinity, or any scheduler-specific knowledge (such as historic
> runqueue load, as in credit2).
> 
> To solve this, when inserting a vcpu, always call the per-scheduler
> "pick" function to revise the initial placement.  This will
> automatically take all knowledge the scheduler has into account.
> 
> Signed-off-by: George Dunlap <george.dunlap@citrix.com>
> ---
> CC: Dario Faggioli <dario.faggioli@citrix.com>
> CC: Anshul Makkar <anshul.makkar@citrix.com>
> CC: Meng Xu <mengxu@cis.upenn.edu>
> CC: Jan Beulich <jbeulich@suse.com>
>
Reviewed-by: Dario Faggioli <dario.faggioli@citrix.com>

Regards,
Dario
-- 
<<This happens because I choose it to happen!>> (Raistlin Majere)
-----------------------------------------------------------------
Dario Faggioli, Ph.D, http://about.me/dario.faggioli
Senior Software Engineer, Citrix Systems R&D Ltd., Cambridge (UK)



* Re: [PATCH 2/3] xen: Have schedulers revise initial placement
  2016-07-16 14:12     ` Dario Faggioli
@ 2016-07-18 18:10       ` Andrew Cooper
  2016-07-18 18:55         ` Dario Faggioli
  0 siblings, 1 reply; 33+ messages in thread
From: Andrew Cooper @ 2016-07-18 18:10 UTC (permalink / raw)
  To: Dario Faggioli, George Dunlap, xen-devel
  Cc: Anshul Makkar, Meng Xu, Jan Beulich

On 16/07/16 15:12, Dario Faggioli wrote:
> On Fri, 2016-07-15 at 19:07 +0100, Andrew Cooper wrote:
>
>> None of the scheduler-accounting functions should be disabling
>> interrupts.
>>
> They don't. But you can't keep irq disabled for some operations and
> enabled for others, on the same lock (because of the irq-safety
> spinlock checks/enforcement).
>
> So you have to always keep IRQ enabled, for all scheduling operations,
> which is ok for _almost_ all of them, with the only exception of the
> wakeup of a vcpu.

I know that it is all or nothing.  What specific action about waking a
vcpu requires holding a lock?

If it is simply re-queueing the vcpu onto the runnable queue, there are a
number of lockless queuing algorithms which can be used.

~Andrew


* Re: [PATCH 2/3] xen: Have schedulers revise initial placement
  2016-07-18 18:10       ` Andrew Cooper
@ 2016-07-18 18:55         ` Dario Faggioli
  2016-07-18 21:36           ` Andrew Cooper
  0 siblings, 1 reply; 33+ messages in thread
From: Dario Faggioli @ 2016-07-18 18:55 UTC (permalink / raw)
  To: Andrew Cooper, George Dunlap, xen-devel
  Cc: Anshul Makkar, Meng Xu, Jan Beulich



On Mon, 2016-07-18 at 19:10 +0100, Andrew Cooper wrote:
> On 16/07/16 15:12, Dario Faggioli wrote:
> > On Fri, 2016-07-15 at 19:07 +0100, Andrew Cooper wrote:
> > So you have to always keep IRQ enabled, for all scheduling
> > operations,
> > which is ok for _almost_ all of them, with the only exception of
> > the
> > wakeup of a vcpu.
> I know that it is all or nothing.  What specific action about waking
> a
> vcpu requires holding a lock?
> 
> If it is simply re-queueing the vcpu onto the runnable queue, there
> are a
> number of lockless queuing algorithms which can be used.
> 
Yes, it's vcpu_wake() that does vcpu_schedule_lock_irqsave() before,
among other things, calling SCHED_OP(wake, v).

What the implementation of that hook does is scheduler dependent. In
most cases, the core of it is, as you say, enqueueing the vcpu, but
that may not be all.

For instance, something that all the schedulers do after queueing is
tickle pCPUs to have them pick up the queued work, and that also
requires serialization to be correct and effective (i.e., "just"
mandating that runqueues be lockless would not be enough).

> ~Andrew
-- 
<<This happens because I choose it to happen!>> (Raistlin Majere)
-----------------------------------------------------------------
Dario Faggioli, Ph.D, http://about.me/dario.faggioli
Senior Software Engineer, Citrix Systems R&D Ltd., Cambridge (UK)



* Re: [PATCH 2/3] xen: Have schedulers revise initial placement
  2016-07-18 18:55         ` Dario Faggioli
@ 2016-07-18 21:36           ` Andrew Cooper
  2016-07-19  7:14             ` Dario Faggioli
  0 siblings, 1 reply; 33+ messages in thread
From: Andrew Cooper @ 2016-07-18 21:36 UTC (permalink / raw)
  To: Dario Faggioli, George Dunlap, xen-devel
  Cc: Anshul Makkar, Meng Xu, Jan Beulich

On 18/07/2016 19:55, Dario Faggioli wrote:
> On Mon, 2016-07-18 at 19:10 +0100, Andrew Cooper wrote:
>> On 16/07/16 15:12, Dario Faggioli wrote:
>>> On Fri, 2016-07-15 at 19:07 +0100, Andrew Cooper wrote:
>>> So you have to always keep IRQ enabled, for all scheduling
>>> operations,
>>> which is ok for _almost_ all of them, with the only exception of
>>> the
>>> wakeup of a vcpu.
>> I know that it is all or nothing.  What specific action about waking
>> a
>> vcpu requires holding a lock?
>>
>> If it is simply re-queueing the vcpu onto the runnable queue, there
>> are a
>> number of lockless queuing algorithms which can be used.
>>
> Yes, it's vcpu_wake() that does vcpu_schedule_lock_irqsave() before,
> among other things, calling SCHED_OP(wake, v).

Right - this looks easy to fix.  Use a per-pcpu singly linked list,
which can be mutated safely with cmpxchg() rather than locks, have
vcpu_wake() add v to the linked list, and schedule itself a
SCHEDULE_SOFTIRQ, (possibly a new SCHEDULE_WAKE_SOFTIRQ with higher
priority than SCHEDULE_SOFTIRQ).

Then the schedule softirq can take the vcpu schedule lock without
disabling interrupts and run the internals of what is currently
vcpu_wake().  The current content of vcpu_wake() is very large for what
is typically executed from interrupt context.
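
Something like this, roughly (sketch only; wake_next would be a new,
hypothetical field in struct vcpu, and VCPU2OP is as in
xen/common/schedule.c):

    static DEFINE_PER_CPU(struct vcpu *, wake_list);

    void vcpu_wake(struct vcpu *v)
    {
        struct vcpu **head = &per_cpu(wake_list, v->processor);
        struct vcpu *old, *seen = *head;

        /* Lock-free push onto the per-pcpu singly linked list. */
        do {
            old = seen;
            v->wake_next = old;
            seen = cmpxchg(head, old, v);
        } while ( seen != old );

        raise_softirq(SCHEDULE_SOFTIRQ);
    }

    /* Softirq handler: grab the whole list at once (note: this yields
     * LIFO order) and do the actual wakeups with IRQs enabled. */
    static void wake_drain(void)
    {
        struct vcpu *v = xchg(&this_cpu(wake_list), NULL);

        while ( v != NULL )
        {
            struct vcpu *next = v->wake_next;
            spinlock_t *lock = vcpu_schedule_lock(v);

            SCHED_OP(VCPU2OP(v), wake, v); /* today's vcpu_wake() internals */
            vcpu_schedule_unlock(lock, v);
            v = next;
        }
    }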

~Andrew


* Re: [PATCH 2/3] xen: Have schedulers revise initial placement
  2016-07-18 21:36           ` Andrew Cooper
@ 2016-07-19  7:14             ` Dario Faggioli
  0 siblings, 0 replies; 33+ messages in thread
From: Dario Faggioli @ 2016-07-19  7:14 UTC (permalink / raw)
  To: Andrew Cooper, George Dunlap, xen-devel
  Cc: Anshul Makkar, Meng Xu, Jan Beulich



On Mon, 2016-07-18 at 22:36 +0100, Andrew Cooper wrote:
> On 18/07/2016 19:55, Dario Faggioli wrote:
> > 
> > On Mon, 2016-07-18 at 19:10 +0100, Andrew Cooper wrote:
> > > 
> > > On 16/07/16 15:12, Dario Faggioli wrote:
> > > > 
> > > > On Fri, 2016-07-15 at 19:07 +0100, Andrew Cooper wrote:
> > > > So you have to always keep IRQ enabled, for all scheduling
> > > > operations,
> > > > which is ok for _almost_ all of them, with the only exception
> > > > of
> > > > the
> > > > wakeup of a vcpu.
> > > I know that it is all or nothing.  What specific action about
> > > waking
> > > a
> > > vcpu requires holding a lock?
> > > 
>>> If it is simply re-queueing the vcpu onto the runnable queue,
> > > there
> > > are a
> > > number of lockless queuing algorithms which can be used.
> > > 
> > Yes, it's vcpu_wake() that does vcpu_schedule_lock_irqsave()
> > before,
> > among other things, calling SCHED_OP(wake, v).
> Right - this looks easy to fix.  Use a per-pcpu single linked list,
> which can be mutated safely with cmpxchg() rather than locks, have
> vcpu_wake() add v to the linked list, and schedule itself a
> SCHEDULE_SOFTIRQ, (possibly a new SCHEDULE_WAKE_SOFTIRQ with higher
> priority than SCHEDULE_SOFTIRQ).
> 
That's exactly what I'm doing in one of the variants I've implemented so
far (with a dedicated lock, rather than cmpxchg, but I was indeed
looking at removing that):

http://xenbits.xen.org/gitweb/?p=people/dariof/xen.git;a=blobdiff;f=xen/include/xen/softirq.h;h=e62c247fdc41dd4e36ceb5f8094647fa62530c56;hp=0895a162031b64ad6acff6be9d17707dad7a89bf;hb=431423e69c9fe4503d26bd4c657699284e0223b7;hpb=a282bcd6f7751d2ff428ca6c1e14120aa6fcc5fc

:-)

> Then in the schedule softirq can take the vcpu schedule lock without
> disabling interrupts and run the internals of what is currently
> vcpu_wake().  The current content of vcpu_wake() very large for what
> is
> typically executed from interrupt context.
> 
Totally agree.

Something I was reasoning about, and trying to assess by means of
benchmarks is, for instance, when taking out the vcpus from the queue
and waking them, whether to do that always "to completion" (considering
that it's probably even possible that new vcpus are added to the queue
itself while I'm trying to drain it), or in batches (and after each
batch, let Xen do something else and re-trigger the softirq).

And this (and other similar "subtleties") is where the numbers I could
get were not conclusive.

I'll dust off the code and let you know. :-)

Regards,
Dario
-- 
<<This happens because I choose it to happen!>> (Raistlin Majere)
-----------------------------------------------------------------
Dario Faggioli, Ph.D, http://about.me/dario.faggioli
Senior Software Engineer, Citrix Systems R&D Ltd., Cambridge (UK)



* Re: [PATCH 2/3] xen: Have schedulers revise initial placement
  2016-07-18 10:28   ` Dario Faggioli
@ 2016-07-25 11:17     ` George Dunlap
  2016-07-25 14:36       ` Meng Xu
  2016-07-26  9:17       ` Dario Faggioli
  0 siblings, 2 replies; 33+ messages in thread
From: George Dunlap @ 2016-07-25 11:17 UTC (permalink / raw)
  To: Dario Faggioli, xen-devel; +Cc: Anshul Makkar, Meng Xu, Jan Beulich

On 18/07/16 11:28, Dario Faggioli wrote:
> On Fri, 2016-07-15 at 19:02 +0100, George Dunlap wrote:
>> The generic domain creation logic in
>> xen/common/domctl.c:default_vcpu0_location() attempts to do
>> initial placement load-balancing by placing vcpu 0 on the least-busy
>> non-primary hyperthread available.  Unfortunately, the logic can end
>> up picking a pcpu that's not in the online mask.  When this is passed
>> to a scheduler which assumes that the initial assignment is
>> valid, it causes a null pointer dereference when looking up the runqueue.
>>
>> Furthermore, this initial placement doesn't take into account hard or
>> soft affinity, or any scheduler-specific knowledge (such as historic
>> runqueue load, as in credit2).
>>
>> To solve this, when inserting a vcpu, always call the per-scheduler
>> "pick" function to revise the initial placement.  This will
>> automatically take all knowledge the scheduler has into account.
>>
>> Signed-off-by: George Dunlap <george.dunlap@citrix.com>
>> ---
>> CC: Dario Faggioli <dario.faggioli@citrix.com>
>> CC: Anshul Makkar <anshul.makkar@citrix.com>
>> CC: Meng Xu <mengxu@cis.upenn.edu>
>> CC: Jan Beulich <jbeulich@suse.com>
>>
> Reviewed-by: Dario Faggioli <dario.faggioli@citrix.com>

Just to clarify - does this cover the sched_rt.c changes as well?

Thanks,
 -George


* Re: [PATCH 2/3] xen: Have schedulers revise initial placement
  2016-07-15 18:02 ` [PATCH 2/3] xen: Have schedulers revise initial placement George Dunlap
  2016-07-15 18:07   ` Andrew Cooper
  2016-07-18 10:28   ` Dario Faggioli
@ 2016-07-25 14:35   ` Meng Xu
  2016-08-01 10:40   ` Jan Beulich
  3 siblings, 0 replies; 33+ messages in thread
From: Meng Xu @ 2016-07-25 14:35 UTC (permalink / raw)
  To: George Dunlap; +Cc: xen-devel, Dario Faggioli, Anshul Makkar, Jan Beulich

On Fri, Jul 15, 2016 at 2:02 PM, George Dunlap <george.dunlap@citrix.com> wrote:
>
> The generic domain creation logic in
> xen/common/domctl.c:default_vcpu0_location() attempts to try to do
> initial placement load-balancing by placing vcpu 0 on the least-busy
> non-primary hyperthread available.  Unfortunately, the logic can end
> up picking a pcpu that's not in the online mask.  When this is passed
> to a scheduler which assumes that the initial assignment is
> valid, it causes a null pointer dereference when looking up the runqueue.
>
> Furthermore, this initial placement doesn't take into account hard or
> soft affinity, or any scheduler-specific knowledge (such as historic
> runqueue load, as in credit2).
>
> To solve this, when inserting a vcpu, always call the per-scheduler
> "pick" function to revise the initial placement.  This will
> automatically take all knowledge the scheduler has into account.
>
> Signed-off-by: George Dunlap <george.dunlap@citrix.com>
> ---
> CC: Dario Faggioli <dario.faggioli@citrix.com>
> CC: Anshul Makkar <anshul.makkar@citrix.com>
> CC: Meng Xu <mengxu@cis.upenn.edu>
> CC: Jan Beulich <jbeulich@suse.com>
> ---
>  xen/common/sched_credit.c  |  3 +++
>  xen/common/sched_credit2.c | 12 ++++++++++--
>  xen/common/sched_rt.c      |  3 +++
>  3 files changed, 16 insertions(+), 2 deletions(-)


As to xen/common/sched_rt.c,

Reviewed-by: Meng Xu <mengxu@cis.upenn.edu>

------
Meng Xu
PhD Student in Computer and Information Science
University of Pennsylvania
http://www.cis.upenn.edu/~mengxu/


* Re: [PATCH 2/3] xen: Have schedulers revise initial placement
  2016-07-25 11:17     ` George Dunlap
@ 2016-07-25 14:36       ` Meng Xu
  2016-07-26  9:17       ` Dario Faggioli
  1 sibling, 0 replies; 33+ messages in thread
From: Meng Xu @ 2016-07-25 14:36 UTC (permalink / raw)
  To: George Dunlap; +Cc: xen-devel, Dario Faggioli, Anshul Makkar, Jan Beulich

On Mon, Jul 25, 2016 at 7:17 AM, George Dunlap <george.dunlap@citrix.com> wrote:
> On 18/07/16 11:28, Dario Faggioli wrote:
>> On Fri, 2016-07-15 at 19:02 +0100, George Dunlap wrote:
>>> The generic domain creation logic in
>>> xen/common/domctl.c:default_vcpu0_location() attempts to do
>>> initial placement load-balancing by placing vcpu 0 on the least-busy
>>> non-primary hyperthread available.  Unfortunately, the logic can end
>>> up picking a pcpu that's not in the online mask.  When this is passed
>>> to a scheduler which assumes that the initial assignment is
>>> valid, it causes a null pointer dereference when looking up the runqueue.
>>>
>>> Furthermore, this initial placement doesn't take into account hard or
>>> soft affinity, or any scheduler-specific knowledge (such as historic
>>> runqueue load, as in credit2).
>>>
>>> To solve this, when inserting a vcpu, always call the per-scheduler
>>> "pick" function to revise the initial placement.  This will
>>> automatically take all knowledge the scheduler has into account.
>>>
>>> Signed-off-by: George Dunlap <george.dunlap@citrix.com>
>>> ---
>>> CC: Dario Faggioli <dario.faggioli@citrix.com>
>>> CC: Anshul Makkar <anshul.makkar@citrix.com>
>>> CC: Meng Xu <mengxu@cis.upenn.edu>
>>> CC: Jan Beulich <jbeulich@suse.com>
>>>
>> Reviewed-by: Dario Faggioli <dario.faggioli@citrix.com>
>
> Just to clarify - does this cover the sched_rt.c changes as well?

Hi George,

I had a look at sched_rt.c and it looks good to me. I sent out my
review tag for it just now.

Meng

-----------
Meng Xu
PhD Student in Computer and Information Science
University of Pennsylvania
http://www.cis.upenn.edu/~mengxu/


* Re: [PATCH 2/3] xen: Have schedulers revise initial placement
  2016-07-25 11:17     ` George Dunlap
  2016-07-25 14:36       ` Meng Xu
@ 2016-07-26  9:17       ` Dario Faggioli
  1 sibling, 0 replies; 33+ messages in thread
From: Dario Faggioli @ 2016-07-26  9:17 UTC (permalink / raw)
  To: George Dunlap, xen-devel; +Cc: Anshul Makkar, Meng Xu, Jan Beulich



On Mon, 2016-07-25 at 12:17 +0100, George Dunlap wrote:
> On 18/07/16 11:28, Dario Faggioli wrote:
> > 
> > On Fri, 2016-07-15 at 19:02 +0100, George Dunlap wrote:
> > > 
> > > Signed-off-by: George Dunlap <george.dunlap@citrix.com>
> > > ---
> > > CC: Dario Faggioli <dario.faggioli@citrix.com>
> > > CC: Anshul Makkar <anshul.makkar@citrix.com>
> > > CC: Meng Xu <mengxu@cis.upenn.edu>
> > > CC: Jan Beulich <jbeulich@suse.com>
> > > 
> > Reviewed-by: Dario Faggioli <dario.faggioli@citrix.com>
> Just to clarify - does this cover the sched_rt.c changes as well?
> 
Oh, sorry, I missed this yesterday. :-(

Yes, it did, but in any case Meng sent in his own Reviewed-by (and has
already done so for v2 as well), so, whatever. :-P

Thanks and Regards,
Dario
-- 
<<This happens because I choose it to happen!>> (Raistlin Majere)
-----------------------------------------------------------------
Dario Faggioli, Ph.D, http://about.me/dario.faggioli
Senior Software Engineer, Citrix Systems R&D Ltd., Cambridge (UK)



* Re: [PATCH 2/3] xen: Have schedulers revise initial placement
  2016-07-15 18:02 ` [PATCH 2/3] xen: Have schedulers revise initial placement George Dunlap
                     ` (2 preceding siblings ...)
  2016-07-25 14:35   ` Meng Xu
@ 2016-08-01 10:40   ` Jan Beulich
  2016-08-01 12:32     ` Dario Faggioli
  3 siblings, 1 reply; 33+ messages in thread
From: Jan Beulich @ 2016-08-01 10:40 UTC (permalink / raw)
  To: George Dunlap; +Cc: Dario Faggioli, Anshul Makkar, Meng Xu, xen-devel

>>> On 15.07.16 at 20:02, <george.dunlap@citrix.com> wrote:
> The generic domain creation logic in
> xen/common/domctl.c:default_vcpu0_location() attempts to do
> initial placement load-balancing by placing vcpu 0 on the least-busy
> non-primary hyperthread available.  Unfortunately, the logic can end
> up picking a pcpu that's not in the online mask.  When this is passed
> to a scheduler which assumes that the initial assignment is
> valid, it causes a null pointer dereference when looking up the runqueue.
> 
> Furthermore, this initial placement doesn't take into account hard or
> soft affinity, or any scheduler-specific knowledge (such as historic
> runqueue load, as in credit2).
> 
> To solve this, when inserting a vcpu, always call the per-scheduler
> "pick" function to revise the initial placement.  This will
> automatically take all knowledge the scheduler has into account.
> 
> Signed-off-by: George Dunlap <george.dunlap@citrix.com>

Should this and patch 3 be backported?

Jan



* Re: [PATCH 2/3] xen: Have schedulers revise initial placement
  2016-08-01 10:40   ` Jan Beulich
@ 2016-08-01 12:32     ` Dario Faggioli
  2016-08-05 13:24       ` Jan Beulich
  0 siblings, 1 reply; 33+ messages in thread
From: Dario Faggioli @ 2016-08-01 12:32 UTC (permalink / raw)
  To: Jan Beulich, George Dunlap; +Cc: xen-devel, Anshul Makkar, Meng Xu


[-- Attachment #1.1: Type: text/plain, Size: 775 bytes --]

On Mon, 2016-08-01 at 04:40 -0600, Jan Beulich wrote:
> > > > On 15.07.16 at 20:02, <george.dunlap@citrix.com> wrote:
> > 
> > To solve this, when inserting a vcpu, always call the per-scheduler
> > "pick" function to revise the initial placement.  This will
> > automatically take all knowledge the scheduler has into account.
> > 
> > Signed-off-by: George Dunlap <george.dunlap@citrix.com>
>
> Should this and patch 3 be backported?
> 
Yes, I think they're good backporting candidates.

Dario
-- 
<<This happens because I choose it to happen!>> (Raistlin Majere)
-----------------------------------------------------------------
Dario Faggioli, Ph.D, http://about.me/dario.faggioli
Senior Software Engineer, Citrix Systems R&D Ltd., Cambridge (UK)



* Re: [PATCH 2/3] xen: Have schedulers revise initial placement
  2016-08-01 12:32     ` Dario Faggioli
@ 2016-08-05 13:24       ` Jan Beulich
  2016-08-05 14:09         ` Dario Faggioli
                           ` (3 more replies)
  0 siblings, 4 replies; 33+ messages in thread
From: Jan Beulich @ 2016-08-05 13:24 UTC (permalink / raw)
  To: Dario Faggioli, George Dunlap; +Cc: xen-devel, Anshul Makkar, MengXu

>>> On 01.08.16 at 14:32, <dario.faggioli@citrix.com> wrote:
> On Mon, 2016-08-01 at 04:40 -0600, Jan Beulich wrote:
>> > > > On 15.07.16 at 20:02, <george.dunlap@citrix.com> wrote:
>> > 
>> > To solve this, when inserting a vcpu, always call the per-scheduler
>> > "pick" function to revise the initial placement.  This will
>> > automatically take all knowledge the scheduler has into account.
>> > 
>> > Signed-off-by: George Dunlap <george.dunlap@citrix.com>
>>
>> Should this and patch 3 be backported?
>> 
> Yes, I think they're good backporting candidates.

Well, they appear to work fine on 4.7, but putting them onto 4.5
causes an early boot crash (BUG_ON( cpu != svc->vcpu->processor )
in __runq_insert()). Pulling in e59321d154 ("credit: remove cpu
argument to __runq_insert()") obviously makes that crash go
away, just to, a little later, hit the similar one close to the top of
csched_load_balance().
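
(To make that concrete: per its subject line, before e59321d154 the
4.5 tree passes the target pcpu separately, roughly

static inline void
__runq_insert(unsigned int cpu, struct csched_vcpu *svc)
{
    struct list_head * const runq = RUNQ(cpu);

    /* The caller-supplied cpu and the vcpu's own field must agree. */
    BUG_ON( cpu != svc->vcpu->processor );

    /* The position search by credit is elided from this sketch. */
    list_add_tail(&svc->runq_elem, runq);
}

while afterwards the pcpu is derived from svc->vcpu->processor inside
the function, so the two can no longer disagree. Sketched from the
commit subject and the assertion quoted above, not copied from either
tree.)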

I'd really like to have those backported, but I have to ask one
of you to identify which prereq-s are needed on 4.6 and 4.5
(I'll revert them from 4.5 right away, but I'll wait for an osstest
flight to confirm the same issue exists on 4.6). On 4.5 I'd then
additionally depend on you to indicate whether sedf needs
some additional adjustment.

Jan


_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH 2/3] xen: Have schedulers revise initial placement
  2016-08-05 13:24       ` Jan Beulich
@ 2016-08-05 14:09         ` Dario Faggioli
  2016-08-05 14:44           ` Jan Beulich
  2016-08-11 14:59         ` Dario Faggioli
                           ` (2 subsequent siblings)
  3 siblings, 1 reply; 33+ messages in thread
From: Dario Faggioli @ 2016-08-05 14:09 UTC (permalink / raw)
  To: Jan Beulich, George Dunlap; +Cc: xen-devel, Anshul Makkar, Meng Xu


On Fri, 2016-08-05 at 07:24 -0600, Jan Beulich wrote:
> > > > On 01.08.16 at 14:32, <dario.faggioli@citrix.com> wrote:
> > On Mon, 2016-08-01 at 04:40 -0600, Jan Beulich wrote:
> > > > > > On 15.07.16 at 20:02, <george.dunlap@citrix.com> wrote:
> > > > Signed-off-by: George Dunlap <george.dunlap@citrix.com>
> > > Should this and patch 3 be backported?
> > > 
> > Yes, I think they're good backporting candidates.
> Well, they appear to work fine on 4.7, but putting them onto 4.5
> causes an early boot crash (BUG_ON( cpu != svc->vcpu->processor )
> in __runq_insert()). Pulling in e59321d154 ("credit: remove cpu
> argument to __runq_insert()") obviously makes that crash go
> away, just to, a little later, hit the similar one close to the top of
> csched_load_balance().
> 
Ah, I see.. Thanks for trying and letting us know.

> I'd really like to have those backported, but I have to ask one
> of you to identify which prereq-s are needed on 4.6 and 4.5
> (I'll revert them from 4.5 right away, but I'll wait for an osstest
> flight to confirm the same issue exists on 4.6). On 4.5 I'd then
> additionally depend on you to indicate whether sedf needs
> some additional adjustment.
> 
Ok. I can't, immediately.

But if Monday is fine, I'm happy to take care of this.

Dario
-- 
<<This happens because I choose it to happen!>> (Raistlin Majere)
-----------------------------------------------------------------
Dario Faggioli, Ph.D, http://about.me/dario.faggioli
Senior Software Engineer, Citrix Systems R&D Ltd., Cambridge (UK)



* Re: [PATCH 2/3] xen: Have schedulers revise initial placement
  2016-08-05 14:09         ` Dario Faggioli
@ 2016-08-05 14:44           ` Jan Beulich
  0 siblings, 0 replies; 33+ messages in thread
From: Jan Beulich @ 2016-08-05 14:44 UTC (permalink / raw)
  To: Dario Faggioli; +Cc: xen-devel, Anshul Makkar, Meng Xu, George Dunlap

>>> On 05.08.16 at 16:09, <dario.faggioli@citrix.com> wrote:
> On Fri, 2016-08-05 at 07:24 -0600, Jan Beulich wrote:
>> > > > On 01.08.16 at 14:32, <dario.faggioli@citrix.com> wrote:
>> > On Mon, 2016-08-01 at 04:40 -0600, Jan Beulich wrote:
>> > > > > > On 15.07.16 at 20:02, <george.dunlap@citrix.com> wrote:
>> > > > Signed-off-by: George Dunlap <george.dunlap@citrix.com>
>> > > Should this and patch 3 be backported?
>> > > 
>> > Yes, I think they're good backporting candidates.
>> Well, they appear to work fine on 4.7, but putting them onto 4.5
>> causes an early boot crash (BUG_ON( cpu != svc->vcpu->processor )
>> in __runq_insert()). Pulling in e59321d154 ("credit: remove cpu
>> argument to __runq_insert()") obviously makes that crash go
>> away, just to, a little later, hit the similar one close to the top of
>> csched_load_balance().
>> 
> Ah, I see.. Thanks for trying and letting us know.
> 
>> I'd really like to have those backported, but I have to ask one
>> of you to identify which prereq-s are needed on 4.6 and 4.5
>> (I'll revert them from 4.5 right away, but I'll wait for an osstest
>> flight to confirm the same issue exists on 4.6). On 4.5 I'd then
>> additionally depend on you to indicate whether sedf needs
>> some additional adjustment.
>> 
> Ok. I can't, immediately.
> 
> But if Monday is fine, I'm happy to take care of this.

Oh, of course, any time next week would be fine. I'd like to have
this for 4.5.4, but in the worst case we'll ship that without it.

Jan



* Re: [PATCH 2/3] xen: Have schedulers revise initial placement
  2016-08-05 13:24       ` Jan Beulich
  2016-08-05 14:09         ` Dario Faggioli
@ 2016-08-11 14:59         ` Dario Faggioli
  2016-08-11 15:51           ` Andrew Cooper
  2016-08-12  1:59         ` dependences for backporting to 4.6 [was: Re: [PATCH 2/3] xen: Have schedulers revise initial placement] Dario Faggioli
  2016-08-12  8:58         ` dependences for backporting to 4.5 " Dario Faggioli
  3 siblings, 1 reply; 33+ messages in thread
From: Dario Faggioli @ 2016-08-11 14:59 UTC (permalink / raw)
  To: Jan Beulich, George Dunlap; +Cc: xen-devel, Anshul Makkar, Meng Xu


On Fri, 2016-08-05 at 07:24 -0600, Jan Beulich wrote:
> I'd really like to have those backported, but I have to ask one
> of you to identify which prereq-s are needed on 4.6 and 4.5
> (I'll revert them from 4.5 right away, but I'll wait for an osstest
> flight to confirm the same issue exists on 4.6). 
>
Hey, I could only start working on this this morning (sorry for the
delay), and I'll continue tomorrow, but, at least here, staging-4.6
(plus the patches!) crashes like this:

(XEN) [    0.000000] ----[ Xen-4.6.4-pre  x86_64  debug=y  Not tainted ]----
(XEN) [    0.000000] CPU:    0
(XEN) [    0.000000] RIP:    e008:[<ffff82d0801238c4>] _csched_cpu_pick+0x156/0x612
(XEN) [    0.000000] RFLAGS: 0000000000010046   CONTEXT: hypervisor
(XEN) [    0.000000] rax: 0000000000000040   rbx: 0000000000000040   rcx: 0000000000000000
(XEN) [    0.000000] rdx: 000000000000003f   rsi: 0000000000000040   rdi: 0000000000000040
(XEN) [    0.000000] rbp: ffff82d0802f7d68   rsp: ffff82d0802f7c78   r8:  0000000000000000
(XEN) [    0.000000] r9:  0000000000000001   r10: 0000000000000001   r11: 00000000000000b4
(XEN) [    0.000000] r12: 0000000000000040   r13: ffff83032152bf40   r14: ffff82d08033ae40
(XEN) [    0.000000] r15: ffff8300dbdf4000   cr0: 0000000080050033   cr4: 00000000000006e0
(XEN) [    0.000000] cr3: 00000000dba9f000   cr2: 0000000000000000
(XEN) [    0.000000] ds: 0000   es: 0000   fs: 0000   gs: 0000   ss: 0000   cs: e008
(XEN) [    0.000000] Xen stack trace from rsp=ffff82d0802f7c78:
(XEN) [    0.000000]    0000000000000046 0000000200000092 ffff82d08033ae48 ffff82d08028b020
(XEN) [    0.000000]    0000000000000000 ffff82d0802ff040 0000000100000001 ffff82d08033ae40
(XEN) [    0.000000]    0000000000000097 ffff82d0802f7cd8 0000000000000006 ffff82d0802f7ce8
(XEN) [    0.000000]    ffff82d08012d027 ffff830321532000 ffff82d0802f7d28 ffff82d08013c779
(XEN) [    0.000000]    0000000000000000 0000000000000010 0000000000000048 0000000000000048
(XEN) [    0.000000]    0000000000000000 0000000000000000 0000000000000000 0000000000000000
(XEN) [    0.000000]    ffff82d0802f7d78 ffff8300dbdf4000 ffff83032152a000 ffff83032152bf40
(XEN) [    0.000000]    0000000000000000 ffff82d08028e020 ffff82d0802f7d78 ffff82d080123d8e
(XEN) [    0.000000]    ffff82d0802f7db8 ffff82d080123db0 ffff83032152a000 ffff8300dbdf4000
(XEN) [    0.000000]    ffff83032152a000 0000000000000000 0000000000000000 ffff82d08028e020
(XEN) [    0.000000]    ffff82d0802f7de8 ffff82d080129e8d ffff82d0802f7de8 ffff82d08013cda0
(XEN) [    0.000000]    ffff8300dbdf4000 ffff83032152a000 ffff82d0802f7e18 ffff82d080105f59
(XEN) [    0.000000]    ffff82d0802a1720 ffff82d0802a1718 ffff82d0802a1720 ffff82d0802daa60
(XEN) [    0.000000]    ffff82d0802f7e48 ffff82d0802a8145 0000000000000002 ffff83032152b610
(XEN) [    0.000000]    0000000000000002 0000000000000001 ffff82d0802f7f08 ffff82d0802c5c6f
(XEN) [    0.000000]    0000000000000000 0000000000100000 00000000014ae000 0000000000324000
(XEN) [    0.000000]    0080016300000000 0000000300000015 0000000000000000 0000000000000010
(XEN) [    0.000000]    ffff83000008dd50 ffff83000008df20 0000000000000002 ffff83000008dfb0
(XEN) [    0.000000]    0000000800000000 000000010000006e 0000000000000003 00000000000002f8
(XEN) [    0.000000]    0000000000000000 0000000000000000 0000000000000000 0000000000000000
(XEN) [    0.000000] Xen call trace:
(XEN) [    0.000000]    [<ffff82d0801238c4>] _csched_cpu_pick+0x156/0x612
(XEN) [    0.000000]    [<ffff82d080123d8e>] csched_cpu_pick+0xe/0x10
(XEN) [    0.000000]    [<ffff82d080123db0>] csched_vcpu_insert+0x20/0x145
(XEN) [    0.000000]    [<ffff82d080129e8d>] sched_init_vcpu+0x1d1/0x218
(XEN) [    0.000000]    [<ffff82d080105f59>] alloc_vcpu+0x1ba/0x2a4
(XEN) [    0.000000]    [<ffff82d0802a8145>] scheduler_init+0x1b0/0x271
(XEN) [    0.000000]    [<ffff82d0802c5c6f>] __start_xen+0x1f85/0x2550
(XEN) [    0.000000]    [<ffff82d080100073>] __high_start+0x53/0x55
(XEN) [    0.000000] 
(XEN) [    0.000000] 
(XEN) [    0.000000] ****************************************
(XEN) [    0.000000] Panic on CPU 0:
(XEN) [    0.000000] Assertion 'cpu < nr_cpu_ids' failed at ...e/SOURCES/xen/xen.git/xen/include/xen/cpumask.h:97

Which, I think, needs at least this hunk (from 6b53bb4ab3c9 "sched:
better handle (not) inserting idle vCPUs in runqueues"):

diff --git a/xen/common/schedule.c b/xen/common/schedule.c
index 2beebe8..fddcd52 100644
--- a/xen/common/schedule.c
+++ b/xen/common/schedule.c
@@ -240,20 +240,22 @@ int sched_init_vcpu(struct vcpu *v, unsigned int processor)
     init_timer(&v->poll_timer, poll_timer_fn,
                v, v->processor);
 
-    /* Idle VCPUs are scheduled immediately. */
+    v->sched_priv = SCHED_OP(DOM2OP(d), alloc_vdata, v, d->sched_priv);
+    if ( v->sched_priv == NULL )
+        return 1;
+
+    TRACE_2D(TRC_SCHED_DOM_ADD, v->domain->domain_id, v->vcpu_id);
+
+    /* Idle VCPUs are scheduled immediately, so don't put them in runqueue. */
     if ( is_idle_domain(d) )
     {
         per_cpu(schedule_data, v->processor).curr = v;
         v->is_running = 1;
     }
-
-    TRACE_2D(TRC_SCHED_DOM_ADD, v->domain->domain_id, v->vcpu_id);
-
-    v->sched_priv = SCHED_OP(DOM2OP(d), alloc_vdata, v, d->sched_priv);
-    if ( v->sched_priv == NULL )
-        return 1;
-
-    SCHED_OP(DOM2OP(d), insert_vcpu, v);
+    else
+    {
+        SCHED_OP(DOM2OP(d), insert_vcpu, v);
+    }
 
     return 0;
 }

So, yeah, it's proving a little more complicated than I thought it
would be, just from looking at the patches. :-/

Will let you know.

Regards,
Dario
-- 
<<This happens because I choose it to happen!>> (Raistlin Majere)
-----------------------------------------------------------------
Dario Faggioli, Ph.D, http://about.me/dario.faggioli
Senior Software Engineer, Citrix Systems R&D Ltd., Cambridge (UK)



* Re: [PATCH 2/3] xen: Have schedulers revise initial placement
  2016-08-11 14:59         ` Dario Faggioli
@ 2016-08-11 15:51           ` Andrew Cooper
  2016-08-11 23:35             ` Dario Faggioli
  0 siblings, 1 reply; 33+ messages in thread
From: Andrew Cooper @ 2016-08-11 15:51 UTC (permalink / raw)
  To: Dario Faggioli, Jan Beulich, George Dunlap
  Cc: xen-devel, Anshul Makkar, Meng Xu

On 11/08/16 15:59, Dario Faggioli wrote:
> On Fri, 2016-08-05 at 07:24 -0600, Jan Beulich wrote:
>> I'd really like to have those backported, but I have to ask one
>> of you to identify which prereq-s are needed on 4.6 and 4.5
>> (I'll revert them from 4.5 right away, but I'll wait for an osstest
>> flight to confirm the same issue exists on 4.6). 
>>
> Hey, I could only start working on this this morning (sorry for the
> delay), and I'll continue tomorrow, but, at least here, staging-4.6
> (plus the patches!) crashes like this:
>
> (XEN) [    0.000000] ----[ Xen-4.6.4-pre  x86_64  debug=y  Not tainted ]----
> (XEN) [    0.000000] CPU:    0
> (XEN) [    0.000000] RIP:    e008:[<ffff82d0801238c4>] _csched_cpu_pick+0x156/0x612
> (XEN) [    0.000000] RFLAGS: 0000000000010046   CONTEXT: hypervisor
> (XEN) [    0.000000] rax: 0000000000000040   rbx: 0000000000000040   rcx: 0000000000000000
> (XEN) [    0.000000] rdx: 000000000000003f   rsi: 0000000000000040   rdi: 0000000000000040
> (XEN) [    0.000000] rbp: ffff82d0802f7d68   rsp: ffff82d0802f7c78   r8:  0000000000000000
> (XEN) [    0.000000] r9:  0000000000000001   r10: 0000000000000001   r11: 00000000000000b4
> (XEN) [    0.000000] r12: 0000000000000040   r13: ffff83032152bf40   r14: ffff82d08033ae40
> (XEN) [    0.000000] r15: ffff8300dbdf4000   cr0: 0000000080050033   cr4: 00000000000006e0
> (XEN) [    0.000000] cr3: 00000000dba9f000   cr2: 0000000000000000
> (XEN) [    0.000000] ds: 0000   es: 0000   fs: 0000   gs: 0000   ss: 0000   cs: e008
> (XEN) [    0.000000] Xen stack trace from rsp=ffff82d0802f7c78:
> (XEN) [    0.000000]    0000000000000046 0000000200000092 ffff82d08033ae48 ffff82d08028b020
> (XEN) [    0.000000]    0000000000000000 ffff82d0802ff040 0000000100000001 ffff82d08033ae40
> (XEN) [    0.000000]    0000000000000097 ffff82d0802f7cd8 0000000000000006 ffff82d0802f7ce8
> (XEN) [    0.000000]    ffff82d08012d027 ffff830321532000 ffff82d0802f7d28 ffff82d08013c779
> (XEN) [    0.000000]    0000000000000000 0000000000000010 0000000000000048 0000000000000048
> (XEN) [    0.000000]    0000000000000000 0000000000000000 0000000000000000 0000000000000000
> (XEN) [    0.000000]    ffff82d0802f7d78 ffff8300dbdf4000 ffff83032152a000 ffff83032152bf40
> (XEN) [    0.000000]    0000000000000000 ffff82d08028e020 ffff82d0802f7d78 ffff82d080123d8e
> (XEN) [    0.000000]    ffff82d0802f7db8 ffff82d080123db0 ffff83032152a000 ffff8300dbdf4000
> (XEN) [    0.000000]    ffff83032152a000 0000000000000000 0000000000000000 ffff82d08028e020
> (XEN) [    0.000000]    ffff82d0802f7de8 ffff82d080129e8d ffff82d0802f7de8 ffff82d08013cda0
> (XEN) [    0.000000]    ffff8300dbdf4000 ffff83032152a000 ffff82d0802f7e18 ffff82d080105f59
> (XEN) [    0.000000]    ffff82d0802a1720 ffff82d0802a1718 ffff82d0802a1720 ffff82d0802daa60
> (XEN) [    0.000000]    ffff82d0802f7e48 ffff82d0802a8145 0000000000000002 ffff83032152b610
> (XEN) [    0.000000]    0000000000000002 0000000000000001 ffff82d0802f7f08 ffff82d0802c5c6f
> (XEN) [    0.000000]    0000000000000000 0000000000100000 00000000014ae000 0000000000324000
> (XEN) [    0.000000]    0080016300000000 0000000300000015 0000000000000000 0000000000000010
> (XEN) [    0.000000]    ffff83000008dd50 ffff83000008df20 0000000000000002 ffff83000008dfb0
> (XEN) [    0.000000]    0000000800000000 000000010000006e 0000000000000003 00000000000002f8
> (XEN) [    0.000000]    0000000000000000 0000000000000000 0000000000000000 0000000000000000
> (XEN) [    0.000000] Xen call trace:
> (XEN) [    0.000000]    [<ffff82d0801238c4>] _csched_cpu_pick+0x156/0x612
> (XEN) [    0.000000]    [<ffff82d080123d8e>] csched_cpu_pick+0xe/0x10
> (XEN) [    0.000000]    [<ffff82d080123db0>] csched_vcpu_insert+0x20/0x145
> (XEN) [    0.000000]    [<ffff82d080129e8d>] sched_init_vcpu+0x1d1/0x218
> (XEN) [    0.000000]    [<ffff82d080105f59>] alloc_vcpu+0x1ba/0x2a4
> (XEN) [    0.000000]    [<ffff82d0802a8145>] scheduler_init+0x1b0/0x271
> (XEN) [    0.000000]    [<ffff82d0802c5c6f>] __start_xen+0x1f85/0x2550
> (XEN) [    0.000000]    [<ffff82d080100073>] __high_start+0x53/0x55
> (XEN) [    0.000000] 
> (XEN) [    0.000000] 
> (XEN) [    0.000000] ****************************************
> (XEN) [    0.000000] Panic on CPU 0:
> (XEN) [    0.000000] Assertion 'cpu < nr_cpu_ids' failed at ...e/SOURCES/xen/xen.git/xen/include/xen/cpumask.h:97
>
> Which, I think, needs at least this hunk (from 6b53bb4ab3c9 "sched:
> better handle (not) inserting idle vCPUs in runqueues"):
>
> diff --git a/xen/common/schedule.c b/xen/common/schedule.c
> index 2beebe8..fddcd52 100644
> --- a/xen/common/schedule.c
> +++ b/xen/common/schedule.c
> @@ -240,20 +240,22 @@ int sched_init_vcpu(struct vcpu *v, unsigned int processor)
>      init_timer(&v->poll_timer, poll_timer_fn,
>                 v, v->processor);
>  
> -    /* Idle VCPUs are scheduled immediately. */
> +    v->sched_priv = SCHED_OP(DOM2OP(d), alloc_vdata, v, d->sched_priv);
> +    if ( v->sched_priv == NULL )
> +        return 1;
> +
> +    TRACE_2D(TRC_SCHED_DOM_ADD, v->domain->domain_id, v->vcpu_id);
> +
> +    /* Idle VCPUs are scheduled immediately, so don't put them in runqueue. */
>      if ( is_idle_domain(d) )
>      {
>          per_cpu(schedule_data, v->processor).curr = v;
>          v->is_running = 1;
>      }
> -
> -    TRACE_2D(TRC_SCHED_DOM_ADD, v->domain->domain_id, v->vcpu_id);
> -
> -    v->sched_priv = SCHED_OP(DOM2OP(d), alloc_vdata, v, d->sched_priv);
> -    if ( v->sched_priv == NULL )
> -        return 1;
> -
> -    SCHED_OP(DOM2OP(d), insert_vcpu, v);
> +    else
> +    {
> +        SCHED_OP(DOM2OP(d), insert_vcpu, v);
> +    }
>  
>      return 0;
>  }
>
> So, yeah, it's proving a little more complicated than I thought it
> would be, just from looking at the patches. :-/
>
> Will let you know.

FWIW, this looks very similar to the regression I just raised against
Xen 4.7 "[Xen-devel] Scheduler regression in 4.7".  The stack traces are
suspiciously similar.  I expect they have the same root cause.

~Andrew


* Re: [PATCH 2/3] xen: Have schedulers revise initial placement
  2016-08-11 15:51           ` Andrew Cooper
@ 2016-08-11 23:35             ` Dario Faggioli
  0 siblings, 0 replies; 33+ messages in thread
From: Dario Faggioli @ 2016-08-11 23:35 UTC (permalink / raw)
  To: Andrew Cooper, Jan Beulich, George Dunlap
  Cc: xen-devel, Anshul Makkar, Meng Xu


On Thu, 2016-08-11 at 16:51 +0100, Andrew Cooper wrote:
> On 11/08/16 15:59, Dario Faggioli wrote:
> > 
> > Which, I think, needs at least this hunk (from 6b53bb4ab3c9 "sched:
> > better handle (not) inserting idle vCPUs in runqueues"):
> > 
> > diff --git a/xen/common/schedule.c b/xen/common/schedule.c
> > index 2beebe8..fddcd52 100644
> > --- a/xen/common/schedule.c
> > +++ b/xen/common/schedule.c
> > @@ -240,20 +240,22 @@ int sched_init_vcpu(struct vcpu *v, unsigned int processor)
> >      init_timer(&v->poll_timer, poll_timer_fn,
> >                 v, v->processor);
> >  
> > -    /* Idle VCPUs are scheduled immediately. */
> > +    v->sched_priv = SCHED_OP(DOM2OP(d), alloc_vdata, v, d->sched_priv);
> > +    if ( v->sched_priv == NULL )
> > +        return 1;
> > +
> > +    TRACE_2D(TRC_SCHED_DOM_ADD, v->domain->domain_id, v->vcpu_id);
> > +
> > +    /* Idle VCPUs are scheduled immediately, so don't put them in runqueue. */
> >      if ( is_idle_domain(d) )
> >      {
> >          per_cpu(schedule_data, v->processor).curr = v;
> >          v->is_running = 1;
> >      }
> > -
> > -    TRACE_2D(TRC_SCHED_DOM_ADD, v->domain->domain_id, v->vcpu_id);
> > -
> > -    v->sched_priv = SCHED_OP(DOM2OP(d), alloc_vdata, v, d->sched_priv);
> > -    if ( v->sched_priv == NULL )
> > -        return 1;
> > -
> > -    SCHED_OP(DOM2OP(d), insert_vcpu, v);
> > +    else
> > +    {
> > +        SCHED_OP(DOM2OP(d), insert_vcpu, v);
> > +    }
> >  
> >      return 0;
> >  }
> > 
> > So, yeah, it's proving a little more complicated than I thought it
> > would be, just from looking at the patches. :-/
> > 
> > Will let you know.
> FWIW, this looks very similar to the regression I just raised against
> Xen 4.7 "[Xen-devel] Scheduler regression in 4.7".  The stack traces
> are
> suspiciously similar.  
>
I thought the same at the beginning, but they actually may not be the
same or even related.

This happens at early boot, and the reason is that we try to call
csched_cpu_pick() on the idle vcpus, which does not make any sense, and
in fact one of the ASSERTs triggers.

In your case, the system has booted fine already. And the reason for
that is that you're looking at 4.7, which no longer calls
insert_vcpu() (and hence csched_cpu_pick()) on idle vcpus at boot,
thanks to the patch I'm mentioning above.

And in fact, I can confirm that, on 4.6, with "just" the above hunk of
said patch, I can boot, create a (large) VM, play a bit with it, shut
it down or reboot it, and shut down the host as well.

Also, yours seems to _explode_ because of a race on the runq (in
IS_RUNQ_IDLE()), while this one _asserts_ here:

        /* Pick an online CPU from the proper affinity mask */
        csched_balance_cpumask(vc, balance_step, &cpus);

        cpumask_and(&cpus, &cpus, online);
        /* If present, prefer vc's current processor */
        cpu = cpumask_test_cpu(vc->processor, &cpus)
                ? vc->processor
                : cpumask_cycle(vc->processor, &cpus);
        ASSERT(cpumask_test_cpu(cpu, &cpus));

Because, as I said, we're in early boot and, most likely, there's
almost no CPU in the online mask!
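
(A minimal illustration of why that trips the cpumask.h assertion,
assuming the usual semantics of the cpumask helpers, i.e. that
cpumask_cycle() has nothing to return but nr_cpu_ids on an empty mask;
the values are hypothetical:

    cpumask_t cpus;
    unsigned int cpu;

    cpumask_clear(&cpus);                      /* nearly empty "online" */
    cpu = cpumask_cycle(vc->processor, &cpus); /* -> nr_cpu_ids */
    ASSERT(cpumask_test_cpu(cpu, &cpus));      /* cpumask_check() fails
                                                * 'cpu < nr_cpu_ids' */

which matches the panic message in my earlier mail.)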

> I expect they have the same root cause.
> 
No, I think they're two different things.

Regards,
Dario
-- 
<<This happens because I choose it to happen!>> (Raistlin Majere)
-----------------------------------------------------------------
Dario Faggioli, Ph.D, http://about.me/dario.faggioli
Senior Software Engineer, Citrix Systems R&D Ltd., Cambridge (UK)



* dependences for backporting to 4.6 [was: Re: [PATCH 2/3] xen: Have schedulers revise initial placement]
  2016-08-05 13:24       ` Jan Beulich
  2016-08-05 14:09         ` Dario Faggioli
  2016-08-11 14:59         ` Dario Faggioli
@ 2016-08-12  1:59         ` Dario Faggioli
  2016-08-12 13:53           ` Jan Beulich
  2016-08-12  8:58         ` dependences for backporting to 4.5 " Dario Faggioli
  3 siblings, 1 reply; 33+ messages in thread
From: Dario Faggioli @ 2016-08-12  1:59 UTC (permalink / raw)
  To: Jan Beulich, George Dunlap; +Cc: xen-devel, Anshul Makkar, Meng Xu


On Fri, 2016-08-05 at 07:24 -0600, Jan Beulich wrote:
> I'd really like to have those backported, but I have to ask one
> of you to identify which prereq-s are needed on 4.6 and 4.5
> (I'll revert them from 4.5 right away, but I'll wait for an osstest
> flight to confirm the same issue exists on 4.6).
>
So, for 4.6, I think the only prerequisite would be this:

6b53bb4ab3c9bd5eccde88a5175cf72589ba6d52
"sched: better handle (not) inserting idle vCPUs in runqueues"

That, however, does not apply cleanly. The important part of it is the
last hunk:

diff --git a/xen/common/schedule.c b/xen/common/schedule.c
index 92057eb..c195129 100644
--- a/xen/common/schedule.c
+++ b/xen/common/schedule.c
@@ -240,20 +240,22 @@ int sched_init_vcpu(struct vcpu *v, unsigned int processor)
     init_timer(&v->poll_timer, poll_timer_fn,
                v, v->processor);
 
-    /* Idle VCPUs are scheduled immediately. */
+    v->sched_priv = SCHED_OP(DOM2OP(d), alloc_vdata, v, d->sched_priv);
+    if ( v->sched_priv == NULL )
+        return 1;
+
+    TRACE_2D(TRC_SCHED_DOM_ADD, v->domain->domain_id, v->vcpu_id);
+
+    /* Idle VCPUs are scheduled immediately, so don't put them in runqueue. */
     if ( is_idle_domain(d) )
     {
         per_cpu(schedule_data, v->processor).curr = v;
         v->is_running = 1;
     }
-
-    TRACE_2D(TRC_SCHED_DOM_ADD, v->domain->domain_id, v->vcpu_id);
-
-    v->sched_priv = SCHED_OP(DOM2OP(d), alloc_vdata, v, d->sched_priv);
-    if ( v->sched_priv == NULL )
-        return 1;
-
-    SCHED_OP(DOM2OP(d), insert_vcpu, v);
+    else
+    {
+        SCHED_OP(DOM2OP(d), insert_vcpu, v);
+    }
 
     return 0;
 }

With only this applied, things work for me. The hunk is actually the
core of the patch, the only real functionality change. The other hunks
are refactoring and cleanups (made possible by it).

So, I'm not sure whether the best route here is:
 - fully backport 6b53bb4ab3c9b;
 - backport only the last hunk of 6b53bb4ab3c9b as its own patch;
 - fold the last hunk of 6b53bb4ab3c9b in the backport of George's 
   patch (I mean, what was 83dff3992a89 in staging-4.6);

Thoughts?

Note that the issue Andrew is reporting (on 4.7) is, IMO, unrelated to
the hunk above; it may well affect staging too, and if that's the case
it should be fixed there, and the fix backported, I think.

Regards,
Dario
-- 
<<This happens because I choose it to happen!>> (Raistlin Majere)
-----------------------------------------------------------------
Dario Faggioli, Ph.D, http://about.me/dario.faggioli
Senior Software Engineer, Citrix Systems R&D Ltd., Cambridge (UK)



* dependences for backporting to 4.5 [was: Re: [PATCH 2/3] xen: Have schedulers revise initial placement]
  2016-08-05 13:24       ` Jan Beulich
                           ` (2 preceding siblings ...)
  2016-08-12  1:59         ` dependences for backporting to 4.6 [was: Re: [PATCH 2/3] xen: Have schedulers revise initial placement] Dario Faggioli
@ 2016-08-12  8:58         ` Dario Faggioli
  3 siblings, 0 replies; 33+ messages in thread
From: Dario Faggioli @ 2016-08-12  8:58 UTC (permalink / raw)
  To: Jan Beulich, George Dunlap; +Cc: xen-devel, Anshul Makkar, Meng Xu


On Fri, 2016-08-05 at 07:24 -0600, Jan Beulich wrote:
> > > > On 01.08.16 at 14:32, <dario.faggioli@citrix.com> wrote:
> > Yes, I think they're good backporting candidates.
> Well, they appear to work fine on 4.7, but putting them onto 4.5
> causes an early boot crash (BUG_ON( cpu != svc->vcpu->processor )
> in __runq_insert()). 
>
I just tested staging-4.5 plus your backport of these patches (i.e.,
the two commits that you then reverted), and I haven't seen
this... how can that be?

> Pulling in e59321d154 ("credit: remove cpu
> argument to __runq_insert()") obviously makes that crash go
> away, just to, a little later, hit the similar one close to the top of
> csched_load_balance().
> 
In fact, as far as I can see, the same thing I said about backporting
to 4.6 does the trick for 4.5 as well:

https://lists.xen.org/archives/html/xen-devel/2016-08/msg01673.html

I've double checked that I am using the proper tree and code, and it
looks like I am.

Can (anyone of) you have a go with that?

If I'm right, and "just" avoiding calling insert_vcpu() for idle vcpus
is enough, the question about what's the best path is the same as for
4.6.

Regards,
Dario
-- 
<<This happens because I choose it to happen!>> (Raistlin Majere)
-----------------------------------------------------------------
Dario Faggioli, Ph.D, http://about.me/dario.faggioli
Senior Software Engineer, Citrix Systems R&D Ltd., Cambridge (UK)



* dependences for backporting to 4.6 [was: Re: [PATCH 2/3] xen: Have schedulers revise initial placement]
  2016-08-12  1:59         ` dependences for backporting to 4.6 [was: Re: [PATCH 2/3] xen: Have schedulers revise initial placement] Dario Faggioli
@ 2016-08-12 13:53           ` Jan Beulich
  2016-08-16 10:21             ` Dario Faggioli
  0 siblings, 1 reply; 33+ messages in thread
From: Jan Beulich @ 2016-08-12 13:53 UTC (permalink / raw)
  To: Dario Faggioli; +Cc: xen-devel, Anshul Makkar, Meng Xu, George Dunlap

>>> On 12.08.16 at 03:59, <dario.faggioli@citrix.com> wrote:
> On Fri, 2016-08-05 at 07:24 -0600, Jan Beulich wrote:
>> I'd really like to have those backported, but I have to ask one
>> of you to identify which prereq-s are needed on 4.6 and 4.5
>> (I'll revert them from 4.5 right away, but I'll wait for an osstest
>> flight to confirm the same issue exists on 4.6).
>>
> So, for 4.6, I think the only prerequisite would be this:
> 
> 6b53bb4ab3c9bd5eccde88a5175cf72589ba6d52
> "sched: better handle (not) inserting idle vCPUs in runqueues"
> 
> That, however, does not apply cleanly. The important part of it is the
> last hunk:
> 
> diff --git a/xen/common/schedule.c b/xen/common/schedule.c
> index 92057eb..c195129 100644
> --- a/xen/common/schedule.c
> +++ b/xen/common/schedule.c
> @@ -240,20 +240,22 @@ int sched_init_vcpu(struct vcpu *v, unsigned int processor)
>      init_timer(&v->poll_timer, poll_timer_fn,
>                 v, v->processor);
>  
> -    /* Idle VCPUs are scheduled immediately. */
> +    v->sched_priv = SCHED_OP(DOM2OP(d), alloc_vdata, v, d->sched_priv);
> +    if ( v->sched_priv == NULL )
> +        return 1;
> +
> +    TRACE_2D(TRC_SCHED_DOM_ADD, v->domain->domain_id, v->vcpu_id);
> +
> +    /* Idle VCPUs are scheduled immediately, so don't put them in runqueue. */
>      if ( is_idle_domain(d) )
>      {
>          per_cpu(schedule_data, v->processor).curr = v;
>          v->is_running = 1;
>      }
> -
> -    TRACE_2D(TRC_SCHED_DOM_ADD, v->domain->domain_id, v->vcpu_id);
> -
> -    v->sched_priv = SCHED_OP(DOM2OP(d), alloc_vdata, v, d->sched_priv);
> -    if ( v->sched_priv == NULL )
> -        return 1;
> -
> -    SCHED_OP(DOM2OP(d), insert_vcpu, v);
> +    else
> +    {
> +        SCHED_OP(DOM2OP(d), insert_vcpu, v);
> +    }
>  
>      return 0;
>  }
> 
> With only this applied, things work for me. The hunk is actually the
> core of the patch, the only real functionality change. The other hunks
> are refactoring and cleanups (made possible by it).
> 
> So, I'm not sure whether the best route here is:
>  - fully backport 6b53bb4ab3c9b;
>  - backport only the last hunk of 6b53bb4ab3c9b as its own patch;
>  - fold the last hunk of 6b53bb4ab3c9b in the backport of George's 
>    patch (I mean, what was 83dff3992a89 in staging-4.6);
> 
> Thoughts?

First of all - thanks a lot for helping out here. With above extra
commit things are indeed back to normal again for me. Since the
adjustments to that commit to make it apply were mostly
mechanical, I think I'd prefer taking the entire backport. Same
for 4.5 then, where the backport adjusted for 4.6 then applied
cleanly.

Jan



* Re: dependences for backporting to 4.6 [was: Re: [PATCH 2/3] xen: Have schedulers revise initial placement]
  2016-08-12 13:53           ` Jan Beulich
@ 2016-08-16 10:21             ` Dario Faggioli
  2016-08-16 11:21               ` Jan Beulich
  0 siblings, 1 reply; 33+ messages in thread
From: Dario Faggioli @ 2016-08-16 10:21 UTC (permalink / raw)
  To: Jan Beulich; +Cc: xen-devel, Anshul Makkar, Meng Xu, George Dunlap


On Fri, 2016-08-12 at 07:53 -0600, Jan Beulich wrote:
> > > > On 12.08.16 at 03:59, <dario.faggioli@citrix.com> wrote:
> > So, I'm not sure whether the best route here is:
> >  - fully backport 6b53bb4ab3c9b;
> >  - backport only the last hunk of 6b53bb4ab3c9b as its own patch;
> >  - fold the last hunk of 6b53bb4ab3c9b in the backport of George's 
> >    patch (I mean, what was 83dff3992a89 in staging-4.6);
> > 
> > Thoughts?
> First of all - thanks a lot for helping out here. 
>
:-)

> With above extra
> commit things are indeed back to normal again for me. Since the
> adjustments to that commit to make it apply were mostly
> mechanical, I think I'd prefer taking the entire backport. 
>
Fine.

> Same
>> for 4.5 then, where the backport adjusted for 4.6 then applied
> cleanly.
> 
So, you've done the backports yourself, and you don't want/need me to
do them, right?

I'm asking because that's how I read what you're saying here, but I
don't see that having happened in staging-{4.5,4.6}. If that's me
failing to check, or checking in the wrong place, sorry for the noise.

Regards,
Dario
-- 
<<This happens because I choose it to happen!>> (Raistlin Majere)
-----------------------------------------------------------------
Dario Faggioli, Ph.D, http://about.me/dario.faggioli
Senior Software Engineer, Citrix Systems R&D Ltd., Cambridge (UK)



* Re: dependences for backporting to 4.6 [was: Re: [PATCH 2/3] xen: Have schedulers revise initial placement]
  2016-08-16 10:21             ` Dario Faggioli
@ 2016-08-16 11:21               ` Jan Beulich
  0 siblings, 0 replies; 33+ messages in thread
From: Jan Beulich @ 2016-08-16 11:21 UTC (permalink / raw)
  To: Dario Faggioli; +Cc: xen-devel, Anshul Makkar, Meng Xu, George Dunlap

>>> On 16.08.16 at 12:21, <dario.faggioli@citrix.com> wrote:
> On Fri, 2016-08-12 at 07:53 -0600, Jan Beulich wrote:
>> Same
>> for 4.5 then, were the backport adjusted for 4.6 then applied
>> cleanly.
>> 
> So, you've done the backports yourself, and you don't want/need me to
> do them, right?

Indeed.

> I'm asking because that's how I read what you're saying here, but I
> don't see that having happened in staging-{4.5,4.6}. If that's me
> failing to check, or checking in the wrong place, sorry for the noise.

Well, I do things in batches, so these will now simply be part of the
next batch.

Jan



Thread overview: 33+ messages
2016-07-15 18:02 [PATCH 1/3] xen: Some code motion to avoid having to do forward-declaration George Dunlap
2016-07-15 18:02 ` [PATCH 2/3] xen: Have schedulers revise initial placement George Dunlap
2016-07-15 18:07   ` Andrew Cooper
2016-07-16 14:12     ` Dario Faggioli
2016-07-18 18:10       ` Andrew Cooper
2016-07-18 18:55         ` Dario Faggioli
2016-07-18 21:36           ` Andrew Cooper
2016-07-19  7:14             ` Dario Faggioli
2016-07-18 10:28   ` Dario Faggioli
2016-07-25 11:17     ` George Dunlap
2016-07-25 14:36       ` Meng Xu
2016-07-26  9:17       ` Dario Faggioli
2016-07-25 14:35   ` Meng Xu
2016-08-01 10:40   ` Jan Beulich
2016-08-01 12:32     ` Dario Faggioli
2016-08-05 13:24       ` Jan Beulich
2016-08-05 14:09         ` Dario Faggioli
2016-08-05 14:44           ` Jan Beulich
2016-08-11 14:59         ` Dario Faggioli
2016-08-11 15:51           ` Andrew Cooper
2016-08-11 23:35             ` Dario Faggioli
2016-08-12  1:59         ` dependences for backporting to 4.6 [was: Re: [PATCH 2/3] xen: Have schedulers revise initial placement] Dario Faggioli
2016-08-12 13:53           ` Jan Beulich
2016-08-16 10:21             ` Dario Faggioli
2016-08-16 11:21               ` Jan Beulich
2016-08-12  8:58         ` dependences for backporting to 4.5 " Dario Faggioli
2016-07-15 18:02 ` [PATCH 3/3] xen: Remove buggy initial placement algorithm George Dunlap
2016-07-15 18:10   ` Andrew Cooper
2016-07-16 13:55   ` Dario Faggioli
2016-07-18 10:03     ` George Dunlap
2016-07-16 15:48 ` [PATCH 1/3] xen: Some code motion to avoid having to do forward-declaration Meng Xu
2016-07-18  9:58 ` Dario Faggioli
2016-07-18 10:06   ` George Dunlap
