* [PATCH v4] xen: sched_rt: print useful affinity info when dumping
@ 2015-05-12 14:06 Dario Faggioli
  2015-05-12 16:01 ` Meng Xu
  2015-06-01 13:20 ` George Dunlap
  0 siblings, 2 replies; 5+ messages in thread
From: Dario Faggioli @ 2015-05-12 14:06 UTC
  To: xen-devel; +Cc: George Dunlap, Keir Fraser, Meng Xu, Jan Beulich

In fact, printing the cpupool's CPU online mask
for each vCPU is just redundant, as that is the
same for all the vCPUs of all the domains in the
same cpupool, while hard affinity is already part
of the output of dumping domains info.

Instead, print the intersection between hard
affinity and online CPUs, which is --in case of this
scheduler-- the effective affinity always used for
the vCPUs.
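
In code, that boils down to something like the
following (see the hunk below; 'mask' points at
the pCPU's entry in the scratch cpumask array
that this patch also introduces):

  cpupool_mask = cpupool_scheduler_cpumask(svc->vcpu->domain->cpupool);
  cpumask_and(mask, cpupool_mask, svc->vcpu->cpu_hard_affinity);
  cpulist_scnprintf(keyhandler_scratch, sizeof(keyhandler_scratch), mask);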

This change also takes the chance to add a scratch
cpumask area, to avoid having to either put one
(more) cpumask_t on the stack, or dynamically
allocate it within the dumping routine. (The former
being bad because hypervisor stack size is limited,
the latter because dynamic allocations can fail, if
the hypervisor was built for a large enough number
of CPUs.) We allocate such a scratch area, for all
pCPUs, when the first instance of the RTDS scheduler
is activated. In order not to lose track of it (and
leak it) if other instances are activated in new
cpupools, and to free it when the last instance is
deactivated, we (sort of) refcount it.
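
The resulting (de)allocation logic, as implemented
in rt_init() and rt_deinit() below, is basically:

  /* rt_init(): allocate the array only for the first instance */
  if ( !_cpumask_scratch )
  {
      _cpumask_scratch = xmalloc_array(cpumask_var_t, nr_cpu_ids);
      if ( !_cpumask_scratch )
          return -ENOMEM;
  }
  nr_rt_ops++;

  /* rt_deinit(): free the array only when the last instance goes away */
  if ( (--nr_rt_ops) == 0 )
  {
      xfree(_cpumask_scratch);
      _cpumask_scratch = NULL;
  }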

Such a scratch area can be used to kill most of the
cpumask{_var}_t local variables in other functions
in the file, but that is *NOT* done in this change.

Finally, convert the file to use keyhandler scratch,
instead of open coded string buffers.

Signed-off-by: Dario Faggioli <dario.faggioli@citrix.com>
Cc: George Dunlap <george.dunlap@eu.citrix.com>
Cc: Meng Xu <xumengpanda@gmail.com>
Cc: Jan Beulich <JBeulich@suse.com>
Cc: Keir Fraser <keir@xen.org>
---
Changes from v3:
 * coding style and '!' vs '==NULL' consistency improved,
   as requested during review

Changes from v2:
 * added refcounting, to avoid leaking the scratch cpumasks
 * added some ASSERT()s to validate the refcounting

Changes from v1:
 * improved changelog;
 * made a local variable to point to the correct
   scratch mask, as suggested during review.
---
 xen/common/sched_rt.c |   66 ++++++++++++++++++++++++++++++++++++++++++-------
 1 file changed, 57 insertions(+), 9 deletions(-)

diff --git a/xen/common/sched_rt.c b/xen/common/sched_rt.c
index 7c39a9e..59ead57 100644
--- a/xen/common/sched_rt.c
+++ b/xen/common/sched_rt.c
@@ -124,6 +124,24 @@
 #define TRC_RTDS_BUDGET_REPLENISH TRC_SCHED_CLASS_EVT(RTDS, 4)
 #define TRC_RTDS_SCHED_TASKLET    TRC_SCHED_CLASS_EVT(RTDS, 5)
 
+ /*
+  * Useful to avoid too many cpumask_var_t on the stack.
+  */
+static cpumask_t **_cpumask_scratch;
+#define cpumask_scratch _cpumask_scratch[smp_processor_id()]
+
+/*
+ * We want to only allocate the _cpumask_scratch array the first time an
+ * instance of this scheduler is used, and avoid reallocating and leaking
+ * the old one when more instance are activated inside new cpupools. We
+ * also want to get rid of it when the last instance is de-inited.
+ *
+ * So we (sort of) reference count the number of initialized instances. This
+ * does not need to happen via atomic_t refcounters, as it only happens either
+ * during boot, or under the protection of the cpupool_lock spinlock.
+ */
+static unsigned int nr_rt_ops;
+
 /*
  * Systme-wide private data, include global RunQueue/DepletedQ
  * Global lock is referenced by schedule_data.schedule_lock from all
@@ -218,8 +236,7 @@ __q_elem(struct list_head *elem)
 static void
 rt_dump_vcpu(const struct scheduler *ops, const struct rt_vcpu *svc)
 {
-    char cpustr[1024];
-    cpumask_t *cpupool_mask;
+    cpumask_t *cpupool_mask, *mask;
 
     ASSERT(svc != NULL);
     /* idle vcpu */
@@ -229,10 +246,22 @@ rt_dump_vcpu(const struct scheduler *ops, const struct rt_vcpu *svc)
         return;
     }
 
-    cpumask_scnprintf(cpustr, sizeof(cpustr), svc->vcpu->cpu_hard_affinity);
+    /*
+     * We can't just use 'cpumask_scratch' because the dumping can
+     * happen from a pCPU outside of this scheduler's cpupool, and
+     * hence it's not right to use the pCPU's scratch mask (which
+     * may even not exist!). On the other hand, it is safe to use
+     * svc->vcpu->processor's own scratch space, since we hold the
+     * runqueue lock.
+     */
+    mask = _cpumask_scratch[svc->vcpu->processor];
+
+    cpupool_mask = cpupool_scheduler_cpumask(svc->vcpu->domain->cpupool);
+    cpumask_and(mask, cpupool_mask, svc->vcpu->cpu_hard_affinity);
+    cpulist_scnprintf(keyhandler_scratch, sizeof(keyhandler_scratch), mask);
     printk("[%5d.%-2u] cpu %u, (%"PRI_stime", %"PRI_stime"),"
            " cur_b=%"PRI_stime" cur_d=%"PRI_stime" last_start=%"PRI_stime"\n"
-           " \t\t onQ=%d runnable=%d cpu_hard_affinity=%s ",
+           " \t\t onQ=%d runnable=%d flags=%x effective hard_affinity=%s\n",
             svc->vcpu->domain->domain_id,
             svc->vcpu->vcpu_id,
             svc->vcpu->processor,
@@ -243,11 +272,8 @@ rt_dump_vcpu(const struct scheduler *ops, const struct rt_vcpu *svc)
             svc->last_start,
             __vcpu_on_q(svc),
             vcpu_runnable(svc->vcpu),
-            cpustr);
-    memset(cpustr, 0, sizeof(cpustr));
-    cpupool_mask = cpupool_scheduler_cpumask(svc->vcpu->domain->cpupool);
-    cpumask_scnprintf(cpustr, sizeof(cpustr), cpupool_mask);
-    printk("cpupool=%s\n", cpustr);
+            svc->flags,
+            keyhandler_scratch);
 }
 
 static void
@@ -409,6 +435,16 @@ rt_init(struct scheduler *ops)
     if ( prv == NULL )
         return -ENOMEM;
 
+    ASSERT( _cpumask_scratch == NULL || nr_rt_ops > 0 );
+
+    if ( !_cpumask_scratch )
+    {
+        _cpumask_scratch = xmalloc_array(cpumask_var_t, nr_cpu_ids);
+        if ( !_cpumask_scratch )
+            return -ENOMEM;
+    }
+    nr_rt_ops++;
+
     spin_lock_init(&prv->lock);
     INIT_LIST_HEAD(&prv->sdom);
     INIT_LIST_HEAD(&prv->runq);
@@ -426,6 +462,13 @@ rt_deinit(const struct scheduler *ops)
 {
     struct rt_private *prv = rt_priv(ops);
 
+    ASSERT( _cpumask_scratch && nr_rt_ops > 0 );
+
+    if ( (--nr_rt_ops) == 0 )
+    {
+        xfree(_cpumask_scratch);
+        _cpumask_scratch = NULL;
+    }
     xfree(prv);
 }
 
@@ -443,6 +486,9 @@ rt_alloc_pdata(const struct scheduler *ops, int cpu)
     per_cpu(schedule_data, cpu).schedule_lock = &prv->lock;
     spin_unlock_irqrestore(&prv->lock, flags);
 
+    if ( !alloc_cpumask_var(&_cpumask_scratch[cpu]) )
+        return NULL;
+
     /* 1 indicates alloc. succeed in schedule.c */
     return (void *)1;
 }
@@ -462,6 +508,8 @@ rt_free_pdata(const struct scheduler *ops, void *pcpu, int cpu)
     sd->schedule_lock = &sd->_lock;
 
     spin_unlock_irqrestore(&prv->lock, flags);
+
+    free_cpumask_var(_cpumask_scratch[cpu]);
 }
 
 static void *

* Re: [PATCH v4] xen: sched_rt: print useful affinity info when dumping
  2015-05-12 14:06 [PATCH v4] xen: sched_rt: print useful affinity info when dumping Dario Faggioli
@ 2015-05-12 16:01 ` Meng Xu
  2015-05-12 16:39   ` Dario Faggioli
  2015-06-01 13:20 ` George Dunlap
  1 sibling, 1 reply; 5+ messages in thread
From: Meng Xu @ 2015-05-12 16:01 UTC
  To: Dario Faggioli; +Cc: George Dunlap, Keir Fraser, Jan Beulich, xen-devel

Hi Dario,

2015-05-12 10:06 GMT-04:00 Dario Faggioli <dario.faggioli@citrix.com>:
> In fact, printing the cpupool's CPU online mask
> for each vCPU is just redundant, as that is the
> same for all the vCPUs of all the domains in the
> same cpupool, while hard affinity is already part
> of the output of dumping domains info.
>
> Instead, print the intersection between hard
> affinity and online CPUs, which is --in case of this
> scheduler-- the effective affinity always used for
> the vCPUs.
>
> This change also takes the chance to add a scratch
> cpumask area, to avoid having to either put one
> (more) cpumask_t on the stack, or dynamically
> allocate it within the dumping routine. (The former
> being bad because hypervisor stack size is limited,
> the latter because dynamic allocations can fail, if
> the hypervisor was built for a large enough number
> of CPUs.) We allocate such a scratch area, for all
> pCPUs, when the first instance of the RTDS scheduler
> is activated. In order not to lose track of it (and
> leak it) if other instances are activated in new
> cpupools, and to free it when the last instance is
> deactivated, we (sort of) refcount it.
>
> Such a scratch area can be used to kill most of the
> cpumask{_var}_t local variables in other functions
> in the file, but that is *NOT* done in this change.
>
> Finally, convert the file to use keyhandler scratch,
> instead of open coded string buffers.
>
> Signed-off-by: Dario Faggioli <dario.faggioli@citrix.com>
> Cc: George Dunlap <george.dunlap@eu.citrix.com>
> Cc: Meng Xu <xumengpanda@gmail.com>
> Cc: Jan Beulich <JBeulich@suse.com>
> Cc: Keir Fraser <keir@xen.org>
> ---
> Changes from v3:
>  * coding style and '!' vs '==NULL' consistency improved,
>    as requested during review
>
> Changes from v2:
>  * added refcounting, to avoid leaking the scratch cpumasks
>  * added some ASSERT()s to validate the refcounting
>
> Changes from v1:
>  * improved changelog;
>  * made a local variable to point to the correct
>    scratch mask, as suggested during review.
> ---
>  xen/common/sched_rt.c |   66 ++++++++++++++++++++++++++++++++++++++++++-------
>  1 file changed, 57 insertions(+), 9 deletions(-)
>
> diff --git a/xen/common/sched_rt.c b/xen/common/sched_rt.c
> index 7c39a9e..59ead57 100644
> --- a/xen/common/sched_rt.c
> +++ b/xen/common/sched_rt.c
> @@ -124,6 +124,24 @@
>  #define TRC_RTDS_BUDGET_REPLENISH TRC_SCHED_CLASS_EVT(RTDS, 4)
>  #define TRC_RTDS_SCHED_TASKLET    TRC_SCHED_CLASS_EVT(RTDS, 5)
>
> + /*
> +  * Useful to avoid too many cpumask_var_t on the stack.
> +  */
> +static cpumask_t **_cpumask_scratch;
> +#define cpumask_scratch _cpumask_scratch[smp_processor_id()]

The cpumask_scratch seems never used in this patch.. Did I miss anything?
If it's not used in any other places, do we really need this #define?

Thanks,

Meng


-----------
Meng Xu
PhD Student in Computer and Information Science
University of Pennsylvania
http://www.cis.upenn.edu/~mengxu/

* Re: [PATCH v4] xen: sched_rt: print useful affinity info when dumping
  2015-05-12 16:01 ` Meng Xu
@ 2015-05-12 16:39   ` Dario Faggioli
  2015-05-12 19:05     ` Meng Xu
  0 siblings, 1 reply; 5+ messages in thread
From: Dario Faggioli @ 2015-05-12 16:39 UTC
  To: Meng Xu; +Cc: George Dunlap, Keir Fraser, Jan Beulich, xen-devel


On Tue, 2015-05-12 at 12:01 -0400, Meng Xu wrote:
> Hi Dario,
> 
Hi,

> 2015-05-12 10:06 GMT-04:00 Dario Faggioli <dario.faggioli@citrix.com>:

> > --- a/xen/common/sched_rt.c
> > +++ b/xen/common/sched_rt.c
> > @@ -124,6 +124,24 @@
> >  #define TRC_RTDS_BUDGET_REPLENISH TRC_SCHED_CLASS_EVT(RTDS, 4)
> >  #define TRC_RTDS_SCHED_TASKLET    TRC_SCHED_CLASS_EVT(RTDS, 5)
> >
> > + /*
> > +  * Useful to avoid too many cpumask_var_t on the stack.
> > +  */
> > +static cpumask_t **_cpumask_scratch;
> > +#define cpumask_scratch _cpumask_scratch[smp_processor_id()]
> 
> The cpumask_scratch seems never used in this patch.. Did I miss anything?
>
No, it's never used, because, as it happens, the use case this patch
deals with needs to explicitly reference the _cpumask_scratch array.
That should be the exception rather than the rule, and the reason why it
is necessary this time is given in the comments.
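
That is, rt_dump_vcpu() does:

    mask = _cpumask_scratch[svc->vcpu->processor];

rather than using the cpumask_scratch macro, which would pick the
dumping pCPU's entry (and that may not even be allocated, if the
pCPU is outside of this scheduler's cpupool).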

> If it's not used in any other places, do we really need this #define?
> 
We do, IMO. The changelog says that, in addition to improving the dump
output, this change puts in place a "scratch cpumask machinery", useful
to reduce the use of either on-stack or dynamically allocated cpumasks.
That #define is part of the machinery, as it is what people should use,
wherever possible, to reference the scratch bitmap.
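
To give an idea (just a hypothetical sketch, with made up variable
names, not code from this patch), a function that today does:

    cpumask_t tmp;

    cpumask_and(&tmp, vc->cpu_hard_affinity, online);
    /* ... pick a pCPU out of tmp ... */

could, with the machinery in place, just do:

    cpumask_and(cpumask_scratch, vc->cpu_hard_affinity, online);
    /* ... pick a pCPU out of cpumask_scratch ... */

with no cpumask_t on the stack (and no allocation) at all.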

So, let me ask a question myself: when can we expect a patch that takes
advantage of the scratch cpumask machinery being introduced here, in
order to get rid of some (most, hopefully) of the cpumask_t and
cpumask_var_t all around sched_rt.c? :-D

When that happens, you'll need that #define, and if I kill it from
here, you'll have to introduce it yourself. As said, I like it better
being introduced here, but I can live with you adding it with your
patch(es), if that's what everyone else prefers.

Regards,
Dario

* Re: [PATCH v4] xen: sched_rt: print useful affinity info when dumping
  2015-05-12 16:39   ` Dario Faggioli
@ 2015-05-12 19:05     ` Meng Xu
  0 siblings, 0 replies; 5+ messages in thread
From: Meng Xu @ 2015-05-12 19:05 UTC
  To: Dario Faggioli; +Cc: George Dunlap, Keir Fraser, Jan Beulich, xen-devel

>
>> 2015-05-12 10:06 GMT-04:00 Dario Faggioli <dario.faggioli@citrix.com>:
>
>> > --- a/xen/common/sched_rt.c
>> > +++ b/xen/common/sched_rt.c
>> > @@ -124,6 +124,24 @@
>> >  #define TRC_RTDS_BUDGET_REPLENISH TRC_SCHED_CLASS_EVT(RTDS, 4)
>> >  #define TRC_RTDS_SCHED_TASKLET    TRC_SCHED_CLASS_EVT(RTDS, 5)
>> >
>> > + /*
>> > +  * Useful to avoid too many cpumask_var_t on the stack.
>> > +  */
>> > +static cpumask_t **_cpumask_scratch;
>> > +#define cpumask_scratch _cpumask_scratch[smp_processor_id()]
>>
>> The cpumask_scratch seems never used in this patch.. Did I miss anything?
>>
> No, it's never used, because, as it happens, the use case this patch
> deals with needs to explicitly reference the _cpumask_scratch array.
> That should be the exception rather than the rule, and the reason why it
> is necessary this time is given in the comments.
>
>> If it's not used in any other places, do we really need this #define?
>>
> We do, IMO. The changelog says that, in addition to improving the dump
> output, this change puts in place a "scratch cpumask machinery", useful
> to reduce the use of either on-stack or dynamically allocated cpumasks.
> That #define is part of the machinery, as it is what people should use,
> wherever possible, to reference the scratch bitmap.

OK. I see the reason here.

>
> So, let me ask a question myself: when can we expect a patch that takes
> advantage of the scratch cpumask machinery being introduced here, in
> order to get rid of some (most, hopefully) of the cpumask_t and
> cpumask_var_t all around sched_rt.c? :-D
>
> When that happens, you'll need that #define, and if I kill it from
> here, you'll have to introduce it yourself. As said, I like it better
> being introduced here, but I can live with you adding it with your
> patch(es), if that's what everyone else prefers.

I'm ok with either way and prefer the way you do it. :-)


Reviewed-by: Meng Xu <mengxu@cis.upenn.edu>

Thanks,

Meng

-----------
Meng Xu
PhD Student in Computer and Information Science
University of Pennsylvania
http://www.cis.upenn.edu/~mengxu/

* Re: [PATCH v4] xen: sched_rt: print useful affinity info when dumping
  2015-05-12 14:06 [PATCH v4] xen: sched_rt: print useful affinity info when dumping Dario Faggioli
  2015-05-12 16:01 ` Meng Xu
@ 2015-06-01 13:20 ` George Dunlap
  1 sibling, 0 replies; 5+ messages in thread
From: George Dunlap @ 2015-06-01 13:20 UTC
  To: Dario Faggioli, xen-devel; +Cc: Keir Fraser, Meng Xu, Jan Beulich

On 05/12/2015 03:06 PM, Dario Faggioli wrote:
> In fact, printing the cpupool's CPU online mask
> for each vCPU is just redundant, as that is the
> same for all the vCPUs of all the domains in the
> same cpupool, while hard affinity is already part
> of the output of dumping domains info.
> 
> Instead, print the intersection between hard
> affinity and online CPUs, which is --in case of this
> scheduler-- the effective affinity always used for
> the vCPUs.
> 
> This change also takes the chance to add a scratch
> cpumask area, to avoid having to either put one
> (more) cpumask_t on the stack, or dynamically
> allocate it within the dumping routine. (The former
> being bad because hypervisor stack size is limited,
> the latter because dynamic allocations can fail, if
> the hypervisor was built for a large enough number
> of CPUs.) We allocate such a scratch area, for all
> pCPUs, when the first instance of the RTDS scheduler
> is activated. In order not to lose track of it (and
> leak it) if other instances are activated in new
> cpupools, and to free it when the last instance is
> deactivated, we (sort of) refcount it.
> 
> Such a scratch area can be used to kill most of the
> cpumask{_var}_t local variables in other functions
> in the file, but that is *NOT* done in this change.
> 
> Finally, convert the file to use keyhandler scratch,
> instead of open coded string buffers.
> 
> Signed-off-by: Dario Faggioli <dario.faggioli@citrix.com>
> Cc: George Dunlap <george.dunlap@eu.citrix.com>
> Cc: Meng Xu <xumengpanda@gmail.com>
> Cc: Jan Beulich <JBeulich@suse.com>
> Cc: Keir Fraser <keir@xen.org>

I haven't reviewed this, but since I gave a reviewed-by for the previous
series, and it's been reviewed by Meng:

Acked-by: George Dunlap <george.dunlap@eu.citrix.com>
