All of lore.kernel.org
 help / color / mirror / Atom feed
* [Patch] continue_hypercall_on_cpu rework using tasklets
@ 2010-04-13 13:19 Juergen Gross
  2010-04-13 13:40 ` Jan Beulich
  2010-04-13 14:41 ` Keir Fraser
  0 siblings, 2 replies; 44+ messages in thread
From: Juergen Gross @ 2010-04-13 13:19 UTC (permalink / raw)
  To: xen-devel

[-- Attachment #1: Type: text/plain, Size: 1187 bytes --]

Hi,

attached is a patch to use tasklets for continue_hypercall_on_cpu instead of
temporarily pinning the vcpu to the target physical cpu.

This is thought as base for cpupools as Keir requested to get rid of the
"borrow cpu" stuff in my original solution.

This is a rework of the patch I sent before based on some comments by Jan
Beulich.

Keir, if you agree to take this patch, I'll send out the cpupool stuff based
on this patch again. There is just one question pending:

cpupools are using continue_hypercall_on_cpu, but this function is defined for
x86 only, not for ia64. I see 2 possible solutions:

1. make continue_hypercall_on_cpu available on ia64, too
2. make cpupools a pure x86 feature

I think the first solution would be better, but I would need help on this as I
don't have any test machine.


Juergen

-- 
Juergen Gross                 Principal Developer Operating Systems
TSP ES&S SWE OS6                       Telephone: +49 (0) 89 3222 2967
Fujitsu Technology Solutions              e-mail: juergen.gross@ts.fujitsu.com
Domagkstr. 28                           Internet: ts.fujitsu.com
D-80807 Muenchen                 Company details: ts.fujitsu.com/imprint.html

[-- Attachment #2: tasklet.patch --]
[-- Type: text/x-patch, Size: 15037 bytes --]

# HG changeset patch
# User juergen.gross@ts.fujitsu.com
# Date 1271164572 -7200
# Node ID 580130fe415ce646861d1a544f9b93fc1e81835f
# Parent  aae7cb2f18411492c720d3b60adaa979858a63df

Signed-off-by: juergen.gross@ts.fujitsu.com

continue_hypercall_on_cpu rework using tasklets

diff -r aae7cb2f1841 -r 580130fe415c xen/arch/x86/acpi/power.c
--- a/xen/arch/x86/acpi/power.c	Fri Apr 09 08:54:25 2010 +0100
+++ b/xen/arch/x86/acpi/power.c	Tue Apr 13 15:16:12 2010 +0200
@@ -234,7 +234,7 @@
     return error;
 }
 
-static long enter_state_helper(void *data)
+static long enter_state_helper(void *hdl, void *data)
 {
     struct acpi_sleep_info *sinfo = (struct acpi_sleep_info *)data;
     return enter_state(sinfo->sleep_state);
@@ -265,7 +265,7 @@
     acpi_sinfo.pm1b_cnt_val = sleep->pm1b_cnt_val;
     acpi_sinfo.sleep_state = sleep->sleep_state;
 
-    return continue_hypercall_on_cpu(0, enter_state_helper, &acpi_sinfo);
+    return continue_hypercall_on_cpu(0, NULL, enter_state_helper, &acpi_sinfo);
 }
 
 static int acpi_get_wake_status(void)
diff -r aae7cb2f1841 -r 580130fe415c xen/arch/x86/domain.c
--- a/xen/arch/x86/domain.c	Fri Apr 09 08:54:25 2010 +0100
+++ b/xen/arch/x86/domain.c	Tue Apr 13 15:16:12 2010 +0200
@@ -1518,42 +1518,52 @@
 }
 
 struct migrate_info {
-    long (*func)(void *data);
+    struct tasklet tasklet;
+    long (*func)(void *hdl, void *data);
     void *data;
     void (*saved_schedule_tail)(struct vcpu *);
-    cpumask_t saved_affinity;
-    unsigned int nest;
+    volatile int nest;
+    long ret;
+    struct vcpu *v;
 };
 
 static void continue_hypercall_on_cpu_helper(struct vcpu *v)
 {
     struct cpu_user_regs *regs = guest_cpu_user_regs();
     struct migrate_info *info = v->arch.continue_info;
-    cpumask_t mask = info->saved_affinity;
     void (*saved_schedule_tail)(struct vcpu *) = info->saved_schedule_tail;
 
-    regs->eax = info->func(info->data);
+    regs->eax = info->ret;
 
-    if ( info->nest-- == 0 )
-    {
-        xfree(info);
-        v->arch.schedule_tail = saved_schedule_tail;
-        v->arch.continue_info = NULL;
-        vcpu_unlock_affinity(v, &mask);
-    }
+    tasklet_kill(&info->tasklet);
+    xfree(info);
+    v->arch.schedule_tail = saved_schedule_tail;
+    v->arch.continue_info = NULL;
 
     (*saved_schedule_tail)(v);
 }
 
-int continue_hypercall_on_cpu(int cpu, long (*func)(void *data), void *data)
+static void continue_hypercall_on_cpu_tasklet(struct migrate_info *info)
+{
+    info->ret = info->func((void *)info, info->data);
+
+    if ( info->nest-- == 0 )
+        vcpu_unpause(info->v);
+
+    return;
+}
+
+int continue_hypercall_on_cpu(int cpu, void *hdl,
+                              long (*func)(void *hdl, void *data), void *data)
 {
     struct vcpu *v = current;
-    struct migrate_info *info;
-    cpumask_t mask = cpumask_of_cpu(cpu);
-    int rc;
+    struct migrate_info *info = (struct migrate_info *)hdl;
 
     if ( cpu == smp_processor_id() )
-        return func(data);
+        return func(info, data);
+
+    if ( info != NULL )
+        v = info->v;
 
     info = v->arch.continue_info;
     if ( info == NULL )
@@ -1562,34 +1572,30 @@
         if ( info == NULL )
             return -ENOMEM;
 
-        rc = vcpu_lock_affinity(v, &mask);
-        if ( rc )
-        {
-            xfree(info);
-            return rc;
-        }
-
         info->saved_schedule_tail = v->arch.schedule_tail;
-        info->saved_affinity = mask;
         info->nest = 0;
+        info->v = v;
+        tasklet_init(&info->tasklet,
+                     (void(*)(unsigned long))continue_hypercall_on_cpu_tasklet,
+                     (unsigned long)info);
 
         v->arch.schedule_tail = continue_hypercall_on_cpu_helper;
         v->arch.continue_info = info;
+        vcpu_pause_nosync(v);
     }
     else
     {
         BUG_ON(info->nest != 0);
-        rc = vcpu_locked_change_affinity(v, &mask);
-        if ( rc )
-            return rc;
         info->nest++;
     }
 
     info->func = func;
     info->data = data;
 
+    tasklet_schedule_cpu(&info->tasklet, cpu);
+    raise_softirq(SCHEDULE_SOFTIRQ);
+
     /* Dummy return value will be overwritten by new schedule_tail. */
-    BUG_ON(!test_bit(SCHEDULE_SOFTIRQ, &softirq_pending(smp_processor_id())));
     return 0;
 }
 
diff -r aae7cb2f1841 -r 580130fe415c xen/arch/x86/microcode.c
--- a/xen/arch/x86/microcode.c	Fri Apr 09 08:54:25 2010 +0100
+++ b/xen/arch/x86/microcode.c	Tue Apr 13 15:16:12 2010 +0200
@@ -116,7 +116,7 @@
     return err;
 }
 
-static long do_microcode_update(void *_info)
+static long do_microcode_update(void *hdl, void *_info)
 {
     struct microcode_info *info = _info;
     int error;
@@ -129,7 +129,8 @@
 
     info->cpu = next_cpu(info->cpu, cpu_online_map);
     if ( info->cpu < NR_CPUS )
-        return continue_hypercall_on_cpu(info->cpu, do_microcode_update, info);
+        return continue_hypercall_on_cpu(info->cpu, hdl,
+                                         do_microcode_update, info);
 
     error = info->error;
     xfree(info);
@@ -162,5 +163,6 @@
     info->error = 0;
     info->cpu = first_cpu(cpu_online_map);
 
-    return continue_hypercall_on_cpu(info->cpu, do_microcode_update, info);
+    return continue_hypercall_on_cpu(info->cpu, NULL,
+                                     do_microcode_update, info);
 }
diff -r aae7cb2f1841 -r 580130fe415c xen/arch/x86/platform_hypercall.c
--- a/xen/arch/x86/platform_hypercall.c	Fri Apr 09 08:54:25 2010 +0100
+++ b/xen/arch/x86/platform_hypercall.c	Tue Apr 13 15:16:12 2010 +0200
@@ -48,12 +48,12 @@
 extern int set_px_pminfo(uint32_t cpu, struct xen_processor_performance *perf);
 extern long set_cx_pminfo(uint32_t cpu, struct xen_processor_power *power);
 
-static long cpu_frequency_change_helper(void *data)
+static long cpu_frequency_change_helper(void *hdl, void *data)
 {
     return cpu_frequency_change(this_cpu(freq));
 }
 
-static long cpu_down_helper(void *data)
+static long cpu_down_helper(void *hdl, void *data)
 {
     int cpu = (unsigned long)data;
     return cpu_down(cpu);
@@ -314,7 +314,7 @@
         if ( op->u.change_freq.flags || !cpu_online(op->u.change_freq.cpu) )
             break;
         per_cpu(freq, op->u.change_freq.cpu) = op->u.change_freq.freq;
-        ret = continue_hypercall_on_cpu(op->u.change_freq.cpu,
+        ret = continue_hypercall_on_cpu(op->u.change_freq.cpu, NULL,
                                         cpu_frequency_change_helper,
                                         NULL);
         break;
@@ -467,7 +467,7 @@
             break;
         }
         ret = continue_hypercall_on_cpu(
-          0, cpu_down_helper, (void *)(unsigned long)cpu);
+          0, NULL, cpu_down_helper, (void *)(unsigned long)cpu);
         break;
     }
     break;
diff -r aae7cb2f1841 -r 580130fe415c xen/arch/x86/sysctl.c
--- a/xen/arch/x86/sysctl.c	Fri Apr 09 08:54:25 2010 +0100
+++ b/xen/arch/x86/sysctl.c	Tue Apr 13 15:16:12 2010 +0200
@@ -29,7 +29,7 @@
 
 #define get_xen_guest_handle(val, hnd)  do { val = (hnd).p; } while (0)
 
-static long cpu_down_helper(void *data)
+static long cpu_down_helper(void *hdl, void *data)
 {
     int cpu = (unsigned long)data;
     return cpu_down(cpu);
@@ -241,7 +241,7 @@
             break;
         case XEN_SYSCTL_CPU_HOTPLUG_OFFLINE:
             ret = continue_hypercall_on_cpu(
-                0, cpu_down_helper, (void *)(unsigned long)cpu);
+                0, NULL, cpu_down_helper, (void *)(unsigned long)cpu);
             break;
         case XEN_SYSCTL_CPU_HOTPLUG_STATUS:
             ret = 0;
diff -r aae7cb2f1841 -r 580130fe415c xen/common/schedule.c
--- a/xen/common/schedule.c	Fri Apr 09 08:54:25 2010 +0100
+++ b/xen/common/schedule.c	Tue Apr 13 15:16:12 2010 +0200
@@ -408,26 +408,18 @@
     }
 }
 
-static int __vcpu_set_affinity(
-    struct vcpu *v, cpumask_t *affinity,
-    bool_t old_lock_status, bool_t new_lock_status)
+int vcpu_set_affinity(struct vcpu *v, cpumask_t *affinity)
 {
     cpumask_t online_affinity, old_affinity;
+
+    if ( v->domain->is_pinned )
+        return -EINVAL;
 
     cpus_and(online_affinity, *affinity, cpu_online_map);
     if ( cpus_empty(online_affinity) )
         return -EINVAL;
 
     vcpu_schedule_lock_irq(v);
-
-    if ( v->affinity_locked != old_lock_status )
-    {
-        BUG_ON(!v->affinity_locked);
-        vcpu_schedule_unlock_irq(v);
-        return -EBUSY;
-    }
-
-    v->affinity_locked = new_lock_status;
 
     old_affinity = v->cpu_affinity;
     v->cpu_affinity = *affinity;
@@ -444,36 +436,6 @@
     }
 
     return 0;
-}
-
-int vcpu_set_affinity(struct vcpu *v, cpumask_t *affinity)
-{
-    if ( v->domain->is_pinned )
-        return -EINVAL;
-    return __vcpu_set_affinity(v, affinity, 0, 0);
-}
-
-int vcpu_lock_affinity(struct vcpu *v, cpumask_t *affinity)
-{
-    return __vcpu_set_affinity(v, affinity, 0, 1);
-}
-
-int vcpu_locked_change_affinity(struct vcpu *v, cpumask_t *affinity)
-{
-    return __vcpu_set_affinity(v, affinity, 1, 1);
-}
-
-void vcpu_unlock_affinity(struct vcpu *v, cpumask_t *affinity)
-{
-    cpumask_t online_affinity;
-
-    /* Do not fail if no CPU in old affinity mask is online. */
-    cpus_and(online_affinity, *affinity, cpu_online_map);
-    if ( cpus_empty(online_affinity) )
-        *affinity = cpu_online_map;
-
-    if ( __vcpu_set_affinity(v, affinity, 1, 0) != 0 )
-        BUG();
 }
 
 /* Block the currently-executing domain until a pertinent event occurs. */
diff -r aae7cb2f1841 -r 580130fe415c xen/common/softirq.c
--- a/xen/common/softirq.c	Fri Apr 09 08:54:25 2010 +0100
+++ b/xen/common/softirq.c	Tue Apr 13 15:16:12 2010 +0200
@@ -88,9 +88,11 @@
 }
 
 static LIST_HEAD(tasklet_list);
+static DEFINE_PER_CPU(struct list_head, tasklet_list_pcpu);
 static DEFINE_SPINLOCK(tasklet_lock);
 
-void tasklet_schedule(struct tasklet *t)
+static void tasklet_schedule_list(struct tasklet *t, struct list_head *tlist,
+    int cpu)
 {
     unsigned long flags;
 
@@ -101,28 +103,47 @@
         if ( !t->is_scheduled && !t->is_running )
         {
             BUG_ON(!list_empty(&t->list));
-            list_add_tail(&t->list, &tasklet_list);
+            list_add_tail(&t->list, tlist);
+            t->scheduled_on = NR_CPUS;
         }
         t->is_scheduled = 1;
-        raise_softirq(TASKLET_SOFTIRQ);
+        if ( cpu == smp_processor_id() )
+            raise_softirq(TASKLET_SOFTIRQ);
+        else if ( !t->is_running )
+            cpu_raise_softirq(cpu, TASKLET_SOFTIRQ);
+        else
+            t->scheduled_on = cpu;
     }
 
     spin_unlock_irqrestore(&tasklet_lock, flags);
 }
 
+void tasklet_schedule(struct tasklet *t)
+{
+    tasklet_schedule_list(t, &tasklet_list, smp_processor_id());
+}
+
+void tasklet_schedule_cpu(struct tasklet *t, int cpu)
+{
+    tasklet_schedule_list(t, &per_cpu(tasklet_list_pcpu, cpu), cpu);
+}
+
 static void tasklet_action(void)
 {
+    struct list_head *tlist;
     struct tasklet *t;
 
     spin_lock_irq(&tasklet_lock);
 
-    if ( list_empty(&tasklet_list) )
+    tlist = ( list_empty(&this_cpu(tasklet_list_pcpu)) ) ? &tasklet_list :
+        &this_cpu(tasklet_list_pcpu);
+    if ( list_empty(tlist) )
     {
         spin_unlock_irq(&tasklet_lock);
         return;
     }
 
-    t = list_entry(tasklet_list.next, struct tasklet, list);
+    t = list_entry(tlist->next, struct tasklet, list);
     list_del_init(&t->list);
 
     BUG_ON(t->is_dead || t->is_running || !t->is_scheduled);
@@ -138,14 +159,23 @@
     if ( t->is_scheduled )
     {
         BUG_ON(t->is_dead || !list_empty(&t->list));
-        list_add_tail(&t->list, &tasklet_list);
+        if ( t->scheduled_on >= NR_CPUS )
+            list_add_tail(&t->list, tlist);
+        else
+        {
+            unsigned int cpu = t->scheduled_on;
+
+            list_add_tail(&t->list, &per_cpu(tasklet_list_pcpu, cpu));
+            cpu_raise_softirq(cpu, TASKLET_SOFTIRQ);
+        }
     }
 
     /*
      * If there is more work to do then reschedule. We don't grab more work
      * immediately as we want to allow other softirq work to happen first.
      */
-    if ( !list_empty(&tasklet_list) )
+    if ( !list_empty(&tasklet_list) ||
+        !list_empty(&this_cpu(tasklet_list_pcpu)) )
         raise_softirq(TASKLET_SOFTIRQ);
 
     spin_unlock_irq(&tasklet_lock);
@@ -186,6 +216,12 @@
 
 void __init softirq_init(void)
 {
+    int i;
+
+    for_each_possible_cpu ( i )
+    {
+        INIT_LIST_HEAD(&per_cpu(tasklet_list_pcpu, i));
+    }
     open_softirq(TASKLET_SOFTIRQ, tasklet_action);
 }
 
diff -r aae7cb2f1841 -r 580130fe415c xen/include/asm-x86/domain.h
--- a/xen/include/asm-x86/domain.h	Fri Apr 09 08:54:25 2010 +0100
+++ b/xen/include/asm-x86/domain.h	Tue Apr 13 15:16:12 2010 +0200
@@ -452,7 +452,8 @@
 #define hvm_svm         hvm_vcpu.u.svm
 
 /* Continue the current hypercall via func(data) on specified cpu. */
-int continue_hypercall_on_cpu(int cpu, long (*func)(void *data), void *data);
+int continue_hypercall_on_cpu(int cpu, void *hdl,
+                              long (*func)(void *hdl, void *data), void *data);
 
 void vcpu_show_execution_state(struct vcpu *);
 void vcpu_show_registers(const struct vcpu *);
diff -r aae7cb2f1841 -r 580130fe415c xen/include/xen/sched.h
--- a/xen/include/xen/sched.h	Fri Apr 09 08:54:25 2010 +0100
+++ b/xen/include/xen/sched.h	Tue Apr 13 15:16:12 2010 +0200
@@ -132,8 +132,6 @@
     bool_t           defer_shutdown;
     /* VCPU is paused following shutdown request (d->is_shutting_down)? */
     bool_t           paused_for_shutdown;
-    /* VCPU affinity is temporarily locked from controller changes? */
-    bool_t           affinity_locked;
 
     /*
      * > 0: a single port is being polled;
@@ -581,9 +579,6 @@
 void vcpu_force_reschedule(struct vcpu *v);
 void cpu_disable_scheduler(void);
 int vcpu_set_affinity(struct vcpu *v, cpumask_t *affinity);
-int vcpu_lock_affinity(struct vcpu *v, cpumask_t *affinity);
-int vcpu_locked_change_affinity(struct vcpu *v, cpumask_t *affinity);
-void vcpu_unlock_affinity(struct vcpu *v, cpumask_t *affinity);
 
 void vcpu_runstate_get(struct vcpu *v, struct vcpu_runstate_info *runstate);
 uint64_t get_cpu_idle_time(unsigned int cpu);
diff -r aae7cb2f1841 -r 580130fe415c xen/include/xen/softirq.h
--- a/xen/include/xen/softirq.h	Fri Apr 09 08:54:25 2010 +0100
+++ b/xen/include/xen/softirq.h	Tue Apr 13 15:16:12 2010 +0200
@@ -50,14 +50,17 @@
     bool_t is_scheduled;
     bool_t is_running;
     bool_t is_dead;
+    unsigned int scheduled_on;
     void (*func)(unsigned long);
     unsigned long data;
 };
 
 #define DECLARE_TASKLET(name, func, data) \
-    struct tasklet name = { LIST_HEAD_INIT(name.list), 0, 0, 0, func, data }
+    struct tasklet name = { LIST_HEAD_INIT(name.list), 0, 0, 0, NR_CPUS, \
+                            func, data }
 
 void tasklet_schedule(struct tasklet *t);
+void tasklet_schedule_cpu(struct tasklet *t, int cpu);
 void tasklet_kill(struct tasklet *t);
 void tasklet_init(
     struct tasklet *t, void (*func)(unsigned long), unsigned long data);

[-- Attachment #3: Type: text/plain, Size: 138 bytes --]

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [Patch] continue_hypercall_on_cpu rework using tasklets
  2010-04-13 13:19 [Patch] continue_hypercall_on_cpu rework using tasklets Juergen Gross
@ 2010-04-13 13:40 ` Jan Beulich
  2010-04-13 13:49   ` Juergen Gross
  2010-04-13 14:41 ` Keir Fraser
  1 sibling, 1 reply; 44+ messages in thread
From: Jan Beulich @ 2010-04-13 13:40 UTC (permalink / raw)
  To: Juergen Gross; +Cc: xen-devel

>>> Juergen Gross <juergen.gross@ts.fujitsu.com> 13.04.10 15:19 >>>
>cpupools are using continue_hypercall_on_cpu, but this function is defined for
>x86 only, not for ia64. I see 2 possible solutions:
>
>1. make continue_hypercall_on_cpu available on ia64, too
>2. make cpupools a pure x86 feature

Is there anything meaningful in the new code that's really x86-specific
(i.e. can't the whole code chunk be moved to xen/common/)?

Jan

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [Patch] continue_hypercall_on_cpu rework using tasklets
  2010-04-13 13:40 ` Jan Beulich
@ 2010-04-13 13:49   ` Juergen Gross
  2010-04-13 15:08     ` Keir Fraser
  0 siblings, 1 reply; 44+ messages in thread
From: Juergen Gross @ 2010-04-13 13:49 UTC (permalink / raw)
  To: Jan Beulich; +Cc: xen-devel

Jan Beulich wrote:
>>>> Juergen Gross <juergen.gross@ts.fujitsu.com> 13.04.10 15:19 >>>
>> cpupools are using continue_hypercall_on_cpu, but this function is defined for
>> x86 only, not for ia64. I see 2 possible solutions:
>>
>> 1. make continue_hypercall_on_cpu available on ia64, too
>> 2. make cpupools a pure x86 feature
> 
> Is there anything meaningful in the new code that's really x86-specific
> (i.e. can't the whole code chunk be moved to xen/common/)?

My main concern is the difference in the handling of schedule_tail. I just
can't tell how to change the ia64 variant to make it work correctly.

The other x86-specific part is the setting of the return register, but this
is rather easy doable by using an architecture specific macro.


Juergen

-- 
Juergen Gross                 Principal Developer Operating Systems
TSP ES&S SWE OS6                       Telephone: +49 (0) 89 3222 2967
Fujitsu Technology Solutions              e-mail: juergen.gross@ts.fujitsu.com
Domagkstr. 28                           Internet: ts.fujitsu.com
D-80807 Muenchen                 Company details: ts.fujitsu.com/imprint.html

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [Patch] continue_hypercall_on_cpu rework using tasklets
  2010-04-13 13:19 [Patch] continue_hypercall_on_cpu rework using tasklets Juergen Gross
  2010-04-13 13:40 ` Jan Beulich
@ 2010-04-13 14:41 ` Keir Fraser
  2010-04-14  4:26   ` Juergen Gross
  1 sibling, 1 reply; 44+ messages in thread
From: Keir Fraser @ 2010-04-13 14:41 UTC (permalink / raw)
  To: Juergen Gross, xen-devel

On 13/04/2010 14:19, "Juergen Gross" <juergen.gross@ts.fujitsu.com> wrote:

> attached is a patch to use tasklets for continue_hypercall_on_cpu instead of
> temporarily pinning the vcpu to the target physical cpu.
> 
> This is thought as base for cpupools as Keir requested to get rid of the
> "borrow cpu" stuff in my original solution.

Why do you change the interface of continue_hypercall_on_cpu()? What's a
'hdl' anyway?

 -- Keir

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [Patch] continue_hypercall_on_cpu rework using tasklets
  2010-04-13 13:49   ` Juergen Gross
@ 2010-04-13 15:08     ` Keir Fraser
  2010-04-14  6:54       ` Juergen Gross
  0 siblings, 1 reply; 44+ messages in thread
From: Keir Fraser @ 2010-04-13 15:08 UTC (permalink / raw)
  To: Juergen Gross, Jan Beulich; +Cc: xen-devel

On 13/04/2010 14:49, "Juergen Gross" <juergen.gross@ts.fujitsu.com> wrote:

>>> 1. make continue_hypercall_on_cpu available on ia64, too
>>> 2. make cpupools a pure x86 feature
>> 
>> Is there anything meaningful in the new code that's really x86-specific
>> (i.e. can't the whole code chunk be moved to xen/common/)?
> 
> My main concern is the difference in the handling of schedule_tail. I just
> can't tell how to change the ia64 variant to make it work correctly.

We were messing with schedule_tail in order to get control into the
hypercall continuation in the context of the same vcpu but on a different
pcpu. Now that you just execute the continuation in a straightforward
softirq/tasklet context, you don't need to mess with schedule_tail at all...
Right?

 -- Keir

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [Patch] continue_hypercall_on_cpu rework using tasklets
  2010-04-13 14:41 ` Keir Fraser
@ 2010-04-14  4:26   ` Juergen Gross
  2010-04-14  6:46     ` Keir Fraser
  0 siblings, 1 reply; 44+ messages in thread
From: Juergen Gross @ 2010-04-14  4:26 UTC (permalink / raw)
  To: Keir Fraser; +Cc: xen-devel

Keir Fraser wrote:
> On 13/04/2010 14:19, "Juergen Gross" <juergen.gross@ts.fujitsu.com> wrote:
> 
>> attached is a patch to use tasklets for continue_hypercall_on_cpu instead of
>> temporarily pinning the vcpu to the target physical cpu.
>>
>> This is thought as base for cpupools as Keir requested to get rid of the
>> "borrow cpu" stuff in my original solution.
> 
> Why do you change the interface of continue_hypercall_on_cpu()? What's a
> 'hdl' anyway?

I need a way to find the migrate_info structure in case of nested calls of
continue_hypercall_on_cpu(). Originally this was done by storing it in the
vcpu structure, but this can't be done any more using tasklets. In my first
attempt I saved it in the per-cpu area, but this approach isn't working if
continue_hypercall_on_cpu() is called concurrently. So the cleanest way is
to pass it via a parameter.
'hdl' is just a 'handle'.


Juergen

-- 
Juergen Gross                 Principal Developer Operating Systems
TSP ES&S SWE OS6                       Telephone: +49 (0) 89 3222 2967
Fujitsu Technology Solutions              e-mail: juergen.gross@ts.fujitsu.com
Domagkstr. 28                           Internet: ts.fujitsu.com
D-80807 Muenchen                 Company details: ts.fujitsu.com/imprint.html

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [Patch] continue_hypercall_on_cpu rework using tasklets
  2010-04-14  4:26   ` Juergen Gross
@ 2010-04-14  6:46     ` Keir Fraser
  2010-04-14  6:58       ` Juergen Gross
  0 siblings, 1 reply; 44+ messages in thread
From: Keir Fraser @ 2010-04-14  6:46 UTC (permalink / raw)
  To: Juergen Gross; +Cc: xen-devel

On 14/04/2010 05:26, "Juergen Gross" <juergen.gross@ts.fujitsu.com> wrote:

>> Why do you change the interface of continue_hypercall_on_cpu()? What's a
>> 'hdl' anyway?
> 
> I need a way to find the migrate_info structure in case of nested calls of
> continue_hypercall_on_cpu(). Originally this was done by storing it in the
> vcpu structure, but this can't be done any more using tasklets. In my first
> attempt I saved it in the per-cpu area, but this approach isn't working if
> continue_hypercall_on_cpu() is called concurrently. So the cleanest way is
> to pass it via a parameter.

The per-cpu area method should work fine, since Xen is non-preemptive? I
don't think the concurrency you are concerned about can happen.

 -- Keir

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [Patch] continue_hypercall_on_cpu rework using tasklets
  2010-04-13 15:08     ` Keir Fraser
@ 2010-04-14  6:54       ` Juergen Gross
  0 siblings, 0 replies; 44+ messages in thread
From: Juergen Gross @ 2010-04-14  6:54 UTC (permalink / raw)
  To: Keir Fraser; +Cc: xen-devel, Jan Beulich

[-- Attachment #1: Type: text/plain, Size: 1226 bytes --]

Keir Fraser wrote:
> On 13/04/2010 14:49, "Juergen Gross" <juergen.gross@ts.fujitsu.com> wrote:
> 
>>>> 1. make continue_hypercall_on_cpu available on ia64, too
>>>> 2. make cpupools a pure x86 feature
>>> Is there anything meaningful in the new code that's really x86-specific
>>> (i.e. can't the whole code chunk be moved to xen/common/)?
>> My main concern is the difference in the handling of schedule_tail. I just
>> can't tell how to change the ia64 variant to make it work correctly.
> 
> We were messing with schedule_tail in order to get control into the
> hypercall continuation in the context of the same vcpu but on a different
> pcpu. Now that you just execute the continuation in a straightforward
> softirq/tasklet context, you don't need to mess with schedule_tail at all...
> Right?

Aah, you mean something like the attached patch?


Juergen

-- 
Juergen Gross                 Principal Developer Operating Systems
TSP ES&S SWE OS6                       Telephone: +49 (0) 89 3222 2967
Fujitsu Technology Solutions              e-mail: juergen.gross@ts.fujitsu.com
Domagkstr. 28                           Internet: ts.fujitsu.com
D-80807 Muenchen                 Company details: ts.fujitsu.com/imprint.html

[-- Attachment #2: tasklet.patch --]
[-- Type: text/x-patch, Size: 18066 bytes --]

Signed-off-by: juergen.gross@ts.fujitsu.com

# HG changeset patch
# User juergen.gross@ts.fujitsu.com
# Date 1271227921 -7200
# Node ID bbb8e0f142df0f8a26df90964c1b27414b9839ea
# Parent  c02cc832cb2d88c383d33c1ba50c381fae703308
rework of continue_hypercall_on_cpu to use tasklets, make architecture independent

diff -r c02cc832cb2d -r bbb8e0f142df xen/arch/x86/acpi/power.c
--- a/xen/arch/x86/acpi/power.c	Tue Apr 13 18:19:33 2010 +0100
+++ b/xen/arch/x86/acpi/power.c	Wed Apr 14 08:52:01 2010 +0200
@@ -234,7 +234,7 @@
     return error;
 }
 
-static long enter_state_helper(void *data)
+static long enter_state_helper(void *hdl, void *data)
 {
     struct acpi_sleep_info *sinfo = (struct acpi_sleep_info *)data;
     return enter_state(sinfo->sleep_state);
@@ -265,7 +265,7 @@
     acpi_sinfo.pm1b_cnt_val = sleep->pm1b_cnt_val;
     acpi_sinfo.sleep_state = sleep->sleep_state;
 
-    return continue_hypercall_on_cpu(0, enter_state_helper, &acpi_sinfo);
+    return continue_hypercall_on_cpu(0, NULL, enter_state_helper, &acpi_sinfo);
 }
 
 static int acpi_get_wake_status(void)
diff -r c02cc832cb2d -r bbb8e0f142df xen/arch/x86/domain.c
--- a/xen/arch/x86/domain.c	Tue Apr 13 18:19:33 2010 +0100
+++ b/xen/arch/x86/domain.c	Wed Apr 14 08:52:01 2010 +0200
@@ -1517,82 +1517,6 @@
     flush_tlb_mask(&v->vcpu_dirty_cpumask);
 }
 
-struct migrate_info {
-    long (*func)(void *data);
-    void *data;
-    void (*saved_schedule_tail)(struct vcpu *);
-    cpumask_t saved_affinity;
-    unsigned int nest;
-};
-
-static void continue_hypercall_on_cpu_helper(struct vcpu *v)
-{
-    struct cpu_user_regs *regs = guest_cpu_user_regs();
-    struct migrate_info *info = v->arch.continue_info;
-    cpumask_t mask = info->saved_affinity;
-    void (*saved_schedule_tail)(struct vcpu *) = info->saved_schedule_tail;
-
-    regs->eax = info->func(info->data);
-
-    if ( info->nest-- == 0 )
-    {
-        xfree(info);
-        v->arch.schedule_tail = saved_schedule_tail;
-        v->arch.continue_info = NULL;
-        vcpu_unlock_affinity(v, &mask);
-    }
-
-    (*saved_schedule_tail)(v);
-}
-
-int continue_hypercall_on_cpu(int cpu, long (*func)(void *data), void *data)
-{
-    struct vcpu *v = current;
-    struct migrate_info *info;
-    cpumask_t mask = cpumask_of_cpu(cpu);
-    int rc;
-
-    if ( cpu == smp_processor_id() )
-        return func(data);
-
-    info = v->arch.continue_info;
-    if ( info == NULL )
-    {
-        info = xmalloc(struct migrate_info);
-        if ( info == NULL )
-            return -ENOMEM;
-
-        rc = vcpu_lock_affinity(v, &mask);
-        if ( rc )
-        {
-            xfree(info);
-            return rc;
-        }
-
-        info->saved_schedule_tail = v->arch.schedule_tail;
-        info->saved_affinity = mask;
-        info->nest = 0;
-
-        v->arch.schedule_tail = continue_hypercall_on_cpu_helper;
-        v->arch.continue_info = info;
-    }
-    else
-    {
-        BUG_ON(info->nest != 0);
-        rc = vcpu_locked_change_affinity(v, &mask);
-        if ( rc )
-            return rc;
-        info->nest++;
-    }
-
-    info->func = func;
-    info->data = data;
-
-    /* Dummy return value will be overwritten by new schedule_tail. */
-    BUG_ON(!test_bit(SCHEDULE_SOFTIRQ, &softirq_pending(smp_processor_id())));
-    return 0;
-}
-
 #define next_arg(fmt, args) ({                                              \
     unsigned long __arg;                                                    \
     switch ( *(fmt)++ )                                                     \
diff -r c02cc832cb2d -r bbb8e0f142df xen/arch/x86/microcode.c
--- a/xen/arch/x86/microcode.c	Tue Apr 13 18:19:33 2010 +0100
+++ b/xen/arch/x86/microcode.c	Wed Apr 14 08:52:01 2010 +0200
@@ -28,6 +28,7 @@
 #include <xen/sched.h>
 #include <xen/smp.h>
 #include <xen/spinlock.h>
+#include <xen/domain.h>
 #include <xen/guest_access.h>
 
 #include <asm/current.h>
@@ -116,7 +117,7 @@
     return err;
 }
 
-static long do_microcode_update(void *_info)
+static long do_microcode_update(void *hdl, void *_info)
 {
     struct microcode_info *info = _info;
     int error;
@@ -129,7 +130,8 @@
 
     info->cpu = next_cpu(info->cpu, cpu_online_map);
     if ( info->cpu < NR_CPUS )
-        return continue_hypercall_on_cpu(info->cpu, do_microcode_update, info);
+        return continue_hypercall_on_cpu(info->cpu, hdl,
+                                         do_microcode_update, info);
 
     error = info->error;
     xfree(info);
@@ -162,5 +164,6 @@
     info->error = 0;
     info->cpu = first_cpu(cpu_online_map);
 
-    return continue_hypercall_on_cpu(info->cpu, do_microcode_update, info);
+    return continue_hypercall_on_cpu(info->cpu, NULL,
+                                     do_microcode_update, info);
 }
diff -r c02cc832cb2d -r bbb8e0f142df xen/arch/x86/platform_hypercall.c
--- a/xen/arch/x86/platform_hypercall.c	Tue Apr 13 18:19:33 2010 +0100
+++ b/xen/arch/x86/platform_hypercall.c	Wed Apr 14 08:52:01 2010 +0200
@@ -48,12 +48,12 @@
 extern int set_px_pminfo(uint32_t cpu, struct xen_processor_performance *perf);
 extern long set_cx_pminfo(uint32_t cpu, struct xen_processor_power *power);
 
-static long cpu_frequency_change_helper(void *data)
+static long cpu_frequency_change_helper(void *hdl, void *data)
 {
     return cpu_frequency_change(this_cpu(freq));
 }
 
-static long cpu_down_helper(void *data)
+static long cpu_down_helper(void *hdl, void *data)
 {
     int cpu = (unsigned long)data;
     return cpu_down(cpu);
@@ -314,7 +314,7 @@
         if ( op->u.change_freq.flags || !cpu_online(op->u.change_freq.cpu) )
             break;
         per_cpu(freq, op->u.change_freq.cpu) = op->u.change_freq.freq;
-        ret = continue_hypercall_on_cpu(op->u.change_freq.cpu,
+        ret = continue_hypercall_on_cpu(op->u.change_freq.cpu, NULL,
                                         cpu_frequency_change_helper,
                                         NULL);
         break;
@@ -467,7 +467,7 @@
             break;
         }
         ret = continue_hypercall_on_cpu(
-          0, cpu_down_helper, (void *)(unsigned long)cpu);
+          0, NULL, cpu_down_helper, (void *)(unsigned long)cpu);
         break;
     }
     break;
diff -r c02cc832cb2d -r bbb8e0f142df xen/arch/x86/sysctl.c
--- a/xen/arch/x86/sysctl.c	Tue Apr 13 18:19:33 2010 +0100
+++ b/xen/arch/x86/sysctl.c	Wed Apr 14 08:52:01 2010 +0200
@@ -14,6 +14,7 @@
 #include <public/sysctl.h>
 #include <xen/sched.h>
 #include <xen/event.h>
+#include <xen/domain.h>
 #include <xen/domain_page.h>
 #include <asm/msr.h>
 #include <xen/trace.h>
@@ -29,7 +30,7 @@
 
 #define get_xen_guest_handle(val, hnd)  do { val = (hnd).p; } while (0)
 
-static long cpu_down_helper(void *data)
+static long cpu_down_helper(void *hdl, void *data)
 {
     int cpu = (unsigned long)data;
     return cpu_down(cpu);
@@ -186,7 +187,7 @@
             break;
         case XEN_SYSCTL_CPU_HOTPLUG_OFFLINE:
             ret = continue_hypercall_on_cpu(
-                0, cpu_down_helper, (void *)(unsigned long)cpu);
+                0, NULL, cpu_down_helper, (void *)(unsigned long)cpu);
             break;
         case XEN_SYSCTL_CPU_HOTPLUG_STATUS:
             ret = 0;
diff -r c02cc832cb2d -r bbb8e0f142df xen/common/domain.c
--- a/xen/common/domain.c	Tue Apr 13 18:19:33 2010 +0100
+++ b/xen/common/domain.c	Wed Apr 14 08:52:01 2010 +0200
@@ -898,6 +898,76 @@
     return -ENOSYS;
 }
 
+struct migrate_info {
+    long (*func)(void *hdl, void *data);
+    void *data;
+    volatile int nest;
+    struct cpu_user_regs volatile *regs;
+    struct vcpu *v;
+};
+
+static void continue_hypercall_on_cpu_tasklet(struct migrate_info *info)
+{
+    struct vcpu *v = info->v;
+
+    while ( get_return_reg(info->regs) )
+        cpu_relax();
+
+    /* vcpu is paused now, return value can be set */
+    set_return_reg(info->regs, info->func((void *)info, info->data));
+
+    if ( info->nest-- == 0 )
+    {
+        xfree(info);
+        vcpu_unpause(v);
+    }
+
+    return;
+}
+
+int continue_hypercall_on_cpu(int cpu, void *hdl,
+                              long (*func)(void *hdl, void *data), void *data)
+{
+    struct vcpu *v = current;
+    struct migrate_info *info = (struct migrate_info *)hdl;
+
+    if ( cpu == smp_processor_id() )
+        return func(info, data);
+
+    if ( info == NULL )
+    {
+        info = xmalloc(struct migrate_info);
+        if ( info == NULL )
+            return -ENOMEM;
+
+        info->nest = 0;
+        info->v = v;
+        info->regs = guest_cpu_user_regs();
+        tasklet_init(&v->cont_tasklet,
+                     (void(*)(unsigned long))continue_hypercall_on_cpu_tasklet,
+                     (unsigned long)info);
+
+        vcpu_pause_nosync(v);
+    }
+    else
+    {
+        v = info->v;
+        BUG_ON(info->nest != 0);
+        info->nest++;
+    }
+
+    info->func = func;
+    info->data = data;
+
+    set_return_reg(info->regs, -EBUSY);
+
+    tasklet_schedule_cpu(&v->cont_tasklet, cpu);
+    raise_softirq(SCHEDULE_SOFTIRQ);
+
+    /* Dummy return value will be overwritten by tasklet. */
+    return 0;
+}
+
 /*
  * Local variables:
  * mode: C
diff -r c02cc832cb2d -r bbb8e0f142df xen/common/schedule.c
--- a/xen/common/schedule.c	Tue Apr 13 18:19:33 2010 +0100
+++ b/xen/common/schedule.c	Wed Apr 14 08:52:01 2010 +0200
@@ -408,26 +408,18 @@
     }
 }
 
-static int __vcpu_set_affinity(
-    struct vcpu *v, cpumask_t *affinity,
-    bool_t old_lock_status, bool_t new_lock_status)
+int vcpu_set_affinity(struct vcpu *v, cpumask_t *affinity)
 {
     cpumask_t online_affinity, old_affinity;
+
+    if ( v->domain->is_pinned )
+        return -EINVAL;
 
     cpus_and(online_affinity, *affinity, cpu_online_map);
     if ( cpus_empty(online_affinity) )
         return -EINVAL;
 
     vcpu_schedule_lock_irq(v);
-
-    if ( v->affinity_locked != old_lock_status )
-    {
-        BUG_ON(!v->affinity_locked);
-        vcpu_schedule_unlock_irq(v);
-        return -EBUSY;
-    }
-
-    v->affinity_locked = new_lock_status;
 
     old_affinity = v->cpu_affinity;
     v->cpu_affinity = *affinity;
@@ -444,36 +436,6 @@
     }
 
     return 0;
-}
-
-int vcpu_set_affinity(struct vcpu *v, cpumask_t *affinity)
-{
-    if ( v->domain->is_pinned )
-        return -EINVAL;
-    return __vcpu_set_affinity(v, affinity, 0, 0);
-}
-
-int vcpu_lock_affinity(struct vcpu *v, cpumask_t *affinity)
-{
-    return __vcpu_set_affinity(v, affinity, 0, 1);
-}
-
-int vcpu_locked_change_affinity(struct vcpu *v, cpumask_t *affinity)
-{
-    return __vcpu_set_affinity(v, affinity, 1, 1);
-}
-
-void vcpu_unlock_affinity(struct vcpu *v, cpumask_t *affinity)
-{
-    cpumask_t online_affinity;
-
-    /* Do not fail if no CPU in old affinity mask is online. */
-    cpus_and(online_affinity, *affinity, cpu_online_map);
-    if ( cpus_empty(online_affinity) )
-        *affinity = cpu_online_map;
-
-    if ( __vcpu_set_affinity(v, affinity, 1, 0) != 0 )
-        BUG();
 }
 
 /* Block the currently-executing domain until a pertinent event occurs. */
diff -r c02cc832cb2d -r bbb8e0f142df xen/common/softirq.c
--- a/xen/common/softirq.c	Tue Apr 13 18:19:33 2010 +0100
+++ b/xen/common/softirq.c	Wed Apr 14 08:52:01 2010 +0200
@@ -88,9 +88,11 @@
 }
 
 static LIST_HEAD(tasklet_list);
+static DEFINE_PER_CPU(struct list_head, tasklet_list_pcpu);
 static DEFINE_SPINLOCK(tasklet_lock);
 
-void tasklet_schedule(struct tasklet *t)
+static void tasklet_schedule_list(struct tasklet *t, struct list_head *tlist,
+    int cpu)
 {
     unsigned long flags;
 
@@ -101,28 +103,47 @@
         if ( !t->is_scheduled && !t->is_running )
         {
             BUG_ON(!list_empty(&t->list));
-            list_add_tail(&t->list, &tasklet_list);
+            list_add_tail(&t->list, tlist);
+            t->scheduled_on = NR_CPUS;
         }
         t->is_scheduled = 1;
-        raise_softirq(TASKLET_SOFTIRQ);
+        if ( cpu == smp_processor_id() )
+            raise_softirq(TASKLET_SOFTIRQ);
+        else if ( !t->is_running )
+            cpu_raise_softirq(cpu, TASKLET_SOFTIRQ);
+        else
+            t->scheduled_on = cpu;
     }
 
     spin_unlock_irqrestore(&tasklet_lock, flags);
 }
 
+void tasklet_schedule(struct tasklet *t)
+{
+    tasklet_schedule_list(t, &tasklet_list, smp_processor_id());
+}
+
+void tasklet_schedule_cpu(struct tasklet *t, int cpu)
+{
+    tasklet_schedule_list(t, &per_cpu(tasklet_list_pcpu, cpu), cpu);
+}
+
 static void tasklet_action(void)
 {
+    struct list_head *tlist;
     struct tasklet *t;
 
     spin_lock_irq(&tasklet_lock);
 
-    if ( list_empty(&tasklet_list) )
+    tlist = ( list_empty(&this_cpu(tasklet_list_pcpu)) ) ? &tasklet_list :
+        &this_cpu(tasklet_list_pcpu);
+    if ( list_empty(tlist) )
     {
         spin_unlock_irq(&tasklet_lock);
         return;
     }
 
-    t = list_entry(tasklet_list.next, struct tasklet, list);
+    t = list_entry(tlist->next, struct tasklet, list);
     list_del_init(&t->list);
 
     BUG_ON(t->is_dead || t->is_running || !t->is_scheduled);
@@ -138,14 +159,23 @@
     if ( t->is_scheduled )
     {
         BUG_ON(t->is_dead || !list_empty(&t->list));
-        list_add_tail(&t->list, &tasklet_list);
+        if ( t->scheduled_on >= NR_CPUS )
+            list_add_tail(&t->list, tlist);
+        else
+        {
+            unsigned int cpu = t->scheduled_on;
+
+            list_add_tail(&t->list, &per_cpu(tasklet_list_pcpu, cpu));
+            cpu_raise_softirq(cpu, TASKLET_SOFTIRQ);
+        }
     }
 
     /*
      * If there is more work to do then reschedule. We don't grab more work
      * immediately as we want to allow other softirq work to happen first.
      */
-    if ( !list_empty(&tasklet_list) )
+    if ( !list_empty(&tasklet_list) ||
+        !list_empty(&this_cpu(tasklet_list_pcpu)) )
         raise_softirq(TASKLET_SOFTIRQ);
 
     spin_unlock_irq(&tasklet_lock);
@@ -186,6 +216,12 @@
 
 void __init softirq_init(void)
 {
+    int i;
+
+    for_each_possible_cpu ( i )
+    {
+        INIT_LIST_HEAD(&per_cpu(tasklet_list_pcpu, i));
+    }
     open_softirq(TASKLET_SOFTIRQ, tasklet_action);
 }
 
diff -r c02cc832cb2d -r bbb8e0f142df xen/include/asm-ia64/regs.h
--- a/xen/include/asm-ia64/regs.h	Tue Apr 13 18:19:33 2010 +0100
+++ b/xen/include/asm-ia64/regs.h	Wed Apr 14 08:52:01 2010 +0200
@@ -1,1 +1,4 @@
 #include <asm/ptrace.h>
+
+#define get_return_reg(r)      r->r8
+#define set_return_reg(r, val) r->r8 = val
diff -r c02cc832cb2d -r bbb8e0f142df xen/include/asm-x86/domain.h
--- a/xen/include/asm-x86/domain.h	Tue Apr 13 18:19:33 2010 +0100
+++ b/xen/include/asm-x86/domain.h	Wed Apr 14 08:52:01 2010 +0200
@@ -451,9 +451,6 @@
 #define hvm_vmx         hvm_vcpu.u.vmx
 #define hvm_svm         hvm_vcpu.u.svm
 
-/* Continue the current hypercall via func(data) on specified cpu. */
-int continue_hypercall_on_cpu(int cpu, long (*func)(void *data), void *data);
-
 void vcpu_show_execution_state(struct vcpu *);
 void vcpu_show_registers(const struct vcpu *);
 
diff -r c02cc832cb2d -r bbb8e0f142df xen/include/asm-x86/regs.h
--- a/xen/include/asm-x86/regs.h	Tue Apr 13 18:19:33 2010 +0100
+++ b/xen/include/asm-x86/regs.h	Wed Apr 14 08:52:01 2010 +0200
@@ -19,4 +19,7 @@
     (diff == 0);                                                              \
 })
 
+#define get_return_reg(r)      r->eax
+#define set_return_reg(r, val) r->eax = val
+
 #endif /* __X86_REGS_H__ */
diff -r c02cc832cb2d -r bbb8e0f142df xen/include/xen/domain.h
--- a/xen/include/xen/domain.h	Tue Apr 13 18:19:33 2010 +0100
+++ b/xen/include/xen/domain.h	Wed Apr 14 08:52:01 2010 +0200
@@ -62,6 +62,10 @@
 bool_t domctl_lock_acquire(void);
 void domctl_lock_release(void);
 
+/* Continue the current hypercall via func(data) on specified cpu. */
+int continue_hypercall_on_cpu(int cpu, void *hdl,
+                              long (*func)(void *hdl, void *data), void *data);
+
 extern unsigned int xen_processor_pmbits;
 
 #endif /* __XEN_DOMAIN_H__ */
diff -r c02cc832cb2d -r bbb8e0f142df xen/include/xen/sched.h
--- a/xen/include/xen/sched.h	Tue Apr 13 18:19:33 2010 +0100
+++ b/xen/include/xen/sched.h	Wed Apr 14 08:52:01 2010 +0200
@@ -132,8 +132,6 @@
     bool_t           defer_shutdown;
     /* VCPU is paused following shutdown request (d->is_shutting_down)? */
     bool_t           paused_for_shutdown;
-    /* VCPU affinity is temporarily locked from controller changes? */
-    bool_t           affinity_locked;
 
     /*
      * > 0: a single port is being polled;
@@ -156,6 +154,9 @@
 
     /* Bitmask of CPUs which are holding onto this VCPU's state. */
     cpumask_t        vcpu_dirty_cpumask;
+
+    /* tasklet for continue_hypercall_on_cpu() */
+    struct tasklet   cont_tasklet;
 
     struct arch_vcpu arch;
 };
@@ -581,9 +582,6 @@
 void vcpu_force_reschedule(struct vcpu *v);
 void cpu_disable_scheduler(void);
 int vcpu_set_affinity(struct vcpu *v, cpumask_t *affinity);
-int vcpu_lock_affinity(struct vcpu *v, cpumask_t *affinity);
-int vcpu_locked_change_affinity(struct vcpu *v, cpumask_t *affinity);
-void vcpu_unlock_affinity(struct vcpu *v, cpumask_t *affinity);
 
 void vcpu_runstate_get(struct vcpu *v, struct vcpu_runstate_info *runstate);
 uint64_t get_cpu_idle_time(unsigned int cpu);
diff -r c02cc832cb2d -r bbb8e0f142df xen/include/xen/softirq.h
--- a/xen/include/xen/softirq.h	Tue Apr 13 18:19:33 2010 +0100
+++ b/xen/include/xen/softirq.h	Wed Apr 14 08:52:01 2010 +0200
@@ -50,14 +50,17 @@
     bool_t is_scheduled;
     bool_t is_running;
     bool_t is_dead;
+    unsigned int scheduled_on;
     void (*func)(unsigned long);
     unsigned long data;
 };
 
 #define DECLARE_TASKLET(name, func, data) \
-    struct tasklet name = { LIST_HEAD_INIT(name.list), 0, 0, 0, func, data }
+    struct tasklet name = { LIST_HEAD_INIT(name.list), 0, 0, 0, NR_CPUS, \
+                            func, data }
 
 void tasklet_schedule(struct tasklet *t);
+void tasklet_schedule_cpu(struct tasklet *t, int cpu);
 void tasklet_kill(struct tasklet *t);
 void tasklet_init(
     struct tasklet *t, void (*func)(unsigned long), unsigned long data);

[-- Attachment #3: Type: text/plain, Size: 138 bytes --]

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [Patch] continue_hypercall_on_cpu rework using tasklets
  2010-04-14  6:46     ` Keir Fraser
@ 2010-04-14  6:58       ` Juergen Gross
  2010-04-14  7:15         ` Keir Fraser
  0 siblings, 1 reply; 44+ messages in thread
From: Juergen Gross @ 2010-04-14  6:58 UTC (permalink / raw)
  To: Keir Fraser; +Cc: xen-devel

Keir Fraser wrote:
> On 14/04/2010 05:26, "Juergen Gross" <juergen.gross@ts.fujitsu.com> wrote:
> 
>>> Why do you change the interface of continue_hypercall_on_cpu()? What's a
>>> 'hdl' anyway?
>> I need a way to find the migrate_info structure in case of nested calls of
>> continue_hypercall_on_cpu(). Originally this was done by storing it in the
>> vcpu structure, but this can't be done any more using tasklets. In my first
>> attempt I saved it in the per-cpu area, but this approach isn't working if
>> continue_hypercall_on_cpu() is called concurrently. So the cleanest way is
>> to pass it via a parameter.
> 
> The per-cpu area method should work fine, since Xen is non-preemptive? I
> don't think the concurrency you are concerned about can happen.

The tasklet knows only on which cpu it is running, so the data has to be
stored on the target cpu. And one pcpu can be the target of concurrent calls
from different calling cpus...


Juergen

-- 
Juergen Gross                 Principal Developer Operating Systems
TSP ES&S SWE OS6                       Telephone: +49 (0) 89 3222 2967
Fujitsu Technology Solutions              e-mail: juergen.gross@ts.fujitsu.com
Domagkstr. 28                           Internet: ts.fujitsu.com
D-80807 Muenchen                 Company details: ts.fujitsu.com/imprint.html

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [Patch] continue_hypercall_on_cpu rework using tasklets
  2010-04-14  6:58       ` Juergen Gross
@ 2010-04-14  7:15         ` Keir Fraser
  2010-04-14  7:25           ` Juergen Gross
  0 siblings, 1 reply; 44+ messages in thread
From: Keir Fraser @ 2010-04-14  7:15 UTC (permalink / raw)
  To: Juergen Gross; +Cc: xen-devel

On 14/04/2010 07:58, "Juergen Gross" <juergen.gross@ts.fujitsu.com> wrote:

>> The per-cpu area method should work fine, since Xen is non-preemptive? I
>> don't think the concurrency you are concerned about can happen.
> 
> The tasklet knows only on which cpu it is running, so the data has to be
> stored on the target cpu. And one pcpu can be the target of concurrent calls
> from different calling cpus...

A tasklet also takes an arbitrary ulong parameter, which you can cast to a
pointer to your informational structure. The parameter is specified via
tasklet_init(). That should suffice.

 -- Keir

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [Patch] continue_hypercall_on_cpu rework using tasklets
  2010-04-14  7:15         ` Keir Fraser
@ 2010-04-14  7:25           ` Juergen Gross
  2010-04-14  7:35             ` Keir Fraser
  0 siblings, 1 reply; 44+ messages in thread
From: Juergen Gross @ 2010-04-14  7:25 UTC (permalink / raw)
  To: Keir Fraser; +Cc: xen-devel

Keir Fraser wrote:
> On 14/04/2010 07:58, "Juergen Gross" <juergen.gross@ts.fujitsu.com> wrote:
> 
>>> The per-cpu area method should work fine, since Xen is non-preemptive? I
>>> don't think the concurrency you are concerned about can happen.
>> The tasklet knows only on which cpu it is running, so the data has to be
>> stored on the target cpu. And one pcpu can be the target of concurrent calls
>> from different calling cpus...
> 
> A tasklet also takes an arbitrary ulong parameter, which you can cast to a
> pointer to your informational structure. The parameter is specified via
> tasklet_init(). That should suffice.

I'm already using this. The problem is to find the original calling vcpu in
case of a nested call of continue_hypercall_on_cpu() while not conflicting
with concurrent calls from other vcpus which happen to address the same pcpu.


Juergen

-- 
Juergen Gross                 Principal Developer Operating Systems
TSP ES&S SWE OS6                       Telephone: +49 (0) 89 3222 2967
Fujitsu Technology Solutions              e-mail: juergen.gross@ts.fujitsu.com
Domagkstr. 28                           Internet: ts.fujitsu.com
D-80807 Muenchen                 Company details: ts.fujitsu.com/imprint.html

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [Patch] continue_hypercall_on_cpu rework using tasklets
  2010-04-14  7:25           ` Juergen Gross
@ 2010-04-14  7:35             ` Keir Fraser
  2010-04-14  8:04               ` Juergen Gross
  0 siblings, 1 reply; 44+ messages in thread
From: Keir Fraser @ 2010-04-14  7:35 UTC (permalink / raw)
  To: Juergen Gross; +Cc: xen-devel

On 14/04/2010 08:25, "Juergen Gross" <juergen.gross@ts.fujitsu.com> wrote:

>> A tasklet also takes an arbitrary ulong parameter, which you can cast to a
>> pointer to your informational structure. The parameter is specified via
>> tasklet_init(). That should suffice.
> 
> I'm already using this. The problem is to find the original calling vcpu in
> case of a nested call of continue_hypercall_on_cpu() while not conflicting
> with concurrent calls from other vcpus which happen to address the same pcpu.

There can be only one nested invocation on any given pcpu, since a running
invocation is never preempted. Hence on entry to c_h_o_c() you can check a
per-cpu variable to see whether this invocation is nesting, or not. And if
it is, that variable can be a pointer to an info structure which includes a
pointer to the invoking vcpu.

 -- Keir

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [Patch] continue_hypercall_on_cpu rework using tasklets
  2010-04-14  7:35             ` Keir Fraser
@ 2010-04-14  8:04               ` Juergen Gross
  2010-04-14 10:30                 ` Keir Fraser
  0 siblings, 1 reply; 44+ messages in thread
From: Juergen Gross @ 2010-04-14  8:04 UTC (permalink / raw)
  To: Keir Fraser; +Cc: xen-devel

[-- Attachment #1: Type: text/plain, Size: 1298 bytes --]

Keir Fraser wrote:
> On 14/04/2010 08:25, "Juergen Gross" <juergen.gross@ts.fujitsu.com> wrote:
> 
>>> A tasklet also takes an arbitrary ulong parameter, which you can cast to a
>>> pointer to your informational structure. The parameter is specified via
>>> tasklet_init(). That should suffice.
>> I'm already using this. The problem is to find the original calling vcpu in
>> case of a nested call of continue_hypercall_on_cpu() while not conflicting
>> with concurrent calls from other vcpus which happen to address the same pcpu.
> 
> There can be only one nested invocation on any given pcpu, since a running
> invocation is never preempted. Hence on entry to c_h_o_c() you can check a
> per-cpu variable to see whether this invocation is nesting, or not. And if
> it is, that variable can be a pointer to an info structure which includes a
> pointer to the invoking vcpu.

Okay, attached is the modified patch again.


Juergen

-- 
Juergen Gross                 Principal Developer Operating Systems
TSP ES&S SWE OS6                       Telephone: +49 (0) 89 3222 2967
Fujitsu Technology Solutions              e-mail: juergen.gross@ts.fujitsu.com
Domagkstr. 28                           Internet: ts.fujitsu.com
D-80807 Muenchen                 Company details: ts.fujitsu.com/imprint.html

[-- Attachment #2: tasklet.patch --]
[-- Type: text/x-patch, Size: 14443 bytes --]

Signed-off-by: juergen.gross@ts.fujitsu.com

# HG changeset patch
# User juergen.gross@ts.fujitsu.com
# Date 1271232093 -7200
# Node ID 4f01d2bf496cbd95649775deb004e26e74008227
# Parent  c02cc832cb2d88c383d33c1ba50c381fae703308
rework of continue_hypercall_on_cpu to use tasklets, make architecture independent

diff -r c02cc832cb2d -r 4f01d2bf496c xen/arch/x86/domain.c
--- a/xen/arch/x86/domain.c	Tue Apr 13 18:19:33 2010 +0100
+++ b/xen/arch/x86/domain.c	Wed Apr 14 10:01:33 2010 +0200
@@ -1517,82 +1517,6 @@
     flush_tlb_mask(&v->vcpu_dirty_cpumask);
 }
 
-struct migrate_info {
-    long (*func)(void *data);
-    void *data;
-    void (*saved_schedule_tail)(struct vcpu *);
-    cpumask_t saved_affinity;
-    unsigned int nest;
-};
-
-static void continue_hypercall_on_cpu_helper(struct vcpu *v)
-{
-    struct cpu_user_regs *regs = guest_cpu_user_regs();
-    struct migrate_info *info = v->arch.continue_info;
-    cpumask_t mask = info->saved_affinity;
-    void (*saved_schedule_tail)(struct vcpu *) = info->saved_schedule_tail;
-
-    regs->eax = info->func(info->data);
-
-    if ( info->nest-- == 0 )
-    {
-        xfree(info);
-        v->arch.schedule_tail = saved_schedule_tail;
-        v->arch.continue_info = NULL;
-        vcpu_unlock_affinity(v, &mask);
-    }
-
-    (*saved_schedule_tail)(v);
-}
-
-int continue_hypercall_on_cpu(int cpu, long (*func)(void *data), void *data)
-{
-    struct vcpu *v = current;
-    struct migrate_info *info;
-    cpumask_t mask = cpumask_of_cpu(cpu);
-    int rc;
-
-    if ( cpu == smp_processor_id() )
-        return func(data);
-
-    info = v->arch.continue_info;
-    if ( info == NULL )
-    {
-        info = xmalloc(struct migrate_info);
-        if ( info == NULL )
-            return -ENOMEM;
-
-        rc = vcpu_lock_affinity(v, &mask);
-        if ( rc )
-        {
-            xfree(info);
-            return rc;
-        }
-
-        info->saved_schedule_tail = v->arch.schedule_tail;
-        info->saved_affinity = mask;
-        info->nest = 0;
-
-        v->arch.schedule_tail = continue_hypercall_on_cpu_helper;
-        v->arch.continue_info = info;
-    }
-    else
-    {
-        BUG_ON(info->nest != 0);
-        rc = vcpu_locked_change_affinity(v, &mask);
-        if ( rc )
-            return rc;
-        info->nest++;
-    }
-
-    info->func = func;
-    info->data = data;
-
-    /* Dummy return value will be overwritten by new schedule_tail. */
-    BUG_ON(!test_bit(SCHEDULE_SOFTIRQ, &softirq_pending(smp_processor_id())));
-    return 0;
-}
-
 #define next_arg(fmt, args) ({                                              \
     unsigned long __arg;                                                    \
     switch ( *(fmt)++ )                                                     \
diff -r c02cc832cb2d -r 4f01d2bf496c xen/arch/x86/microcode.c
--- a/xen/arch/x86/microcode.c	Tue Apr 13 18:19:33 2010 +0100
+++ b/xen/arch/x86/microcode.c	Wed Apr 14 10:01:33 2010 +0200
@@ -28,6 +28,7 @@
 #include <xen/sched.h>
 #include <xen/smp.h>
 #include <xen/spinlock.h>
+#include <xen/domain.h>
 #include <xen/guest_access.h>
 
 #include <asm/current.h>
diff -r c02cc832cb2d -r 4f01d2bf496c xen/arch/x86/sysctl.c
--- a/xen/arch/x86/sysctl.c	Tue Apr 13 18:19:33 2010 +0100
+++ b/xen/arch/x86/sysctl.c	Wed Apr 14 10:01:33 2010 +0200
@@ -14,6 +14,7 @@
 #include <public/sysctl.h>
 #include <xen/sched.h>
 #include <xen/event.h>
+#include <xen/domain.h>
 #include <xen/domain_page.h>
 #include <asm/msr.h>
 #include <xen/trace.h>
diff -r c02cc832cb2d -r 4f01d2bf496c xen/common/domain.c
--- a/xen/common/domain.c	Tue Apr 13 18:19:33 2010 +0100
+++ b/xen/common/domain.c	Wed Apr 14 10:01:33 2010 +0200
@@ -898,6 +898,81 @@
     return -ENOSYS;
 }
 
+struct migrate_info {
+    long (*func)(void *data);
+    void *data;
+    volatile int nest;
+    struct cpu_user_regs volatile *regs;
+    struct vcpu *v;
+};
+
+static DEFINE_PER_CPU(struct migrate_info *, continue_info);
+
+static void continue_hypercall_on_cpu_tasklet(struct migrate_info *info)
+{
+    struct vcpu *v = info->v;
+
+    while ( get_return_reg(info->regs) )
+        cpu_relax();
+
+    per_cpu(continue_info, smp_processor_id()) = info;
+
+    /* vcpu is paused now, return value can be set */
+    set_return_reg(info->regs, info->func(info->data));
+
+    per_cpu(continue_info, smp_processor_id()) = NULL;
+
+    if ( info->nest-- == 0 )
+    {
+        xfree(info);
+        vcpu_unpause(v);
+    }
+
+    return;
+}
+
+int continue_hypercall_on_cpu(int cpu, long (*func)(void *data), void *data)
+{
+    struct vcpu *v = current;
+    struct migrate_info *info = per_cpu(continue_info, smp_processor_id());
+
+    if ( cpu == smp_processor_id() )
+        return func(data);
+
+    if ( info == NULL )
+    {
+        info = xmalloc(struct migrate_info);
+        if ( info == NULL )
+            return -ENOMEM;
+
+        info->nest = 0;
+        info->v = v;
+        info->regs = guest_cpu_user_regs();
+        tasklet_init(&v->cont_tasklet,
+                     (void(*)(unsigned long))continue_hypercall_on_cpu_tasklet,
+                     (unsigned long)info);
+
+        vcpu_pause_nosync(v);
+    }
+    else
+    {
+        v = info->v;
+        BUG_ON(info->nest != 0);
+        info->nest++;
+    }
+
+    info->func = func;
+    info->data = data;
+
+    set_return_reg(info->regs, -EBUSY);
+
+    tasklet_schedule_cpu(&v->cont_tasklet, cpu);
+    raise_softirq(SCHEDULE_SOFTIRQ);
+
+    /* Dummy return value will be overwritten by tasklet. */
+    return 0;
+}
+
 /*
  * Local variables:
  * mode: C
diff -r c02cc832cb2d -r 4f01d2bf496c xen/common/schedule.c
--- a/xen/common/schedule.c	Tue Apr 13 18:19:33 2010 +0100
+++ b/xen/common/schedule.c	Wed Apr 14 10:01:33 2010 +0200
@@ -408,26 +408,18 @@
     }
 }
 
-static int __vcpu_set_affinity(
-    struct vcpu *v, cpumask_t *affinity,
-    bool_t old_lock_status, bool_t new_lock_status)
+int vcpu_set_affinity(struct vcpu *v, cpumask_t *affinity)
 {
     cpumask_t online_affinity, old_affinity;
+
+    if ( v->domain->is_pinned )
+        return -EINVAL;
 
     cpus_and(online_affinity, *affinity, cpu_online_map);
     if ( cpus_empty(online_affinity) )
         return -EINVAL;
 
     vcpu_schedule_lock_irq(v);
-
-    if ( v->affinity_locked != old_lock_status )
-    {
-        BUG_ON(!v->affinity_locked);
-        vcpu_schedule_unlock_irq(v);
-        return -EBUSY;
-    }
-
-    v->affinity_locked = new_lock_status;
 
     old_affinity = v->cpu_affinity;
     v->cpu_affinity = *affinity;
@@ -444,36 +436,6 @@
     }
 
     return 0;
-}
-
-int vcpu_set_affinity(struct vcpu *v, cpumask_t *affinity)
-{
-    if ( v->domain->is_pinned )
-        return -EINVAL;
-    return __vcpu_set_affinity(v, affinity, 0, 0);
-}
-
-int vcpu_lock_affinity(struct vcpu *v, cpumask_t *affinity)
-{
-    return __vcpu_set_affinity(v, affinity, 0, 1);
-}
-
-int vcpu_locked_change_affinity(struct vcpu *v, cpumask_t *affinity)
-{
-    return __vcpu_set_affinity(v, affinity, 1, 1);
-}
-
-void vcpu_unlock_affinity(struct vcpu *v, cpumask_t *affinity)
-{
-    cpumask_t online_affinity;
-
-    /* Do not fail if no CPU in old affinity mask is online. */
-    cpus_and(online_affinity, *affinity, cpu_online_map);
-    if ( cpus_empty(online_affinity) )
-        *affinity = cpu_online_map;
-
-    if ( __vcpu_set_affinity(v, affinity, 1, 0) != 0 )
-        BUG();
 }
 
 /* Block the currently-executing domain until a pertinent event occurs. */
diff -r c02cc832cb2d -r 4f01d2bf496c xen/common/softirq.c
--- a/xen/common/softirq.c	Tue Apr 13 18:19:33 2010 +0100
+++ b/xen/common/softirq.c	Wed Apr 14 10:01:33 2010 +0200
@@ -88,9 +88,11 @@
 }
 
 static LIST_HEAD(tasklet_list);
+static DEFINE_PER_CPU(struct list_head, tasklet_list_pcpu);
 static DEFINE_SPINLOCK(tasklet_lock);
 
-void tasklet_schedule(struct tasklet *t)
+static void tasklet_schedule_list(struct tasklet *t, struct list_head *tlist,
+    int cpu)
 {
     unsigned long flags;
 
@@ -101,28 +103,47 @@
         if ( !t->is_scheduled && !t->is_running )
         {
             BUG_ON(!list_empty(&t->list));
-            list_add_tail(&t->list, &tasklet_list);
+            list_add_tail(&t->list, tlist);
+            t->scheduled_on = NR_CPUS;
         }
         t->is_scheduled = 1;
-        raise_softirq(TASKLET_SOFTIRQ);
+        if ( cpu == smp_processor_id() )
+            raise_softirq(TASKLET_SOFTIRQ);
+        else if ( !t->is_running )
+            cpu_raise_softirq(cpu, TASKLET_SOFTIRQ);
+        else
+            t->scheduled_on = cpu;
     }
 
     spin_unlock_irqrestore(&tasklet_lock, flags);
 }
 
+void tasklet_schedule(struct tasklet *t)
+{
+    tasklet_schedule_list(t, &tasklet_list, smp_processor_id());
+}
+
+void tasklet_schedule_cpu(struct tasklet *t, int cpu)
+{
+    tasklet_schedule_list(t, &per_cpu(tasklet_list_pcpu, cpu), cpu);
+}
+
 static void tasklet_action(void)
 {
+    struct list_head *tlist;
     struct tasklet *t;
 
     spin_lock_irq(&tasklet_lock);
 
-    if ( list_empty(&tasklet_list) )
+    tlist = ( list_empty(&this_cpu(tasklet_list_pcpu)) ) ? &tasklet_list :
+        &this_cpu(tasklet_list_pcpu);
+    if ( list_empty(tlist) )
     {
         spin_unlock_irq(&tasklet_lock);
         return;
     }
 
-    t = list_entry(tasklet_list.next, struct tasklet, list);
+    t = list_entry(tlist->next, struct tasklet, list);
     list_del_init(&t->list);
 
     BUG_ON(t->is_dead || t->is_running || !t->is_scheduled);
@@ -138,14 +159,23 @@
     if ( t->is_scheduled )
     {
         BUG_ON(t->is_dead || !list_empty(&t->list));
-        list_add_tail(&t->list, &tasklet_list);
+        if ( t->scheduled_on >= NR_CPUS )
+            list_add_tail(&t->list, tlist);
+        else
+        {
+            unsigned int cpu = t->scheduled_on;
+
+            list_add_tail(&t->list, &per_cpu(tasklet_list_pcpu, cpu));
+            cpu_raise_softirq(cpu, TASKLET_SOFTIRQ);
+        }
     }
 
     /*
      * If there is more work to do then reschedule. We don't grab more work
      * immediately as we want to allow other softirq work to happen first.
      */
-    if ( !list_empty(&tasklet_list) )
+    if ( !list_empty(&tasklet_list) ||
+        !list_empty(&this_cpu(tasklet_list_pcpu)) )
         raise_softirq(TASKLET_SOFTIRQ);
 
     spin_unlock_irq(&tasklet_lock);
@@ -186,6 +216,12 @@
 
 void __init softirq_init(void)
 {
+    int i;
+
+    for_each_possible_cpu ( i )
+    {
+        INIT_LIST_HEAD(&per_cpu(tasklet_list_pcpu, i));
+    }
     open_softirq(TASKLET_SOFTIRQ, tasklet_action);
 }
 
diff -r c02cc832cb2d -r 4f01d2bf496c xen/include/asm-ia64/regs.h
--- a/xen/include/asm-ia64/regs.h	Tue Apr 13 18:19:33 2010 +0100
+++ b/xen/include/asm-ia64/regs.h	Wed Apr 14 10:01:33 2010 +0200
@@ -1,1 +1,4 @@
 #include <asm/ptrace.h>
+
+#define get_return_reg(r)      r->r8
+#define set_return_reg(r, val) r->r8 = val
diff -r c02cc832cb2d -r 4f01d2bf496c xen/include/asm-x86/domain.h
--- a/xen/include/asm-x86/domain.h	Tue Apr 13 18:19:33 2010 +0100
+++ b/xen/include/asm-x86/domain.h	Wed Apr 14 10:01:33 2010 +0200
@@ -451,9 +451,6 @@
 #define hvm_vmx         hvm_vcpu.u.vmx
 #define hvm_svm         hvm_vcpu.u.svm
 
-/* Continue the current hypercall via func(data) on specified cpu. */
-int continue_hypercall_on_cpu(int cpu, long (*func)(void *data), void *data);
-
 void vcpu_show_execution_state(struct vcpu *);
 void vcpu_show_registers(const struct vcpu *);
 
diff -r c02cc832cb2d -r 4f01d2bf496c xen/include/asm-x86/regs.h
--- a/xen/include/asm-x86/regs.h	Tue Apr 13 18:19:33 2010 +0100
+++ b/xen/include/asm-x86/regs.h	Wed Apr 14 10:01:33 2010 +0200
@@ -19,4 +19,7 @@
     (diff == 0);                                                              \
 })
 
+#define get_return_reg(r)      r->eax
+#define set_return_reg(r, val) r->eax = val
+
 #endif /* __X86_REGS_H__ */
diff -r c02cc832cb2d -r 4f01d2bf496c xen/include/xen/domain.h
--- a/xen/include/xen/domain.h	Tue Apr 13 18:19:33 2010 +0100
+++ b/xen/include/xen/domain.h	Wed Apr 14 10:01:33 2010 +0200
@@ -62,6 +62,9 @@
 bool_t domctl_lock_acquire(void);
 void domctl_lock_release(void);
 
+/* Continue the current hypercall via func(data) on specified cpu. */
+int continue_hypercall_on_cpu(int cpu, long (*func)(void *data), void *data);
+
 extern unsigned int xen_processor_pmbits;
 
 #endif /* __XEN_DOMAIN_H__ */
diff -r c02cc832cb2d -r 4f01d2bf496c xen/include/xen/sched.h
--- a/xen/include/xen/sched.h	Tue Apr 13 18:19:33 2010 +0100
+++ b/xen/include/xen/sched.h	Wed Apr 14 10:01:33 2010 +0200
@@ -132,8 +132,6 @@
     bool_t           defer_shutdown;
     /* VCPU is paused following shutdown request (d->is_shutting_down)? */
     bool_t           paused_for_shutdown;
-    /* VCPU affinity is temporarily locked from controller changes? */
-    bool_t           affinity_locked;
 
     /*
      * > 0: a single port is being polled;
@@ -156,6 +154,9 @@
 
     /* Bitmask of CPUs which are holding onto this VCPU's state. */
     cpumask_t        vcpu_dirty_cpumask;
+
+    /* tasklet for continue_hypercall_on_cpu() */
+    struct tasklet   cont_tasklet;
 
     struct arch_vcpu arch;
 };
@@ -581,9 +582,6 @@
 void vcpu_force_reschedule(struct vcpu *v);
 void cpu_disable_scheduler(void);
 int vcpu_set_affinity(struct vcpu *v, cpumask_t *affinity);
-int vcpu_lock_affinity(struct vcpu *v, cpumask_t *affinity);
-int vcpu_locked_change_affinity(struct vcpu *v, cpumask_t *affinity);
-void vcpu_unlock_affinity(struct vcpu *v, cpumask_t *affinity);
 
 void vcpu_runstate_get(struct vcpu *v, struct vcpu_runstate_info *runstate);
 uint64_t get_cpu_idle_time(unsigned int cpu);
diff -r c02cc832cb2d -r 4f01d2bf496c xen/include/xen/softirq.h
--- a/xen/include/xen/softirq.h	Tue Apr 13 18:19:33 2010 +0100
+++ b/xen/include/xen/softirq.h	Wed Apr 14 10:01:33 2010 +0200
@@ -50,14 +50,17 @@
     bool_t is_scheduled;
     bool_t is_running;
     bool_t is_dead;
+    unsigned int scheduled_on;
     void (*func)(unsigned long);
     unsigned long data;
 };
 
 #define DECLARE_TASKLET(name, func, data) \
-    struct tasklet name = { LIST_HEAD_INIT(name.list), 0, 0, 0, func, data }
+    struct tasklet name = { LIST_HEAD_INIT(name.list), 0, 0, 0, NR_CPUS, \
+                            func, data }
 
 void tasklet_schedule(struct tasklet *t);
+void tasklet_schedule_cpu(struct tasklet *t, int cpu);
 void tasklet_kill(struct tasklet *t);
 void tasklet_init(
     struct tasklet *t, void (*func)(unsigned long), unsigned long data);

[-- Attachment #3: Type: text/plain, Size: 138 bytes --]

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [Patch] continue_hypercall_on_cpu rework using tasklets
  2010-04-14  8:04               ` Juergen Gross
@ 2010-04-14 10:30                 ` Keir Fraser
  2010-04-15  6:31                   ` Juergen Gross
  0 siblings, 1 reply; 44+ messages in thread
From: Keir Fraser @ 2010-04-14 10:30 UTC (permalink / raw)
  To: Juergen Gross; +Cc: xen-devel

On 14/04/2010 09:04, "Juergen Gross" <juergen.gross@ts.fujitsu.com> wrote:

>> 
>> There can be only one nested invocation on any given pcpu, since a running
>> invocation is never preempted. Hence on entry to c_h_o_c() you can check a
>> per-cpu variable to see whether this invocation is nesting, or not. And if
>> it is, that variable can be a pointer to an info structure which includes a
>> pointer to the invoking vcpu.
> 
> Okay, attached is the modified patch again.

I cleaned up some more and applied as xen-unstable:21165 and
xen-unstable:21166.

 -- Keir

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [Patch] continue_hypercall_on_cpu rework using tasklets
  2010-04-14 10:30                 ` Keir Fraser
@ 2010-04-15  6:31                   ` Juergen Gross
  2010-04-15  6:39                     ` Keir Fraser
  0 siblings, 1 reply; 44+ messages in thread
From: Juergen Gross @ 2010-04-15  6:31 UTC (permalink / raw)
  To: Keir Fraser; +Cc: xen-devel

Keir Fraser wrote:
> On 14/04/2010 09:04, "Juergen Gross" <juergen.gross@ts.fujitsu.com> wrote:
> 
>>> There can be only one nested invocation on any given pcpu, since a running
>>> invocation is never preempted. Hence on entry to c_h_o_c() you can check a
>>> per-cpu variable to see whether this invocation is nesting, or not. And if
>>> it is, that variable can be a pointer to an info structure which includes a
>>> pointer to the invoking vcpu.
>> Okay, attached is the modified patch again.
> 
> I cleaned up some more and applied as xen-unstable:21165 and
> xen-unstable:21166.

Thanks. Unfortunately, your modifications are not working. Microcode update
hangs. My version worked without problems.

Juergen

-- 
Juergen Gross                 Principal Developer Operating Systems
TSP ES&S SWE OS6                       Telephone: +49 (0) 89 3222 2967
Fujitsu Technology Solutions              e-mail: juergen.gross@ts.fujitsu.com
Domagkstr. 28                           Internet: ts.fujitsu.com
D-80807 Muenchen                 Company details: ts.fujitsu.com/imprint.html

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [Patch] continue_hypercall_on_cpu rework using tasklets
  2010-04-15  6:31                   ` Juergen Gross
@ 2010-04-15  6:39                     ` Keir Fraser
  2010-04-15  7:57                       ` Juergen Gross
  0 siblings, 1 reply; 44+ messages in thread
From: Keir Fraser @ 2010-04-15  6:39 UTC (permalink / raw)
  To: Juergen Gross; +Cc: xen-devel

On 15/04/2010 07:31, "Juergen Gross" <juergen.gross@ts.fujitsu.com> wrote:

>>> Okay, attached is the modified patch again.
>> 
>> I cleaned up some more and applied as xen-unstable:21165 and
>> xen-unstable:21166.
> 
> Thanks. Unfortunately, your modifications are not working. Microcode update
> hangs. My version worked without problems.

What revision did you test? I put in some fixes as c/s 21173.

 -- Keir

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [Patch] continue_hypercall_on_cpu rework using tasklets
  2010-04-15  6:39                     ` Keir Fraser
@ 2010-04-15  7:57                       ` Juergen Gross
  2010-04-15  8:13                         ` Keir Fraser
  0 siblings, 1 reply; 44+ messages in thread
From: Juergen Gross @ 2010-04-15  7:57 UTC (permalink / raw)
  To: Keir Fraser; +Cc: xen-devel

Keir Fraser wrote:
> On 15/04/2010 07:31, "Juergen Gross" <juergen.gross@ts.fujitsu.com> wrote:
> 
>>>> Okay, attached is the modified patch again.
>>> I cleaned up some more and applied as xen-unstable:21165 and
>>> xen-unstable:21166.
>> Thanks. Unfortunately, your modifications are not working. Microcode update
>> hangs. My version worked without problems.
> 
> What revision did you test? I put in some fixes as c/s 21173.

My highest c/s was 21167.
c/s 21173 is hanging, too (sorry for the delay, but I had to remove my cpupool
stuff due to the scheduler changes for credit2).

Is a call of sync_vcpu_execstate() from a tasklet really allowed? I don't
think the ASSERTs in __sync_lazy_execstate() are all fulfilled in this case.


Juergen

-- 
Juergen Gross                 Principal Developer Operating Systems
TSP ES&S SWE OS6                       Telephone: +49 (0) 89 3222 2967
Fujitsu Technology Solutions              e-mail: juergen.gross@ts.fujitsu.com
Domagkstr. 28                           Internet: ts.fujitsu.com
D-80807 Muenchen                 Company details: ts.fujitsu.com/imprint.html

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [Patch] continue_hypercall_on_cpu rework using tasklets
  2010-04-15  7:57                       ` Juergen Gross
@ 2010-04-15  8:13                         ` Keir Fraser
  2010-04-15  8:22                           ` Keir Fraser
  2010-04-15  8:29                           ` Juergen Gross
  0 siblings, 2 replies; 44+ messages in thread
From: Keir Fraser @ 2010-04-15  8:13 UTC (permalink / raw)
  To: Juergen Gross; +Cc: xen-devel

On 15/04/2010 08:57, "Juergen Gross" <juergen.gross@ts.fujitsu.com> wrote:

>> What revision did you test? I put in some fixes as c/s 21173.
> 
> My highest c/s was 21167.
> c/s 21173 is hanging, too (sorry for the delay, but I had to remove my cpupool
> stuff due to the scheduler changes for credit2).

Ah yes, just done a test myself (clearly my dom0 setup is not doing
microcode updates) and I've now fixed it as c/s 21176. Thanks!

> Is a call of sync_vcpu_execstate() fro a tasklet really allowed? I don't
> think the ASSERTs in __sync_lazy_execstate() are all fulfilled in this case.

Better hope so or e.g.,
acpi_enter_sleep
->continue_hypercall_on_cpu(enter_state_helper)
->enter_state
->freeze_domains
->domain_pause
->vcpu_sleep_sync
->sync_vcpu_execstate
Also wouldn't work.

There is only one ASSERT in __sync_lazy_execstate, and it's safe for this
case. Bear in mind that our softirqs always run in the context of whatever
domain happens to be running on that cpu currently -- they don't have their
own proper vcpu context.

By the by, your original attempt at synchronisation (spin on return value in
regs changing) was risky as it could be unbounded time before the vcpu
registers get copied out of the original cpu's stack. Especially during
early dom0 boot, when the system is very idle.

 -- Keir

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [Patch] continue_hypercall_on_cpu rework using tasklets
  2010-04-15  8:13                         ` Keir Fraser
@ 2010-04-15  8:22                           ` Keir Fraser
  2010-04-15  9:59                             ` Jiang, Yunhong
  2010-04-15  8:29                           ` Juergen Gross
  1 sibling, 1 reply; 44+ messages in thread
From: Keir Fraser @ 2010-04-15  8:22 UTC (permalink / raw)
  To: Juergen Gross; +Cc: xen-devel, Jiang, Yunhong

On 15/04/2010 09:13, "Keir Fraser" <keir.fraser@eu.citrix.com> wrote:

> Better hope so or e.g.,
> acpi_enter_sleep
> ->continue_hypercall_on_cpu(enter_state_helper)
> ->enter_state
> ->freeze_domains
> ->domain_pause
> ->vcpu_sleep_sync
> ->sync_vcpu_execstate
> Also wouldn't work.

Actually that's a good example because it now won't work, but for other
reasons! The hypercall continuation can interrupt another vcpu's execution,
and then try to synchronously pause that vcpu. Which will deadlock.

Luckily I think we can re-jig this code to freeze_domains() before doing the
continue_hypercall_on_cpu(). I've cc'ed one of the CPU RAS guys. :-)

 -- Keir

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [Patch] continue_hypercall_on_cpu rework using tasklets
  2010-04-15  8:13                         ` Keir Fraser
  2010-04-15  8:22                           ` Keir Fraser
@ 2010-04-15  8:29                           ` Juergen Gross
  2010-04-15  9:08                             ` Keir Fraser
  1 sibling, 1 reply; 44+ messages in thread
From: Juergen Gross @ 2010-04-15  8:29 UTC (permalink / raw)
  To: Keir Fraser; +Cc: xen-devel

Keir Fraser wrote:
> On 15/04/2010 08:57, "Juergen Gross" <juergen.gross@ts.fujitsu.com> wrote:
> 
>>> What revision did you test? I put in some fixes as c/s 21173.
>> My highest c/s was 21167.
>> c/s 21173 is hanging, too (sorry for the delay, but I had to remove my cpupool
>> stuff due to the scheduler changes for credit2).
> 
> Ah yes, just done a test myself (clearly my dom0 setup is not doing
> microcode updates) and I've now fixed it as c/s 21176. Thanks!

Great! Works for me, too.

> 
>> Is a call of sync_vcpu_execstate() fro a tasklet really allowed? I don't
>> think the ASSERTs in __sync_lazy_execstate() are all fulfilled in this case.
> 
> Better hope so or e.g.,
> acpi_enter_sleep
> ->continue_hypercall_on_cpu(enter_state_helper)
> ->enter_state
> ->freeze_domains
> ->domain_pause
> ->vcpu_sleep_sync
> ->sync_vcpu_execstate
> Also wouldn't work.
> 
> There is only one ASSERT in __sync_lazy_execstate, and it's safe for this
> case. Bear in mind that our softirqs always run in the context of whatever
> domain happens to be running on that cpu currently -- they don't have their
> own proper vcpu context.

I don't see how it should be guaranteed that the current vcpu MUST be an idle
one...

> 
> By the by, your original attempt at synchronisation (spin on return value in
> regs changing) was risky as it could be unbounded time before the vcpu
> registers get copied out of the original cpu's stack. Especially during
> early dom0 boot, when the system is very idle.

Indeed. I realized my version had another flaw in case of nested calls of
c_h_o_c: if one call would return a non-zero value and issue another call
of c_h_o_c, the tasklet would hang for ever...
Your version is cleaner.


Juergen

-- 
Juergen Gross                 Principal Developer Operating Systems
TSP ES&S SWE OS6                       Telephone: +49 (0) 89 3222 2967
Fujitsu Technology Solutions              e-mail: juergen.gross@ts.fujitsu.com
Domagkstr. 28                           Internet: ts.fujitsu.com
D-80807 Muenchen                 Company details: ts.fujitsu.com/imprint.html

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [Patch] continue_hypercall_on_cpu rework using tasklets
  2010-04-15  8:29                           ` Juergen Gross
@ 2010-04-15  9:08                             ` Keir Fraser
  0 siblings, 0 replies; 44+ messages in thread
From: Keir Fraser @ 2010-04-15  9:08 UTC (permalink / raw)
  To: Juergen Gross; +Cc: xen-devel

On 15/04/2010 09:29, "Juergen Gross" <juergen.gross@ts.fujitsu.com> wrote:

>> There is only one ASSERT in __sync_lazy_execstate, and it's safe for this
>> case. Bear in mind that our softirqs always run in the context of whatever
>> domain happens to be running on that cpu currently -- they don't have their
>> own proper vcpu context.
> 
> I don't see how it should be guaranteed that the current vcpu MUST be an idle
> one...

It's odd because someone else asked this exact same question, so the code
must be more subtle than I thought. Note that the ASSERT is inside
if(switch_required), and switch_required is true only if the vcpu whose
state is loaded on this CPU is not the same as the currently-scheduled vcpu.
This odd situation is only possible if we are lazy-syncing the former vcpu's
state, and that only occurs if the currently-running vcpu is the idle vcpu
(as all other vcpus would need their state loaded immediately, so lazy sync
does not apply).

 -- Keir

^ permalink raw reply	[flat|nested] 44+ messages in thread

* RE: [Patch] continue_hypercall_on_cpu rework using tasklets
  2010-04-15  8:22                           ` Keir Fraser
@ 2010-04-15  9:59                             ` Jiang, Yunhong
  2010-04-15 11:06                               ` Keir Fraser
  0 siblings, 1 reply; 44+ messages in thread
From: Jiang, Yunhong @ 2010-04-15  9:59 UTC (permalink / raw)
  To: Keir Fraser, Juergen Gross; +Cc: xen-devel, Yu, Ke



>-----Original Message-----
>From: Keir Fraser [mailto:keir.fraser@eu.citrix.com]
>Sent: Thursday, April 15, 2010 4:22 PM
>To: Juergen Gross
>Cc: xen-devel@lists.xensource.com; Jiang, Yunhong
>Subject: Re: [Xen-devel] [Patch] continue_hypercall_on_cpu rework using tasklets
>
>On 15/04/2010 09:13, "Keir Fraser" <keir.fraser@eu.citrix.com> wrote:
>
>> Better hope so or e.g.,
>> acpi_enter_sleep
>> ->continue_hypercall_on_cpu(enter_state_helper)
>> ->enter_state
>> ->freeze_domains
>> ->domain_pause
>> ->vcpu_sleep_sync
>> ->sync_vcpu_execstate
>> Also wouldn't work.
>
>Actually that's a good example because it now won't work, but for other
>reasons! The hypercall continuation can interrupt another vcpu's execution,
>and then try to synchronously pause that vcpu. Which will deadlock.
>
>Luckily I think we can re-jig this code to freeze_domains() before doing the
>continue_hypercall_on_cpu(). I've cc'ed one of the CPU RAS guys. :-)

Hmm, I have cc'ed one of the PM guys because it is enter_state :-)
Can we add a check in vcpu_sleep_sync() for current? It is meaningless to cpu_relax for the current vcpu in that situation, especially if we are not in irq context.
I'm not sure why freeze_domains only checks dom0's vcpus for current, instead of all domains.

--jyh

>
> -- Keir
>

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [Patch] continue_hypercall_on_cpu rework using tasklets
  2010-04-15  9:59                             ` Jiang, Yunhong
@ 2010-04-15 11:06                               ` Keir Fraser
  2010-04-15 11:22                                 ` Keir Fraser
                                                   ` (2 more replies)
  0 siblings, 3 replies; 44+ messages in thread
From: Keir Fraser @ 2010-04-15 11:06 UTC (permalink / raw)
  To: Jiang, Yunhong, Juergen Gross; +Cc: xen-devel, Yu, Ke

On 15/04/2010 10:59, "Jiang, Yunhong" <yunhong.jiang@intel.com> wrote:

>> Actually that's a good example because it now won't work, but for other
>> reasons! The hypercall continuation can interrupt another vcpu's execution,
>> and then try to synchronously pause that vcpu. Which will deadlock.
>> 
>> Luckily I think we can re-jig this code to freeze_domains() before doing the
>> continue_hypercall_on_cpu(). I've cc'ed one of the CPU RAS guys. :-)
> 
> Hmm, I have cc'ed one of the PM guys because it is enter_state :-)
> Can we add check in vcpu_sleep_sync() for current? It is meaningless to
> cpu_relax for current vcpu in that situation, especially if we are not in irq
> context.
> I'm not sure why in freeze_domains it only checkes dom0's vcpu for current,
> instead of all domains.

Well actually pausing any vcpu from within the hypercall continuation is
dangerous. The softirq handler running the hypercall continuation may have
interrupted some running VCPU X. And the VCPU Y that the continuation is
currently trying to pause may itself be trying to pause X. So we can get a
deadlock that way. The freeze_domains() *has* to be pulled outside of the
hypercall continuation.

It's a little bit similar to the super-subtle stop_machine_run deadlock
possibility I just emailed to you a second ago. :-)

 -- Keir

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [Patch] continue_hypercall_on_cpu rework using tasklets
  2010-04-15 11:06                               ` Keir Fraser
@ 2010-04-15 11:22                                 ` Keir Fraser
  2010-04-16  9:20                                   ` Yu, Ke
  2010-04-16  6:42                                 ` Jiang, Yunhong
  2010-04-16  6:45                                 ` Jiang, Yunhong
  2 siblings, 1 reply; 44+ messages in thread
From: Keir Fraser @ 2010-04-15 11:22 UTC (permalink / raw)
  To: Jiang, Yunhong; +Cc: xen-devel, Yu, Ke

On 15/04/2010 12:06, "Keir Fraser" <keir.fraser@eu.citrix.com> wrote:

>> Hmm, I have cc'ed one of the PM guys because it is enter_state :-)
>> Can we add check in vcpu_sleep_sync() for current? It is meaningless to
>> cpu_relax for current vcpu in that situation, especially if we are not in irq
>> context.
>> I'm not sure why in freeze_domains it only checkes dom0's vcpu for current,
>> instead of all domains.
> 
> Well actually pausing any vcpu from within the hypercall continuation is
> dangerous. The softirq handler running the hypercall continuation may have
> interrupted some running VCPU X. And the VCPU Y that the continuation is
> currently trying to pause may itself be trying to pause X. So we can get a
> deadlock that way. The freeze_domains() *has* to be pulled outside of the
> hypercall continuation.

Changesets 21180 and 21181 in xen-unstable are my attempts to fix this. Yu
Ke, please let me know if these look and test okay for you guys.

 Thanks,
 Keir

^ permalink raw reply	[flat|nested] 44+ messages in thread

* RE: [Patch] continue_hypercall_on_cpu rework using tasklets
  2010-04-15 11:06                               ` Keir Fraser
  2010-04-15 11:22                                 ` Keir Fraser
@ 2010-04-16  6:42                                 ` Jiang, Yunhong
  2010-04-16  6:55                                   ` Juergen Gross
  2010-04-16  7:13                                   ` Keir Fraser
  2010-04-16  6:45                                 ` Jiang, Yunhong
  2 siblings, 2 replies; 44+ messages in thread
From: Jiang, Yunhong @ 2010-04-16  6:42 UTC (permalink / raw)
  To: Keir Fraser, Juergen Gross; +Cc: xen-devel, Yu, Ke



>-----Original Message-----
>From: Keir Fraser [mailto:keir.fraser@eu.citrix.com]
>Sent: Thursday, April 15, 2010 7:07 PM
>To: Jiang, Yunhong; Juergen Gross
>Cc: xen-devel@lists.xensource.com; Yu, Ke
>Subject: Re: [Xen-devel] [Patch] continue_hypercall_on_cpu rework using tasklets
>
>On 15/04/2010 10:59, "Jiang, Yunhong" <yunhong.jiang@intel.com> wrote:
>
>>> Actually that's a good example because it now won't work, but for other
>>> reasons! The hypercall continuation can interrupt another vcpu's execution,
>>> and then try to synchronously pause that vcpu. Which will deadlock.
>>>
>>> Luckily I think we can re-jig this code to freeze_domains() before doing the
>>> continue_hypercall_on_cpu(). I've cc'ed one of the CPU RAS guys. :-)
>>
>> Hmm, I have cc'ed one of the PM guys because it is enter_state :-)
>> Can we add check in vcpu_sleep_sync() for current? It is meaningless to
>> cpu_relax for current vcpu in that situation, especially if we are not in irq
>> context.
>> I'm not sure why in freeze_domains it only checkes dom0's vcpu for current,
>> instead of all domains.
>
>Well actually pausing any vcpu from within the hypercall continuation is
>dangerous. The softirq handler running the hypercall continuation may have
>interrupted some running VCPU X. And the VCPU Y that the continuation is
>currently trying to pause may itself be trying to pause X. So we can get a
>deadlock that way. The freeze_domains() *has* to be pulled outside of the
>hypercall continuation.
>
>It's a little bit similar to the super-subtle stop_machine_run deadlock
>possibility I just emailed to you a second ago. :-)

Thanks for pointing out the stop_machine_run deadlock issue.

After more consideration and internal discussion, it seems the key point is, the tasklet softirq is something like getting a lock for the current vcpu's state(i.e. no one else could change that state unless this softirq is finished). So any blocking action in softirq context, not just vcpu_pause_sync, is dangerous and should be avoided (we can't get a lock and do a blocking action per my understanding).
This is because a vcpu's state can only be changed by the schedule softirq (am I right on this?), while the schedule softirq can't preempt other softirqs. So, more generally, anything that will be updated by a softirq context, and will be synchronized/blocking-waited in xen's vcpu context, is in fact an implicit lock held by the softirq.

To the tricky bug on the stop_machine_run(), I think it is in fact similar to the cpu_add_remove_lock. The stop_machine_run() is a block action, so we must make sure no one will be blocking to get the lock that is held by stop_machine_run() already. At that time, we change all components that try to get the cpu_add_remove_lock to be try_lock.

The changes caused by the tasklet is, a new implicit lock is added, i.e. the vcpu's state.
The straightforward method is like the cpu_add_remove_lock: make everything that waiting for the vcpu state change to do softirq between the checking. Maybe the cleaner way is your previous suggestion, that is, put the stop_machine_run() in the idle_vcpu(), another way is, turn back to the original method, i.e. do it in the schedule_tail.

Also we are not sure why continue_hypercall_on_cpu was changed to use a tasklet. What's the benefit of it? At least I think this is quite confusing, because per our understanding, a hypercall is usually assumed to execute in the current context, while this change breaks the assumption. So any hypercall that may use this _c_h_o_c, and any function called by that hypercall, should be aware of this. I'm not sure if this is really so correct; at least it may cause trouble if someone uses this without realizing the limitation. From Juergen Gross's mail, it seems to be for cpupool, but I have no idea of the cpupool :-(

--jyh


>
> -- Keir
>

^ permalink raw reply	[flat|nested] 44+ messages in thread

* RE: [Patch] continue_hypercall_on_cpu rework using tasklets
  2010-04-15 11:06                               ` Keir Fraser
  2010-04-15 11:22                                 ` Keir Fraser
  2010-04-16  6:42                                 ` Jiang, Yunhong
@ 2010-04-16  6:45                                 ` Jiang, Yunhong
  2 siblings, 0 replies; 44+ messages in thread
From: Jiang, Yunhong @ 2010-04-16  6:45 UTC (permalink / raw)
  To: Jiang, Yunhong, Keir Fraser, Juergen Gross; +Cc: xen-devel, Yu, Ke

BTW, I suspect if cpu online/offline lock should really wrap the stop_machine_run(). I remember Criping questioned this also. Will have a look on it.

--jyh

>-----Original Message-----
>From: Jiang, Yunhong
>Sent: Friday, April 16, 2010 2:43 PM
>To: Keir Fraser; Juergen Gross
>Cc: xen-devel@lists.xensource.com; Yu, Ke
>Subject: RE: [Xen-devel] [Patch] continue_hypercall_on_cpu rework using tasklets
>
>
>
>>-----Original Message-----
>>From: Keir Fraser [mailto:keir.fraser@eu.citrix.com]
>>Sent: Thursday, April 15, 2010 7:07 PM
>>To: Jiang, Yunhong; Juergen Gross
>>Cc: xen-devel@lists.xensource.com; Yu, Ke
>>Subject: Re: [Xen-devel] [Patch] continue_hypercall_on_cpu rework using tasklets
>>
>>On 15/04/2010 10:59, "Jiang, Yunhong" <yunhong.jiang@intel.com> wrote:
>>
>>>> Actually that's a good example because it now won't work, but for other
>>>> reasons! The hypercall continuation can interrupt another vcpu's execution,
>>>> and then try to synchronously pause that vcpu. Which will deadlock.
>>>>
>>>> Luckily I think we can re-jig this code to freeze_domains() before doing the
>>>> continue_hypercall_on_cpu(). I've cc'ed one of the CPU RAS guys. :-)
>>>
>>> Hmm, I have cc'ed one of the PM guys because it is enter_state :-)
>>> Can we add check in vcpu_sleep_sync() for current? It is meaningless to
>>> cpu_relax for current vcpu in that situation, especially if we are not in irq
>>> context.
>>> I'm not sure why in freeze_domains it only checkes dom0's vcpu for current,
>>> instead of all domains.
>>
>>Well actually pausing any vcpu from within the hypercall continuation is
>>dangerous. The softirq handler running the hypercall continuation may have
>>interrupted some running VCPU X. And the VCPU Y that the continuation is
>>currently trying to pause may itself be trying to pause X. So we can get a
>>deadlock that way. The freeze_domains() *has* to be pulled outside of the
>>hypercall continuation.
>>
>>It's a little bit similar to the super-subtle stop_machine_run deadlock
>>possibility I just emailed to you a second ago. :-)
>
>Thanks for pointing out the stop_machine_run deadlock issue.
>
>After more consideration and internally discussion, seems the key point is, the
>tasklet softirq is something like getting a lock for the current vcpu's state(i.e. no one
>else could change that state unless this softirq is finished). So any block action in
>softirq context, not just vcpu_pause_sync, is dangerous and should be avoided (we
>can't get a lock and do block action per my understanding).
>This is because vcpu's state can only be changed by schedule softirq (am I right on
>this?), while schedule softirq can't prempt other softirq. So, more generally, anything
>that will be updated by a softirq context, and will be syncrhozed/blocking waitied in
>xen's vcpu context is in fact a implicit lock held by the softirq.
>
>To the tricky bug on the stop_machine_run(), I think it is in fact similar to the
>cpu_add_remove_lock. The stop_machine_run() is a block action, so we must make
>sure no one will be blocking to get the lock that is held by stop_machine_run()
>already. At that time, we change all components that try to get the
>cpu_add_remove_lock to be try_lock.
>
>The changes caused by the tasklet is, a new implicit lock is added, i.e. the vcpu's
>state.
>The straightforward method is like the cpu_add_remove_lock: make everything that
>waiting for the vcpu state change to do softirq between the checking. Maybe the
>cleaner way is your previous suggestion, that is, put the stop_machine_run() in the
>idle_vcpu(), another way is, turn back to the original method, i.e. do it in the
>schedule_tail.
>
>Also We are not sure why the continue_hypercall_on_cpu is changed to use tasklet.
>What's the benifit for it? At least I think this is quite confusing, because per our
>understanding, usually hypercall is assumed to execut in current context, while this
>change break the assumption. So any hypercall that may use this _c_h_o_c, and any
>function called by that hypercall, should be aware of this, I'm not sure if this is really
>so correct, at least it may cause trouble if someone use this without realize the
>limitation. From Juergen Gross's mail, it seems for cpupool, but I have no idea of the
>cpupool :-(
>
>--jyh
>
>
>>
>> -- Keir
>>

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [Patch] continue_hypercall_on_cpu rework using tasklets
  2010-04-16  6:42                                 ` Jiang, Yunhong
@ 2010-04-16  6:55                                   ` Juergen Gross
  2010-04-16  8:30                                     ` Jiang, Yunhong
  2010-04-16  7:13                                   ` Keir Fraser
  1 sibling, 1 reply; 44+ messages in thread
From: Juergen Gross @ 2010-04-16  6:55 UTC (permalink / raw)
  To: Jiang, Yunhong; +Cc: xen-devel, Keir Fraser, Yu, Ke

Jiang, Yunhong wrote:
> 
>> -----Original Message-----
>> From: Keir Fraser [mailto:keir.fraser@eu.citrix.com]
>> Sent: Thursday, April 15, 2010 7:07 PM
>> To: Jiang, Yunhong; Juergen Gross
>> Cc: xen-devel@lists.xensource.com; Yu, Ke
>> Subject: Re: [Xen-devel] [Patch] continue_hypercall_on_cpu rework using tasklets
>>
>> On 15/04/2010 10:59, "Jiang, Yunhong" <yunhong.jiang@intel.com> wrote:
>>
>>>> Actually that's a good example because it now won't work, but for other
>>>> reasons! The hypercall continuation can interrupt another vcpu's execution,
>>>> and then try to synchronously pause that vcpu. Which will deadlock.
>>>>
>>>> Luckily I think we can re-jig this code to freeze_domains() before doing the
>>>> continue_hypercall_on_cpu(). I've cc'ed one of the CPU RAS guys. :-)
>>> Hmm, I have cc'ed one of the PM guys because it is enter_state :-)
>>> Can we add check in vcpu_sleep_sync() for current? It is meaningless to
>>> cpu_relax for current vcpu in that situation, especially if we are not in irq
>>> context.
>>> I'm not sure why in freeze_domains it only checkes dom0's vcpu for current,
>>> instead of all domains.
>> Well actually pausing any vcpu from within the hypercall continuation is
>> dangerous. The softirq handler running the hypercall continuation may have
>> interrupted some running VCPU X. And the VCPU Y that the continuation is
>> currently trying to pause may itself be trying to pause X. So we can get a
>> deadlock that way. The freeze_domains() *has* to be pulled outside of the
>> hypercall continuation.
>>
>> It's a little bit similar to the super-subtle stop_machine_run deadlock
>> possibility I just emailed to you a second ago. :-)
> 
> Thanks for pointing out the stop_machine_run deadlock issue.
> 
> After more consideration and internally discussion, seems the key point is, the tasklet softirq is something like getting a lock for the current vcpu's state(i.e. no one else could change that state unless this softirq is finished). So any block action in softirq context, not just vcpu_pause_sync, is dangerous and should be avoided (we can't get a lock and do block action per my understanding).
> This is because vcpu's state can only be changed by schedule softirq (am I right on this?), while schedule softirq can't prempt other softirq. So, more generally, anything that will be updated by a softirq context, and will be syncrhozed/blocking waitied in xen's vcpu context is in fact a implicit lock held by the softirq.
> 
> To the tricky bug on the stop_machine_run(), I think it is in fact similar to the cpu_add_remove_lock. The stop_machine_run() is a block action, so we must make sure no one will be blocking to get the lock that is held by stop_machine_run() already. At that time, we change all components that try to get the cpu_add_remove_lock to be try_lock.
> 
> The changes caused by the tasklet is, a new implicit lock is added, i.e. the vcpu's state.
> The straightforward method is like the cpu_add_remove_lock: make everything that waiting for the vcpu state change to do softirq between the checking. Maybe the cleaner way is your previous suggestion, that is, put the stop_machine_run() in the idle_vcpu(), another way is, turn back to the original method, i.e. do it in the schedule_tail.
> 
> Also We are not sure why the continue_hypercall_on_cpu is changed to use tasklet. What's the benifit for it? At least I think this is quite confusing, because per our understanding, usually hypercall is assumed to execut in current context, while this change break the assumption. So any hypercall that may use this _c_h_o_c, and any function called by that hypercall, should be aware of this, I'm not sure if this is really so correct, at least it may cause trouble if someone use this without realize the limitation. From Juergen Gross's mail, it seems for cpupool, but I have no idea of the cpupool :-(

Cpupools introduce something like "scheduling domains" in xen. Each cpupool
owns a set of physical cpus and has an own scheduler. Each domain is member
of a cpupool.

It is possible to move cpus or domains between pools, but a domain is always
limited to the physical cpus being in the cpupool of the domain.

This limitation makes it impossible to use continue_hypercall_on_cpu with
cpupools for any physical cpu without changing it. My original solution
temporarily moved the target cpu into the cpupool of the issuing domain,
but this was regarded as an ugly hack.


Juergen

-- 
Juergen Gross                 Principal Developer Operating Systems
TSP ES&S SWE OS6                       Telephone: +49 (0) 89 3222 2967
Fujitsu Technology Solutions              e-mail: juergen.gross@ts.fujitsu.com
Domagkstr. 28                           Internet: ts.fujitsu.com
D-80807 Muenchen                 Company details: ts.fujitsu.com/imprint.html

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [Patch] continue_hypercall_on_cpu rework using tasklets
  2010-04-16  6:42                                 ` Jiang, Yunhong
  2010-04-16  6:55                                   ` Juergen Gross
@ 2010-04-16  7:13                                   ` Keir Fraser
  2010-04-16  8:16                                     ` Jiang, Yunhong
  1 sibling, 1 reply; 44+ messages in thread
From: Keir Fraser @ 2010-04-16  7:13 UTC (permalink / raw)
  To: Jiang, Yunhong, Juergen Gross; +Cc: xen-devel, Yu, Ke

On 16/04/2010 07:42, "Jiang, Yunhong" <yunhong.jiang@intel.com> wrote:

> Thanks for pointing out the stop_machine_run deadlock issue.
> 
> After more consideration and internally discussion, seems the key point is,
> the tasklet softirq is something like getting a lock for the current vcpu's
> state(i.e. no one else could change that state unless this softirq is
> finished).
>
> So any block action in softirq context, not just vcpu_pause_sync,
> is dangerous and should be avoided (we can't get a lock and do block action
> per my understanding).
> This is because a vcpu's state can only be changed by the schedule softirq (am I
> right on this?), while the schedule softirq can't preempt other softirqs. So, more
> generally, anything that will be updated by a softirq context, and will be
> synchronized/blocking-waited on in xen's vcpu context, is in fact an implicit lock
> held by the softirq.

I think you're at risk of over-analysing the situation. You cannot safely
synchronously pause vcpus from within softirq context. I'm not sure we can
extrapolate further than that.

> To the tricky bug on the stop_machine_run(), I think it is in fact similar to
> the cpu_add_remove_lock. The stop_machine_run() is a block action, so we must
> make sure no one will be blocking to get the lock that is held by
> stop_machine_run() already. At that time, we change all components that try to
> get the cpu_add_remove_lock to be try_lock.
> 
> The changes caused by the tasklet is, a new implicit lock is added, i.e. the
> vcpu's state.

Let me be clear: the deadlock I described in stop_machine_run() is *not*
introduced by the c_h_o_c reimplementation. It has been there all along.
Nothing in my description of the deadlock depended on the implementation of
c_h_o_c: it depended only on the behaviour of stop_machine_run itself, which
does not internally use c_h_o_c.

> The straightforward method is like the cpu_add_remove_lock: make everything
> that waiting for the vcpu state change to do softirq between the checking.

Possible, could have impacts of its own of course. We couldn't do
SCHEDULE_SOFTIRQ as we would lose the caller's context, and the caller could
not hold locks when pausing others (although I suppose they generally can't
anyway).

> Maybe the cleaner way is your previous suggestion, that is, put the
> stop_machine_run() in the idle_vcpu(), another way is, turn back to the
> original method, i.e. do it in the schedule_tail.

Argh. That would be annoying!

Another possibility is to... shudder... introduce a timeout. Wait only e.g.,
1ms for all vcpus to enter the STOPMACHINE_PREPARE state and if they don't,
cancel the operation and return -EBUSY. There are already a bunch of
opportunities to return 'spurious' -EBUSY already in the cpu-offline paths,
so tools already need to know to retry at least a certain number of times.
Undoubtedly this is the dirty solution, but it is easy-ish to implement and
should only be allowing us to avoid an extremely rare deadlock possibility.
It would just be critical to pick a large enough timeout!

> Also We are not sure why the continue_hypercall_on_cpu is changed to use
> tasklet. What's the benifit for it? At least I think this is quite confusing,
> because per our understanding, usually hypercall is assumed to execut in
> current context, while this change break the assumption. So any hypercall that
> may use this _c_h_o_c, and any function called by that hypercall, should be
> aware of this, I'm not sure if this is really so correct, at least it may
> cause trouble if someone use this without realize the limitation. From Juergen
> Gross's mail, it seems for cpupool, but I have no idea of the cpupool :-(

The implementation is simpler and lets us get rid of the complexity around
vcpu affinity logic. There aren't that many users of c_h_o_c and most are
still trivially correct. I don't think the changes to c_h_o_c have
introduced any more deadlocks, apart from the freeze_domains issue (which I
believe I have now fixed).

 -- Keir

^ permalink raw reply	[flat|nested] 44+ messages in thread

* RE: [Patch] continue_hypercall_on_cpu rework using tasklets
  2010-04-16  7:13                                   ` Keir Fraser
@ 2010-04-16  8:16                                     ` Jiang, Yunhong
  2010-04-16 17:57                                       ` Keir Fraser
  0 siblings, 1 reply; 44+ messages in thread
From: Jiang, Yunhong @ 2010-04-16  8:16 UTC (permalink / raw)
  To: Keir Fraser, Juergen Gross; +Cc: xen-devel, Yu, Ke



>-----Original Message-----
>From: Keir Fraser [mailto:keir.fraser@eu.citrix.com]
>Sent: Friday, April 16, 2010 3:14 PM
>To: Jiang, Yunhong; Juergen Gross
>Cc: xen-devel@lists.xensource.com; Yu, Ke
>Subject: Re: [Xen-devel] [Patch] continue_hypercall_on_cpu rework using tasklets
>
>On 16/04/2010 07:42, "Jiang, Yunhong" <yunhong.jiang@intel.com> wrote:
>
>> Thanks for pointing out the stop_machine_run deadlock issue.
>>
>> After more consideration and internally discussion, seems the key point is,
>> the tasklet softirq is something like getting a lock for the current vcpu's
>> state(i.e. no one else could change that state unless this softirq is
>> finished).
>>
>> So any block action in softirq context, not just vcpu_pause_sync,
>> is dangerous and should be avoided (we can't get a lock and do block action
>> per my understanding).
>> This is because vcpu's state can only be changed by schedule softirq (am I
>> right on this?), while schedule softirq can't prempt other softirq. So, more
>> generally, anything that will be updated by a softirq context, and will be
>> syncrhozed/blocking waitied in xen's vcpu context is in fact a implicit lock
>> held by the softirq.
>
>I think you're at risk of over-analysing the situation. You cannot safely
>synchronously pause vcpus from within softirq context. I'm not sure we can
>extrapolate further than that.
>
>> To the tricky bug on the stop_machine_run(), I think it is in fact similar to
>> the cpu_add_remove_lock. The stop_machine_run() is a block action, so we must
>> make sure no one will be blocking to get the lock that is held by
>> stop_machine_run() already. At that time, we change all components that try to
>> get the cpu_add_remove_lock to be try_lock.
>>
>> The changes caused by the tasklet is, a new implicit lock is added, i.e. the
>> vcpu's state.
>
>Let me be clear: the deadlock I described in stop_machine_run() is *not*
>introduced by the c_h_o_c reimplementation. It has been there all along.
>Nothing in my description of the deadlock depended on the implementation of
>c_h_o_c: it depended only on the baheviour of stop_machine_run itself, which
>does not internally use c_h_o_c.

Oops, yes, this should be there already, without c_h_o_c.
So, if another vcpu is trying to vcpu_pause() this vcpu, it will deadlock because this vcpu can't be paused, right? (I assume the same reason applies to hvmop_flush_tlb_all.)

If this is true, then how about checking !vcpu_runnable(current) in stop_machine_run()'s stopmachine_set_state()? If this check fails, it means someone is trying to change our state and a potential deadlock may happen, so we can cancel this stop_machine_run(). This is at least a bit cleaner than the timeout mechanism.

>
>> The straightforward method is like the cpu_add_remove_lock: make everything
>> that waiting for the vcpu state change to do softirq between the checking.
>
>Possible, could have impacts of its own of course. We couldn't do
>SCHEDULE_SOFTIRQ as we would lose the caller's context, and the caller could
>not hold locks when pausing others (although I suppose they generally can't
>anyway).
>
>> Maybe the cleaner way is your previous suggestion, that is, put the
>> stop_machine_run() in the idle_vcpu(), another way is, turn back to the
>> original method, i.e. do it in the schedule_tail.
>
>Argh. That would be annoying!

It seems doing it in the schedule_tail will not help with this deadlock issue.

>
>Another possibility is to... shudder... introduce a timeout. Wait only e.g.,
>1ms for all vcpus to enter the STOPMACHINE_PREPARE state and if they don't,
>cancel the operation and return -EBUSY. There are already a bunch of
>opportunities to return 'spurious' -EBUSY already in the cpu-offline paths,
>so tools already need to know to retry at least a certain number of times.
>Undoubtedly this is the dirty solution, but it is easy-ish to implement and
>should only be allowing us to avoid an extremely rare deadlock possibility.
>It would just be critical to pick a large enough timeout!
>
>> Also We are not sure why the continue_hypercall_on_cpu is changed to use
>> tasklet. What's the benifit for it? At least I think this is quite confusing,
>> because per our understanding, usually hypercall is assumed to execut in
>> current context, while this change break the assumption. So any hypercall that
>> may use this _c_h_o_c, and any function called by that hypercall, should be
>> aware of this, I'm not sure if this is really so correct, at least it may
>> cause trouble if someone use this without realize the limitation. From Juergen
>> Gross's mail, it seems for cpupool, but I have no idea of the cpupool :-(
>
>The implementation is simpler and lets us get rid of the complexity around
>vcpu affinity logic. There aren't that many users of c_h_o_c and most are
>still trivially correct. I don't think the changes to c_h_o_c have
>introduced any more deadlocks, apart from the freeze_domains issue (which I
>believe I have now fixed).

Yes, c_h_o_c does not introduce any more deadlocks. My concern is that its name may be confusing if one tries to use it for another hypercall. After all, if a hypercall, or any function used by the hypercall, depends on current, it should not use this c_h_o_c.

--jyh

>
> -- Keir
>

^ permalink raw reply	[flat|nested] 44+ messages in thread

* RE: [Patch] continue_hypercall_on_cpu rework using tasklets
  2010-04-16  6:55                                   ` Juergen Gross
@ 2010-04-16  8:30                                     ` Jiang, Yunhong
  0 siblings, 0 replies; 44+ messages in thread
From: Jiang, Yunhong @ 2010-04-16  8:30 UTC (permalink / raw)
  To: Juergen Gross; +Cc: xen-devel, Keir Fraser, Yu, Ke

Juergen, thanks for your explanation.

--jyh

>-----Original Message-----
>From: Juergen Gross [mailto:juergen.gross@ts.fujitsu.com]
>Sent: Friday, April 16, 2010 2:56 PM
>To: Jiang, Yunhong
>Cc: Keir Fraser; xen-devel@lists.xensource.com; Yu, Ke
>Subject: Re: [Xen-devel] [Patch] continue_hypercall_on_cpu rework using tasklets
>
>Jiang, Yunhong wrote:
>>
>>> -----Original Message-----
>>> From: Keir Fraser [mailto:keir.fraser@eu.citrix.com]
>>> Sent: Thursday, April 15, 2010 7:07 PM
>>> To: Jiang, Yunhong; Juergen Gross
>>> Cc: xen-devel@lists.xensource.com; Yu, Ke
>>> Subject: Re: [Xen-devel] [Patch] continue_hypercall_on_cpu rework using tasklets
>>>
>>> On 15/04/2010 10:59, "Jiang, Yunhong" <yunhong.jiang@intel.com> wrote:
>>>
>>>>> Actually that's a good example because it now won't work, but for other
>>>>> reasons! The hypercall continuation can interrupt another vcpu's execution,
>>>>> and then try to synchronously pause that vcpu. Which will deadlock.
>>>>>
>>>>> Luckily I think we can re-jig this code to freeze_domains() before doing the
>>>>> continue_hypercall_on_cpu(). I've cc'ed one of the CPU RAS guys. :-)
>>>> Hmm, I have cc'ed one of the PM guys because it is enter_state :-)
>>>> Can we add check in vcpu_sleep_sync() for current? It is meaningless to
>>>> cpu_relax for current vcpu in that situation, especially if we are not in irq
>>>> context.
>>>> I'm not sure why in freeze_domains it only checkes dom0's vcpu for current,
>>>> instead of all domains.
>>> Well actually pausing any vcpu from within the hypercall continuation is
>>> dangerous. The softirq handler running the hypercall continuation may have
>>> interrupted some running VCPU X. And the VCPU Y that the continuation is
>>> currently trying to pause may itself be trying to pause X. So we can get a
>>> deadlock that way. The freeze_domains() *has* to be pulled outside of the
>>> hypercall continuation.
>>>
>>> It's a little bit similar to the super-subtle stop_machine_run deadlock
>>> possibility I just emailed to you a second ago. :-)
>>
>> Thanks for pointing out the stop_machine_run deadlock issue.
>>
>> After more consideration and internally discussion, seems the key point is, the
>tasklet softirq is something like getting a lock for the current vcpu's state(i.e. no one
>else could change that state unless this softirq is finished). So any block action in
>softirq context, not just vcpu_pause_sync, is dangerous and should be avoided (we
>can't get a lock and do block action per my understanding).
>> This is because vcpu's state can only be changed by schedule softirq (am I right on
>this?), while schedule softirq can't prempt other softirq. So, more generally, anything
>that will be updated by a softirq context, and will be syncrhozed/blocking waitied in
>xen's vcpu context is in fact a implicit lock held by the softirq.
>>
>> To the tricky bug on the stop_machine_run(), I think it is in fact similar to the
>cpu_add_remove_lock. The stop_machine_run() is a block action, so we must make
>sure no one will be blocking to get the lock that is held by stop_machine_run()
>already. At that time, we change all components that try to get the
>cpu_add_remove_lock to be try_lock.
>>
>> The changes caused by the tasklet is, a new implicit lock is added, i.e. the vcpu's
>state.
>> The straightforward method is like the cpu_add_remove_lock: make everything
>that waiting for the vcpu state change to do softirq between the checking. Maybe
>the cleaner way is your previous suggestion, that is, put the stop_machine_run() in
>the idle_vcpu(), another way is, turn back to the original method, i.e. do it in the
>schedule_tail.
>>
>> Also We are not sure why the continue_hypercall_on_cpu is changed to use tasklet.
>What's the benifit for it? At least I think this is quite confusing, because per our
>understanding, usually hypercall is assumed to execut in current context, while this
>change break the assumption. So any hypercall that may use this _c_h_o_c, and any
>function called by that hypercall, should be aware of this, I'm not sure if this is really
>so correct, at least it may cause trouble if someone use this without realize the
>limitation. From Juergen Gross's mail, it seems for cpupool, but I have no idea of the
>cpupool :-(
>
>Cpupools introduce something like "scheduling domains" in xen. Each cpupool
>owns a set of physical cpus and has an own scheduler. Each domain is member
>of a cpupool.
>
>It is possible to move cpus or domains between pools, but a domain is always
>limited to the physical cpus being in the cpupool of the domain.
>
>This limitation makes it impossible to use continue_hypercall_on_cpu with
>cpupools for any physical cpu without changing it. My original solution
>temporarily moved the target cpu into the cpupool of the issuing domain,
>but this was regarded as an ugly hack.
>
>
>Juergen
>
>--
>Juergen Gross                 Principal Developer Operating Systems
>TSP ES&S SWE OS6                       Telephone: +49 (0) 89 3222 2967
>Fujitsu Technology Solutions              e-mail: juergen.gross@ts.fujitsu.com
>Domagkstr. 28                           Internet: ts.fujitsu.com
>D-80807 Muenchen                 Company details:
>ts.fujitsu.com/imprint.html

^ permalink raw reply	[flat|nested] 44+ messages in thread

* RE: [Patch] continue_hypercall_on_cpu rework using tasklets
  2010-04-15 11:22                                 ` Keir Fraser
@ 2010-04-16  9:20                                   ` Yu, Ke
  2010-04-16 17:51                                     ` Keir Fraser
  0 siblings, 1 reply; 44+ messages in thread
From: Yu, Ke @ 2010-04-16  9:20 UTC (permalink / raw)
  To: Keir Fraser, Jiang, Yunhong; +Cc: xen-devel

> -----Original Message-----
> From: Keir Fraser [mailto:keir.fraser@eu.citrix.com]
> Sent: Thursday, April 15, 2010 7:23 PM
> To: Jiang, Yunhong
> Cc: xen-devel@lists.xensource.com; Yu, Ke
> Subject: Re: [Xen-devel] [Patch] continue_hypercall_on_cpu rework using
> tasklets
> 
> On 15/04/2010 12:06, "Keir Fraser" <keir.fraser@eu.citrix.com> wrote:
> 
> >> Hmm, I have cc'ed one of the PM guys because it is enter_state :-)
> >> Can we add check in vcpu_sleep_sync() for current? It is meaningless to
> >> cpu_relax for current vcpu in that situation, especially if we are not in irq
> >> context.
> >> I'm not sure why in freeze_domains it only checkes dom0's vcpu for
> current,
> >> instead of all domains.
> >
> > Well actually pausing any vcpu from within the hypercall continuation is
> > dangerous. The softirq handler running the hypercall continuation may
> have
> > interrupted some running VCPU X. And the VCPU Y that the continuation
> is
> > currently trying to pause may itself be trying to pause X. So we can get a
> > deadlock that way. The freeze_domains() *has* to be pulled outside of the
> > hypercall continuation.
> 
> Changesets 21180 and 21181 in xen-unstable are my attempts to fix this. Yu
> Ke, please let me know if these look and test okay for you guys.
> 

We have a stress S3 test on Xen 4.1 unstable. Unfortunately, cset 21181 cannot pass the stress test. It hangs after 48 S3 suspend/resume cycles. Another round of testing even shows it hangs after 2 suspend/resume cycles. But it is too early to say cset 21181/21180 is the culprit, because even cset 21172 (the cset ahead of the continue_hypercall_on_cpu improvement patch) cannot pass the S3 test either, although it survives more suspend/resume cycles than cset 21181. So, generally, there must be something wrong with the S3 logic since the Xen 4.0 release. We are still trying to dig it out...

Best Regards
Ke 

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [Patch] continue_hypercall_on_cpu rework using tasklets
  2010-04-16  9:20                                   ` Yu, Ke
@ 2010-04-16 17:51                                     ` Keir Fraser
  2010-04-19 10:50                                       ` Keir Fraser
  0 siblings, 1 reply; 44+ messages in thread
From: Keir Fraser @ 2010-04-16 17:51 UTC (permalink / raw)
  To: Yu, Ke, Jiang, Yunhong; +Cc: xen-devel

On 16/04/2010 10:20, "Yu, Ke" <ke.yu@intel.com> wrote:

>> Changesets 21180 and 21181 in xen-unstable are my attempts to fix this. Yu
>> Ke, please let me know if these look and test okay for you guys.
>> 
> 
> We have a stress S3 test upon the Xen 4.1 unstable. Unfortunately, cset 21181
> cannot pass the stress. It hangs after 48 times S3 suspend/resume.  Another
> round test even shows it hangs after 2 times suspend/resume. But it is too
> early to say cset 21181/21180 is the evil, because even cset 21172 (cset ahead
> of continue_hypercall_on_cpu improvement patch) cannot pass the S3 test
> either, although it has bigger success suspend/resume times than cset 21181 .
> so generally, there must be something wrong with S3 logic since Xen 4.0
> release. We are still trying to dig it out...

21172 was not a good changeset to pick. I suggest trying xen-unstable tip
with just 21181/21180 reverted.

 Thanks,
 Keir

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [Patch] continue_hypercall_on_cpu rework using tasklets
  2010-04-16  8:16                                     ` Jiang, Yunhong
@ 2010-04-16 17:57                                       ` Keir Fraser
  2010-04-19  5:53                                         ` Jiang, Yunhong
  0 siblings, 1 reply; 44+ messages in thread
From: Keir Fraser @ 2010-04-16 17:57 UTC (permalink / raw)
  To: Jiang, Yunhong, Juergen Gross; +Cc: xen-devel, Yu, Ke

On 16/04/2010 09:16, "Jiang, Yunhong" <yunhong.jiang@intel.com> wrote:

> Oops, yes, this should be there already, without c_h_o_c.
> So, if another vcpu is trying to vcpu_pause() this vcpu, it will deadlock
> becaues this vcpu can't be paused, right? (I assume the same reason to
> hvmop_flush_tlb_all).

Yes.

> If this is true, then how about checking !vcpu_runnable(current) in
> stop_machine_run()'s stopmachine_set_state()? If this check failed, it means
> someone try to change our state and potential deadlock may happen, we can
> cancel this stop_machine_run()? This is at least a bit cleaner than the
> timeout mechanism.

Interesting... That could work. But...

>>> Maybe the cleaner way is your previous suggestion, that is, put the
>>> stop_machine_run() in the idle_vcpu(), another way is, turn back to the
>>> original method, i.e. do it in the schedule_tail.
>> 
>> Argh. That would be annoying!
> 
> Seems do it in the schedule_tail will not benifit this deadlock issues.

Yes, but now I think about it, scheduling c_h_o_c() and s_m_r() in idle-vcpu
context instead of softirq context might not be *that* hard to do, and it
avoids all these subtle deadlock possibilities. I think I'm warming to it
compared with the alternatives.

> Yes, c_h_o_c does not introduce any more deadlock, my concern is, it's name
> maybe confusing if one try to use it for other hypercall . After all, if a
> hypercall and function used by the hypercall depends on current, it should not
> use this c_h_o_c.

Well, I think there are two issues: (1) we run the continuation as a
softirq; (2) we don't run the continuation in the context of the original
vcpu. I think (1) is a bigger problem than (2) as it introduces the
possibility of all these nasty subtle deadlocks. (2) is obviously not
desirable, *but* I don't think any callers much care about the context of
the original caller, *and* if they do the resulting bug is generally going
to be pretty obvious. That is, the hypercall just won't work, ever -- it's
much less likely than (1) to result in very rare very subtle bugs.

 -- Keir

^ permalink raw reply	[flat|nested] 44+ messages in thread

* RE: [Patch] continue_hypercall_on_cpu rework using tasklets
  2010-04-16 17:57                                       ` Keir Fraser
@ 2010-04-19  5:53                                         ` Jiang, Yunhong
  2010-04-19  6:48                                           ` Keir Fraser
  0 siblings, 1 reply; 44+ messages in thread
From: Jiang, Yunhong @ 2010-04-19  5:53 UTC (permalink / raw)
  To: Keir Fraser, Juergen Gross; +Cc: xen-devel, Yu, Ke


>-----Original Message-----
>From: Keir Fraser [mailto:keir.fraser@eu.citrix.com]
>Sent: Saturday, April 17, 2010 1:58 AM
>To: Jiang, Yunhong; Juergen Gross
>Cc: xen-devel@lists.xensource.com; Yu, Ke
>Subject: Re: [Xen-devel] [Patch] continue_hypercall_on_cpu rework using tasklets
>
>On 16/04/2010 09:16, "Jiang, Yunhong" <yunhong.jiang@intel.com> wrote:
>
>> Oops, yes, this should be there already, without c_h_o_c.
>> So, if another vcpu is trying to vcpu_pause() this vcpu, it will deadlock
>> becaues this vcpu can't be paused, right? (I assume the same reason to
>> hvmop_flush_tlb_all).
>
>Yes.
>
>> If this is true, then how about checking !vcpu_runnable(current) in
>> stop_machine_run()'s stopmachine_set_state()? If this check failed, it means
>> someone try to change our state and potential deadlock may happen, we can
>> cancel this stop_machine_run()? This is at least a bit cleaner than the
>> timeout mechanism.
>
>Interesting... That could work. But...

I agree that idle vcpu method is more cleaner in the long run.

>
>>>> Maybe the cleaner way is your previous suggestion, that is, put the
>>>> stop_machine_run() in the idle_vcpu(), another way is, turn back to the
>>>> original method, i.e. do it in the schedule_tail.
>>>
>>> Argh. That would be annoying!
>>
>> Seems do it in the schedule_tail will not benifit this deadlock issues.
>
>Yes, but now I think about it, scheduling c_h_o_c() and s_m_r() in idle-vcpu
>context instead of softirq context might not be *that* hard to do, and it
>avoids all these subtle deadlock possibilities. I think I'm warming to it
>compared with the alternatives.
>
>> Yes, c_h_o_c does not introduce any more deadlock, my concern is, it's name
>> maybe confusing if one try to use it for other hypercall . After all, if a
>> hypercall and function used by the hypercall depends on current, it should not
>> use this c_h_o_c.
>
>Well, I think there are two issues: (1) we run the continuation as a
>softirq; (2) we don't run the continuation in the context of the original
>vcpu. I think (1) is a bigger problem than (2) as it introduces the
>possibility of all these nasty subtle deadlocks. (2) is obviously not
>desirable, *but* I don't think any callers much care about the context of
>the original caller, *and* if they do the resulting bug is generally going
>to be pretty obvious. That is, the hypercall just won't work, ever -- it's
>much less likely than (1) to result in very rare very subtle bugs.

For issue 2, in fact this c_h_o_c is something like a xen action, i.e. it is some utility provided by the xen hypervisor that can be utilized by a guest, so maybe we can change the name of c_h_o_c to be something like call_xen_XXX?
To your idle_vcpu context work, I think it is a bit like hvm domain waiting for a IO assist from qemu (i.e., use prepare_wait_on_xen_event_channel()), is it right?

--jyh

>
> -- Keir
>

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [Patch] continue_hypercall_on_cpu rework using tasklets
  2010-04-19  5:53                                         ` Jiang, Yunhong
@ 2010-04-19  6:48                                           ` Keir Fraser
  0 siblings, 0 replies; 44+ messages in thread
From: Keir Fraser @ 2010-04-19  6:48 UTC (permalink / raw)
  To: Jiang, Yunhong, Juergen Gross; +Cc: xen-devel, Yu, Ke

On 19/04/2010 06:53, "Jiang, Yunhong" <yunhong.jiang@intel.com> wrote:

>> Well, I think there are two issues: (1) we run the continuation as a
>> softirq; (2) we don't run the continuation in the context of the original
>> vcpu. I think (1) is a bigger problem than (2) as it introduces the
>> possibility of all these nasty subtle deadlocks. (2) is obviously not
>> desirable, *but* I don't think any callers much care about the context of
>> the original caller, *and* if they do the resulting bug is generally going
>> to be pretty obvious. That is, the hypercall just won't work, ever -- it's
>> much less likely than (1) to result in very rare very subtle bugs.
> 
> For issue 2, In fact this c_h_o_c is sometihng like xen action, i.e. it is
> some utility provided by xen hypervisor that can be utilized by guest, so
> maybe we can change the name of c_h_o_c, to be like call_xen_XXX?

Well, the handler does provide the final hypercall return code, so it is
still a hypercall continuation even if it doesn't run in the correct vcpu's
context.

> To your idle_vcpu context work, I think it is a bit like hvm domain waiting
> for a IO assist from qemu (i.e., use prepare_wait_on_xen_event_channel()), is
> it right?

It's easier than that because the work flow does not leave the hypervisor
itself. So we can simply pause/unpause the guest vcpu -- exactly as we are
currently doing in the new tasklet-based c_h_o_c().

 -- Keir

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [Patch] continue_hypercall_on_cpu rework using tasklets
  2010-04-16 17:51                                     ` Keir Fraser
@ 2010-04-19 10:50                                       ` Keir Fraser
  2010-04-19 12:04                                         ` Yu, Ke
  2010-04-20  5:35                                         ` Wei, Gang
  0 siblings, 2 replies; 44+ messages in thread
From: Keir Fraser @ 2010-04-19 10:50 UTC (permalink / raw)
  To: Yu, Ke, Jiang, Yunhong; +Cc: xen-devel

On 16/04/2010 18:51, "Keir Fraser" <keir.fraser@eu.citrix.com> wrote:

>> We have a stress S3 test upon the Xen 4.1 unstable. Unfortunately, cset 21181
>> cannot pass the stress. It hangs after 48 times S3 suspend/resume.  Another
>> round test even shows it hangs after 2 times suspend/resume. But it is too
>> early to say cset 21181/21180 is the evil, because even cset 21172 (cset
>> ahead
>> of continue_hypercall_on_cpu improvement patch) cannot pass the S3 test
>> either, although it has bigger success suspend/resume times than cset 21181 .
>> so generally, there must be something wrong with S3 logic since Xen 4.0
>> release. We are still trying to dig it out...
> 
> 21172 was not a good changeset to pick. I suggest trying xen-unstable tip
> with just 21181/21180 reverted.

Actually, try xen-unstable staging tip (c/s 21202). I've just reimplemented
tasklets, so things are quite different at latest tip.

 Thanks,
 Keir

^ permalink raw reply	[flat|nested] 44+ messages in thread

* RE: [Patch] continue_hypercall_on_cpu rework using tasklets
  2010-04-19 10:50                                       ` Keir Fraser
@ 2010-04-19 12:04                                         ` Yu, Ke
  2010-04-20  5:35                                         ` Wei, Gang
  1 sibling, 0 replies; 44+ messages in thread
From: Yu, Ke @ 2010-04-19 12:04 UTC (permalink / raw)
  To: Keir Fraser, Jiang, Yunhong; +Cc: xen-devel

> -----Original Message-----
> From: Keir Fraser [mailto:keir.fraser@eu.citrix.com]
> Sent: Monday, April 19, 2010 6:51 PM
> To: Yu, Ke; Jiang, Yunhong
> Cc: xen-devel@lists.xensource.com
> Subject: Re: [Xen-devel] [Patch] continue_hypercall_on_cpu rework using
> tasklets
> 
> On 16/04/2010 18:51, "Keir Fraser" <keir.fraser@eu.citrix.com> wrote:
> 
> >> We have a stress S3 test upon the Xen 4.1 unstable. Unfortunately, cset
> 21181
> >> cannot pass the stress. It hangs after 48 times S3 suspend/resume.
> Another
> >> round test even shows it hangs after 2 times suspend/resume. But it is
> too
> >> early to say cset 21181/21180 is the evil, because even cset 21172 (cset
> >> ahead
> >> of continue_hypercall_on_cpu improvement patch) cannot pass the S3
> test
> >> either, although it has bigger success suspend/resume times than cset
> 21181 .
> >> so generally, there must be something wrong with S3 logic since Xen 4.0
> >> release. We are still trying to dig it out...
> >
> > 21172 was not a good changeset to pick. I suggest trying xen-unstable tip
> > with just 21181/21180 reverted.
> 
> Actually, try xen-unstable staging tip (c/s 21202). I've just reimplemented
> tasklets, so things are quite different at latest tip.
> 
>  Thanks,
>  Keir
> 

Yeah, move those stuff to idle cpu context is cleaner. I will try this.

Best Regards
Ke

^ permalink raw reply	[flat|nested] 44+ messages in thread

* RE: [Patch] continue_hypercall_on_cpu rework using tasklets
  2010-04-19 10:50                                       ` Keir Fraser
  2010-04-19 12:04                                         ` Yu, Ke
@ 2010-04-20  5:35                                         ` Wei, Gang
  2010-04-20 12:51                                           ` Keir Fraser
  1 sibling, 1 reply; 44+ messages in thread
From: Wei, Gang @ 2010-04-20  5:35 UTC (permalink / raw)
  To: Keir Fraser, Yu, Ke, Jiang, Yunhong; +Cc: xen-devel

On Monday, 2010-4-19 6:51 PM, Keir Fraser wrote:
> On 16/04/2010 18:51, "Keir Fraser" <keir.fraser@eu.citrix.com> wrote:
> 
>>> We have a stress S3 test upon the Xen 4.1 unstable. Unfortunately,
>>> cset 21181 cannot pass the stress. It hangs after 48 times S3
>>> suspend/resume.  Another round test even shows it hangs after 2
>>> times suspend/resume. But it is too early to say cset 21181/21180
>>> is the evil, because even cset 21172 (cset ahead of
>>> continue_hypercall_on_cpu improvement patch) cannot pass the S3
>>> test either, although it has bigger success suspend/resume times
>>> than cset 21181 . so generally, there must be something wrong with
>>> S3 logic since Xen 4.0 release. We are still trying to dig it
>>> out...  
>> 
>> 21172 was not a good changeset to pick. I suggest trying
>> xen-unstable tip with just 21181/21180 reverted.
> 
> Actually, try xen-unstable staging tip (c/s 21202). I've just
> reimplemented tasklets, so things are quite different at latest tip.

Just tried c/s 21202; it hung during the 285th S3 resume on a system with only legacy HPET broadcast. And it is easier to hang on an HPET MSI broadcast capable system - it needs only <100 cycles.

Nothing wrong in the serial log, and ctrl-a didn't respond. I will try to find a debugging system with ITP to figure out the hanging point. Meanwhile, I will try a cpu online/offline stress test to see whether it is a general problem in cpu online/offline or just S3 specific.

Jimmy

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [Patch] continue_hypercall_on_cpu rework using tasklets
  2010-04-20  5:35                                         ` Wei, Gang
@ 2010-04-20 12:51                                           ` Keir Fraser
  2010-04-20 14:10                                             ` Wei, Gang
  2010-04-21  6:47                                             ` Wei, Gang
  0 siblings, 2 replies; 44+ messages in thread
From: Keir Fraser @ 2010-04-20 12:51 UTC (permalink / raw)
  To: Wei, Gang, Yu, Ke, Jiang, Yunhong; +Cc: xen-devel

On 20/04/2010 06:35, "Wei, Gang" <gang.wei@intel.com> wrote:

>>> 21172 was not a good changeset to pick. I suggest trying
>>> xen-unstable tip with just 21181/21180 reverted.
>> 
>> Actually, try xen-unstable staging tip (c/s 21202). I've just
>> reimplemented tasklets, so things are quite different at latest tip.
> 
> Just tried c/s 21202, hung during the 285th S3 resume on a system with only
> legacy HPET broadcast. And it is easier to hang on an HPET MSI broadcast
> capable system - it needs only <100 times.
> 
> Nothing wrong in the serial log, and ctrl-a didn't respond. I will try to find
> a debugging system with ITP to figure out the hanging point. Meanwhile, I will
> try a cpu online/offline stress test to see whether it is a general problem in
> cpu online/offline or just S3 specific.

How reliable is S3 in 4.0, for comparison? Did you get it rock solid?

Also, try c/s 21204 before getting into heavy debugging. It fixes being
able to access VMCS fields from c_h_o_c() context. Worth a try.

 -- Keir

^ permalink raw reply	[flat|nested] 44+ messages in thread

* RE: [Patch] continue_hypercall_on_cpu rework using tasklets
  2010-04-20 12:51                                           ` Keir Fraser
@ 2010-04-20 14:10                                             ` Wei, Gang
  2010-04-21  6:47                                             ` Wei, Gang
  1 sibling, 0 replies; 44+ messages in thread
From: Wei, Gang @ 2010-04-20 14:10 UTC (permalink / raw)
  To: Keir Fraser, Yu, Ke, Jiang, Yunhong; +Cc: xen-devel

On Tuesday, 2010-4-20 8:52 PM, Keir Fraser wrote:
> How reliable is S3 in 4.0, for comparison? Did you get it rock solid?

S3 in 4.0 just passed several 100-time S3 stress tests. It is not that rock solid. So I am not sure whether it is a regression or not yet.

> Also, try c/s 21204 before getting into heavy debugging. It fixes
> being able to access VMCS fields from c_h_o_c() context. Worth a try.

I will give 21204 a try.

Jimmy

^ permalink raw reply	[flat|nested] 44+ messages in thread

* RE: [Patch] continue_hypercall_on_cpu rework using tasklets
  2010-04-20 12:51                                           ` Keir Fraser
  2010-04-20 14:10                                             ` Wei, Gang
@ 2010-04-21  6:47                                             ` Wei, Gang
  1 sibling, 0 replies; 44+ messages in thread
From: Wei, Gang @ 2010-04-21  6:47 UTC (permalink / raw)
  To: Keir Fraser, Yu, Ke, Jiang, Yunhong; +Cc: xen-devel

On Tuesday, 2010-4-20 8:52 PM, Keir Fraser wrote:
> Also, try c/s 21204 before getting into heavy debugging. It fixes
> being able to access VMCS fields from c_h_o_c() context. Worth a try.

Tried c/s 21208, still can't pass the 100-time S3 test. Hung after 30~60 times.

Jimmy

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [Patch] continue_hypercall_on_cpu rework using tasklets
  2009-12-30 13:57 ` Keir Fraser
@ 2009-12-30 13:59   ` Juergen Gross
  0 siblings, 0 replies; 44+ messages in thread
From: Juergen Gross @ 2009-12-30 13:59 UTC (permalink / raw)
  To: Keir Fraser; +Cc: xen-devel

Keir Fraser wrote:
> Looks great. You can rebase cpupools on top of it and both can go in after
> 4.0 is released.

Thanks!


Juergen

> 
>  -- Keir
> 
> On 30/12/2009 13:46, "Juergen Gross" <juergen.gross@ts.fujitsu.com> wrote:
> 
>> Hi,
>>
>> attached is a patch to use tasklets for continue_hypercall_on_cpu instead of
>> temporarily pinning the vcpu to the target physical cpu.
>>
>> This is thought as base for cpupools as Keir requested to get rid of the
>> "borrow cpu" stuff in my original solution.
>>
>> Tested on x86_64 via a little test hypercall calling continue_hypercall_on_cpu
>> with different target cpus.
>>
>> Keir, is this solution the direction you wanted to go to?
>>
>>
>> Juergen
> 
> 
> 
> 


-- 
Juergen Gross                 Principal Developer Operating Systems
TSP ES&S SWE OS6                       Telephone: +49 (0) 89 3222 2967
Fujitsu Technology Solutions              e-mail: juergen.gross@ts.fujitsu.com
Domagkstr. 28                           Internet: ts.fujitsu.com
D-80807 Muenchen                 Company details: ts.fujitsu.com/imprint.html

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [Patch] continue_hypercall_on_cpu rework using tasklets
  2009-12-30 13:46 Juergen Gross
@ 2009-12-30 13:57 ` Keir Fraser
  2009-12-30 13:59   ` Juergen Gross
  0 siblings, 1 reply; 44+ messages in thread
From: Keir Fraser @ 2009-12-30 13:57 UTC (permalink / raw)
  To: Juergen Gross, xen-devel

Looks great. You can rebase cpupools on top of it and both can go in after
4.0 is released.

 -- Keir

On 30/12/2009 13:46, "Juergen Gross" <juergen.gross@ts.fujitsu.com> wrote:

> Hi,
> 
> attached is a patch to use tasklets for continue_hypercall_on_cpu instead of
> temporarily pinning the vcpu to the target physical cpu.
> 
> This is thought as base for cpupools as Keir requested to get rid of the
> "borrow cpu" stuff in my original solution.
> 
> Tested on x86_64 via a little test hypercall calling continue_hypercall_on_cpu
> with different target cpus.
> 
> Keir, is this solution the direction you wanted to go to?
> 
> 
> Juergen

^ permalink raw reply	[flat|nested] 44+ messages in thread

* [Patch] continue_hypercall_on_cpu rework using tasklets
@ 2009-12-30 13:46 Juergen Gross
  2009-12-30 13:57 ` Keir Fraser
  0 siblings, 1 reply; 44+ messages in thread
From: Juergen Gross @ 2009-12-30 13:46 UTC (permalink / raw)
  To: xen-devel

[-- Attachment #1: Type: text/plain, Size: 800 bytes --]

Hi,

attached is a patch to use tasklets for continue_hypercall_on_cpu instead of
temporarily pinning the vcpu to the target physical cpu.

This is thought as base for cpupools as Keir requested to get rid of the
"borrow cpu" stuff in my original solution.

Tested on x86_64 via a little test hypercall calling continue_hypercall_on_cpu
with different target cpus.

Keir, is this solution the direction you wanted to go to?


Juergen

-- 
Juergen Gross                 Principal Developer Operating Systems
TSP ES&S SWE OS6                       Telephone: +49 (0) 89 3222 2967
Fujitsu Technology Solutions              e-mail: juergen.gross@ts.fujitsu.com
Domagkstr. 28                           Internet: ts.fujitsu.com
D-80807 Muenchen                 Company details: ts.fujitsu.com/imprint.html

[-- Attachment #2: hpc_tasklet.patch --]
[-- Type: text/x-patch, Size: 9632 bytes --]

# HG changeset patch
# User juergen.gross@ts.fujitsu.com
# Date 1262180329 -3600
# Node ID 1aa6f84167e2b4bcfed265d775bc3ac72ce321ed
# Parent  3f654b88e201a1341786a0e8725c25f40c1162b7

Signed-off-by: juergen.gross@ts.fujitsu.com

continue_hypercall_on_cpu rework using tasklets

diff -r 3f654b88e201 -r 1aa6f84167e2 xen/arch/x86/domain.c
--- a/xen/arch/x86/domain.c	Tue Dec 29 15:11:47 2009 +0000
+++ b/xen/arch/x86/domain.c	Wed Dec 30 14:38:49 2009 +0100
@@ -1506,42 +1506,71 @@
 }
 
 struct migrate_info {
+    struct tasklet tasklet;
     long (*func)(void *data);
     void *data;
     void (*saved_schedule_tail)(struct vcpu *);
-    cpumask_t saved_affinity;
     unsigned int nest;
+    int ret;
+    struct vcpu *v;
+    volatile int ready;
 };
+
+static DEFINE_PER_CPU(struct migrate_info *, mig_info);
 
 static void continue_hypercall_on_cpu_helper(struct vcpu *v)
 {
     struct cpu_user_regs *regs = guest_cpu_user_regs();
     struct migrate_info *info = v->arch.continue_info;
-    cpumask_t mask = info->saved_affinity;
     void (*saved_schedule_tail)(struct vcpu *) = info->saved_schedule_tail;
 
-    regs->eax = info->func(info->data);
+    while ( !info->ready )
+        cpu_relax();
+
+    regs->eax = info->ret;
 
     if ( info->nest-- == 0 )
     {
+        tasklet_kill(&info->tasklet);
         xfree(info);
         v->arch.schedule_tail = saved_schedule_tail;
         v->arch.continue_info = NULL;
-        vcpu_unlock_affinity(v, &mask);
     }
 
     (*saved_schedule_tail)(v);
+}
+
+static void continue_hypercall_on_cpu_tasklet(struct migrate_info *info)
+{
+    this_cpu(mig_info) = info;
+
+    while ( !vcpu_runnable(info->v) && info->v->is_running )
+        cpu_relax();
+
+    info->ret = info->func(info->data);
+
+    if ( info->nest == 0 )
+    {
+        info->ready = 1;
+        vcpu_wake(info->v);
+    }
+
+    this_cpu(mig_info) = NULL;
+
+    return;
 }
 
 int continue_hypercall_on_cpu(int cpu, long (*func)(void *data), void *data)
 {
     struct vcpu *v = current;
     struct migrate_info *info;
-    cpumask_t mask = cpumask_of_cpu(cpu);
-    int rc;
 
     if ( cpu == smp_processor_id() )
         return func(data);
+
+    info = this_cpu(mig_info);
+    if ( info != NULL )
+        v = info->v;
 
     info = v->arch.continue_info;
     if ( info == NULL )
@@ -1550,16 +1579,12 @@
         if ( info == NULL )
             return -ENOMEM;
 
-        rc = vcpu_lock_affinity(v, &mask);
-        if ( rc )
-        {
-            xfree(info);
-            return rc;
-        }
-
         info->saved_schedule_tail = v->arch.schedule_tail;
-        info->saved_affinity = mask;
         info->nest = 0;
+        info->v = v;
+        tasklet_init(&info->tasklet,
+                     (void(*)(unsigned long))continue_hypercall_on_cpu_tasklet,
+                     (unsigned long)info);
 
         v->arch.schedule_tail = continue_hypercall_on_cpu_helper;
         v->arch.continue_info = info;
@@ -1567,17 +1592,18 @@
     else
     {
         BUG_ON(info->nest != 0);
-        rc = vcpu_locked_change_affinity(v, &mask);
-        if ( rc )
-            return rc;
         info->nest++;
     }
 
     info->func = func;
     info->data = data;
+    info->ready = 0;
+
+    tasklet_schedule_cpu(&info->tasklet, cpu);
+    vcpu_sleep_nosync(v);
+    raise_softirq(SCHEDULE_SOFTIRQ);
 
     /* Dummy return value will be overwritten by new schedule_tail. */
-    BUG_ON(!test_bit(SCHEDULE_SOFTIRQ, &softirq_pending(smp_processor_id())));
     return 0;
 }
 
diff -r 3f654b88e201 -r 1aa6f84167e2 xen/common/schedule.c
--- a/xen/common/schedule.c	Tue Dec 29 15:11:47 2009 +0000
+++ b/xen/common/schedule.c	Wed Dec 30 14:38:49 2009 +0100
@@ -367,26 +367,17 @@
     }
 }
 
-static int __vcpu_set_affinity(
-    struct vcpu *v, cpumask_t *affinity,
-    bool_t old_lock_status, bool_t new_lock_status)
+int vcpu_set_affinity(struct vcpu *v, cpumask_t *affinity)
 {
     cpumask_t online_affinity, old_affinity;
 
+    if ( v->domain->is_pinned )
+        return -EINVAL;
     cpus_and(online_affinity, *affinity, cpu_online_map);
     if ( cpus_empty(online_affinity) )
         return -EINVAL;
 
     vcpu_schedule_lock_irq(v);
-
-    if ( v->affinity_locked != old_lock_status )
-    {
-        BUG_ON(!v->affinity_locked);
-        vcpu_schedule_unlock_irq(v);
-        return -EBUSY;
-    }
-
-    v->affinity_locked = new_lock_status;
 
     old_affinity = v->cpu_affinity;
     v->cpu_affinity = *affinity;
@@ -403,36 +394,6 @@
     }
 
     return 0;
-}
-
-int vcpu_set_affinity(struct vcpu *v, cpumask_t *affinity)
-{
-    if ( v->domain->is_pinned )
-        return -EINVAL;
-    return __vcpu_set_affinity(v, affinity, 0, 0);
-}
-
-int vcpu_lock_affinity(struct vcpu *v, cpumask_t *affinity)
-{
-    return __vcpu_set_affinity(v, affinity, 0, 1);
-}
-
-int vcpu_locked_change_affinity(struct vcpu *v, cpumask_t *affinity)
-{
-    return __vcpu_set_affinity(v, affinity, 1, 1);
-}
-
-void vcpu_unlock_affinity(struct vcpu *v, cpumask_t *affinity)
-{
-    cpumask_t online_affinity;
-
-    /* Do not fail if no CPU in old affinity mask is online. */
-    cpus_and(online_affinity, *affinity, cpu_online_map);
-    if ( cpus_empty(online_affinity) )
-        *affinity = cpu_online_map;
-
-    if ( __vcpu_set_affinity(v, affinity, 1, 0) != 0 )
-        BUG();
 }
 
 /* Block the currently-executing domain until a pertinent event occurs. */
diff -r 3f654b88e201 -r 1aa6f84167e2 xen/common/softirq.c
--- a/xen/common/softirq.c	Tue Dec 29 15:11:47 2009 +0000
+++ b/xen/common/softirq.c	Wed Dec 30 14:38:49 2009 +0100
@@ -88,9 +88,11 @@
 }
 
 static LIST_HEAD(tasklet_list);
+static DEFINE_PER_CPU(struct list_head, tasklet_list_pcpu);
 static DEFINE_SPINLOCK(tasklet_lock);
 
-void tasklet_schedule(struct tasklet *t)
+static void tasklet_schedule_list(struct tasklet *t, struct list_head *tlist,
+    int cpu)
 {
     unsigned long flags;
 
@@ -101,28 +103,44 @@
         if ( !t->is_scheduled && !t->is_running )
         {
             BUG_ON(!list_empty(&t->list));
-            list_add_tail(&t->list, &tasklet_list);
+            list_add_tail(&t->list, tlist);
         }
         t->is_scheduled = 1;
-        raise_softirq(TASKLET_SOFTIRQ);
+        if ( cpu == smp_processor_id() )
+            raise_softirq(TASKLET_SOFTIRQ);
+        else
+            cpu_raise_softirq(cpu, TASKLET_SOFTIRQ);
     }
 
     spin_unlock_irqrestore(&tasklet_lock, flags);
 }
 
+void tasklet_schedule(struct tasklet *t)
+{
+    tasklet_schedule_list(t, &tasklet_list, smp_processor_id());
+}
+
+void tasklet_schedule_cpu(struct tasklet *t, int cpu)
+{
+    tasklet_schedule_list(t, &per_cpu(tasklet_list_pcpu, cpu), cpu);
+}
+
 static void tasklet_action(void)
 {
+    struct list_head *tlist;
     struct tasklet *t;
 
     spin_lock_irq(&tasklet_lock);
 
-    if ( list_empty(&tasklet_list) )
+    tlist = ( list_empty(&this_cpu(tasklet_list_pcpu)) ) ? &tasklet_list :
+        &this_cpu(tasklet_list_pcpu);
+    if ( list_empty(tlist) )
     {
         spin_unlock_irq(&tasklet_lock);
         return;
     }
 
-    t = list_entry(tasklet_list.next, struct tasklet, list);
+    t = list_entry(tlist->next, struct tasklet, list);
     list_del_init(&t->list);
 
     BUG_ON(t->is_dead || t->is_running || !t->is_scheduled);
@@ -138,14 +156,15 @@
     if ( t->is_scheduled )
     {
         BUG_ON(t->is_dead || !list_empty(&t->list));
-        list_add_tail(&t->list, &tasklet_list);
+        list_add_tail(&t->list, tlist);
     }
 
     /*
      * If there is more work to do then reschedule. We don't grab more work
      * immediately as we want to allow other softirq work to happen first.
      */
-    if ( !list_empty(&tasklet_list) )
+    if ( !list_empty(&tasklet_list) ||
+        !list_empty(&this_cpu(tasklet_list_pcpu)) )
         raise_softirq(TASKLET_SOFTIRQ);
 
     spin_unlock_irq(&tasklet_lock);
@@ -186,6 +205,12 @@
 
 void __init softirq_init(void)
 {
+    int i;
+
+    for_each_possible_cpu ( i )
+    {
+        INIT_LIST_HEAD(&per_cpu(tasklet_list_pcpu, i));
+    }
     open_softirq(TASKLET_SOFTIRQ, tasklet_action);
 }
 
diff -r 3f654b88e201 -r 1aa6f84167e2 xen/include/xen/sched.h
--- a/xen/include/xen/sched.h	Tue Dec 29 15:11:47 2009 +0000
+++ b/xen/include/xen/sched.h	Wed Dec 30 14:38:49 2009 +0100
@@ -130,8 +130,6 @@
     bool_t           defer_shutdown;
     /* VCPU is paused following shutdown request (d->is_shutting_down)? */
     bool_t           paused_for_shutdown;
-    /* VCPU affinity is temporarily locked from controller changes? */
-    bool_t           affinity_locked;
 
     /*
      * > 0: a single port is being polled;
@@ -579,9 +577,6 @@
 void vcpu_force_reschedule(struct vcpu *v);
 void cpu_disable_scheduler(void);
 int vcpu_set_affinity(struct vcpu *v, cpumask_t *affinity);
-int vcpu_lock_affinity(struct vcpu *v, cpumask_t *affinity);
-int vcpu_locked_change_affinity(struct vcpu *v, cpumask_t *affinity);
-void vcpu_unlock_affinity(struct vcpu *v, cpumask_t *affinity);
 
 void vcpu_runstate_get(struct vcpu *v, struct vcpu_runstate_info *runstate);
 uint64_t get_cpu_idle_time(unsigned int cpu);
diff -r 3f654b88e201 -r 1aa6f84167e2 xen/include/xen/softirq.h
--- a/xen/include/xen/softirq.h	Tue Dec 29 15:11:47 2009 +0000
+++ b/xen/include/xen/softirq.h	Wed Dec 30 14:38:49 2009 +0100
@@ -58,6 +58,7 @@
     struct tasklet name = { LIST_HEAD_INIT(name.list), 0, 0, 0, func, data }
 
 void tasklet_schedule(struct tasklet *t);
+void tasklet_schedule_cpu(struct tasklet *t, int cpu);
 void tasklet_kill(struct tasklet *t);
 void tasklet_init(
     struct tasklet *t, void (*func)(unsigned long), unsigned long data);

[-- Attachment #3: Type: text/plain, Size: 138 bytes --]

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel

^ permalink raw reply	[flat|nested] 44+ messages in thread

end of thread, other threads:[~2010-04-21  6:47 UTC | newest]

Thread overview: 44+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2010-04-13 13:19 [Patch] continue_hypercall_on_cpu rework using tasklets Juergen Gross
2010-04-13 13:40 ` Jan Beulich
2010-04-13 13:49   ` Juergen Gross
2010-04-13 15:08     ` Keir Fraser
2010-04-14  6:54       ` Juergen Gross
2010-04-13 14:41 ` Keir Fraser
2010-04-14  4:26   ` Juergen Gross
2010-04-14  6:46     ` Keir Fraser
2010-04-14  6:58       ` Juergen Gross
2010-04-14  7:15         ` Keir Fraser
2010-04-14  7:25           ` Juergen Gross
2010-04-14  7:35             ` Keir Fraser
2010-04-14  8:04               ` Juergen Gross
2010-04-14 10:30                 ` Keir Fraser
2010-04-15  6:31                   ` Juergen Gross
2010-04-15  6:39                     ` Keir Fraser
2010-04-15  7:57                       ` Juergen Gross
2010-04-15  8:13                         ` Keir Fraser
2010-04-15  8:22                           ` Keir Fraser
2010-04-15  9:59                             ` Jiang, Yunhong
2010-04-15 11:06                               ` Keir Fraser
2010-04-15 11:22                                 ` Keir Fraser
2010-04-16  9:20                                   ` Yu, Ke
2010-04-16 17:51                                     ` Keir Fraser
2010-04-19 10:50                                       ` Keir Fraser
2010-04-19 12:04                                         ` Yu, Ke
2010-04-20  5:35                                         ` Wei, Gang
2010-04-20 12:51                                           ` Keir Fraser
2010-04-20 14:10                                             ` Wei, Gang
2010-04-21  6:47                                             ` Wei, Gang
2010-04-16  6:42                                 ` Jiang, Yunhong
2010-04-16  6:55                                   ` Juergen Gross
2010-04-16  8:30                                     ` Jiang, Yunhong
2010-04-16  7:13                                   ` Keir Fraser
2010-04-16  8:16                                     ` Jiang, Yunhong
2010-04-16 17:57                                       ` Keir Fraser
2010-04-19  5:53                                         ` Jiang, Yunhong
2010-04-19  6:48                                           ` Keir Fraser
2010-04-16  6:45                                 ` Jiang, Yunhong
2010-04-15  8:29                           ` Juergen Gross
2010-04-15  9:08                             ` Keir Fraser
  -- strict thread matches above, loose matches on Subject: below --
2009-12-30 13:46 Juergen Gross
2009-12-30 13:57 ` Keir Fraser
2009-12-30 13:59   ` Juergen Gross

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.