* [Qemu-devel] [RFC] queue_work proposal
@ 2009-09-03  0:52 Glauber Costa
  2009-09-03  7:36 ` [Qemu-devel] " Paolo Bonzini
  2009-09-03  8:45 ` [Qemu-devel] " Avi Kivity
  0 siblings, 2 replies; 9+ messages in thread
From: Glauber Costa @ 2009-09-03  0:52 UTC (permalink / raw)
  To: qemu-devel; +Cc: aliguori

Hi guys

In this patch, I am attaching an early version of a new "on_vcpu" mechanism (after
making it generic, I saw no reason to keep its name). It allows us to guarantee
that a piece of code will be executed on a given vcpu, indicated by its CPUState.

I am sorry for the big patch; I just dumped what I had so we can settle on a direction early.
When it reaches submission state, I'll split it up accordingly.

As we discussed these days at qemu-devel, I am using pthread_set/get_specific for
dealing with thread-local variables. Note that they are not used from signal handlers.
A first optimization would be to use TLS variables where available.
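
Just for reference, the TLS variant would look something along these lines (untested
sketch; it assumes the toolchain supports __thread):

    static __thread CPUState *current_env;

    static CPUState *qemu_get_current_env(void)
    {
        return current_env;
    }

    static void qemu_set_current_env(CPUState *env)
    {
        current_env = env;
    }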

In vl.c, I am providing one version of queue_work for the IO-thread, and another for normal
operation. The "normal" one should fix the problems Jan is having, since it does nothing
more than issue the function we want to execute.

The io-thread version is tested with both tcg and kvm, and works (to the extent they were
working before, which, in the kvm case, is not much).

Signed-off-by: Glauber Costa <glommer@redhat.com>
---
 cpu-all.h  |    3 ++
 cpu-defs.h |   15 +++++++++++-
 exec.c     |    1 +
 kvm-all.c  |   58 +++++++++++++++++++----------------------------
 kvm.h      |    7 +++++
 vl.c       |   73 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 6 files changed, 122 insertions(+), 35 deletions(-)

diff --git a/cpu-all.h b/cpu-all.h
index 1a6a812..120510b 100644
--- a/cpu-all.h
+++ b/cpu-all.h
@@ -763,6 +763,9 @@ extern CPUState *cpu_single_env;
 extern int64_t qemu_icount;
 extern int use_icount;
 
+void qemu_queue_work(CPUState *env, void (*func)(void *data), void *data, int wait);
+void qemu_flush_work(CPUState *env);
+
 #define CPU_INTERRUPT_HARD   0x02 /* hardware interrupt pending */
 #define CPU_INTERRUPT_EXITTB 0x04 /* exit the current TB (use for x86 a20 case) */
 #define CPU_INTERRUPT_TIMER  0x08 /* internal timer exception pending */
diff --git a/cpu-defs.h b/cpu-defs.h
index b6c8b95..6b564cf 100644
--- a/cpu-defs.h
+++ b/cpu-defs.h
@@ -134,6 +134,15 @@ typedef struct CPUWatchpoint {
     TAILQ_ENTRY(CPUWatchpoint) entry;
 } CPUWatchpoint;
 
+typedef struct QemuWorkItem {
+    void (*func)(void *data);
+    void *data;
+    int done;
+    int wait;
+    TAILQ_ENTRY(QemuWorkItem) entry;
+} QemuWorkItem;
+
+
 #define CPU_TEMP_BUF_NLONGS 128
 #define CPU_COMMON                                                      \
     struct TranslationBlock *current_tb; /* currently executing TB  */  \
@@ -175,6 +184,9 @@ typedef struct CPUWatchpoint {
     TAILQ_HEAD(watchpoints_head, CPUWatchpoint) watchpoints;            \
     CPUWatchpoint *watchpoint_hit;                                      \
                                                                         \
+    TAILQ_HEAD(queued_work_head, QemuWorkItem) queued_work;             \
+    uint64_t queued_local, queued_total;                                \
+                                                                        \
     struct GDBRegisterState *gdb_regs;                                  \
                                                                         \
     /* Core interrupt code */                                           \
@@ -194,9 +206,10 @@ typedef struct CPUWatchpoint {
     uint32_t created;                                                   \
     struct QemuThread *thread;                                          \
     struct QemuCond *halt_cond;                                         \
+    struct QemuCond *work_cond;                                         \
     const char *cpu_model_str;                                          \
     struct KVMState *kvm_state;                                         \
     struct kvm_run *kvm_run;                                            \
-    int kvm_fd;
+    int kvm_fd;                                                         \
 
 #endif
diff --git a/exec.c b/exec.c
index 21f10b9..2db5e5b 100644
--- a/exec.c
+++ b/exec.c
@@ -577,6 +577,7 @@ void cpu_exec_init(CPUState *env)
     env->numa_node = 0;
     TAILQ_INIT(&env->breakpoints);
     TAILQ_INIT(&env->watchpoints);
+    TAILQ_INIT(&env->queued_work);
     *penv = env;
 #if defined(CONFIG_USER_ONLY)
     cpu_list_unlock();
diff --git a/kvm-all.c b/kvm-all.c
index 7e010c1..eae2e88 100644
--- a/kvm-all.c
+++ b/kvm-all.c
@@ -855,21 +855,34 @@ int kvm_vm_ioctl(KVMState *s, int type, ...)
     return ret;
 }
 
-int kvm_vcpu_ioctl(CPUState *env, int type, ...)
+static void kvm_remote_ioctl(void *data)
 {
+    KVMIoctl *arg = data;
     int ret;
+
+    ret = ioctl(arg->fd, arg->type, arg->data);
+    if (ret == -1)
+        ret = -errno;
+    arg->ret = ret;
+}
+
+int kvm_vcpu_ioctl(CPUState *env, int type, ...)
+{
     void *arg;
     va_list ap;
+    KVMIoctl data;
 
     va_start(ap, type);
     arg = va_arg(ap, void *);
     va_end(ap);
 
-    ret = ioctl(env->kvm_fd, type, arg);
-    if (ret == -1)
-        ret = -errno;
+    data.type = type;
+    data.data = arg;
+    data.fd = env->kvm_fd;
 
-    return ret;
+    qemu_queue_work(env, kvm_remote_ioctl, (void *)&data, 1);
+
+    return data.ret;
 }
 
 int kvm_has_sync_mmu(void)
@@ -902,15 +915,6 @@ void kvm_setup_guest_memory(void *start, size_t size)
 }
 
 #ifdef KVM_CAP_SET_GUEST_DEBUG
-static void on_vcpu(CPUState *env, void (*func)(void *data), void *data)
-{
-    if (env == cpu_single_env) {
-        func(data);
-        return;
-    }
-    abort();
-}
-
 struct kvm_sw_breakpoint *kvm_find_sw_breakpoint(CPUState *env,
                                                  target_ulong pc)
 {
@@ -928,32 +932,18 @@ int kvm_sw_breakpoints_active(CPUState *env)
     return !TAILQ_EMPTY(&env->kvm_state->kvm_sw_breakpoints);
 }
 
-struct kvm_set_guest_debug_data {
-    struct kvm_guest_debug dbg;
-    CPUState *env;
-    int err;
-};
-
-static void kvm_invoke_set_guest_debug(void *data)
-{
-    struct kvm_set_guest_debug_data *dbg_data = data;
-    dbg_data->err = kvm_vcpu_ioctl(dbg_data->env, KVM_SET_GUEST_DEBUG, &dbg_data->dbg);
-}
-
 int kvm_update_guest_debug(CPUState *env, unsigned long reinject_trap)
 {
-    struct kvm_set_guest_debug_data data;
+    struct kvm_guest_debug dbg;
 
-    data.dbg.control = 0;
+    dbg.control = 0;
     if (env->singlestep_enabled)
-        data.dbg.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_SINGLESTEP;
+        dbg.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_SINGLESTEP;
 
-    kvm_arch_update_guest_debug(env, &data.dbg);
-    data.dbg.control |= reinject_trap;
-    data.env = env;
+    kvm_arch_update_guest_debug(env, &dbg);
+    dbg.control |= reinject_trap;
 
-    on_vcpu(env, kvm_invoke_set_guest_debug, &data);
-    return data.err;
+    return kvm_vcpu_ioctl(env, KVM_SET_GUEST_DEBUG, &dbg);
 }
 
 int kvm_insert_breakpoint(CPUState *current_env, target_ulong addr,
diff --git a/kvm.h b/kvm.h
index dbe825f..7595141 100644
--- a/kvm.h
+++ b/kvm.h
@@ -139,4 +139,11 @@ static inline void cpu_synchronize_state(CPUState *env)
     }
 }
 
+typedef struct KVMIoctl {
+    int fd;
+    int type;
+    int ret;
+    void *data;
+} KVMIoctl;
+
 #endif
diff --git a/vl.c b/vl.c
index a63bb75..d247718 100644
--- a/vl.c
+++ b/vl.c
@@ -3638,6 +3638,7 @@ void qemu_init_vcpu(void *_env)
         kvm_init_vcpu(env);
     env->nr_cores = smp_cores;
     env->nr_threads = smp_threads;
+
     return;
 }
 
@@ -3673,8 +3674,14 @@ void vm_stop(int reason)
     do_vm_stop(reason);
 }
 
+void qemu_queue_work(CPUState *env, void (*func)(void *data), void *data, int wait)
+{
+        func(data);
+}
+
 void qemu_mutex_lock_iothread(void) {}
 void qemu_mutex_unlock_iothread(void) {}
+
 #else /* CONFIG_IOTHREAD */
 
 #include "qemu-thread.h"
@@ -3698,6 +3705,23 @@ static void block_io_signals(void);
 static void unblock_io_signals(void);
 static int tcg_has_work(void);
 
+static pthread_key_t current_env;
+
+static CPUState *qemu_get_current_env(void)
+{
+    return pthread_getspecific(current_env);
+}
+
+static void qemu_set_current_env(CPUState *env)
+{
+    pthread_setspecific(current_env, env);
+}
+
+static void qemu_init_current_env(void)
+{
+    pthread_key_create(&current_env, NULL);
+}
+
 static int qemu_init_main_loop(void)
 {
     int ret;
@@ -3710,6 +3734,7 @@ static int qemu_init_main_loop(void)
     qemu_mutex_init(&qemu_fair_mutex);
     qemu_mutex_init(&qemu_global_mutex);
     qemu_mutex_lock(&qemu_global_mutex);
+    qemu_init_current_env();
 
     unblock_io_signals();
     qemu_thread_self(&io_thread);
@@ -3733,6 +3758,8 @@ static void qemu_wait_io_event(CPUState *env)
     qemu_mutex_unlock(&qemu_fair_mutex);
 
     qemu_mutex_lock(&qemu_global_mutex);
+    qemu_flush_work(env);
+
     if (env->stop) {
         env->stop = 0;
         env->stopped = 1;
@@ -3748,6 +3775,8 @@ static void *kvm_cpu_thread_fn(void *arg)
 
     block_io_signals();
     qemu_thread_self(env->thread);
+    qemu_set_current_env(env);
+
     kvm_init_vcpu(env);
 
     /* signal CPU creation */
@@ -3804,6 +3833,46 @@ void qemu_cpu_kick(void *_env)
         qemu_thread_signal(env->thread, SIGUSR1);
 }
 
+void qemu_queue_work(CPUState *env, void (*func)(void *data), void *data, int wait)
+{
+    QemuWorkItem *wii;
+
+    env->queued_total++;
+
+    if (env == qemu_get_current_env()) {
+        env->queued_total++;
+        func(data);
+        return;
+    }
+
+    wii = qemu_mallocz(sizeof(*wii));
+    wii->func = func;
+    wii->data = data;
+    wii->wait = wait;
+    TAILQ_INSERT_TAIL(&env->queued_work, wii, entry);
+
+    qemu_thread_signal(env->thread, SIGUSR1);
+
+    while (wait && !wii->done) {
+        qemu_cond_wait(env->work_cond, &qemu_global_mutex);
+    }
+
+    qemu_free(wii);
+}
+
+void qemu_flush_work(CPUState *env)
+{
+    QemuWorkItem *wi;
+
+    TAILQ_FOREACH(wi, &env->queued_work, entry) {
+        wi->func(wi->data);
+        wi->done = 1;
+        qemu_cond_broadcast(env->work_cond);
+        TAILQ_REMOVE(&env->queued_work, wi, entry);
+    }
+
+}
+
 int qemu_cpu_self(void *env)
 {
     return (cpu_single_env != NULL);
@@ -3960,10 +4029,14 @@ void qemu_init_vcpu(void *_env)
 {
     CPUState *env = _env;
 
+    env->work_cond = qemu_mallocz(sizeof(QemuCond));
+    qemu_cond_init(env->work_cond);
+
     if (kvm_enabled())
         kvm_start_vcpu(env);
     else
         tcg_init_vcpu(env);
+
     env->nr_cores = smp_cores;
     env->nr_threads = smp_threads;
 }
-- 
1.6.2.2

* [Qemu-devel] Re: [RFC] queue_work proposal
  2009-09-03  0:52 [Qemu-devel] [RFC] queue_work proposal Glauber Costa
@ 2009-09-03  7:36 ` Paolo Bonzini
  2009-09-03 11:07   ` Glauber Costa
  2009-09-03  8:45 ` [Qemu-devel] " Avi Kivity
  1 sibling, 1 reply; 9+ messages in thread
From: Paolo Bonzini @ 2009-09-03  7:36 UTC (permalink / raw)
  To: Glauber Costa; +Cc: aliguori, qemu-devel


> +    env->queued_total++;
> +
> +    if (env == qemu_get_current_env()) {
> +        env->queued_total++;

Why increment twice? (though queued_total is write only and queued_local 
is unused, so...)

> +        func(data);
> +        return;
> +    }
> +
> +    wii = qemu_mallocz(sizeof(*wii));
> +    wii->func = func;
> +    wii->data = data;
> +    wii->wait = wait;
> +    TAILQ_INSERT_TAIL(&env->queued_work, wii, entry);
> +
> +    qemu_thread_signal(env->thread, SIGUSR1);
> +
> +    while (wait && !wii->done) {
> +        qemu_cond_wait(env->work_cond, &qemu_global_mutex);
> +    }

You need to lock qemu_global_mutex around this while statement, or
add an env->queue_mutex and put the TAILQ_INSERT_TAIL under that
mutex as well.
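
For the latter, something along these lines (sketch only; queue_mutex would
be a new per-CPU QemuMutex, and qemu_flush_work() would have to take it too,
around the list walk and the done/broadcast update):

    qemu_mutex_lock(&env->queue_mutex);
    TAILQ_INSERT_TAIL(&env->queued_work, wii, entry);
    qemu_thread_signal(env->thread, SIGUSR1);
    while (wait && !wii->done) {
        qemu_cond_wait(env->work_cond, &env->queue_mutex);
    }
    qemu_mutex_unlock(&env->queue_mutex);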

Paolo

* Re: [Qemu-devel] [RFC] queue_work proposal
  2009-09-03  0:52 [Qemu-devel] [RFC] queue_work proposal Glauber Costa
  2009-09-03  7:36 ` [Qemu-devel] " Paolo Bonzini
@ 2009-09-03  8:45 ` Avi Kivity
  2009-09-03 11:15   ` Glauber Costa
  1 sibling, 1 reply; 9+ messages in thread
From: Avi Kivity @ 2009-09-03  8:45 UTC (permalink / raw)
  To: Glauber Costa; +Cc: aliguori, qemu-devel

On 09/03/2009 03:52 AM, Glauber Costa wrote:
> Hi guys
>
> In this patch, I am attaching an early version of a new "on_vcpu" mechanism (after
> making it generic, I saw no reason to keep its name). It allows us to guarantee
> that a piece of code will be executed in a certain vcpu, indicated by a CPUState.
>
> I am sorry for the big patch, I just dumped what I had so we can have early directions.
> When it comes to submission state, I'll split it accordingly.
>
> As we discussed these days at qemu-devel, I am using pthread_set/get_specific for
> dealing with thread-local variables. Note that they are not used from signal handlers.
> A first optimization would be to use TLS variables where available.
>
> In vl.c, I am providing a version of queue_work for the IO-thread, and other for normal
> operation. The "normal" one should fix the problems Jan is having, since it does nothing
> more than just issuing the function we want to execute.
>
> The io-thread version is tested with both tcg and kvm, and works (to the extent they were
> working before, which in kvm case, is not much)
>    

on_vcpu() and queue_work() are fundamentally different (yes, I see the 
wait parameter, and I think there should be two separate functions for 
such different behaviours).

Why do we need queue_work() in the first place?

Is there a way to limit the queue size to prevent overflow?


-- 
error compiling committee.c: too many arguments to function

* [Qemu-devel] Re: [RFC] queue_work proposal
  2009-09-03  7:36 ` [Qemu-devel] " Paolo Bonzini
@ 2009-09-03 11:07   ` Glauber Costa
  0 siblings, 0 replies; 9+ messages in thread
From: Glauber Costa @ 2009-09-03 11:07 UTC (permalink / raw)
  To: Paolo Bonzini; +Cc: aliguori, qemu-devel

On Thu, Sep 03, 2009 at 09:36:09AM +0200, Paolo Bonzini wrote:
>
>> +    env->queued_total++;
>> +
>> +    if (env == qemu_get_current_env()) {
>> +        env->queued_total++;
>
> Why increment twice? (though queued_total is write only and queued_local  
> is unused, so...)
yeah, you got it =p

As I said, I just dumped whatever I had.

>
>> +        func(data);
>> +        return;
>> +    }
>> +
>> +    wii = qemu_mallocz(sizeof(*wii));
>> +    wii->func = func;
>> +    wii->data = data;
>> +    wii->wait = wait;
>> +    TAILQ_INSERT_TAIL(&env->queued_work, wii, entry);
>> +
>> +    qemu_thread_signal(env->thread, SIGUSR1);
>> +
>> +    while (wait && !wii->done) {
>> +        qemu_cond_wait(env->work_cond, &qemu_global_mutex);
>> +    }
>
> You need to lock qemu_global_mutex around this while statement, or to  
> add env->queue_mutex and include the TAILQ_INSERT_TAIL in the mutex.
Thanks for catching that. The latter is clearly preferred, IMHO, for scalability
purposes.

* Re: [Qemu-devel] [RFC] queue_work proposal
  2009-09-03  8:45 ` [Qemu-devel] " Avi Kivity
@ 2009-09-03 11:15   ` Glauber Costa
  2009-09-03 11:32     ` Avi Kivity
  0 siblings, 1 reply; 9+ messages in thread
From: Glauber Costa @ 2009-09-03 11:15 UTC (permalink / raw)
  To: Avi Kivity; +Cc: aliguori, qemu-devel

On Thu, Sep 03, 2009 at 11:45:36AM +0300, Avi Kivity wrote:
> On 09/03/2009 03:52 AM, Glauber Costa wrote:
>> Hi guys
>>
>> In this patch, I am attaching an early version of a new "on_vcpu" mechanism (after
>> making it generic, I saw no reason to keep its name). It allows us to guarantee
>> that a piece of code will be executed in a certain vcpu, indicated by a CPUState.
>>
>> I am sorry for the big patch, I just dumped what I had so we can have early directions.
>> When it comes to submission state, I'll split it accordingly.
>>
>> As we discussed these days at qemu-devel, I am using pthread_set/get_specific for
>> dealing with thread-local variables. Note that they are not used from signal handlers.
>> A first optimization would be to use TLS variables where available.
>>
>> In vl.c, I am providing a version of queue_work for the IO-thread, and other for normal
>> operation. The "normal" one should fix the problems Jan is having, since it does nothing
>> more than just issuing the function we want to execute.
>>
>> The io-thread version is tested with both tcg and kvm, and works (to the extent they were
>> working before, which in kvm case, is not much)
>>    
>
> on_vcpu() and queue_work() are fundamentally different (yes, I see the  
> wait parameter, and I think there should be two separate functions for  
> such different behaviours).
Therefore, the name change. The exact on_vcpu behaviour, however, can be
implemented on top of queue_work(). Instead of doing that, I opted for using it
implicitly inside kvm_vcpu_ioctl, to guarantee that vcpu ioctls will always run
in the right thread context. Looking at qemu-kvm, it seems that there are a couple
of other functions that are not ioctls and need on_vcpu semantics. Converting them becomes
a simple matter of doing:

   queue_work(env, func, data, 1);
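
For instance, converting one of those non-ioctl functions would end up looking
roughly like this (illustrative sketch; do_poke_vcpu and its body are made up):

    static void do_poke_vcpu(void *data)
    {
        CPUState *env = data;

        /* ... anything that must run with env's thread as the current
           thread, e.g. a sequence of kvm_vcpu_ioctl() calls ... */
    }

    queue_work(env, do_poke_vcpu, env, 1);   /* wait == 1 gives on_vcpu semantics */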

I really don't see the big difference you are pointing at. They are both there to force a specific
function to be executed in the right thread context.

>
> Why do we need queue_work() in the first place?
To force a function to be executed in the correct thread context.
Why do we need on_vcpu in the first place?

>
> Is there a way to limit the queue size to prevent overflow?
It can be, but it gets awkward. What do you do when a function needs to execute
on another thread, but you can't? Block it? Refuse?

We could pick one, but I see no need. The vast majority of work will never get queued,
since we'll be in the right context already.

* Re: [Qemu-devel] [RFC] queue_work proposal
  2009-09-03 11:15   ` Glauber Costa
@ 2009-09-03 11:32     ` Avi Kivity
  2009-09-03 12:11       ` Glauber Costa
  0 siblings, 1 reply; 9+ messages in thread
From: Avi Kivity @ 2009-09-03 11:32 UTC (permalink / raw)
  To: Glauber Costa; +Cc: aliguori, qemu-devel

On 09/03/2009 02:15 PM, Glauber Costa wrote:
>
>> on_vcpu() and queue_work() are fundamentally different (yes, I see the
>> wait parameter, and I think there should be two separate functions for
>> such different behaviours).
>>      
> Therefore, the name change. The exact on_vcpu behaviour, however, can be
> implemented ontop of queue_work().

Will there be any use for asynchronous queue_work()?

It's a dangerous API.

> Instead of doing that, I opted for using it
> implicitly inside kvm_vcpu_ioctl, to guarantee that vcpu ioctls will always run
> on the right thread context.

I think it's reasonable to demand that whoever calls kvm_vcpu_ioctl() 
know what they are doing (and they'll get surprising results if it 
switches threads implicitly).

> Looking at qemu-kvm, it seems that there are a couple
> of other functions that are not ioctls, and need on_vcpu semantics. Using them becomes
> a simple matter of doing:
>
>     queue_work(env, func, data, 1);
>
> I really don't see the big difference you point. They are both there to force a specific
> function to be executed in the right thread context.
>    

One of them is synchronous, meaning the data can live on stack and no 
special synchronization is needed, while the other is synchronous, 
meaning explicit memory management and end-of-work synchronization is 
needed.

>> Why do we need queue_work() in the first place?
>>      
> To force a function to be executed in the correct thread context.
> Why do we need on_vcpu in the first place?
>    

on_vcpu() is a subset of queue_work().  I meant, why do we need the 
extra functionality?

>> Is there a way to limit the queue size to prevent overflow?
>>      
> It can be, but it gets awkward. What do you do when you want a function needs to execute
> on another thread, but you can't? Block it? Refuse?
>    

What if the thread is busy?  You grow the queue to an unbounded size?

> We could pick one, but I see no need. The vast majority of work will never get queued,
> since we'll be in the right context already.
>    

A more powerful API comes with increased responsibilities.

-- 
error compiling committee.c: too many arguments to function

* Re: [Qemu-devel] [RFC] queue_work proposal
  2009-09-03 11:32     ` Avi Kivity
@ 2009-09-03 12:11       ` Glauber Costa
  2009-09-03 13:43         ` Avi Kivity
  0 siblings, 1 reply; 9+ messages in thread
From: Glauber Costa @ 2009-09-03 12:11 UTC (permalink / raw)
  To: Avi Kivity; +Cc: aliguori, qemu-devel

On Thu, Sep 03, 2009 at 02:32:45PM +0300, Avi Kivity wrote:
> On 09/03/2009 02:15 PM, Glauber Costa wrote:
>>
>>> on_vcpu() and queue_work() are fundamentally different (yes, I see the
>>> wait parameter, and I think there should be two separate functions for
>>> such different behaviours).
>>>      
>> Therefore, the name change. The exact on_vcpu behaviour, however, can be
>> implemented ontop of queue_work().
>
> Will there be any use for asynchronous queue_work()?
>
> It's a dangerous API.
Initially, I thought we could use it for batching, if we forced a flush at the end of
a sequence of operations. This can make things faster if we are doing a bunch of calls
in a row from the wrong thread.

>
>> Instead of doing that, I opted for using it
>> implicitly inside kvm_vcpu_ioctl, to guarantee that vcpu ioctls will always run
>> on the right thread context.
>
> I think it's reasonable to demand that whoever calls kvm_vcpu_ioctl()  
> know what they are doing (and they'll get surprising results if it  
> switches threads implicitly).
I respectfully disagree. Not that I want people not to know what they are doing, but I
believe that forcing something that can only run in a specific thread to actually run there
provides us with a much saner interface, one that will make the code a lot more readable and
maintainable.

>
>> Looking at qemu-kvm, it seems that there are a couple
>> of other functions that are not ioctls, and need on_vcpu semantics. Using them becomes
>> a simple matter of doing:
>>
>>     queue_work(env, func, data, 1);
>>
>> I really don't see the big difference you point. They are both there to force a specific
>> function to be executed in the right thread context.
>>    
>
> One of them is synchronous, meaning the data can live on stack and no  
> special synchronization is needed, while the other is synchronous,  
> meaning explicit memory management and end-of-work synchronization is  
> needed.

I will assume you meant "the other is asynchronous". It does not need to be.
I thought about including the asynchronous version in this RFC to leave the door
open for performance improvements *if* we needed them. But again: the absolute
majority of the calls will be local. So it is not that important.

>
>>> Why do we need queue_work() in the first place?
>>>      
>> To force a function to be executed in the correct thread context.
>> Why do we need on_vcpu in the first place?
>>    
>
> on_vcpu() is a subset of queue_work().  I meant, why to we need the  
> extra functionality?
As I said, if you oppose it that fiercely, we don't really need it.

>
>>> Is there a way to limit the queue size to prevent overflow?
>>>      
>> It can be, but it gets awkward. What do you do when you want a function needs to execute
>> on another thread, but you can't? Block it? Refuse?
>>    
>
> What if the thread is busy?  You grow the queue to an unbounded size?
>
>> We could pick one, but I see no need. The vast majority of work will never get queued,
>> since we'll be in the right context already.
>>    
>
> A more powerful API comes with increased responsibilities.
You suddenly sound like Spider-Man.

* Re: [Qemu-devel] [RFC] queue_work proposal
  2009-09-03 12:11       ` Glauber Costa
@ 2009-09-03 13:43         ` Avi Kivity
  2009-09-03 16:46           ` Glauber Costa
  0 siblings, 1 reply; 9+ messages in thread
From: Avi Kivity @ 2009-09-03 13:43 UTC (permalink / raw)
  To: Glauber Costa; +Cc: aliguori, qemu-devel

On 09/03/2009 03:11 PM, Glauber Costa wrote:
> On Thu, Sep 03, 2009 at 02:32:45PM +0300, Avi Kivity wrote:
>    
>> On 09/03/2009 02:15 PM, Glauber Costa wrote:
>>      
>>>        
>>>> on_vcpu() and queue_work() are fundamentally different (yes, I see the
>>>> wait parameter, and I think there should be two separate functions for
>>>> such different behaviours).
>>>>
>>>>          
>>> Therefore, the name change. The exact on_vcpu behaviour, however, can be
>>> implemented ontop of queue_work().
>>>        
>> Will there be any use for asynchronous queue_work()?
>>
>> It's a dangerous API.
>>      
> Initially, I thought we could use it for batching, if we forced a flush in the end of
> a sequence of operations. This can makes things faster if we are doing a bunch of calls
> in a row, from the wrong thread.
>    

It's a lot easier and safer to write a function that does your batch job 
and on_vcpu() it.

>> I think it's reasonable to demand that whoever calls kvm_vcpu_ioctl()
>> know what they are doing (and they'll get surprising results if it
>> switches threads implicitly).
>>      
> I respectfully disagree. Not that I want people not to know what they are doing, but I
> believe that, forcing something that can only run in a specific thread to be run there,
> provides us with a much saner interface, that will make code a lot more readable and
> maintainable.
>    

Example:

   kvm_vcpu_ioctl(get_regs)
   regs->rflags |= some_flag
   kvm_vcpu_ioctl(set_regs)

This looks totally sane but is racy if kvm_vcpu_ioctl() does an implicit 
on_vcpu().

>> One of them is synchronous, meaning the data can live on stack and no
>> special synchronization is needed, while the other is synchronous,
>> meaning explicit memory management and end-of-work synchronization is
>> needed.
>>      
> I will assume you meant "the other is assynchronous". It does not need to be.
> I though about including the assynchronous version in this RFC to let doors
> open for performance improvements *if* we needed them. But again: the absolute
> majority of the calls will be local. So it is not that important.
>    

Then there's even less reason to include the async version.

>> on_vcpu() is a subset of queue_work().  I meant, why to we need the
>> extra functionality?
>>      
> As I said, if you oppose it hardly, we don't really need to.
>    

I do oppose it, but the reason for not including it should be the 
reasons I cited, not that I oppose it.

>> A more powerful API comes with increased responsibilities.
>>      
> You suddenly sounds like spider man.
>    

I hate it when I get unmasked.

-- 
error compiling committee.c: too many arguments to function

* Re: [Qemu-devel] [RFC] queue_work proposal
  2009-09-03 13:43         ` Avi Kivity
@ 2009-09-03 16:46           ` Glauber Costa
  0 siblings, 0 replies; 9+ messages in thread
From: Glauber Costa @ 2009-09-03 16:46 UTC (permalink / raw)
  To: Avi Kivity; +Cc: aliguori, qemu-devel

On Thu, Sep 03, 2009 at 04:43:27PM +0300, Avi Kivity wrote:
> On 09/03/2009 03:11 PM, Glauber Costa wrote:
>> On Thu, Sep 03, 2009 at 02:32:45PM +0300, Avi Kivity wrote:
>>    
>>> On 09/03/2009 02:15 PM, Glauber Costa wrote:
>>>      
>>>>        
>>>>> on_vcpu() and queue_work() are fundamentally different (yes, I see the
>>>>> wait parameter, and I think there should be two separate functions for
>>>>> such different behaviours).
>>>>>
>>>>>          
>>>> Therefore, the name change. The exact on_vcpu behaviour, however, can be
>>>> implemented ontop of queue_work().
>>>>        
>>> Will there be any use for asynchronous queue_work()?
>>>
>>> It's a dangerous API.
>>>      
>> Initially, I thought we could use it for batching, if we forced a flush in the end of
>> a sequence of operations. This can makes things faster if we are doing a bunch of calls
>> in a row, from the wrong thread.
>>    
>
> It's a lot easier and safer to write a function that does your batch job  
> and on_vcpu() it.
agree.

>
>>> I think it's reasonable to demand that whoever calls kvm_vcpu_ioctl()
>>> know what they are doing (and they'll get surprising results if it
>>> switches threads implicitly).
>>>      
>> I respectfully disagree. Not that I want people not to know what they are doing, but I
>> believe that, forcing something that can only run in a specific thread to be run there,
>> provides us with a much saner interface, that will make code a lot more readable and
>> maintainable.
>>    
>
> Example:
>
>   kvm_vcpu_ioctl(get_regs)
>   regs->rflags |= some_flag
>   kvm_vcpu_ioctl(set_regs)
>
> This looks totally sane but is racy if kvm_vcpu_ioctl() does an implicit  
> on_vcpu().
Not at all. If we do queue_work once, it will execute whatever function you ask for
in the correct thread, and from there on, everything will be local. So even if you nest
functions that call queue_work themselves, only the first will involve any synchronization at all.
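
To make it concrete, your get_regs/set_regs example would be wrapped in a single
queued function, something like this (sketch; SOME_FLAG is just a placeholder):

    static void set_some_flag(void *data)
    {
        CPUState *env = data;
        struct kvm_regs regs;

        kvm_vcpu_ioctl(env, KVM_GET_REGS, &regs);  /* local: we are already on env's thread */
        regs.rflags |= SOME_FLAG;
        kvm_vcpu_ioctl(env, KVM_SET_REGS, &regs);
    }

    qemu_queue_work(env, set_some_flag, env, 1);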

  
>> I will assume you meant "the other is assynchronous". It does not need to be.
>> I though about including the assynchronous version in this RFC to let doors
>> open for performance improvements *if* we needed them. But again: the absolute
>> majority of the calls will be local. So it is not that important.
>>    
>
> Then there's even less reason to include the async version.
I am pretty much convinced by now.

>
>>> on_vcpu() is a subset of queue_work().  I meant, why to we need the
>>> extra functionality?
>>>      
>> As I said, if you oppose it hardly, we don't really need to.
>>    
>
> I do oppose it, but the reason for not including it should be the  
> reasons I cited, not that I oppose it.
For a masked vigilante like you, I thought it was clear enough that "fierce opposition"
automatically meant you had strong reasons, and were actually right.
