All of lore.kernel.org
 help / color / mirror / Atom feed
* [RFC][PATCH] qemu-kvm: Introduce writeback scope for cpu_synchronize_state
@ 2009-11-16 17:00 Jan Kiszka
  2009-11-16 18:20 ` Alexander Graf
  2009-11-16 19:14 ` Avi Kivity
  0 siblings, 2 replies; 19+ messages in thread
From: Jan Kiszka @ 2009-11-16 17:00 UTC (permalink / raw)
  To: Avi Kivity, Marcelo Tosatti; +Cc: kvm, Gleb Natapov

This patch aims at addressing the mp_state writeback issue in a cleaner
fashion. By introducing additional information about the scope of the
scheduled vcpu state writeback, we can simply skin mp_state (and maybe
other specific states in the future) when updating the in-kernel state.

The writeback scope is defined when calling cpu_synchronize_state. It
accumulated, ie. once a full writeback was requested, this will stick
until it was performed.

This unbreaks --disable-kvm builds of qemu-kvm again.

Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
---

A corresponding upstream patch is ready to be posted as well, just
waiting for comments on the general direction from KVM POV.

 cpu-defs.h            |    2 +-
 exec.c                |    4 ++--
 gdbstub.c             |    8 ++++----
 hw/apic.c             |    5 ++---
 hw/pc.c               |    2 +-
 monitor.c             |    6 ++----
 qemu-kvm-ia64.c       |    2 ++
 qemu-kvm-x86.c        |    6 ++++--
 qemu-kvm.c            |   44 +++++++++++++++++++++++++-------------------
 qemu-kvm.h            |   13 ++++++-------
 target-i386/helper.c  |    2 +-
 target-i386/machine.c |    7 ++-----
 target-ppc/machine.c  |    4 ++--
 13 files changed, 54 insertions(+), 51 deletions(-)

diff --git a/cpu-defs.h b/cpu-defs.h
index cf502e9..b7cda81 100644
--- a/cpu-defs.h
+++ b/cpu-defs.h
@@ -142,7 +142,7 @@ struct KVMCPUState {
     pthread_t thread;
     int signalled;
     struct qemu_work_item *queued_work_first, *queued_work_last;
-    int regs_modified;
+    int writeback_scope;
 };
 
 #define CPU_TEMP_BUF_NLONGS 128
diff --git a/exec.c b/exec.c
index fcffb0f..290a565 100644
--- a/exec.c
+++ b/exec.c
@@ -529,14 +529,14 @@ static void cpu_common_pre_save(void *opaque)
 {
     CPUState *env = opaque;
 
-    cpu_synchronize_state(env);
+    cpu_synchronize_state(env, CPU_SYNC_RUNTIME);
 }
 
 static int cpu_common_pre_load(void *opaque)
 {
     CPUState *env = opaque;
 
-    cpu_synchronize_state(env);
+    cpu_synchronize_state(env, CPU_SYNC_RESET);
     return 0;
 }
 
diff --git a/gdbstub.c b/gdbstub.c
index ad7cdca..5a3e5ee 100644
--- a/gdbstub.c
+++ b/gdbstub.c
@@ -1598,7 +1598,7 @@ static void gdb_breakpoint_remove_all(void)
 static void gdb_set_cpu_pc(GDBState *s, target_ulong pc)
 {
 #if defined(TARGET_I386)
-    cpu_synchronize_state(s->c_cpu);
+    cpu_synchronize_state(s->c_cpu, CPU_SYNC_RUNTIME);
     s->c_cpu->eip = pc;
 #elif defined (TARGET_PPC)
     s->c_cpu->nip = pc;
@@ -1785,7 +1785,7 @@ static int gdb_handle_packet(GDBState *s, const char *line_buf)
         }
         break;
     case 'g':
-        cpu_synchronize_state(s->g_cpu);
+        cpu_synchronize_state(s->g_cpu, CPU_SYNC_RUNTIME);
         len = 0;
         for (addr = 0; addr < num_g_regs; addr++) {
             reg_size = gdb_read_register(s->g_cpu, mem_buf + len, addr);
@@ -1795,7 +1795,7 @@ static int gdb_handle_packet(GDBState *s, const char *line_buf)
         put_packet(s, buf);
         break;
     case 'G':
-        cpu_synchronize_state(s->g_cpu);
+        cpu_synchronize_state(s->g_cpu, CPU_SYNC_RUNTIME);
         registers = mem_buf;
         len = strlen(p) / 2;
         hextomem((uint8_t *)registers, p, len);
@@ -1959,7 +1959,7 @@ static int gdb_handle_packet(GDBState *s, const char *line_buf)
             thread = strtoull(p+16, (char **)&p, 16);
             env = find_cpu(thread);
             if (env != NULL) {
-                cpu_synchronize_state(env);
+                cpu_synchronize_state(env, CPU_SYNC_RUNTIME);
                 len = snprintf((char *)mem_buf, sizeof(mem_buf),
                                "CPU#%d [%s]", env->cpu_index,
                                env->halted ? "halted " : "running");
diff --git a/hw/apic.c b/hw/apic.c
index f7cb9d2..abebde3 100644
--- a/hw/apic.c
+++ b/hw/apic.c
@@ -488,7 +488,7 @@ void apic_init_reset(CPUState *env)
     if (!s)
         return;
 
-    cpu_synchronize_state(env);
+    cpu_synchronize_state(env, CPU_SYNC_RESET);
     s->tpr = 0;
     s->spurious_vec = 0xff;
     s->log_dest = 0;
@@ -512,7 +512,6 @@ void apic_init_reset(CPUState *env)
     if (kvm_enabled() && kvm_irqchip_in_kernel()) {
         env->mp_state
             = env->halted ? KVM_MP_STATE_UNINITIALIZED : KVM_MP_STATE_RUNNABLE;
-        kvm_load_mpstate(env);
     }
 #endif
 }
@@ -1070,7 +1069,7 @@ static void apic_reset(void *opaque)
     APICState *s = opaque;
     int bsp;
 
-    cpu_synchronize_state(s->cpu_env);
+    cpu_synchronize_state(s->cpu_env, CPU_SYNC_RESET);
 
     bsp = cpu_is_bsp(s->cpu_env);
     s->apicbase = 0xfee00000 |
diff --git a/hw/pc.c b/hw/pc.c
index 5d90f8c..23d4a8e 100644
--- a/hw/pc.c
+++ b/hw/pc.c
@@ -1021,7 +1021,7 @@ CPUState *pc_new_cpu(const char *cpu_model)
         fprintf(stderr, "Unable to find x86 CPU definition\n");
         exit(1);
     }
-    env->kvm_cpu_state.regs_modified = 1;
+    env->kvm_cpu_state.writeback_scope = CPU_SYNC_RESET;
     if ((env->cpuid_features & CPUID_APIC) || smp_cpus > 1) {
         env->cpuid_apic_id = env->cpu_index;
         /* APIC reset callback resets cpu */
diff --git a/monitor.c b/monitor.c
index c683eb2..a476224 100644
--- a/monitor.c
+++ b/monitor.c
@@ -394,8 +394,7 @@ static CPUState *mon_get_cpu(void)
     if (!cur_mon->mon_cpu) {
         mon_set_cpu(0);
     }
-    cpu_synchronize_state(cur_mon->mon_cpu);
-    kvm_save_mpstate(cur_mon->mon_cpu);
+    cpu_synchronize_state(cur_mon->mon_cpu, CPU_SYNC_RUNTIME);
     return cur_mon->mon_cpu;
 }
 
@@ -486,8 +485,7 @@ static void do_info_cpus(Monitor *mon, QObject **ret_data)
         const char *answer;
         QDict *cpu = qdict_new();
 
-        cpu_synchronize_state(env);
-        kvm_save_mpstate(env);
+        cpu_synchronize_state(env, CPU_SYNC_RUNTIME);
 
         qdict_put(cpu, "CPU", qint_from_int(env->cpu_index));
         answer = (env == mon->mon_cpu) ? "yes" : "no";
diff --git a/qemu-kvm-ia64.c b/qemu-kvm-ia64.c
index a11fde8..fbf5dd6 100644
--- a/qemu-kvm-ia64.c
+++ b/qemu-kvm-ia64.c
@@ -103,6 +103,8 @@ void kvm_arch_save_mpstate(CPUState *env)
         env->mp_state = -1;
     else
         env->mp_state = mp_state.mp_state;
+    if (kvm_irqchip_in_kernel(kvm_context))
+        env->halted = (env->mp_state == KVM_MP_STATE_HALTED);
 #endif
 }
 
diff --git a/qemu-kvm-x86.c b/qemu-kvm-x86.c
index 9df0d83..2a45189 100644
--- a/qemu-kvm-x86.c
+++ b/qemu-kvm-x86.c
@@ -1040,6 +1040,8 @@ void kvm_arch_save_mpstate(CPUState *env)
         env->mp_state = -1;
     else
         env->mp_state = mp_state.mp_state;
+    if (kvm_irqchip_in_kernel())
+        env->halted = (env->mp_state == KVM_MP_STATE_HALTED);
 #else
     env->mp_state = -1;
 #endif
@@ -1675,11 +1677,11 @@ uint32_t kvm_arch_get_supported_cpuid(CPUState *env, uint32_t function,
 void kvm_arch_process_irqchip_events(CPUState *env)
 {
     if (env->interrupt_request & CPU_INTERRUPT_INIT) {
-        kvm_cpu_synchronize_state(env);
+        kvm_cpu_synchronize_state(env, CPU_SYNC_RESET);
         do_cpu_init(env);
     }
     if (env->interrupt_request & CPU_INTERRUPT_SIPI) {
-        kvm_cpu_synchronize_state(env);
+        kvm_cpu_synchronize_state(env, CPU_SYNC_RESET);
         do_cpu_sipi(env);
     }
 }
diff --git a/qemu-kvm.c b/qemu-kvm.c
index 05caa1c..ca81a4a 100644
--- a/qemu-kvm.c
+++ b/qemu-kvm.c
@@ -906,9 +906,9 @@ int kvm_run(CPUState *env)
         run->request_interrupt_window = kvm_arch_try_push_interrupts(env);
 #endif
 
-    if (env->kvm_cpu_state.regs_modified) {
-        kvm_arch_put_registers(env);
-        env->kvm_cpu_state.regs_modified = 0;
+    if (env->kvm_cpu_state.writeback_scope) {
+        kvm_arch_put_registers(env, env->kvm_cpu_state.writeback_scope);
+        env->kvm_cpu_state.writeback_scope = 0;
     }
 
     r = pre_kvm_run(kvm, env);
@@ -1533,22 +1533,31 @@ static void on_vcpu(CPUState *env, void (*func)(void *data), void *data)
 
 void kvm_arch_get_registers(CPUState *env)
 {
-	kvm_arch_save_regs(env);
+    kvm_arch_save_regs(env);
+    kvm_arch_save_mpstate(env);
 }
 
-static void do_kvm_cpu_synchronize_state(void *_env)
+void kvm_arch_put_registers(CPUState *env, int scope)
 {
-    CPUState *env = _env;
-    if (!env->kvm_cpu_state.regs_modified) {
-        kvm_arch_get_registers(env);
-        env->kvm_cpu_state.regs_modified = 1;
+    kvm_load_registers(env);
+    if (scope & CPU_SYNC_RESET) {
+        kvm_load_mpstate(env);
     }
 }
 
-void kvm_cpu_synchronize_state(CPUState *env)
+static void kvm_do_synchronize_state(void *_env)
 {
-    if (!env->kvm_cpu_state.regs_modified)
-        on_vcpu(env, do_kvm_cpu_synchronize_state, env);
+    CPUState *env = _env;
+
+    kvm_arch_get_registers(env);
+}
+
+void kvm_cpu_synchronize_state(CPUState *env, int writeback_scope)
+{
+    if (!env->kvm_cpu_state.writeback_scope) {
+        on_vcpu(env, kvm_do_synchronize_state, env);
+        env->kvm_cpu_state.writeback_scope |= writeback_scope;
+    }
 }
 
 static void inject_interrupt(void *data)
@@ -1627,10 +1636,6 @@ static void kvm_do_save_mpstate(void *_env)
     CPUState *env = _env;
 
     kvm_arch_save_mpstate(env);
-#ifdef KVM_CAP_MP_STATE
-    if (kvm_irqchip_in_kernel())
-        env->halted = (env->mp_state == KVM_MP_STATE_HALTED);
-#endif
 }
 
 void kvm_save_mpstate(CPUState *env)
@@ -2369,9 +2374,10 @@ static void kvm_invoke_set_guest_debug(void *data)
 {
     struct kvm_set_guest_debug_data *dbg_data = data;
 
-    if (cpu_single_env->kvm_cpu_state.regs_modified) {
-        kvm_arch_put_registers(cpu_single_env);
-        cpu_single_env->kvm_cpu_state.regs_modified = 0;
+    if (cpu_single_env->kvm_cpu_state.writeback_scope) {
+        kvm_arch_put_registers(cpu_single_env,
+                               cpu_single_env->kvm_cpu_state.writeback_scope);
+        cpu_single_env->kvm_cpu_state.writeback_scope = 0;
     }
     dbg_data->err =
         kvm_set_guest_debug(cpu_single_env,
diff --git a/qemu-kvm.h b/qemu-kvm.h
index f897c7c..f791345 100644
--- a/qemu-kvm.h
+++ b/qemu-kvm.h
@@ -1098,18 +1098,17 @@ static inline int kvm_sync_vcpus(void)
 
 #ifndef QEMU_KVM_NO_CPU
 void kvm_arch_get_registers(CPUState *env);
+void kvm_arch_put_registers(CPUState *env, int scope);
 
-static inline void kvm_arch_put_registers(CPUState *env)
-{
-    kvm_load_registers(env);
-}
+void kvm_cpu_synchronize_state(CPUState *env, int writeback_scope);
 
-void kvm_cpu_synchronize_state(CPUState *env);
+#define CPU_SYNC_RUNTIME	(1 << 0)
+#define CPU_SYNC_RESET		(1 << 1)
 
-static inline void cpu_synchronize_state(CPUState *env)
+static inline void cpu_synchronize_state(CPUState *env, int writeback_scope)
 {
     if (kvm_enabled()) {
-        kvm_cpu_synchronize_state(env);
+        kvm_cpu_synchronize_state(env, writeback_scope);
     }
 }
 
diff --git a/target-i386/helper.c b/target-i386/helper.c
index 4920708..8e2b15f 100644
--- a/target-i386/helper.c
+++ b/target-i386/helper.c
@@ -745,7 +745,7 @@ void cpu_dump_state(CPUState *env, FILE *f,
     char cc_op_name[32];
     static const char *seg_name[6] = { "ES", "CS", "SS", "DS", "FS", "GS" };
 
-    cpu_synchronize_state(env);
+    cpu_synchronize_state(env, CPU_SYNC_RUNTIME);
 
     eflags = env->eflags;
 #ifdef TARGET_X86_64
diff --git a/target-i386/machine.c b/target-i386/machine.c
index 2b88fea..23ae142 100644
--- a/target-i386/machine.c
+++ b/target-i386/machine.c
@@ -323,8 +323,7 @@ static void cpu_pre_save(void *opaque)
     CPUState *env = opaque;
     int i, bit;
 
-    cpu_synchronize_state(env);
-    kvm_save_mpstate(env);
+    cpu_synchronize_state(env, CPU_SYNC_RUNTIME);
 
     /* FPU */
     env->fpus_vmstate = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
@@ -355,7 +354,7 @@ static int cpu_pre_load(void *opaque)
 {
     CPUState *env = opaque;
 
-    cpu_synchronize_state(env);
+    cpu_synchronize_state(env, CPU_SYNC_RESET);
     return 0;
 }
 
@@ -386,8 +385,6 @@ static int cpu_post_load(void *opaque, int version_id)
     }
 
     tlb_flush(env, 1);
-    kvm_load_mpstate(env);
-
     return 0;
 }
 
diff --git a/target-ppc/machine.c b/target-ppc/machine.c
index ead38e1..f190279 100644
--- a/target-ppc/machine.c
+++ b/target-ppc/machine.c
@@ -8,7 +8,7 @@ void cpu_save(QEMUFile *f, void *opaque)
     CPUState *env = (CPUState *)opaque;
     unsigned int i, j;
 
-    cpu_synchronize_state(env);
+    cpu_synchronize_state(env, CPU_SYNC_RUNTIME);
 
     for (i = 0; i < 32; i++)
         qemu_put_betls(f, &env->gpr[i]);
@@ -97,7 +97,7 @@ int cpu_load(QEMUFile *f, void *opaque, int version_id)
     CPUState *env = (CPUState *)opaque;
     unsigned int i, j;
 
-    cpu_synchronize_state(env);
+    cpu_synchronize_state(env, CPU_SYNC_RESET);
 
     for (i = 0; i < 32; i++)
         qemu_get_betls(f, &env->gpr[i]);

^ permalink raw reply related	[flat|nested] 19+ messages in thread

* Re: [RFC][PATCH] qemu-kvm: Introduce writeback scope for cpu_synchronize_state
  2009-11-16 17:00 [RFC][PATCH] qemu-kvm: Introduce writeback scope for cpu_synchronize_state Jan Kiszka
@ 2009-11-16 18:20 ` Alexander Graf
  2009-11-16 19:14 ` Avi Kivity
  1 sibling, 0 replies; 19+ messages in thread
From: Alexander Graf @ 2009-11-16 18:20 UTC (permalink / raw)
  To: Jan Kiszka; +Cc: Avi Kivity, Marcelo Tosatti, kvm, Gleb Natapov


Am 16.11.2009 um 18:00 schrieb Jan Kiszka <jan.kiszka@siemens.com>:

> This patch aims at addressing the mp_state writeback issue in a  
> cleaner
> fashion. By introducing additional information about the scope of the
> scheduled vcpu state writeback, we can simply skin mp_state (and maybe
> other specific states in the future) when updating the in-kernel  
> state.
>
> The writeback scope is defined when calling cpu_synchronize_state. It
> accumulated, ie. once a full writeback was requested, this will stick
> until it was performed.
>
> This unbreaks --disable-kvm builds of qemu-kvm again.
>
> Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
> ---
>
> A corresponding upstream patch is ready to be posted as well, just
> waiting for comments on the general direction from KVM POV.

I think I'd rather have a sync function that implicitly does the  
RUNTIME sync, the way it is now, and an 'advanced' one you can pass a  
constant what it syncs.

That way most code continues to work the way it is now. It also makes  
it easier readable IMHO.


Alex

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [RFC][PATCH] qemu-kvm: Introduce writeback scope for cpu_synchronize_state
  2009-11-16 17:00 [RFC][PATCH] qemu-kvm: Introduce writeback scope for cpu_synchronize_state Jan Kiszka
  2009-11-16 18:20 ` Alexander Graf
@ 2009-11-16 19:14 ` Avi Kivity
  2009-11-16 21:22   ` Jan Kiszka
  1 sibling, 1 reply; 19+ messages in thread
From: Avi Kivity @ 2009-11-16 19:14 UTC (permalink / raw)
  To: Jan Kiszka; +Cc: Marcelo Tosatti, kvm, Gleb Natapov

On 11/16/2009 07:00 PM, Jan Kiszka wrote:
> This patch aims at addressing the mp_state writeback issue in a cleaner
> fashion.

What's the issue?  the fact that mp_state is updated whenever state is 
synchronized, while it could be simultaneously updated from other vcpus 
(which latter updates are then lost)?

> By introducing additional information about the scope of the
> scheduled vcpu state writeback, we can simply skin mp_state (and maybe
> other specific states in the future) when updating the in-kernel state.
>
> The writeback scope is defined when calling cpu_synchronize_state. It
> accumulated, ie. once a full writeback was requested, this will stick
> until it was performed.
>    

Maybe it's just simpler to divorce mp_state from the rest of the state.

-- 
Do not meddle in the internals of kernels, for they are subtle and quick to panic.


^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [RFC][PATCH] qemu-kvm: Introduce writeback scope for cpu_synchronize_state
  2009-11-16 19:14 ` Avi Kivity
@ 2009-11-16 21:22   ` Jan Kiszka
  2009-11-17  8:05     ` Avi Kivity
  0 siblings, 1 reply; 19+ messages in thread
From: Jan Kiszka @ 2009-11-16 21:22 UTC (permalink / raw)
  To: Avi Kivity; +Cc: Marcelo Tosatti, kvm, Gleb Natapov

[-- Attachment #1: Type: text/plain, Size: 1365 bytes --]

Avi Kivity wrote:
> On 11/16/2009 07:00 PM, Jan Kiszka wrote:
>> This patch aims at addressing the mp_state writeback issue in a cleaner
>> fashion.
> 
> What's the issue?  the fact that mp_state is updated whenever state is
> synchronized, while it could be simultaneously updated from other vcpus
> (which latter updates are then lost)?

Right, the issue b8a7857071 addressed. But that approach spreads more
kvm_* fragments in unrelated qemu code, e.g. the monitor, and fails to
update other parts (gdbstub). And it doesn't care about what happens if
kvm is off at build or runtime. Such things are better addressed in
upstream by encapsulating kvm calls in synchronization points.

> 
>> By introducing additional information about the scope of the
>> scheduled vcpu state writeback, we can simply skin mp_state (and maybe
>> other specific states in the future) when updating the in-kernel state.
>>
>> The writeback scope is defined when calling cpu_synchronize_state. It
>> accumulated, ie. once a full writeback was requested, this will stick
>> until it was performed.
>>    
> 
> Maybe it's just simpler to divorce mp_state from the rest of the state.

That won't solve the core issue. mp_state *is* part of the state, and
needs to be read (to update halted) and sometimes also written when the
state was hard reset.

Jan


[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 257 bytes --]

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [RFC][PATCH] qemu-kvm: Introduce writeback scope for cpu_synchronize_state
  2009-11-16 21:22   ` Jan Kiszka
@ 2009-11-17  8:05     ` Avi Kivity
  2009-11-17  8:14       ` Jan Kiszka
  0 siblings, 1 reply; 19+ messages in thread
From: Avi Kivity @ 2009-11-17  8:05 UTC (permalink / raw)
  To: Jan Kiszka; +Cc: Marcelo Tosatti, kvm, Gleb Natapov

On 11/16/2009 11:22 PM, Jan Kiszka wrote:
> Avi Kivity wrote:
>    
>> On 11/16/2009 07:00 PM, Jan Kiszka wrote:
>>      
>>> This patch aims at addressing the mp_state writeback issue in a cleaner
>>> fashion.
>>>        
>> What's the issue?  the fact that mp_state is updated whenever state is
>> synchronized, while it could be simultaneously updated from other vcpus
>> (which latter updates are then lost)?
>>      
> Right, the issue b8a7857071 addressed. But that approach spreads more
> kvm_* fragments in unrelated qemu code, e.g. the monitor, and fails to
> update other parts (gdbstub). And it doesn't care about what happens if
> kvm is off at build or runtime. Such things are better addressed in
> upstream by encapsulating kvm calls in synchronization points.
>    

Note we have the same issue with nmi and the sipi vector - any vcpu 
state that is updated outside the vcpu thread.  These are particularly 
bad since we can't exclude them from updates without excluding other 
state as well.

The whole issue is tricky.  I'm inclined to pretend we never meant any 
vcpu state (outside lapic) to be asynchronous and declare the whole 
thing a bug.  We could fix it by modeling external changes to state 
(INIT, SIPI, NMI) as messages queued to the vcpu, to be processed in the 
vcpu thread.  The queue would be drained before running the vcpu or 
before reading state from userspace, so the message queue contents can 
never be observed and never lost.

Of course, we can't really implement this as a queue (SIGSTOP vcpu 
thread -> overflow), but a word is sufficient.  INIT writes the word, 
everything else uses compare-and-swap or set_bit to raise events (e.g. 
SIPI = do { oldq = vcpu->queue; newq = (oldq & ~SIPI_MASK) | sipi_vector 
| RUNNING; } while (!cas(&vcpu->queue, oldq, newq)))

-- 
Do not meddle in the internals of kernels, for they are subtle and quick to panic.


^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [RFC][PATCH] qemu-kvm: Introduce writeback scope for cpu_synchronize_state
  2009-11-17  8:05     ` Avi Kivity
@ 2009-11-17  8:14       ` Jan Kiszka
  2009-11-17  8:37         ` Avi Kivity
  0 siblings, 1 reply; 19+ messages in thread
From: Jan Kiszka @ 2009-11-17  8:14 UTC (permalink / raw)
  To: Avi Kivity; +Cc: Marcelo Tosatti, kvm, Gleb Natapov

[-- Attachment #1: Type: text/plain, Size: 2164 bytes --]

Avi Kivity wrote:
> On 11/16/2009 11:22 PM, Jan Kiszka wrote:
>> Avi Kivity wrote:
>>   
>>> On 11/16/2009 07:00 PM, Jan Kiszka wrote:
>>>     
>>>> This patch aims at addressing the mp_state writeback issue in a cleaner
>>>> fashion.
>>>>        
>>> What's the issue?  the fact that mp_state is updated whenever state is
>>> synchronized, while it could be simultaneously updated from other vcpus
>>> (which latter updates are then lost)?
>>>      
>> Right, the issue b8a7857071 addressed. But that approach spreads more
>> kvm_* fragments in unrelated qemu code, e.g. the monitor, and fails to
>> update other parts (gdbstub). And it doesn't care about what happens if
>> kvm is off at build or runtime. Such things are better addressed in
>> upstream by encapsulating kvm calls in synchronization points.
>>    
> 
> Note we have the same issue with nmi and the sipi vector - any vcpu

Good point.

> state that is updated outside the vcpu thread.  These are particularly
> bad since we can't exclude them from updates without excluding other
> state as well.

We easily can, using the very same mechanism: No need to overwrite any
of the kvm_vcpu_events during runtime, only on reset/vmload).

> 
> The whole issue is tricky.  I'm inclined to pretend we never meant any
> vcpu state (outside lapic) to be asynchronous and declare the whole
> thing a bug.  We could fix it by modeling external changes to state
> (INIT, SIPI, NMI) as messages queued to the vcpu, to be processed in the
> vcpu thread.  The queue would be drained before running the vcpu or
> before reading state from userspace, so the message queue contents can
> never be observed and never lost.
> 
> Of course, we can't really implement this as a queue (SIGSTOP vcpu
> thread -> overflow), but a word is sufficient.  INIT writes the word,
> everything else uses compare-and-swap or set_bit to raise events (e.g.
> SIPI = do { oldq = vcpu->queue; newq = (oldq & ~SIPI_MASK) | sipi_vector
> | RUNNING; } while (!cas(&vcpu->queue, oldq, newq)))
> 

I do not yet see why we need this complication, why the proposed model
isn't enough.

Jan


[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 257 bytes --]

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [RFC][PATCH] qemu-kvm: Introduce writeback scope for cpu_synchronize_state
  2009-11-17  8:14       ` Jan Kiszka
@ 2009-11-17  8:37         ` Avi Kivity
  2009-11-17  9:16           ` Jan Kiszka
  0 siblings, 1 reply; 19+ messages in thread
From: Avi Kivity @ 2009-11-17  8:37 UTC (permalink / raw)
  To: Jan Kiszka; +Cc: Marcelo Tosatti, kvm, Gleb Natapov

On 11/17/2009 10:14 AM, Jan Kiszka wrote:
>
>
>> state that is updated outside the vcpu thread.  These are particularly
>> bad since we can't exclude them from updates without excluding other
>> state as well.
>>      
> We easily can, using the very same mechanism: No need to overwrite any
> of the kvm_vcpu_events during runtime, only on reset/vmload).
>    

That's because qemu has no need for this.  But kvm is more than just 
serving qemu, we try to be more general.  That said, I can't really see 
anyone wanting to arbitrarily inject an exception.

>> The whole issue is tricky.  I'm inclined to pretend we never meant any
>> vcpu state (outside lapic) to be asynchronous and declare the whole
>> thing a bug.  We could fix it by modeling external changes to state
>> (INIT, SIPI, NMI) as messages queued to the vcpu, to be processed in the
>> vcpu thread.  The queue would be drained before running the vcpu or
>> before reading state from userspace, so the message queue contents can
>> never be observed and never lost.
>>
>> Of course, we can't really implement this as a queue (SIGSTOP vcpu
>> thread ->  overflow), but a word is sufficient.  INIT writes the word,
>> everything else uses compare-and-swap or set_bit to raise events (e.g.
>> SIPI = do { oldq = vcpu->queue; newq = (oldq&  ~SIPI_MASK) | sipi_vector
>> | RUNNING; } while (!cas(&vcpu->queue, oldq, newq)))
>>
>>      
> I do not yet see why we need this complication, why the proposed model
> isn't enough.
>    

The current interface is subtly dangerous, you can't run set(get()) as 
you would expect.

(well you can't with the lapic or the tsc msr either...)

-- 
error compiling committee.c: too many arguments to function


^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [RFC][PATCH] qemu-kvm: Introduce writeback scope for cpu_synchronize_state
  2009-11-17  8:37         ` Avi Kivity
@ 2009-11-17  9:16           ` Jan Kiszka
  2009-11-17 12:37             ` Avi Kivity
  0 siblings, 1 reply; 19+ messages in thread
From: Jan Kiszka @ 2009-11-17  9:16 UTC (permalink / raw)
  To: Avi Kivity; +Cc: Marcelo Tosatti, kvm, Gleb Natapov

[-- Attachment #1: Type: text/plain, Size: 2074 bytes --]

Avi Kivity wrote:
> On 11/17/2009 10:14 AM, Jan Kiszka wrote:
>>
>>
>>> state that is updated outside the vcpu thread.  These are particularly
>>> bad since we can't exclude them from updates without excluding other
>>> state as well.
>>>      
>> We easily can, using the very same mechanism: No need to overwrite any
>> of the kvm_vcpu_events during runtime, only on reset/vmload).
>>    
> 
> That's because qemu has no need for this.  But kvm is more than just
> serving qemu, we try to be more general.  That said, I can't really see
> anyone wanting to arbitrarily inject an exception.

Well, the current API comes with millions of ways to shoot yourself into
the foot. I don't think we can avoid them all.

> 
>>> The whole issue is tricky.  I'm inclined to pretend we never meant any
>>> vcpu state (outside lapic) to be asynchronous and declare the whole
>>> thing a bug.  We could fix it by modeling external changes to state
>>> (INIT, SIPI, NMI) as messages queued to the vcpu, to be processed in the
>>> vcpu thread.  The queue would be drained before running the vcpu or
>>> before reading state from userspace, so the message queue contents can
>>> never be observed and never lost.
>>>
>>> Of course, we can't really implement this as a queue (SIGSTOP vcpu
>>> thread ->  overflow), but a word is sufficient.  INIT writes the word,
>>> everything else uses compare-and-swap or set_bit to raise events (e.g.
>>> SIPI = do { oldq = vcpu->queue; newq = (oldq&  ~SIPI_MASK) | sipi_vector
>>> | RUNNING; } while (!cas(&vcpu->queue, oldq, newq)))
>>>
>>>      
>> I do not yet see why we need this complication, why the proposed model
>> isn't enough.
>>    
> 
> The current interface is subtly dangerous, you can't run set(get()) as
> you would expect.
> 
> (well you can't with the lapic or the tsc msr either...)
> 

We may start documenting such dependency in kvm/api.txt. On the other
hand, if you have a get/set interface vs. an inject channel, I think
it's obvious that one can overwrite the other.

Jan


[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 257 bytes --]

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [RFC][PATCH] qemu-kvm: Introduce writeback scope for cpu_synchronize_state
  2009-11-17  9:16           ` Jan Kiszka
@ 2009-11-17 12:37             ` Avi Kivity
  2009-11-17 13:05               ` Jan Kiszka
  0 siblings, 1 reply; 19+ messages in thread
From: Avi Kivity @ 2009-11-17 12:37 UTC (permalink / raw)
  To: Jan Kiszka; +Cc: Marcelo Tosatti, kvm, Gleb Natapov

On 11/17/2009 11:16 AM, Jan Kiszka wrote:
>
>> That's because qemu has no need for this.  But kvm is more than just
>> serving qemu, we try to be more general.  That said, I can't really see
>> anyone wanting to arbitrarily inject an exception.
>>      
> Well, the current API comes with millions of ways to shoot yourself into
> the foot. I don't think we can avoid them all.
>    

It would be nice to make the API saner.  Do you know of more holes?

>> The current interface is subtly dangerous, you can't run set(get()) as
>> you would expect.
>>
>> (well you can't with the lapic or the tsc msr either...)
>>
>>      
> We may start documenting such dependency in kvm/api.txt. On the other
> hand, if you have a get/set interface vs. an inject channel, I think
> it's obvious that one can overwrite the other.
>    

Problem is, the inject channels are implied (APIC messages in smp 
guests).  Documentation is good, but if we can avoid it that's better.

Note the only way to rmw vcpu events during smp is pausing the guest, 
because of this race.

-- 
error compiling committee.c: too many arguments to function


^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [RFC][PATCH] qemu-kvm: Introduce writeback scope for cpu_synchronize_state
  2009-11-17 12:37             ` Avi Kivity
@ 2009-11-17 13:05               ` Jan Kiszka
  2009-11-17 13:28                 ` Avi Kivity
  0 siblings, 1 reply; 19+ messages in thread
From: Jan Kiszka @ 2009-11-17 13:05 UTC (permalink / raw)
  To: Avi Kivity; +Cc: Marcelo Tosatti, kvm, Gleb Natapov

[-- Attachment #1: Type: text/plain, Size: 1493 bytes --]

Avi Kivity wrote:
> On 11/17/2009 11:16 AM, Jan Kiszka wrote:
>>
>>> That's because qemu has no need for this.  But kvm is more than just
>>> serving qemu, we try to be more general.  That said, I can't really see
>>> anyone wanting to arbitrarily inject an exception.
>>>      
>> Well, the current API comes with millions of ways to shoot yourself into
>> the foot. I don't think we can avoid them all.
>>    
> 
> It would be nice to make the API saner.  Do you know of more holes?
> 
>>> The current interface is subtly dangerous, you can't run set(get()) as
>>> you would expect.
>>>
>>> (well you can't with the lapic or the tsc msr either...)
>>>
>>>      
>> We may start documenting such dependency in kvm/api.txt. On the other
>> hand, if you have a get/set interface vs. an inject channel, I think
>> it's obvious that one can overwrite the other.
>>    
> 
> Problem is, the inject channels are implied (APIC messages in smp
> guests).  Documentation is good, but if we can avoid it that's better.
> 
> Note the only way to rmw vcpu events during smp is pausing the guest,
> because of this race.

That's what qemu does on reset and load.

The alternative would be a complex get&lock/put&unlock + a queue for
async events during the lock + an option to ignore what was queued when
doing a true reset. Back to square #1: we would still need the proposed
high-level interface to communicate the difference between replay and
drop queue.

Jan


[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 257 bytes --]

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [RFC][PATCH] qemu-kvm: Introduce writeback scope for cpu_synchronize_state
  2009-11-17 13:05               ` Jan Kiszka
@ 2009-11-17 13:28                 ` Avi Kivity
  2009-11-17 14:12                   ` Jan Kiszka
  0 siblings, 1 reply; 19+ messages in thread
From: Avi Kivity @ 2009-11-17 13:28 UTC (permalink / raw)
  To: Jan Kiszka; +Cc: Marcelo Tosatti, kvm, Gleb Natapov

On 11/17/2009 03:05 PM, Jan Kiszka wrote:
>
>> Problem is, the inject channels are implied (APIC messages in smp
>> guests).  Documentation is good, but if we can avoid it that's better.
>>
>> Note the only way to rmw vcpu events during smp is pausing the guest,
>> because of this race.
>>      
> That's what qemu does on reset and load.
>    

These aren't rmw.

> The alternative would be a complex get&lock/put&unlock + a queue for
> async events during the lock + an option to ignore what was queued when
> doing a true reset. Back to square #1: we would still need the proposed
> high-level interface to communicate the difference between replay and
> drop queue.
>    

There's no need for get+lock / put+unlock; a normal get/put with the 
addition that get flushes the queue suffices.  To make sure queued 
events don't affect set you need to stop the entire VM before setting 
state, but you need to do that anyway for non-rmw writes.

-- 
error compiling committee.c: too many arguments to function


^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [RFC][PATCH] qemu-kvm: Introduce writeback scope for cpu_synchronize_state
  2009-11-17 13:28                 ` Avi Kivity
@ 2009-11-17 14:12                   ` Jan Kiszka
  2009-11-17 14:25                     ` Avi Kivity
  0 siblings, 1 reply; 19+ messages in thread
From: Jan Kiszka @ 2009-11-17 14:12 UTC (permalink / raw)
  To: Avi Kivity; +Cc: Marcelo Tosatti, kvm, Gleb Natapov

[-- Attachment #1: Type: text/plain, Size: 1638 bytes --]

Avi Kivity wrote:
> On 11/17/2009 03:05 PM, Jan Kiszka wrote:
>>
>>> Problem is, the inject channels are implied (APIC messages in smp
>>> guests).  Documentation is good, but if we can avoid it that's better.
>>>
>>> Note the only way to rmw vcpu events during smp is pausing the guest,
>>> because of this race.
>>>      
>> That's what qemu does on reset and load.
>>    
> 
> These aren't rmw.

Not logically, but ATM technically.

> 
>> The alternative would be a complex get&lock/put&unlock + a queue for
>> async events during the lock + an option to ignore what was queued when
>> doing a true reset. Back to square #1: we would still need the proposed
>> high-level interface to communicate the difference between replay and
>> drop queue.
>>    
> 
> There's no need for get+lock / put+unlock; a normal get/put with the

You need to track when to queue and when to apply directly. Call it lock
or call it something else.

> addition that get flushes the queue suffices.  To make sure queued
> events don't affect set you need to stop the entire VM before setting
> state, but you need to do that anyway for non-rmw writes.
> 

Well, sounds good, but it will be a non-trivial change in the interface
semantics. At bare minimum, we would need a new mp_state interface. If
we would count mp_state to our new event structure (hmm...), then we
could confine the semantical changes to that new IOCTL pair. But how to
deal with existing KVM kernels with their mp_state interface? It's a bit
like the vcpu state thing: we are already down a specific road, and it's
hard to turn around.

Jan


[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 257 bytes --]

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [RFC][PATCH] qemu-kvm: Introduce writeback scope for cpu_synchronize_state
  2009-11-17 14:12                   ` Jan Kiszka
@ 2009-11-17 14:25                     ` Avi Kivity
  2009-11-17 16:50                       ` Jan Kiszka
  0 siblings, 1 reply; 19+ messages in thread
From: Avi Kivity @ 2009-11-17 14:25 UTC (permalink / raw)
  To: Jan Kiszka; +Cc: Marcelo Tosatti, kvm, Gleb Natapov

On 11/17/2009 04:12 PM, Jan Kiszka wrote:
>>
>>> The alternative would be a complex get&lock/put&unlock + a queue for
>>> async events during the lock + an option to ignore what was queued when
>>> doing a true reset. Back to square #1: we would still need the proposed
>>> high-level interface to communicate the difference between replay and
>>> drop queue.
>>>    
>>>       
>> There's no need for get+lock / put+unlock; a normal get/put with the
>>     
> You need to track when to queue and when to apply directly. Call it lock
> or call it something else.
>   

You always queue.  When starting vcpu_run() or reading state to
userspace you flush the queue.

The hardware equivalent is posting APIC messages, and the core executing
them.

>> addition that get flushes the queue suffices.  To make sure queued
>> events don't affect set you need to stop the entire VM before setting
>> state, but you need to do that anyway for non-rmw writes.
>>
>>     
> Well, sounds good, but it will be a non-trivial change in the interface
> semantics. At bare minimum, we would need a new mp_state interface. If
> we would count mp_state to our new event structure (hmm...), then we
> could confine the semantical changes to that new IOCTL pair. But how to
> deal with existing KVM kernels with their mp_state interface? It's a bit
> like the vcpu state thing: we are already down a specific road, and it's
> hard to turn around.
>   

I think we're not on the same page here.  As I see it, no interface
change is needed at all.

It's true that existing kernels don't handle this properly, which is why
I said I'm willing to treat it as a bug (and thus the -stable treatment
etc.).  I admit it's a stretch since this is not going to be trivial
(though I think less complex that you believe).

Putting mp_state into the events structure is reasonable regardless of
this issue (and doable since we haven't pushed it to 2.6.33 yet).  But I
want to understand why you think it's needed.

-- 
error compiling committee.c: too many arguments to function


^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [RFC][PATCH] qemu-kvm: Introduce writeback scope for cpu_synchronize_state
  2009-11-17 14:25                     ` Avi Kivity
@ 2009-11-17 16:50                       ` Jan Kiszka
  2009-11-17 16:58                         ` Jan Kiszka
  2009-11-17 16:59                         ` Avi Kivity
  0 siblings, 2 replies; 19+ messages in thread
From: Jan Kiszka @ 2009-11-17 16:50 UTC (permalink / raw)
  To: Avi Kivity; +Cc: Marcelo Tosatti, kvm, Gleb Natapov

[-- Attachment #1: Type: text/plain, Size: 2386 bytes --]

Avi Kivity wrote:
> On 11/17/2009 04:12 PM, Jan Kiszka wrote:
>>>> The alternative would be a complex get&lock/put&unlock + a queue for
>>>> async events during the lock + an option to ignore what was queued when
>>>> doing a true reset. Back to square #1: we would still need the proposed
>>>> high-level interface to communicate the difference between replay and
>>>> drop queue.
>>>>    
>>>>       
>>> There's no need for get+lock / put+unlock; a normal get/put with the
>>>     
>> You need to track when to queue and when to apply directly. Call it lock
>> or call it something else.
>>   
> 
> You always queue.  When starting vcpu_run() or reading state to
> userspace you flush the queue.

Now I finally got your idea.

> 
> The hardware equivalent is posting APIC messages, and the core executing
> them.
> 
>>> addition that get flushes the queue suffices.  To make sure queued
>>> events don't affect set you need to stop the entire VM before setting
>>> state, but you need to do that anyway for non-rmw writes.
>>>
>>>     
>> Well, sounds good, but it will be a non-trivial change in the interface
>> semantics. At bare minimum, we would need a new mp_state interface. If
>> we would count mp_state to our new event structure (hmm...), then we
>> could confine the semantical changes to that new IOCTL pair. But how to
>> deal with existing KVM kernels with their mp_state interface? It's a bit
>> like the vcpu state thing: we are already down a specific road, and it's
>> hard to turn around.
>>   
> 
> I think we're not on the same page here.  As I see it, no interface
> change is needed at all.
> 
> It's true that existing kernels don't handle this properly, which is why
> I said I'm willing to treat it as a bug (and thus the -stable treatment
> etc.).  I admit it's a stretch since this is not going to be trivial
> (though I think less complex that you believe).
> 
> Putting mp_state into the events structure is reasonable regardless of
> this issue (and doable since we haven't pushed it to 2.6.33 yet).  But I
> want to understand why you think it's needed.
> 

That wouldn't be required anymore with the "always queue" policy.

But what would you queue at all? Only mp_state, nmi_pending and
sipi_vector? Or also all the relevant PIC and LAPIC states that might be
changed asynchronously?

Jan


[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 257 bytes --]

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [RFC][PATCH] qemu-kvm: Introduce writeback scope for cpu_synchronize_state
  2009-11-17 16:50                       ` Jan Kiszka
@ 2009-11-17 16:58                         ` Jan Kiszka
  2009-11-18 13:48                           ` Avi Kivity
  2009-11-17 16:59                         ` Avi Kivity
  1 sibling, 1 reply; 19+ messages in thread
From: Jan Kiszka @ 2009-11-17 16:58 UTC (permalink / raw)
  To: Avi Kivity; +Cc: Marcelo Tosatti, kvm, Gleb Natapov

[-- Attachment #1: Type: text/plain, Size: 2794 bytes --]

Jan Kiszka wrote:
> Avi Kivity wrote:
>> On 11/17/2009 04:12 PM, Jan Kiszka wrote:
>>>>> The alternative would be a complex get&lock/put&unlock + a queue for
>>>>> async events during the lock + an option to ignore what was queued when
>>>>> doing a true reset. Back to square #1: we would still need the proposed
>>>>> high-level interface to communicate the difference between replay and
>>>>> drop queue.
>>>>>    
>>>>>       
>>>> There's no need for get+lock / put+unlock; a normal get/put with the
>>>>     
>>> You need to track when to queue and when to apply directly. Call it lock
>>> or call it something else.
>>>   
>> You always queue.  When starting vcpu_run() or reading state to
>> userspace you flush the queue.
> 
> Now I finally got your idea.
> 
>> The hardware equivalent is posting APIC messages, and the core executing
>> them.
>>
>>>> addition that get flushes the queue suffices.  To make sure queued
>>>> events don't affect set you need to stop the entire VM before setting
>>>> state, but you need to do that anyway for non-rmw writes.
>>>>
>>>>     
>>> Well, sounds good, but it will be a non-trivial change in the interface
>>> semantics. At bare minimum, we would need a new mp_state interface. If
>>> we would count mp_state to our new event structure (hmm...), then we
>>> could confine the semantical changes to that new IOCTL pair. But how to
>>> deal with existing KVM kernels with their mp_state interface? It's a bit
>>> like the vcpu state thing: we are already down a specific road, and it's
>>> hard to turn around.
>>>   
>> I think we're not on the same page here.  As I see it, no interface
>> change is needed at all.
>>
>> It's true that existing kernels don't handle this properly, which is why
>> I said I'm willing to treat it as a bug (and thus the -stable treatment
>> etc.).  I admit it's a stretch since this is not going to be trivial
>> (though I think less complex that you believe).
>>
>> Putting mp_state into the events structure is reasonable regardless of
>> this issue (and doable since we haven't pushed it to 2.6.33 yet).  But I
>> want to understand why you think it's needed.
>>
> 
> That wouldn't be required anymore with the "always queue" policy.

Hmm, unless we need mp_state manipulation analogously to KVM_NMI vs.
KVM_SET_VCPU_STATE: The former will queue, the latter write, but may be
overwritten by anything queued. If you just queue KVM_SET_MP_STATE, you
still have a conflict between concurrent manipulations from user space,
something we want to resolve automagically.

> 
> But what would you queue at all? Only mp_state, nmi_pending and
> sipi_vector? Or also all the relevant PIC and LAPIC states that might be
> changed asynchronously?
> 

Jan


[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 257 bytes --]

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [RFC][PATCH] qemu-kvm: Introduce writeback scope for cpu_synchronize_state
  2009-11-17 16:50                       ` Jan Kiszka
  2009-11-17 16:58                         ` Jan Kiszka
@ 2009-11-17 16:59                         ` Avi Kivity
  2009-11-18  9:50                           ` Jan Kiszka
  1 sibling, 1 reply; 19+ messages in thread
From: Avi Kivity @ 2009-11-17 16:59 UTC (permalink / raw)
  To: Jan Kiszka; +Cc: Marcelo Tosatti, kvm, Gleb Natapov

On 11/17/2009 06:50 PM, Jan Kiszka wrote:
>
>> I think we're not on the same page here.  As I see it, no interface
>> change is needed at all.
>>
>> It's true that existing kernels don't handle this properly, which is why
>> I said I'm willing to treat it as a bug (and thus the -stable treatment
>> etc.).  I admit it's a stretch since this is not going to be trivial
>> (though I think less complex that you believe).
>>
>> Putting mp_state into the events structure is reasonable regardless of
>> this issue (and doable since we haven't pushed it to 2.6.33 yet).  But I
>> want to understand why you think it's needed.
>>
>>      
> That wouldn't be required anymore with the "always queue" policy.
>    

It makes sense from a grouping point of view... maybe.

> But what would you queue at all? Only mp_state, nmi_pending and
> sipi_vector?

INIT, too.

> Or also all the relevant PIC and LAPIC states that might be
> changed asynchronously?
>    

LAPIC cannot support RMW atm because of the timer counts.  We may in the 
future support LAPIC as well if needed.  PIC and IOAPIC need full vm 
stop due to many async sources (KVM_IRQ_LINE from multiple threads, 
device assignment interrupts, irqfd, lapic EOI messages).  vcpu state 
has the advantage of being almost completely synchronous.

-- 
error compiling committee.c: too many arguments to function


^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [RFC][PATCH] qemu-kvm: Introduce writeback scope for cpu_synchronize_state
  2009-11-17 16:59                         ` Avi Kivity
@ 2009-11-18  9:50                           ` Jan Kiszka
  2009-11-18 13:46                             ` Avi Kivity
  0 siblings, 1 reply; 19+ messages in thread
From: Jan Kiszka @ 2009-11-18  9:50 UTC (permalink / raw)
  To: Avi Kivity; +Cc: Marcelo Tosatti, kvm, Gleb Natapov

[-- Attachment #1: Type: text/plain, Size: 1525 bytes --]

Avi Kivity wrote:
> On 11/17/2009 06:50 PM, Jan Kiszka wrote:
>>
>>> I think we're not on the same page here.  As I see it, no interface
>>> change is needed at all.
>>>
>>> It's true that existing kernels don't handle this properly, which is why
>>> I said I'm willing to treat it as a bug (and thus the -stable treatment
>>> etc.).  I admit it's a stretch since this is not going to be trivial
>>> (though I think less complex that you believe).
>>>
>>> Putting mp_state into the events structure is reasonable regardless of
>>> this issue (and doable since we haven't pushed it to 2.6.33 yet).  But I
>>> want to understand why you think it's needed.
>>>
>>>      
>> That wouldn't be required anymore with the "always queue" policy.
>>    
> 
> It makes sense from a grouping point of view... maybe.
> 
>> But what would you queue at all? Only mp_state, nmi_pending and
>> sipi_vector?
> 
> INIT, too.

INIT should be handled by queuing up the next mp_state.

BTW, as we do not inject mp_state changes from user space during
runtime, the issue I saw with the current interface is not existing. We
just need to add that queuing feature to asynchronous in-kernel mp_state
changes, and we should be fine.


Let's assume we will have such changes in future kernels: should
qemu-kvm and qemu upstream also bother about older kernels and establish
workarounds? Because then we need to find a cleaner approach than the
current one, and my proposed patch comes into the game again.

Jan


[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 257 bytes --]

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [RFC][PATCH] qemu-kvm: Introduce writeback scope for cpu_synchronize_state
  2009-11-18  9:50                           ` Jan Kiszka
@ 2009-11-18 13:46                             ` Avi Kivity
  0 siblings, 0 replies; 19+ messages in thread
From: Avi Kivity @ 2009-11-18 13:46 UTC (permalink / raw)
  To: Jan Kiszka; +Cc: Marcelo Tosatti, kvm, Gleb Natapov

On 11/18/2009 11:50 AM, Jan Kiszka wrote:
>
>> INIT, too.
>>      
> INIT should be handled by queuing up the next mp_state.
>    

And clearing the previous queue; otherwise our queue is unbounded.

> BTW, as we do not inject mp_state changes from user space during
> runtime, the issue I saw with the current interface is not existing. We
> just need to add that queuing feature to asynchronous in-kernel mp_state
> changes, and we should be fine.
>
>
> Let's assume we will have such changes in future kernels: should
> qemu-kvm and qemu upstream also bother about older kernels and establish
> workarounds? Because then we need to find a cleaner approach than the
> current one, and my proposed patch comes into the game again.
>    

If we treat this as a bug, then we fix the older kernels too.

-- 
Do not meddle in the internals of kernels, for they are subtle and quick to panic.


^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [RFC][PATCH] qemu-kvm: Introduce writeback scope for cpu_synchronize_state
  2009-11-17 16:58                         ` Jan Kiszka
@ 2009-11-18 13:48                           ` Avi Kivity
  0 siblings, 0 replies; 19+ messages in thread
From: Avi Kivity @ 2009-11-18 13:48 UTC (permalink / raw)
  To: Jan Kiszka; +Cc: Marcelo Tosatti, kvm, Gleb Natapov

On 11/17/2009 06:58 PM, Jan Kiszka wrote:
>
>> That wouldn't be required anymore with the "always queue" policy.
>>      
> Hmm, unless we need mp_state manipulation analogously to KVM_NMI vs.
> KVM_SET_VCPU_STATE: The former will queue, the latter write, but may be
> overwritten by anything queued. If you just queue KVM_SET_MP_STATE, you
> still have a conflict between concurrent manipulations from user space,
> something we want to resolve automagically.
>    

The idea is to queue.  But queueing INIT state clears the queue (we're 
pretending to send an INIT signal over the apic bus).

-- 
Do not meddle in the internals of kernels, for they are subtle and quick to panic.


^ permalink raw reply	[flat|nested] 19+ messages in thread

end of thread, other threads:[~2009-11-18 13:48 UTC | newest]

Thread overview: 19+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2009-11-16 17:00 [RFC][PATCH] qemu-kvm: Introduce writeback scope for cpu_synchronize_state Jan Kiszka
2009-11-16 18:20 ` Alexander Graf
2009-11-16 19:14 ` Avi Kivity
2009-11-16 21:22   ` Jan Kiszka
2009-11-17  8:05     ` Avi Kivity
2009-11-17  8:14       ` Jan Kiszka
2009-11-17  8:37         ` Avi Kivity
2009-11-17  9:16           ` Jan Kiszka
2009-11-17 12:37             ` Avi Kivity
2009-11-17 13:05               ` Jan Kiszka
2009-11-17 13:28                 ` Avi Kivity
2009-11-17 14:12                   ` Jan Kiszka
2009-11-17 14:25                     ` Avi Kivity
2009-11-17 16:50                       ` Jan Kiszka
2009-11-17 16:58                         ` Jan Kiszka
2009-11-18 13:48                           ` Avi Kivity
2009-11-17 16:59                         ` Avi Kivity
2009-11-18  9:50                           ` Jan Kiszka
2009-11-18 13:46                             ` Avi Kivity

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.